1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the QtCore module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 3 requirements |
23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
24 | ** |
25 | ** GNU General Public License Usage |
26 | ** Alternatively, this file may be used under the terms of the GNU |
27 | ** General Public License version 2.0 or (at your option) the GNU General |
28 | ** Public license version 3 or any later version approved by the KDE Free |
29 | ** Qt Foundation. The licenses are as published by the Free Software |
30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
31 | ** included in the packaging of this file. Please review the following |
32 | ** information to ensure the GNU General Public License requirements will |
33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
35 | ** |
36 | ** $QT_END_LICENSE$ |
37 | ** |
38 | ****************************************************************************/ |
39 | #include "qisciicodec_p.h" |
40 | #include "qtextcodec_p.h" |
41 | #include "qlist.h" |
42 | |
43 | QT_BEGIN_NAMESPACE |
44 | |
45 | /*! |
46 | \class QIsciiCodec |
47 | \inmodule QtCore |
48 | \brief The QIsciiCodec class provides conversion to and from the ISCII encoding. |
49 | |
50 | \internal |
51 | */ |
52 | |
53 | |
54 | struct Codecs { |
55 | const char name[10]; |
56 | ushort base; |
57 | }; |
58 | |
59 | static const Codecs codecs [] = { |
60 | { .name: "iscii-dev" , .base: 0x900 }, |
61 | { .name: "iscii-bng" , .base: 0x980 }, |
62 | { .name: "iscii-pnj" , .base: 0xa00 }, |
63 | { .name: "iscii-gjr" , .base: 0xa80 }, |
64 | { .name: "iscii-ori" , .base: 0xb00 }, |
65 | { .name: "iscii-tml" , .base: 0xb80 }, |
66 | { .name: "iscii-tlg" , .base: 0xc00 }, |
67 | { .name: "iscii-knd" , .base: 0xc80 }, |
68 | { .name: "iscii-mlm" , .base: 0xd00 } |
69 | }; |
70 | |
71 | QTextCodec *QIsciiCodec::create(const char *name) |
72 | { |
73 | for (int i = 0; i < 9; ++i) { |
74 | if (qTextCodecNameMatch(a: name, b: codecs[i].name)) |
75 | return new QIsciiCodec(i); |
76 | } |
77 | return nullptr; |
78 | } |
79 | |
80 | QIsciiCodec::~QIsciiCodec() |
81 | { |
82 | } |
83 | |
84 | QByteArray QIsciiCodec::name() const |
85 | { |
86 | return codecs[idx].name; |
87 | } |
88 | |
89 | int QIsciiCodec::mibEnum() const |
90 | { |
91 | /* There is no MIBEnum for Iscii */ |
92 | return -3000-idx; |
93 | } |
94 | |
95 | static const uchar inv = 0xFF; |
96 | |
97 | /* iscii range from 0xa0 - 0xff */ |
98 | static const uchar iscii_to_uni_table[0x60] = { |
99 | 0x00, 0x01, 0x02, 0x03, |
100 | 0x05, 0x06, 0x07, 0x08, |
101 | 0x09, 0x0a, 0x0b, 0x0e, |
102 | 0x0f, 0x20, 0x0d, 0x12, |
103 | |
104 | 0x13, 0x14, 0x11, 0x15, |
105 | 0x16, 0x17, 0x18, 0x19, |
106 | 0x1a, 0x1b, 0x1c, 0x1d, |
107 | 0x1e, 0x1f, 0x20, 0x21, |
108 | |
109 | 0x22, 0x23, 0x24, 0x25, |
110 | 0x26, 0x27, 0x28, 0x29, |
111 | 0x2a, 0x2b, 0x2c, 0x2d, |
112 | 0x2e, 0x2f, 0x5f, 0x30, |
113 | |
114 | 0x31, 0x32, 0x33, 0x34, |
115 | 0x35, 0x36, 0x37, 0x38, |
116 | 0x39, inv, 0x3e, 0x3f, |
117 | 0x40, 0x41, 0x42, 0x43, |
118 | |
119 | 0x46, 0x47, 0x48, 0x45, |
120 | 0x4a, 0x4b, 0x4c, 0x49, |
121 | 0x4d, 0x3c, 0x64, 0x00, |
122 | 0x00, 0x00, 0x00, 0x00, |
123 | |
124 | 0x00, 0x66, 0x67, 0x68, |
125 | 0x69, 0x6a, 0x6b, 0x6c, |
126 | 0x6d, 0x6e, 0x6f, 0x00, |
127 | 0x00, 0x00, 0x00, 0x00 |
128 | }; |
129 | |
130 | static const uchar uni_to_iscii_table[0x80] = { |
131 | 0x00, 0xa1, 0xa2, 0xa3, |
132 | 0x00, 0xa4, 0xa5, 0xa6, |
133 | 0xa7, 0xa8, 0xa9, 0xaa, |
134 | 0x00, 0xae, 0xab, 0xac, |
135 | |
136 | 0xad, 0xb2, 0xaf, 0xb0, |
137 | 0xb1, 0xb3, 0xb4, 0xb5, |
138 | 0xb6, 0xb7, 0xb8, 0xb9, |
139 | 0xba, 0xbb, 0xbc, 0xbd, |
140 | |
141 | 0xbe, 0xbf, 0xc0, 0xc1, |
142 | 0xc2, 0xc3, 0xc4, 0xc5, |
143 | 0xc6, 0xc7, 0xc8, 0xc9, |
144 | 0xca, 0xcb, 0xcc, 0xcd, |
145 | |
146 | 0xcf, 0xd0, 0xd1, 0xd2, |
147 | 0xd3, 0xd4, 0xd5, 0xd6, |
148 | 0xd7, 0xd8, 0x00, 0x00, |
149 | 0xe9, 0x00, 0xda, 0xdb, |
150 | |
151 | 0xdc, 0xdd, 0xde, 0xdf, |
152 | 0x00, 0xe3, 0xe0, 0xe1, |
153 | 0xe2, 0xe7, 0xe4, 0xe5, |
154 | 0xe6, 0xe8, 0x00, 0x00, |
155 | |
156 | 0x00, 0x00, 0x00, 0x00, |
157 | 0x00, 0x00, 0x00, 0x00, |
158 | 0x01, 0x02, 0x03, 0x04, // decomposable into the uc codes listed here + nukta |
159 | 0x05, 0x06, 0x07, 0xce, |
160 | |
161 | 0x00, 0x00, 0x00, 0x00, |
162 | 0xea, 0x08, 0xf1, 0xf2, |
163 | 0xf3, 0xf4, 0xf5, 0xf6, |
164 | 0xf7, 0xf8, 0xf9, 0xfa, |
165 | |
166 | 0x00, 0x00, 0x00, 0x00, |
167 | 0x00, 0x00, 0x00, 0x00, |
168 | 0x00, 0x00, 0x00, 0x00, |
169 | 0x00, 0x00, 0x00, 0x00 |
170 | }; |
171 | |
172 | static const uchar uni_to_iscii_pairs[] = { |
173 | 0x00, 0x00, |
174 | 0x15, 0x3c, // 0x958 |
175 | 0x16, 0x3c, // 0x959 |
176 | 0x17, 0x3c, // 0x95a |
177 | 0x1c, 0x3c, // 0x95b |
178 | 0x21, 0x3c, // 0x95c |
179 | 0x22, 0x3c, // 0x95d |
180 | 0x2b, 0x3c, // 0x95e |
181 | 0x64, 0x64 // 0x965 |
182 | }; |
183 | |
184 | |
185 | QByteArray QIsciiCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const |
186 | { |
187 | char replacement = '?'; |
188 | bool halant = false; |
189 | if (state) { |
190 | if (state->flags & ConvertInvalidToNull) |
191 | replacement = 0; |
192 | halant = state->state_data[0]; |
193 | } |
194 | int invalid = 0; |
195 | |
196 | QByteArray result(2 * len, Qt::Uninitialized); //worst case |
197 | |
198 | uchar *ch = reinterpret_cast<uchar *>(result.data()); |
199 | |
200 | const int base = codecs[idx].base; |
201 | |
202 | for (int i =0; i < len; ++i) { |
203 | const ushort codePoint = uc[i].unicode(); |
204 | |
205 | /* The low 7 bits of ISCII is plain ASCII. However, we go all the |
206 | * way up to 0xA0 such that we can roundtrip with convertToUnicode()'s |
207 | * behavior. */ |
208 | if(codePoint < 0xA0) { |
209 | *ch++ = static_cast<uchar>(codePoint); |
210 | continue; |
211 | } |
212 | |
213 | const int pos = codePoint - base; |
214 | if (pos > 0 && pos < 0x80) { |
215 | uchar iscii = uni_to_iscii_table[pos]; |
216 | if (iscii > 0x80) { |
217 | *ch++ = iscii; |
218 | } else if (iscii) { |
219 | Q_ASSERT((2 * iscii) < (sizeof(uni_to_iscii_pairs) / sizeof(uni_to_iscii_pairs[0]))); |
220 | const uchar *pair = uni_to_iscii_pairs + 2*iscii; |
221 | *ch++ = *pair++; |
222 | *ch++ = *pair++; |
223 | } else { |
224 | *ch++ = replacement; |
225 | ++invalid; |
226 | } |
227 | } else { |
228 | if (uc[i].unicode() == 0x200c) { // ZWNJ |
229 | if (halant) |
230 | // Consonant Halant ZWNJ -> Consonant Halant Halant |
231 | *ch++ = 0xe8; |
232 | } else if (uc[i].unicode() == 0x200d) { // ZWJ |
233 | if (halant) |
234 | // Consonant Halant ZWJ -> Consonant Halant Nukta |
235 | *ch++ = 0xe9; |
236 | } else { |
237 | *ch++ = replacement; |
238 | ++invalid; |
239 | } |
240 | } |
241 | halant = (pos == 0x4d); |
242 | } |
243 | result.truncate(pos: ch - (uchar *)result.data()); |
244 | |
245 | if (state) { |
246 | state->invalidChars += invalid; |
247 | state->state_data[0] = halant; |
248 | } |
249 | return result; |
250 | } |
251 | |
252 | QString QIsciiCodec::convertToUnicode(const char* chars, int len, ConverterState *state) const |
253 | { |
254 | bool halant = false; |
255 | if (state) { |
256 | halant = state->state_data[0]; |
257 | } |
258 | |
259 | QString result(len, Qt::Uninitialized); |
260 | QChar *uc = result.data(); |
261 | |
262 | const int base = codecs[idx].base; |
263 | |
264 | for (int i = 0; i < len; ++i) { |
265 | ushort ch = (uchar) chars[i]; |
266 | if (ch < 0xa0) |
267 | *uc++ = ch; |
268 | else { |
269 | ushort c = iscii_to_uni_table[ch - 0xa0]; |
270 | if (halant && (c == inv || c == 0xe9)) { |
271 | // Consonant Halant inv -> Consonant Halant ZWJ |
272 | // Consonant Halant Nukta -> Consonant Halant ZWJ |
273 | *uc++ = QChar(0x200d); |
274 | } else if (halant && c == 0xe8) { |
275 | // Consonant Halant Halant -> Consonant Halant ZWNJ |
276 | *uc++ = QChar(0x200c); |
277 | } else { |
278 | *uc++ = QChar(c+base); |
279 | } |
280 | } |
281 | halant = ((uchar)chars[i] == 0xe8); |
282 | } |
283 | result.resize(size: uc - result.unicode()); |
284 | |
285 | if (state) { |
286 | state->state_data[0] = halant; |
287 | } |
288 | return result; |
289 | } |
290 | |
291 | QT_END_NAMESPACE |
292 | |