1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4// Most of the code here was originally written by Hans Petter Bieker,
5// and is included in Qt with the author's permission, and the grateful
6// thanks of the Qt team.
7
8#include "qtsciicodec_p.h"
9#include "qlist.h"
10
11QT_BEGIN_NAMESPACE
12
13static unsigned char qt_UnicodeToTSCII(ushort u1, ushort u2, ushort u3);
14static unsigned int qt_TSCIIToUnicode(unsigned int code, uint *s);
15
16/*! \class QTsciiCodec
17 \inmodule QtCore5Compat
18 \reentrant
19 \internal
20*/
21
22/*!
23 Destroys the text codec object.
24*/
25QTsciiCodec::~QTsciiCodec()
26{
27}
28
29/*!
30 Converts the first \a len characters in \a uc from Unicode to this
31 encoding, and returns the result in a byte array. The \a state contains
32 some conversion flags, and is used by the codec to maintain state
33 information.
34*/
35QByteArray QTsciiCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
36{
37 char replacement = '?';
38 if (state) {
39 if (state->flags & ConvertInvalidToNull)
40 replacement = 0;
41 }
42 int invalid = 0;
43
44 QByteArray rstr(len, QT_PREPEND_NAMESPACE(Qt::Uninitialized));
45 uchar* cursor = (uchar*)rstr.data();
46 for (int i = 0; i < len; i++) {
47 QChar ch = uc[i];
48 uchar j;
49 if (ch.row() == 0x00 && ch.cell() < 0x80) {
50 // ASCII
51 j = ch.cell();
52 } else if ((j = qt_UnicodeToTSCII(u1: uc[i].unicode(),
53 u2: uc[i + 1].unicode(),
54 u3: uc[i + 2].unicode()))) {
55 // We have to check the combined chars first!
56 i += 2;
57 } else if ((j = qt_UnicodeToTSCII(u1: uc[i].unicode(),
58 u2: uc[i + 1].unicode(), u3: 0))) {
59 i++;
60 } else if ((j = qt_UnicodeToTSCII(u1: uc[i].unicode(), u2: 0, u3: 0))) {
61 } else {
62 // Error
63 j = replacement;
64 ++invalid;
65 }
66 *cursor++ = j;
67 }
68 rstr.resize(size: cursor - (const uchar*)rstr.constData());
69
70 if (state) {
71 state->invalidChars += invalid;
72 }
73 return rstr;
74}
75
76/*!
77 Converts the first \a len characters in \a chars from this encoding
78 to Unicode, and returns the result in a QString. The \a state contains
79 some conversion flags, and is used by the codec to maintain state
80 information.
81*/
82QString QTsciiCodec::convertToUnicode(const char* chars, int len, ConverterState *state) const
83{
84 QChar replacement = QChar::ReplacementCharacter;
85 if (state) {
86 if (state->flags & ConvertInvalidToNull)
87 replacement = QChar::Null;
88 }
89 int invalid = 0;
90
91 QString result;
92 for (int i = 0; i < len; i++) {
93 uchar ch = chars[i];
94 if (ch < 0x80) {
95 // ASCII
96 result += QLatin1Char(ch);
97 } else if (IsTSCIIChar(ch)) {
98 // TSCII
99 uint s[3];
100 uint u = qt_TSCIIToUnicode(code: ch, s);
101 uint *p = s;
102 while (u--) {
103 uint c = *p++;
104 if (c)
105 result += QChar(c);
106 else {
107 result += replacement;
108 ++invalid;
109 }
110 }
111 } else {
112 // Invalid
113 result += replacement;
114 ++invalid;
115 }
116 }
117
118 if (state) {
119 state->invalidChars += invalid;
120 }
121 return result;
122}
123
124/*!
125 Returns the official name for the encoding that is handled by the codec.
126
127 \sa QTextCodec::name()
128*/
129QByteArray QTsciiCodec::name() const
130{
131 return "TSCII";
132}
133
134/*!
135 Returns the MIB enum for the encoding.
136
137 \sa QTextCodec::mibEnum()
138*/
139int QTsciiCodec::mibEnum() const
140{
141 return 2107;
142}
143
// Index of the last row of UnToTs: 125 rows, so the last one is 124.
static const int UnToTsLast = 124;

// Unicode -> TSCII mapping. Each row holds a sequence of up to three UTF-16
// units (unused positions are 0) followed by the TSCII byte encoding it.
//
// The table is binary-searched, so the rows MUST stay sorted in ascending
// order of (FIRST, SECOND, THIRD) and every key must be unique.
static const ushort UnToTs [][4] = {
    // *Sorted* list of TSCII mapping for unicode chars
    //FIRST  SECOND  THIRD   TSCII
    {0x00A0, 0x0000, 0x0000, 0xA0},
    {0x00A9, 0x0000, 0x0000, 0xA9},
    {0x0B83, 0x0000, 0x0000, 0xB7},
    {0x0B85, 0x0000, 0x0000, 0xAB},
    {0x0B86, 0x0000, 0x0000, 0xAC},
    {0x0B87, 0x0000, 0x0000, 0xAD},
    {0x0B88, 0x0000, 0x0000, 0xAE},
    {0x0B89, 0x0000, 0x0000, 0xAF},
    {0x0B8A, 0x0000, 0x0000, 0xB0},
    {0x0B8E, 0x0000, 0x0000, 0xB1},
    {0x0B8F, 0x0000, 0x0000, 0xB2},
    {0x0B90, 0x0000, 0x0000, 0xB3},
    {0x0B92, 0x0000, 0x0000, 0xB4},
    {0x0B93, 0x0000, 0x0000, 0xB5},
    {0x0B94, 0x0000, 0x0000, 0xB6},
    {0x0B95, 0x0000, 0x0000, 0xB8},
    {0x0B95, 0x0BC1, 0x0000, 0xCC},
    {0x0B95, 0x0BC2, 0x0000, 0xDC},
    {0x0B95, 0x0BCD, 0x0000, 0xEC},
    {0x0B99, 0x0000, 0x0000, 0xB9},
    {0x0B99, 0x0BC1, 0x0000, 0x99},
    {0x0B99, 0x0BC2, 0x0000, 0x9B},
    {0x0B99, 0x0BCD, 0x0000, 0xED},
    {0x0B9A, 0x0000, 0x0000, 0xBA},
    {0x0B9A, 0x0BC1, 0x0000, 0xCD},
    {0x0B9A, 0x0BC2, 0x0000, 0xDD},
    {0x0B9A, 0x0BCD, 0x0000, 0xEE},
    {0x0B9C, 0x0000, 0x0000, 0x83},
    {0x0B9C, 0x0BCD, 0x0000, 0x88},
    {0x0B9E, 0x0000, 0x0000, 0xBB},
    {0x0B9E, 0x0BC1, 0x0000, 0x9A},
    {0x0B9E, 0x0BC2, 0x0000, 0x9C},
    {0x0B9E, 0x0BCD, 0x0000, 0xEF},
    {0x0B9F, 0x0000, 0x0000, 0xBC},
    {0x0B9F, 0x0BBF, 0x0000, 0xCA},
    {0x0B9F, 0x0BC0, 0x0000, 0xCB},
    {0x0B9F, 0x0BC1, 0x0000, 0xCE},
    {0x0B9F, 0x0BC2, 0x0000, 0xDE},
    {0x0B9F, 0x0BCD, 0x0000, 0xF0},
    {0x0BA3, 0x0000, 0x0000, 0xBD},
    {0x0BA3, 0x0BC1, 0x0000, 0xCF},
    {0x0BA3, 0x0BC2, 0x0000, 0xDF},
    {0x0BA3, 0x0BCD, 0x0000, 0xF1},
    {0x0BA4, 0x0000, 0x0000, 0xBE},
    {0x0BA4, 0x0BC1, 0x0000, 0xD0},
    {0x0BA4, 0x0BC2, 0x0000, 0xE0},
    {0x0BA4, 0x0BCD, 0x0000, 0xF2},
    {0x0BA8, 0x0000, 0x0000, 0xBF},
    {0x0BA8, 0x0BC1, 0x0000, 0xD1},
    {0x0BA8, 0x0BC2, 0x0000, 0xE1},
    {0x0BA8, 0x0BCD, 0x0000, 0xF3},
    {0x0BA9, 0x0000, 0x0000, 0xC9},
    {0x0BA9, 0x0BC1, 0x0000, 0xDB},
    {0x0BA9, 0x0BC2, 0x0000, 0xEB},
    {0x0BA9, 0x0BCD, 0x0000, 0xFD},
    {0x0BAA, 0x0000, 0x0000, 0xC0},
    {0x0BAA, 0x0BC1, 0x0000, 0xD2},
    {0x0BAA, 0x0BC2, 0x0000, 0xE2},
    {0x0BAA, 0x0BCD, 0x0000, 0xF4},
    {0x0BAE, 0x0000, 0x0000, 0xC1},
    {0x0BAE, 0x0BC1, 0x0000, 0xD3},
    {0x0BAE, 0x0BC2, 0x0000, 0xE3},
    {0x0BAE, 0x0BCD, 0x0000, 0xF5},
    {0x0BAF, 0x0000, 0x0000, 0xC2},
    {0x0BAF, 0x0BC1, 0x0000, 0xD4},
    {0x0BAF, 0x0BC2, 0x0000, 0xE4},
    {0x0BAF, 0x0BCD, 0x0000, 0xF6},
    {0x0BB0, 0x0000, 0x0000, 0xC3},
    {0x0BB0, 0x0BC1, 0x0000, 0xD5},
    {0x0BB0, 0x0BC2, 0x0000, 0xE5},
    {0x0BB0, 0x0BCD, 0x0000, 0xF7},
    {0x0BB1, 0x0000, 0x0000, 0xC8},
    {0x0BB1, 0x0BC1, 0x0000, 0xDA},
    {0x0BB1, 0x0BC2, 0x0000, 0xEA},
    {0x0BB1, 0x0BCD, 0x0000, 0xFC},
    {0x0BB2, 0x0000, 0x0000, 0xC4},
    {0x0BB2, 0x0BC1, 0x0000, 0xD6},
    {0x0BB2, 0x0BC2, 0x0000, 0xE6},
    {0x0BB2, 0x0BCD, 0x0000, 0xF8},
    {0x0BB3, 0x0000, 0x0000, 0xC7},
    {0x0BB3, 0x0BC1, 0x0000, 0xD9},
    {0x0BB3, 0x0BC2, 0x0000, 0xE9},
    {0x0BB3, 0x0BCD, 0x0000, 0xFB},
    {0x0BB4, 0x0000, 0x0000, 0xC6},
    {0x0BB4, 0x0BC1, 0x0000, 0xD8},
    {0x0BB4, 0x0BC2, 0x0000, 0xE8},
    {0x0BB4, 0x0BCD, 0x0000, 0xFA},
    {0x0BB5, 0x0000, 0x0000, 0xC5},
    {0x0BB5, 0x0BC1, 0x0000, 0xD7},
    {0x0BB5, 0x0BC2, 0x0000, 0xE7},
    {0x0BB5, 0x0BCD, 0x0000, 0xF9},
    {0x0BB7, 0x0000, 0x0000, 0x84},
    {0x0BB7, 0x0BCD, 0x0000, 0x89},
    {0x0BB8, 0x0000, 0x0000, 0x85},
    {0x0BB8, 0x0BCD, 0x0000, 0x8A},
    {0x0BB9, 0x0000, 0x0000, 0x86},
    {0x0BB9, 0x0BCD, 0x0000, 0x8B},
    {0x0BBE, 0x0000, 0x0000, 0xA1},
    {0x0BBF, 0x0000, 0x0000, 0xA2},
    {0x0BC0, 0x0000, 0x0000, 0xA3},
    {0x0BC1, 0x0000, 0x0000, 0xA4},
    {0x0BC2, 0x0000, 0x0000, 0xA5},
    {0x0BC6, 0x0000, 0x0000, 0xA6},
    {0x0BC7, 0x0000, 0x0000, 0xA7},
    {0x0BC8, 0x0000, 0x0000, 0xA8},
    {0x0BCC, 0x0000, 0x0000, 0xAA},
    {0x0BE6, 0x0000, 0x0000, 0x80},
    {0x0BE7, 0x0000, 0x0000, 0x81},
    {0x0BE7, 0x0BB7, 0x0000, 0x87},
    {0x0BE7, 0x0BB7, 0x0B82, 0x8C},
    {0x0BE8, 0x0000, 0x0000, 0x8D},
    {0x0BE9, 0x0000, 0x0000, 0x8E},
    {0x0BEA, 0x0000, 0x0000, 0x8F},
    {0x0BEB, 0x0000, 0x0000, 0x90},
    {0x0BEC, 0x0000, 0x0000, 0x95},
    {0x0BED, 0x0000, 0x0000, 0x96},
    {0x0BEE, 0x0000, 0x0000, 0x97},
    {0x0BEF, 0x0000, 0x0000, 0x98},
    {0x0BF0, 0x0000, 0x0000, 0x9D},
    {0x0BF1, 0x0000, 0x0000, 0x9E},
    {0x0BF2, 0x0000, 0x0000, 0x9F},
    {0x2018, 0x0000, 0x0000, 0x91},
    {0x2019, 0x0000, 0x0000, 0x92},
    {0x201C, 0x0000, 0x0000, 0x93}, // double quote left
    {0x201D, 0x0000, 0x0000, 0x94}  // double quote right
};

static_assert(int(sizeof(UnToTs) / sizeof(UnToTs[0])) == UnToTsLast + 1,
              "UnToTsLast must be the index of the last row of UnToTs");
274
// TSCII -> Unicode mapping, indexed by (TSCII byte - 0x80). Each row holds
// up to three UTF-16 units; unused positions are 0 and an all-zero row means
// that the byte has no mapping.
static const ushort TsToUn [][3] = {
    // Starting at 0x80
    {0x0BE6, 0x0000, 0x0000},
    {0x0BE7, 0x0000, 0x0000},
    {0x0000, 0x0000, 0x0000}, // not covered in the unicode document
    {0x0B9C, 0x0000, 0x0000},
    {0x0BB7, 0x0000, 0x0000},
    {0x0BB8, 0x0000, 0x0000},
    {0x0BB9, 0x0000, 0x0000},
    {0x0BE7, 0x0BB7, 0x0000},
    {0x0B9C, 0x0BCD, 0x0000},
    {0x0BB7, 0x0BCD, 0x0000},
    {0x0BB8, 0x0BCD, 0x0000},
    {0x0BB9, 0x0BCD, 0x0000},
    {0x0BE7, 0x0BB7, 0x0B82},
    {0x0BE8, 0x0000, 0x0000},
    {0x0BE9, 0x0000, 0x0000},
    {0x0BEA, 0x0000, 0x0000},
    // 0x90
    {0x0BEB, 0x0000, 0x0000},
    {0x2018, 0x0000, 0x0000},
    {0x2019, 0x0000, 0x0000},
    {0x201C, 0x0000, 0x0000}, // double quote left
    {0x201D, 0x0000, 0x0000}, // double quote right
    {0x0BEC, 0x0000, 0x0000},
    {0x0BED, 0x0000, 0x0000},
    {0x0BEE, 0x0000, 0x0000},
    {0x0BEF, 0x0000, 0x0000},
    {0x0B99, 0x0BC1, 0x0000},
    {0x0B9E, 0x0BC1, 0x0000},
    {0x0B99, 0x0BC2, 0x0000},
    {0x0B9E, 0x0BC2, 0x0000},
    {0x0BF0, 0x0000, 0x0000},
    {0x0BF1, 0x0000, 0x0000},
    {0x0BF2, 0x0000, 0x0000},
    // 0xA0
    {0x00A0, 0x0000, 0x0000},
    {0x0BBE, 0x0000, 0x0000},
    {0x0BBF, 0x0000, 0x0000},
    {0x0BC0, 0x0000, 0x0000},
    {0x0BC1, 0x0000, 0x0000},
    {0x0BC2, 0x0000, 0x0000},
    {0x0BC6, 0x0000, 0x0000},
    {0x0BC7, 0x0000, 0x0000},
    {0x0BC8, 0x0000, 0x0000},
    {0x00A9, 0x0000, 0x0000},
    {0x0BCC, 0x0000, 0x0000},
    {0x0B85, 0x0000, 0x0000},
    {0x0B86, 0x0000, 0x0000},
    {0x0B87, 0x0000, 0x0000},
    {0x0B88, 0x0000, 0x0000},
    {0x0B89, 0x0000, 0x0000},
    // 0xB0
    {0x0B8A, 0x0000, 0x0000},
    {0x0B8E, 0x0000, 0x0000},
    {0x0B8F, 0x0000, 0x0000},
    {0x0B90, 0x0000, 0x0000},
    {0x0B92, 0x0000, 0x0000},
    {0x0B93, 0x0000, 0x0000},
    {0x0B94, 0x0000, 0x0000},
    {0x0B83, 0x0000, 0x0000},
    {0x0B95, 0x0000, 0x0000},
    {0x0B99, 0x0000, 0x0000},
    {0x0B9A, 0x0000, 0x0000},
    {0x0B9E, 0x0000, 0x0000},
    {0x0B9F, 0x0000, 0x0000},
    {0x0BA3, 0x0000, 0x0000},
    {0x0BA4, 0x0000, 0x0000},
    {0x0BA8, 0x0000, 0x0000},
    // 0xC0
    {0x0BAA, 0x0000, 0x0000},
    {0x0BAE, 0x0000, 0x0000},
    {0x0BAF, 0x0000, 0x0000},
    {0x0BB0, 0x0000, 0x0000},
    {0x0BB2, 0x0000, 0x0000},
    {0x0BB5, 0x0000, 0x0000},
    {0x0BB4, 0x0000, 0x0000},
    {0x0BB3, 0x0000, 0x0000},
    {0x0BB1, 0x0000, 0x0000},
    {0x0BA9, 0x0000, 0x0000},
    {0x0B9F, 0x0BBF, 0x0000},
    {0x0B9F, 0x0BC0, 0x0000},
    {0x0B95, 0x0BC1, 0x0000},
    {0x0B9A, 0x0BC1, 0x0000},
    {0x0B9F, 0x0BC1, 0x0000},
    {0x0BA3, 0x0BC1, 0x0000},
    // 0xD0
    {0x0BA4, 0x0BC1, 0x0000},
    {0x0BA8, 0x0BC1, 0x0000},
    {0x0BAA, 0x0BC1, 0x0000},
    {0x0BAE, 0x0BC1, 0x0000},
    {0x0BAF, 0x0BC1, 0x0000},
    {0x0BB0, 0x0BC1, 0x0000},
    {0x0BB2, 0x0BC1, 0x0000},
    {0x0BB5, 0x0BC1, 0x0000},
    {0x0BB4, 0x0BC1, 0x0000},
    {0x0BB3, 0x0BC1, 0x0000},
    {0x0BB1, 0x0BC1, 0x0000},
    {0x0BA9, 0x0BC1, 0x0000},
    {0x0B95, 0x0BC2, 0x0000},
    {0x0B9A, 0x0BC2, 0x0000},
    {0x0B9F, 0x0BC2, 0x0000},
    {0x0BA3, 0x0BC2, 0x0000},
    // 0xE0
    {0x0BA4, 0x0BC2, 0x0000},
    {0x0BA8, 0x0BC2, 0x0000},
    {0x0BAA, 0x0BC2, 0x0000},
    {0x0BAE, 0x0BC2, 0x0000},
    {0x0BAF, 0x0BC2, 0x0000},
    {0x0BB0, 0x0BC2, 0x0000},
    {0x0BB2, 0x0BC2, 0x0000},
    {0x0BB5, 0x0BC2, 0x0000},
    {0x0BB4, 0x0BC2, 0x0000},
    {0x0BB3, 0x0BC2, 0x0000},
    {0x0BB1, 0x0BC2, 0x0000},
    {0x0BA9, 0x0BC2, 0x0000},
    {0x0B95, 0x0BCD, 0x0000},
    {0x0B99, 0x0BCD, 0x0000},
    {0x0B9A, 0x0BCD, 0x0000},
    {0x0B9E, 0x0BCD, 0x0000},
    // 0xF0
    {0x0B9F, 0x0BCD, 0x0000},
    {0x0BA3, 0x0BCD, 0x0000},
    {0x0BA4, 0x0BCD, 0x0000},
    {0x0BA8, 0x0BCD, 0x0000},
    {0x0BAA, 0x0BCD, 0x0000},
    {0x0BAE, 0x0BCD, 0x0000},
    {0x0BAF, 0x0BCD, 0x0000},
    {0x0BB0, 0x0BCD, 0x0000},
    {0x0BB2, 0x0BCD, 0x0000},
    {0x0BB5, 0x0BCD, 0x0000},
    {0x0BB4, 0x0BCD, 0x0000},
    {0x0BB3, 0x0BCD, 0x0000},
    {0x0BB1, 0x0BCD, 0x0000},
    {0x0BA9, 0x0BCD, 0x0000},
    {0x0000, 0x0000, 0x0000},
    {0x0000, 0x0000, 0x0000}
};

// The table is indexed with (code & 0x7f), so it needs exactly 128 rows.
static_assert(sizeof(TsToUn) / sizeof(TsToUn[0]) == 128,
              "TsToUn must cover every byte from 0x80 to 0xFF");
406
// Compares the first len elements of s1 and s2 element by element.
// Returns 0 if they are all equal (or len is 0); otherwise returns the
// difference s1[k] - s2[k] at the first position k where they differ.
static int cmp(const ushort *s1, const ushort *s2, size_t len)
{
    for (size_t k = 0; k < len; ++k) {
        const int delta = int(s1[k]) - int(s2[k]);
        if (delta != 0)
            return delta;
    }
    return 0;
}
416
417static unsigned char qt_UnicodeToTSCII(ushort u1, ushort u2, ushort u3)
418{
419 ushort s[3];
420 s[0] = u1;
421 s[1] = u2;
422 s[2] = u3;
423
424 int a = 0; // start pos
425 int b = UnToTsLast; // end pos
426
427 // do a binary search for the composed unicode in the list
428 while (a <= b) {
429 int w = (a + b) / 2;
430 int j = cmp(s1: UnToTs[w], s2: s, len: 3);
431
432 if (j == 0)
433 // found it
434 return UnToTs[w][3];
435
436 if (j < 0)
437 a = w + 1;
438 else
439 b = w - 1;
440 }
441
442 return 0;
443}
444
445static unsigned int qt_TSCIIToUnicode(uint code, uint *s)
446{
447 int len = 0;
448 for (int i = 0; i < 3; i++) {
449 uint u = TsToUn[code & 0x7f][i];
450 s[i] = u;
451 if (s[i]) len = i + 1;
452 }
453
454 return len;
455}
456
457QT_END_NAMESPACE
458

source code of qt5compat/src/core5/codecs/qtsciicodec.cpp