1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3#include "qisciicodec_p.h"
4#include "qtextcodec_p.h"
5#include "qlist.h"
6
7QT_BEGIN_NAMESPACE
8
9/*!
10 \class QIsciiCodec
11 \inmodule QtCore5Compat
12 \brief The QIsciiCodec class provides conversion to and from the ISCII encoding.
13
14 \internal
15*/
16
17
18struct Codecs {
19 const char name[10];
20 ushort base;
21};
22
23static const Codecs codecs [] = {
24 { .name: "iscii-dev", .base: 0x900 },
25 { .name: "iscii-bng", .base: 0x980 },
26 { .name: "iscii-pnj", .base: 0xa00 },
27 { .name: "iscii-gjr", .base: 0xa80 },
28 { .name: "iscii-ori", .base: 0xb00 },
29 { .name: "iscii-tml", .base: 0xb80 },
30 { .name: "iscii-tlg", .base: 0xc00 },
31 { .name: "iscii-knd", .base: 0xc80 },
32 { .name: "iscii-mlm", .base: 0xd00 }
33};
34
35QTextCodec *QIsciiCodec::create(const char *name)
36{
37 QIsciiCodec *codec = nullptr;
38 for (int i = 0; i < 9; ++i) {
39 if (qTextCodecNameMatch(a: name, b: codecs[i].name)) {
40 codec = new QIsciiCodec(i);
41 break;
42 }
43 }
44 return codec;
45}
46
47QIsciiCodec::~QIsciiCodec()
48{
49}
50
51QByteArray QIsciiCodec::name() const
52{
53 return codecs[idx].name;
54}
55
56int QIsciiCodec::mibEnum() const
57{
58 /* There is no MIBEnum for Iscii */
59 return -3000-idx;
60}
61
62static const uchar inv = 0xFF;
63
64/* iscii range from 0xa0 - 0xff */
65static const uchar iscii_to_uni_table[0x60] = {
66 0x00, 0x01, 0x02, 0x03,
67 0x05, 0x06, 0x07, 0x08,
68 0x09, 0x0a, 0x0b, 0x0e,
69 0x0f, 0x20, 0x0d, 0x12,
70
71 0x13, 0x14, 0x11, 0x15,
72 0x16, 0x17, 0x18, 0x19,
73 0x1a, 0x1b, 0x1c, 0x1d,
74 0x1e, 0x1f, 0x20, 0x21,
75
76 0x22, 0x23, 0x24, 0x25,
77 0x26, 0x27, 0x28, 0x29,
78 0x2a, 0x2b, 0x2c, 0x2d,
79 0x2e, 0x2f, 0x5f, 0x30,
80
81 0x31, 0x32, 0x33, 0x34,
82 0x35, 0x36, 0x37, 0x38,
83 0x39, inv, 0x3e, 0x3f,
84 0x40, 0x41, 0x42, 0x43,
85
86 0x46, 0x47, 0x48, 0x45,
87 0x4a, 0x4b, 0x4c, 0x49,
88 0x4d, 0x3c, 0x64, 0x00,
89 0x00, 0x00, 0x00, 0x00,
90
91 0x00, 0x66, 0x67, 0x68,
92 0x69, 0x6a, 0x6b, 0x6c,
93 0x6d, 0x6e, 0x6f, 0x00,
94 0x00, 0x00, 0x00, 0x00
95};
96
97static const uchar uni_to_iscii_table[0x80] = {
98 0x00, 0xa1, 0xa2, 0xa3,
99 0x00, 0xa4, 0xa5, 0xa6,
100 0xa7, 0xa8, 0xa9, 0xaa,
101 0x00, 0xae, 0xab, 0xac,
102
103 0xad, 0xb2, 0xaf, 0xb0,
104 0xb1, 0xb3, 0xb4, 0xb5,
105 0xb6, 0xb7, 0xb8, 0xb9,
106 0xba, 0xbb, 0xbc, 0xbd,
107
108 0xbe, 0xbf, 0xc0, 0xc1,
109 0xc2, 0xc3, 0xc4, 0xc5,
110 0xc6, 0xc7, 0xc8, 0xc9,
111 0xca, 0xcb, 0xcc, 0xcd,
112
113 0xcf, 0xd0, 0xd1, 0xd2,
114 0xd3, 0xd4, 0xd5, 0xd6,
115 0xd7, 0xd8, 0x00, 0x00,
116 0xe9, 0x00, 0xda, 0xdb,
117
118 0xdc, 0xdd, 0xde, 0xdf,
119 0x00, 0xe3, 0xe0, 0xe1,
120 0xe2, 0xe7, 0xe4, 0xe5,
121 0xe6, 0xe8, 0x00, 0x00,
122
123 0x00, 0x00, 0x00, 0x00,
124 0x00, 0x00, 0x00, 0x00,
125 0x01, 0x02, 0x03, 0x04, // decomposable into the uc codes listed here + nukta
126 0x05, 0x06, 0x07, 0xce,
127
128 0x00, 0x00, 0x00, 0x00,
129 0xea, 0x08, 0xf1, 0xf2,
130 0xf3, 0xf4, 0xf5, 0xf6,
131 0xf7, 0xf8, 0xf9, 0xfa,
132
133 0x00, 0x00, 0x00, 0x00,
134 0x00, 0x00, 0x00, 0x00,
135 0x00, 0x00, 0x00, 0x00,
136 0x00, 0x00, 0x00, 0x00
137};
138
139static const uchar uni_to_iscii_pairs[] = {
140 0x00, 0x00,
141 0x15, 0x3c, // 0x958
142 0x16, 0x3c, // 0x959
143 0x17, 0x3c, // 0x95a
144 0x1c, 0x3c, // 0x95b
145 0x21, 0x3c, // 0x95c
146 0x22, 0x3c, // 0x95d
147 0x2b, 0x3c, // 0x95e
148 0x64, 0x64 // 0x965
149};
150
151
152QByteArray QIsciiCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
153{
154 char replacement = '?';
155 bool halant = false;
156 if (state) {
157 if (state->flags & ConvertInvalidToNull)
158 replacement = 0;
159 halant = state->state_data[0];
160 }
161 int invalid = 0;
162
163 QByteArray result(2 * len, QT_PREPEND_NAMESPACE(Qt::Uninitialized)); // worst case
164
165 uchar *ch = reinterpret_cast<uchar *>(result.data());
166
167 const int base = codecs[idx].base;
168
169 for (int i =0; i < len; ++i) {
170 const ushort codePoint = uc[i].unicode();
171
172 /* The low 7 bits of ISCII is plain ASCII. However, we go all the
173 * way up to 0xA0 such that we can roundtrip with convertToUnicode()'s
174 * behavior. */
175 if(codePoint < 0xA0) {
176 *ch++ = static_cast<uchar>(codePoint);
177 continue;
178 }
179
180 const int pos = codePoint - base;
181 if (pos > 0 && pos < 0x80) {
182 uchar iscii = uni_to_iscii_table[pos];
183 if (iscii > 0x80) {
184 *ch++ = iscii;
185 } else if (iscii) {
186 Q_ASSERT((2 * iscii) < (sizeof(uni_to_iscii_pairs) / sizeof(uni_to_iscii_pairs[0])));
187 const uchar *pair = uni_to_iscii_pairs + 2*iscii;
188 *ch++ = *pair++;
189 *ch++ = *pair++;
190 } else {
191 *ch++ = replacement;
192 ++invalid;
193 }
194 } else {
195 if (uc[i].unicode() == 0x200c) { // ZWNJ
196 if (halant)
197 // Consonant Halant ZWNJ -> Consonant Halant Halant
198 *ch++ = 0xe8;
199 } else if (uc[i].unicode() == 0x200d) { // ZWJ
200 if (halant)
201 // Consonant Halant ZWJ -> Consonant Halant Nukta
202 *ch++ = 0xe9;
203 } else {
204 *ch++ = replacement;
205 ++invalid;
206 }
207 }
208 halant = (pos == 0x4d);
209 }
210 result.truncate(pos: ch - (uchar *)result.data());
211
212 if (state) {
213 state->invalidChars += invalid;
214 state->state_data[0] = halant;
215 }
216 return result;
217}
218
219QString QIsciiCodec::convertToUnicode(const char* chars, int len, ConverterState *state) const
220{
221 bool halant = false;
222 if (state) {
223 halant = state->state_data[0];
224 }
225
226 QString result(len, QT_PREPEND_NAMESPACE(Qt::Uninitialized));
227 QChar *uc = result.data();
228
229 const int base = codecs[idx].base;
230
231 for (int i = 0; i < len; ++i) {
232 char16_t ch = (uchar) chars[i];
233 if (ch < 0xa0)
234 *uc++ = ch;
235 else {
236 ushort c = iscii_to_uni_table[ch - 0xa0];
237 if (halant && (ch == inv || ch == 0xe9)) {
238 // Consonant Halant inv -> Consonant Halant ZWJ
239 // Consonant Halant Nukta -> Consonant Halant ZWJ
240 *uc++ = QChar(0x200d);
241 } else if (halant && ch == 0xe8) {
242 // Consonant Halant Halant -> Consonant Halant ZWNJ
243 *uc++ = QChar(0x200c);
244 } else {
245 *uc++ = QChar(c+base);
246 }
247 }
248 halant = ((uchar)chars[i] == 0xe8);
249 }
250 result.resize(size: uc - result.unicode());
251
252 if (state) {
253 state->state_data[0] = halant;
254 }
255 return result;
256}
257
258QT_END_NAMESPACE
259

source code of qt5compat/src/core5/codecs/qisciicodec.cpp