/*  -*- C++ -*-
    SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>

    SPDX-License-Identifier: MIT
*/
6
7#include "nsCodingStateMachine.h"
8
9namespace kencodingprober
10{
11static const unsigned int HZ_cls[256 / 8] = {
12 PCK4BITS(1, 0, 0, 0, 0, 0, 0, 0), // 00 - 07
13 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 08 - 0f
14 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17
15 PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f
16 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 20 - 27
17 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 28 - 2f
18 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37
19 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f
20 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 40 - 47
21 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 48 - 4f
22 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57
23 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f
24 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67
25 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f
26 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77
27 PCK4BITS(0, 0, 0, 4, 0, 5, 2, 0), // 78 - 7f
28 PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 80 - 87
29 PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 88 - 8f
30 PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 90 - 97
31 PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 98 - 9f
32 PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // a0 - a7
33 PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // a8 - af
34 PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // b0 - b7
35 PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // b8 - bf
36 PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // c0 - c7
37 PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // c8 - cf
38 PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // d0 - d7
39 PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // d8 - df
40 PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // e0 - e7
41 PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // e8 - ef
42 PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // f0 - f7
43 PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1) // f8 - ff
44};
45
46static const unsigned int HZ_st[6] = {
47 PCK4BITS(eStart, eError, 3, eStart, eStart, eStart, eError, eError), // 00-07
48 PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe), // 08-0f
49 PCK4BITS(eItsMe, eItsMe, eError, eError, eStart, eStart, 4, eError), // 10-17
50 PCK4BITS(5, eError, 6, eError, 5, 5, 4, eError), // 18-1f
51 PCK4BITS(4, eError, 4, 4, 4, eError, 4, eError), // 20-27
52 PCK4BITS(4, eItsMe, eStart, eStart, eStart, eStart, eStart, eStart) // 28-2f
53};
54
/*
 * Character-length table handed to HZSMModel as charLenTable.  It holds six
 * entries - the same count as that model's classFactor - and all are zero.
 */
static const unsigned int HZCharLenTable[] = {0, 0, 0, 0, 0, 0};
56
57const SMModel HZSMModel = {
58 .classTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: HZ_cls},
59 .classFactor: 6,
60 .stateTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: HZ_st},
61 .charLenTable: HZCharLenTable,
62 .name: "HZ-GB-2312",
63};
64
65static const unsigned int ISO2022CN_cls[256 / 8] = {
66 PCK4BITS(2, 0, 0, 0, 0, 0, 0, 0), // 00 - 07
67 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 08 - 0f
68 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17
69 PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f
70 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 20 - 27
71 PCK4BITS(0, 3, 0, 0, 0, 0, 0, 0), // 28 - 2f
72 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37
73 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f
74 PCK4BITS(0, 0, 0, 4, 0, 0, 0, 0), // 40 - 47
75 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 48 - 4f
76 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57
77 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f
78 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67
79 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f
80 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77
81 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f
82 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87
83 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f
84 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 90 - 97
85 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 98 - 9f
86 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
87 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af
88 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
89 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
90 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
91 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf
92 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
93 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
94 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7
95 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef
96 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7
97 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2) // f8 - ff
98};
99
100static const unsigned int ISO2022CN_st[8] = {
101 PCK4BITS(eStart, 3, eError, eStart, eStart, eStart, eStart, eStart), // 00-07
102 PCK4BITS(eStart, eError, eError, eError, eError, eError, eError, eError), // 08-0f
103 PCK4BITS(eError, eError, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe), // 10-17
104 PCK4BITS(eItsMe, eItsMe, eItsMe, eError, eError, eError, 4, eError), // 18-1f
105 PCK4BITS(eError, eError, eError, eItsMe, eError, eError, eError, eError), // 20-27
106 PCK4BITS(5, 6, eError, eError, eError, eError, eError, eError), // 28-2f
107 PCK4BITS(eError, eError, eError, eItsMe, eError, eError, eError, eError), // 30-37
108 PCK4BITS(eError, eError, eError, eError, eError, eItsMe, eError, eStart) // 38-3f
109};
110
/*
 * Character-length table handed to ISO2022CNSMModel as charLenTable.  It
 * holds nine entries - the same count as that model's classFactor - and all
 * are zero.
 */
static const unsigned int ISO2022CNCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
112
113const SMModel ISO2022CNSMModel = {
114 .classTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: ISO2022CN_cls},
115 .classFactor: 9,
116 .stateTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: ISO2022CN_st},
117 .charLenTable: ISO2022CNCharLenTable,
118 .name: "ISO-2022-CN",
119};
120
121static const unsigned int ISO2022JP_cls[256 / 8] = {
122 PCK4BITS(2, 0, 0, 0, 0, 0, 0, 0), // 00 - 07
123 PCK4BITS(0, 0, 0, 0, 0, 0, 2, 2), // 08 - 0f
124 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17
125 PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f
126 PCK4BITS(0, 0, 0, 0, 7, 0, 0, 0), // 20 - 27
127 PCK4BITS(3, 0, 0, 0, 0, 0, 0, 0), // 28 - 2f
128 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37
129 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f
130 PCK4BITS(6, 0, 4, 0, 8, 0, 0, 0), // 40 - 47
131 PCK4BITS(0, 9, 5, 0, 0, 0, 0, 0), // 48 - 4f
132 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57
133 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f
134 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67
135 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f
136 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77
137 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f
138 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87
139 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f
140 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 90 - 97
141 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 98 - 9f
142 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
143 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af
144 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
145 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
146 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
147 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf
148 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
149 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
150 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7
151 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef
152 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7
153 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2) // f8 - ff
154};
155
156static const unsigned int ISO2022JP_st[9] = {
157 PCK4BITS(eStart, 3, eError, eStart, eStart, eStart, eStart, eStart), // 00-07
158 PCK4BITS(eStart, eStart, eError, eError, eError, eError, eError, eError), // 08-0f
159 PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe), // 10-17
160 PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eError, eError), // 18-1f
161 PCK4BITS(eError, 5, eError, eError, eError, 4, eError, eError), // 20-27
162 PCK4BITS(eError, eError, eError, 6, eItsMe, eError, eItsMe, eError), // 28-2f
163 PCK4BITS(eError, eError, eError, eError, eError, eError, eItsMe, eItsMe), // 30-37
164 PCK4BITS(eError, eError, eError, eItsMe, eError, eError, eError, eError), // 38-3f
165 PCK4BITS(eError, eError, eError, eError, eItsMe, eError, eStart, eStart) // 40-47
166};
167
/*
 * Character-length table handed to ISO2022JPSMModel as charLenTable.  It
 * holds ten entries - the same count as that model's classFactor - and all
 * are zero.
 */
static const unsigned int ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
169
170const SMModel ISO2022JPSMModel = {
171 .classTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: ISO2022JP_cls},
172 .classFactor: 10,
173 .stateTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: ISO2022JP_st},
174 .charLenTable: ISO2022JPCharLenTable,
175 .name: "ISO-2022-JP",
176};
177
178static const unsigned int ISO2022KR_cls[256 / 8] = {
179 PCK4BITS(2, 0, 0, 0, 0, 0, 0, 0), // 00 - 07
180 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 08 - 0f
181 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17
182 PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f
183 PCK4BITS(0, 0, 0, 0, 3, 0, 0, 0), // 20 - 27
184 PCK4BITS(0, 4, 0, 0, 0, 0, 0, 0), // 28 - 2f
185 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37
186 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f
187 PCK4BITS(0, 0, 0, 5, 0, 0, 0, 0), // 40 - 47
188 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 48 - 4f
189 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57
190 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f
191 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67
192 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f
193 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77
194 PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f
195 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87
196 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f
197 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 90 - 97
198 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 98 - 9f
199 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7
200 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af
201 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7
202 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf
203 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7
204 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf
205 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7
206 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df
207 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7
208 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef
209 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7
210 PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2) // f8 - ff
211};
212
213static const unsigned int ISO2022KR_st[5] = {
214 PCK4BITS(eStart, 3, eError, eStart, eStart, eStart, eError, eError), // 00-07
215 PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe), // 08-0f
216 PCK4BITS(eItsMe, eItsMe, eError, eError, eError, 4, eError, eError), // 10-17
217 PCK4BITS(eError, eError, eError, eError, 5, eError, eError, eError), // 18-1f
218 PCK4BITS(eError, eError, eError, eItsMe, eStart, eStart, eStart, eStart) // 20-27
219};
220
/*
 * Character-length table handed to ISO2022KRSMModel as charLenTable.  It
 * holds six entries - the same count as that model's classFactor - and all
 * are zero.
 */
static const unsigned int ISO2022KRCharLenTable[] = {0, 0, 0, 0, 0, 0};
222
223const SMModel ISO2022KRSMModel = {
224 .classTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: ISO2022KR_cls},
225 .classFactor: 6,
226 .stateTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: ISO2022KR_st},
227 .charLenTable: ISO2022KRCharLenTable,
228 .name: "ISO-2022-KR",
229};
230}

// source code of kcodecs/src/probers/nsEscSM.cpp