/*  -*- C++ -*-
    SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>

    SPDX-License-Identifier: MIT
*/
6 | |
7 | #include "nsCodingStateMachine.h" |
8 | |
9 | namespace kencodingprober |
10 | { |
11 | static const unsigned int HZ_cls[256 / 8] = { |
12 | PCK4BITS(1, 0, 0, 0, 0, 0, 0, 0), // 00 - 07 |
13 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 08 - 0f |
14 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17 |
15 | PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f |
16 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 20 - 27 |
17 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 28 - 2f |
18 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37 |
19 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f |
20 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 40 - 47 |
21 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 48 - 4f |
22 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57 |
23 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f |
24 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67 |
25 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f |
26 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77 |
27 | PCK4BITS(0, 0, 0, 4, 0, 5, 2, 0), // 78 - 7f |
28 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 80 - 87 |
29 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 88 - 8f |
30 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 90 - 97 |
31 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 98 - 9f |
32 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // a0 - a7 |
33 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // a8 - af |
34 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // b0 - b7 |
35 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // b8 - bf |
36 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // c0 - c7 |
37 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // c8 - cf |
38 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // d0 - d7 |
39 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // d8 - df |
40 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // e0 - e7 |
41 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // e8 - ef |
42 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // f0 - f7 |
43 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1) // f8 - ff |
44 | }; |
45 | |
46 | static const unsigned int HZ_st[6] = { |
47 | PCK4BITS(eStart, eError, 3, eStart, eStart, eStart, eError, eError), // 00-07 |
48 | PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe), // 08-0f |
49 | PCK4BITS(eItsMe, eItsMe, eError, eError, eStart, eStart, 4, eError), // 10-17 |
50 | PCK4BITS(5, eError, 6, eError, 5, 5, 4, eError), // 18-1f |
51 | PCK4BITS(4, eError, 4, 4, 4, eError, 4, eError), // 20-27 |
52 | PCK4BITS(4, eItsMe, eStart, eStart, eStart, eStart, eStart, eStart) // 28-2f |
53 | }; |
54 | |
// Character-length table handed to HZSMModel: six entries (one per byte
// class, matching classFactor 6), all zero.
static const unsigned int HZCharLenTable[] = {0, 0, 0, 0, 0, 0};
56 | |
57 | const SMModel HZSMModel = { |
58 | .classTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: HZ_cls}, |
59 | .classFactor: 6, |
60 | .stateTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: HZ_st}, |
61 | .charLenTable: HZCharLenTable, |
62 | .name: "HZ-GB-2312" , |
63 | }; |
64 | |
65 | static const unsigned int ISO2022CN_cls[256 / 8] = { |
66 | PCK4BITS(2, 0, 0, 0, 0, 0, 0, 0), // 00 - 07 |
67 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 08 - 0f |
68 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17 |
69 | PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f |
70 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 20 - 27 |
71 | PCK4BITS(0, 3, 0, 0, 0, 0, 0, 0), // 28 - 2f |
72 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37 |
73 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f |
74 | PCK4BITS(0, 0, 0, 4, 0, 0, 0, 0), // 40 - 47 |
75 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 48 - 4f |
76 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57 |
77 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f |
78 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67 |
79 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f |
80 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77 |
81 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f |
82 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87 |
83 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f |
84 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 90 - 97 |
85 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 98 - 9f |
86 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7 |
87 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af |
88 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7 |
89 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf |
90 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7 |
91 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf |
92 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7 |
93 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df |
94 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7 |
95 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef |
96 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7 |
97 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2) // f8 - ff |
98 | }; |
99 | |
100 | static const unsigned int ISO2022CN_st[8] = { |
101 | PCK4BITS(eStart, 3, eError, eStart, eStart, eStart, eStart, eStart), // 00-07 |
102 | PCK4BITS(eStart, eError, eError, eError, eError, eError, eError, eError), // 08-0f |
103 | PCK4BITS(eError, eError, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe), // 10-17 |
104 | PCK4BITS(eItsMe, eItsMe, eItsMe, eError, eError, eError, 4, eError), // 18-1f |
105 | PCK4BITS(eError, eError, eError, eItsMe, eError, eError, eError, eError), // 20-27 |
106 | PCK4BITS(5, 6, eError, eError, eError, eError, eError, eError), // 28-2f |
107 | PCK4BITS(eError, eError, eError, eItsMe, eError, eError, eError, eError), // 30-37 |
108 | PCK4BITS(eError, eError, eError, eError, eError, eItsMe, eError, eStart) // 38-3f |
109 | }; |
110 | |
// Character-length table handed to ISO2022CNSMModel: nine entries (one per
// byte class, matching classFactor 9), all zero.
static const unsigned int ISO2022CNCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
112 | |
113 | const SMModel ISO2022CNSMModel = { |
114 | .classTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: ISO2022CN_cls}, |
115 | .classFactor: 9, |
116 | .stateTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: ISO2022CN_st}, |
117 | .charLenTable: ISO2022CNCharLenTable, |
118 | .name: "ISO-2022-CN" , |
119 | }; |
120 | |
121 | static const unsigned int ISO2022JP_cls[256 / 8] = { |
122 | PCK4BITS(2, 0, 0, 0, 0, 0, 0, 0), // 00 - 07 |
123 | PCK4BITS(0, 0, 0, 0, 0, 0, 2, 2), // 08 - 0f |
124 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17 |
125 | PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f |
126 | PCK4BITS(0, 0, 0, 0, 7, 0, 0, 0), // 20 - 27 |
127 | PCK4BITS(3, 0, 0, 0, 0, 0, 0, 0), // 28 - 2f |
128 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37 |
129 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f |
130 | PCK4BITS(6, 0, 4, 0, 8, 0, 0, 0), // 40 - 47 |
131 | PCK4BITS(0, 9, 5, 0, 0, 0, 0, 0), // 48 - 4f |
132 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57 |
133 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f |
134 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67 |
135 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f |
136 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77 |
137 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f |
138 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87 |
139 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f |
140 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 90 - 97 |
141 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 98 - 9f |
142 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7 |
143 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af |
144 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7 |
145 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf |
146 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7 |
147 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf |
148 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7 |
149 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df |
150 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7 |
151 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef |
152 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7 |
153 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2) // f8 - ff |
154 | }; |
155 | |
156 | static const unsigned int ISO2022JP_st[9] = { |
157 | PCK4BITS(eStart, 3, eError, eStart, eStart, eStart, eStart, eStart), // 00-07 |
158 | PCK4BITS(eStart, eStart, eError, eError, eError, eError, eError, eError), // 08-0f |
159 | PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe), // 10-17 |
160 | PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eError, eError), // 18-1f |
161 | PCK4BITS(eError, 5, eError, eError, eError, 4, eError, eError), // 20-27 |
162 | PCK4BITS(eError, eError, eError, 6, eItsMe, eError, eItsMe, eError), // 28-2f |
163 | PCK4BITS(eError, eError, eError, eError, eError, eError, eItsMe, eItsMe), // 30-37 |
164 | PCK4BITS(eError, eError, eError, eItsMe, eError, eError, eError, eError), // 38-3f |
165 | PCK4BITS(eError, eError, eError, eError, eItsMe, eError, eStart, eStart) // 40-47 |
166 | }; |
167 | |
// Character-length table handed to ISO2022JPSMModel: ten entries (one per
// byte class, matching classFactor 10), all zero.
static const unsigned int ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
169 | |
170 | const SMModel ISO2022JPSMModel = { |
171 | .classTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: ISO2022JP_cls}, |
172 | .classFactor: 10, |
173 | .stateTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: ISO2022JP_st}, |
174 | .charLenTable: ISO2022JPCharLenTable, |
175 | .name: "ISO-2022-JP" , |
176 | }; |
177 | |
178 | static const unsigned int ISO2022KR_cls[256 / 8] = { |
179 | PCK4BITS(2, 0, 0, 0, 0, 0, 0, 0), // 00 - 07 |
180 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 08 - 0f |
181 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17 |
182 | PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f |
183 | PCK4BITS(0, 0, 0, 0, 3, 0, 0, 0), // 20 - 27 |
184 | PCK4BITS(0, 4, 0, 0, 0, 0, 0, 0), // 28 - 2f |
185 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37 |
186 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f |
187 | PCK4BITS(0, 0, 0, 5, 0, 0, 0, 0), // 40 - 47 |
188 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 48 - 4f |
189 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57 |
190 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f |
191 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67 |
192 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f |
193 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77 |
194 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f |
195 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87 |
196 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f |
197 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 90 - 97 |
198 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 98 - 9f |
199 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7 |
200 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af |
201 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7 |
202 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf |
203 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7 |
204 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf |
205 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7 |
206 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df |
207 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7 |
208 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef |
209 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7 |
210 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2) // f8 - ff |
211 | }; |
212 | |
213 | static const unsigned int ISO2022KR_st[5] = { |
214 | PCK4BITS(eStart, 3, eError, eStart, eStart, eStart, eError, eError), // 00-07 |
215 | PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe), // 08-0f |
216 | PCK4BITS(eItsMe, eItsMe, eError, eError, eError, 4, eError, eError), // 10-17 |
217 | PCK4BITS(eError, eError, eError, eError, 5, eError, eError, eError), // 18-1f |
218 | PCK4BITS(eError, eError, eError, eItsMe, eStart, eStart, eStart, eStart) // 20-27 |
219 | }; |
220 | |
// Character-length table handed to ISO2022KRSMModel: six entries (one per
// byte class, matching classFactor 6), all zero.
static const unsigned int ISO2022KRCharLenTable[] = {0, 0, 0, 0, 0, 0};
222 | |
223 | const SMModel ISO2022KRSMModel = { |
224 | .classTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: ISO2022KR_cls}, |
225 | .classFactor: 6, |
226 | .stateTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: ISO2022KR_st}, |
227 | .charLenTable: ISO2022KRCharLenTable, |
228 | .name: "ISO-2022-KR" , |
229 | }; |
230 | } |
231 | |