| 1 | /* -*- C++ -*- |
| 2 | SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org> |
| 3 | |
| 4 | SPDX-License-Identifier: MIT |
| 5 | */ |
| 6 | |
| 7 | #include "nsCodingStateMachine.h" |
| 8 | |
| 9 | namespace kencodingprober |
| 10 | { |
| 11 | static const unsigned int HZ_cls[256 / 8] = { |
| 12 | PCK4BITS(1, 0, 0, 0, 0, 0, 0, 0), // 00 - 07 |
| 13 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 08 - 0f |
| 14 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17 |
| 15 | PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f |
| 16 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 20 - 27 |
| 17 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 28 - 2f |
| 18 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37 |
| 19 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f |
| 20 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 40 - 47 |
| 21 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 48 - 4f |
| 22 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57 |
| 23 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f |
| 24 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67 |
| 25 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f |
| 26 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77 |
| 27 | PCK4BITS(0, 0, 0, 4, 0, 5, 2, 0), // 78 - 7f |
| 28 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 80 - 87 |
| 29 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 88 - 8f |
| 30 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 90 - 97 |
| 31 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // 98 - 9f |
| 32 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // a0 - a7 |
| 33 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // a8 - af |
| 34 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // b0 - b7 |
| 35 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // b8 - bf |
| 36 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // c0 - c7 |
| 37 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // c8 - cf |
| 38 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // d0 - d7 |
| 39 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // d8 - df |
| 40 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // e0 - e7 |
| 41 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // e8 - ef |
| 42 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1), // f0 - f7 |
| 43 | PCK4BITS(1, 1, 1, 1, 1, 1, 1, 1) // f8 - ff |
| 44 | }; |
| 45 | |
| 46 | static const unsigned int HZ_st[6] = { |
| 47 | PCK4BITS(eStart, eError, 3, eStart, eStart, eStart, eError, eError), // 00-07 |
| 48 | PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe), // 08-0f |
| 49 | PCK4BITS(eItsMe, eItsMe, eError, eError, eStart, eStart, 4, eError), // 10-17 |
| 50 | PCK4BITS(5, eError, 6, eError, 5, 5, 4, eError), // 18-1f |
| 51 | PCK4BITS(4, eError, 4, 4, 4, eError, 4, eError), // 20-27 |
| 52 | PCK4BITS(4, eItsMe, eStart, eStart, eStart, eStart, eStart, eStart) // 28-2f |
| 53 | }; |
| 54 | |
// Character-length table for the HZ-GB-2312 model: six entries, all zero.
// The entry count equals the classFactor (6) used by HZSMModel.
static const unsigned int HZCharLenTable[] = {
    0, 0, 0,
    0, 0, 0
};
| 56 | |
| 57 | const SMModel HZSMModel = { |
| 58 | .classTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: HZ_cls}, |
| 59 | .classFactor: 6, |
| 60 | .stateTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: HZ_st}, |
| 61 | .charLenTable: HZCharLenTable, |
| 62 | .name: "HZ-GB-2312" , |
| 63 | }; |
| 64 | |
| 65 | static const unsigned int ISO2022CN_cls[256 / 8] = { |
| 66 | PCK4BITS(2, 0, 0, 0, 0, 0, 0, 0), // 00 - 07 |
| 67 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 08 - 0f |
| 68 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17 |
| 69 | PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f |
| 70 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 20 - 27 |
| 71 | PCK4BITS(0, 3, 0, 0, 0, 0, 0, 0), // 28 - 2f |
| 72 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37 |
| 73 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f |
| 74 | PCK4BITS(0, 0, 0, 4, 0, 0, 0, 0), // 40 - 47 |
| 75 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 48 - 4f |
| 76 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57 |
| 77 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f |
| 78 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67 |
| 79 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f |
| 80 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77 |
| 81 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f |
| 82 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87 |
| 83 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f |
| 84 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 90 - 97 |
| 85 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 98 - 9f |
| 86 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7 |
| 87 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af |
| 88 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7 |
| 89 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf |
| 90 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7 |
| 91 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf |
| 92 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7 |
| 93 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df |
| 94 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7 |
| 95 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef |
| 96 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7 |
| 97 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2) // f8 - ff |
| 98 | }; |
| 99 | |
| 100 | static const unsigned int ISO2022CN_st[8] = { |
| 101 | PCK4BITS(eStart, 3, eError, eStart, eStart, eStart, eStart, eStart), // 00-07 |
| 102 | PCK4BITS(eStart, eError, eError, eError, eError, eError, eError, eError), // 08-0f |
| 103 | PCK4BITS(eError, eError, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe), // 10-17 |
| 104 | PCK4BITS(eItsMe, eItsMe, eItsMe, eError, eError, eError, 4, eError), // 18-1f |
| 105 | PCK4BITS(eError, eError, eError, eItsMe, eError, eError, eError, eError), // 20-27 |
| 106 | PCK4BITS(5, 6, eError, eError, eError, eError, eError, eError), // 28-2f |
| 107 | PCK4BITS(eError, eError, eError, eItsMe, eError, eError, eError, eError), // 30-37 |
| 108 | PCK4BITS(eError, eError, eError, eError, eError, eItsMe, eError, eStart) // 38-3f |
| 109 | }; |
| 110 | |
// Character-length table for the ISO-2022-CN model: nine entries, all zero.
// The entry count equals the classFactor (9) used by ISO2022CNSMModel.
static const unsigned int ISO2022CNCharLenTable[] = {
    0, 0, 0,
    0, 0, 0,
    0, 0, 0
};
| 112 | |
| 113 | const SMModel ISO2022CNSMModel = { |
| 114 | .classTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: ISO2022CN_cls}, |
| 115 | .classFactor: 9, |
| 116 | .stateTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: ISO2022CN_st}, |
| 117 | .charLenTable: ISO2022CNCharLenTable, |
| 118 | .name: "ISO-2022-CN" , |
| 119 | }; |
| 120 | |
| 121 | static const unsigned int ISO2022JP_cls[256 / 8] = { |
| 122 | PCK4BITS(2, 0, 0, 0, 0, 0, 0, 0), // 00 - 07 |
| 123 | PCK4BITS(0, 0, 0, 0, 0, 0, 2, 2), // 08 - 0f |
| 124 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17 |
| 125 | PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f |
| 126 | PCK4BITS(0, 0, 0, 0, 7, 0, 0, 0), // 20 - 27 |
| 127 | PCK4BITS(3, 0, 0, 0, 0, 0, 0, 0), // 28 - 2f |
| 128 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37 |
| 129 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f |
| 130 | PCK4BITS(6, 0, 4, 0, 8, 0, 0, 0), // 40 - 47 |
| 131 | PCK4BITS(0, 9, 5, 0, 0, 0, 0, 0), // 48 - 4f |
| 132 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57 |
| 133 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f |
| 134 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67 |
| 135 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f |
| 136 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77 |
| 137 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f |
| 138 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87 |
| 139 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f |
| 140 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 90 - 97 |
| 141 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 98 - 9f |
| 142 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7 |
| 143 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af |
| 144 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7 |
| 145 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf |
| 146 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7 |
| 147 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf |
| 148 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7 |
| 149 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df |
| 150 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7 |
| 151 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef |
| 152 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7 |
| 153 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2) // f8 - ff |
| 154 | }; |
| 155 | |
| 156 | static const unsigned int ISO2022JP_st[9] = { |
| 157 | PCK4BITS(eStart, 3, eError, eStart, eStart, eStart, eStart, eStart), // 00-07 |
| 158 | PCK4BITS(eStart, eStart, eError, eError, eError, eError, eError, eError), // 08-0f |
| 159 | PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe), // 10-17 |
| 160 | PCK4BITS(eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eItsMe, eError, eError), // 18-1f |
| 161 | PCK4BITS(eError, 5, eError, eError, eError, 4, eError, eError), // 20-27 |
| 162 | PCK4BITS(eError, eError, eError, 6, eItsMe, eError, eItsMe, eError), // 28-2f |
| 163 | PCK4BITS(eError, eError, eError, eError, eError, eError, eItsMe, eItsMe), // 30-37 |
| 164 | PCK4BITS(eError, eError, eError, eItsMe, eError, eError, eError, eError), // 38-3f |
| 165 | PCK4BITS(eError, eError, eError, eError, eItsMe, eError, eStart, eStart) // 40-47 |
| 166 | }; |
| 167 | |
// Character-length table for the ISO-2022-JP model: ten entries, all zero.
// The entry count equals the classFactor (10) used by ISO2022JPSMModel.
static const unsigned int ISO2022JPCharLenTable[] = {
    0, 0, 0, 0, 0,
    0, 0, 0, 0, 0
};
| 169 | |
| 170 | const SMModel ISO2022JPSMModel = { |
| 171 | .classTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: ISO2022JP_cls}, |
| 172 | .classFactor: 10, |
| 173 | .stateTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: ISO2022JP_st}, |
| 174 | .charLenTable: ISO2022JPCharLenTable, |
| 175 | .name: "ISO-2022-JP" , |
| 176 | }; |
| 177 | |
| 178 | static const unsigned int ISO2022KR_cls[256 / 8] = { |
| 179 | PCK4BITS(2, 0, 0, 0, 0, 0, 0, 0), // 00 - 07 |
| 180 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 08 - 0f |
| 181 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 10 - 17 |
| 182 | PCK4BITS(0, 0, 0, 1, 0, 0, 0, 0), // 18 - 1f |
| 183 | PCK4BITS(0, 0, 0, 0, 3, 0, 0, 0), // 20 - 27 |
| 184 | PCK4BITS(0, 4, 0, 0, 0, 0, 0, 0), // 28 - 2f |
| 185 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 30 - 37 |
| 186 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 38 - 3f |
| 187 | PCK4BITS(0, 0, 0, 5, 0, 0, 0, 0), // 40 - 47 |
| 188 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 48 - 4f |
| 189 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 50 - 57 |
| 190 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 58 - 5f |
| 191 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 60 - 67 |
| 192 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 68 - 6f |
| 193 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 70 - 77 |
| 194 | PCK4BITS(0, 0, 0, 0, 0, 0, 0, 0), // 78 - 7f |
| 195 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 80 - 87 |
| 196 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 88 - 8f |
| 197 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 90 - 97 |
| 198 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // 98 - 9f |
| 199 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a0 - a7 |
| 200 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // a8 - af |
| 201 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b0 - b7 |
| 202 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // b8 - bf |
| 203 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c0 - c7 |
| 204 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // c8 - cf |
| 205 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d0 - d7 |
| 206 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // d8 - df |
| 207 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e0 - e7 |
| 208 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // e8 - ef |
| 209 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2), // f0 - f7 |
| 210 | PCK4BITS(2, 2, 2, 2, 2, 2, 2, 2) // f8 - ff |
| 211 | }; |
| 212 | |
| 213 | static const unsigned int ISO2022KR_st[5] = { |
| 214 | PCK4BITS(eStart, 3, eError, eStart, eStart, eStart, eError, eError), // 00-07 |
| 215 | PCK4BITS(eError, eError, eError, eError, eItsMe, eItsMe, eItsMe, eItsMe), // 08-0f |
| 216 | PCK4BITS(eItsMe, eItsMe, eError, eError, eError, 4, eError, eError), // 10-17 |
| 217 | PCK4BITS(eError, eError, eError, eError, 5, eError, eError, eError), // 18-1f |
| 218 | PCK4BITS(eError, eError, eError, eItsMe, eStart, eStart, eStart, eStart) // 20-27 |
| 219 | }; |
| 220 | |
// Character-length table for the ISO-2022-KR model: six entries, all zero.
// The entry count equals the classFactor (6) used by ISO2022KRSMModel.
static const unsigned int ISO2022KRCharLenTable[] = {
    0, 0, 0,
    0, 0, 0
};
| 222 | |
| 223 | const SMModel ISO2022KRSMModel = { |
| 224 | .classTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: ISO2022KR_cls}, |
| 225 | .classFactor: 6, |
| 226 | .stateTable: {.idxsft: eIdxSft4bits, .sftmsk: eSftMsk4bits, .bitsft: eBitSft4bits, .unitmsk: eUnitMsk4bits, .data: ISO2022KR_st}, |
| 227 | .charLenTable: ISO2022KRCharLenTable, |
| 228 | .name: "ISO-2022-KR" , |
| 229 | }; |
| 230 | } |
| 231 | |