1 | /* -*- C++ -*- |
2 | SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org> |
3 | |
4 | SPDX-License-Identifier: MIT |
5 | */ |
6 | |
7 | #ifndef __JPCNTX_H__ |
8 | #define __JPCNTX_H__ |
9 | |
10 | #include "kcodecs_export.h" |
11 | |
12 | #include <qglobal.h> |
13 | |
14 | #define NUM_OF_CATEGORY 6 |
15 | |
16 | #define ENOUGH_REL_THRESHOLD 100 |
17 | #define MAX_REL_THRESHOLD 1000 |
18 | namespace kencodingprober |
19 | { |
20 | // hiragana frequency category table |
21 | extern const char jp2CharContext[83][83]; |
22 | |
23 | class KCODECS_NO_EXPORT JapaneseContextAnalysis |
24 | { |
25 | public: |
26 | JapaneseContextAnalysis() |
27 | { |
28 | Reset(); |
29 | } |
30 | virtual ~JapaneseContextAnalysis() |
31 | { |
32 | } |
33 | |
34 | void HandleData(const char *aBuf, unsigned int aLen); |
35 | |
36 | void HandleOneChar(const char *aStr, unsigned int aCharLen) |
37 | { |
38 | int order; |
39 | |
40 | // if we received enough data, stop here |
41 | if (mTotalRel > MAX_REL_THRESHOLD) { |
42 | mDone = true; |
43 | } |
44 | if (mDone) { |
45 | return; |
46 | } |
47 | |
48 | // Only 2-bytes characters are of our interest |
49 | order = (aCharLen == 2) ? GetOrder(str: aStr) : -1; |
50 | if (order != -1 && mLastCharOrder != -1) { |
51 | mTotalRel++; |
52 | // count this sequence to its category counter |
53 | mRelSample[(int)jp2CharContext[mLastCharOrder][order]]++; |
54 | } |
55 | mLastCharOrder = order; |
56 | } |
57 | |
58 | float GetConfidence(); |
59 | void Reset(void); |
60 | void SetOpion() |
61 | { |
62 | } |
63 | bool GotEnoughData() |
64 | { |
65 | return mTotalRel > ENOUGH_REL_THRESHOLD; |
66 | } |
67 | |
68 | protected: |
69 | virtual int GetOrder(const char *str, unsigned int *charLen) = 0; |
70 | virtual int GetOrder(const char *str) = 0; |
71 | |
72 | // category counters, each integer counts sequence in its category |
73 | unsigned int mRelSample[NUM_OF_CATEGORY]; |
74 | |
75 | // total sequence received |
76 | unsigned int mTotalRel; |
77 | |
78 | // The order of previous char |
79 | int mLastCharOrder; |
80 | |
81 | // if last byte in current buffer is not the last byte of a character, we |
82 | // need to know how many byte to skip in next buffer. |
83 | unsigned int mNeedToSkipCharNum; |
84 | |
85 | // If this flag is set to true, detection is done and conclusion has been made |
86 | bool mDone; |
87 | }; |
88 | |
89 | class KCODECS_NO_EXPORT SJISContextAnalysis : public JapaneseContextAnalysis |
90 | { |
91 | // SJISContextAnalysis(){}; |
92 | protected: |
93 | int GetOrder(const char *str, unsigned int *charLen) override; |
94 | |
95 | int GetOrder(const char *str) override |
96 | { |
97 | // We only interested in Hiragana, so first byte is '\202' |
98 | if (*str == '\202' && (unsigned char)*(str + 1) >= (unsigned char)0x9f && (unsigned char)*(str + 1) <= (unsigned char)0xf1) { |
99 | return (unsigned char)*(str + 1) - (unsigned char)0x9f; |
100 | } |
101 | return -1; |
102 | } |
103 | }; |
104 | |
105 | class KCODECS_NO_EXPORT EUCJPContextAnalysis : public JapaneseContextAnalysis |
106 | { |
107 | protected: |
108 | int GetOrder(const char *str, unsigned int *charLen) override; |
109 | int GetOrder(const char *str) override |
110 | // We only interested in Hiragana, so first byte is '\244' |
111 | { |
112 | if (*str == '\244' // |
113 | && (unsigned char)*(str + 1) >= (unsigned char)0xa1 // |
114 | && (unsigned char)*(str + 1) <= (unsigned char)0xf3) { |
115 | return (unsigned char)*(str + 1) - (unsigned char)0xa1; |
116 | } |
117 | return -1; |
118 | } |
119 | }; |
120 | } |
121 | #endif /* __JPCNTX_H__ */ |
122 | |