1 | /* -*- C++ -*- |
2 | SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org> |
3 | |
4 | SPDX-License-Identifier: MIT |
5 | */ |
6 | |
7 | #ifndef __JPCNTX_H__ |
8 | #define __JPCNTX_H__ |
9 | |
10 | #include "kcodecs_export.h" |
11 | |
12 | #include <qglobal.h> |
13 | |
// Number of category counters kept in JapaneseContextAnalysis::mRelSample.
#define NUM_OF_CATEGORY 6

// GotEnoughData() reports true once more than this many sequences were counted.
#define ENOUGH_REL_THRESHOLD 100
// HandleOneChar() stops counting once more than this many sequences were counted.
#define MAX_REL_THRESHOLD 1000
18 | namespace kencodingprober |
19 | { |
// Hiragana frequency category table.
// Indexed as [order of the previous character][order of the current character];
// each entry is used as the index of the category counter to increment.
extern const char jp2CharContext[83][83];
22 | |
23 | class KCODECS_NO_EXPORT JapaneseContextAnalysis |
24 | { |
25 | public: |
26 | JapaneseContextAnalysis() |
27 | { |
28 | Reset(); |
29 | } |
30 | virtual ~JapaneseContextAnalysis() |
31 | { |
32 | } |
33 | |
34 | void HandleData(const char *aBuf, unsigned int aLen); |
35 | |
36 | void HandleOneChar(const char *aStr, unsigned int aCharLen) |
37 | { |
38 | int order; |
39 | |
40 | // if we received enough data, stop here |
41 | if (mTotalRel > MAX_REL_THRESHOLD) { |
42 | mDone = true; |
43 | } |
44 | if (mDone) { |
45 | return; |
46 | } |
47 | |
48 | // Only 2-bytes characters are of our interest |
49 | order = (aCharLen == 2) ? GetOrder(str: aStr) : -1; |
50 | if (order != -1 && mLastCharOrder != -1) { |
51 | mTotalRel++; |
52 | // count this sequence to its category counter |
53 | mRelSample[(int)jp2CharContext[mLastCharOrder][order]]++; |
54 | } |
55 | mLastCharOrder = order; |
56 | } |
57 | |
58 | float GetConfidence(); |
59 | void Reset(void); |
60 | bool GotEnoughData() |
61 | { |
62 | return mTotalRel > ENOUGH_REL_THRESHOLD; |
63 | } |
64 | |
65 | protected: |
66 | virtual int GetOrder(const char *str, unsigned int *charLen) = 0; |
67 | virtual int GetOrder(const char *str) = 0; |
68 | |
69 | // category counters, each integer counts sequence in its category |
70 | unsigned int mRelSample[NUM_OF_CATEGORY]; |
71 | |
72 | // total sequence received |
73 | unsigned int mTotalRel; |
74 | |
75 | // The order of previous char |
76 | int mLastCharOrder; |
77 | |
78 | // if last byte in current buffer is not the last byte of a character, we |
79 | // need to know how many byte to skip in next buffer. |
80 | unsigned int mNeedToSkipCharNum; |
81 | |
82 | // If this flag is set to true, detection is done and conclusion has been made |
83 | bool mDone; |
84 | }; |
85 | |
86 | class KCODECS_NO_EXPORT SJISContextAnalysis : public JapaneseContextAnalysis |
87 | { |
88 | // SJISContextAnalysis(){}; |
89 | protected: |
90 | int GetOrder(const char *str, unsigned int *charLen) override; |
91 | |
92 | int GetOrder(const char *str) override |
93 | { |
94 | // We only interested in Hiragana, so first byte is '\202' |
95 | if (*str == '\202' && (unsigned char)*(str + 1) >= (unsigned char)0x9f && (unsigned char)*(str + 1) <= (unsigned char)0xf1) { |
96 | return (unsigned char)*(str + 1) - (unsigned char)0x9f; |
97 | } |
98 | return -1; |
99 | } |
100 | }; |
101 | |
102 | class KCODECS_NO_EXPORT EUCJPContextAnalysis : public JapaneseContextAnalysis |
103 | { |
104 | protected: |
105 | int GetOrder(const char *str, unsigned int *charLen) override; |
106 | int GetOrder(const char *str) override |
107 | // We only interested in Hiragana, so first byte is '\244' |
108 | { |
109 | if (*str == '\244' // |
110 | && (unsigned char)*(str + 1) >= (unsigned char)0xa1 // |
111 | && (unsigned char)*(str + 1) <= (unsigned char)0xf3) { |
112 | return (unsigned char)*(str + 1) - (unsigned char)0xa1; |
113 | } |
114 | return -1; |
115 | } |
116 | }; |
117 | } |
118 | #endif /* __JPCNTX_H__ */ |
119 | |