1 | /* -*- C++ -*- |
---|---|
2 | SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org> |
3 | |
4 | SPDX-License-Identifier: MIT |
5 | */ |
6 | |
// For Japanese encodings, observe the following characteristics:
// 1. Kana characters (or hankaku?) often appear with high frequency.
// 2. Kana characters often occur in groups.
// 3. Certain combinations of kana are never used in the Japanese language.
11 | |
12 | #include "nsEUCJPProber.h" |
13 | |
14 | namespace kencodingprober |
15 | { |
16 | void nsEUCJPProber::Reset(void) |
17 | { |
18 | mCodingSM->Reset(); |
19 | mState = eDetecting; |
20 | mContextAnalyser.Reset(); |
21 | mDistributionAnalyser.Reset(); |
22 | } |
23 | |
24 | nsProbingState nsEUCJPProber::HandleData(const char *aBuf, unsigned int aLen) |
25 | { |
26 | if (aLen == 0) { |
27 | return mState; |
28 | } |
29 | |
30 | for (unsigned int i = 0; i < aLen; i++) { |
31 | const nsSMState codingState = mCodingSM->NextState(c: aBuf[i]); |
32 | if (codingState == eError) { |
33 | mState = eNotMe; |
34 | break; |
35 | } |
36 | if (codingState == eItsMe) { |
37 | mState = eFoundIt; |
38 | break; |
39 | } |
40 | if (codingState == eStart) { |
41 | unsigned int charLen = mCodingSM->GetCurrentCharLen(); |
42 | |
43 | if (i == 0) { |
44 | mLastChar[1] = aBuf[0]; |
45 | mContextAnalyser.HandleOneChar(aStr: mLastChar, aCharLen: charLen); |
46 | mDistributionAnalyser.HandleOneChar(aStr: mLastChar, aCharLen: charLen); |
47 | } else { |
48 | mContextAnalyser.HandleOneChar(aStr: aBuf + i - 1, aCharLen: charLen); |
49 | mDistributionAnalyser.HandleOneChar(aStr: aBuf + i - 1, aCharLen: charLen); |
50 | } |
51 | } |
52 | } |
53 | |
54 | mLastChar[0] = aBuf[aLen - 1]; |
55 | |
56 | if (mState == eDetecting) { |
57 | if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) { |
58 | mState = eFoundIt; |
59 | } |
60 | } |
61 | |
62 | return mState; |
63 | } |
64 | |
65 | float nsEUCJPProber::GetConfidence(void) |
66 | { |
67 | float contxtCf = mContextAnalyser.GetConfidence(); |
68 | float distribCf = mDistributionAnalyser.GetConfidence(); |
69 | |
70 | return (contxtCf > distribCf ? contxtCf : distribCf); |
71 | } |
72 | } |
73 |