1 | /* -*- C++ -*- |
---|---|
2 | SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org> |
3 | |
4 | SPDX-License-Identifier: MIT |
5 | */ |
6 | |
// For the Shift-JIS (S-JIS) encoding, we rely on the following characteristics:
// 1. Kana characters (or hankaku?) often have a high frequency of appearance.
// 2. Kana characters often occur in groups.
// 3. Certain combinations of kana are never used in the Japanese language.
11 | |
12 | #include "nsSJISProber.h" |
13 | |
14 | namespace kencodingprober |
15 | { |
16 | void nsSJISProber::Reset(void) |
17 | { |
18 | mCodingSM->Reset(); |
19 | mState = eDetecting; |
20 | mContextAnalyser.Reset(); |
21 | mDistributionAnalyser.Reset(); |
22 | } |
23 | |
24 | nsProbingState nsSJISProber::HandleData(const char *aBuf, unsigned int aLen) |
25 | { |
26 | if (aLen == 0) { |
27 | return mState; |
28 | } |
29 | |
30 | for (unsigned int i = 0; i < aLen; i++) { |
31 | const nsSMState codingState = mCodingSM->NextState(c: aBuf[i]); |
32 | if (codingState == eError) { |
33 | mState = eNotMe; |
34 | break; |
35 | } |
36 | if (codingState == eItsMe) { |
37 | mState = eFoundIt; |
38 | break; |
39 | } |
40 | if (codingState == eStart) { |
41 | unsigned int charLen = mCodingSM->GetCurrentCharLen(); |
42 | if (i == 0) { |
43 | mLastChar[1] = aBuf[0]; |
44 | mContextAnalyser.HandleOneChar(aStr: mLastChar + 2 - charLen, aCharLen: charLen); |
45 | mDistributionAnalyser.HandleOneChar(aStr: mLastChar, aCharLen: charLen); |
46 | } else { |
47 | mContextAnalyser.HandleOneChar(aStr: aBuf + i + 1 - charLen, aCharLen: charLen); |
48 | mDistributionAnalyser.HandleOneChar(aStr: aBuf + i - 1, aCharLen: charLen); |
49 | } |
50 | } |
51 | } |
52 | |
53 | mLastChar[0] = aBuf[aLen - 1]; |
54 | |
55 | if (mState == eDetecting) { |
56 | if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) { |
57 | mState = eFoundIt; |
58 | } |
59 | } |
60 | |
61 | return mState; |
62 | } |
63 | |
64 | float nsSJISProber::GetConfidence(void) |
65 | { |
66 | float contxtCf = mContextAnalyser.GetConfidence(); |
67 | float distribCf = mDistributionAnalyser.GetConfidence(); |
68 | |
69 | return (contxtCf > distribCf ? contxtCf : distribCf); |
70 | } |
71 | } |
72 |