1/* -*- C++ -*-
2 SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
3
4 SPDX-License-Identifier: MIT
5*/
6
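// NOTE(review): the observations below describe S-JIS (Japanese) text and
// appear to have been carried over from the S-JIS prober; this file implements
// nsGB18030Prober -- confirm whether they are still relevant here.
// For S-JIS encoding, observe these characteristics:
// 1. kana characters (or hankaku?) often have a high frequency of appearance
// 2. kana characters often occur in groups
// 3. certain combinations of kana are never used in the Japanese language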
11
12#include "nsGB2312Prober.h"
13
14namespace kencodingprober
15{
16void nsGB18030Prober::Reset(void)
17{
18 mCodingSM->Reset();
19 mState = eDetecting;
20 mDistributionAnalyser.Reset();
21 // mContextAnalyser.Reset();
22}
23
24nsProbingState nsGB18030Prober::HandleData(const char *aBuf, unsigned int aLen)
25{
26 if (aLen == 0) {
27 return mState;
28 }
29
30 for (unsigned int i = 0; i < aLen; i++) {
31 const nsSMState codingState = mCodingSM->NextState(c: aBuf[i]);
32 if (codingState == eError) {
33 mState = eNotMe;
34 break;
35 }
36 if (codingState == eItsMe) {
37 mState = eFoundIt;
38 break;
39 }
40 if (codingState == eStart) {
41 unsigned int charLen = mCodingSM->GetCurrentCharLen();
42
43 if (i == 0) {
44 mLastChar[1] = aBuf[0];
45 mDistributionAnalyser.HandleOneChar(aStr: mLastChar, aCharLen: charLen);
46 } else {
47 mDistributionAnalyser.HandleOneChar(aStr: aBuf + i - 1, aCharLen: charLen);
48 }
49 }
50 }
51
52 mLastChar[0] = aBuf[aLen - 1];
53
54 if (mState == eDetecting) {
55 if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) {
56 mState = eFoundIt;
57 }
58 }
59 // else
60 // mDistributionAnalyser.HandleData(aBuf, aLen);
61
62 return mState;
63}
64
65float nsGB18030Prober::GetConfidence(void)
66{
67 float distribCf = mDistributionAnalyser.GetConfidence();
68
69 return (float)distribCf;
70}
71}
72

source code of kcodecs/src/probers/nsGB2312Prober.cpp