1/* -*- C++ -*-
2 SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
3
4 SPDX-License-Identifier: MIT
5*/
6
// For S-JIS encoding, observe these characteristics:
// 1. kana characters (or hankaku?) often have a high frequency of appearance
// 2. kana characters often appear in groups
// 3. certain combinations of kana are never used in the Japanese language
11
12#include "nsSJISProber.h"
13
14namespace kencodingprober
15{
16void nsSJISProber::Reset(void)
17{
18 mCodingSM->Reset();
19 mState = eDetecting;
20 mContextAnalyser.Reset();
21 mDistributionAnalyser.Reset();
22}
23
24nsProbingState nsSJISProber::HandleData(const char *aBuf, unsigned int aLen)
25{
26 if (aLen == 0) {
27 return mState;
28 }
29
30 for (unsigned int i = 0; i < aLen; i++) {
31 const nsSMState codingState = mCodingSM->NextState(c: aBuf[i]);
32 if (codingState == eError) {
33 mState = eNotMe;
34 break;
35 }
36 if (codingState == eItsMe) {
37 mState = eFoundIt;
38 break;
39 }
40 if (codingState == eStart) {
41 unsigned int charLen = mCodingSM->GetCurrentCharLen();
42 if (i == 0) {
43 mLastChar[1] = aBuf[0];
44 mContextAnalyser.HandleOneChar(aStr: mLastChar + 2 - charLen, aCharLen: charLen);
45 mDistributionAnalyser.HandleOneChar(aStr: mLastChar, aCharLen: charLen);
46 } else {
47 mContextAnalyser.HandleOneChar(aStr: aBuf + i + 1 - charLen, aCharLen: charLen);
48 mDistributionAnalyser.HandleOneChar(aStr: aBuf + i - 1, aCharLen: charLen);
49 }
50 }
51 }
52
53 mLastChar[0] = aBuf[aLen - 1];
54
55 if (mState == eDetecting) {
56 if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) {
57 mState = eFoundIt;
58 }
59 }
60
61 return mState;
62}
63
64float nsSJISProber::GetConfidence(void)
65{
66 float contxtCf = mContextAnalyser.GetConfidence();
67 float distribCf = mDistributionAnalyser.GetConfidence();
68
69 return (contxtCf > distribCf ? contxtCf : distribCf);
70}
71}
72

source code of kcodecs/src/probers/nsSJISProber.cpp