1/* -*- C++ -*-
2 SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
3
4 SPDX-License-Identifier: MIT
5*/
6
7#include "ChineseGroupProber.h"
8
9#include "UnicodeGroupProber.h"
10#include "nsBig5Prober.h"
11#include "nsGB2312Prober.h"
12
13#include <stdio.h>
14#include <stdlib.h>
15
16namespace kencodingprober
17{
18#ifdef DEBUG_PROBE
// Human-readable names printed by DumpStatus(), indexed by prober slot.
// The order has to follow the mProbers[] assignments in the constructor:
// slot 0 = Unicode group, slot 1 = GB18030, slot 2 = Big5.
static const char *const ProberName[] = {"Unicode", "GB18030", "Big5"};
24
25#endif
26
27ChineseGroupProber::ChineseGroupProber()
28{
29 mProbers[0] = new UnicodeGroupProber();
30 mProbers[1] = new nsGB18030Prober();
31 mProbers[2] = new nsBig5Prober();
32 Reset();
33}
34
35ChineseGroupProber::~ChineseGroupProber()
36{
37 for (unsigned int i = 0; i < CN_NUM_OF_PROBERS; i++) {
38 delete mProbers[i];
39 }
40}
41
42const char *ChineseGroupProber::GetCharSetName()
43{
44 if (mBestGuess == -1) {
45 GetConfidence();
46 if (mBestGuess == -1) {
47 mBestGuess = 1; // assume it's GB18030
48 }
49 }
50 return mProbers[mBestGuess]->GetCharSetName();
51}
52
53void ChineseGroupProber::Reset(void)
54{
55 mActiveNum = 0;
56 for (unsigned int i = 0; i < CN_NUM_OF_PROBERS; i++) {
57 if (mProbers[i]) {
58 mProbers[i]->Reset();
59 mIsActive[i] = true;
60 ++mActiveNum;
61 } else {
62 mIsActive[i] = false;
63 }
64 }
65 mBestGuess = -1;
66 mState = eDetecting;
67}
68
69nsProbingState ChineseGroupProber::HandleData(const char *aBuf, unsigned int aLen)
70{
71 nsProbingState st;
72 unsigned int i;
73
74 // do filtering to reduce load to probers
75 char *highbyteBuf;
76 char *hptr;
77 bool keepNext = true; // assume previous is not ascii, it will do no harm except add some noise
78 hptr = highbyteBuf = (char *)malloc(size: aLen);
79 if (!hptr) {
80 return mState;
81 }
82 for (i = 0; i < aLen; ++i) {
83 if (aBuf[i] & 0x80) {
84 *hptr++ = aBuf[i];
85 keepNext = true;
86 } else {
87 // if previous is highbyte, keep this even it is an ASCII
88 if (keepNext) {
89 *hptr++ = aBuf[i];
90 keepNext = false;
91 }
92 }
93 }
94
95 for (i = 0; i < CN_NUM_OF_PROBERS; ++i) {
96 if (!mIsActive[i]) {
97 continue;
98 }
99 st = mProbers[i]->HandleData(aBuf: highbyteBuf, aLen: hptr - highbyteBuf);
100 if (st == eFoundIt) {
101 mBestGuess = i;
102 mState = eFoundIt;
103 break;
104 } else if (st == eNotMe) {
105 mIsActive[i] = false;
106 --mActiveNum;
107 if (mActiveNum == 0) {
108 mState = eNotMe;
109 break;
110 }
111 }
112 }
113
114 free(ptr: highbyteBuf);
115
116 return mState;
117}
118
119float ChineseGroupProber::GetConfidence(void)
120{
121 unsigned int i;
122 float bestConf = 0.0;
123 float cf;
124
125 switch (mState) {
126 case eFoundIt:
127 return (float)0.99;
128 case eNotMe:
129 return (float)0.01;
130 default:
131 for (i = 0; i < CN_NUM_OF_PROBERS; ++i) {
132 if (!mIsActive[i]) {
133 continue;
134 }
135 cf = mProbers[i]->GetConfidence();
136 if (bestConf < cf) {
137 bestConf = cf;
138 mBestGuess = i;
139 }
140 }
141 }
142 return bestConf;
143}
144
145#ifdef DEBUG_PROBE
146void ChineseGroupProber::DumpStatus()
147{
148 unsigned int i;
149 float cf;
150
151 GetConfidence();
152 for (i = 0; i < CN_NUM_OF_PROBERS; i++) {
153 if (!mIsActive[i]) {
154 printf(" Chinese group inactive: [%s] (confidence is too low).\r\n", ProberName[i]);
155 } else {
156 cf = mProbers[i]->GetConfidence();
157 printf(" Chinese group %1.3f: [%s]\r\n", cf, ProberName[i]);
158 }
159 }
160}
161#endif
162}
163

source code of kcodecs/src/probers/ChineseGroupProber.cpp