1/* -*- C++ -*-
2 SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
3
4 SPDX-License-Identifier: MIT
5*/
6
7#include "nsSBCSGroupProber.h"
8
9#include "UnicodeGroupProber.h"
10#include "nsHebrewProber.h"
11#include "nsSBCharSetProber.h"
12
13#include <stdio.h>
14#include <stdlib.h>
15
16namespace kencodingprober
17{
18nsSBCSGroupProber::nsSBCSGroupProber()
19{
20 mProbers[0] = new nsSingleByteCharSetProber(&Win1251Model);
21 mProbers[1] = new nsSingleByteCharSetProber(&Koi8rModel);
22 mProbers[2] = new nsSingleByteCharSetProber(&Latin5Model);
23 mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel);
24 mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model);
25 mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model);
26 mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model);
27 mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model);
28 mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel);
29 mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel);
30
31 mProbers[10] = new nsHebrewProber();
32 mProbers[11] = new UnicodeGroupProber();
33
34 // disable latin2 before latin1 is available, otherwise all latin1
35 // will be detected as latin2 because of their similarity.
36 // mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel);
37 // mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel);
38
39 Reset();
40}
41
42nsSBCSGroupProber::~nsSBCSGroupProber()
43{
44 for (unsigned int i = 0; i < NUM_OF_SBCS_PROBERS; i++) {
45 delete mProbers[i];
46 }
47}
48
49const char *nsSBCSGroupProber::GetCharSetName()
50{
51 // if we have no answer yet
52 if (mBestGuess == -1) {
53 GetConfidence();
54 // no charset seems positive
55 if (mBestGuess == -1)
56 // we will use default.
57 {
58 mBestGuess = 0;
59 }
60 }
61 return mProbers[mBestGuess]->GetCharSetName();
62}
63
64void nsSBCSGroupProber::Reset(void)
65{
66 for (unsigned int i = 0; i < NUM_OF_SBCS_PROBERS; i++) {
67 if (mProbers[i]) { // not null
68 mProbers[i]->Reset();
69 mIsActive[i] = true;
70 } else {
71 mIsActive[i] = false;
72 }
73 }
74 mBestGuess = -1;
75 mState = eDetecting;
76}
77
78nsProbingState nsSBCSGroupProber::HandleData(const char *aBuf, unsigned int aLen)
79{
80 char *newBuf1 = nullptr;
81 unsigned int newLen1 = 0;
82
83 int activeNum = NUM_OF_SBCS_PROBERS - 1;
84
85 // The UnicodeGroupProber (specifically the UTF16 subprobers) need unmangled data
86 if (mIsActive[NUM_OF_SBCS_PROBERS - 1]) {
87 if (const auto st = mProbers[NUM_OF_SBCS_PROBERS - 1]->HandleData(aBuf, aLen); st == eFoundIt) {
88 mBestGuess = NUM_OF_SBCS_PROBERS - 1;
89 mState = eFoundIt;
90 return mState;
91 } else if (st == eNotMe) {
92 mIsActive[NUM_OF_SBCS_PROBERS - 1] = false;
93 activeNum--;
94 }
95 } else {
96 activeNum--;
97 }
98
99 // apply filter to original buffer, and we got new buffer back
100 // depend on what script it is, we will feed them the new buffer
101 // we got after applying proper filter
102 // this is done without any consideration to KeepEnglishLetters
103 // of each prober since as of now, there are no probers here which
104 // recognize languages with English characters.
105 if (!FilterWithoutEnglishLetters(aBuf, aLen, newBuf: &newBuf1, newLen&: newLen1)) {
106 goto done;
107 }
108
109 if (newLen1 == 0) {
110 goto done; // Nothing to see here, move on.
111 }
112
113 for (unsigned int i = 0; i < NUM_OF_SBCS_PROBERS - 1; ++i) {
114 if (!mIsActive[i]) {
115 activeNum--;
116 continue;
117 }
118 auto st = mProbers[i]->HandleData(aBuf: newBuf1, aLen: newLen1);
119 if (st == eFoundIt) {
120 mBestGuess = i;
121 mState = eFoundIt;
122 break;
123 } else if (st == eNotMe) {
124 mIsActive[i] = false;
125 activeNum--;
126 }
127 }
128
129 if (activeNum == 0) {
130 mState = eNotMe;
131 }
132
133done:
134 free(ptr: newBuf1);
135
136 return mState;
137}
138
139float nsSBCSGroupProber::GetConfidence(void)
140{
141 unsigned int i;
142 float bestConf = 0.0;
143 float cf;
144
145 switch (mState) {
146 case eFoundIt:
147 return (float)0.99; // sure yes
148 case eNotMe:
149 return (float)0.01; // sure no
150 default:
151 for (i = 0; i < NUM_OF_SBCS_PROBERS; ++i) {
152 if (!mIsActive[i]) {
153 continue;
154 }
155 cf = mProbers[i]->GetConfidence();
156 if (bestConf < cf) {
157 bestConf = cf;
158 mBestGuess = i;
159 }
160 }
161 }
162 return bestConf;
163}
164
#ifdef DEBUG_PROBE
/**
 * Prints the status of every sub-prober followed by the group's current best
 * match and confidence (debug builds only).
 *
 * Guards against two cases that would otherwise be invalid accesses: a null
 * prober slot (Reset() marks such slots inactive) and a best guess that is
 * still unset (-1) after GetConfidence().
 */
void nsSBCSGroupProber::DumpStatus()
{
    const float cf = GetConfidence();

    printf(" SBCS Group Prober --------begin status \r\n");
    for (unsigned int i = 0; i < NUM_OF_SBCS_PROBERS; i++) {
        if (!mIsActive[i]) {
            // An inactive slot may hold no prober at all.
            const char *name = mProbers[i] ? mProbers[i]->GetCharSetName() : "(null)";
            printf(" inactive: [%s] (i.e. confidence is too low).\r\n", name);
        } else {
            mProbers[i]->DumpStatus();
        }
    }

    // mBestGuess stays -1 when no prober has been selected; do not index with it.
    const char *bestName = (mBestGuess == -1) ? "(none)" : mProbers[mBestGuess]->GetCharSetName();
    printf(" SBCS Group found best match [%s] confidence %f.\r\n", bestName, cf);
}
#endif
183}
184

source code of kcodecs/src/probers/nsSBCSGroupProber.cpp