1/* -*- C++ -*-
2 SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
3
4 SPDX-License-Identifier: MIT
5*/
6
7#include "nsSBCharSetProber.h"
8
9#include <stdio.h>
10
11namespace kencodingprober
12{
13template class nsSingleByteCharSetProber<false>;
14template class nsSingleByteCharSetProber<true>;
15
16template<bool Reversed>
17nsProbingState nsSingleByteCharSetProber<Reversed>::HandleData(const char *aBuf, unsigned int aLen)
18{
19 for (unsigned int i = 0; i < aLen; i++) {
20 const unsigned char order = mModel->charToOrderMap[(unsigned char)aBuf[i]];
21
22 if (order < SYMBOL_CAT_ORDER) {
23 mTotalChar++;
24 }
25 if (order < SAMPLE_SIZE) {
26 mFreqChar++;
27
28 if (mLastOrder < SAMPLE_SIZE) {
29 mTotalSeqs++;
30 unsigned int index = Reversed ? mLastOrder + (SAMPLE_SIZE * order) : (mLastOrder * SAMPLE_SIZE) + order;
31 ++(mSeqCounters[(int)mModel->precedenceMatrix[index]]);
32 }
33 }
34 mLastOrder = order;
35 }
36
37 if (mState == eDetecting) {
38 if (mTotalSeqs > 1024) {
39 float cf = GetConfidence();
40 if (cf > 0.95f) {
41 mState = eFoundIt;
42 } else if (cf < 0.05) {
43 mState = eNotMe;
44 }
45 }
46 }
47
48 return mState;
49}
50
51template<bool Reversed>
52void nsSingleByteCharSetProber<Reversed>::Reset(void)
53{
54 mState = eDetecting;
55 mLastOrder = 255;
56 for (unsigned int i = 0; i < NUMBER_OF_SEQ_CAT; i++) {
57 mSeqCounters[i] = 0;
58 }
59 mTotalSeqs = 0;
60 mTotalChar = 0;
61 mFreqChar = 0;
62}
63
64//#define NEGATIVE_APPROACH 1
65
66template<bool Reversed>
67float nsSingleByteCharSetProber<Reversed>::GetConfidence(void)
68{
69#ifdef NEGATIVE_APPROACH
70 if (mTotalSeqs > 0)
71 if (mTotalSeqs > mSeqCounters[NEGATIVE_CAT] * 10) {
72 return (mTotalSeqs - mSeqCounters[NEGATIVE_CAT] * 10.f) / mTotalSeqs * mFreqChar / mTotalChar;
73 }
74 return 0.01f;
75#else // POSITIVE_APPROACH
76
77 if (mTotalSeqs > 0) {
78 float r = 1.0f * mSeqCounters[POSITIVE_CAT] / mTotalSeqs / mModel->mTypicalPositiveRatio;
79 r = r * mFreqChar / mTotalChar;
80 if (r >= 0.99f) {
81 r = 0.99f;
82 }
83 return r;
84 }
85 return 0.01f;
86#endif
87}
88
89template<bool Reversed>
90const char *nsSingleByteCharSetProber<Reversed>::GetCharSetName()
91{
92 return mModel->charsetName;
93}
94
95#ifdef DEBUG_PROBE
96template<bool Reversed>
97void nsSingleByteCharSetProber<Reversed>::DumpStatus()
98{
99 printf(" SBCS: %1.3f [%s]\r\n", GetConfidence(), GetCharSetName());
100}
101#endif
102}
103

// Source: kcodecs/src/probers/nsSBCharSetProber.cpp