1/* -*- C++ -*-
2 SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
3
4 SPDX-License-Identifier: MIT
5*/
6
7#ifndef NSSBCHARSETPROBER_H
8#define NSSBCHARSETPROBER_H
9
10#include "nsCharSetProber.h"
11
// Tuning constants for the single-byte charset prober.
//
// Every expansion is either a single literal or a fully parenthesised
// expression, so each macro behaves as one operand wherever it is used
// (for example `sizeof POSITIVE_SHORTCUT_THRESHOLD`).

// Dimension of the square precedence matrix ([SAMPLE_SIZE][SAMPLE_SIZE]).
#define SAMPLE_SIZE 64
#define SB_ENOUGH_REL_THRESHOLD 1024
// Float-typed shortcut thresholds; the values are the double literals
// narrowed to float.
#define POSITIVE_SHORTCUT_THRESHOLD (static_cast<float>(0.95))
#define NEGATIVE_SHORTCUT_THRESHOLD (static_cast<float>(0.05))
#define SYMBOL_CAT_ORDER 250
// Number of sequence categories; sizes the per-category counter array.
#define NUMBER_OF_SEQ_CAT 4
// Highest and lowest category indices.
#define POSITIVE_CAT (NUMBER_OF_SEQ_CAT - 1)
#define NEGATIVE_CAT 0
20
21namespace kencodingprober
22{
// Language model handed to the single-byte charset prober.
// Plain aggregate: members are listed in initialisation order.
struct SequenceModel {
    // [256] table used to find a char's order.
    const unsigned char *charToOrderMap;
    // [SAMPLE_SIZE][SAMPLE_SIZE] table used to find a 2-char sequence's frequency.
    const char *precedenceMatrix;
    // = freqSeqs / totalSeqs
    float mTypicalPositiveRatio;
    // Says whether this script contains English characters (not implemented).
    bool keepEnglishLetter;
    const char *charsetName;
};
30
31class KCODECS_NO_EXPORT nsSingleByteCharSetProber : public nsCharSetProber
32{
33public:
34 explicit nsSingleByteCharSetProber(const SequenceModel *model)
35 : mModel(model)
36 , mReversed(false)
37 , mNameProber(nullptr)
38 {
39 Reset();
40 }
41 nsSingleByteCharSetProber(const SequenceModel *model, bool reversed, nsCharSetProber *nameProber)
42 : mModel(model)
43 , mReversed(reversed)
44 , mNameProber(nameProber)
45 {
46 Reset();
47 }
48
49 const char *GetCharSetName() override;
50 nsProbingState HandleData(const char *aBuf, unsigned int aLen) override;
51 nsProbingState GetState(void) override
52 {
53 return mState;
54 }
55 void Reset(void) override;
56 float GetConfidence(void) override;
57
58 // This feature is not implemented yet. any current language model
59 // contain this parameter as false. No one is looking at this
60 // parameter or calling this method.
61 // Moreover, the nsSBCSGroupProber which calls the HandleData of this
62 // prober has a hard-coded call to FilterWithoutEnglishLetters which gets rid
63 // of the English letters.
64 bool KeepEnglishLetters()
65 {
66 return mModel->keepEnglishLetter;
67 } // (not implemented)
68
69#ifdef DEBUG_PROBE
70 void DumpStatus() override;
71#endif
72
73protected:
74 nsProbingState mState;
75 const SequenceModel *mModel;
76 const bool mReversed; // true if we need to reverse every pair in the model lookup
77
78 // char order of last character
79 unsigned char mLastOrder;
80
81 unsigned int mTotalSeqs;
82 unsigned int mSeqCounters[NUMBER_OF_SEQ_CAT];
83
84 unsigned int mTotalChar;
85 // characters that fall in our sampling range
86 unsigned int mFreqChar;
87
88 // Optional auxiliary prober for name decision. created and destroyed by the GroupProber
89 nsCharSetProber *mNameProber;
90};
91
92extern const SequenceModel Koi8rModel;
93extern const SequenceModel Win1251Model;
94extern const SequenceModel Latin5Model;
95extern const SequenceModel MacCyrillicModel;
96extern const SequenceModel Ibm866Model;
97extern const SequenceModel Ibm855Model;
98extern const SequenceModel Latin7Model;
99extern const SequenceModel Win1253Model;
100extern const SequenceModel Latin5BulgarianModel;
101extern const SequenceModel Win1251BulgarianModel;
102extern const SequenceModel Latin2HungarianModel;
103extern const SequenceModel Win1250HungarianModel;
104extern const SequenceModel Win1255Model;
105}
106#endif /* NSSBCHARSETPROBER_H */
107

// source code of kcodecs/src/probers/nsSBCharSetProber.h