1/* -*- C++ -*-
2 SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>
3
4 SPDX-License-Identifier: MIT
5*/
6
7#ifndef NSSBCHARSETPROBER_H
8#define NSSBCHARSETPROBER_H
9
10#include "nsCharSetProber.h"
11
// Dimension of the two-character precedence matrix, which is laid out as
// [SAMPLE_SIZE][SAMPLE_SIZE] (see SequenceModel::precedenceMatrix).
#define SAMPLE_SIZE 64
// NOTE(review): presumably a threshold on a character's order value used to
// classify symbols; its use is not visible in this header - confirm.
#define SYMBOL_CAT_ORDER 250
// Number of sequence categories; sizes the mSeqCounters array of the prober.
#define NUMBER_OF_SEQ_CAT 4
// Last category value (NUMBER_OF_SEQ_CAT - 1).
// NOTE(review): presumably an index into mSeqCounters - confirm.
#define POSITIVE_CAT (NUMBER_OF_SEQ_CAT - 1)
// First category value.
// NOTE(review): presumably an index into mSeqCounters - confirm.
#define NEGATIVE_CAT 0
17
18namespace kencodingprober
19{
/**
 * Description of one single-byte charset model.
 *
 * This is a plain aggregate. The member order is significant: any
 * brace-initialised definition of a model relies on it. Declared as a named
 * struct (rather than a typedef of an unnamed struct) so the type can also be
 * forward-declared.
 */
struct SequenceModel {
    /// [256] table used to find a character's order.
    const unsigned char *charToOrderMap;
    /// [SAMPLE_SIZE][SAMPLE_SIZE] table used to find a 2-character sequence's frequency.
    const char *precedenceMatrix;
    /// Typical positive ratio, = freqSeqs / totalSeqs.
    float mTypicalPositiveRatio;
    /// Says whether this script contains English characters (not implemented).
    bool keepEnglishLetter;
    /// Name of the charset this model belongs to.
    const char *charsetName;
};
27
28template<bool Reversed = false>
29class KCODECS_NO_EXPORT nsSingleByteCharSetProber : public nsCharSetProber
30{
31public:
32 explicit nsSingleByteCharSetProber(const SequenceModel *model)
33 : mModel(model)
34 {
35 Reset();
36 }
37
38 const char *GetCharSetName() override;
39 nsProbingState HandleData(const char *aBuf, unsigned int aLen) override;
40 nsProbingState GetState(void) override
41 {
42 return mState;
43 }
44 void Reset(void) override;
45 float GetConfidence(void) override;
46
47#ifdef DEBUG_PROBE
48 void DumpStatus() override;
49#endif
50
51protected:
52 nsProbingState mState;
53 const SequenceModel *mModel;
54
55 // char order of last character
56 unsigned char mLastOrder;
57
58 unsigned int mTotalSeqs;
59 unsigned int mSeqCounters[NUMBER_OF_SEQ_CAT];
60
61 unsigned int mTotalChar;
62 // characters that fall in our sampling range
63 unsigned int mFreqChar;
64};
65
// Declarations of the available sequence models. These are declarations
// only; the model objects themselves are defined elsewhere.
// NOTE(review): each name appears to combine a charset with, where given, a
// language (e.g. Bulgarian, Hungarian) - confirm against the definitions.
extern const SequenceModel Koi8rModel;
extern const SequenceModel Win1251Model;
extern const SequenceModel Latin5Model;
extern const SequenceModel MacCyrillicModel;
extern const SequenceModel Ibm866Model;
extern const SequenceModel Ibm855Model;
extern const SequenceModel Latin7Model;
extern const SequenceModel Win1253Model;
extern const SequenceModel Latin5BulgarianModel;
extern const SequenceModel Win1251BulgarianModel;
extern const SequenceModel Latin2HungarianModel;
extern const SequenceModel Win1250HungarianModel;
extern const SequenceModel Win1255Model;
79}
80#endif /* NSSBCHARSETPROBER_H */
81

// Source: kcodecs/src/probers/nsSBCharSetProber.h