/* -*- C++ -*-
    SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>

    SPDX-License-Identifier: MIT
*/
6
7#ifndef __JPCNTX_H__
8#define __JPCNTX_H__
9
10#include "kcodecs_export.h"
11
12#include <qglobal.h>
13
// Number of frequency categories; this is the size of
// JapaneseContextAnalysis::mRelSample.
#define NUM_OF_CATEGORY 6

// Once more than this many two-character sequences have been counted,
// JapaneseContextAnalysis::GotEnoughData() returns true.
#define ENOUGH_REL_THRESHOLD 100
// Once more than this many two-character sequences have been counted,
// JapaneseContextAnalysis::HandleOneChar() marks the analysis as done and
// ignores further input.
#define MAX_REL_THRESHOLD 1000
18namespace kencodingprober
19{
// Hiragana frequency category table, indexed as
// jp2CharContext[order of previous char][order of current char].
// Each entry selects which category counter is incremented for that
// two-character sequence.
extern const char jp2CharContext[83][83];
22
23class KCODECS_NO_EXPORT JapaneseContextAnalysis
24{
25public:
26 JapaneseContextAnalysis()
27 {
28 Reset();
29 }
30 virtual ~JapaneseContextAnalysis()
31 {
32 }
33
34 void HandleData(const char *aBuf, unsigned int aLen);
35
36 void HandleOneChar(const char *aStr, unsigned int aCharLen)
37 {
38 int order;
39
40 // if we received enough data, stop here
41 if (mTotalRel > MAX_REL_THRESHOLD) {
42 mDone = true;
43 }
44 if (mDone) {
45 return;
46 }
47
48 // Only 2-bytes characters are of our interest
49 order = (aCharLen == 2) ? GetOrder(str: aStr) : -1;
50 if (order != -1 && mLastCharOrder != -1) {
51 mTotalRel++;
52 // count this sequence to its category counter
53 mRelSample[(int)jp2CharContext[mLastCharOrder][order]]++;
54 }
55 mLastCharOrder = order;
56 }
57
58 float GetConfidence();
59 void Reset(void);
60 bool GotEnoughData()
61 {
62 return mTotalRel > ENOUGH_REL_THRESHOLD;
63 }
64
65protected:
66 virtual int GetOrder(const char *str, unsigned int *charLen) = 0;
67 virtual int GetOrder(const char *str) = 0;
68
69 // category counters, each integer counts sequence in its category
70 unsigned int mRelSample[NUM_OF_CATEGORY];
71
72 // total sequence received
73 unsigned int mTotalRel;
74
75 // The order of previous char
76 int mLastCharOrder;
77
78 // if last byte in current buffer is not the last byte of a character, we
79 // need to know how many byte to skip in next buffer.
80 unsigned int mNeedToSkipCharNum;
81
82 // If this flag is set to true, detection is done and conclusion has been made
83 bool mDone;
84};
85
86class KCODECS_NO_EXPORT SJISContextAnalysis : public JapaneseContextAnalysis
87{
88 // SJISContextAnalysis(){};
89protected:
90 int GetOrder(const char *str, unsigned int *charLen) override;
91
92 int GetOrder(const char *str) override
93 {
94 // We only interested in Hiragana, so first byte is '\202'
95 if (*str == '\202' && (unsigned char)*(str + 1) >= (unsigned char)0x9f && (unsigned char)*(str + 1) <= (unsigned char)0xf1) {
96 return (unsigned char)*(str + 1) - (unsigned char)0x9f;
97 }
98 return -1;
99 }
100};
101
102class KCODECS_NO_EXPORT EUCJPContextAnalysis : public JapaneseContextAnalysis
103{
104protected:
105 int GetOrder(const char *str, unsigned int *charLen) override;
106 int GetOrder(const char *str) override
107 // We only interested in Hiragana, so first byte is '\244'
108 {
109 if (*str == '\244' //
110 && (unsigned char)*(str + 1) >= (unsigned char)0xa1 //
111 && (unsigned char)*(str + 1) <= (unsigned char)0xf3) {
112 return (unsigned char)*(str + 1) - (unsigned char)0xa1;
113 }
114 return -1;
115 }
116};
117}
118#endif /* __JPCNTX_H__ */

// source code of kcodecs/src/probers/JpCntx.h