/*  -*- C++ -*-
    SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>

    SPDX-License-Identifier: MIT
*/
6
7#ifndef __JPCNTX_H__
8#define __JPCNTX_H__
9
10#include "kcodecs_export.h"
11
12#include <qglobal.h>
13
// Number of frequency categories a two-character sequence can fall into;
// it sizes the per-category counter array of the context analysis.
#define NUM_OF_CATEGORY 6

// Number of counted sequences above which GotEnoughData() reports true.
#define ENOUGH_REL_THRESHOLD 100
// Number of counted sequences above which HandleOneChar() stops sampling.
#define MAX_REL_THRESHOLD 1000
18namespace kencodingprober
19{
// Hiragana frequency category table, defined elsewhere.
// It is indexed as jp2CharContext[previous order][current order], where an
// order is the 0..82 value returned by GetOrder(); the stored value is the
// category whose counter gets incremented for that two-character sequence.
extern const char jp2CharContext[83][83];
22
23class KCODECS_NO_EXPORT JapaneseContextAnalysis
24{
25public:
26 JapaneseContextAnalysis()
27 {
28 Reset();
29 }
30 virtual ~JapaneseContextAnalysis()
31 {
32 }
33
34 void HandleData(const char *aBuf, unsigned int aLen);
35
36 void HandleOneChar(const char *aStr, unsigned int aCharLen)
37 {
38 int order;
39
40 // if we received enough data, stop here
41 if (mTotalRel > MAX_REL_THRESHOLD) {
42 mDone = true;
43 }
44 if (mDone) {
45 return;
46 }
47
48 // Only 2-bytes characters are of our interest
49 order = (aCharLen == 2) ? GetOrder(str: aStr) : -1;
50 if (order != -1 && mLastCharOrder != -1) {
51 mTotalRel++;
52 // count this sequence to its category counter
53 mRelSample[(int)jp2CharContext[mLastCharOrder][order]]++;
54 }
55 mLastCharOrder = order;
56 }
57
58 float GetConfidence();
59 void Reset(void);
60 void SetOpion()
61 {
62 }
63 bool GotEnoughData()
64 {
65 return mTotalRel > ENOUGH_REL_THRESHOLD;
66 }
67
68protected:
69 virtual int GetOrder(const char *str, unsigned int *charLen) = 0;
70 virtual int GetOrder(const char *str) = 0;
71
72 // category counters, each integer counts sequence in its category
73 unsigned int mRelSample[NUM_OF_CATEGORY];
74
75 // total sequence received
76 unsigned int mTotalRel;
77
78 // The order of previous char
79 int mLastCharOrder;
80
81 // if last byte in current buffer is not the last byte of a character, we
82 // need to know how many byte to skip in next buffer.
83 unsigned int mNeedToSkipCharNum;
84
85 // If this flag is set to true, detection is done and conclusion has been made
86 bool mDone;
87};
88
89class KCODECS_NO_EXPORT SJISContextAnalysis : public JapaneseContextAnalysis
90{
91 // SJISContextAnalysis(){};
92protected:
93 int GetOrder(const char *str, unsigned int *charLen) override;
94
95 int GetOrder(const char *str) override
96 {
97 // We only interested in Hiragana, so first byte is '\202'
98 if (*str == '\202' && (unsigned char)*(str + 1) >= (unsigned char)0x9f && (unsigned char)*(str + 1) <= (unsigned char)0xf1) {
99 return (unsigned char)*(str + 1) - (unsigned char)0x9f;
100 }
101 return -1;
102 }
103};
104
105class KCODECS_NO_EXPORT EUCJPContextAnalysis : public JapaneseContextAnalysis
106{
107protected:
108 int GetOrder(const char *str, unsigned int *charLen) override;
109 int GetOrder(const char *str) override
110 // We only interested in Hiragana, so first byte is '\244'
111 {
112 if (*str == '\244' //
113 && (unsigned char)*(str + 1) >= (unsigned char)0xa1 //
114 && (unsigned char)*(str + 1) <= (unsigned char)0xf3) {
115 return (unsigned char)*(str + 1) - (unsigned char)0xa1;
116 }
117 return -1;
118 }
119};
120}
121#endif /* __JPCNTX_H__ */
122

// Source: kcodecs/src/probers/JpCntx.h