JpCntx.h source code [kcodecs/src/probers/JpCntx.h]

/*  -*- C++ -*-
    SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>

    SPDX-License-Identifier: MIT
*/
6
7	#ifndef __JPCNTX_H__
8	#define __JPCNTX_H__
9
10	#include "kcodecs_export.h"
11
12	#include <qglobal.h>
13
// Number of category counters kept in JapaneseContextAnalysis::mRelSample.
#define NUM_OF_CATEGORY 6

// GotEnoughData() reports true once more than this many sequences were counted.
#define ENOUGH_REL_THRESHOLD 100
// HandleOneChar() marks the analysis as done once more than this many
// sequences were counted.
#define MAX_REL_THRESHOLD 1000
18	namespace kencodingprober
19	{
// Hiragana frequency category table (defined in another translation unit).
// Indexed as [order of previous character][order of current character]; the
// stored value selects the category counter to increment.
extern const char jp2CharContext[83][83];
22
23	class KCODECS_NO_EXPORT JapaneseContextAnalysis
24	{
25	public:
26	JapaneseContextAnalysis()
27	{
28	Reset();
29	}
30	virtual ~JapaneseContextAnalysis()
31	{
32	}
33
34	void HandleData(const char aBuf, unsigned* int aLen);
35
36	void HandleOneChar(const char aStr, unsigned* int aCharLen)
37	{
38	int order;
39
40	// if we received enough data, stop here
41	if (mTotalRel > MAX_REL_THRESHOLD) {
42	mDone = true;
43	}
44	if (mDone) {
45	return;
46	}
47
48	// Only 2-bytes characters are of our interest
49	order = (aCharLen == `2`) ? GetOrder(str: aStr) : -`1`;
50	if (order != -`1` && mLastCharOrder != -`1`) {
51	mTotalRel++;
52	// count this sequence to its category counter
53	mRelSample[(int)jp2CharContext[mLastCharOrder][order]]++;
54	}
55	mLastCharOrder = order;
56	}
57
58	float GetConfidence();
59	void Reset(void);
60	void SetOpion()
61	{
62	}
63	bool GotEnoughData()
64	{
65	return mTotalRel > ENOUGH_REL_THRESHOLD;
66	}
67
68	protected:
69	virtual int GetOrder(const char str, unsigned* int *charLen) = `0`;
70	virtual int GetOrder(const char *str) = `0`;
71
72	// category counters, each integer counts sequence in its category
73	unsigned int mRelSample[NUM_OF_CATEGORY];
74
75	// total sequence received
76	unsigned int mTotalRel;
77
78	// The order of previous char
79	int mLastCharOrder;
80
81	// if last byte in current buffer is not the last byte of a character, we
82	// need to know how many byte to skip in next buffer.
83	unsigned int mNeedToSkipCharNum;
84
85	// If this flag is set to true, detection is done and conclusion has been made
86	bool mDone;
87	};
88
89	class KCODECS_NO_EXPORT SJISContextAnalysis : public JapaneseContextAnalysis
90	{
91	// SJISContextAnalysis(){};
92	protected:
93	int GetOrder(const char str, unsigned* int *charLen) override;
94
95	int GetOrder(const char *str) override
96	{
97	// We only interested in Hiragana, so first byte is '\202'
98	if (str == `'\202'` && (unsigned* char)(str + `1`) >= (unsigned* char)`0x9f` && (unsigned char)(str + `1`) <= (unsigned* char)`0xf1`) {
99	return (unsigned char)(str + `1`) - (unsigned* char)`0x9f`;
100	}
101	return -`1`;
102	}
103	};
104
105	class KCODECS_NO_EXPORT EUCJPContextAnalysis : public JapaneseContextAnalysis
106	{
107	protected:
108	int GetOrder(const char str, unsigned* int *charLen) override;
109	int GetOrder(const char *str) override
110	// We only interested in Hiragana, so first byte is '\244'
111	{
112	if (*str == `'\244'` //
113	&& (unsigned char)(str + `1`) >= (unsigned* char)`0xa1` //
114	&& (unsigned char)(str + `1`) <= (unsigned* char)`0xf3`) {
115	return (unsigned char)(str + `1`) - (unsigned* char)`0xa1`;
116	}
117	return -`1`;
118	}
119	};
120	}
121	#endif /* __JPCNTX_H__ */
122

source code of kcodecs/src/probers/JpCntx.h