/*  -*- C++ -*-
    SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org>

    SPDX-License-Identifier: MIT
*/
| 6 | |
| 7 | #ifndef __JPCNTX_H__ |
| 8 | #define __JPCNTX_H__ |
| 9 | |
| 10 | #include "kcodecs_export.h" |
| 11 | |
| 12 | #include <qglobal.h> |
| 13 | |
// Number of frequency categories; this is the size of the per-category
// counter array JapaneseContextAnalysis::mRelSample.
#define NUM_OF_CATEGORY 6

// JapaneseContextAnalysis::GotEnoughData() reports true once more than this
// many character sequences have been counted.
#define ENOUGH_REL_THRESHOLD 100
// JapaneseContextAnalysis::HandleOneChar() marks the analysis as done once
// more than this many character sequences have been counted.
#define MAX_REL_THRESHOLD 1000
| 18 | namespace kencodingprober |
| 19 | { |
// Hiragana frequency category table (defined in another translation unit).
// It is indexed as [order of previous character][order of current character],
// where an "order" is the value returned by GetOrder(); the stored value is
// used as an index into the per-category counters (mRelSample).
extern const char jp2CharContext[83][83];
| 22 | |
| 23 | class KCODECS_NO_EXPORT JapaneseContextAnalysis |
| 24 | { |
| 25 | public: |
| 26 | JapaneseContextAnalysis() |
| 27 | { |
| 28 | Reset(); |
| 29 | } |
| 30 | virtual ~JapaneseContextAnalysis() |
| 31 | { |
| 32 | } |
| 33 | |
| 34 | void HandleData(const char *aBuf, unsigned int aLen); |
| 35 | |
| 36 | void HandleOneChar(const char *aStr, unsigned int aCharLen) |
| 37 | { |
| 38 | int order; |
| 39 | |
| 40 | // if we received enough data, stop here |
| 41 | if (mTotalRel > MAX_REL_THRESHOLD) { |
| 42 | mDone = true; |
| 43 | } |
| 44 | if (mDone) { |
| 45 | return; |
| 46 | } |
| 47 | |
| 48 | // Only 2-bytes characters are of our interest |
| 49 | order = (aCharLen == 2) ? GetOrder(str: aStr) : -1; |
| 50 | if (order != -1 && mLastCharOrder != -1) { |
| 51 | mTotalRel++; |
| 52 | // count this sequence to its category counter |
| 53 | mRelSample[(int)jp2CharContext[mLastCharOrder][order]]++; |
| 54 | } |
| 55 | mLastCharOrder = order; |
| 56 | } |
| 57 | |
| 58 | float GetConfidence(); |
| 59 | void Reset(void); |
| 60 | bool GotEnoughData() |
| 61 | { |
| 62 | return mTotalRel > ENOUGH_REL_THRESHOLD; |
| 63 | } |
| 64 | |
| 65 | protected: |
| 66 | virtual int GetOrder(const char *str, unsigned int *charLen) = 0; |
| 67 | virtual int GetOrder(const char *str) = 0; |
| 68 | |
| 69 | // category counters, each integer counts sequence in its category |
| 70 | unsigned int mRelSample[NUM_OF_CATEGORY]; |
| 71 | |
| 72 | // total sequence received |
| 73 | unsigned int mTotalRel; |
| 74 | |
| 75 | // The order of previous char |
| 76 | int mLastCharOrder; |
| 77 | |
| 78 | // if last byte in current buffer is not the last byte of a character, we |
| 79 | // need to know how many byte to skip in next buffer. |
| 80 | unsigned int mNeedToSkipCharNum; |
| 81 | |
| 82 | // If this flag is set to true, detection is done and conclusion has been made |
| 83 | bool mDone; |
| 84 | }; |
| 85 | |
| 86 | class KCODECS_NO_EXPORT SJISContextAnalysis : public JapaneseContextAnalysis |
| 87 | { |
| 88 | // SJISContextAnalysis(){}; |
| 89 | protected: |
| 90 | int GetOrder(const char *str, unsigned int *charLen) override; |
| 91 | |
| 92 | int GetOrder(const char *str) override |
| 93 | { |
| 94 | // We only interested in Hiragana, so first byte is '\202' |
| 95 | if (*str == '\202' && (unsigned char)*(str + 1) >= (unsigned char)0x9f && (unsigned char)*(str + 1) <= (unsigned char)0xf1) { |
| 96 | return (unsigned char)*(str + 1) - (unsigned char)0x9f; |
| 97 | } |
| 98 | return -1; |
| 99 | } |
| 100 | }; |
| 101 | |
| 102 | class KCODECS_NO_EXPORT EUCJPContextAnalysis : public JapaneseContextAnalysis |
| 103 | { |
| 104 | protected: |
| 105 | int GetOrder(const char *str, unsigned int *charLen) override; |
| 106 | int GetOrder(const char *str) override |
| 107 | // We only interested in Hiragana, so first byte is '\244' |
| 108 | { |
| 109 | if (*str == '\244' // |
| 110 | && (unsigned char)*(str + 1) >= (unsigned char)0xa1 // |
| 111 | && (unsigned char)*(str + 1) <= (unsigned char)0xf3) { |
| 112 | return (unsigned char)*(str + 1) - (unsigned char)0xa1; |
| 113 | } |
| 114 | return -1; |
| 115 | } |
| 116 | }; |
| 117 | } |
| 118 | #endif /* __JPCNTX_H__ */ |
| 119 | |