| 1 | /* -*- C++ -*- |
| 2 | SPDX-FileCopyrightText: 1998 Netscape Communications Corporation <developer@mozilla.org> |
| 3 | |
| 4 | SPDX-License-Identifier: MIT |
| 5 | */ |
| 6 | |
// For EUC-JP encoding, observe these characteristics:
// 1. kana characters (or hankaku?) often have a high frequency of appearance
// 2. kana characters often appear in groups
// 3. certain combinations of kana are never used in the Japanese language
| 11 | |
| 12 | #ifndef nsEUCJPProber_h__ |
| 13 | #define nsEUCJPProber_h__ |
| 14 | |
| 15 | #include "CharDistribution.h" |
| 16 | #include "JpCntx.h" |
| 17 | #include "nsCharSetProber.h" |
| 18 | #include "nsCodingStateMachine.h" |
| 19 | namespace kencodingprober |
| 20 | { |
| 21 | class KCODECS_NO_EXPORT nsEUCJPProber : public nsCharSetProber |
| 22 | { |
| 23 | public: |
| 24 | nsEUCJPProber(void) |
| 25 | { |
| 26 | mCodingSM = new nsCodingStateMachine(&EUCJPSMModel); |
| 27 | Reset(); |
| 28 | } |
| 29 | ~nsEUCJPProber(void) override |
| 30 | { |
| 31 | delete mCodingSM; |
| 32 | } |
| 33 | nsProbingState HandleData(const char *aBuf, unsigned int aLen) override; |
| 34 | const char *GetCharSetName() override |
| 35 | { |
| 36 | return "EUC-JP" ; |
| 37 | } |
| 38 | nsProbingState GetState(void) override |
| 39 | { |
| 40 | return mState; |
| 41 | } |
| 42 | void Reset(void) override; |
| 43 | float GetConfidence(void) override; |
| 44 | |
| 45 | protected: |
| 46 | nsCodingStateMachine *mCodingSM; |
| 47 | nsProbingState mState; |
| 48 | |
| 49 | EUCJPContextAnalysis mContextAnalyser; |
| 50 | EUCJPDistributionAnalysis mDistributionAnalyser; |
| 51 | |
| 52 | char mLastChar[2]; |
| 53 | }; |
| 54 | } |
| 55 | |
| 56 | #endif /* nsEUCJPProber_h__ */ |
| 57 | |