/*  -*- C++ -*-
    SPDX-FileCopyrightText: 2008 Wang Kai <wkai@gmail.com>

    SPDX-License-Identifier: MIT
*/
6 | |
7 | #include "UnicodeGroupProber.h" |
8 | |
9 | #include <QChar> |
10 | #include <math.h> |
11 | |
12 | namespace kencodingprober |
13 | { |
14 | UnicodeGroupProber::UnicodeGroupProber(void) |
15 | { |
16 | mCodingSM[0] = new nsCodingStateMachine(&UTF8SMModel); |
17 | mCodingSM[1] = new nsCodingStateMachine(&UCS2LESMModel); |
18 | mCodingSM[2] = new nsCodingStateMachine(&UCS2BESMModel); |
19 | mActiveSM = NUM_OF_UNICODE_CHARSETS; |
20 | mState = eDetecting; |
21 | mDetectedCharset = "UTF-8"; |
22 | } |
23 | |
24 | UnicodeGroupProber::~UnicodeGroupProber(void) |
25 | { |
26 | for (unsigned int i = 0; i < NUM_OF_UNICODE_CHARSETS; i++) { |
27 | delete mCodingSM[i]; |
28 | } |
29 | } |
30 | |
31 | void UnicodeGroupProber::Reset(void) |
32 | { |
33 | mState = eDetecting; |
34 | for (unsigned int i = 0; i < NUM_OF_UNICODE_CHARSETS; i++) { |
35 | mCodingSM[i]->Reset(); |
36 | } |
37 | mActiveSM = NUM_OF_UNICODE_CHARSETS; |
38 | mDetectedCharset = "UTF-8"; |
39 | } |
40 | |
41 | nsProbingState UnicodeGroupProber::HandleData(const char *aBuf, unsigned int aLen) |
42 | { |
43 | nsSMState codingState; |
44 | |
45 | if (mActiveSM == 0 || aLen < 2) { |
46 | mState = eNotMe; |
47 | return mState; |
48 | } |
49 | |
50 | for (uint i = 0; i < aLen; ++i) { |
51 | for (int j = mActiveSM - 1; j >= 0; --j) { |
52 | // byte is feed to all active state machine |
53 | codingState = mCodingSM[j]->NextState(c: aBuf[i]); |
54 | if (codingState == eError) { |
55 | // got negative answer for this state machine, make it inactive |
56 | mActiveSM--; |
57 | if (mActiveSM == 0) { |
58 | mState = eNotMe; |
59 | return mState; |
60 | } else if (j != (int)mActiveSM) { |
61 | nsCodingStateMachine *t; |
62 | t = mCodingSM[mActiveSM]; |
63 | mCodingSM[mActiveSM] = mCodingSM[j]; |
64 | mCodingSM[j] = t; |
65 | } |
66 | } else if (codingState == eItsMe) { |
67 | mState = eFoundIt; |
68 | mDetectedCharset = mCodingSM[j]->GetCodingStateMachine(); |
69 | return mState; |
70 | } else if (mState == eDetecting) { |
71 | mDetectedCharset = mCodingSM[j]->GetCodingStateMachine(); |
72 | }; |
73 | } |
74 | } |
75 | return mState; |
76 | } |
77 | |
78 | float UnicodeGroupProber::GetConfidence() |
79 | { |
80 | if (mState == eFoundIt) { |
81 | return 0.99f; |
82 | } else { |
83 | return 0.0f; |
84 | } |
85 | } |
86 | |
87 | #ifdef DEBUG_PROBE |
88 | void UnicodeGroupProber::DumpStatus() |
89 | { |
90 | GetConfidence(); |
91 | for (uint i = 0; i < mActiveSM; i++) { |
92 | qDebug() << "Unicode group"<< mCodingSM[i]->DumpCurrentState() << mCodingSM[i]->GetCodingStateMachine(); |
93 | } |
94 | } |
95 | #endif |
96 | |
97 | } |
98 |