1/* -*- C++ -*-
2 SPDX-FileCopyrightText: 2008 Wang Kai <wkai@gmail.com>
3
4 SPDX-License-Identifier: MIT
5*/
6
7#include "UnicodeGroupProber.h"
8
9#include <QChar>
10#include <math.h>
11
12namespace kencodingprober
13{
14UnicodeGroupProber::UnicodeGroupProber(void)
15{
16 mCodingSM[0] = new nsCodingStateMachine(&UTF8SMModel);
17 mCodingSM[1] = new nsCodingStateMachine(&UCS2LESMModel);
18 mCodingSM[2] = new nsCodingStateMachine(&UCS2BESMModel);
19 mActiveSM = NUM_OF_UNICODE_CHARSETS;
20 mState = eDetecting;
21 mDetectedCharset = "UTF-8";
22}
23
24UnicodeGroupProber::~UnicodeGroupProber(void)
25{
26 for (unsigned int i = 0; i < NUM_OF_UNICODE_CHARSETS; i++) {
27 delete mCodingSM[i];
28 }
29}
30
31void UnicodeGroupProber::Reset(void)
32{
33 mState = eDetecting;
34 for (unsigned int i = 0; i < NUM_OF_UNICODE_CHARSETS; i++) {
35 mCodingSM[i]->Reset();
36 }
37 mActiveSM = NUM_OF_UNICODE_CHARSETS;
38 mDetectedCharset = "UTF-8";
39}
40
41nsProbingState UnicodeGroupProber::HandleData(const char *aBuf, unsigned int aLen)
42{
43 nsSMState codingState;
44
45 if (mActiveSM == 0 || aLen < 2) {
46 mState = eNotMe;
47 return mState;
48 }
49
50 for (uint i = 0; i < aLen; ++i) {
51 for (int j = mActiveSM - 1; j >= 0; --j) {
52 // byte is feed to all active state machine
53 codingState = mCodingSM[j]->NextState(c: aBuf[i]);
54 if (codingState == eError) {
55 // got negative answer for this state machine, make it inactive
56 mActiveSM--;
57 if (mActiveSM == 0) {
58 mState = eNotMe;
59 return mState;
60 } else if (j != (int)mActiveSM) {
61 nsCodingStateMachine *t;
62 t = mCodingSM[mActiveSM];
63 mCodingSM[mActiveSM] = mCodingSM[j];
64 mCodingSM[j] = t;
65 }
66 } else if (codingState == eItsMe) {
67 mState = eFoundIt;
68 mDetectedCharset = mCodingSM[j]->GetCodingStateMachine();
69 return mState;
70 } else if (mState == eDetecting) {
71 mDetectedCharset = mCodingSM[j]->GetCodingStateMachine();
72 };
73 }
74 }
75 return mState;
76}
77
78float UnicodeGroupProber::GetConfidence()
79{
80 if (mState == eFoundIt) {
81 return 0.99f;
82 } else {
83 return 0.0f;
84 }
85}
86
87#ifdef DEBUG_PROBE
88void UnicodeGroupProber::DumpStatus()
89{
90 GetConfidence();
91 for (uint i = 0; i < mActiveSM; i++) {
92 qDebug() << "Unicode group" << mCodingSM[i]->DumpCurrentState() << mCodingSM[i]->GetCodingStateMachine();
93 }
94}
95#endif
96
97}
98

// Source: kcodecs/src/probers/UnicodeGroupProber.cpp