1 | // Copyright (C) 2016 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 |
3 | |
4 | #ifndef SIMTEXTH_H |
5 | #define SIMTEXTH_H |
6 | |
/**
 * Score threshold for text similarity. The getSimilarityScore()
 * documentation states that Linguist treats a score over 190 as a good match.
 */
constexpr int textSimilarityThreshold = 190;
8 | |
9 | #include <QString> |
10 | #include <QList> |
11 | |
12 | QT_BEGIN_NAMESPACE |
13 | |
14 | class Translator; |
15 | |
16 | struct Candidate |
17 | { |
18 | Candidate() {} |
19 | Candidate(const QString &c, const QString &s, const QString &d, const QString &t) |
20 | : context(c), source(s), disambiguation(d), translation(t) |
21 | {} |
22 | |
23 | QString context; |
24 | QString source; |
25 | QString disambiguation; |
26 | QString translation; |
27 | }; |
28 | |
29 | inline bool operator==( const Candidate& c, const Candidate& d ) { |
30 | return c.translation == d.translation && c.source == d.source && c.context == d.context |
31 | && c.disambiguation == d.disambiguation; |
32 | } |
33 | inline bool operator!=( const Candidate& c, const Candidate& d ) { |
34 | return !operator==( c, d ); |
35 | } |
36 | |
37 | typedef QList<Candidate> CandidateList; |
38 | |
39 | struct CoMatrix |
40 | { |
41 | CoMatrix(const QString &str); |
42 | CoMatrix() {} |
43 | |
44 | /* |
45 | The matrix has 20 * 20 = 400 entries. This requires 50 bytes, or 13 |
46 | words. Some operations are performed on words for more efficiency. |
47 | */ |
48 | union { |
49 | quint8 b[52]; |
50 | quint32 w[13]; |
51 | }; |
52 | }; |
53 | |
54 | /** |
55 | * This class is more efficient for searching through a large array of candidate strings, since we only |
56 | * have to construct the CoMatrix for the \a stringToMatch once, |
57 | * after that we just call getSimilarityScore(strCandidate). |
58 | * \sa getSimilarityScore |
59 | */ |
60 | class StringSimilarityMatcher { |
61 | public: |
62 | StringSimilarityMatcher(const QString &stringToMatch); |
63 | int getSimilarityScore(const QString &strCandidate); |
64 | |
65 | private: |
66 | CoMatrix m_cm; |
67 | int m_length; |
68 | }; |
69 | |
70 | /** |
71 | * Checks how similar two strings are. |
72 | * The return value is the score, and a higher score is more similar |
73 | * than one with a low score. |
74 | * Linguist considers a score over 190 to be a good match. |
75 | * \sa StringSimilarityMatcher |
76 | */ |
77 | static inline int getSimilarityScore(const QString &str1, const QString &str2) |
78 | { |
79 | return StringSimilarityMatcher(str1).getSimilarityScore(strCandidate: str2); |
80 | } |
81 | |
82 | CandidateList similarTextHeuristicCandidates( const Translator *tor, |
83 | const QString &text, |
84 | int maxCandidates ); |
85 | |
86 | QT_END_NAMESPACE |
87 | |
88 | #endif |
89 | |