1 | // Copyright (C) 2016 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 |
3 | |
4 | #include "lupdate.h" |
5 | |
6 | #include "simtexth.h" |
7 | #include "translator.h" |
8 | |
9 | #include <QtCore/QCoreApplication> |
10 | #include <QtCore/QDebug> |
11 | #include <QtCore/QList> |
12 | #include <QtCore/QMap> |
13 | #include <QtCore/QStringList> |
14 | |
15 | QT_BEGIN_NAMESPACE |
16 | |
17 | /* |
18 | Augments a Translator with trivially derived translations. |
19 | |
20 | For example, if "Enabled:" is consistently translated as "Eingeschaltet:" no |
21 | matter the context or the comment, "Eingeschaltet:" is added as the |
22 | translation of any untranslated "Enabled:" text and is marked Unfinished. |
23 | |
24 | Returns the number of additional messages that this heuristic translated. |
25 | */ |
26 | |
27 | int applySameTextHeuristic(Translator &tor) |
28 | { |
29 | QMap<QString, QStringList> translated; |
30 | QMap<QString, bool> avoid; // Want a QTreeSet, in fact |
31 | QList<bool> untranslated(tor.messageCount()); |
32 | int inserted = 0; |
33 | |
34 | for (int i = 0; i < tor.messageCount(); ++i) { |
35 | const TranslatorMessage &msg = tor.message(i); |
36 | if (!msg.isTranslated()) { |
37 | if (msg.type() == TranslatorMessage::Unfinished) |
38 | untranslated[i] = true; |
39 | } else { |
40 | const QString &key = msg.sourceText(); |
41 | const auto t = translated.constFind(key); |
42 | if (t != translated.constEnd()) { |
43 | /* |
44 | The same source text is translated at least two |
45 | different ways. Do nothing then. |
46 | */ |
47 | if (*t != msg.translations()) { |
48 | translated.remove(key); |
49 | avoid.insert(key, value: true); |
50 | } |
51 | } else if (!avoid.contains(key)) { |
52 | translated.insert(key, value: msg.translations()); |
53 | } |
54 | } |
55 | } |
56 | |
57 | for (int i = 0; i < tor.messageCount(); ++i) { |
58 | if (untranslated[i]) { |
59 | TranslatorMessage &msg = tor.message(i); |
60 | const auto t = translated.constFind(key: msg.sourceText()); |
61 | if (t != translated.constEnd()) { |
62 | msg.setTranslations(*t); |
63 | ++inserted; |
64 | } |
65 | } |
66 | } |
67 | return inserted; |
68 | } |
69 | |
70 | |
71 | |
72 | /* |
73 | Merges two Translator objects. The first one |
74 | is a set of source texts and translations for a previous version of |
75 | the internationalized program; the second one is a set of fresh |
76 | source texts newly extracted from the source code, without any |
77 | translation yet. |
78 | */ |
79 | |
80 | Translator merge( |
81 | const Translator &tor, const Translator &virginTor, const QList<Translator> &aliens, |
82 | UpdateOptions options, QString &err) |
83 | { |
84 | int known = 0; |
85 | int neww = 0; |
86 | int obsoleted = 0; |
87 | int similarTextHeuristicCount = 0; |
88 | |
89 | Translator outTor; |
90 | outTor.setLanguageCode(tor.languageCode()); |
91 | outTor.setSourceLanguageCode(tor.sourceLanguageCode()); |
92 | outTor.setLocationsType(tor.locationsType()); |
93 | |
94 | /* |
95 | The types of all the messages from the vernacular translator |
96 | are updated according to the virgin translator. |
97 | */ |
98 | for (TranslatorMessage m : tor.messages()) { |
99 | TranslatorMessage::Type newType = TranslatorMessage::Finished; |
100 | |
101 | if (m.sourceText().isEmpty() && m.id().isEmpty()) { |
102 | // context/file comment |
103 | int mvi = virginTor.find(context: m.context()); |
104 | if (mvi >= 0) |
105 | m.setComment(virginTor.constMessage(i: mvi).comment()); |
106 | } else { |
107 | TranslatorMessage::ExtraData ; |
108 | const TranslatorMessage *mv; |
109 | int mvi = virginTor.find(msg: m); |
110 | if (mvi < 0) { |
111 | if (!(options & HeuristicSimilarText)) { |
112 | makeObsolete: |
113 | switch (m.type()) { |
114 | case TranslatorMessage::Finished: |
115 | newType = TranslatorMessage::Vanished; |
116 | obsoleted++; |
117 | break; |
118 | case TranslatorMessage::Unfinished: |
119 | newType = TranslatorMessage::Obsolete; |
120 | obsoleted++; |
121 | break; |
122 | default: |
123 | newType = m.type(); |
124 | break; |
125 | } |
126 | m.clearReferences(); |
127 | } else { |
128 | mvi = virginTor.find(context: m.context(), comment: m.comment(), refs: m.allReferences()); |
129 | if (mvi < 0) { |
130 | // did not find it in the virgin, mark it as obsolete |
131 | goto makeObsolete; |
132 | } |
133 | mv = &virginTor.constMessage(i: mvi); |
134 | // Do not just accept it if its on the same line number, |
135 | // but different source text. |
136 | // Also check if the texts are more or less similar before |
137 | // we consider them to represent the same message... |
138 | if (getSimilarityScore(str1: m.sourceText(), str2: mv->sourceText()) < textSimilarityThreshold) { |
139 | // The virgin and vernacular sourceTexts are so different that we could not find it |
140 | goto makeObsolete; |
141 | } |
142 | // It is just slightly modified, assume that it is the same string |
143 | |
144 | extras = mv->extras(); |
145 | |
146 | // Mark it as unfinished. (Since the source text |
147 | // was changed it might require re-translating...) |
148 | newType = TranslatorMessage::Unfinished; |
149 | ++similarTextHeuristicCount; |
150 | neww++; |
151 | goto outdateSource; |
152 | } |
153 | } else { |
154 | mv = &virginTor.message(i: mvi); |
155 | extras = mv->extras(); |
156 | if (!mv->id().isEmpty() |
157 | && (mv->context() != m.context() |
158 | || mv->sourceText() != m.sourceText() |
159 | || mv->comment() != m.comment())) { |
160 | known++; |
161 | newType = TranslatorMessage::Unfinished; |
162 | m.setContext(mv->context()); |
163 | m.setComment(mv->comment()); |
164 | if (mv->sourceText() != m.sourceText()) { |
165 | outdateSource: |
166 | m.setOldSourceText(m.sourceText()); |
167 | m.setSourceText(mv->sourceText()); |
168 | const QString &oldpluralsource = m.extra(ba: QLatin1String("po-msgid_plural" )); |
169 | if (!oldpluralsource.isEmpty()) |
170 | extras.insert(key: QLatin1String("po-old_msgid_plural" ), value: oldpluralsource); |
171 | } |
172 | } else { |
173 | switch (m.type()) { |
174 | case TranslatorMessage::Finished: |
175 | default: |
176 | if (m.isPlural() == mv->isPlural()) { |
177 | newType = TranslatorMessage::Finished; |
178 | } else { |
179 | newType = TranslatorMessage::Unfinished; |
180 | } |
181 | known++; |
182 | break; |
183 | case TranslatorMessage::Unfinished: |
184 | newType = TranslatorMessage::Unfinished; |
185 | known++; |
186 | break; |
187 | case TranslatorMessage::Vanished: |
188 | newType = TranslatorMessage::Finished; |
189 | neww++; |
190 | break; |
191 | case TranslatorMessage::Obsolete: |
192 | newType = TranslatorMessage::Unfinished; |
193 | neww++; |
194 | break; |
195 | } |
196 | } |
197 | |
198 | // Always get the filename and linenumber info from the |
199 | // virgin Translator, in case it has changed location. |
200 | // This should also enable us to read a file that does not |
201 | // have the <location> element. |
202 | // why not use operator=()? Because it overwrites e.g. userData. |
203 | m.setReferences(mv->allReferences()); |
204 | m.setPlural(mv->isPlural()); |
205 | m.setExtras(extras); |
206 | m.setExtraComment(mv->extraComment()); |
207 | m.setId(mv->id()); |
208 | } |
209 | } |
210 | |
211 | m.setType(newType); |
212 | outTor.append(msg: m); |
213 | } |
214 | |
215 | /* |
216 | Messages found only in the virgin translator are added to the |
217 | vernacular translator. |
218 | */ |
219 | for (const TranslatorMessage &mv : virginTor.messages()) { |
220 | if (mv.sourceText().isEmpty() && mv.id().isEmpty()) { |
221 | if (tor.find(context: mv.context()) >= 0) |
222 | continue; |
223 | } else { |
224 | if (tor.find(msg: mv) >= 0) |
225 | continue; |
226 | if (options & HeuristicSimilarText) { |
227 | int mi = tor.find(context: mv.context(), comment: mv.comment(), refs: mv.allReferences()); |
228 | if (mi >= 0) { |
229 | // The similar message found in tor (ts file) must NOT correspond exactly |
230 | // to an other message is virginTor |
231 | if (virginTor.find(msg: tor.constMessage(i: mi)) < 0) { |
232 | if (getSimilarityScore(str1: tor.constMessage(i: mi).sourceText(), str2: mv.sourceText()) |
233 | >= textSimilarityThreshold) |
234 | continue; |
235 | } |
236 | } |
237 | } |
238 | } |
239 | if (options & NoLocations) |
240 | outTor.append(msg: mv); |
241 | else |
242 | outTor.appendSorted(msg: mv); |
243 | if (!mv.sourceText().isEmpty() || !mv.id().isEmpty()) |
244 | ++neww; |
245 | } |
246 | |
247 | /* |
248 | "Alien" translators can be used to augment the vernacular translator. |
249 | */ |
250 | for (const Translator &alf : aliens) { |
251 | for (TranslatorMessage mv : alf.messages()) { |
252 | if (mv.sourceText().isEmpty() || !mv.isTranslated()) |
253 | continue; |
254 | int mvi = outTor.find(msg: mv); |
255 | if (mvi >= 0) { |
256 | TranslatorMessage &tm = outTor.message(i: mvi); |
257 | if (tm.type() != TranslatorMessage::Finished && !tm.isTranslated()) { |
258 | tm.setTranslations(mv.translations()); |
259 | --neww; |
260 | ++known; |
261 | } |
262 | } else { |
263 | /* |
264 | * Don't do simtex search, as the locations are likely to be |
265 | * completely off anyway, so we'd find nothing. |
266 | */ |
267 | /* |
268 | * Add the unmatched messages as obsoletes, so the Linguist GUI |
269 | * will offer them as possible translations. |
270 | */ |
271 | mv.clearReferences(); |
272 | mv.setType(mv.type() == TranslatorMessage::Finished |
273 | ? TranslatorMessage::Vanished : TranslatorMessage::Obsolete); |
274 | if (options & NoLocations) |
275 | outTor.append(msg: mv); |
276 | else |
277 | outTor.appendSorted(msg: mv); |
278 | ++known; |
279 | ++obsoleted; |
280 | } |
281 | } |
282 | } |
283 | |
284 | /* |
285 | The same-text heuristic handles cases where a message has an |
286 | obsolete counterpart with a different context or comment. |
287 | */ |
288 | int sameTextHeuristicCount = (options & HeuristicSameText) ? applySameTextHeuristic(tor&: outTor) : 0; |
289 | |
290 | if (options & Verbose) { |
291 | int totalFound = neww + known; |
292 | err += QStringLiteral(" Found %1 source text(s) (%2 new and %3 already existing)\n" ) |
293 | .arg(a: totalFound).arg(a: neww).arg(a: known); |
294 | |
295 | if (obsoleted) { |
296 | if (options & NoObsolete) { |
297 | err += QStringLiteral(" Removed %1 obsolete entries\n" ).arg(a: obsoleted); |
298 | } else { |
299 | err += QStringLiteral(" Kept %1 obsolete entries\n" ).arg(a: obsoleted); |
300 | } |
301 | } |
302 | |
303 | if (sameTextHeuristicCount) |
304 | err += QStringLiteral(" Same-text heuristic provided %1 translation(s)\n" ) |
305 | .arg(a: sameTextHeuristicCount); |
306 | if (similarTextHeuristicCount) |
307 | err += QStringLiteral(" Similar-text heuristic provided %1 translation(s)\n" ) |
308 | .arg(a: similarTextHeuristicCount); |
309 | } |
310 | return outTor; |
311 | } |
312 | |
313 | QT_END_NAMESPACE |
314 | |