| 1 | // Copyright (C) 2016 The Qt Company Ltd. |
| 2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 |
| 3 | |
| 4 | #include "lupdate.h" |
| 5 | |
| 6 | #include "simtexth.h" |
| 7 | #include "translator.h" |
| 8 | |
| 9 | #include <QtCore/QCoreApplication> |
| 10 | #include <QtCore/QDebug> |
| 11 | #include <QtCore/QList> |
| 12 | #include <QtCore/QMap> |
| 13 | #include <QtCore/QStringList> |
| 14 | |
| 15 | QT_BEGIN_NAMESPACE |
| 16 | |
| 17 | /* |
| 18 | Augments a Translator with trivially derived translations. |
| 19 | |
| 20 | For example, if "Enabled:" is consistendly translated as "Eingeschaltet:" no |
| 21 | matter the context or the comment, "Eingeschaltet:" is added as the |
| 22 | translation of any untranslated "Enabled:" text and is marked Unfinished. |
| 23 | |
| 24 | Returns the number of additional messages that this heuristic translated. |
| 25 | */ |
| 26 | |
| 27 | int applySameTextHeuristic(Translator &tor) |
| 28 | { |
| 29 | QMap<QString, QStringList> translated; |
| 30 | QMap<QString, bool> avoid; // Want a QTreeSet, in fact |
| 31 | QList<bool> untranslated(tor.messageCount()); |
| 32 | int inserted = 0; |
| 33 | |
| 34 | for (int i = 0; i < tor.messageCount(); ++i) { |
| 35 | const TranslatorMessage &msg = tor.message(i); |
| 36 | if (!msg.isTranslated()) { |
| 37 | if (msg.type() == TranslatorMessage::Unfinished) |
| 38 | untranslated[i] = true; |
| 39 | } else { |
| 40 | const QString &key = msg.sourceText(); |
| 41 | const auto t = translated.constFind(key); |
| 42 | if (t != translated.constEnd()) { |
| 43 | /* |
| 44 | The same source text is translated at least two |
| 45 | different ways. Do nothing then. |
| 46 | */ |
| 47 | if (*t != msg.translations()) { |
| 48 | translated.remove(key); |
| 49 | avoid.insert(key, value: true); |
| 50 | } |
| 51 | } else if (!avoid.contains(key)) { |
| 52 | translated.insert(key, value: msg.translations()); |
| 53 | } |
| 54 | } |
| 55 | } |
| 56 | |
| 57 | for (int i = 0; i < tor.messageCount(); ++i) { |
| 58 | if (untranslated[i]) { |
| 59 | TranslatorMessage &msg = tor.message(i); |
| 60 | const auto t = translated.constFind(key: msg.sourceText()); |
| 61 | if (t != translated.constEnd()) { |
| 62 | msg.setTranslations(*t); |
| 63 | ++inserted; |
| 64 | } |
| 65 | } |
| 66 | } |
| 67 | return inserted; |
| 68 | } |
| 69 | |
| 70 | |
| 71 | |
| 72 | /* |
| 73 | Merges two Translator objects. The first one |
| 74 | is a set of source texts and translations for a previous version of |
| 75 | the internationalized program; the second one is a set of fresh |
| 76 | source texts newly extracted from the source code, without any |
| 77 | translation yet. |
| 78 | */ |
| 79 | |
| 80 | Translator merge( |
| 81 | const Translator &tor, const Translator &virginTor, const QList<Translator> &aliens, |
| 82 | UpdateOptions options, QString &err) |
| 83 | { |
| 84 | int known = 0; |
| 85 | int neww = 0; |
| 86 | int obsoleted = 0; |
| 87 | int similarTextHeuristicCount = 0; |
| 88 | |
| 89 | Translator outTor; |
| 90 | outTor.setLanguageCode(tor.languageCode()); |
| 91 | outTor.setSourceLanguageCode(tor.sourceLanguageCode()); |
| 92 | outTor.setLocationsType(tor.locationsType()); |
| 93 | |
| 94 | /* |
| 95 | The types of all the messages from the vernacular translator |
| 96 | are updated according to the virgin translator. |
| 97 | */ |
| 98 | for (TranslatorMessage m : tor.messages()) { |
| 99 | TranslatorMessage::Type newType = TranslatorMessage::Finished; |
| 100 | |
| 101 | if (m.sourceText().isEmpty() && m.id().isEmpty()) { |
| 102 | // context/file comment |
| 103 | int mvi = virginTor.find(context: m.context()); |
| 104 | if (mvi >= 0) |
| 105 | m.setComment(virginTor.constMessage(i: mvi).comment()); |
| 106 | } else { |
| 107 | TranslatorMessage::ExtraData ; |
| 108 | const TranslatorMessage *mv; |
| 109 | int mvi = virginTor.find(msg: m); |
| 110 | if (mvi < 0) { |
| 111 | if (!(options & HeuristicSimilarText)) { |
| 112 | makeObsolete: |
| 113 | switch (m.type()) { |
| 114 | case TranslatorMessage::Finished: |
| 115 | newType = TranslatorMessage::Vanished; |
| 116 | obsoleted++; |
| 117 | break; |
| 118 | case TranslatorMessage::Unfinished: |
| 119 | newType = TranslatorMessage::Obsolete; |
| 120 | obsoleted++; |
| 121 | break; |
| 122 | default: |
| 123 | newType = m.type(); |
| 124 | break; |
| 125 | } |
| 126 | m.clearReferences(); |
| 127 | } else { |
| 128 | mvi = virginTor.find(context: m.context(), comment: m.comment(), refs: m.allReferences()); |
| 129 | if (mvi < 0) { |
| 130 | // did not find it in the virgin, mark it as obsolete |
| 131 | goto makeObsolete; |
| 132 | } |
| 133 | mv = &virginTor.constMessage(i: mvi); |
| 134 | // Do not just accept it if its on the same line number, |
| 135 | // but different source text. |
| 136 | // Also check if the texts are more or less similar before |
| 137 | // we consider them to represent the same message... |
| 138 | if (getSimilarityScore(str1: m.sourceText(), str2: mv->sourceText()) < textSimilarityThreshold) { |
| 139 | // The virgin and vernacular sourceTexts are so different that we could not find it |
| 140 | goto makeObsolete; |
| 141 | } |
| 142 | // It is just slightly modified, assume that it is the same string |
| 143 | |
| 144 | extras = mv->extras(); |
| 145 | |
| 146 | // Mark it as unfinished. (Since the source text |
| 147 | // was changed it might require re-translating...) |
| 148 | newType = TranslatorMessage::Unfinished; |
| 149 | ++similarTextHeuristicCount; |
| 150 | neww++; |
| 151 | goto outdateSource; |
| 152 | } |
| 153 | } else { |
| 154 | mv = &virginTor.message(i: mvi); |
| 155 | extras = mv->extras(); |
| 156 | if (!mv->id().isEmpty() |
| 157 | && (mv->context() != m.context() |
| 158 | || mv->sourceText() != m.sourceText() |
| 159 | || mv->comment() != m.comment())) { |
| 160 | known++; |
| 161 | newType = TranslatorMessage::Unfinished; |
| 162 | m.setContext(mv->context()); |
| 163 | m.setComment(mv->comment()); |
| 164 | if (mv->sourceText() != m.sourceText()) { |
| 165 | outdateSource: |
| 166 | m.setOldSourceText(m.sourceText()); |
| 167 | m.setSourceText(mv->sourceText()); |
| 168 | const QString &oldpluralsource = m.extra(ba: QLatin1String("po-msgid_plural" )); |
| 169 | if (!oldpluralsource.isEmpty()) |
| 170 | extras.insert(key: QLatin1String("po-old_msgid_plural" ), value: oldpluralsource); |
| 171 | } |
| 172 | } else { |
| 173 | switch (m.type()) { |
| 174 | case TranslatorMessage::Finished: |
| 175 | default: |
| 176 | if (m.isPlural() == mv->isPlural()) { |
| 177 | newType = TranslatorMessage::Finished; |
| 178 | } else { |
| 179 | newType = TranslatorMessage::Unfinished; |
| 180 | } |
| 181 | known++; |
| 182 | break; |
| 183 | case TranslatorMessage::Unfinished: |
| 184 | newType = TranslatorMessage::Unfinished; |
| 185 | known++; |
| 186 | break; |
| 187 | case TranslatorMessage::Vanished: |
| 188 | newType = TranslatorMessage::Finished; |
| 189 | neww++; |
| 190 | break; |
| 191 | case TranslatorMessage::Obsolete: |
| 192 | newType = TranslatorMessage::Unfinished; |
| 193 | neww++; |
| 194 | break; |
| 195 | } |
| 196 | } |
| 197 | |
| 198 | // Always get the filename and linenumber info from the |
| 199 | // virgin Translator, in case it has changed location. |
| 200 | // This should also enable us to read a file that does not |
| 201 | // have the <location> element. |
| 202 | // why not use operator=()? Because it overwrites e.g. userData. |
| 203 | m.setReferences(mv->allReferences()); |
| 204 | m.setPlural(mv->isPlural()); |
| 205 | m.setExtras(extras); |
| 206 | m.setExtraComment(mv->extraComment()); |
| 207 | m.setId(mv->id()); |
| 208 | } |
| 209 | } |
| 210 | |
| 211 | m.setType(newType); |
| 212 | outTor.append(msg: m); |
| 213 | } |
| 214 | |
| 215 | /* |
| 216 | Messages found only in the virgin translator are added to the |
| 217 | vernacular translator. |
| 218 | */ |
| 219 | for (const TranslatorMessage &mv : virginTor.messages()) { |
| 220 | if (mv.sourceText().isEmpty() && mv.id().isEmpty()) { |
| 221 | if (tor.find(context: mv.context()) >= 0) |
| 222 | continue; |
| 223 | } else { |
| 224 | if (tor.find(msg: mv) >= 0) |
| 225 | continue; |
| 226 | if (options & HeuristicSimilarText) { |
| 227 | int mi = tor.find(context: mv.context(), comment: mv.comment(), refs: mv.allReferences()); |
| 228 | if (mi >= 0) { |
| 229 | // The similar message found in tor (ts file) must NOT correspond exactly |
| 230 | // to an other message is virginTor |
| 231 | if (virginTor.find(msg: tor.constMessage(i: mi)) < 0) { |
| 232 | if (getSimilarityScore(str1: tor.constMessage(i: mi).sourceText(), str2: mv.sourceText()) |
| 233 | >= textSimilarityThreshold) |
| 234 | continue; |
| 235 | } |
| 236 | } |
| 237 | } |
| 238 | } |
| 239 | if (options & NoLocations) |
| 240 | outTor.append(msg: mv); |
| 241 | else |
| 242 | outTor.appendSorted(msg: mv); |
| 243 | if (!mv.sourceText().isEmpty() || !mv.id().isEmpty()) |
| 244 | ++neww; |
| 245 | } |
| 246 | |
| 247 | /* |
| 248 | "Alien" translators can be used to augment the vernacular translator. |
| 249 | */ |
| 250 | for (const Translator &alf : aliens) { |
| 251 | for (TranslatorMessage mv : alf.messages()) { |
| 252 | if (mv.sourceText().isEmpty() || !mv.isTranslated()) |
| 253 | continue; |
| 254 | int mvi = outTor.find(msg: mv); |
| 255 | if (mvi >= 0) { |
| 256 | TranslatorMessage &tm = outTor.message(i: mvi); |
| 257 | if (tm.type() != TranslatorMessage::Finished && !tm.isTranslated()) { |
| 258 | tm.setTranslations(mv.translations()); |
| 259 | --neww; |
| 260 | ++known; |
| 261 | } |
| 262 | } else { |
| 263 | /* |
| 264 | * Don't do simtex search, as the locations are likely to be |
| 265 | * completely off anyway, so we'd find nothing. |
| 266 | */ |
| 267 | /* |
| 268 | * Add the unmatched messages as obsoletes, so the Linguist GUI |
| 269 | * will offer them as possible translations. |
| 270 | */ |
| 271 | mv.clearReferences(); |
| 272 | mv.setType(mv.type() == TranslatorMessage::Finished |
| 273 | ? TranslatorMessage::Vanished : TranslatorMessage::Obsolete); |
| 274 | if (options & NoLocations) |
| 275 | outTor.append(msg: mv); |
| 276 | else |
| 277 | outTor.appendSorted(msg: mv); |
| 278 | ++known; |
| 279 | ++obsoleted; |
| 280 | } |
| 281 | } |
| 282 | } |
| 283 | |
| 284 | /* |
| 285 | The same-text heuristic handles cases where a message has an |
| 286 | obsolete counterpart with a different context or comment. |
| 287 | */ |
| 288 | int sameTextHeuristicCount = (options & HeuristicSameText) ? applySameTextHeuristic(tor&: outTor) : 0; |
| 289 | |
| 290 | if (options & Verbose) { |
| 291 | int totalFound = neww + known; |
| 292 | err += QStringLiteral(" Found %1 source text(s) (%2 new and %3 already existing)\n" ) |
| 293 | .arg(a: totalFound).arg(a: neww).arg(a: known); |
| 294 | |
| 295 | if (obsoleted) { |
| 296 | if (options & NoObsolete) { |
| 297 | err += QStringLiteral(" Removed %1 obsolete entries\n" ).arg(a: obsoleted); |
| 298 | } else { |
| 299 | err += QStringLiteral(" Kept %1 obsolete entries\n" ).arg(a: obsoleted); |
| 300 | } |
| 301 | } |
| 302 | |
| 303 | if (sameTextHeuristicCount) |
| 304 | err += QStringLiteral(" Same-text heuristic provided %1 translation(s)\n" ) |
| 305 | .arg(a: sameTextHeuristicCount); |
| 306 | if (similarTextHeuristicCount) |
| 307 | err += QStringLiteral(" Similar-text heuristic provided %1 translation(s)\n" ) |
| 308 | .arg(a: similarTextHeuristicCount); |
| 309 | } |
| 310 | return outTor; |
| 311 | } |
| 312 | |
| 313 | QT_END_NAMESPACE |
| 314 | |