| 1 | // Copyright (C) 2016 The Qt Company Ltd. |
| 2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 |
| 3 | |
| 4 | #include "lupdate.h" |
| 5 | |
| 6 | #include "simtexth.h" |
| 7 | #include "translator.h" |
| 8 | |
| 9 | #include <QtCore/QCoreApplication> |
| 10 | #include <QtCore/QDebug> |
| 11 | #include <QtCore/QList> |
| 12 | #include <QtCore/QMap> |
| 13 | #include <QtCore/QStringList> |
| 14 | |
| 15 | QT_BEGIN_NAMESPACE |
| 16 | |
| 17 | using namespace Qt::Literals::StringLiterals; |
| 18 | |
| 19 | /* |
| 20 | Augments a Translator with trivially derived translations. |
| 21 | |
| 22 | For example, if "Enabled:" is consistendly translated as "Eingeschaltet:" no |
| 23 | matter the context or the comment, "Eingeschaltet:" is added as the |
| 24 | translation of any untranslated "Enabled:" text and is marked Unfinished. |
| 25 | |
| 26 | Returns the number of additional messages that this heuristic translated. |
| 27 | */ |
| 28 | |
| 29 | int applySameTextHeuristic(Translator &tor) |
| 30 | { |
| 31 | QMap<QString, QStringList> translated; |
| 32 | QMap<QString, bool> avoid; // Want a QTreeSet, in fact |
| 33 | QList<bool> untranslated(tor.messageCount()); |
| 34 | int inserted = 0; |
| 35 | |
| 36 | for (int i = 0; i < tor.messageCount(); ++i) { |
| 37 | const TranslatorMessage &msg = tor.message(i); |
| 38 | if (!msg.isTranslated()) { |
| 39 | if (msg.type() == TranslatorMessage::Unfinished) |
| 40 | untranslated[i] = true; |
| 41 | } else { |
| 42 | const QString &key = msg.sourceText(); |
| 43 | const auto t = translated.constFind(key); |
| 44 | if (t != translated.constEnd()) { |
| 45 | /* |
| 46 | The same source text is translated at least two |
| 47 | different ways. Do nothing then. |
| 48 | */ |
| 49 | if (*t != msg.translations()) { |
| 50 | translated.remove(key); |
| 51 | avoid.insert(key, value: true); |
| 52 | } |
| 53 | } else if (!avoid.contains(key)) { |
| 54 | translated.insert(key, value: msg.translations()); |
| 55 | } |
| 56 | } |
| 57 | } |
| 58 | |
| 59 | for (int i = 0; i < tor.messageCount(); ++i) { |
| 60 | if (untranslated[i]) { |
| 61 | TranslatorMessage &msg = tor.message(i); |
| 62 | const auto t = translated.constFind(key: msg.sourceText()); |
| 63 | if (t != translated.constEnd()) { |
| 64 | msg.setTranslations(*t); |
| 65 | ++inserted; |
| 66 | } |
| 67 | } |
| 68 | } |
| 69 | return inserted; |
| 70 | } |
| 71 | |
| 72 | |
| 73 | |
| 74 | /* |
| 75 | Merges two Translator objects. The first one |
| 76 | is a set of source texts and translations for a previous version of |
| 77 | the internationalized program; the second one is a set of fresh |
| 78 | source texts newly extracted from the source code, without any |
| 79 | translation yet. |
| 80 | */ |
| 81 | |
| 82 | Translator merge( |
| 83 | const Translator &tor, const Translator &virginTor, const QList<Translator> &aliens, |
| 84 | UpdateOptions options, QString &err) |
| 85 | { |
| 86 | int known = 0; |
| 87 | int neww = 0; |
| 88 | int obsoleted = 0; |
| 89 | int similarTextHeuristicCount = 0; |
| 90 | |
| 91 | Translator outTor; |
| 92 | outTor.setLanguageCode(tor.languageCode()); |
| 93 | outTor.setSourceLanguageCode(tor.sourceLanguageCode()); |
| 94 | outTor.setLocationsType(tor.locationsType()); |
| 95 | |
| 96 | /* |
| 97 | The types of all the messages from the vernacular translator |
| 98 | are updated according to the virgin translator. |
| 99 | */ |
| 100 | for (TranslatorMessage m : tor.messages()) { |
| 101 | TranslatorMessage::Type newType = TranslatorMessage::Finished; |
| 102 | |
| 103 | |
| 104 | TranslatorMessage::ExtraData ; |
| 105 | const TranslatorMessage *mv; |
| 106 | int mvi = virginTor.find(msg: m); |
| 107 | if (mvi < 0) { |
| 108 | if (!(options & HeuristicSimilarText)) { |
| 109 | makeObsolete: |
| 110 | switch (m.type()) { |
| 111 | case TranslatorMessage::Finished: |
| 112 | newType = TranslatorMessage::Vanished; |
| 113 | obsoleted++; |
| 114 | break; |
| 115 | case TranslatorMessage::Unfinished: |
| 116 | newType = TranslatorMessage::Obsolete; |
| 117 | obsoleted++; |
| 118 | break; |
| 119 | default: |
| 120 | newType = m.type(); |
| 121 | break; |
| 122 | } |
| 123 | m.clearReferences(); |
| 124 | } else { |
| 125 | mvi = virginTor.find(context: m.context(), comment: m.comment(), refs: m.allReferences()); |
| 126 | if (mvi < 0) { |
| 127 | // did not find it in the virgin, mark it as obsolete |
| 128 | goto makeObsolete; |
| 129 | } |
| 130 | mv = &virginTor.constMessage(i: mvi); |
| 131 | // Do not just accept it if its on the same line number, |
| 132 | // but different source text. |
| 133 | // Also check if the texts are more or less similar before |
| 134 | // we consider them to represent the same message... |
| 135 | if (getSimilarityScore(str1: m.sourceText(), str2: mv->sourceText()) < textSimilarityThreshold) { |
| 136 | // The virgin and vernacular sourceTexts are so different that we could not find it |
| 137 | goto makeObsolete; |
| 138 | } |
| 139 | // It is just slightly modified, assume that it is the same string |
| 140 | |
| 141 | extras = mv->extras(); |
| 142 | |
| 143 | // Mark it as unfinished. (Since the source text |
| 144 | // was changed it might require re-translating...) |
| 145 | newType = TranslatorMessage::Unfinished; |
| 146 | ++similarTextHeuristicCount; |
| 147 | neww++; |
| 148 | goto outdateSource; |
| 149 | } |
| 150 | } else { |
| 151 | mv = &virginTor.message(i: mvi); |
| 152 | extras = mv->extras(); |
| 153 | if (!mv->id().isEmpty() |
| 154 | && (mv->context() != m.context() |
| 155 | || mv->sourceText() != m.sourceText() |
| 156 | || mv->comment() != m.comment())) { |
| 157 | known++; |
| 158 | newType = TranslatorMessage::Unfinished; |
| 159 | m.setContext(mv->context()); |
| 160 | m.setComment(mv->comment()); |
| 161 | if (mv->sourceText() != m.sourceText()) { |
| 162 | outdateSource: |
| 163 | m.setOldSourceText(m.sourceText()); |
| 164 | m.setSourceText(mv->sourceText()); |
| 165 | const QString &oldpluralsource = m.extra(ba: "po-msgid_plural"_L1 ); |
| 166 | if (!oldpluralsource.isEmpty()) |
| 167 | extras.insert(key: "po-old_msgid_plural"_L1 , value: oldpluralsource); |
| 168 | } |
| 169 | } else { |
| 170 | switch (m.type()) { |
| 171 | case TranslatorMessage::Finished: |
| 172 | default: |
| 173 | if (m.isPlural() == mv->isPlural()) { |
| 174 | newType = TranslatorMessage::Finished; |
| 175 | } else { |
| 176 | newType = TranslatorMessage::Unfinished; |
| 177 | } |
| 178 | known++; |
| 179 | break; |
| 180 | case TranslatorMessage::Unfinished: |
| 181 | newType = TranslatorMessage::Unfinished; |
| 182 | known++; |
| 183 | break; |
| 184 | case TranslatorMessage::Vanished: |
| 185 | newType = TranslatorMessage::Finished; |
| 186 | neww++; |
| 187 | break; |
| 188 | case TranslatorMessage::Obsolete: |
| 189 | newType = TranslatorMessage::Unfinished; |
| 190 | neww++; |
| 191 | break; |
| 192 | } |
| 193 | } |
| 194 | |
| 195 | // Always get the filename and linenumber info from the |
| 196 | // virgin Translator, in case it has changed location. |
| 197 | // This should also enable us to read a file that does not |
| 198 | // have the <location> element. |
| 199 | // why not use operator=()? Because it overwrites e.g. userData. |
| 200 | m.setReferences(mv->allReferences()); |
| 201 | m.setPlural(mv->isPlural()); |
| 202 | m.setExtras(extras); |
| 203 | m.setExtraComment(mv->extraComment()); |
| 204 | m.setId(mv->id()); |
| 205 | m.setLabel(mv->label()); |
| 206 | } |
| 207 | |
| 208 | |
| 209 | m.setType(newType); |
| 210 | outTor.append(msg: m); |
| 211 | } |
| 212 | |
| 213 | /* |
| 214 | Messages found only in the virgin translator are added to the |
| 215 | vernacular translator. |
| 216 | */ |
| 217 | for (const TranslatorMessage &mv : virginTor.messages()) { |
| 218 | |
| 219 | if (tor.find(msg: mv) >= 0) |
| 220 | continue; |
| 221 | if (options & HeuristicSimilarText) { |
| 222 | int mi = tor.find(context: mv.context(), comment: mv.comment(), refs: mv.allReferences()); |
| 223 | if (mi >= 0) { |
| 224 | // The similar message found in tor (ts file) must NOT correspond exactly |
| 225 | // to an other message is virginTor |
| 226 | if (virginTor.find(msg: tor.constMessage(i: mi)) < 0) { |
| 227 | if (getSimilarityScore(str1: tor.constMessage(i: mi).sourceText(), str2: mv.sourceText()) |
| 228 | >= textSimilarityThreshold) |
| 229 | continue; |
| 230 | } |
| 231 | } |
| 232 | } |
| 233 | |
| 234 | outTor.appendSorted(msg: mv); |
| 235 | ++neww; |
| 236 | } |
| 237 | |
| 238 | /* |
| 239 | "Alien" translators can be used to augment the vernacular translator. |
| 240 | */ |
| 241 | for (const Translator &alf : aliens) { |
| 242 | for (TranslatorMessage mv : alf.messages()) { |
| 243 | if (mv.sourceText().isEmpty() || !mv.isTranslated()) |
| 244 | continue; |
| 245 | int mvi = outTor.find(msg: mv); |
| 246 | if (mvi >= 0) { |
| 247 | TranslatorMessage &tm = outTor.message(i: mvi); |
| 248 | if (tm.type() != TranslatorMessage::Finished && !tm.isTranslated()) { |
| 249 | tm.setTranslations(mv.translations()); |
| 250 | --neww; |
| 251 | ++known; |
| 252 | } |
| 253 | } else { |
| 254 | /* |
| 255 | * Don't do simtex search, as the locations are likely to be |
| 256 | * completely off anyway, so we'd find nothing. |
| 257 | */ |
| 258 | /* |
| 259 | * Add the unmatched messages as obsoletes, so the Linguist GUI |
| 260 | * will offer them as possible translations. |
| 261 | */ |
| 262 | mv.clearReferences(); |
| 263 | mv.setType(mv.type() == TranslatorMessage::Finished |
| 264 | ? TranslatorMessage::Vanished : TranslatorMessage::Obsolete); |
| 265 | outTor.appendSorted(msg: mv); |
| 266 | ++known; |
| 267 | ++obsoleted; |
| 268 | } |
| 269 | } |
| 270 | } |
| 271 | |
| 272 | /* |
| 273 | The same-text heuristic handles cases where a message has an |
| 274 | obsolete counterpart with a different context or comment. |
| 275 | */ |
| 276 | int sameTextHeuristicCount = (options & HeuristicSameText) ? applySameTextHeuristic(tor&: outTor) : 0; |
| 277 | |
| 278 | if (options & Verbose) { |
| 279 | int totalFound = neww + known; |
| 280 | err += QStringLiteral(" Found %1 source text(s) (%2 new and %3 already existing)\n" ) |
| 281 | .arg(a: totalFound).arg(a: neww).arg(a: known); |
| 282 | |
| 283 | if (obsoleted) { |
| 284 | if (options & NoObsolete) { |
| 285 | err += QStringLiteral(" Removed %1 obsolete entries\n" ).arg(a: obsoleted); |
| 286 | } else { |
| 287 | err += QStringLiteral(" Kept %1 obsolete entries\n" ).arg(a: obsoleted); |
| 288 | } |
| 289 | } |
| 290 | |
| 291 | if (sameTextHeuristicCount) |
| 292 | err += QStringLiteral(" Same-text heuristic provided %1 translation(s)\n" ) |
| 293 | .arg(a: sameTextHeuristicCount); |
| 294 | if (similarTextHeuristicCount) |
| 295 | err += QStringLiteral(" Similar-text heuristic provided %1 translation(s)\n" ) |
| 296 | .arg(a: similarTextHeuristicCount); |
| 297 | } |
| 298 | return outTor; |
| 299 | } |
| 300 | |
| 301 | QT_END_NAMESPACE |
| 302 | |