1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
3
4#include "translator.h"
5
6#include <QtCore/QByteArray>
7#include <QtCore/QDebug>
8#include <QtCore/QRegularExpression>
9#include <QtCore/QTextStream>
10
11#include <QtCore/QXmlStreamReader>
12
13#include <algorithm>
14
15using namespace Qt::StringLiterals;
16
17QT_BEGIN_NAMESPACE
18
19QDebug &operator<<(QDebug &d, const QXmlStreamAttribute &attr)
20{
21 return d << "[" << attr.name().toString() << "," << attr.value().toString() << "]";
22}
23
24
25class TSReader : public QXmlStreamReader
26{
27public:
28 TSReader(QIODevice &dev, ConversionData &cd)
29 : QXmlStreamReader(&dev), m_cd(cd)
30 {}
31
32 // the "real thing"
33 bool read(Translator &translator);
34
35private:
36 bool elementStarts(const QString &str) const
37 {
38 return isStartElement() && name() == str;
39 }
40
41 bool isWhiteSpace() const
42 {
43 return isCharacters() && text().toString().trimmed().isEmpty();
44 }
45
46 // needed to expand <byte ... />
47 QString readContents();
48 // needed to join <lengthvariant>s
49 QString readTransContents();
50
51 void handleError();
52
53 ConversionData &m_cd;
54};
55
56void TSReader::handleError()
57{
58 if (isComment())
59 return;
60 if (hasError() && error() == CustomError) // raised by readContents
61 return;
62
63 const QString loc = QString::fromLatin1(ba: "at %3:%1:%2")
64 .arg(a: lineNumber()).arg(a: columnNumber()).arg(a: m_cd.m_sourceFileName);
65
66 switch (tokenType()) {
67 case NoToken: // Cannot happen
68 default: // likewise
69 case Invalid:
70 raiseError(message: QString::fromLatin1(ba: "Parse error %1: %2").arg(args: loc, args: errorString()));
71 break;
72 case StartElement:
73 raiseError(message: QString::fromLatin1(ba: "Unexpected tag <%1> %2").arg(args: name().toString(), args: loc));
74 break;
75 case Characters:
76 {
77 QString tok = text().toString();
78 if (tok.size() > 30)
79 tok = tok.left(n: 30) + QLatin1String("[...]");
80 raiseError(message: QString::fromLatin1(ba: "Unexpected characters '%1' %2").arg(args&: tok, args: loc));
81 }
82 break;
83 case EntityReference:
84 raiseError(message: QString::fromLatin1(ba: "Unexpected entity '&%1;' %2").arg(args: name().toString(), args: loc));
85 break;
86 case ProcessingInstruction:
87 raiseError(message: QString::fromLatin1(ba: "Unexpected processing instruction %1").arg(a: loc));
88 break;
89 }
90}
91
92static QString byteValue(QString value)
93{
94 int base = 10;
95 if (value.startsWith(s: QLatin1String("x"))) {
96 base = 16;
97 value.remove(i: 0, len: 1);
98 }
99 int n = value.toUInt(ok: 0, base);
100 return (n != 0) ? QString(QChar(n)) : QString();
101}
102
103QString TSReader::readContents()
104{
105 static const QString strbyte = u"byte"_s;
106 static const QString strvalue = u"value"_s;
107
108 QString result;
109 while (!atEnd()) {
110 readNext();
111 if (isEndElement()) {
112 break;
113 } else if (isCharacters()) {
114 result += text();
115 } else if (elementStarts(str: strbyte)) {
116 // <byte value="...">
117 result += byteValue(value: attributes().value(qualifiedName: strvalue).toString());
118 readNext();
119 if (!isEndElement()) {
120 handleError();
121 break;
122 }
123 } else {
124 handleError();
125 break;
126 }
127 }
128 //qDebug() << "TEXT: " << result;
129 return result;
130}
131
132QString TSReader::readTransContents()
133{
134 static const QString strlengthvariant = u"lengthvariant"_s;
135 static const QString strvariants = u"variants"_s;
136 static const QString stryes = u"yes"_s;
137
138 if (attributes().value(qualifiedName: strvariants) == stryes) {
139 QString result;
140 while (!atEnd()) {
141 readNext();
142 if (isEndElement()) {
143 break;
144 } else if (isWhiteSpace()) {
145 // ignore these, just whitespace
146 } else if (elementStarts(str: strlengthvariant)) {
147 if (!result.isEmpty())
148 result += QChar(Translator::BinaryVariantSeparator);
149 result += readContents();
150 } else {
151 handleError();
152 break;
153 }
154 }
155 return result;
156 } else {
157 return readContents();
158 }
159}
160
161bool TSReader::read(Translator &translator)
162{
163 static const QString strcatalog = u"catalog"_s;
164 static const QString strcomment = u"comment"_s;
165 static const QString strcontext = u"context"_s;
166 static const QString strdependencies = u"dependencies"_s;
167 static const QString strdependency = u"dependency"_s;
168 static const QString strextracomment = u"extracomment"_s;
169 static const QString strfilename = u"filename"_s;
170 static const QString strid = u"id"_s;
171 static const QString strlanguage = u"language"_s;
172 static const QString strline = u"line"_s;
173 static const QString strlocation = u"location"_s;
174 static const QString strmessage = u"message"_s;
175 static const QString strname = u"name"_s;
176 static const QString strnumerus = u"numerus"_s;
177 static const QString strnumerusform = u"numerusform"_s;
178 static const QString strobsolete = u"obsolete"_s;
179 static const QString stroldcomment = u"oldcomment"_s;
180 static const QString stroldsource = u"oldsource"_s;
181 static const QString strsource = u"source"_s;
182 static const QString strsourcelanguage = u"sourcelanguage"_s;
183 static const QString strtranslation = u"translation"_s;
184 static const QString strtranslatorcomment = u"translatorcomment"_s;
185 static const QString strTS = u"TS"_s;
186 static const QString strtype = u"type"_s;
187 static const QString strunfinished = u"unfinished"_s;
188 static const QString struserdata = u"userdata"_s;
189 static const QString strvanished = u"vanished"_s;
190 //static const QString strversion = u"version"_s;
191 static const QString stryes = u"yes"_s;
192
193 static const QString strextrans(QLatin1String("extra-"));
194
195 while (!atEnd()) {
196 readNext();
197 if (isStartDocument()) {
198 // <!DOCTYPE TS>
199 //qDebug() << attributes();
200 } else if (isEndDocument()) {
201 // <!DOCTYPE TS>
202 //qDebug() << attributes();
203 } else if (isDTD()) {
204 // <!DOCTYPE TS>
205 //qDebug() << tokenString();
206 } else if (elementStarts(str: strTS)) {
207 // <TS>
208 //qDebug() << "TS " << attributes();
209 QHash<QString, int> currentLine;
210 QString currentFile;
211 bool maybeRelative = false, maybeAbsolute = false;
212
213 QXmlStreamAttributes atts = attributes();
214 //QString version = atts.value(strversion).toString();
215 translator.setLanguageCode(atts.value(qualifiedName: strlanguage).toString());
216 translator.setSourceLanguageCode(atts.value(qualifiedName: strsourcelanguage).toString());
217 while (!atEnd()) {
218 readNext();
219 if (isEndElement()) {
220 // </TS> found, finish local loop
221 break;
222 } else if (isWhiteSpace()) {
223 // ignore these, just whitespace
224 } else if (isStartElement()
225 && name().toString().startsWith(s: strextrans)) {
226 // <extra-...>
227 QString tag = name().toString();
228 translator.setExtra(ba: tag.mid(position: 6), var: readContents());
229 // </extra-...>
230 } else if (elementStarts(str: strdependencies)) {
231 /*
232 * <dependencies>
233 * <dependency catalog="qtsystems_no"/>
234 * <dependency catalog="qtbase_no"/>
235 * </dependencies>
236 **/
237 QStringList dependencies;
238 while (!atEnd()) {
239 readNext();
240 if (isEndElement()) {
241 // </dependencies> found, finish local loop
242 break;
243 } else if (elementStarts(str: strdependency)) {
244 // <dependency>
245 QXmlStreamAttributes atts = attributes();
246 dependencies.append(t: atts.value(qualifiedName: strcatalog).toString());
247 while (!atEnd()) {
248 readNext();
249 if (isEndElement()) {
250 // </dependency> found, finish local loop
251 break;
252 }
253 }
254 }
255 }
256 translator.setDependencies(dependencies);
257 } else if (elementStarts(str: strcontext)) {
258 // <context>
259 QString context;
260 while (!atEnd()) {
261 readNext();
262 if (isEndElement()) {
263 // </context> found, finish local loop
264 break;
265 } else if (isWhiteSpace()) {
266 // ignore these, just whitespace
267 } else if (elementStarts(str: strname)) {
268 // <name>
269 context = readElementText();
270 // </name>
271 } else if (elementStarts(str: strmessage)) {
272 // <message>
273 TranslatorMessage::References refs;
274 QString currentMsgFile = currentFile;
275
276 TranslatorMessage msg;
277 msg.setId(attributes().value(qualifiedName: strid).toString());
278 msg.setContext(context);
279 msg.setType(TranslatorMessage::Finished);
280 msg.setPlural(attributes().value(qualifiedName: strnumerus) == stryes);
281 msg.setTsLineNumber(lineNumber());
282 while (!atEnd()) {
283 readNext();
284 if (isEndElement()) {
285 // </message> found, finish local loop
286 msg.setReferences(refs);
287 translator.append(msg);
288 break;
289 } else if (isWhiteSpace()) {
290 // ignore these, just whitespace
291 } else if (elementStarts(str: strsource)) {
292 // <source>...</source>
293 msg.setSourceText(readContents());
294 } else if (elementStarts(str: stroldsource)) {
295 // <oldsource>...</oldsource>
296 msg.setOldSourceText(readContents());
297 } else if (elementStarts(str: stroldcomment)) {
298 // <oldcomment>...</oldcomment>
299 msg.setOldComment(readContents());
300 } else if (elementStarts(str: strextracomment)) {
301 // <extracomment>...</extracomment>
302 msg.setExtraComment(readContents());
303 } else if (elementStarts(str: strtranslatorcomment)) {
304 // <translatorcomment>...</translatorcomment>
305 msg.setTranslatorComment(readContents());
306 } else if (elementStarts(str: strlocation)) {
307 // <location/>
308 maybeAbsolute = true;
309 QXmlStreamAttributes atts = attributes();
310 QString fileName = atts.value(qualifiedName: strfilename).toString();
311 if (fileName.isEmpty()) {
312 fileName = currentMsgFile;
313 maybeRelative = true;
314 } else {
315 if (refs.isEmpty())
316 currentFile = fileName;
317 currentMsgFile = fileName;
318 }
319 const QString lin = atts.value(qualifiedName: strline).toString();
320 if (lin.isEmpty()) {
321 refs.append(t: TranslatorMessage::Reference(fileName, -1));
322 } else {
323 bool bOK;
324 int lineNo = lin.toInt(ok: &bOK);
325 if (bOK) {
326 if (lin.startsWith(c: QLatin1Char('+')) || lin.startsWith(c: QLatin1Char('-'))) {
327 lineNo = (currentLine[fileName] += lineNo);
328 maybeRelative = true;
329 }
330 refs.append(t: TranslatorMessage::Reference(fileName, lineNo));
331 }
332 }
333 readContents();
334 } else if (elementStarts(str: strcomment)) {
335 // <comment>...</comment>
336 msg.setComment(readContents());
337 } else if (elementStarts(str: struserdata)) {
338 // <userdata>...</userdata>
339 msg.setUserData(readContents());
340 } else if (elementStarts(str: strtranslation)) {
341 // <translation>
342 QXmlStreamAttributes atts = attributes();
343 QStringView type = atts.value(qualifiedName: strtype);
344 if (type == strunfinished)
345 msg.setType(TranslatorMessage::Unfinished);
346 else if (type == strvanished)
347 msg.setType(TranslatorMessage::Vanished);
348 else if (type == strobsolete)
349 msg.setType(TranslatorMessage::Obsolete);
350 if (msg.isPlural()) {
351 QStringList translations;
352 while (!atEnd()) {
353 readNext();
354 if (isEndElement()) {
355 break;
356 } else if (isWhiteSpace()) {
357 // ignore these, just whitespace
358 } else if (elementStarts(str: strnumerusform)) {
359 translations.append(t: readTransContents());
360 } else {
361 handleError();
362 break;
363 }
364 }
365 msg.setTranslations(translations);
366 } else {
367 msg.setTranslation(readTransContents());
368 }
369 // </translation>
370 } else if (isStartElement()
371 && name().toString().startsWith(s: strextrans)) {
372 // <extra-...>
373 QString tag = name().toString();
374 msg.setExtra(ba: tag.mid(position: 6), var: readContents());
375 // </extra-...>
376 } else {
377 handleError();
378 }
379 }
380 // </message>
381 } else {
382 handleError();
383 }
384 }
385 // </context>
386 } else {
387 handleError();
388 }
389 // if the file is empty adopt AbsoluteLocation (default location type for Translator)
390 if (translator.messageCount() == 0)
391 maybeAbsolute = true;
392 translator.setLocationsType(maybeRelative ? Translator::RelativeLocations :
393 maybeAbsolute ? Translator::AbsoluteLocations :
394 Translator::NoLocations);
395 } // </TS>
396 } else {
397 handleError();
398 }
399 }
400 if (hasError()) {
401 m_cd.appendError(error: errorString());
402 return false;
403 }
404 return true;
405}
406
407static QString tsNumericEntity(int ch)
408{
409 return QString(ch <= 0x20 ? QLatin1String("<byte value=\"x%1\"/>")
410 : QLatin1String("&#x%1;")) .arg(a: ch, fieldWidth: 0, base: 16);
411}
412
413static QString tsProtect(const QString &str)
414{
415 QString result;
416 result.reserve(asize: str.size() * 12 / 10);
417 for (int i = 0; i != str.size(); ++i) {
418 const QChar ch = str[i];
419 uint c = ch.unicode();
420 switch (c) {
421 case '\"':
422 result += QLatin1String("&quot;");
423 break;
424 case '&':
425 result += QLatin1String("&amp;");
426 break;
427 case '>':
428 result += QLatin1String("&gt;");
429 break;
430 case '<':
431 result += QLatin1String("&lt;");
432 break;
433 case '\'':
434 result += QLatin1String("&apos;");
435 break;
436 default:
437 if ((c < 0x20 || (ch > QChar(0x7f) && ch.isSpace())) && c != '\n' && c != '\t')
438 result += tsNumericEntity(ch: c);
439 else // this also covers surrogates
440 result += QChar(c);
441 }
442 }
443 return result;
444}
445
446static void writeExtras(QTextStream &t, const char *indent,
447 const TranslatorMessage::ExtraData &extras, QRegularExpression drops)
448{
449 QStringList outs;
450 for (auto it = extras.cbegin(), end = extras.cend(); it != end; ++it) {
451 if (!drops.match(subject: it.key()).hasMatch()) {
452 outs << (QStringLiteral("<extra-") + it.key() + QLatin1Char('>')
453 + tsProtect(str: it.value())
454 + QStringLiteral("</extra-") + it.key() + QLatin1Char('>'));
455 }
456 }
457 outs.sort();
458 for (const QString &out : std::as_const(t&: outs))
459 t << indent << out << Qt::endl;
460}
461
462static void writeVariants(QTextStream &t, const char *indent, const QString &input)
463{
464 int offset;
465 if ((offset = input.indexOf(c: QChar(Translator::BinaryVariantSeparator))) >= 0) {
466 t << " variants=\"yes\">";
467 int start = 0;
468 forever {
469 t << "\n " << indent << "<lengthvariant>"
470 << tsProtect(str: input.mid(position: start, n: offset - start))
471 << "</lengthvariant>";
472 if (offset == input.size())
473 break;
474 start = offset + 1;
475 offset = input.indexOf(c: QChar(Translator::BinaryVariantSeparator), from: start);
476 if (offset < 0)
477 offset = input.size();
478 }
479 t << "\n" << indent;
480 } else {
481 t << ">" << tsProtect(str: input);
482 }
483}
484
485bool saveTS(const Translator &translator, QIODevice &dev, ConversionData &cd)
486{
487 bool result = true;
488 QTextStream t(&dev);
489
490 // The xml prolog allows processors to easily detect the correct encoding
491 t << "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<!DOCTYPE TS>\n";
492
493 t << "<TS version=\"2.1\"";
494
495 QString languageCode = translator.languageCode();
496 if (!languageCode.isEmpty() && languageCode != QLatin1String("C"))
497 t << " language=\"" << languageCode << "\"";
498 languageCode = translator.sourceLanguageCode();
499 if (!languageCode.isEmpty() && languageCode != QLatin1String("C"))
500 t << " sourcelanguage=\"" << languageCode << "\"";
501 t << ">\n";
502
503 const QStringList deps = translator.dependencies();
504 if (!deps.isEmpty()) {
505 t << "<dependencies>\n";
506 for (const QString &dep : deps)
507 t << "<dependency catalog=\"" << dep << "\"/>\n";
508 t << "</dependencies>\n";
509 }
510
511 QRegularExpression drops(QRegularExpression::anchoredPattern(expression: cd.dropTags().join(sep: QLatin1Char('|'))));
512
513 writeExtras(t, indent: " ", extras: translator.extras(), drops);
514
515 QHash<QString, QList<TranslatorMessage> > messageOrder;
516 QList<QString> contextOrder;
517 for (const TranslatorMessage &msg : translator.messages()) {
518 // no need for such noise
519 if ((msg.type() == TranslatorMessage::Obsolete || msg.type() == TranslatorMessage::Vanished)
520 && msg.translation().isEmpty()) {
521 continue;
522 }
523
524 QList<TranslatorMessage> &context = messageOrder[msg.context()];
525 if (context.isEmpty())
526 contextOrder.append(t: msg.context());
527 context.append(t: msg);
528 }
529 if (cd.sortContexts())
530 std::sort(first: contextOrder.begin(), last: contextOrder.end());
531
532 QHash<QString, int> currentLine;
533 QString currentFile;
534 for (const QString &context : std::as_const(t&: contextOrder)) {
535 t << "<context>\n"
536 " <name>"
537 << tsProtect(str: context)
538 << "</name>\n";
539 for (const TranslatorMessage &msg : std::as_const(t&: messageOrder[context])) {
540 //msg.dump();
541
542 t << " <message";
543 if (!msg.id().isEmpty())
544 t << " id=\"" << tsProtect(str: msg.id()) << "\"";
545 if (msg.isPlural())
546 t << " numerus=\"yes\"";
547 t << ">\n";
548 if (translator.locationsType() != Translator::NoLocations) {
549 QString cfile = currentFile;
550 bool first = true;
551 for (const TranslatorMessage::Reference &ref : msg.allReferences()) {
552 QString fn = cd.m_targetDir.relativeFilePath(fileName: ref.fileName())
553 .replace(before: QLatin1Char('\\'),after: QLatin1Char('/'));
554 int ln = ref.lineNumber();
555 QString ld;
556 if (translator.locationsType() == Translator::RelativeLocations) {
557 if (ln != -1) {
558 int dlt = ln - currentLine[fn];
559 if (dlt >= 0)
560 ld.append(c: QLatin1Char('+'));
561 ld.append(s: QString::number(dlt));
562 currentLine[fn] = ln;
563 }
564
565 if (fn != cfile) {
566 if (first)
567 currentFile = fn;
568 cfile = fn;
569 } else {
570 fn.clear();
571 }
572 first = false;
573 } else {
574 if (ln != -1)
575 ld = QString::number(ln);
576 }
577 t << " <location";
578 if (!fn.isEmpty())
579 t << " filename=\"" << fn << "\"";
580 if (!ld.isEmpty())
581 t << " line=\"" << ld << "\"";
582 t << "/>\n";
583 }
584 }
585
586 t << " <source>"
587 << tsProtect(str: msg.sourceText())
588 << "</source>\n";
589
590 if (!msg.oldSourceText().isEmpty())
591 t << " <oldsource>" << tsProtect(str: msg.oldSourceText()) << "</oldsource>\n";
592
593 if (!msg.comment().isEmpty()) {
594 t << " <comment>"
595 << tsProtect(str: msg.comment())
596 << "</comment>\n";
597 }
598
599 if (!msg.oldComment().isEmpty())
600 t << " <oldcomment>" << tsProtect(str: msg.oldComment()) << "</oldcomment>\n";
601
602 if (!msg.extraComment().isEmpty())
603 t << " <extracomment>" << tsProtect(str: msg.extraComment())
604 << "</extracomment>\n";
605
606 if (!msg.translatorComment().isEmpty())
607 t << " <translatorcomment>" << tsProtect(str: msg.translatorComment())
608 << "</translatorcomment>\n";
609
610 t << " <translation";
611 if (msg.type() == TranslatorMessage::Unfinished)
612 t << " type=\"unfinished\"";
613 else if (msg.type() == TranslatorMessage::Vanished)
614 t << " type=\"vanished\"";
615 else if (msg.type() == TranslatorMessage::Obsolete)
616 t << " type=\"obsolete\"";
617 if (msg.isPlural()) {
618 t << ">";
619 const QStringList &translns = msg.translations();
620 for (int j = 0; j < translns.size(); ++j) {
621 t << "\n <numerusform";
622 writeVariants(t, indent: " ", input: translns[j]);
623 t << "</numerusform>";
624 }
625 t << "\n ";
626 } else {
627 writeVariants(t, indent: " ", input: msg.translation());
628 }
629 t << "</translation>\n";
630
631 writeExtras(t, indent: " ", extras: msg.extras(), drops);
632
633 if (!msg.userData().isEmpty())
634 t << " <userdata>" << msg.userData() << "</userdata>\n";
635 t << " </message>\n";
636 }
637 t << "</context>\n";
638 }
639
640 t << "</TS>\n";
641 return result;
642}
643
644bool loadTS(Translator &translator, QIODevice &dev, ConversionData &cd)
645{
646 TSReader reader(dev, cd);
647 return reader.read(translator);
648}
649
650int initTS()
651{
652 Translator::FileFormat format;
653
654 format.extension = QLatin1String("ts");
655 format.fileType = Translator::FileFormat::TranslationSource;
656 format.priority = 0;
657 format.untranslatedDescription = QT_TRANSLATE_NOOP("FMT", "Qt translation sources");
658 format.loader = &loadTS;
659 format.saver = &saveTS;
660 Translator::registerFileFormat(format);
661
662 return 1;
663}
664
665Q_CONSTRUCTOR_FUNCTION(initTS)
666
667QT_END_NAMESPACE
668

// Source: qttools/src/linguist/shared/ts.cpp