1// Copyright (C) 2002-2007 Detlev Offenbach <detlev@die-offenbachs.de>
2// Copyright (C) 2021 The Qt Company Ltd.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
4
5#include <translator.h>
6#include "lupdate.h"
7
8#include <QtCore/qhash.h>
9#include <QtCore/qlist.h>
10#include <QtCore/qstring.h>
11#include <QtCore/qtextstream.h>
12#include <QtCore/qstack.h>
13
14#include <cctype>
15#include <cerrno>
16#include <cstdio>
17#include <cstring>
18
19QT_BEGIN_NAMESPACE
20
// Prefix of a "TRANSLATOR <context> <comment>" magic comment. The trailing
// space belongs to the prefix; parse() strips sizeof(...) - 1 bytes.
static constexpr char PythonMagicComment[] = "TRANSLATOR ";
22
/*
  The first part of this source file is the Python tokenizer. We skip
  most of Python; the only tokens that interest us are defined here.
*/

enum Token {
    Tok_Eof,        // end of input
    Tok_class,      // "class"
    Tok_def,        // "def"
    Tok_return,     // "return"
    Tok_tr,         // "__tr" or a registered tr() alias
    Tok_trUtf8,     // "__trUtf8" or a registered trUtf8() alias
    Tok_translate,  // a registered translate() alias
    Tok_Ident,      // any other identifier
    Tok_Comment,    // comment token; handled by parse(), not returned by getToken() as written
    Tok_Dot,        // '.'
    Tok_String,     // string literal, contents in yyString
    Tok_LeftParen,  // '('
    Tok_RightParen, // ')'
    Tok_Comma,      // ','
    Tok_None,       // "None"
    Tok_Integer     // integer literal
};
33
// Kind of string literal the tokenizer is asked to read next.
enum class StringType {
    NoString,       // default argument of getToken() and parseString()
    String,
    FormatString,   // requested by matchStringStart() after an 'f' prefix
    RawString       // requested after an 'r' prefix; alters backslash handling
};
41
42/*
43 The tokenizer maintains the following global variables. The names
44 should be self-explanatory.
45*/
46static QString yyFileName;
47static int yyCh;
48static QByteArray yyIdent;
49static char yyComment[65536];
50static size_t yyCommentLen;
51static char yyString[65536];
52static size_t yyStringLen;
53static int yyParenDepth;
54static int yyLineNo;
55static int yyCurLineNo;
56
57struct ExtraComment
58{
59 QByteArray extraComment;
60 int lineNo;
61};
62
63static QList<ExtraComment> extraComments;
64
65static QList<ExtraComment> ids;
66
67QHash<QByteArray, Token> tokens = {
68 {"None", Tok_None},
69 {"class", Tok_class},
70 {"def", Tok_def},
71 {"return", Tok_return},
72 {"__tr", Tok_tr}, // Legacy?
73 {"__trUtf8", Tok_trUtf8}
74};
75
76// the file to read from (if reading from a file)
77static FILE *yyInFile;
78
79// the string to read from and current position in the string (otherwise)
80static int yyInPos;
81static int buf;
82
83static int (*getChar)();
84static int (*peekChar)();
85
86static int yyIndentationSize;
87static int yyContinuousSpaceCount;
88static bool yyCountingIndentation;
89
90// (Context, indentation level) pair.
91using ContextPair = QPair<QByteArray, int>;
92// Stack of (Context, indentation level) pairs.
93using ContextStack = QStack<ContextPair>;
94static ContextStack yyContextStack;
95
96static int getCharFromFile()
97{
98 int c;
99
100 if (buf < 0) {
101 c = getc(stream: yyInFile);
102 } else {
103 c = buf;
104 buf = -1;
105 }
106 if (c == '\n') {
107 yyCurLineNo++;
108 yyCountingIndentation = true;
109 yyContinuousSpaceCount = 0;
110 } else if (yyCountingIndentation && (c == 32 || c == 9)) {
111 yyContinuousSpaceCount++;
112 } else {
113 yyCountingIndentation = false;
114 }
115 return c;
116}
117
118static int peekCharFromFile()
119{
120 int c = getc(stream: yyInFile);
121 buf = c;
122 return c;
123}
124
125static void startTokenizer(const QString &fileName, int (*getCharFunc)(),
126 int (*peekCharFunc)())
127{
128 yyInPos = 0;
129 buf = -1;
130 getChar = getCharFunc;
131 peekChar = peekCharFunc;
132
133 yyFileName = fileName;
134 yyCh = getChar();
135 yyParenDepth = 0;
136 yyCurLineNo = 1;
137
138 yyIndentationSize = -1;
139 yyContinuousSpaceCount = 0;
140 yyCountingIndentation = false;
141 yyContextStack.clear();
142}
143
144static bool parseStringEscape(int quoteChar, StringType stringType)
145{
146 static const char tab[] = "abfnrtv";
147 static const char backTab[] = "\a\b\f\n\r\t\v";
148
149 yyCh = getChar();
150 if (yyCh == EOF)
151 return false;
152
153 if (stringType == StringType::RawString) {
154 if (yyCh != quoteChar) // Only quotes can be escaped in raw strings
155 yyString[yyStringLen++] = '\\';
156 yyString[yyStringLen++] = yyCh;
157 yyCh = getChar();
158 return true;
159 }
160
161 if (yyCh == 'x') {
162 QByteArray hex = "0";
163 yyCh = getChar();
164 if (yyCh == EOF)
165 return false;
166 while (std::isxdigit(yyCh)) {
167 hex += char(yyCh);
168 yyCh = getChar();
169 if (yyCh == EOF)
170 return false;
171 }
172 uint n;
173#ifdef Q_CC_MSVC
174 sscanf_s(hex, "%x", &n);
175#else
176 std::sscanf(s: hex, format: "%x", &n);
177#endif
178 if (yyStringLen < sizeof(yyString) - 1)
179 yyString[yyStringLen++] = char(n);
180 return true;
181 }
182
183 if (yyCh >= '0' && yyCh < '8') {
184 QByteArray oct;
185 int n = 0;
186 do {
187 oct += char(yyCh);
188 ++n;
189 yyCh = getChar();
190 if (yyCh == EOF)
191 return false;
192 } while (yyCh >= '0' && yyCh < '8' && n < 3);
193#ifdef Q_CC_MSVC
194 sscanf_s(oct, "%o", &n);
195#else
196 std::sscanf(s: oct, format: "%o", &n);
197#endif
198 if (yyStringLen < sizeof(yyString) - 1)
199 yyString[yyStringLen++] = char(n);
200 return true;
201 }
202
203 const char *p = std::strchr(s: tab, c: yyCh);
204 if (yyStringLen < sizeof(yyString) - 1) {
205 yyString[yyStringLen++] = p == nullptr
206 ? char(yyCh) : backTab[p - tab];
207 }
208 yyCh = getChar();
209 return true;
210}
211
212static Token parseString(StringType stringType = StringType::NoString)
213{
214 int quoteChar = yyCh;
215 bool tripleQuote = false;
216 bool singleQuote = true;
217 bool in = false;
218
219 yyCh = getChar();
220
221 while (yyCh != EOF) {
222 if (singleQuote && (yyCh == '\n' || (in && yyCh == quoteChar)))
223 break;
224
225 if (yyCh == quoteChar) {
226 if (peekChar() == quoteChar) {
227 yyCh = getChar();
228 if (!tripleQuote) {
229 tripleQuote = true;
230 singleQuote = false;
231 in = true;
232 yyCh = getChar();
233 } else {
234 yyCh = getChar();
235 if (yyCh == quoteChar) {
236 tripleQuote = false;
237 break;
238 }
239 }
240 } else if (tripleQuote) {
241 if (yyStringLen < sizeof(yyString) - 1)
242 yyString[yyStringLen++] = char(yyCh);
243 yyCh = getChar();
244 continue;
245 } else {
246 break;
247 }
248 } else {
249 in = true;
250 }
251
252 if (yyCh == '\\') {
253 if (!parseStringEscape(quoteChar, stringType))
254 return Tok_Eof;
255 } else {
256 char *yStart = yyString + yyStringLen;
257 char *yp = yStart;
258 while (yyCh != EOF && (tripleQuote || yyCh != '\n') && yyCh != quoteChar
259 && yyCh != '\\') {
260 *yp++ = char(yyCh);
261 yyCh = getChar();
262 }
263 yyStringLen += yp - yStart;
264 }
265 }
266 yyString[yyStringLen] = '\0';
267
268 if (yyCh != quoteChar) {
269 printf(format: "%c\n", yyCh);
270
271 qWarning(msg: "%s:%d: Unterminated string",
272 qPrintable(yyFileName), yyLineNo);
273 }
274
275 if (yyCh == EOF)
276 return Tok_Eof;
277 yyCh = getChar();
278 return Tok_String;
279}
280
281static QByteArray readLine()
282{
283 QByteArray result;
284 while (true) {
285 yyCh = getChar();
286 if (yyCh == EOF || yyCh == '\n')
287 break;
288 result.append(c: char(yyCh));
289 }
290 return result;
291}
292
293static Token getToken(StringType stringType = StringType::NoString)
294{
295 yyIdent.clear();
296 yyCommentLen = 0;
297 yyStringLen = 0;
298 while (yyCh != EOF) {
299 yyLineNo = yyCurLineNo;
300
301 if (std::isalpha(yyCh) || yyCh == '_') {
302 do {
303 yyIdent.append(c: char(yyCh));
304 yyCh = getChar();
305 } while (std::isalnum(yyCh) || yyCh == '_');
306
307 return tokens.value(key: yyIdent, defaultValue: Tok_Ident);
308 }
309 switch (yyCh) {
310 case '#':
311 switch (getChar()) {
312 case ':':
313 extraComments.append(t: {.extraComment: readLine().trimmed(), .lineNo: yyCurLineNo});
314 break;
315 case '=':
316 ids.append(t: {.extraComment: readLine().trimmed(), .lineNo: yyCurLineNo});
317 break;
318 case EOF:
319 return Tok_Eof;
320 case '\n':
321 break;
322 default:
323 do {
324 yyCh = getChar();
325 } while (yyCh != EOF && yyCh != '\n');
326 break;
327 }
328 break;
329 case '"':
330 case '\'':
331 return parseString(stringType);
332 case '(':
333 yyParenDepth++;
334 yyCh = getChar();
335 return Tok_LeftParen;
336 case ')':
337 yyParenDepth--;
338 yyCh = getChar();
339 return Tok_RightParen;
340 case ',':
341 yyCh = getChar();
342 return Tok_Comma;
343 case '.':
344 yyCh = getChar();
345 return Tok_Dot;
346 case '0':
347 case '1':
348 case '2':
349 case '3':
350 case '4':
351 case '5':
352 case '6':
353 case '7':
354 case '8':
355 case '9': {
356 QByteArray ba;
357 ba += char(yyCh);
358 yyCh = getChar();
359 const bool hex = yyCh == 'x';
360 if (hex) {
361 ba += char(yyCh);
362 yyCh = getChar();
363 }
364 while ((hex ? std::isxdigit(yyCh) : std::isdigit(yyCh))) {
365 ba += char(yyCh);
366 yyCh = getChar();
367 }
368 bool ok;
369 auto v = ba.toLongLong(ok: &ok);
370 Q_UNUSED(v);
371 if (ok)
372 return Tok_Integer;
373 break;
374 }
375 default:
376 yyCh = getChar();
377 }
378 }
379 return Tok_Eof;
380}
381
382/*
383 The second part of this source file is the parser. It accomplishes
384 a very easy task: It finds all strings inside a tr() or translate()
385 call, and possibly finds out the context of the call. It supports
386 three cases:
387 (1) the context is specified, as in FunnyDialog.tr("Hello") or
388 translate("FunnyDialog", "Hello");
389 (2) the call appears within an inlined function;
390 (3) the call appears within a function defined outside the class definition.
391*/
392
393static Token yyTok;
394
395static bool match(Token t)
396{
397 const bool matches = (yyTok == t);
398 if (matches)
399 yyTok = getToken();
400 return matches;
401}
402
403static bool matchStringStart()
404{
405 if (yyTok == Tok_String)
406 return true;
407 // Check for f"bla{var}" and raw strings r"bla".
408 if (yyTok == Tok_Ident && yyIdent.size() == 1) {
409 switch (yyIdent.at(i: 0)) {
410 case 'r':
411 yyTok = getToken(stringType: StringType::RawString);
412 return yyTok == Tok_String;
413 case 'f':
414 yyTok = getToken(stringType: StringType::FormatString);
415 return yyTok == Tok_String;
416 }
417 }
418 return false;
419}
420
421static bool matchString(QByteArray *s)
422{
423 s->clear();
424 bool ok = false;
425 while (matchStringStart()) {
426 *s += yyString;
427 yyTok = getToken();
428 ok = true;
429 }
430 return ok;
431}
432
433static bool matchEncoding(bool *utf8)
434{
435 // Remove any leading module paths.
436 if (yyTok == Tok_Ident && std::strcmp(s1: yyIdent, s2: "PySide6") == 0) {
437 yyTok = getToken();
438
439 if (yyTok != Tok_Dot)
440 return false;
441
442 yyTok = getToken();
443 }
444
445 if (yyTok == Tok_Ident && (std::strcmp(s1: yyIdent, s2: "QtGui") == 0
446 || std::strcmp(s1: yyIdent, s2: "QtCore") == 0)) {
447 yyTok = getToken();
448
449 if (yyTok != Tok_Dot)
450 return false;
451
452 yyTok = getToken();
453 }
454
455 if (yyTok == Tok_Ident) {
456 if (std::strcmp(s1: yyIdent, s2: "QApplication") == 0
457 || std::strcmp(s1: yyIdent, s2: "QGuiApplication") == 0
458 || std::strcmp(s1: yyIdent, s2: "QCoreApplication") == 0) {
459 yyTok = getToken();
460
461 if (yyTok == Tok_Dot)
462 yyTok = getToken();
463 }
464
465 *utf8 = QByteArray(yyIdent).endsWith(bv: "UTF8");
466 yyTok = getToken();
467 return true;
468 }
469 return false;
470}
471
472static bool matchStringOrNone(QByteArray *s)
473{
474 bool matches = matchString(s);
475
476 if (!matches)
477 matches = match(t: Tok_None);
478
479 return matches;
480}
481
482/*
483 * match any expression that can return a number, which can be
484 * 1. Literal number (e.g. '11')
485 * 2. simple identifier (e.g. 'm_count')
486 * 3. simple function call (e.g. 'size()')
487 * 4. function call on an object (e.g. 'list.size()')
488 *
489 * Other cases:
490 * size(2,4)
491 * list().size()
492 * list(a,b).size(2,4)
493 * etc...
494 */
495static bool matchExpression()
496{
497 if (match(t: Tok_Integer))
498 return true;
499
500 int parenlevel = 0;
501 while (match(t: Tok_Ident) || parenlevel > 0) {
502 if (yyTok == Tok_RightParen) {
503 if (parenlevel == 0)
504 break;
505 --parenlevel;
506 yyTok = getToken();
507 } else if (yyTok == Tok_LeftParen) {
508 yyTok = getToken();
509 if (yyTok == Tok_RightParen) {
510 yyTok = getToken();
511 } else {
512 ++parenlevel;
513 }
514 } else if (yyTok == Tok_Ident) {
515 continue;
516 } else if (parenlevel == 0) {
517 return false;
518 }
519 }
520 return true;
521}
522
523static bool parseTranslate(QByteArray *text, QByteArray *context, QByteArray *comment,
524 bool *utf8, bool *plural)
525{
526 text->clear();
527 context->clear();
528 comment->clear();
529 *utf8 = false;
530 *plural = false;
531
532 yyTok = getToken();
533 if (!match(t: Tok_LeftParen) || !matchString(s: context) || !match(t: Tok_Comma)
534 || !matchString(s: text)) {
535 return false;
536 }
537
538 if (match(t: Tok_RightParen))
539 return true;
540
541 // not a comma or a right paren, illegal syntax
542 if (!match(t: Tok_Comma))
543 return false;
544
545 // python accepts trailing commas within parenthesis, so allow a comma with nothing after
546 if (match(t: Tok_RightParen))
547 return true;
548
549 // check for comment
550 if (!matchStringOrNone(s: comment))
551 return false; // not a comment, or a trailing comma... something is wrong
552
553 if (match(t: Tok_RightParen))
554 return true;
555
556 // not a comma or a right paren, illegal syntax
557 if (!match(t: Tok_Comma))
558 return false;
559
560 // python accepts trailing commas within parenthesis, so allow a comma with nothing after
561 if (match(t: Tok_RightParen))
562 return true;
563
564 // look for optional encoding information
565 if (matchEncoding(utf8)) {
566 if (match(t: Tok_RightParen))
567 return true;
568
569 // not a comma or a right paren, illegal syntax
570 if (!match(t: Tok_Comma))
571 return false;
572
573 // python accepts trailing commas within parenthesis, so allow a comma with nothing after
574 if (match(t: Tok_RightParen))
575 return true;
576 }
577
578 // Must be a plural expression
579 if (!matchExpression())
580 return false;
581
582 *plural = true;
583
584 // Ignore any trailing comma here
585 match(t: Tok_Comma);
586
587 // This must be the end, or there are too many parameters
588 if (match(t: Tok_RightParen))
589 return true;
590
591 return false;
592}
593
594static void setMessageParameters(TranslatorMessage *message,
595 int lineNo)
596{
597 // PYSIDE-2863: parseTranslate() can read past the message
598 // and capture extraComments intended for the next message.
599 // Use only extraComments for the current message.
600 QByteArray extraComment;
601 while (!extraComments.isEmpty() && extraComments.constFirst().lineNo <= lineNo) {
602 if (!extraComment.isEmpty())
603 extraComment += ' ';
604 extraComment += extraComments.takeFirst().extraComment;
605 }
606
607 if (!extraComment.isEmpty())
608 message->setExtraComment(QString::fromUtf8(ba: extraComment));
609
610 while (!ids.isEmpty() && ids.constFirst().lineNo <= lineNo)
611 message->setId(QString::fromUtf8(ba: ids.takeFirst().extraComment));
612}
613
614static void parse(Translator &tor, ConversionData &cd,
615 const QByteArray &initialContext = {},
616 const QByteArray &defaultContext = {})
617{
618 QByteArray context;
619 QByteArray text;
620 QByteArray comment;
621 QByteArray prefix;
622 bool utf8 = false;
623
624 yyTok = getToken();
625 while (yyTok != Tok_Eof) {
626
627 switch (yyTok) {
628 case Tok_class: {
629 if (yyIndentationSize < 0 && yyContinuousSpaceCount > 0)
630 yyIndentationSize = yyContinuousSpaceCount; // First indented "class"
631 const int indent = yyIndentationSize > 0
632 ? yyContinuousSpaceCount / yyIndentationSize : 0;
633 while (!yyContextStack.isEmpty() && yyContextStack.top().second >= indent)
634 yyContextStack.pop();
635 yyTok = getToken();
636 yyContextStack.push(t: {yyIdent, indent});
637 yyTok = getToken();
638 }
639 break;
640 case Tok_def:
641 if (yyIndentationSize < 0 && yyContinuousSpaceCount > 0)
642 yyIndentationSize = yyContinuousSpaceCount; // First indented "def"
643 if (!yyContextStack.isEmpty()) {
644 // Pop classes if the function is further outdented than the class on the top
645 // (end of a nested class).
646 const int classIndent = yyIndentationSize > 0
647 ? yyContinuousSpaceCount / yyIndentationSize - 1 : 0;
648 while (!yyContextStack.isEmpty() && yyContextStack.top().second > classIndent)
649 yyContextStack.pop();
650 }
651 yyTok = getToken();
652 break;
653 case Tok_tr:
654 case Tok_trUtf8: {
655 utf8 = true;
656 yyTok = getToken();
657 const int lineNo = yyCurLineNo;
658 if (match(t: Tok_LeftParen) && matchString(s: &text)) {
659 comment.clear();
660 bool plural = false;
661
662 if (match(t: Tok_RightParen)) {
663 // There is no comment or plural arguments.
664 } else if (match(t: Tok_Comma) && matchStringOrNone(s: &comment)) {
665 // There is a comment argument.
666 if (match(t: Tok_RightParen)) {
667 // There is no plural argument.
668 } else if (match(t: Tok_Comma)) {
669 // There is a plural argument.
670 plural = true;
671 }
672 }
673
674 if (prefix.isEmpty())
675 context = defaultContext;
676 else if (prefix == "self")
677 context = yyContextStack.isEmpty()
678 ? initialContext : yyContextStack.top().first;
679 else
680 context = prefix;
681
682 prefix.clear();
683
684 if (!text.isEmpty()) {
685 TranslatorMessage message(QString::fromUtf8(ba: context),
686 QString::fromUtf8(ba: text),
687 QString::fromUtf8(ba: comment),
688 {}, yyFileName, yyLineNo,
689 {}, TranslatorMessage::Unfinished, plural);
690 setMessageParameters(message: &message, lineNo);
691 tor.extend(msg: message, cd);
692 }
693 }
694 }
695 break;
696 case Tok_translate: {
697 bool plural{};
698 const int lineNo = yyCurLineNo;
699 if (parseTranslate(text: &text, context: &context, comment: &comment, utf8: &utf8, plural: &plural)
700 && !text.isEmpty()) {
701 TranslatorMessage message(QString::fromUtf8(ba: context),
702 QString::fromUtf8(ba: text),
703 QString::fromUtf8(ba: comment),
704 {}, yyFileName, yyLineNo,
705 {}, TranslatorMessage::Unfinished, plural);
706 setMessageParameters(message: &message, lineNo);
707 tor.extend(msg: message, cd);
708 }
709 }
710 break;
711 case Tok_Ident:
712 if (!prefix.isEmpty())
713 prefix += '.';
714 prefix += yyIdent;
715 yyTok = getToken();
716 if (yyTok != Tok_Dot)
717 prefix.clear();
718 break;
719 case Tok_Comment:
720 comment = yyComment;
721 comment = comment.simplified();
722 if (comment.left(n: sizeof(PythonMagicComment) - 1) == PythonMagicComment) {
723 comment.remove(index: 0, len: sizeof(PythonMagicComment) - 1);
724 int k = comment.indexOf(ch: ' ');
725 if (k == -1) {
726 context = comment;
727 } else {
728 context = comment.left(n: k);
729 comment.remove( index: 0, len: k + 1);
730 TranslatorMessage message(QString::fromUtf8(ba: context),
731 {}, QString::fromUtf8(ba: comment), {},
732 yyFileName, yyLineNo, {});
733 tor.extend(msg: message, cd);
734 }
735 }
736 yyTok = getToken();
737 break;
738 default:
739 yyTok = getToken();
740 }
741 }
742
743 if (yyParenDepth != 0) {
744 qWarning(msg: "%s: Unbalanced parentheses in Python code",
745 qPrintable(yyFileName));
746 }
747}
748
749bool loadPython(Translator &translator, const QString &fileName, ConversionData &cd)
750{
751 // Match the function aliases to our tokens
752 static bool firstTime = true;
753 if (firstTime) {
754 firstTime = false;
755 const auto &nameMap = trFunctionAliasManager.nameToTrFunctionMap();
756 for (auto it = nameMap.cbegin(), end = nameMap.cend(); it != end; ++it) {
757 switch (it.value()) {
758 case TrFunctionAliasManager::Function_tr:
759 case TrFunctionAliasManager::Function_QT_TR_NOOP:
760 tokens.insert(key: it.key().toUtf8(), value: Tok_tr);
761 break;
762 case TrFunctionAliasManager::Function_trUtf8:
763 tokens.insert(key: it.key().toUtf8(), value: Tok_trUtf8);
764 break;
765 case TrFunctionAliasManager::Function_translate:
766 case TrFunctionAliasManager::Function_QT_TRANSLATE_NOOP:
767 // QTranslator::findMessage() has the same parameters as QApplication::translate().
768 case TrFunctionAliasManager::Function_findMessage:
769 tokens.insert(key: it.key().toUtf8(), value: Tok_translate);
770 break;
771 default:
772 break;
773 }
774 }
775 }
776
777#ifdef Q_CC_MSVC
778 const auto *fileNameC = reinterpret_cast<const wchar_t *>(fileName.utf16());
779 const bool ok = _wfopen_s(&yyInFile, fileNameC, L"r") == 0;
780#else
781 const QByteArray fileNameC = QFile::encodeName(fileName);
782 yyInFile = std::fopen( filename: fileNameC.constData(), modes: "r");
783 const bool ok = yyInFile != nullptr;
784#endif
785 if (!ok) {
786 cd.appendError(QStringLiteral("Cannot open %1").arg(a: fileName));
787 return false;
788 }
789
790 startTokenizer(fileName, getCharFunc: getCharFromFile, peekCharFunc: peekCharFromFile);
791 parse(tor&: translator, cd);
792 std::fclose(stream: yyInFile);
793 return true;
794}
795
796QT_END_NAMESPACE
797

Provided by KDAB

Privacy Policy
Start learning QML with our Intro Training
Find out more

source code of qttools/src/linguist/lupdate/python.cpp