1 | // Copyright (C) 2016 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 |
3 | |
4 | #include "lupdate.h" |
5 | |
6 | #include <translator.h> |
7 | |
8 | #include <QtCore/QDebug> |
9 | #include <QtCore/QFile> |
10 | #include <QtCore/QStack> |
11 | #include <QtCore/QStack> |
12 | #include <QtCore/QString> |
13 | #include <QtCore/QCoreApplication> |
14 | #include <QtCore/QStringConverter> |
15 | #include <QtCore/QTextStream> |
16 | |
17 | #include <iostream> |
18 | |
19 | #include <ctype.h> |
20 | |
21 | QT_BEGIN_NAMESPACE |
22 | |
// Token kinds returned by getToken(). The enumerators are unscoped and are
// numbered implicitly from 0 in declaration order. Tok_Comment is reported
// for both '//' and '/* */' comments.
enum { Tok_Eof, Tok_class, Tok_return, Tok_tr,
       Tok_translate, Tok_Ident, Tok_Package,
       Tok_Comment, Tok_String, Tok_Colon, Tok_Dot,
       Tok_LeftBrace, Tok_RightBrace, Tok_LeftParen,
       Tok_RightParen, Tok_Comma, Tok_Semicolon,
       Tok_Integer, Tok_Plus, Tok_PlusPlus, Tok_PlusEq, Tok_null };
29 | |
30 | class Scope |
31 | { |
32 | public: |
33 | QString name; |
34 | enum Type {Clazz, Function, Other} type; |
35 | int line; |
36 | |
37 | Scope(const QString & name, Type type, int line) : |
38 | name(name), |
39 | type(type), |
40 | line(line) |
41 | {} |
42 | |
43 | ~Scope() |
44 | {} |
45 | }; |
46 | |
47 | /* |
48 | The tokenizer maintains the following global variables. The names |
49 | should be self-explanatory. |
50 | */ |
51 | |
52 | static QString yyFileName; |
53 | static QChar yyCh; |
54 | static QString yyIdent; |
55 | static QString ; |
56 | static QString yyString; |
57 | static bool yyEOF = false; |
58 | |
59 | static qlonglong yyInteger; |
60 | static int yyParenDepth; |
61 | static int yyLineNo; |
62 | static int yyCurLineNo; |
63 | static int yyParenLineNo; |
64 | static int yyTok; |
65 | |
66 | // the string to read from and current position in the string |
67 | static QString yyInStr; |
68 | static int yyInPos; |
69 | |
70 | // The parser maintains the following global variables. |
71 | static QString yyPackage; |
72 | static QStack<Scope*> yyScope; |
73 | |
74 | std::ostream &yyMsg(int line = 0) |
75 | { |
76 | return std::cerr << qPrintable(yyFileName) << ':' << (line ? line : yyLineNo) << ": " ; |
77 | } |
78 | |
79 | static QChar getChar() |
80 | { |
81 | if (yyInPos >= yyInStr.size()) { |
82 | yyEOF = true; |
83 | return QChar(); |
84 | } |
85 | QChar c = yyInStr[yyInPos++]; |
86 | if (c == QLatin1Char('\n')) |
87 | ++yyCurLineNo; |
88 | return c; |
89 | } |
90 | |
91 | static int getToken() |
92 | { |
93 | const char tab[] = "bfnrt\"\'\\" ; |
94 | const char backTab[] = "\b\f\n\r\t\"\'\\" ; |
95 | |
96 | yyIdent.clear(); |
97 | yyComment.clear(); |
98 | yyString.clear(); |
99 | |
100 | while (!yyEOF) { |
101 | yyLineNo = yyCurLineNo; |
102 | |
103 | if ( yyCh.isLetter() || yyCh.toLatin1() == '_' ) { |
104 | do { |
105 | yyIdent.append(c: yyCh); |
106 | yyCh = getChar(); |
107 | } while ( yyCh.isLetterOrNumber() || yyCh.toLatin1() == '_' ); |
108 | |
109 | if (yyTok != Tok_Dot) { |
110 | switch ( yyIdent.at(i: 0).toLatin1() ) { |
111 | case 'r': |
112 | if ( yyIdent == QLatin1String("return" ) ) |
113 | return Tok_return; |
114 | break; |
115 | case 'c': |
116 | if ( yyIdent == QLatin1String("class" ) ) |
117 | return Tok_class; |
118 | break; |
119 | case 'n': |
120 | if ( yyIdent == QLatin1String("null" ) ) |
121 | return Tok_null; |
122 | break; |
123 | } |
124 | } |
125 | switch ( yyIdent.at(i: 0).toLatin1() ) { |
126 | case 'p': |
127 | if( yyIdent == QLatin1String("package" ) ) |
128 | return Tok_Package; |
129 | break; |
130 | case 't': |
131 | if ( yyIdent == QLatin1String("tr" ) ) |
132 | return Tok_tr; |
133 | if ( yyIdent == QLatin1String("translate" ) ) |
134 | return Tok_translate; |
135 | } |
136 | return Tok_Ident; |
137 | } else { |
138 | switch ( yyCh.toLatin1() ) { |
139 | |
140 | case '/': |
141 | yyCh = getChar(); |
142 | if ( yyCh == QLatin1Char('/') ) { |
143 | do { |
144 | yyCh = getChar(); |
145 | if (yyEOF) |
146 | break; |
147 | yyComment.append(c: yyCh); |
148 | } while (yyCh != QLatin1Char('\n')); |
149 | return Tok_Comment; |
150 | |
151 | } else if ( yyCh == QLatin1Char('*') ) { |
152 | bool metAster = false; |
153 | bool metAsterSlash = false; |
154 | |
155 | while ( !metAsterSlash ) { |
156 | yyCh = getChar(); |
157 | if (yyEOF) { |
158 | yyMsg() << "Unterminated Java comment.\n" ; |
159 | return Tok_Comment; |
160 | } |
161 | |
162 | yyComment.append( c: yyCh ); |
163 | |
164 | if ( yyCh == QLatin1Char('*') ) |
165 | metAster = true; |
166 | else if ( metAster && yyCh == QLatin1Char('/') ) |
167 | metAsterSlash = true; |
168 | else |
169 | metAster = false; |
170 | } |
171 | yyComment.chop(n: 2); |
172 | yyCh = getChar(); |
173 | |
174 | return Tok_Comment; |
175 | } |
176 | break; |
177 | case '"': |
178 | yyCh = getChar(); |
179 | |
180 | while (!yyEOF && yyCh != QLatin1Char('\n') && yyCh != QLatin1Char('"')) { |
181 | |
182 | if ( yyCh == QLatin1Char('\\') ) { |
183 | yyCh = getChar(); |
184 | if ( yyCh == QLatin1Char('u') ) { |
185 | yyCh = getChar(); |
186 | uint unicode(0); |
187 | for (int i = 4; i > 0; --i) { |
188 | unicode = unicode << 4; |
189 | if( yyCh.isDigit() ) { |
190 | unicode += yyCh.digitValue(); |
191 | } |
192 | else { |
193 | int sub(yyCh.toLower().toLatin1() - 87); |
194 | if( sub > 15 || sub < 10) { |
195 | yyMsg() << "Invalid Unicode value.\n" ; |
196 | break; |
197 | } |
198 | unicode += sub; |
199 | } |
200 | yyCh = getChar(); |
201 | } |
202 | yyString.append(c: QChar(unicode)); |
203 | } |
204 | else if ( yyCh == QLatin1Char('\n') ) { |
205 | yyCh = getChar(); |
206 | } |
207 | else { |
208 | yyString.append( c: QLatin1Char(backTab[strchr( s: tab, c: yyCh.toLatin1() ) - tab]) ); |
209 | yyCh = getChar(); |
210 | } |
211 | } else { |
212 | yyString.append(c: yyCh); |
213 | yyCh = getChar(); |
214 | } |
215 | } |
216 | |
217 | if ( yyCh != QLatin1Char('"') ) |
218 | yyMsg() << "Unterminated string.\n" ; |
219 | |
220 | yyCh = getChar(); |
221 | |
222 | return Tok_String; |
223 | |
224 | case ':': |
225 | yyCh = getChar(); |
226 | return Tok_Colon; |
227 | case '\'': |
228 | yyCh = getChar(); |
229 | |
230 | if ( yyCh == QLatin1Char('\\') ) |
231 | yyCh = getChar(); |
232 | do { |
233 | yyCh = getChar(); |
234 | } while (!yyEOF && yyCh != QLatin1Char('\'')); |
235 | yyCh = getChar(); |
236 | break; |
237 | case '{': |
238 | yyCh = getChar(); |
239 | return Tok_LeftBrace; |
240 | case '}': |
241 | yyCh = getChar(); |
242 | return Tok_RightBrace; |
243 | case '(': |
244 | if (yyParenDepth == 0) |
245 | yyParenLineNo = yyCurLineNo; |
246 | yyParenDepth++; |
247 | yyCh = getChar(); |
248 | return Tok_LeftParen; |
249 | case ')': |
250 | if (yyParenDepth == 0) |
251 | yyParenLineNo = yyCurLineNo; |
252 | yyParenDepth--; |
253 | yyCh = getChar(); |
254 | return Tok_RightParen; |
255 | case ',': |
256 | yyCh = getChar(); |
257 | return Tok_Comma; |
258 | case '.': |
259 | yyCh = getChar(); |
260 | return Tok_Dot; |
261 | case ';': |
262 | yyCh = getChar(); |
263 | return Tok_Semicolon; |
264 | case '+': |
265 | yyCh = getChar(); |
266 | if (yyCh == QLatin1Char('+')) { |
267 | yyCh = getChar(); |
268 | return Tok_PlusPlus; |
269 | } |
270 | if( yyCh == QLatin1Char('=') ){ |
271 | yyCh = getChar(); |
272 | return Tok_PlusEq; |
273 | } |
274 | return Tok_Plus; |
275 | case '0': |
276 | case '1': |
277 | case '2': |
278 | case '3': |
279 | case '4': |
280 | case '5': |
281 | case '6': |
282 | case '7': |
283 | case '8': |
284 | case '9': |
285 | { |
286 | QByteArray ba; |
287 | ba += yyCh.toLatin1(); |
288 | yyCh = getChar(); |
289 | bool hex = yyCh == QLatin1Char('x'); |
290 | if ( hex ) { |
291 | ba += yyCh.toLatin1(); |
292 | yyCh = getChar(); |
293 | } |
294 | while ( hex ? isxdigit(yyCh.toLatin1()) : yyCh.isDigit() ) { |
295 | ba += yyCh.toLatin1(); |
296 | yyCh = getChar(); |
297 | } |
298 | bool ok; |
299 | yyInteger = ba.toLongLong(ok: &ok); |
300 | if (ok) return Tok_Integer; |
301 | break; |
302 | } |
303 | default: |
304 | yyCh = getChar(); |
305 | } |
306 | } |
307 | } |
308 | return Tok_Eof; |
309 | } |
310 | |
311 | static bool match( int t ) |
312 | { |
313 | bool matches = ( yyTok == t ); |
314 | if ( matches ) |
315 | yyTok = getToken(); |
316 | return matches; |
317 | } |
318 | |
319 | static bool matchString( QString &s ) |
320 | { |
321 | if ( yyTok != Tok_String ) |
322 | return false; |
323 | |
324 | s = yyString; |
325 | yyTok = getToken(); |
326 | while ( yyTok == Tok_Plus ) { |
327 | yyTok = getToken(); |
328 | if (yyTok == Tok_String) |
329 | s += yyString; |
330 | else { |
331 | yyMsg() << |
332 | "String used in translation can contain only literals" |
333 | " concatenated with other literals, not expressions or numbers.\n" ; |
334 | return false; |
335 | } |
336 | yyTok = getToken(); |
337 | } |
338 | return true; |
339 | } |
340 | |
341 | static bool matchStringOrNull(QString &s) |
342 | { |
343 | bool matches = matchString(s); |
344 | if (!matches) { |
345 | matches = (yyTok == Tok_null); |
346 | if (matches) |
347 | yyTok = getToken(); |
348 | } |
349 | return matches; |
350 | } |
351 | |
352 | /* |
353 | * match any expression that can return a number, which can be |
354 | * 1. Literal number (e.g. '11') |
355 | * 2. simple identifier (e.g. 'm_count') |
356 | * 3. simple function call (e.g. 'size()' ) |
357 | * 4. function call on an object (e.g. 'list.size()') |
358 | * 5. function call on an object (e.g. 'list->size()') |
359 | * |
360 | * Other cases: |
361 | * size(2,4) |
362 | * list().size() |
363 | * list(a,b).size(2,4) |
364 | * etc... |
365 | */ |
366 | static bool matchExpression() |
367 | { |
368 | if (match(t: Tok_Integer)) { |
369 | return true; |
370 | } |
371 | |
372 | int parenlevel = 0; |
373 | while (match(t: Tok_Ident) || parenlevel > 0) { |
374 | if (yyTok == Tok_RightParen) { |
375 | if (parenlevel == 0) break; |
376 | --parenlevel; |
377 | yyTok = getToken(); |
378 | } else if (yyTok == Tok_LeftParen) { |
379 | yyTok = getToken(); |
380 | if (yyTok == Tok_RightParen) { |
381 | yyTok = getToken(); |
382 | } else { |
383 | ++parenlevel; |
384 | } |
385 | } else if (yyTok == Tok_Ident) { |
386 | continue; |
387 | } else if (parenlevel == 0) { |
388 | return false; |
389 | } |
390 | } |
391 | return true; |
392 | } |
393 | |
394 | static const QString context() |
395 | { |
396 | QString context(yyPackage); |
397 | bool innerClass = false; |
398 | for (int i = 0; i < yyScope.size(); ++i) { |
399 | if (yyScope.at(i)->type == Scope::Clazz) { |
400 | if (innerClass) |
401 | context.append(s: QLatin1String("$" )); |
402 | else |
403 | context.append(s: QLatin1String("." )); |
404 | |
405 | context.append(s: yyScope.at(i)->name); |
406 | innerClass = true; |
407 | } |
408 | } |
409 | return context; |
410 | } |
411 | |
412 | static void recordMessage( |
413 | Translator *tor, const QString &context, const QString &text, const QString &, |
414 | const QString &, bool plural, ConversionData &cd) |
415 | { |
416 | TranslatorMessage msg( |
417 | context, text, comment, QString(), |
418 | yyFileName, yyLineNo, QStringList(), |
419 | TranslatorMessage::Unfinished, plural); |
420 | msg.setExtraComment(extracomment.simplified()); |
421 | tor->extend(msg, cd); |
422 | } |
423 | |
424 | static void parse(Translator *tor, ConversionData &cd) |
425 | { |
426 | QString text; |
427 | QString com; |
428 | QString ; |
429 | |
430 | yyEOF = false; |
431 | yyCh = getChar(); |
432 | |
433 | yyTok = getToken(); |
434 | while ( yyTok != Tok_Eof ) { |
435 | switch ( yyTok ) { |
436 | case Tok_class: |
437 | yyTok = getToken(); |
438 | if(yyTok == Tok_Ident) { |
439 | yyScope.push(t: new Scope(yyIdent, Scope::Clazz, yyLineNo)); |
440 | } |
441 | else { |
442 | yyMsg() << "'class' must be followed by a class name.\n" ; |
443 | break; |
444 | } |
445 | while (!match(t: Tok_LeftBrace)) { |
446 | yyTok = getToken(); |
447 | } |
448 | break; |
449 | |
450 | case Tok_tr: |
451 | yyTok = getToken(); |
452 | if ( match(t: Tok_LeftParen) && matchString(s&: text) ) { |
453 | com.clear(); |
454 | bool plural = false; |
455 | |
456 | if ( match(t: Tok_RightParen) ) { |
457 | // no comment |
458 | } else if (match(t: Tok_Comma) && matchStringOrNull(s&: com)) { //comment |
459 | if ( match(t: Tok_RightParen)) { |
460 | // ok, |
461 | } else if (match(t: Tok_Comma)) { |
462 | plural = true; |
463 | } |
464 | } |
465 | if (!text.isEmpty()) |
466 | recordMessage(tor, context: context(), text, comment: com, extracomment, plural, cd); |
467 | } |
468 | break; |
469 | case Tok_translate: |
470 | { |
471 | QString contextOverride; |
472 | yyTok = getToken(); |
473 | if ( match(t: Tok_LeftParen) && |
474 | matchString(s&: contextOverride) && |
475 | match(t: Tok_Comma) && |
476 | matchString(s&: text) ) { |
477 | |
478 | com.clear(); |
479 | bool plural = false; |
480 | if (!match(t: Tok_RightParen)) { |
481 | // look for comment |
482 | if ( match(t: Tok_Comma) && matchStringOrNull(s&: com)) { |
483 | if (!match(t: Tok_RightParen)) { |
484 | if (match(t: Tok_Comma) && matchExpression() && match(t: Tok_RightParen)) { |
485 | plural = true; |
486 | } else { |
487 | break; |
488 | } |
489 | } |
490 | } else { |
491 | break; |
492 | } |
493 | } |
494 | if (!text.isEmpty()) |
495 | recordMessage(tor, context: contextOverride, text, comment: com, extracomment, plural, cd); |
496 | } |
497 | } |
498 | break; |
499 | |
500 | case Tok_Ident: |
501 | yyTok = getToken(); |
502 | break; |
503 | |
504 | case Tok_Comment: |
505 | if (yyComment.startsWith(c: QLatin1Char(':'))) { |
506 | yyComment.remove(i: 0, len: 1); |
507 | extracomment.append(s: yyComment); |
508 | } |
509 | yyTok = getToken(); |
510 | break; |
511 | |
512 | case Tok_RightBrace: |
513 | if ( yyScope.isEmpty() ) { |
514 | yyMsg() << "Excess closing brace.\n" ; |
515 | } |
516 | else |
517 | delete (yyScope.pop()); |
518 | extracomment.clear(); |
519 | yyTok = getToken(); |
520 | break; |
521 | |
522 | case Tok_LeftBrace: |
523 | yyScope.push(t: new Scope(QString(), Scope::Other, yyLineNo)); |
524 | yyTok = getToken(); |
525 | break; |
526 | |
527 | case Tok_Semicolon: |
528 | extracomment.clear(); |
529 | yyTok = getToken(); |
530 | break; |
531 | |
532 | case Tok_Package: |
533 | yyTok = getToken(); |
534 | while(!match(t: Tok_Semicolon)) { |
535 | switch(yyTok) { |
536 | case Tok_Ident: |
537 | yyPackage.append(s: yyIdent); |
538 | break; |
539 | case Tok_Dot: |
540 | yyPackage.append(s: QLatin1String("." )); |
541 | break; |
542 | default: |
543 | yyMsg() << "'package' must be followed by package name.\n" ; |
544 | break; |
545 | } |
546 | yyTok = getToken(); |
547 | } |
548 | break; |
549 | |
550 | default: |
551 | yyTok = getToken(); |
552 | } |
553 | } |
554 | |
555 | if ( !yyScope.isEmpty() ) |
556 | yyMsg(line: yyScope.top()->line) << "Unbalanced opening brace.\n" ; |
557 | else if ( yyParenDepth != 0 ) |
558 | yyMsg(line: yyParenLineNo) << "Unbalanced opening parenthesis.\n" ; |
559 | } |
560 | |
561 | |
562 | bool loadJava(Translator &translator, const QString &filename, ConversionData &cd) |
563 | { |
564 | QFile file(filename); |
565 | if (!file.open(flags: QIODevice::ReadOnly)) { |
566 | cd.appendError(QStringLiteral("Cannot open %1: %2" ).arg(args: filename, args: file.errorString())); |
567 | return false; |
568 | } |
569 | |
570 | yyInPos = -1; |
571 | yyFileName = filename; |
572 | yyPackage.clear(); |
573 | yyScope.clear(); |
574 | yyTok = -1; |
575 | yyParenDepth = 0; |
576 | yyCurLineNo = 0; |
577 | yyParenLineNo = 1; |
578 | |
579 | QTextStream ts(&file); |
580 | ts.setEncoding(cd.m_sourceIsUtf16 ? QStringConverter::Utf16 : QStringConverter::Utf8); |
581 | ts.setAutoDetectUnicode(true); |
582 | yyInStr = ts.readAll(); |
583 | yyInPos = 0; |
584 | yyFileName = filename; |
585 | yyCurLineNo = 1; |
586 | yyParenLineNo = 1; |
587 | |
588 | parse(tor: &translator, cd); |
589 | return true; |
590 | } |
591 | |
592 | QT_END_NAMESPACE |
593 | |