1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the Qt Linguist of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:GPL-EXCEPT$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT |
21 | ** included in the packaging of this file. Please review the following |
22 | ** information to ensure the GNU General Public License requirements will |
23 | ** be met: https://www.gnu.org/licenses/gpl-3.0.html. |
24 | ** |
25 | ** $QT_END_LICENSE$ |
26 | ** |
27 | ****************************************************************************/ |
28 | |
29 | #include "lupdate.h" |
30 | |
31 | #include <translator.h> |
32 | |
33 | #include <QtCore/QDebug> |
34 | #include <QtCore/QFile> |
35 | #include <QtCore/QRegExp> |
36 | #include <QtCore/QStack> |
37 | #include <QtCore/QStack> |
38 | #include <QtCore/QString> |
39 | #include <QtCore/QTextCodec> |
40 | #include <QtCore/QCoreApplication> |
41 | |
42 | #include <iostream> |
43 | |
44 | #include <ctype.h> |
45 | |
46 | QT_BEGIN_NAMESPACE |
47 | |
// Token codes returned by getToken(). Tok_Eof is 0; the remaining values
// follow in declaration order.
enum { Tok_Eof, Tok_class, Tok_return, Tok_tr,
       Tok_translate, Tok_Ident, Tok_Package,
       Tok_Comment, Tok_String, Tok_Colon, Tok_Dot,
       Tok_LeftBrace, Tok_RightBrace, Tok_LeftParen,
       Tok_RightParen, Tok_Comma, Tok_Semicolon,
       Tok_Integer, Tok_Plus, Tok_PlusPlus, Tok_PlusEq, Tok_null };
54 | |
55 | class Scope |
56 | { |
57 | public: |
58 | QString name; |
59 | enum Type {Clazz, Function, Other} type; |
60 | int line; |
61 | |
62 | Scope(const QString & name, Type type, int line) : |
63 | name(name), |
64 | type(type), |
65 | line(line) |
66 | {} |
67 | |
68 | ~Scope() |
69 | {} |
70 | }; |
71 | |
72 | /* |
73 | The tokenizer maintains the following global variables. The names |
74 | should be self-explanatory. |
75 | */ |
76 | |
77 | static QString yyFileName; |
78 | static QChar yyCh; |
79 | static QString yyIdent; |
80 | static QString ; |
81 | static QString yyString; |
82 | |
83 | |
84 | static qlonglong yyInteger; |
85 | static int yyParenDepth; |
86 | static int yyLineNo; |
87 | static int yyCurLineNo; |
88 | static int yyParenLineNo; |
89 | static int yyTok; |
90 | |
91 | // the string to read from and current position in the string |
92 | static QString yyInStr; |
93 | static int yyInPos; |
94 | |
95 | // The parser maintains the following global variables. |
96 | static QString yyPackage; |
97 | static QStack<Scope*> yyScope; |
98 | |
99 | std::ostream &yyMsg(int line = 0) |
100 | { |
101 | return std::cerr << qPrintable(yyFileName) << ':' << (line ? line : yyLineNo) << ": " ; |
102 | } |
103 | |
104 | static QChar getChar() |
105 | { |
106 | if (yyInPos >= yyInStr.size()) |
107 | return QChar(EOF); |
108 | QChar c = yyInStr[yyInPos++]; |
109 | if (c == QLatin1Char('\n')) |
110 | ++yyCurLineNo; |
111 | return c; |
112 | } |
113 | |
114 | static int getToken() |
115 | { |
116 | const char tab[] = "bfnrt\"\'\\" ; |
117 | const char backTab[] = "\b\f\n\r\t\"\'\\" ; |
118 | |
119 | yyIdent.clear(); |
120 | yyComment.clear(); |
121 | yyString.clear(); |
122 | |
123 | while (yyCh != QChar(EOF)) { |
124 | yyLineNo = yyCurLineNo; |
125 | |
126 | if ( yyCh.isLetter() || yyCh.toLatin1() == '_' ) { |
127 | do { |
128 | yyIdent.append(c: yyCh); |
129 | yyCh = getChar(); |
130 | } while ( yyCh.isLetterOrNumber() || yyCh.toLatin1() == '_' ); |
131 | |
132 | if (yyTok != Tok_Dot) { |
133 | switch ( yyIdent.at(i: 0).toLatin1() ) { |
134 | case 'r': |
135 | if ( yyIdent == QLatin1String("return" ) ) |
136 | return Tok_return; |
137 | break; |
138 | case 'c': |
139 | if ( yyIdent == QLatin1String("class" ) ) |
140 | return Tok_class; |
141 | break; |
142 | case 'n': |
143 | if ( yyIdent == QLatin1String("null" ) ) |
144 | return Tok_null; |
145 | break; |
146 | } |
147 | } |
148 | switch ( yyIdent.at(i: 0).toLatin1() ) { |
149 | case 'p': |
150 | if( yyIdent == QLatin1String("package" ) ) |
151 | return Tok_Package; |
152 | break; |
153 | case 't': |
154 | if ( yyIdent == QLatin1String("tr" ) ) |
155 | return Tok_tr; |
156 | if ( yyIdent == QLatin1String("translate" ) ) |
157 | return Tok_translate; |
158 | } |
159 | return Tok_Ident; |
160 | } else { |
161 | switch ( yyCh.toLatin1() ) { |
162 | |
163 | case '/': |
164 | yyCh = getChar(); |
165 | if ( yyCh == QLatin1Char('/') ) { |
166 | do { |
167 | yyCh = getChar(); |
168 | if (yyCh == QChar(EOF)) |
169 | break; |
170 | yyComment.append(c: yyCh); |
171 | } while (yyCh != QLatin1Char('\n')); |
172 | return Tok_Comment; |
173 | |
174 | } else if ( yyCh == QLatin1Char('*') ) { |
175 | bool metAster = false; |
176 | bool metAsterSlash = false; |
177 | |
178 | while ( !metAsterSlash ) { |
179 | yyCh = getChar(); |
180 | if (yyCh == QChar(EOF)) { |
181 | yyMsg() << qPrintable(LU::tr("Unterminated Java comment.\n" )); |
182 | return Tok_Comment; |
183 | } |
184 | |
185 | yyComment.append( c: yyCh ); |
186 | |
187 | if ( yyCh == QLatin1Char('*') ) |
188 | metAster = true; |
189 | else if ( metAster && yyCh == QLatin1Char('/') ) |
190 | metAsterSlash = true; |
191 | else |
192 | metAster = false; |
193 | } |
194 | yyComment.chop(n: 2); |
195 | yyCh = getChar(); |
196 | |
197 | return Tok_Comment; |
198 | } |
199 | break; |
200 | case '"': |
201 | yyCh = getChar(); |
202 | |
203 | while (yyCh != QChar(EOF) && yyCh != QLatin1Char('\n') && yyCh != QLatin1Char('"')) { |
204 | if ( yyCh == QLatin1Char('\\') ) { |
205 | yyCh = getChar(); |
206 | if ( yyCh == QLatin1Char('u') ) { |
207 | yyCh = getChar(); |
208 | uint unicode(0); |
209 | for (int i = 4; i > 0; --i) { |
210 | unicode = unicode << 4; |
211 | if( yyCh.isDigit() ) { |
212 | unicode += yyCh.digitValue(); |
213 | } |
214 | else { |
215 | int sub(yyCh.toLower().toLatin1() - 87); |
216 | if( sub > 15 || sub < 10) { |
217 | yyMsg() << qPrintable(LU::tr("Invalid Unicode value.\n" )); |
218 | break; |
219 | } |
220 | unicode += sub; |
221 | } |
222 | yyCh = getChar(); |
223 | } |
224 | yyString.append(c: QChar(unicode)); |
225 | } |
226 | else if ( yyCh == QLatin1Char('\n') ) { |
227 | yyCh = getChar(); |
228 | } |
229 | else { |
230 | yyString.append( c: QLatin1Char(backTab[strchr( s: tab, c: yyCh.toLatin1() ) - tab]) ); |
231 | yyCh = getChar(); |
232 | } |
233 | } else { |
234 | yyString.append(c: yyCh); |
235 | yyCh = getChar(); |
236 | } |
237 | } |
238 | |
239 | if ( yyCh != QLatin1Char('"') ) |
240 | yyMsg() << qPrintable(LU::tr("Unterminated string.\n" )); |
241 | |
242 | yyCh = getChar(); |
243 | |
244 | return Tok_String; |
245 | |
246 | case ':': |
247 | yyCh = getChar(); |
248 | return Tok_Colon; |
249 | case '\'': |
250 | yyCh = getChar(); |
251 | |
252 | if ( yyCh == QLatin1Char('\\') ) |
253 | yyCh = getChar(); |
254 | do { |
255 | yyCh = getChar(); |
256 | } while (yyCh != QChar(EOF) && yyCh != QLatin1Char('\'')); |
257 | yyCh = getChar(); |
258 | break; |
259 | case '{': |
260 | yyCh = getChar(); |
261 | return Tok_LeftBrace; |
262 | case '}': |
263 | yyCh = getChar(); |
264 | return Tok_RightBrace; |
265 | case '(': |
266 | if (yyParenDepth == 0) |
267 | yyParenLineNo = yyCurLineNo; |
268 | yyParenDepth++; |
269 | yyCh = getChar(); |
270 | return Tok_LeftParen; |
271 | case ')': |
272 | if (yyParenDepth == 0) |
273 | yyParenLineNo = yyCurLineNo; |
274 | yyParenDepth--; |
275 | yyCh = getChar(); |
276 | return Tok_RightParen; |
277 | case ',': |
278 | yyCh = getChar(); |
279 | return Tok_Comma; |
280 | case '.': |
281 | yyCh = getChar(); |
282 | return Tok_Dot; |
283 | case ';': |
284 | yyCh = getChar(); |
285 | return Tok_Semicolon; |
286 | case '+': |
287 | yyCh = getChar(); |
288 | if (yyCh == QLatin1Char('+')) { |
289 | yyCh = getChar(); |
290 | return Tok_PlusPlus; |
291 | } |
292 | if( yyCh == QLatin1Char('=') ){ |
293 | yyCh = getChar(); |
294 | return Tok_PlusEq; |
295 | } |
296 | return Tok_Plus; |
297 | case '0': |
298 | case '1': |
299 | case '2': |
300 | case '3': |
301 | case '4': |
302 | case '5': |
303 | case '6': |
304 | case '7': |
305 | case '8': |
306 | case '9': |
307 | { |
308 | QByteArray ba; |
309 | ba += yyCh.toLatin1(); |
310 | yyCh = getChar(); |
311 | bool hex = yyCh == QLatin1Char('x'); |
312 | if ( hex ) { |
313 | ba += yyCh.toLatin1(); |
314 | yyCh = getChar(); |
315 | } |
316 | while ( hex ? isxdigit(yyCh.toLatin1()) : yyCh.isDigit() ) { |
317 | ba += yyCh.toLatin1(); |
318 | yyCh = getChar(); |
319 | } |
320 | bool ok; |
321 | yyInteger = ba.toLongLong(ok: &ok); |
322 | if (ok) return Tok_Integer; |
323 | break; |
324 | } |
325 | default: |
326 | yyCh = getChar(); |
327 | } |
328 | } |
329 | } |
330 | return Tok_Eof; |
331 | } |
332 | |
333 | static bool match( int t ) |
334 | { |
335 | bool matches = ( yyTok == t ); |
336 | if ( matches ) |
337 | yyTok = getToken(); |
338 | return matches; |
339 | } |
340 | |
341 | static bool matchString( QString &s ) |
342 | { |
343 | if ( yyTok != Tok_String ) |
344 | return false; |
345 | |
346 | s = yyString; |
347 | yyTok = getToken(); |
348 | while ( yyTok == Tok_Plus ) { |
349 | yyTok = getToken(); |
350 | if (yyTok == Tok_String) |
351 | s += yyString; |
352 | else { |
353 | yyMsg() << qPrintable(LU::tr( |
354 | "String used in translation can contain only literals" |
355 | " concatenated with other literals, not expressions or numbers.\n" )); |
356 | return false; |
357 | } |
358 | yyTok = getToken(); |
359 | } |
360 | return true; |
361 | } |
362 | |
363 | static bool matchStringOrNull(QString &s) |
364 | { |
365 | bool matches = matchString(s); |
366 | if (!matches) { |
367 | matches = (yyTok == Tok_null); |
368 | if (matches) |
369 | yyTok = getToken(); |
370 | } |
371 | return matches; |
372 | } |
373 | |
374 | /* |
375 | * match any expression that can return a number, which can be |
376 | * 1. Literal number (e.g. '11') |
377 | * 2. simple identifier (e.g. 'm_count') |
378 | * 3. simple function call (e.g. 'size()' ) |
379 | * 4. function call on an object (e.g. 'list.size()') |
380 | * 5. function call on an object (e.g. 'list->size()') |
381 | * |
382 | * Other cases: |
383 | * size(2,4) |
384 | * list().size() |
385 | * list(a,b).size(2,4) |
386 | * etc... |
387 | */ |
388 | static bool matchExpression() |
389 | { |
390 | if (match(t: Tok_Integer)) { |
391 | return true; |
392 | } |
393 | |
394 | int parenlevel = 0; |
395 | while (match(t: Tok_Ident) || parenlevel > 0) { |
396 | if (yyTok == Tok_RightParen) { |
397 | if (parenlevel == 0) break; |
398 | --parenlevel; |
399 | yyTok = getToken(); |
400 | } else if (yyTok == Tok_LeftParen) { |
401 | yyTok = getToken(); |
402 | if (yyTok == Tok_RightParen) { |
403 | yyTok = getToken(); |
404 | } else { |
405 | ++parenlevel; |
406 | } |
407 | } else if (yyTok == Tok_Ident) { |
408 | continue; |
409 | } else if (parenlevel == 0) { |
410 | return false; |
411 | } |
412 | } |
413 | return true; |
414 | } |
415 | |
416 | static const QString context() |
417 | { |
418 | QString context(yyPackage); |
419 | bool innerClass = false; |
420 | for (int i = 0; i < yyScope.size(); ++i) { |
421 | if (yyScope.at(i)->type == Scope::Clazz) { |
422 | if (innerClass) |
423 | context.append(s: QLatin1String("$" )); |
424 | else |
425 | context.append(s: QLatin1String("." )); |
426 | |
427 | context.append(s: yyScope.at(i)->name); |
428 | innerClass = true; |
429 | } |
430 | } |
431 | return context; |
432 | } |
433 | |
434 | static void recordMessage( |
435 | Translator *tor, const QString &context, const QString &text, const QString &, |
436 | const QString &, bool plural, ConversionData &cd) |
437 | { |
438 | TranslatorMessage msg( |
439 | context, text, comment, QString(), |
440 | yyFileName, yyLineNo, QStringList(), |
441 | TranslatorMessage::Unfinished, plural); |
442 | msg.setExtraComment(extracomment.simplified()); |
443 | tor->extend(msg, cd); |
444 | } |
445 | |
446 | static void parse(Translator *tor, ConversionData &cd) |
447 | { |
448 | QString text; |
449 | QString com; |
450 | QString ; |
451 | |
452 | yyCh = getChar(); |
453 | |
454 | yyTok = getToken(); |
455 | while ( yyTok != Tok_Eof ) { |
456 | switch ( yyTok ) { |
457 | case Tok_class: |
458 | yyTok = getToken(); |
459 | if(yyTok == Tok_Ident) { |
460 | yyScope.push(t: new Scope(yyIdent, Scope::Clazz, yyLineNo)); |
461 | } |
462 | else { |
463 | yyMsg() << qPrintable(LU::tr("'class' must be followed by a class name.\n" )); |
464 | break; |
465 | } |
466 | while (!match(t: Tok_LeftBrace)) { |
467 | yyTok = getToken(); |
468 | } |
469 | break; |
470 | |
471 | case Tok_tr: |
472 | yyTok = getToken(); |
473 | if ( match(t: Tok_LeftParen) && matchString(s&: text) ) { |
474 | com.clear(); |
475 | bool plural = false; |
476 | |
477 | if ( match(t: Tok_RightParen) ) { |
478 | // no comment |
479 | } else if (match(t: Tok_Comma) && matchStringOrNull(s&: com)) { //comment |
480 | if ( match(t: Tok_RightParen)) { |
481 | // ok, |
482 | } else if (match(t: Tok_Comma)) { |
483 | plural = true; |
484 | } |
485 | } |
486 | if (!text.isEmpty()) |
487 | recordMessage(tor, context: context(), text, comment: com, extracomment, plural, cd); |
488 | } |
489 | break; |
490 | case Tok_translate: |
491 | { |
492 | QString contextOverride; |
493 | yyTok = getToken(); |
494 | if ( match(t: Tok_LeftParen) && |
495 | matchString(s&: contextOverride) && |
496 | match(t: Tok_Comma) && |
497 | matchString(s&: text) ) { |
498 | |
499 | com.clear(); |
500 | bool plural = false; |
501 | if (!match(t: Tok_RightParen)) { |
502 | // look for comment |
503 | if ( match(t: Tok_Comma) && matchStringOrNull(s&: com)) { |
504 | if (!match(t: Tok_RightParen)) { |
505 | if (match(t: Tok_Comma) && matchExpression() && match(t: Tok_RightParen)) { |
506 | plural = true; |
507 | } else { |
508 | break; |
509 | } |
510 | } |
511 | } else { |
512 | break; |
513 | } |
514 | } |
515 | if (!text.isEmpty()) |
516 | recordMessage(tor, context: contextOverride, text, comment: com, extracomment, plural, cd); |
517 | } |
518 | } |
519 | break; |
520 | |
521 | case Tok_Ident: |
522 | yyTok = getToken(); |
523 | break; |
524 | |
525 | case Tok_Comment: |
526 | if (yyComment.startsWith(c: QLatin1Char(':'))) { |
527 | yyComment.remove(i: 0, len: 1); |
528 | extracomment.append(s: yyComment); |
529 | } |
530 | yyTok = getToken(); |
531 | break; |
532 | |
533 | case Tok_RightBrace: |
534 | if ( yyScope.isEmpty() ) { |
535 | yyMsg() << qPrintable(LU::tr("Excess closing brace.\n" )); |
536 | } |
537 | else |
538 | delete (yyScope.pop()); |
539 | extracomment.clear(); |
540 | yyTok = getToken(); |
541 | break; |
542 | |
543 | case Tok_LeftBrace: |
544 | yyScope.push(t: new Scope(QString(), Scope::Other, yyLineNo)); |
545 | yyTok = getToken(); |
546 | break; |
547 | |
548 | case Tok_Semicolon: |
549 | extracomment.clear(); |
550 | yyTok = getToken(); |
551 | break; |
552 | |
553 | case Tok_Package: |
554 | yyTok = getToken(); |
555 | while(!match(t: Tok_Semicolon)) { |
556 | switch(yyTok) { |
557 | case Tok_Ident: |
558 | yyPackage.append(s: yyIdent); |
559 | break; |
560 | case Tok_Dot: |
561 | yyPackage.append(s: QLatin1String("." )); |
562 | break; |
563 | default: |
564 | yyMsg() << qPrintable(LU::tr("'package' must be followed by package name.\n" )); |
565 | break; |
566 | } |
567 | yyTok = getToken(); |
568 | } |
569 | break; |
570 | |
571 | default: |
572 | yyTok = getToken(); |
573 | } |
574 | } |
575 | |
576 | if ( !yyScope.isEmpty() ) |
577 | yyMsg(line: yyScope.top()->line) << qPrintable(LU::tr("Unbalanced opening brace.\n" )); |
578 | else if ( yyParenDepth != 0 ) |
579 | yyMsg(line: yyParenLineNo) << qPrintable(LU::tr("Unbalanced opening parenthesis.\n" )); |
580 | } |
581 | |
582 | |
583 | bool loadJava(Translator &translator, const QString &filename, ConversionData &cd) |
584 | { |
585 | QFile file(filename); |
586 | if (!file.open(flags: QIODevice::ReadOnly)) { |
587 | cd.appendError(error: LU::tr(sourceText: "Cannot open %1: %2" ).arg(args: filename, args: file.errorString())); |
588 | return false; |
589 | } |
590 | |
591 | yyInPos = -1; |
592 | yyFileName = filename; |
593 | yyPackage.clear(); |
594 | yyScope.clear(); |
595 | yyTok = -1; |
596 | yyParenDepth = 0; |
597 | yyCurLineNo = 0; |
598 | yyParenLineNo = 1; |
599 | |
600 | QTextStream ts(&file); |
601 | ts.setCodec(QTextCodec::codecForName(name: cd.m_sourceIsUtf16 ? "UTF-16" : "UTF-8" )); |
602 | ts.setAutoDetectUnicode(true); |
603 | yyInStr = ts.readAll(); |
604 | yyInPos = 0; |
605 | yyFileName = filename; |
606 | yyCurLineNo = 1; |
607 | yyParenLineNo = 1; |
608 | |
609 | parse(tor: &translator, cd); |
610 | return true; |
611 | } |
612 | |
613 | QT_END_NAMESPACE |
614 | |