1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
3
4#include "lupdate.h"
5
6#include <translator.h>
7
8#include <QtCore/QDebug>
9#include <QtCore/QFile>
10#include <QtCore/QStack>
11#include <QtCore/QStack>
12#include <QtCore/QString>
13#include <QtCore/QCoreApplication>
14#include <QtCore/QStringConverter>
15#include <QtCore/QTextStream>
16
17#include <iostream>
18
19#include <ctype.h>
20
21QT_BEGIN_NAMESPACE
22
// Token kinds produced by getToken(). The enumerator order (and therefore
// the numeric values) is kept exactly as before.
enum {
    Tok_Eof,
    Tok_class,
    Tok_return,
    Tok_tr,
    Tok_translate,
    Tok_Ident,
    Tok_Package,
    Tok_Comment,
    Tok_String,
    Tok_Colon,
    Tok_Dot,
    Tok_LeftBrace,
    Tok_RightBrace,
    Tok_LeftParen,
    Tok_RightParen,
    Tok_Comma,
    Tok_Semicolon,
    Tok_Integer,
    Tok_Plus,
    Tok_PlusPlus,
    Tok_PlusEq,
    Tok_null
};
29
30class Scope
31{
32 public:
33 QString name;
34 enum Type {Clazz, Function, Other} type;
35 int line;
36
37 Scope(const QString & name, Type type, int line) :
38 name(name),
39 type(type),
40 line(line)
41 {}
42
43 ~Scope()
44 {}
45};
46
47/*
48 The tokenizer maintains the following global variables. The names
49 should be self-explanatory.
50*/
51
52static QString yyFileName;
53static QChar yyCh;
54static QString yyIdent;
55static QString yyComment;
56static QString yyString;
57static bool yyEOF = false;
58
59static qlonglong yyInteger;
60static int yyParenDepth;
61static int yyLineNo;
62static int yyCurLineNo;
63static int yyParenLineNo;
64static int yyTok;
65
66// the string to read from and current position in the string
67static QString yyInStr;
68static int yyInPos;
69
70// The parser maintains the following global variables.
71static QString yyPackage;
72static QStack<Scope*> yyScope;
73
74std::ostream &yyMsg(int line = 0)
75{
76 return std::cerr << qPrintable(yyFileName) << ':' << (line ? line : yyLineNo) << ": ";
77}
78
79static QChar getChar()
80{
81 if (yyInPos >= yyInStr.size()) {
82 yyEOF = true;
83 return QChar();
84 }
85 QChar c = yyInStr[yyInPos++];
86 if (c == QLatin1Char('\n'))
87 ++yyCurLineNo;
88 return c;
89}
90
91static int getToken()
92{
93 const char tab[] = "bfnrt\"\'\\";
94 const char backTab[] = "\b\f\n\r\t\"\'\\";
95
96 yyIdent.clear();
97 yyComment.clear();
98 yyString.clear();
99
100 while (!yyEOF) {
101 yyLineNo = yyCurLineNo;
102
103 if ( yyCh.isLetter() || yyCh.toLatin1() == '_' ) {
104 do {
105 yyIdent.append(c: yyCh);
106 yyCh = getChar();
107 } while ( yyCh.isLetterOrNumber() || yyCh.toLatin1() == '_' );
108
109 if (yyTok != Tok_Dot) {
110 switch ( yyIdent.at(i: 0).toLatin1() ) {
111 case 'r':
112 if ( yyIdent == QLatin1String("return") )
113 return Tok_return;
114 break;
115 case 'c':
116 if ( yyIdent == QLatin1String("class") )
117 return Tok_class;
118 break;
119 case 'n':
120 if ( yyIdent == QLatin1String("null") )
121 return Tok_null;
122 break;
123 }
124 }
125 switch ( yyIdent.at(i: 0).toLatin1() ) {
126 case 'p':
127 if( yyIdent == QLatin1String("package") )
128 return Tok_Package;
129 break;
130 case 't':
131 if ( yyIdent == QLatin1String("tr") )
132 return Tok_tr;
133 if ( yyIdent == QLatin1String("translate") )
134 return Tok_translate;
135 }
136 return Tok_Ident;
137 } else {
138 switch ( yyCh.toLatin1() ) {
139
140 case '/':
141 yyCh = getChar();
142 if ( yyCh == QLatin1Char('/') ) {
143 do {
144 yyCh = getChar();
145 if (yyEOF)
146 break;
147 yyComment.append(c: yyCh);
148 } while (yyCh != QLatin1Char('\n'));
149 return Tok_Comment;
150
151 } else if ( yyCh == QLatin1Char('*') ) {
152 bool metAster = false;
153 bool metAsterSlash = false;
154
155 while ( !metAsterSlash ) {
156 yyCh = getChar();
157 if (yyEOF) {
158 yyMsg() << "Unterminated Java comment.\n";
159 return Tok_Comment;
160 }
161
162 yyComment.append( c: yyCh );
163
164 if ( yyCh == QLatin1Char('*') )
165 metAster = true;
166 else if ( metAster && yyCh == QLatin1Char('/') )
167 metAsterSlash = true;
168 else
169 metAster = false;
170 }
171 yyComment.chop(n: 2);
172 yyCh = getChar();
173
174 return Tok_Comment;
175 }
176 break;
177 case '"':
178 yyCh = getChar();
179
180 while (!yyEOF && yyCh != QLatin1Char('\n') && yyCh != QLatin1Char('"')) {
181
182 if ( yyCh == QLatin1Char('\\') ) {
183 yyCh = getChar();
184 if ( yyCh == QLatin1Char('u') ) {
185 yyCh = getChar();
186 uint unicode(0);
187 for (int i = 4; i > 0; --i) {
188 unicode = unicode << 4;
189 if( yyCh.isDigit() ) {
190 unicode += yyCh.digitValue();
191 }
192 else {
193 int sub(yyCh.toLower().toLatin1() - 87);
194 if( sub > 15 || sub < 10) {
195 yyMsg() << "Invalid Unicode value.\n";
196 break;
197 }
198 unicode += sub;
199 }
200 yyCh = getChar();
201 }
202 yyString.append(c: QChar(unicode));
203 }
204 else if ( yyCh == QLatin1Char('\n') ) {
205 yyCh = getChar();
206 }
207 else {
208 yyString.append( c: QLatin1Char(backTab[strchr( s: tab, c: yyCh.toLatin1() ) - tab]) );
209 yyCh = getChar();
210 }
211 } else {
212 yyString.append(c: yyCh);
213 yyCh = getChar();
214 }
215 }
216
217 if ( yyCh != QLatin1Char('"') )
218 yyMsg() << "Unterminated string.\n";
219
220 yyCh = getChar();
221
222 return Tok_String;
223
224 case ':':
225 yyCh = getChar();
226 return Tok_Colon;
227 case '\'':
228 yyCh = getChar();
229
230 if ( yyCh == QLatin1Char('\\') )
231 yyCh = getChar();
232 do {
233 yyCh = getChar();
234 } while (!yyEOF && yyCh != QLatin1Char('\''));
235 yyCh = getChar();
236 break;
237 case '{':
238 yyCh = getChar();
239 return Tok_LeftBrace;
240 case '}':
241 yyCh = getChar();
242 return Tok_RightBrace;
243 case '(':
244 if (yyParenDepth == 0)
245 yyParenLineNo = yyCurLineNo;
246 yyParenDepth++;
247 yyCh = getChar();
248 return Tok_LeftParen;
249 case ')':
250 if (yyParenDepth == 0)
251 yyParenLineNo = yyCurLineNo;
252 yyParenDepth--;
253 yyCh = getChar();
254 return Tok_RightParen;
255 case ',':
256 yyCh = getChar();
257 return Tok_Comma;
258 case '.':
259 yyCh = getChar();
260 return Tok_Dot;
261 case ';':
262 yyCh = getChar();
263 return Tok_Semicolon;
264 case '+':
265 yyCh = getChar();
266 if (yyCh == QLatin1Char('+')) {
267 yyCh = getChar();
268 return Tok_PlusPlus;
269 }
270 if( yyCh == QLatin1Char('=') ){
271 yyCh = getChar();
272 return Tok_PlusEq;
273 }
274 return Tok_Plus;
275 case '0':
276 case '1':
277 case '2':
278 case '3':
279 case '4':
280 case '5':
281 case '6':
282 case '7':
283 case '8':
284 case '9':
285 {
286 QByteArray ba;
287 ba += yyCh.toLatin1();
288 yyCh = getChar();
289 bool hex = yyCh == QLatin1Char('x');
290 if ( hex ) {
291 ba += yyCh.toLatin1();
292 yyCh = getChar();
293 }
294 while ( hex ? isxdigit(yyCh.toLatin1()) : yyCh.isDigit() ) {
295 ba += yyCh.toLatin1();
296 yyCh = getChar();
297 }
298 bool ok;
299 yyInteger = ba.toLongLong(ok: &ok);
300 if (ok) return Tok_Integer;
301 break;
302 }
303 default:
304 yyCh = getChar();
305 }
306 }
307 }
308 return Tok_Eof;
309}
310
311static bool match( int t )
312{
313 bool matches = ( yyTok == t );
314 if ( matches )
315 yyTok = getToken();
316 return matches;
317}
318
319static bool matchString( QString &s )
320{
321 if ( yyTok != Tok_String )
322 return false;
323
324 s = yyString;
325 yyTok = getToken();
326 while ( yyTok == Tok_Plus ) {
327 yyTok = getToken();
328 if (yyTok == Tok_String)
329 s += yyString;
330 else {
331 yyMsg() <<
332 "String used in translation can contain only literals"
333 " concatenated with other literals, not expressions or numbers.\n";
334 return false;
335 }
336 yyTok = getToken();
337 }
338 return true;
339}
340
341static bool matchStringOrNull(QString &s)
342{
343 bool matches = matchString(s);
344 if (!matches) {
345 matches = (yyTok == Tok_null);
346 if (matches)
347 yyTok = getToken();
348 }
349 return matches;
350}
351
352/*
353 * match any expression that can return a number, which can be
354 * 1. Literal number (e.g. '11')
355 * 2. simple identifier (e.g. 'm_count')
356 * 3. simple function call (e.g. 'size()' )
357 * 4. function call on an object (e.g. 'list.size()')
358 * 5. function call on an object (e.g. 'list->size()')
359 *
360 * Other cases:
361 * size(2,4)
362 * list().size()
363 * list(a,b).size(2,4)
364 * etc...
365 */
366static bool matchExpression()
367{
368 if (match(t: Tok_Integer)) {
369 return true;
370 }
371
372 int parenlevel = 0;
373 while (match(t: Tok_Ident) || parenlevel > 0) {
374 if (yyTok == Tok_RightParen) {
375 if (parenlevel == 0) break;
376 --parenlevel;
377 yyTok = getToken();
378 } else if (yyTok == Tok_LeftParen) {
379 yyTok = getToken();
380 if (yyTok == Tok_RightParen) {
381 yyTok = getToken();
382 } else {
383 ++parenlevel;
384 }
385 } else if (yyTok == Tok_Ident) {
386 continue;
387 } else if (parenlevel == 0) {
388 return false;
389 }
390 }
391 return true;
392}
393
394static const QString context()
395{
396 QString context(yyPackage);
397 bool innerClass = false;
398 for (int i = 0; i < yyScope.size(); ++i) {
399 if (yyScope.at(i)->type == Scope::Clazz) {
400 if (innerClass)
401 context.append(s: QLatin1String("$"));
402 else
403 context.append(s: QLatin1String("."));
404
405 context.append(s: yyScope.at(i)->name);
406 innerClass = true;
407 }
408 }
409 return context;
410}
411
412static void recordMessage(
413 Translator *tor, const QString &context, const QString &text, const QString &comment,
414 const QString &extracomment, bool plural, ConversionData &cd)
415{
416 TranslatorMessage msg(
417 context, text, comment, QString(),
418 yyFileName, yyLineNo, QStringList(),
419 TranslatorMessage::Unfinished, plural);
420 msg.setExtraComment(extracomment.simplified());
421 tor->extend(msg, cd);
422}
423
424static void parse(Translator *tor, ConversionData &cd)
425{
426 QString text;
427 QString com;
428 QString extracomment;
429
430 yyEOF = false;
431 yyCh = getChar();
432
433 yyTok = getToken();
434 while ( yyTok != Tok_Eof ) {
435 switch ( yyTok ) {
436 case Tok_class:
437 yyTok = getToken();
438 if(yyTok == Tok_Ident) {
439 yyScope.push(t: new Scope(yyIdent, Scope::Clazz, yyLineNo));
440 }
441 else {
442 yyMsg() << "'class' must be followed by a class name.\n";
443 break;
444 }
445 while (!match(t: Tok_LeftBrace)) {
446 yyTok = getToken();
447 }
448 break;
449
450 case Tok_tr:
451 yyTok = getToken();
452 if ( match(t: Tok_LeftParen) && matchString(s&: text) ) {
453 com.clear();
454 bool plural = false;
455
456 if ( match(t: Tok_RightParen) ) {
457 // no comment
458 } else if (match(t: Tok_Comma) && matchStringOrNull(s&: com)) { //comment
459 if ( match(t: Tok_RightParen)) {
460 // ok,
461 } else if (match(t: Tok_Comma)) {
462 plural = true;
463 }
464 }
465 if (!text.isEmpty())
466 recordMessage(tor, context: context(), text, comment: com, extracomment, plural, cd);
467 }
468 break;
469 case Tok_translate:
470 {
471 QString contextOverride;
472 yyTok = getToken();
473 if ( match(t: Tok_LeftParen) &&
474 matchString(s&: contextOverride) &&
475 match(t: Tok_Comma) &&
476 matchString(s&: text) ) {
477
478 com.clear();
479 bool plural = false;
480 if (!match(t: Tok_RightParen)) {
481 // look for comment
482 if ( match(t: Tok_Comma) && matchStringOrNull(s&: com)) {
483 if (!match(t: Tok_RightParen)) {
484 if (match(t: Tok_Comma) && matchExpression() && match(t: Tok_RightParen)) {
485 plural = true;
486 } else {
487 break;
488 }
489 }
490 } else {
491 break;
492 }
493 }
494 if (!text.isEmpty())
495 recordMessage(tor, context: contextOverride, text, comment: com, extracomment, plural, cd);
496 }
497 }
498 break;
499
500 case Tok_Ident:
501 yyTok = getToken();
502 break;
503
504 case Tok_Comment:
505 if (yyComment.startsWith(c: QLatin1Char(':'))) {
506 yyComment.remove(i: 0, len: 1);
507 extracomment.append(s: yyComment);
508 }
509 yyTok = getToken();
510 break;
511
512 case Tok_RightBrace:
513 if ( yyScope.isEmpty() ) {
514 yyMsg() << "Excess closing brace.\n";
515 }
516 else
517 delete (yyScope.pop());
518 extracomment.clear();
519 yyTok = getToken();
520 break;
521
522 case Tok_LeftBrace:
523 yyScope.push(t: new Scope(QString(), Scope::Other, yyLineNo));
524 yyTok = getToken();
525 break;
526
527 case Tok_Semicolon:
528 extracomment.clear();
529 yyTok = getToken();
530 break;
531
532 case Tok_Package:
533 yyTok = getToken();
534 while(!match(t: Tok_Semicolon)) {
535 switch(yyTok) {
536 case Tok_Ident:
537 yyPackage.append(s: yyIdent);
538 break;
539 case Tok_Dot:
540 yyPackage.append(s: QLatin1String("."));
541 break;
542 default:
543 yyMsg() << "'package' must be followed by package name.\n";
544 break;
545 }
546 yyTok = getToken();
547 }
548 break;
549
550 default:
551 yyTok = getToken();
552 }
553 }
554
555 if ( !yyScope.isEmpty() )
556 yyMsg(line: yyScope.top()->line) << "Unbalanced opening brace.\n";
557 else if ( yyParenDepth != 0 )
558 yyMsg(line: yyParenLineNo) << "Unbalanced opening parenthesis.\n";
559}
560
561
562bool loadJava(Translator &translator, const QString &filename, ConversionData &cd)
563{
564 QFile file(filename);
565 if (!file.open(flags: QIODevice::ReadOnly)) {
566 cd.appendError(QStringLiteral("Cannot open %1: %2").arg(args: filename, args: file.errorString()));
567 return false;
568 }
569
570 yyInPos = -1;
571 yyFileName = filename;
572 yyPackage.clear();
573 yyScope.clear();
574 yyTok = -1;
575 yyParenDepth = 0;
576 yyCurLineNo = 0;
577 yyParenLineNo = 1;
578
579 QTextStream ts(&file);
580 ts.setEncoding(cd.m_sourceIsUtf16 ? QStringConverter::Utf16 : QStringConverter::Utf8);
581 ts.setAutoDetectUnicode(true);
582 yyInStr = ts.readAll();
583 yyInPos = 0;
584 yyFileName = filename;
585 yyCurLineNo = 1;
586 yyParenLineNo = 1;
587
588 parse(tor: &translator, cd);
589 return true;
590}
591
592QT_END_NAMESPACE
593

// source code of qttools/src/linguist/lupdate/java.cpp