1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
3
4#include "lupdate.h"
5
6#include <translator.h>
7
8#include <QtCore/QDebug>
9#include <QtCore/QFile>
10#include <QtCore/QStack>
11#include <QtCore/QStack>
12#include <QtCore/QString>
13#include <QtCore/QCoreApplication>
14#include <QtCore/QStringConverter>
15#include <QtCore/QTextStream>
16
17#include <iostream>
18
19#include <ctype.h>
20
21QT_BEGIN_NAMESPACE
22
23using namespace Qt::Literals::StringLiterals;
24
// Token kinds produced by getToken(). The enumerators are unscoped and keep
// their implicit values (Tok_Eof == 0, then counting up in declaration order).
enum {
    Tok_Eof,        // end of input
    Tok_class,      // keyword "class"
    Tok_return,     // keyword "return"
    Tok_tr,         // identifier "tr"
    Tok_translate,  // identifier "translate"
    Tok_Ident,      // any other identifier
    Tok_Package,    // keyword "package"
    Tok_Comment,    // line or block comment, text in yyComment
    Tok_String,     // string literal, text in yyString
    Tok_Colon,
    Tok_Dot,
    Tok_LeftBrace,
    Tok_RightBrace,
    Tok_LeftParen,
    Tok_RightParen,
    Tok_Comma,
    Tok_Semicolon,
    Tok_Integer,    // integer literal, value in yyInteger
    Tok_Plus,
    Tok_PlusPlus,
    Tok_PlusEq,
    Tok_null        // keyword "null"
};
31
32class Scope
33{
34 public:
35 QString name;
36 enum Type {Clazz, Function, Other} type;
37 int line;
38
39 Scope(const QString & name, Type type, int line) :
40 name(name),
41 type(type),
42 line(line)
43 {}
44
45 ~Scope()
46 {}
47};
48
49/*
50 The tokenizer maintains the following global variables. The names
51 should be self-explanatory.
52*/
53
54static QString yyFileName;
55static QChar yyCh;
56static QString yyIdent;
57static QString yyComment;
58static QString yyString;
59static bool yyEOF = false;
60
61static qlonglong yyInteger;
62static int yyParenDepth;
63static int yyLineNo;
64static int yyCurLineNo;
65static int yyParenLineNo;
66static int yyTok;
67
68// the string to read from and current position in the string
69static QString yyInStr;
70static int yyInPos;
71
72// The parser maintains the following global variables.
73static QString yyPackage;
74static QStack<Scope*> yyScope;
75
76std::ostream &yyMsg(int line = 0)
77{
78 return std::cerr << qPrintable(yyFileName) << ':' << (line ? line : yyLineNo) << ": ";
79}
80
81static QChar getChar()
82{
83 if (yyInPos >= yyInStr.size()) {
84 yyEOF = true;
85 return QChar();
86 }
87 QChar c = yyInStr[yyInPos++];
88 if (c == u'\n')
89 ++yyCurLineNo;
90 return c;
91}
92
93static int getToken()
94{
95 const char tab[] = "bfnrt\"\'\\";
96 const char backTab[] = "\b\f\n\r\t\"\'\\";
97
98 yyIdent.clear();
99 yyComment.clear();
100 yyString.clear();
101
102 while (!yyEOF) {
103 yyLineNo = yyCurLineNo;
104
105 if ( yyCh.isLetter() || yyCh.toLatin1() == '_' ) {
106 do {
107 yyIdent.append(c: yyCh);
108 yyCh = getChar();
109 } while ( yyCh.isLetterOrNumber() || yyCh.toLatin1() == '_' );
110
111 if (yyTok != Tok_Dot) {
112 switch ( yyIdent.at(i: 0).toLatin1() ) {
113 case 'r':
114 if (yyIdent == "return"_L1)
115 return Tok_return;
116 break;
117 case 'c':
118 if (yyIdent == "class"_L1)
119 return Tok_class;
120 break;
121 case 'n':
122 if (yyIdent == "null"_L1)
123 return Tok_null;
124 break;
125 }
126 }
127 switch ( yyIdent.at(i: 0).toLatin1() ) {
128 case 'p':
129 if (yyIdent == "package"_L1)
130 return Tok_Package;
131 break;
132 case 't':
133 if (yyIdent == "tr"_L1)
134 return Tok_tr;
135 if (yyIdent == "translate"_L1)
136 return Tok_translate;
137 }
138 return Tok_Ident;
139 } else {
140 switch ( yyCh.toLatin1() ) {
141
142 case '/':
143 yyCh = getChar();
144 if (yyCh == u'/') {
145 do {
146 yyCh = getChar();
147 if (yyEOF)
148 break;
149 yyComment.append(c: yyCh);
150 } while (yyCh != u'\n');
151 return Tok_Comment;
152
153 } else if (yyCh == u'*') {
154 bool metAster = false;
155 bool metAsterSlash = false;
156
157 while ( !metAsterSlash ) {
158 yyCh = getChar();
159 if (yyEOF) {
160 yyMsg() << "Unterminated Java comment.\n";
161 return Tok_Comment;
162 }
163
164 yyComment.append( c: yyCh );
165
166 if (yyCh == u'*')
167 metAster = true;
168 else if (metAster && yyCh == u'/')
169 metAsterSlash = true;
170 else
171 metAster = false;
172 }
173 yyComment.chop(n: 2);
174 yyCh = getChar();
175
176 return Tok_Comment;
177 }
178 break;
179 case '"':
180 yyCh = getChar();
181
182 while (!yyEOF && yyCh != u'\n' && yyCh != u'"') {
183
184 if (yyCh == u'\\') {
185 yyCh = getChar();
186 if (yyCh == u'u') {
187 yyCh = getChar();
188 uint unicode(0);
189 for (int i = 4; i > 0; --i) {
190 unicode = unicode << 4;
191 if( yyCh.isDigit() ) {
192 unicode += yyCh.digitValue();
193 }
194 else {
195 int sub(yyCh.toLower().toLatin1() - 87);
196 if( sub > 15 || sub < 10) {
197 yyMsg() << "Invalid Unicode value.\n";
198 break;
199 }
200 unicode += sub;
201 }
202 yyCh = getChar();
203 }
204 yyString.append(c: QChar(unicode));
205 } else if (yyCh == u'\n') {
206 yyCh = getChar();
207 } else if (const char *p = strchr(s: tab, c: yyCh.toLatin1()); p) {
208 yyString.append(c: QLatin1Char(backTab[p - tab]));
209 yyCh = getChar();
210 } else {
211 yyMsg() << "Invalid escaped character \'\\" << qPrintable(yyCh)
212 << "\'\n";
213 yyCh = getChar();
214 }
215 } else {
216 yyString.append(c: yyCh);
217 yyCh = getChar();
218 }
219 }
220
221 if (yyCh != u'"')
222 yyMsg() << "Unterminated string.\n";
223
224 yyCh = getChar();
225
226 return Tok_String;
227
228 case ':':
229 yyCh = getChar();
230 return Tok_Colon;
231 case '\'':
232 yyCh = getChar();
233
234 if (yyCh == u'\\')
235 yyCh = getChar();
236 do {
237 yyCh = getChar();
238 } while (!yyEOF && yyCh != u'\'');
239 yyCh = getChar();
240 break;
241 case '{':
242 yyCh = getChar();
243 return Tok_LeftBrace;
244 case '}':
245 yyCh = getChar();
246 return Tok_RightBrace;
247 case '(':
248 if (yyParenDepth == 0)
249 yyParenLineNo = yyCurLineNo;
250 yyParenDepth++;
251 yyCh = getChar();
252 return Tok_LeftParen;
253 case ')':
254 if (yyParenDepth == 0)
255 yyParenLineNo = yyCurLineNo;
256 yyParenDepth--;
257 yyCh = getChar();
258 return Tok_RightParen;
259 case ',':
260 yyCh = getChar();
261 return Tok_Comma;
262 case '.':
263 yyCh = getChar();
264 return Tok_Dot;
265 case ';':
266 yyCh = getChar();
267 return Tok_Semicolon;
268 case '+':
269 yyCh = getChar();
270 if (yyCh == u'+') {
271 yyCh = getChar();
272 return Tok_PlusPlus;
273 }
274 if (yyCh == u'=') {
275 yyCh = getChar();
276 return Tok_PlusEq;
277 }
278 return Tok_Plus;
279 case '0':
280 case '1':
281 case '2':
282 case '3':
283 case '4':
284 case '5':
285 case '6':
286 case '7':
287 case '8':
288 case '9':
289 {
290 QByteArray ba;
291 ba += yyCh.toLatin1();
292 yyCh = getChar();
293 bool hex = yyCh == u'x';
294 if ( hex ) {
295 ba += yyCh.toLatin1();
296 yyCh = getChar();
297 }
298 while ( hex ? isxdigit(yyCh.toLatin1()) : yyCh.isDigit() ) {
299 ba += yyCh.toLatin1();
300 yyCh = getChar();
301 }
302 bool ok;
303 yyInteger = ba.toLongLong(ok: &ok);
304 if (ok) return Tok_Integer;
305 break;
306 }
307 default:
308 yyCh = getChar();
309 }
310 }
311 }
312 return Tok_Eof;
313}
314
315static bool match( int t )
316{
317 bool matches = ( yyTok == t );
318 if ( matches )
319 yyTok = getToken();
320 return matches;
321}
322
323static bool matchString( QString &s )
324{
325 if ( yyTok != Tok_String )
326 return false;
327
328 s = yyString;
329 yyTok = getToken();
330 while ( yyTok == Tok_Plus ) {
331 yyTok = getToken();
332 if (yyTok == Tok_String)
333 s += yyString;
334 else {
335 yyMsg() <<
336 "String used in translation can contain only literals"
337 " concatenated with other literals, not expressions or numbers.\n";
338 return false;
339 }
340 yyTok = getToken();
341 }
342 return true;
343}
344
345static bool matchStringOrNull(QString &s)
346{
347 bool matches = matchString(s);
348 if (!matches) {
349 matches = (yyTok == Tok_null);
350 if (matches)
351 yyTok = getToken();
352 }
353 return matches;
354}
355
356/*
357 * match any expression that can return a number, which can be
358 * 1. Literal number (e.g. '11')
359 * 2. simple identifier (e.g. 'm_count')
360 * 3. simple function call (e.g. 'size()' )
361 * 4. function call on an object (e.g. 'list.size()')
362 * 5. function call on an object (e.g. 'list->size()')
363 *
364 * Other cases:
365 * size(2,4)
366 * list().size()
367 * list(a,b).size(2,4)
368 * etc...
369 */
370static bool matchExpression()
371{
372 if (match(t: Tok_Integer)) {
373 return true;
374 }
375
376 int parenlevel = 0;
377 while (match(t: Tok_Ident) || parenlevel > 0) {
378 if (yyTok == Tok_RightParen) {
379 if (parenlevel == 0) break;
380 --parenlevel;
381 yyTok = getToken();
382 } else if (yyTok == Tok_LeftParen) {
383 yyTok = getToken();
384 if (yyTok == Tok_RightParen) {
385 yyTok = getToken();
386 } else {
387 ++parenlevel;
388 }
389 } else if (yyTok == Tok_Ident) {
390 continue;
391 } else if (parenlevel == 0) {
392 return false;
393 }
394 }
395 return true;
396}
397
398static const QString context()
399{
400 QString context(yyPackage);
401 bool innerClass = false;
402 for (int i = 0; i < yyScope.size(); ++i) {
403 if (yyScope.at(i)->type == Scope::Clazz) {
404 if (innerClass)
405 context.append(s: "$"_L1);
406 else
407 context.append(s: "."_L1);
408
409 context.append(s: yyScope.at(i)->name);
410 innerClass = true;
411 }
412 }
413 return context;
414}
415
416static void recordMessage(
417 Translator *tor, const QString &context, const QString &text, const QString &comment,
418 const QString &extracomment, bool plural, ConversionData &cd)
419{
420 TranslatorMessage msg(
421 context, text, comment, QString(),
422 yyFileName, yyLineNo, QStringList(),
423 TranslatorMessage::Unfinished, plural);
424 msg.setExtraComment(extracomment.simplified());
425 tor->extend(msg, cd);
426}
427
428static void parse(Translator *tor, ConversionData &cd)
429{
430 QString text;
431 QString com;
432 QString extracomment;
433
434 yyEOF = false;
435 yyCh = getChar();
436
437 yyTok = getToken();
438 while ( yyTok != Tok_Eof ) {
439 switch ( yyTok ) {
440 case Tok_class:
441 yyTok = getToken();
442 if(yyTok == Tok_Ident) {
443 yyScope.push(t: new Scope(yyIdent, Scope::Clazz, yyLineNo));
444 }
445 else {
446 yyMsg() << "'class' must be followed by a class name.\n";
447 break;
448 }
449 while (!match(t: Tok_LeftBrace)) {
450 yyTok = getToken();
451 }
452 break;
453
454 case Tok_tr:
455 yyTok = getToken();
456 if ( match(t: Tok_LeftParen) && matchString(s&: text) ) {
457 com.clear();
458 bool plural = false;
459
460 if ( match(t: Tok_RightParen) ) {
461 // no comment
462 } else if (match(t: Tok_Comma) && matchStringOrNull(s&: com)) { //comment
463 if ( match(t: Tok_RightParen)) {
464 // ok,
465 } else if (match(t: Tok_Comma)) {
466 plural = true;
467 }
468 }
469 recordMessage(tor, context: context(), text, comment: com, extracomment, plural, cd);
470 }
471 break;
472 case Tok_translate:
473 {
474 QString contextOverride;
475 yyTok = getToken();
476 if ( match(t: Tok_LeftParen) &&
477 matchString(s&: contextOverride) &&
478 match(t: Tok_Comma) &&
479 matchString(s&: text) ) {
480
481 com.clear();
482 bool plural = false;
483 if (!match(t: Tok_RightParen)) {
484 // look for comment
485 if ( match(t: Tok_Comma) && matchStringOrNull(s&: com)) {
486 if (!match(t: Tok_RightParen)) {
487 if (match(t: Tok_Comma) && matchExpression() && match(t: Tok_RightParen)) {
488 plural = true;
489 } else {
490 break;
491 }
492 }
493 } else {
494 break;
495 }
496 }
497 recordMessage(tor, context: contextOverride, text, comment: com, extracomment, plural, cd);
498 }
499 }
500 break;
501
502 case Tok_Ident:
503 yyTok = getToken();
504 break;
505
506 case Tok_Comment:
507 if (yyComment.startsWith(c: u':')) {
508 yyComment.remove(i: 0, len: 1);
509 extracomment.append(s: yyComment);
510 }
511 yyTok = getToken();
512 break;
513
514 case Tok_RightBrace:
515 if ( yyScope.isEmpty() ) {
516 yyMsg() << "Excess closing brace.\n";
517 }
518 else
519 delete (yyScope.pop());
520 extracomment.clear();
521 yyTok = getToken();
522 break;
523
524 case Tok_LeftBrace:
525 yyScope.push(t: new Scope(QString(), Scope::Other, yyLineNo));
526 yyTok = getToken();
527 break;
528
529 case Tok_Semicolon:
530 extracomment.clear();
531 yyTok = getToken();
532 break;
533
534 case Tok_Package:
535 yyTok = getToken();
536 while(!match(t: Tok_Semicolon)) {
537 switch(yyTok) {
538 case Tok_Ident:
539 yyPackage.append(s: yyIdent);
540 break;
541 case Tok_Dot:
542 yyPackage.append(s: "."_L1);
543 break;
544 default:
545 yyMsg() << "'package' must be followed by package name.\n";
546 break;
547 }
548 yyTok = getToken();
549 }
550 break;
551
552 default:
553 yyTok = getToken();
554 }
555 }
556
557 if ( !yyScope.isEmpty() )
558 yyMsg(line: yyScope.top()->line) << "Unbalanced opening brace.\n";
559 else if ( yyParenDepth != 0 )
560 yyMsg(line: yyParenLineNo) << "Unbalanced opening parenthesis.\n";
561}
562
563
564bool loadJava(Translator &translator, const QString &filename, ConversionData &cd)
565{
566 QFile file(filename);
567 if (!file.open(flags: QIODevice::ReadOnly)) {
568 cd.appendError(QStringLiteral("Cannot open %1: %2").arg(args: filename, args: file.errorString()));
569 return false;
570 }
571
572 yyInPos = -1;
573 yyFileName = filename;
574 yyPackage.clear();
575 yyScope.clear();
576 yyTok = -1;
577 yyParenDepth = 0;
578 yyCurLineNo = 0;
579 yyParenLineNo = 1;
580
581 QTextStream ts(&file);
582 ts.setEncoding(cd.m_sourceIsUtf16 ? QStringConverter::Utf16 : QStringConverter::Utf8);
583 ts.setAutoDetectUnicode(true);
584 yyInStr = ts.readAll();
585 yyInPos = 0;
586 yyFileName = filename;
587 yyCurLineNo = 1;
588 yyParenLineNo = 1;
589
590 parse(tor: &translator, cd);
591 return true;
592}
593
594QT_END_NAMESPACE
595

// Source: qttools/src/linguist/lupdate/java.cpp