1 | /* |
2 | Gettext translation file analyzer |
3 | |
4 | SPDX-FileCopyrightText: 2007 Montel Laurent <montel@kde.org> |
5 | SPDX-FileCopyrightText: 2009 Jos van den Oever <jos@vandenoever.info> |
6 | SPDX-FileCopyrightText: 2014 Nick Shaforostoff <shaforostoff@gmail.com> |
7 | |
8 | SPDX-License-Identifier: LGPL-2.1-or-later |
9 | */ |
10 | |
11 | |
12 | #include "poextractor.h" |
13 | #include <QFile> |
14 | #include <fstream> |
15 | |
16 | using namespace KFileMetaData; |
17 | |
18 | POExtractor::(QObject* parent) |
19 | : ExtractorPlugin(parent) |
20 | { |
21 | |
22 | } |
23 | |
24 | const QStringList supportedMimeTypes = { |
25 | QStringLiteral("text/x-gettext-translation" ), |
26 | }; |
27 | |
28 | QStringList POExtractor::() const |
29 | { |
30 | return supportedMimeTypes; |
31 | } |
32 | |
33 | void POExtractor::() |
34 | { |
35 | messages++; |
36 | fuzzy+=isFuzzy; |
37 | untranslated+=(!isTranslated); |
38 | |
39 | isFuzzy = false; |
40 | isTranslated = false; |
41 | state = WHITESPACE; |
42 | } |
43 | |
44 | void POExtractor::handleComment(const char* data, quint32 length) |
45 | { |
46 | state = COMMENT; |
47 | if (length >= 8 && strncmp(s1: data, s2: "#, fuzzy" , n: 8) == 0) { // could be better |
48 | isFuzzy = true; |
49 | } |
50 | } |
51 | |
52 | void POExtractor::handleLine(const char* data, quint32 length) |
53 | { |
54 | if (state == ERROR) { |
55 | return; |
56 | } |
57 | if (state == WHITESPACE) { |
58 | if (length == 0) { |
59 | return; |
60 | } |
61 | if (data[0] != '#') { |
62 | state = COMMENT; //this allows PO files w/o comments |
63 | } else { |
64 | handleComment(data, length); |
65 | return; |
66 | } |
67 | } |
68 | if (state == COMMENT) { |
69 | if (length == 0) { |
70 | state = WHITESPACE; |
71 | } else if (data[0] == '#') { |
72 | handleComment(data, length); |
73 | } else if (length > 7 && strncmp(s1: "msgctxt" , s2: data, n: 7) == 0) { |
74 | state = MSGCTXT; |
75 | } else if (length > 7 && strncmp(s1: "msgid \"" , s2: data, n: 7) == 0) { |
76 | state = MSGID; |
77 | } else { |
78 | state = ERROR; |
79 | } |
80 | return; |
81 | } else if (length > 1 && data[0] == '"' && data[length-1] == '"' |
82 | && (state == MSGCTXT || state == MSGID || state == MSGSTR |
83 | || state == MSGID_PLURAL)) { |
84 | // continued text field |
85 | isTranslated = state == MSGSTR && length > 2; |
86 | } else if (state == MSGCTXT |
87 | && length > 7 && strncmp(s1: "msgid \"" , s2: data, n: 7) == 0) { |
88 | state = MSGID; |
89 | } else if (state == MSGID |
90 | && length > 14 && strncmp(s1: "msgid_plural \"" , s2: data, n: 14) == 0) { |
91 | state = MSGID_PLURAL; |
92 | } else if ((state == MSGID || state == MSGID_PLURAL || state == MSGSTR) |
93 | && length > 8 && strncmp(s1: "msgstr" , s2: data, n: 6) == 0) { |
94 | state = MSGSTR; |
95 | isTranslated = strncmp(s1: data+length-3, s2: " \"\"" , n: 3) != 0; |
96 | } else if (state == MSGSTR) { |
97 | if (length == 0) { |
98 | endMessage(); |
99 | } else if (data[0]=='#' || data[0]=='m') { //allow PO without empty line between entries |
100 | endMessage(); |
101 | state = COMMENT; |
102 | handleLine(data, length); |
103 | } else { |
104 | state = ERROR; |
105 | } |
106 | } else { |
107 | state = ERROR; |
108 | } |
109 | #if 0 |
110 | if (messages > 1 || state != MSGSTR) return; |
111 | |
112 | // handle special values in the first message |
113 | // assumption is that value takes up only one line |
114 | if (strncmp("\"POT-Creation-Date: " , data, 20) == 0) { |
115 | result->add(Property::TranslationTemplateDate, QByteArray(data + 20, length - 21)); |
116 | } else if (strncmp("\"PO-Revision-Date: " , data, 19) == 0) { |
117 | result->add(Property::TranslationLastUpDate, QByteArray(data + 19, length - 20)); |
118 | } else if (strncmp("\"Last-Translator: " , data, 18) == 0) { |
119 | result->add(Property::TranslationLastAuthor, QByteArray(data + 18, length - 19)); |
120 | } |
121 | #endif |
122 | } |
123 | |
124 | void POExtractor::(ExtractionResult* result) |
125 | { |
126 | std::ifstream fstream(QFile::encodeName(fileName: result->inputUrl()).constData()); |
127 | if (!fstream.is_open()) { |
128 | return; |
129 | } |
130 | |
131 | result->addType(type: Type::Text); |
132 | if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) { |
133 | return; |
134 | } |
135 | |
136 | state = WHITESPACE; |
137 | messages = 0; |
138 | untranslated = 0; |
139 | fuzzy = 0; |
140 | isFuzzy = false; |
141 | isTranslated = false; |
142 | |
143 | std::string line; |
144 | int lines = 0; |
145 | while (std::getline(is&: fstream, str&: line)) { |
146 | //TODO add a parsed text of translation units |
147 | //QByteArray arr = QByteArray::fromRawData(line.c_str(), line.size()); |
148 | //result->append(QString::fromUtf8(arr)); |
149 | |
150 | handleLine(data: line.c_str(), length: line.size()); |
151 | lines++; |
152 | |
153 | |
154 | if (messages <= 1 && state == MSGSTR) |
155 | { |
156 | // handle special values in the first message |
157 | // assumption is that value takes up only one line |
158 | if (strncmp(s1: "\"POT-Creation-Date: " , s2: line.c_str(), n: 20) == 0) { |
159 | result->add(property: Property::TranslationTemplateDate, value: QByteArray(line.c_str() + 20, line.size() - 21)); |
160 | } else if (strncmp(s1: "\"PO-Revision-Date: " , s2: line.c_str(), n: 19) == 0) { |
161 | result->add(property: Property::TranslationLastUpDate, value: QByteArray(line.c_str() + 19, line.size() - 20)); |
162 | } else if (strncmp(s1: "\"Last-Translator: " , s2: line.c_str(), n: 18) == 0) { |
163 | result->add(property: Property::TranslationLastAuthor, value: QString::fromUtf8(ba: QByteArray::fromRawData(data: line.c_str() + 18, size: line.size() - 19))); |
164 | } |
165 | } |
166 | } |
167 | handleLine(data: "" , length: 0); //for files with non-empty last line |
168 | messages--;//cause header does not count |
169 | |
170 | result->add(property: Property::TranslationUnitsTotal, value: messages); |
171 | result->add(property: Property::TranslationUnitsWithTranslation, value: messages-untranslated); |
172 | result->add(property: Property::TranslationUnitsWithDraftTranslation, value: fuzzy); |
173 | result->add(property: Property::LineCount, value: lines); |
174 | //TODO WordCount |
175 | } |
176 | |
177 | #include "moc_poextractor.cpp" |
178 | |