1/*
2 Gettext translation file analyzer
3
4 SPDX-FileCopyrightText: 2007 Montel Laurent <montel@kde.org>
5 SPDX-FileCopyrightText: 2009 Jos van den Oever <jos@vandenoever.info>
6 SPDX-FileCopyrightText: 2014 Nick Shaforostoff <shaforostoff@gmail.com>
7
8 SPDX-License-Identifier: LGPL-2.1-or-later
9*/
10
11
12#include "poextractor.h"
13#include <QFile>
14#include <fstream>
15
16using namespace KFileMetaData;
17
18POExtractor::POExtractor(QObject* parent)
19 : ExtractorPlugin(parent)
20{
21
22}
23
24const QStringList supportedMimeTypes = {
25 QStringLiteral("text/x-gettext-translation"),
26};
27
28QStringList POExtractor::mimetypes() const
29{
30 return supportedMimeTypes;
31}
32
33void POExtractor::endMessage()
34{
35 messages++;
36 fuzzy+=isFuzzy;
37 untranslated+=(!isTranslated);
38
39 isFuzzy = false;
40 isTranslated = false;
41 state = WHITESPACE;
42}
43
44void POExtractor::handleComment(const char* data, quint32 length)
45{
46 state = COMMENT;
47 if (length >= 8 && strncmp(s1: data, s2: "#, fuzzy", n: 8) == 0) { // could be better
48 isFuzzy = true;
49 }
50}
51
52void POExtractor::handleLine(const char* data, quint32 length)
53{
54 if (state == ERROR) {
55 return;
56 }
57 if (state == WHITESPACE) {
58 if (length == 0) {
59 return;
60 }
61 if (data[0] != '#') {
62 state = COMMENT; //this allows PO files w/o comments
63 } else {
64 handleComment(data, length);
65 return;
66 }
67 }
68 if (state == COMMENT) {
69 if (length == 0) {
70 state = WHITESPACE;
71 } else if (data[0] == '#') {
72 handleComment(data, length);
73 } else if (length > 7 && strncmp(s1: "msgctxt", s2: data, n: 7) == 0) {
74 state = MSGCTXT;
75 } else if (length > 7 && strncmp(s1: "msgid \"", s2: data, n: 7) == 0) {
76 state = MSGID;
77 } else {
78 state = ERROR;
79 }
80 return;
81 } else if (length > 1 && data[0] == '"' && data[length-1] == '"'
82 && (state == MSGCTXT || state == MSGID || state == MSGSTR
83 || state == MSGID_PLURAL)) {
84 // continued text field
85 isTranslated = state == MSGSTR && length > 2;
86 } else if (state == MSGCTXT
87 && length > 7 && strncmp(s1: "msgid \"", s2: data, n: 7) == 0) {
88 state = MSGID;
89 } else if (state == MSGID
90 && length > 14 && strncmp(s1: "msgid_plural \"", s2: data, n: 14) == 0) {
91 state = MSGID_PLURAL;
92 } else if ((state == MSGID || state == MSGID_PLURAL || state == MSGSTR)
93 && length > 8 && strncmp(s1: "msgstr", s2: data, n: 6) == 0) {
94 state = MSGSTR;
95 isTranslated = strncmp(s1: data+length-3, s2: " \"\"", n: 3) != 0;
96 } else if (state == MSGSTR) {
97 if (length == 0) {
98 endMessage();
99 } else if (data[0]=='#' || data[0]=='m') { //allow PO without empty line between entries
100 endMessage();
101 state = COMMENT;
102 handleLine(data, length);
103 } else {
104 state = ERROR;
105 }
106 } else {
107 state = ERROR;
108 }
109#if 0
110 if (messages > 1 || state != MSGSTR) return;
111
112 // handle special values in the first message
113 // assumption is that value takes up only one line
114 if (strncmp("\"POT-Creation-Date: ", data, 20) == 0) {
115 result->add(Property::TranslationTemplateDate, QByteArray(data + 20, length - 21));
116 } else if (strncmp("\"PO-Revision-Date: ", data, 19) == 0) {
117 result->add(Property::TranslationLastUpDate, QByteArray(data + 19, length - 20));
118 } else if (strncmp("\"Last-Translator: ", data, 18) == 0) {
119 result->add(Property::TranslationLastAuthor, QByteArray(data + 18, length - 19));
120 }
121#endif
122}
123
124void POExtractor::extract(ExtractionResult* result)
125{
126 std::ifstream fstream(QFile::encodeName(fileName: result->inputUrl()).constData());
127 if (!fstream.is_open()) {
128 return;
129 }
130
131 result->addType(type: Type::Text);
132 if (!(result->inputFlags() & ExtractionResult::ExtractPlainText)) {
133 return;
134 }
135
136 state = WHITESPACE;
137 messages = 0;
138 untranslated = 0;
139 fuzzy = 0;
140 isFuzzy = false;
141 isTranslated = false;
142
143 std::string line;
144 int lines = 0;
145 while (std::getline(is&: fstream, str&: line)) {
146 //TODO add a parsed text of translation units
147 //QByteArray arr = QByteArray::fromRawData(line.c_str(), line.size());
148 //result->append(QString::fromUtf8(arr));
149
150 handleLine(data: line.c_str(), length: line.size());
151 lines++;
152
153
154 if (messages <= 1 && state == MSGSTR)
155 {
156 // handle special values in the first message
157 // assumption is that value takes up only one line
158 if (strncmp(s1: "\"POT-Creation-Date: ", s2: line.c_str(), n: 20) == 0) {
159 result->add(property: Property::TranslationTemplateDate, value: QByteArray(line.c_str() + 20, line.size() - 21));
160 } else if (strncmp(s1: "\"PO-Revision-Date: ", s2: line.c_str(), n: 19) == 0) {
161 result->add(property: Property::TranslationLastUpDate, value: QByteArray(line.c_str() + 19, line.size() - 20));
162 } else if (strncmp(s1: "\"Last-Translator: ", s2: line.c_str(), n: 18) == 0) {
163 result->add(property: Property::TranslationLastAuthor, value: QString::fromUtf8(ba: QByteArray::fromRawData(data: line.c_str() + 18, size: line.size() - 19)));
164 }
165 }
166 }
167 handleLine(data: "", length: 0); //for files with non-empty last line
168 messages--;//cause header does not count
169
170 result->add(property: Property::TranslationUnitsTotal, value: messages);
171 result->add(property: Property::TranslationUnitsWithTranslation, value: messages-untranslated);
172 result->add(property: Property::TranslationUnitsWithDraftTranslation, value: fuzzy);
173 result->add(property: Property::LineCount, value: lines);
174 //TODO WordCount
175}
176
177#include "moc_poextractor.cpp"
178

// Source: kfilemetadata/src/extractors/poextractor.cpp