1//! Parse PO files.
2
3extern crate linereader;
4
5use super::escape::unescape;
6use crate::catalog::Catalog;
7use crate::message::*;
8use crate::metadata::{CatalogMetadata, MetadataParseError};
9use crate::po_file::escape::UnescapeError;
10use linereader::LineReader;
11use std::path::Path;
12use std::str::{FromStr, Utf8Error};
13
/// PO file parse options.
///
/// Every option defaults to `false`, which yields a full, checked parse.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct POParseOptions {
    /// If true, only parse msgctxt, msgid and msgstr.
    ///
    /// Lines starting with `#` (comments, source references, flags) are
    /// skipped entirely.
    pub message_body_only: bool,
    /// If true, skip parsing untranslated messages.
    ///
    /// Messages whose translation is empty are not added to the catalog.
    pub translated_only: bool,
    /// If true, decode UTF-8 unsafely without checks.
    ///
    /// Only enable this for input that is known to be valid UTF-8; the bytes
    /// of each line are reinterpreted as `str` without validation.
    pub unsafe_utf8_decode: bool,
}

impl POParseOptions {
    /// Creates a default POParseOptions
    pub fn new() -> Self {
        Self::default()
    }
}
31
/// Error produced when a PO file cannot be parsed.
///
/// It carries nothing but a human-readable description of the failure.
#[derive(Debug)]
pub struct POParseError {
    message: String,
}

impl POParseError {
    /// Builds an error whose description is a copy of `s`.
    fn new(s: &str) -> Self {
        let message = String::from(s);
        POParseError { message }
    }
}
45
46impl From<std::io::Error> for POParseError {
47 fn from(value: std::io::Error) -> Self {
48 Self {
49 message: value.to_string(),
50 }
51 }
52}
53
54impl From<MetadataParseError> for POParseError {
55 fn from(value: MetadataParseError) -> Self {
56 Self {
57 message: value.to_string(),
58 }
59 }
60}
61
62impl From<UnescapeError> for POParseError {
63 fn from(value: UnescapeError) -> Self {
64 Self {
65 message: value.to_string(),
66 }
67 }
68}
69
70impl From<Utf8Error> for POParseError {
71 fn from(value: Utf8Error) -> Self {
72 Self {
73 message: value.to_string(),
74 }
75 }
76}
77
78impl std::fmt::Display for POParseError {
79 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
80 write!(f, "PO parse error: {}", self.message)
81 }
82}
83
84impl std::error::Error for POParseError {}
85
/// Identifies the part of the in-progress message that incoming string data
/// is appended to.
#[derive(Clone, Copy)]
enum POMessageField {
    /// No field selected; this is the state a freshly created parser starts in.
    None,
    /// Selected by lines starting with `#. `.
    Comments,
    /// Selected by lines starting with `#: `.
    Source,
    /// Selected by lines starting with `#, `.
    Flags,
    /// Selected by `msgctxt` lines.
    Context,
    /// Selected by `msgid` lines.
    ID,
    /// Selected by `msgid_plural` lines.
    IDPlural,
    /// Selected by `msgstr` lines.
    Translated,
    /// Selected by `msgstr[N]` lines; the payload is the index `N`.
    TranslatedPlural(usize),
}
98
/// Raw text of a single PO entry, accumulated line by line.
///
/// The `msg*` fields hold the text exactly as it appeared between the quotes;
/// escape sequences are decoded only when the entry is stored in the catalog.
#[derive(Default, Debug)]
struct POMessage {
    /// Text collected from `#. ` lines, joined with newlines.
    comments: String,
    /// Text collected from `#: ` lines, joined with newlines.
    source: String,
    /// Text collected from `#, ` lines, joined with newlines.
    flags: String,
    /// Still-escaped content of `msgctxt`.
    msgctxt: String,
    /// Still-escaped content of `msgid`.
    msgid: String,
    /// Still-escaped content of `msgid_plural`; empty for singular entries.
    msgid_plural: String,
    /// Still-escaped content of `msgstr`.
    msgstr: String,
    /// Still-escaped content of each `msgstr[N]`, indexed by `N`.
    msgstr_plural: Vec<String>,
}
110
/// Line-oriented PO parser: consumes one line at a time and builds a catalog.
struct POParserState {
    /// True when `current_message` has received data that has not been saved yet.
    dirty: bool,
    /// True once the first entry has been parsed as the catalog metadata.
    metadata_parsed: bool,
    /// Copy of the options the parser was created with.
    options: POParseOptions,
    /// The entry currently being accumulated.
    current_message: POMessage,
    /// The field of `current_message` that string data is appended to.
    current_field: POMessageField,
    /// The catalog being populated.
    catalog: Catalog,
}
119
120impl POMessage {
121 pub fn is_plural(&self) -> bool {
122 !self.msgid_plural.is_empty()
123 }
124
125 pub fn is_translated(&self) -> bool {
126 if self.is_plural() {
127 !(self.msgstr_plural.is_empty() || self.msgstr_plural[0].is_empty())
128 } else {
129 !self.msgstr.is_empty()
130 }
131 }
132}
133
134impl Default for POParserState {
135 fn default() -> Self {
136 Self::new(&POParseOptions::new())
137 }
138}
139
140impl POParserState {
141 pub fn new(options: &POParseOptions) -> Self {
142 POParserState {
143 dirty: false,
144 metadata_parsed: false,
145 options: *options,
146 current_message: POMessage::default(),
147 current_field: POMessageField::None,
148 catalog: Catalog::empty(),
149 }
150 }
151
152 fn get_field(&mut self) -> &mut String {
153 let message = &mut self.current_message;
154 match self.current_field {
155 POMessageField::Comments => &mut message.comments,
156 POMessageField::Source => &mut message.source,
157 POMessageField::Flags => &mut message.flags,
158 POMessageField::Context => &mut message.msgctxt,
159 POMessageField::ID => &mut message.msgid,
160 POMessageField::IDPlural => &mut message.msgid_plural,
161 POMessageField::Translated => &mut message.msgstr,
162 POMessageField::TranslatedPlural(idx) => {
163 while message.msgstr_plural.len() <= idx {
164 message.msgstr_plural.push(String::new());
165 }
166 &mut message.msgstr_plural[idx]
167 }
168 _ => panic!(),
169 }
170 }
171
172 fn fill_field(&mut self, data: &str) {
173 self.get_field().push_str(data)
174 }
175
176 fn fill_field_with_newline(&mut self, data: &str) {
177 let field = self.get_field();
178 if !field.is_empty() && !field.ends_with('\n') {
179 field.push('\n');
180 }
181 field.push_str(data)
182 }
183
184 fn save_message(&mut self) -> Result<(), POParseError> {
185 let mut po_message = std::mem::take(&mut self.current_message);
186 if !self.metadata_parsed {
187 if po_message.msgid.is_empty() && !po_message.msgstr.is_empty() {
188 let unescaped = unescape(&po_message.msgstr)?;
189 self.catalog.metadata = CatalogMetadata::parse(&unescaped)?;
190 self.metadata_parsed = true;
191 } else {
192 return Err(POParseError::new(
193 "Metadata does not exist or is ill-formed",
194 ));
195 }
196 } else if po_message.is_translated() || !self.options.translated_only {
197 if po_message.is_plural() {
198 for plural_form in po_message.msgstr_plural.iter_mut() {
199 *plural_form = unescape(plural_form)?;
200 }
201 self.catalog.append_or_update(
202 Message::build_plural()
203 .with_comments(po_message.comments)
204 .with_source(po_message.source)
205 .with_flags(MessageFlags::from_str(&po_message.flags).unwrap())
206 .with_msgctxt(unescape(&po_message.msgctxt)?)
207 .with_msgid(unescape(&po_message.msgid)?)
208 .with_msgid_plural(unescape(&po_message.msgid_plural)?)
209 .with_msgstr_plural(po_message.msgstr_plural)
210 .done(),
211 );
212 } else {
213 self.catalog.append_or_update(
214 Message::build_singular()
215 .with_comments(po_message.comments)
216 .with_source(po_message.source)
217 .with_flags(MessageFlags::from_str(&po_message.flags).unwrap())
218 .with_msgctxt(unescape(&po_message.msgctxt)?)
219 .with_msgid(unescape(&po_message.msgid)?)
220 .with_msgstr(unescape(&po_message.msgstr)?)
221 .done(),
222 );
223 }
224 }
225 Ok(())
226 }
227
228 pub fn consume_line(&mut self, line: &str) -> Result<(), POParseError> {
229 static HEADER_FIELDS: [(&str, POMessageField); 3] = [
230 ("#. ", POMessageField::Comments),
231 ("#: ", POMessageField::Source),
232 ("#, ", POMessageField::Flags),
233 ];
234 static CONTENT_FIELDS: [(&str, POMessageField); 14] = [
235 ("msgctxt ", POMessageField::Context),
236 ("msgid ", POMessageField::ID),
237 ("msgid_plural ", POMessageField::IDPlural),
238 ("msgstr ", POMessageField::Translated),
239 ("msgstr[0] ", POMessageField::TranslatedPlural(0)),
240 ("msgstr[1] ", POMessageField::TranslatedPlural(1)),
241 ("msgstr[2] ", POMessageField::TranslatedPlural(2)),
242 ("msgstr[3] ", POMessageField::TranslatedPlural(3)),
243 ("msgstr[4] ", POMessageField::TranslatedPlural(4)),
244 ("msgstr[5] ", POMessageField::TranslatedPlural(5)),
245 ("msgstr[6] ", POMessageField::TranslatedPlural(6)),
246 ("msgstr[7] ", POMessageField::TranslatedPlural(7)),
247 ("msgstr[8] ", POMessageField::TranslatedPlural(8)),
248 ("msgstr[9] ", POMessageField::TranslatedPlural(9)),
249 ];
250
251 if line.is_empty() {
252 if self.dirty {
253 self.save_message()?;
254 self.dirty = false;
255 }
256 } else if line.starts_with('#') {
257 if !self.options.message_body_only {
258 for (prefix, field) in &HEADER_FIELDS {
259 if line.starts_with(*prefix) {
260 self.current_field = *field;
261 self.fill_field_with_newline(&line[prefix.len()..]);
262 self.dirty = true;
263 break;
264 }
265 }
266 }
267 } else if line.starts_with('m') {
268 for (prefix, field) in &CONTENT_FIELDS {
269 if line.starts_with(*prefix) {
270 self.current_field = *field;
271 let trimmed = &line[prefix.len()..];
272 self.fill_field(&trimmed[1..trimmed.len() - 1]);
273 self.dirty = true;
274 }
275 }
276 } else if line.starts_with('"') {
277 self.fill_field(&line[1..line.len() - 1]);
278 self.dirty = true;
279 }
280
281 Ok(())
282 }
283}
284
285/// Parse a PO file with custom parse options and returns a catalog on success.
286pub fn parse_with_option(path: &Path, options: &POParseOptions) -> Result<Catalog, POParseError> {
287 let file: File = std::fs::File::open(path)?;
288 let mut parser: POParserState = POParserState::new(options);
289 let mut reader: LineReader = LineReader::new(inner:file);
290 while let Some(line: Result<&[u8], Error>) = reader.next_line() {
291 let line: &[u8] = line?;
292 let mut line: &str = if options.unsafe_utf8_decode {
293 unsafe { std::str::from_utf8_unchecked(line) }
294 } else {
295 std::str::from_utf8(line)?
296 };
297 if line.ends_with('\n') {
298 line = &line[0..line.len() - 1];
299 }
300 if line.ends_with('\r') {
301 line = &line[0..line.len() - 1];
302 }
303 parser.consume_line(line)?;
304 }
305 parser.consume_line("")?;
306 Ok(parser.catalog)
307}
308
309/// Parse a PO file and returns a catalog on success.
310pub fn parse(path: &Path) -> Result<Catalog, POParseError> {
311 parse_with_option(path, &POParseOptions::default())
312}
313