1 | //! Parse PO files. |
2 | |
3 | extern crate linereader; |
4 | |
5 | use super::escape::unescape; |
6 | use crate::catalog::Catalog; |
7 | use crate::message::*; |
8 | use crate::metadata::{CatalogMetadata, MetadataParseError}; |
9 | use crate::po_file::escape::UnescapeError; |
10 | use linereader::LineReader; |
11 | use std::path::Path; |
12 | use std::str::{FromStr, Utf8Error}; |
13 | |
/// Options controlling how a PO file is parsed.
///
/// All options default to `false`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct POParseOptions {
    /// If true, only parse msgctxt, msgid and msgstr; `#`-prefixed lines are skipped.
    pub message_body_only: bool,
    /// If true, skip parsing untranslated messages.
    pub translated_only: bool,
    /// If true, decode UTF-8 unsafely without checks.
    pub unsafe_utf8_decode: bool,
}
24 | |
25 | impl POParseOptions { |
26 | /// Creates a default POParseOptions |
27 | pub fn new() -> Self { |
28 | Self::default() |
29 | } |
30 | } |
31 | |
/// Error raised while parsing a PO file.
///
/// Carries only a human-readable description of what went wrong.
#[derive(Debug)]
pub struct POParseError {
    /// Description of the failure, rendered by the `Display` implementation.
    message: String,
}
37 | |
38 | impl POParseError { |
39 | fn new(s: &str) -> Self { |
40 | Self { |
41 | message: s.to_string(), |
42 | } |
43 | } |
44 | } |
45 | |
46 | impl From<std::io::Error> for POParseError { |
47 | fn from(value: std::io::Error) -> Self { |
48 | Self { |
49 | message: value.to_string(), |
50 | } |
51 | } |
52 | } |
53 | |
54 | impl From<MetadataParseError> for POParseError { |
55 | fn from(value: MetadataParseError) -> Self { |
56 | Self { |
57 | message: value.to_string(), |
58 | } |
59 | } |
60 | } |
61 | |
62 | impl From<UnescapeError> for POParseError { |
63 | fn from(value: UnescapeError) -> Self { |
64 | Self { |
65 | message: value.to_string(), |
66 | } |
67 | } |
68 | } |
69 | |
70 | impl From<Utf8Error> for POParseError { |
71 | fn from(value: Utf8Error) -> Self { |
72 | Self { |
73 | message: value.to_string(), |
74 | } |
75 | } |
76 | } |
77 | |
78 | impl std::fmt::Display for POParseError { |
79 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
80 | write!(f, "PO parse error: {}" , self.message) |
81 | } |
82 | } |
83 | |
84 | impl std::error::Error for POParseError {} |
85 | |
/// Identifies which part of the message under construction receives the next
/// piece of text read from the file.
#[derive(Clone, Copy)]
enum POMessageField {
    /// No field has been selected yet.
    None,
    /// Text from `#. ` lines.
    Comments,
    /// Text from `#: ` lines.
    Source,
    /// Text from `#, ` lines.
    Flags,
    /// The `msgctxt` string.
    Context,
    /// The `msgid` string.
    ID,
    /// The `msgid_plural` string.
    IDPlural,
    /// The `msgstr` string.
    Translated,
    /// The `msgstr[N]` string; the payload is the index `N`.
    TranslatedPlural(usize),
}
98 | |
/// Raw text of one PO entry, accumulated line by line.
///
/// The `msg*` fields hold the text exactly as it appeared between the double
/// quotes in the file; unescaping happens later, when the entry is saved.
#[derive(Default, Debug)]
struct POMessage {
    /// Text gathered from `#. ` lines, one line of text per source line.
    comments: String,
    /// Text gathered from `#: ` lines, one line of text per source line.
    source: String,
    /// Text gathered from `#, ` lines, one line of text per source line.
    flags: String,
    /// Still-escaped `msgctxt` text.
    msgctxt: String,
    /// Still-escaped `msgid` text.
    msgid: String,
    /// Still-escaped `msgid_plural` text; empty for singular entries.
    msgid_plural: String,
    /// Still-escaped `msgstr` text.
    msgstr: String,
    /// Still-escaped `msgstr[N]` texts, indexed by `N`.
    msgstr_plural: Vec<String>,
}
110 | |
111 | struct POParserState { |
112 | dirty: bool, |
113 | metadata_parsed: bool, |
114 | options: POParseOptions, |
115 | current_message: POMessage, |
116 | current_field: POMessageField, |
117 | catalog: Catalog, |
118 | } |
119 | |
120 | impl POMessage { |
121 | pub fn is_plural(&self) -> bool { |
122 | !self.msgid_plural.is_empty() |
123 | } |
124 | |
125 | pub fn is_translated(&self) -> bool { |
126 | if self.is_plural() { |
127 | !(self.msgstr_plural.is_empty() || self.msgstr_plural[0].is_empty()) |
128 | } else { |
129 | !self.msgstr.is_empty() |
130 | } |
131 | } |
132 | } |
133 | |
134 | impl Default for POParserState { |
135 | fn default() -> Self { |
136 | Self::new(&POParseOptions::new()) |
137 | } |
138 | } |
139 | |
140 | impl POParserState { |
141 | pub fn new(options: &POParseOptions) -> Self { |
142 | POParserState { |
143 | dirty: false, |
144 | metadata_parsed: false, |
145 | options: *options, |
146 | current_message: POMessage::default(), |
147 | current_field: POMessageField::None, |
148 | catalog: Catalog::empty(), |
149 | } |
150 | } |
151 | |
152 | fn get_field(&mut self) -> &mut String { |
153 | let message = &mut self.current_message; |
154 | match self.current_field { |
155 | POMessageField::Comments => &mut message.comments, |
156 | POMessageField::Source => &mut message.source, |
157 | POMessageField::Flags => &mut message.flags, |
158 | POMessageField::Context => &mut message.msgctxt, |
159 | POMessageField::ID => &mut message.msgid, |
160 | POMessageField::IDPlural => &mut message.msgid_plural, |
161 | POMessageField::Translated => &mut message.msgstr, |
162 | POMessageField::TranslatedPlural(idx) => { |
163 | while message.msgstr_plural.len() <= idx { |
164 | message.msgstr_plural.push(String::new()); |
165 | } |
166 | &mut message.msgstr_plural[idx] |
167 | } |
168 | _ => panic!(), |
169 | } |
170 | } |
171 | |
172 | fn fill_field(&mut self, data: &str) { |
173 | self.get_field().push_str(data) |
174 | } |
175 | |
176 | fn fill_field_with_newline(&mut self, data: &str) { |
177 | let field = self.get_field(); |
178 | if !field.is_empty() && !field.ends_with(' \n' ) { |
179 | field.push(' \n' ); |
180 | } |
181 | field.push_str(data) |
182 | } |
183 | |
184 | fn save_message(&mut self) -> Result<(), POParseError> { |
185 | let mut po_message = std::mem::take(&mut self.current_message); |
186 | if !self.metadata_parsed { |
187 | if po_message.msgid.is_empty() && !po_message.msgstr.is_empty() { |
188 | let unescaped = unescape(&po_message.msgstr)?; |
189 | self.catalog.metadata = CatalogMetadata::parse(&unescaped)?; |
190 | self.metadata_parsed = true; |
191 | } else { |
192 | return Err(POParseError::new( |
193 | "Metadata does not exist or is ill-formed" , |
194 | )); |
195 | } |
196 | } else if po_message.is_translated() || !self.options.translated_only { |
197 | if po_message.is_plural() { |
198 | for plural_form in po_message.msgstr_plural.iter_mut() { |
199 | *plural_form = unescape(plural_form)?; |
200 | } |
201 | self.catalog.append_or_update( |
202 | Message::build_plural() |
203 | .with_comments(po_message.comments) |
204 | .with_source(po_message.source) |
205 | .with_flags(MessageFlags::from_str(&po_message.flags).unwrap()) |
206 | .with_msgctxt(unescape(&po_message.msgctxt)?) |
207 | .with_msgid(unescape(&po_message.msgid)?) |
208 | .with_msgid_plural(unescape(&po_message.msgid_plural)?) |
209 | .with_msgstr_plural(po_message.msgstr_plural) |
210 | .done(), |
211 | ); |
212 | } else { |
213 | self.catalog.append_or_update( |
214 | Message::build_singular() |
215 | .with_comments(po_message.comments) |
216 | .with_source(po_message.source) |
217 | .with_flags(MessageFlags::from_str(&po_message.flags).unwrap()) |
218 | .with_msgctxt(unescape(&po_message.msgctxt)?) |
219 | .with_msgid(unescape(&po_message.msgid)?) |
220 | .with_msgstr(unescape(&po_message.msgstr)?) |
221 | .done(), |
222 | ); |
223 | } |
224 | } |
225 | Ok(()) |
226 | } |
227 | |
228 | pub fn consume_line(&mut self, line: &str) -> Result<(), POParseError> { |
229 | static HEADER_FIELDS: [(&str, POMessageField); 3] = [ |
230 | ("#. " , POMessageField::Comments), |
231 | ("#: " , POMessageField::Source), |
232 | ("#, " , POMessageField::Flags), |
233 | ]; |
234 | static CONTENT_FIELDS: [(&str, POMessageField); 14] = [ |
235 | ("msgctxt " , POMessageField::Context), |
236 | ("msgid " , POMessageField::ID), |
237 | ("msgid_plural " , POMessageField::IDPlural), |
238 | ("msgstr " , POMessageField::Translated), |
239 | ("msgstr[0] " , POMessageField::TranslatedPlural(0)), |
240 | ("msgstr[1] " , POMessageField::TranslatedPlural(1)), |
241 | ("msgstr[2] " , POMessageField::TranslatedPlural(2)), |
242 | ("msgstr[3] " , POMessageField::TranslatedPlural(3)), |
243 | ("msgstr[4] " , POMessageField::TranslatedPlural(4)), |
244 | ("msgstr[5] " , POMessageField::TranslatedPlural(5)), |
245 | ("msgstr[6] " , POMessageField::TranslatedPlural(6)), |
246 | ("msgstr[7] " , POMessageField::TranslatedPlural(7)), |
247 | ("msgstr[8] " , POMessageField::TranslatedPlural(8)), |
248 | ("msgstr[9] " , POMessageField::TranslatedPlural(9)), |
249 | ]; |
250 | |
251 | if line.is_empty() { |
252 | if self.dirty { |
253 | self.save_message()?; |
254 | self.dirty = false; |
255 | } |
256 | } else if line.starts_with('#' ) { |
257 | if !self.options.message_body_only { |
258 | for (prefix, field) in &HEADER_FIELDS { |
259 | if line.starts_with(*prefix) { |
260 | self.current_field = *field; |
261 | self.fill_field_with_newline(&line[prefix.len()..]); |
262 | self.dirty = true; |
263 | break; |
264 | } |
265 | } |
266 | } |
267 | } else if line.starts_with('m' ) { |
268 | for (prefix, field) in &CONTENT_FIELDS { |
269 | if line.starts_with(*prefix) { |
270 | self.current_field = *field; |
271 | let trimmed = &line[prefix.len()..]; |
272 | self.fill_field(&trimmed[1..trimmed.len() - 1]); |
273 | self.dirty = true; |
274 | } |
275 | } |
276 | } else if line.starts_with('"' ) { |
277 | self.fill_field(&line[1..line.len() - 1]); |
278 | self.dirty = true; |
279 | } |
280 | |
281 | Ok(()) |
282 | } |
283 | } |
284 | |
285 | /// Parse a PO file with custom parse options and returns a catalog on success. |
286 | pub fn parse_with_option(path: &Path, options: &POParseOptions) -> Result<Catalog, POParseError> { |
287 | let file: File = std::fs::File::open(path)?; |
288 | let mut parser: POParserState = POParserState::new(options); |
289 | let mut reader: LineReader = LineReader::new(inner:file); |
290 | while let Some(line: Result<&[u8], Error>) = reader.next_line() { |
291 | let line: &[u8] = line?; |
292 | let mut line: &str = if options.unsafe_utf8_decode { |
293 | unsafe { std::str::from_utf8_unchecked(line) } |
294 | } else { |
295 | std::str::from_utf8(line)? |
296 | }; |
297 | if line.ends_with(' \n' ) { |
298 | line = &line[0..line.len() - 1]; |
299 | } |
300 | if line.ends_with(' \r' ) { |
301 | line = &line[0..line.len() - 1]; |
302 | } |
303 | parser.consume_line(line)?; |
304 | } |
305 | parser.consume_line("" )?; |
306 | Ok(parser.catalog) |
307 | } |
308 | |
309 | /// Parse a PO file and returns a catalog on success. |
310 | pub fn parse(path: &Path) -> Result<Catalog, POParseError> { |
311 | parse_with_option(path, &POParseOptions::default()) |
312 | } |
313 | |