| 1 | //! Parse PO files. |
| 2 | |
| 3 | extern crate linereader; |
| 4 | |
| 5 | use super::escape::unescape; |
| 6 | use crate::catalog::Catalog; |
| 7 | use crate::message::*; |
| 8 | use crate::metadata::{CatalogMetadata, MetadataParseError}; |
| 9 | use crate::po_file::escape::UnescapeError; |
| 10 | use linereader::LineReader; |
| 11 | use std::path::Path; |
| 12 | use std::str::{FromStr, Utf8Error}; |
| 13 | |
/// PO file parse options.
///
/// Every flag defaults to `false`.
#[derive(Clone, Copy, Debug, Default)]
pub struct POParseOptions {
    /// If true, only parse msgctxt, msgid and msgstr; lines starting with
    /// `#` (comments, source references, flags) are skipped.
    pub message_body_only: bool,
    /// If true, skip parsing untranslated messages: entries whose
    /// translation is empty are not added to the catalog.
    pub translated_only: bool,
    /// If true, decode UTF-8 unsafely without checks. The caller is then
    /// responsible for the input actually being valid UTF-8.
    pub unsafe_utf8_decode: bool,
}
| 24 | |
| 25 | impl POParseOptions { |
| 26 | /// Creates a default POParseOptions |
| 27 | pub fn new() -> Self { |
| 28 | Self::default() |
| 29 | } |
| 30 | } |
| 31 | |
/// Error raised while parsing a PO file.
///
/// Carries only a human-readable description of what went wrong.
#[derive(Debug)]
pub struct POParseError {
    /// Description of the failure, shown by the `Display` implementation.
    message: String,
}
| 37 | |
| 38 | impl POParseError { |
| 39 | fn new(s: &str) -> Self { |
| 40 | Self { |
| 41 | message: s.to_string(), |
| 42 | } |
| 43 | } |
| 44 | } |
| 45 | |
| 46 | impl From<std::io::Error> for POParseError { |
| 47 | fn from(value: std::io::Error) -> Self { |
| 48 | Self { |
| 49 | message: value.to_string(), |
| 50 | } |
| 51 | } |
| 52 | } |
| 53 | |
| 54 | impl From<MetadataParseError> for POParseError { |
| 55 | fn from(value: MetadataParseError) -> Self { |
| 56 | Self { |
| 57 | message: value.to_string(), |
| 58 | } |
| 59 | } |
| 60 | } |
| 61 | |
| 62 | impl From<UnescapeError> for POParseError { |
| 63 | fn from(value: UnescapeError) -> Self { |
| 64 | Self { |
| 65 | message: value.to_string(), |
| 66 | } |
| 67 | } |
| 68 | } |
| 69 | |
| 70 | impl From<Utf8Error> for POParseError { |
| 71 | fn from(value: Utf8Error) -> Self { |
| 72 | Self { |
| 73 | message: value.to_string(), |
| 74 | } |
| 75 | } |
| 76 | } |
| 77 | |
| 78 | impl std::fmt::Display for POParseError { |
| 79 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
| 80 | write!(f, "PO parse error: {}" , self.message) |
| 81 | } |
| 82 | } |
| 83 | |
| 84 | impl std::error::Error for POParseError {} |
| 85 | |
/// Identifies which part of the pending message incoming text belongs to.
#[derive(Clone, Copy)]
enum POMessageField {
    /// No field has been selected yet.
    None,
    /// Text of `#. ` lines.
    Comments,
    /// Text of `#: ` lines.
    Source,
    /// Text of `#, ` lines.
    Flags,
    /// Value of `msgctxt`.
    Context,
    /// Value of `msgid`.
    ID,
    /// Value of `msgid_plural`.
    IDPlural,
    /// Value of `msgstr`.
    Translated,
    /// Value of `msgstr[N]`; the payload is the index `N`.
    TranslatedPlural(usize),
}
| 98 | |
/// Raw text of one PO entry as collected line by line.
///
/// The `msg*` values are stored exactly as written between the quotes;
/// escape sequences are resolved later, when the entry is saved.
#[derive(Default, Debug)]
struct POMessage {
    /// Accumulated text of `#. ` lines, joined with newlines.
    comments: String,
    /// Accumulated text of `#: ` lines, joined with newlines.
    source: String,
    /// Accumulated text of `#, ` lines, joined with newlines.
    flags: String,
    /// Raw value of `msgctxt`.
    msgctxt: String,
    /// Raw value of `msgid`.
    msgid: String,
    /// Raw value of `msgid_plural`; empty for singular entries.
    msgid_plural: String,
    /// Raw value of `msgstr`.
    msgstr: String,
    /// Raw values of `msgstr[N]`, indexed by `N`.
    msgstr_plural: Vec<String>,
}
| 110 | |
| 111 | struct POParserState { |
| 112 | dirty: bool, |
| 113 | metadata_parsed: bool, |
| 114 | options: POParseOptions, |
| 115 | current_message: POMessage, |
| 116 | current_field: POMessageField, |
| 117 | catalog: Catalog, |
| 118 | } |
| 119 | |
| 120 | impl POMessage { |
| 121 | pub fn is_plural(&self) -> bool { |
| 122 | !self.msgid_plural.is_empty() |
| 123 | } |
| 124 | |
| 125 | pub fn is_translated(&self) -> bool { |
| 126 | if self.is_plural() { |
| 127 | !(self.msgstr_plural.is_empty() || self.msgstr_plural[0].is_empty()) |
| 128 | } else { |
| 129 | !self.msgstr.is_empty() |
| 130 | } |
| 131 | } |
| 132 | } |
| 133 | |
| 134 | impl Default for POParserState { |
| 135 | fn default() -> Self { |
| 136 | Self::new(&POParseOptions::new()) |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | impl POParserState { |
| 141 | pub fn new(options: &POParseOptions) -> Self { |
| 142 | POParserState { |
| 143 | dirty: false, |
| 144 | metadata_parsed: false, |
| 145 | options: *options, |
| 146 | current_message: POMessage::default(), |
| 147 | current_field: POMessageField::None, |
| 148 | catalog: Catalog::empty(), |
| 149 | } |
| 150 | } |
| 151 | |
| 152 | fn get_field(&mut self) -> &mut String { |
| 153 | let message = &mut self.current_message; |
| 154 | match self.current_field { |
| 155 | POMessageField::Comments => &mut message.comments, |
| 156 | POMessageField::Source => &mut message.source, |
| 157 | POMessageField::Flags => &mut message.flags, |
| 158 | POMessageField::Context => &mut message.msgctxt, |
| 159 | POMessageField::ID => &mut message.msgid, |
| 160 | POMessageField::IDPlural => &mut message.msgid_plural, |
| 161 | POMessageField::Translated => &mut message.msgstr, |
| 162 | POMessageField::TranslatedPlural(idx) => { |
| 163 | while message.msgstr_plural.len() <= idx { |
| 164 | message.msgstr_plural.push(String::new()); |
| 165 | } |
| 166 | &mut message.msgstr_plural[idx] |
| 167 | } |
| 168 | _ => panic!(), |
| 169 | } |
| 170 | } |
| 171 | |
| 172 | fn fill_field(&mut self, data: &str) { |
| 173 | self.get_field().push_str(data) |
| 174 | } |
| 175 | |
| 176 | fn fill_field_with_newline(&mut self, data: &str) { |
| 177 | let field = self.get_field(); |
| 178 | if !field.is_empty() && !field.ends_with(' \n' ) { |
| 179 | field.push(' \n' ); |
| 180 | } |
| 181 | field.push_str(data) |
| 182 | } |
| 183 | |
| 184 | fn save_message(&mut self) -> Result<(), POParseError> { |
| 185 | let mut po_message = std::mem::take(&mut self.current_message); |
| 186 | if !self.metadata_parsed { |
| 187 | if po_message.msgid.is_empty() && !po_message.msgstr.is_empty() { |
| 188 | let unescaped = unescape(&po_message.msgstr)?; |
| 189 | self.catalog.metadata = CatalogMetadata::parse(&unescaped)?; |
| 190 | self.metadata_parsed = true; |
| 191 | } else { |
| 192 | return Err(POParseError::new( |
| 193 | "Metadata does not exist or is ill-formed" , |
| 194 | )); |
| 195 | } |
| 196 | } else if po_message.is_translated() || !self.options.translated_only { |
| 197 | if po_message.is_plural() { |
| 198 | for plural_form in po_message.msgstr_plural.iter_mut() { |
| 199 | *plural_form = unescape(plural_form)?; |
| 200 | } |
| 201 | self.catalog.append_or_update( |
| 202 | Message::build_plural() |
| 203 | .with_comments(po_message.comments) |
| 204 | .with_source(po_message.source) |
| 205 | .with_flags(MessageFlags::from_str(&po_message.flags).unwrap()) |
| 206 | .with_msgctxt(unescape(&po_message.msgctxt)?) |
| 207 | .with_msgid(unescape(&po_message.msgid)?) |
| 208 | .with_msgid_plural(unescape(&po_message.msgid_plural)?) |
| 209 | .with_msgstr_plural(po_message.msgstr_plural) |
| 210 | .done(), |
| 211 | ); |
| 212 | } else { |
| 213 | self.catalog.append_or_update( |
| 214 | Message::build_singular() |
| 215 | .with_comments(po_message.comments) |
| 216 | .with_source(po_message.source) |
| 217 | .with_flags(MessageFlags::from_str(&po_message.flags).unwrap()) |
| 218 | .with_msgctxt(unescape(&po_message.msgctxt)?) |
| 219 | .with_msgid(unescape(&po_message.msgid)?) |
| 220 | .with_msgstr(unescape(&po_message.msgstr)?) |
| 221 | .done(), |
| 222 | ); |
| 223 | } |
| 224 | } |
| 225 | Ok(()) |
| 226 | } |
| 227 | |
| 228 | pub fn consume_line(&mut self, line: &str) -> Result<(), POParseError> { |
| 229 | static HEADER_FIELDS: [(&str, POMessageField); 3] = [ |
| 230 | ("#. " , POMessageField::Comments), |
| 231 | ("#: " , POMessageField::Source), |
| 232 | ("#, " , POMessageField::Flags), |
| 233 | ]; |
| 234 | static CONTENT_FIELDS: [(&str, POMessageField); 14] = [ |
| 235 | ("msgctxt " , POMessageField::Context), |
| 236 | ("msgid " , POMessageField::ID), |
| 237 | ("msgid_plural " , POMessageField::IDPlural), |
| 238 | ("msgstr " , POMessageField::Translated), |
| 239 | ("msgstr[0] " , POMessageField::TranslatedPlural(0)), |
| 240 | ("msgstr[1] " , POMessageField::TranslatedPlural(1)), |
| 241 | ("msgstr[2] " , POMessageField::TranslatedPlural(2)), |
| 242 | ("msgstr[3] " , POMessageField::TranslatedPlural(3)), |
| 243 | ("msgstr[4] " , POMessageField::TranslatedPlural(4)), |
| 244 | ("msgstr[5] " , POMessageField::TranslatedPlural(5)), |
| 245 | ("msgstr[6] " , POMessageField::TranslatedPlural(6)), |
| 246 | ("msgstr[7] " , POMessageField::TranslatedPlural(7)), |
| 247 | ("msgstr[8] " , POMessageField::TranslatedPlural(8)), |
| 248 | ("msgstr[9] " , POMessageField::TranslatedPlural(9)), |
| 249 | ]; |
| 250 | |
| 251 | if line.is_empty() { |
| 252 | if self.dirty { |
| 253 | self.save_message()?; |
| 254 | self.dirty = false; |
| 255 | } |
| 256 | } else if line.starts_with('#' ) { |
| 257 | if !self.options.message_body_only { |
| 258 | for (prefix, field) in &HEADER_FIELDS { |
| 259 | if line.starts_with(*prefix) { |
| 260 | self.current_field = *field; |
| 261 | self.fill_field_with_newline(&line[prefix.len()..]); |
| 262 | self.dirty = true; |
| 263 | break; |
| 264 | } |
| 265 | } |
| 266 | } |
| 267 | } else if line.starts_with('m' ) { |
| 268 | for (prefix, field) in &CONTENT_FIELDS { |
| 269 | if line.starts_with(*prefix) { |
| 270 | self.current_field = *field; |
| 271 | let trimmed = &line[prefix.len()..]; |
| 272 | self.fill_field(&trimmed[1..trimmed.len() - 1]); |
| 273 | self.dirty = true; |
| 274 | } |
| 275 | } |
| 276 | } else if line.starts_with('"' ) { |
| 277 | self.fill_field(&line[1..line.len() - 1]); |
| 278 | self.dirty = true; |
| 279 | } |
| 280 | |
| 281 | Ok(()) |
| 282 | } |
| 283 | } |
| 284 | |
| 285 | /// Parse a PO file with custom parse options and returns a catalog on success. |
| 286 | pub fn parse_with_option(path: &Path, options: &POParseOptions) -> Result<Catalog, POParseError> { |
| 287 | let file: File = std::fs::File::open(path)?; |
| 288 | let mut parser: POParserState = POParserState::new(options); |
| 289 | let mut reader: LineReader = LineReader::new(inner:file); |
| 290 | while let Some(line: Result<&[u8], Error>) = reader.next_line() { |
| 291 | let line: &[u8] = line?; |
| 292 | let mut line: &str = if options.unsafe_utf8_decode { |
| 293 | unsafe { std::str::from_utf8_unchecked(line) } |
| 294 | } else { |
| 295 | std::str::from_utf8(line)? |
| 296 | }; |
| 297 | if line.ends_with(' \n' ) { |
| 298 | line = &line[0..line.len() - 1]; |
| 299 | } |
| 300 | if line.ends_with(' \r' ) { |
| 301 | line = &line[0..line.len() - 1]; |
| 302 | } |
| 303 | parser.consume_line(line)?; |
| 304 | } |
| 305 | parser.consume_line("" )?; |
| 306 | Ok(parser.catalog) |
| 307 | } |
| 308 | |
| 309 | /// Parse a PO file and returns a catalog on success. |
| 310 | pub fn parse(path: &Path) -> Result<Catalog, POParseError> { |
| 311 | parse_with_option(path, &POParseOptions::default()) |
| 312 | } |
| 313 | |