| 1 | // Copyright © SixtyFPS GmbH <info@slint.dev> |
| 2 | // SPDX-License-Identifier: GPL-3.0-only OR LicenseRef-Slint-Royalty-free-2.0 OR LicenseRef-Slint-Software-3.0 |
| 3 | |
| 4 | use crate::llr::Expression; |
| 5 | use core::ops::Not; |
| 6 | use smol_str::{SmolStr, ToSmolStr}; |
| 7 | use std::collections::hash_map::Entry; |
| 8 | use std::collections::HashMap; |
| 9 | use std::path::Path; |
| 10 | use std::rc::Rc; |
| 11 | |
| 12 | #[derive (Clone, Debug)] |
| 13 | pub struct Translations { |
| 14 | /// An array with all the array of string |
| 15 | /// The first vector index is stored in the LLR. |
| 16 | /// The inner vector index is the language id. (The first is the original) |
| 17 | /// Only contains the string that are not having plural forms |
| 18 | pub strings: Vec<Vec<Option<SmolStr>>>, |
| 19 | /// An array with all the strings that are used in a plural form. |
| 20 | /// The first vector index is stored in the LLR. |
| 21 | /// The inner vector index is the language. (The first is the original string) |
| 22 | /// The last vector contains each form |
| 23 | pub plurals: Vec<Vec<Option<Vec<SmolStr>>>>, |
| 24 | |
| 25 | /// Expression is a function that maps its first and only argument (an integer) |
| 26 | /// to the plural form index (an integer) |
| 27 | /// It can only do basic mathematical operations. |
| 28 | /// The expression cannot reference properties or variable. |
| 29 | /// Only builtin math functions, and its first argument |
| 30 | pub plural_rules: Vec<Option<Expression>>, |
| 31 | |
| 32 | /// The "names" of the languages |
| 33 | pub languages: Vec<SmolStr>, |
| 34 | } |
| 35 | |
| 36 | #[derive (Clone)] |
| 37 | pub struct TranslationsBuilder { |
| 38 | result: Translations, |
| 39 | /// Maps (msgid, msgid_plural, msgctx) to the index in the result |
| 40 | /// (the index is in strings or plurals depending if there is a plural) |
| 41 | map: HashMap<(SmolStr, SmolStr, SmolStr), usize>, |
| 42 | |
| 43 | /// The catalog containing the translations |
| 44 | catalogs: Rc<Vec<polib::catalog::Catalog>>, |
| 45 | } |
| 46 | |
| 47 | impl TranslationsBuilder { |
| 48 | pub fn load_translations(path: &Path, domain: &str) -> std::io::Result<Self> { |
| 49 | let mut languages = vec!["" .into()]; |
| 50 | let mut catalogs = Vec::new(); |
| 51 | let mut plural_rules = |
| 52 | vec![Some(plural_rule_parser::parse_rule_expression("n!=1" ).unwrap())]; |
| 53 | for l in std::fs::read_dir(path) |
| 54 | .map_err(|e| std::io::Error::other(format!("Error reading directory {path:?}: {e}" )))? |
| 55 | { |
| 56 | let l = l?; |
| 57 | let path = l.path().join("LC_MESSAGES" ).join(format!(" {domain}.po" )); |
| 58 | if path.exists() { |
| 59 | let catalog = polib::po_file::parse(&path).map_err(|e| { |
| 60 | std::io::Error::other(format!("Error parsing {}: {e}" , path.display())) |
| 61 | })?; |
| 62 | languages.push(l.file_name().to_string_lossy().into()); |
| 63 | plural_rules.push(Some( |
| 64 | plural_rule_parser::parse_rule_expression(&catalog.metadata.plural_rules.expr) |
| 65 | .map_err(|_| { |
| 66 | std::io::Error::other(format!( |
| 67 | "Error parsing plural rules in {}" , |
| 68 | path.display() |
| 69 | )) |
| 70 | })?, |
| 71 | )); |
| 72 | catalogs.push(catalog); |
| 73 | } |
| 74 | } |
| 75 | if catalogs.is_empty() { |
| 76 | return Err(std::io::Error::other(format!( |
| 77 | "No translations found. We look for files in ' {}/<lang>/LC_MESSAGES/ {domain}.po" , |
| 78 | path.display() |
| 79 | ))); |
| 80 | } |
| 81 | Ok(Self { |
| 82 | result: Translations { |
| 83 | strings: Vec::new(), |
| 84 | plurals: Vec::new(), |
| 85 | plural_rules, |
| 86 | languages, |
| 87 | }, |
| 88 | map: HashMap::new(), |
| 89 | catalogs: Rc::new(catalogs), |
| 90 | }) |
| 91 | } |
| 92 | |
| 93 | pub fn lower_translate_call(&mut self, args: Vec<Expression>) -> Expression { |
| 94 | let [original, contextid, _domain, format_args, n, plural] = args |
| 95 | .try_into() |
| 96 | .expect("The resolving pass should have ensured that the arguments are correct" ); |
| 97 | let original = get_string(original).expect("original must be a string" ); |
| 98 | let contextid = get_string(contextid).expect("contextid must be a string" ); |
| 99 | let plural = get_string(plural).expect("plural must be a string" ); |
| 100 | |
| 101 | let is_plural = |
| 102 | !plural.is_empty() || !matches!(n, Expression::NumberLiteral(f) if f == 1.0); |
| 103 | |
| 104 | match self.map.entry((original.clone(), plural.clone(), contextid.clone())) { |
| 105 | Entry::Occupied(entry) => Expression::TranslationReference { |
| 106 | format_args: format_args.into(), |
| 107 | string_index: *entry.get(), |
| 108 | plural: is_plural.then(|| n.into()), |
| 109 | }, |
| 110 | Entry::Vacant(entry) => { |
| 111 | let messages = self.catalogs.iter().map(|catalog| { |
| 112 | catalog.find_message( |
| 113 | contextid.is_empty().not().then_some(contextid.as_str()), |
| 114 | &original, |
| 115 | is_plural.then_some(plural.as_str()), |
| 116 | ) |
| 117 | }); |
| 118 | let idx = if is_plural { |
| 119 | let messages = std::iter::once(Some(vec![original.clone(), plural.clone()])) |
| 120 | .chain(messages.map(|x| { |
| 121 | x.and_then(|x| { |
| 122 | Some( |
| 123 | x.msgstr_plural() |
| 124 | .ok()? |
| 125 | .iter() |
| 126 | .map(|x| x.to_smolstr()) |
| 127 | .collect(), |
| 128 | ) |
| 129 | }) |
| 130 | })) |
| 131 | .collect(); |
| 132 | self.result.plurals.push(messages); |
| 133 | self.result.plurals.len() - 1 |
| 134 | } else { |
| 135 | let messages = std::iter::once(Some(original.clone())) |
| 136 | .chain( |
| 137 | messages |
| 138 | .map(|x| x.and_then(|x| x.msgstr().ok()).map(|x| x.to_smolstr())), |
| 139 | ) |
| 140 | .collect::<Vec<_>>(); |
| 141 | self.result.strings.push(messages); |
| 142 | self.result.strings.len() - 1 |
| 143 | }; |
| 144 | Expression::TranslationReference { |
| 145 | format_args: format_args.into(), |
| 146 | string_index: *entry.insert(idx), |
| 147 | plural: is_plural.then(|| n.into()), |
| 148 | } |
| 149 | } |
| 150 | } |
| 151 | } |
| 152 | |
| 153 | pub fn result(self) -> Translations { |
| 154 | self.result |
| 155 | } |
| 156 | |
| 157 | pub fn collect_characters_seen(&self, characters_seen: &mut impl Extend<char>) { |
| 158 | characters_seen.extend( |
| 159 | self.catalogs |
| 160 | .iter() |
| 161 | .flat_map(|catalog| { |
| 162 | catalog.messages().flat_map(|msg| { |
| 163 | msg.msgstr().ok().into_iter().chain( |
| 164 | msg.msgstr_plural() |
| 165 | .ok() |
| 166 | .into_iter() |
| 167 | .flat_map(|vec| vec.iter().map(|s| s.as_ref())), |
| 168 | ) |
| 169 | }) |
| 170 | }) |
| 171 | .flat_map(|str| str.chars()), |
| 172 | ); |
| 173 | } |
| 174 | } |
| 175 | |
| 176 | fn get_string(plural: Expression) -> Option<SmolStr> { |
| 177 | match plural { |
| 178 | Expression::StringLiteral(s: SmolStr) => Some(s), |
| 179 | _ => None, |
| 180 | } |
| 181 | } |
| 182 | |
| 183 | mod plural_rule_parser { |
| 184 | use super::Expression; |
/// Error reported when a plural rule expression cannot be parsed.
///
/// Holds a static description of the problem and the input bytes the error
/// refers to (typically what was left to parse).
pub struct ParseError<'a>(&'static str, &'a [u8]);

impl std::fmt::Debug for ParseError<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(message, rest) = self;
        let rest = std::str::from_utf8(rest).unwrap();
        write!(f, "ParseError( {}, rest= {:?})", message, rest)
    }
}
| 191 | pub fn parse_rule_expression(string: &str) -> Result<Expression, ParseError> { |
| 192 | let ascii = string.as_bytes(); |
| 193 | let s = parse_expression(ascii)?; |
| 194 | if !s.rest.is_empty() { |
| 195 | return Err(ParseError("extra character in string" , s.rest)); |
| 196 | } |
| 197 | match s.ty { |
| 198 | Ty::Number => Ok(s.expr), |
| 199 | Ty::Boolean => Ok(Expression::Condition { |
| 200 | condition: s.expr.into(), |
| 201 | true_expr: Expression::NumberLiteral(1.).into(), |
| 202 | false_expr: Expression::NumberLiteral(0.).into(), |
| 203 | }), |
| 204 | } |
| 205 | } |
| 206 | |
/// The type a parsed (sub-)expression evaluates to.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum Ty {
    /// A numeric value: `n`, a literal, a `%` operation, ...
    Number,
    /// The result of a comparison or of a logical operator.
    Boolean,
}
| 212 | |
| 213 | struct ParsingState<'a> { |
| 214 | expr: Expression, |
| 215 | rest: &'a [u8], |
| 216 | ty: Ty, |
| 217 | } |
| 218 | |
| 219 | impl ParsingState<'_> { |
| 220 | fn skip_whitespace(self) -> Self { |
| 221 | let rest = skip_whitespace(self.rest); |
| 222 | Self { rest, ..self } |
| 223 | } |
| 224 | } |
| 225 | |
| 226 | /// `<condition> ('?' <expr> : <expr> )?` |
| 227 | fn parse_expression(string: &[u8]) -> Result<ParsingState, ParseError> { |
| 228 | let string = skip_whitespace(string); |
| 229 | let state = parse_condition(string)?.skip_whitespace(); |
| 230 | if state.ty != Ty::Boolean { |
| 231 | return Ok(state); |
| 232 | } |
| 233 | if let Some(rest) = state.rest.strip_prefix(b"?" ) { |
| 234 | let s1 = parse_expression(rest)?.skip_whitespace(); |
| 235 | let rest = s1.rest.strip_prefix(b":" ).ok_or(ParseError("expected ':'" , s1.rest))?; |
| 236 | let s2 = parse_expression(rest)?; |
| 237 | if s1.ty != s2.ty { |
| 238 | return Err(ParseError("incompatible types in ternary operator" , s2.rest)); |
| 239 | } |
| 240 | Ok(ParsingState { |
| 241 | expr: Expression::Condition { |
| 242 | condition: state.expr.into(), |
| 243 | true_expr: s1.expr.into(), |
| 244 | false_expr: s2.expr.into(), |
| 245 | }, |
| 246 | rest: skip_whitespace(s2.rest), |
| 247 | ty: s2.ty, |
| 248 | }) |
| 249 | } else { |
| 250 | Ok(state) |
| 251 | } |
| 252 | } |
| 253 | |
| 254 | /// `<and_expr> ("||" <condition>)?` |
| 255 | fn parse_condition(string: &[u8]) -> Result<ParsingState, ParseError> { |
| 256 | let string = skip_whitespace(string); |
| 257 | let state = parse_and_expr(string)?.skip_whitespace(); |
| 258 | if state.rest.is_empty() { |
| 259 | return Ok(state); |
| 260 | } |
| 261 | if let Some(rest) = state.rest.strip_prefix(b"||" ) { |
| 262 | let state2 = parse_condition(rest)?; |
| 263 | if state.ty != Ty::Boolean || state2.ty != Ty::Boolean { |
| 264 | return Err(ParseError("incompatible types in || operator" , state2.rest)); |
| 265 | } |
| 266 | Ok(ParsingState { |
| 267 | expr: Expression::BinaryExpression { |
| 268 | lhs: state.expr.into(), |
| 269 | rhs: state2.expr.into(), |
| 270 | op: '|' , |
| 271 | }, |
| 272 | ty: Ty::Boolean, |
| 273 | rest: skip_whitespace(state2.rest), |
| 274 | }) |
| 275 | } else { |
| 276 | Ok(state) |
| 277 | } |
| 278 | } |
| 279 | |
| 280 | /// `<cmp_expr> ("&&" <and_expr>)?` |
| 281 | fn parse_and_expr(string: &[u8]) -> Result<ParsingState, ParseError> { |
| 282 | let string = skip_whitespace(string); |
| 283 | let state = parse_cmp_expr(string)?.skip_whitespace(); |
| 284 | if state.rest.is_empty() { |
| 285 | return Ok(state); |
| 286 | } |
| 287 | if let Some(rest) = state.rest.strip_prefix(b"&&" ) { |
| 288 | let state2 = parse_and_expr(rest)?; |
| 289 | if state.ty != Ty::Boolean || state2.ty != Ty::Boolean { |
| 290 | return Err(ParseError("incompatible types in || operator" , state2.rest)); |
| 291 | } |
| 292 | Ok(ParsingState { |
| 293 | expr: Expression::BinaryExpression { |
| 294 | lhs: state.expr.into(), |
| 295 | rhs: state2.expr.into(), |
| 296 | op: '&' , |
| 297 | }, |
| 298 | ty: Ty::Boolean, |
| 299 | rest: skip_whitespace(state2.rest), |
| 300 | }) |
| 301 | } else { |
| 302 | Ok(state) |
| 303 | } |
| 304 | } |
| 305 | |
| 306 | /// `<value> ('=='|'!='|'<'|'>'|'<='|'>=' <cmp_expr>)?` |
| 307 | fn parse_cmp_expr(string: &[u8]) -> Result<ParsingState, ParseError> { |
| 308 | let string = skip_whitespace(string); |
| 309 | let mut state = parse_value(string)?; |
| 310 | state.rest = skip_whitespace(state.rest); |
| 311 | if state.rest.is_empty() { |
| 312 | return Ok(state); |
| 313 | } |
| 314 | for (token, op) in [ |
| 315 | (b"==" as &[u8], '=' ), |
| 316 | (b"!=" , '!' ), |
| 317 | (b"<=" , '≤' ), |
| 318 | (b">=" , '≥' ), |
| 319 | (b"<" , '<' ), |
| 320 | (b">" , '>' ), |
| 321 | ] { |
| 322 | if let Some(rest) = state.rest.strip_prefix(token) { |
| 323 | let state2 = parse_cmp_expr(rest)?; |
| 324 | if state.ty != Ty::Number || state2.ty != Ty::Number { |
| 325 | return Err(ParseError("incompatible types in comparison" , state2.rest)); |
| 326 | } |
| 327 | return Ok(ParsingState { |
| 328 | expr: Expression::BinaryExpression { |
| 329 | lhs: state.expr.into(), |
| 330 | rhs: state2.expr.into(), |
| 331 | op, |
| 332 | }, |
| 333 | ty: Ty::Boolean, |
| 334 | rest: skip_whitespace(state2.rest), |
| 335 | }); |
| 336 | } |
| 337 | } |
| 338 | Ok(state) |
| 339 | } |
| 340 | |
| 341 | /// `<term> ('%' <term>)?` |
| 342 | fn parse_value(string: &[u8]) -> Result<ParsingState, ParseError> { |
| 343 | let string = skip_whitespace(string); |
| 344 | let mut state = parse_term(string)?; |
| 345 | state.rest = skip_whitespace(state.rest); |
| 346 | if state.rest.is_empty() { |
| 347 | return Ok(state); |
| 348 | } |
| 349 | if let Some(rest) = state.rest.strip_prefix(b"%" ) { |
| 350 | let state2 = parse_term(rest)?; |
| 351 | if state.ty != Ty::Number || state2.ty != Ty::Number { |
| 352 | return Err(ParseError("incompatible types in % operator" , state2.rest)); |
| 353 | } |
| 354 | Ok(ParsingState { |
| 355 | expr: Expression::BuiltinFunctionCall { |
| 356 | function: crate::expression_tree::BuiltinFunction::Mod, |
| 357 | arguments: vec![state.expr.into(), state2.expr.into()], |
| 358 | }, |
| 359 | ty: Ty::Number, |
| 360 | rest: skip_whitespace(state2.rest), |
| 361 | }) |
| 362 | } else { |
| 363 | Ok(state) |
| 364 | } |
| 365 | } |
| 366 | |
| 367 | fn parse_term(string: &[u8]) -> Result<ParsingState, ParseError> { |
| 368 | let string = skip_whitespace(string); |
| 369 | let state = match string.first().ok_or(ParseError("unexpected end of string" , string))? { |
| 370 | b'n' => ParsingState { |
| 371 | expr: Expression::FunctionParameterReference { index: 0 }, |
| 372 | rest: &string[1..], |
| 373 | ty: Ty::Number, |
| 374 | }, |
| 375 | b'(' => { |
| 376 | let mut s = parse_expression(&string[1..])?; |
| 377 | s.rest = s.rest.strip_prefix(b")" ).ok_or(ParseError("expected ')'" , s.rest))?; |
| 378 | s |
| 379 | } |
| 380 | x if x.is_ascii_digit() => { |
| 381 | let (n, rest) = parse_number(string)?; |
| 382 | ParsingState { expr: Expression::NumberLiteral(n as _), rest, ty: Ty::Number } |
| 383 | } |
| 384 | _ => return Err(ParseError("unexpected token" , string)), |
| 385 | }; |
| 386 | Ok(state) |
| 387 | } |
| 388 | fn parse_number(string: &[u8]) -> Result<(i32, &[u8]), ParseError> { |
| 389 | let end = string.iter().position(|&c| !c.is_ascii_digit()).unwrap_or(string.len()); |
| 390 | let n = std::str::from_utf8(&string[..end]) |
| 391 | .expect("string is valid utf-8" ) |
| 392 | .parse() |
| 393 | .map_err(|_| ParseError("can't parse number" , string))?; |
| 394 | Ok((n, &string[end..])) |
| 395 | } |
/// Returns `string` without its leading ASCII whitespace.
fn skip_whitespace(string: &[u8]) -> &[u8] {
    // slice::trim_ascii_start when MSRV >= 1.80
    let start =
        string.iter().position(|byte| !byte.is_ascii_whitespace()).unwrap_or(string.len());
    &string[start..]
}
| 403 | |
/// Checks the pretty-printed form of the plural rules of a few languages.
#[test]
fn test_parse_rule_expression() {
    /// Parses `string` and returns the pretty-printed expression.
    #[track_caller]
    fn p(string: &str) -> String {
        let compilation_unit = crate::llr::CompilationUnit {
            public_components: Default::default(),
            sub_components: Default::default(),
            used_sub_components: Default::default(),
            globals: Default::default(),
            has_debug_info: false,
            translations: None,
            popup_menu: None,
        };
        let ctx = crate::llr::EvaluationContext {
            compilation_unit: &compilation_unit,
            current_sub_component: None,
            current_global: None,
            generator_state: (),
            parent: None,
            argument_types: &[crate::langtype::Type::Int32],
        };
        let expression = parse_rule_expression(string).expect("parse error");
        crate::llr::pretty_print::DisplayExpression(&expression, &ctx).to_string()
    }

    // (plural rule, expected pretty-printed expression)
    let cases = [
        // en
        ("n != 1", "((arg_0 ! 1.0) ? 1.0 : 0.0)"),
        // fr
        ("n > 1", "((arg_0 > 1.0) ? 1.0 : 0.0)"),
        // ar
        (
            "(n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 ? 4 : 5)",
            "((arg_0 = 0.0) ? 0.0 : ((arg_0 = 1.0) ? 1.0 : ((arg_0 = 2.0) ? 2.0 : (((Mod(arg_0, 100.0) ≥ 3.0) & (Mod(arg_0, 100.0) ≤ 10.0)) ? 3.0 : ((Mod(arg_0, 100.0) ≥ 11.0) ? 4.0 : 5.0)))))",
        ),
        // ga
        (
            "n==1 ? 0 : n==2 ? 1 : (n>2 && n<7) ? 2 :(n>6 && n<11) ? 3 : 4",
            "((arg_0 = 1.0) ? 0.0 : ((arg_0 = 2.0) ? 1.0 : (((arg_0 > 2.0) & (arg_0 < 7.0)) ? 2.0 : (((arg_0 > 6.0) & (arg_0 < 11.0)) ? 3.0 : 4.0))))",
        ),
        // ja
        ("0", "0.0"),
        // pl
        (
            "(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)",
            "((arg_0 = 1.0) ? 0.0 : (((Mod(arg_0, 10.0) ≥ 2.0) & ((Mod(arg_0, 10.0) ≤ 4.0) & ((Mod(arg_0, 100.0) < 10.0) | (Mod(arg_0, 100.0) ≥ 20.0)))) ? 1.0 : 2.0))",
        ),
        // ru
        (
            "(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)",
            "(((Mod(arg_0, 10.0) = 1.0) & (Mod(arg_0, 100.0) ! 11.0)) ? 0.0 : (((Mod(arg_0, 10.0) ≥ 2.0) & ((Mod(arg_0, 10.0) ≤ 4.0) & ((Mod(arg_0, 100.0) < 10.0) | (Mod(arg_0, 100.0) ≥ 20.0)))) ? 1.0 : 2.0))",
        ),
    ];
    for (rule, expected) in cases {
        assert_eq!(p(rule), expected);
    }
}
| 456 | } |
| 457 | |