1use core::str::FromStr;
2
3use smallvec::SmallVec;
4
5use crate::common::TagExt;
6use crate::{script, tag_table, Language, Script, Tag};
7
/// A small vector with inline storage for three tags.
type ThreeTags = SmallVec<[Tag; 3]>;
9
10trait SmallVecExt {
11 fn left(&self) -> usize;
12 fn is_full(&self) -> bool;
13}
14
15impl<A: smallvec::Array> SmallVecExt for SmallVec<A> {
16 fn left(&self) -> usize {
17 self.inline_size() - self.len()
18 }
19
20 fn is_full(&self) -> bool {
21 self.len() == self.inline_size()
22 }
23}
24
25/// Converts an `Script` and an `Language` to script and language tags.
26pub fn tags_from_script_and_language(
27 script: Option<Script>,
28 language: Option<&Language>,
29) -> (ThreeTags, ThreeTags) {
30 let mut needs_script = true;
31 let mut scripts = SmallVec::new();
32 let mut languages = SmallVec::new();
33
34 let mut private_use_subtag = None;
35 let mut prefix = "";
36 if let Some(language) = language {
37 let language = language.as_str();
38 if language.starts_with("x-") {
39 private_use_subtag = Some(language);
40 } else {
41 let bytes = language.as_bytes();
42 let mut i = 1;
43 while i < bytes.len() {
44 if bytes.get(i - 1) == Some(&b'-') && bytes.get(i + 1) == Some(&b'-') {
45 if bytes[i] == b'x' {
46 private_use_subtag = Some(&language[i..]);
47 if prefix.is_empty() {
48 prefix = &language[..i - 1];
49 }
50
51 break;
52 } else {
53 prefix = &language[..i - 1];
54 }
55 }
56
57 i += 1;
58 }
59
60 if prefix.is_empty() {
61 prefix = &language[..i];
62 }
63 }
64
65 needs_script = !parse_private_use_subtag(
66 private_use_subtag,
67 "-hbsc",
68 u8::to_ascii_lowercase,
69 &mut scripts,
70 );
71
72 let needs_language = !parse_private_use_subtag(
73 private_use_subtag,
74 "-hbot",
75 u8::to_ascii_uppercase,
76 &mut languages,
77 );
78
79 if needs_language {
80 if let Ok(prefix) = Language::from_str(prefix) {
81 tags_from_language(&prefix, &mut languages);
82 }
83 }
84 }
85
86 if needs_script {
87 all_tags_from_script(script, &mut scripts);
88 }
89
90 (scripts, languages)
91}
92
93fn parse_private_use_subtag(
94 private_use_subtag: Option<&str>,
95 prefix: &str,
96 normalize: fn(&u8) -> u8,
97 tags: &mut ThreeTags,
98) -> bool {
99 let private_use_subtag = match private_use_subtag {
100 Some(v) => v,
101 None => return false,
102 };
103
104 let private_use_subtag = match private_use_subtag.find(prefix) {
105 Some(idx) => &private_use_subtag[idx + prefix.len()..],
106 None => return false,
107 };
108
109 let mut tag = SmallVec::<[u8; 4]>::new();
110 for c in private_use_subtag.bytes().take(4) {
111 if c.is_ascii_alphanumeric() {
112 tag.push((normalize)(&c));
113 } else {
114 break;
115 }
116 }
117
118 if tag.is_empty() {
119 return false;
120 }
121
122 let mut tag = Tag::from_bytes_lossy(tag.as_slice());
123
124 // Some bits magic from HarfBuzz...
125 if tag.as_u32() & 0xDFDFDFDF == Tag::default_script().as_u32() {
126 tag = Tag(tag.as_u32() ^ !0xDFDFDFDF);
127 }
128
129 tags.push(tag);
130
131 true
132}
133
/// Compares two language strings by their leading subtag.
///
/// The comparison length is the larger of the two leading-subtag lengths
/// (the offset of the first `-`, or the whole length when there is none).
/// Each string is cut at that length, or at its own end if it is shorter,
/// before the two are compared.
fn lang_cmp(s1: &str, s2: &str) -> core::cmp::Ordering {
    let da = s1.find('-').unwrap_or(s1.len());
    let db = s2.find('-').unwrap_or(s2.len());
    let n = core::cmp::max(da, db);
    let ea = core::cmp::min(n, s1.len());
    let eb = core::cmp::min(n, s2.len());

    // Compare bytes rather than `str` slices: a cut point taken from the
    // other string may fall inside a multi-byte character, which would make
    // `str` slicing panic. Byte-wise ordering is the same as `str` ordering.
    s1.as_bytes()[..ea].cmp(&s2.as_bytes()[..eb])
}
142
143fn tags_from_language(language: &Language, tags: &mut ThreeTags) {
144 let language = language.as_str();
145
146 // Check for matches of multiple subtags.
147 if tag_table::tags_from_complex_language(language, tags) {
148 return;
149 }
150
151 let mut sublang = language;
152
153 // Find a language matching in the first component.
154 if let Some(i) = language.find('-') {
155 // If there is an extended language tag, use it.
156 if language.len() >= 6 {
157 let extlang = match language[i + 1..].find('-') {
158 Some(idx) => idx == 3,
159 None => language.len() - i - 1 == 3,
160 };
161
162 if extlang && language.as_bytes()[i + 1].is_ascii_alphabetic() {
163 sublang = &language[i + 1..];
164 }
165 }
166 }
167
168 use tag_table::OPEN_TYPE_LANGUAGES as LANGUAGES;
169
170 if let Ok(mut idx) = LANGUAGES.binary_search_by(|v| lang_cmp(v.language, sublang)) {
171 while idx != 0 && LANGUAGES[idx].language == LANGUAGES[idx - 1].language {
172 idx -= 1;
173 }
174
175 let len = core::cmp::min(tags.left(), LANGUAGES.len() - idx - 1);
176 for i in 0..len {
177 if LANGUAGES[idx + i].language != LANGUAGES[idx].language {
178 break;
179 }
180
181 if LANGUAGES[idx + i].tag.is_null() {
182 break;
183 }
184
185 if tags.is_full() {
186 break;
187 }
188
189 tags.push(LANGUAGES[idx + i].tag);
190 }
191
192 return;
193 }
194
195 if language.len() == 3 {
196 tags.push(Tag::from_bytes_lossy(language.as_bytes()).to_uppercase());
197 }
198}
199
200fn all_tags_from_script(script: Option<Script>, tags: &mut ThreeTags) {
201 if let Some(script: Script) = script {
202 if let Some(tag: Tag) = new_tag_from_script(script) {
203 // Script::Myanmar maps to 'mym2', but there is no 'mym3'.
204 if tag != Tag::from_bytes(b"mym2") {
205 let mut tag3: [u8; 4] = tag.to_bytes();
206 tag3[3] = b'3';
207 tags.push(Tag::from_bytes(&tag3));
208 }
209
210 if !tags.is_full() {
211 tags.push(tag);
212 }
213 }
214
215 if !tags.is_full() {
216 tags.push(old_tag_from_script(script));
217 }
218 }
219}
220
221fn new_tag_from_script(script: Script) -> Option<Tag> {
222 match script {
223 script::BENGALI => Some(Tag::from_bytes(b"bng2")),
224 script::DEVANAGARI => Some(Tag::from_bytes(b"dev2")),
225 script::GUJARATI => Some(Tag::from_bytes(b"gjr2")),
226 script::GURMUKHI => Some(Tag::from_bytes(b"gur2")),
227 script::KANNADA => Some(Tag::from_bytes(b"knd2")),
228 script::MALAYALAM => Some(Tag::from_bytes(b"mlm2")),
229 script::ORIYA => Some(Tag::from_bytes(b"ory2")),
230 script::TAMIL => Some(Tag::from_bytes(b"tml2")),
231 script::TELUGU => Some(Tag::from_bytes(b"tel2")),
232 script::MYANMAR => Some(Tag::from_bytes(b"mym2")),
233 _ => None,
234 }
235}
236
237fn old_tag_from_script(script: Script) -> Tag {
238 // This seems to be accurate as of end of 2012.
239 match script {
240 // Katakana and Hiragana both map to 'kana'.
241 script::HIRAGANA => Tag::from_bytes(b"kana"),
242
243 // Spaces at the end are preserved, unlike ISO 15924.
244 script::LAO => Tag::from_bytes(b"lao "),
245 script::YI => Tag::from_bytes(b"yi "),
246 // Unicode-5.0 additions.
247 script::NKO => Tag::from_bytes(b"nko "),
248 // Unicode-5.1 additions.
249 script::VAI => Tag::from_bytes(b"vai "),
250
251 // Else, just change first char to lowercase and return.
252 _ => Tag(script.tag().as_u32() | 0x20000000),
253 }
254}
255
256#[rustfmt::skip]
257#[cfg(test)]
258mod tests {
259 #![allow(non_snake_case)]
260
261 use super::*;
262 use core::str::FromStr;
263 use alloc::vec::Vec;
264
265 fn new_tag_to_script(tag: Tag) -> Option<Script> {
266 match &tag.to_bytes() {
267 b"bng2" => Some(script::BENGALI),
268 b"dev2" => Some(script::DEVANAGARI),
269 b"gjr2" => Some(script::GUJARATI),
270 b"gur2" => Some(script::GURMUKHI),
271 b"knd2" => Some(script::KANNADA),
272 b"mlm2" => Some(script::MALAYALAM),
273 b"ory2" => Some(script::ORIYA),
274 b"tml2" => Some(script::TAMIL),
275 b"tel2" => Some(script::TELUGU),
276 b"mym2" => Some(script::MYANMAR),
277 _ => Some(script::UNKNOWN),
278 }
279 }
280
281 fn old_tag_to_script(tag: Tag) -> Option<Script> {
282 if tag == Tag::default_script() {
283 return None;
284 }
285
286 let mut bytes = tag.to_bytes();
287
288 // This side of the conversion is fully algorithmic.
289
290 // Any spaces at the end of the tag are replaced by repeating the last
291 // letter. Eg 'nko ' -> 'Nkoo'
292 if bytes[2] == b' ' {
293 bytes[2] = bytes[1];
294 }
295 if bytes[3] == b' ' {
296 bytes[3] = bytes[2];
297 }
298
299 // Change first char to uppercase.
300 bytes[0] = bytes[0].to_ascii_uppercase();
301
302 Some(Script(Tag::from_bytes(&bytes)))
303 }
304
305 fn tag_to_script(tag: Tag) -> Option<Script> {
306 let bytes = tag.to_bytes();
307 if bytes[3] == b'2' || bytes[3] == b'3' {
308 let mut tag2 = bytes;
309 tag2[3] = b'2';
310 return new_tag_to_script(Tag::from_bytes(&tag2));
311 }
312
313 old_tag_to_script(tag)
314 }
315
316 fn test_simple_tags(tag: &str, script: Script) {
317 let tag = Tag::from_bytes_lossy(tag.as_bytes());
318
319 let (scripts, _) = tags_from_script_and_language(Some(script), None);
320 if !scripts.is_empty() {
321 assert_eq!(tag, scripts[0]);
322 } else {
323 assert_eq!(tag, Tag::default_script());
324 }
325
326 assert_eq!(tag_to_script(tag), Some(script));
327 }
328
329 #[test]
330 fn tag_to_uppercase() {
331 assert_eq!(Tag::from_bytes(b"abcd").to_uppercase(), Tag::from_bytes(b"ABCD"));
332 assert_eq!(Tag::from_bytes(b"abc ").to_uppercase(), Tag::from_bytes(b"ABC "));
333 assert_eq!(Tag::from_bytes(b"ABCD").to_uppercase(), Tag::from_bytes(b"ABCD"));
334 }
335
336 #[test]
337 fn tag_to_lowercase() {
338 assert_eq!(Tag::from_bytes(b"abcd").to_lowercase(), Tag::from_bytes(b"abcd"));
339 assert_eq!(Tag::from_bytes(b"abc ").to_lowercase(), Tag::from_bytes(b"abc "));
340 assert_eq!(Tag::from_bytes(b"ABCD").to_lowercase(), Tag::from_bytes(b"abcd"));
341 }
342
343 #[test]
344 fn script_degenerate() {
345 assert_eq!(Tag::from_bytes(b"DFLT"), Tag::default_script());
346
347 // Hiragana and Katakana both map to 'kana'.
348 test_simple_tags("kana", script::KATAKANA);
349
350 let (scripts, _) = tags_from_script_and_language(Some(script::HIRAGANA), None);
351 assert_eq!(scripts.as_slice(), &[Tag::from_bytes(b"kana")]);
352
353 // Spaces are replaced
354 assert_eq!(tag_to_script(Tag::from_bytes(b"be ")), Script::from_iso15924_tag(Tag::from_bytes(b"Beee")));
355 }
356
357 #[test]
358 fn script_simple() {
359 // Arbitrary non-existent script.
360 test_simple_tags("wwyz", Script::from_iso15924_tag(Tag::from_bytes(b"wWyZ")).unwrap());
361
362 // These we don't really care about.
363 test_simple_tags("zyyy", script::COMMON);
364 test_simple_tags("zinh", script::INHERITED);
365 test_simple_tags("zzzz", script::UNKNOWN);
366
367 test_simple_tags("arab", script::ARABIC);
368 test_simple_tags("copt", script::COPTIC);
369 test_simple_tags("kana", script::KATAKANA);
370 test_simple_tags("latn", script::LATIN);
371
372 // These are trickier since their OT script tags have space.
373 test_simple_tags("lao ", script::LAO);
374 test_simple_tags("yi ", script::YI);
375 // Unicode-5.0 additions.
376 test_simple_tags("nko ", script::NKO);
377 // Unicode-5.1 additions.
378 test_simple_tags("vai ", script::VAI);
379
380 // https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags
381
382 // Unicode-5.2 additions.
383 test_simple_tags("mtei", script::MEETEI_MAYEK);
384 // Unicode-6.0 additions.
385 test_simple_tags("mand", script::MANDAIC);
386 }
387
388 macro_rules! test_script_from_language {
389 ($name:ident, $tag:expr, $lang:expr, $script:expr) => {
390 #[test]
391 fn $name() {
392 let tag = Tag::from_bytes_lossy($tag.as_bytes());
393 let (scripts, _) = tags_from_script_and_language(
394 $script, Language::from_str($lang).ok().as_ref(),
395 );
396 if !scripts.is_empty() {
397 assert_eq!(scripts.as_slice(), &[tag]);
398 }
399 }
400 };
401 }
402
    // Arguments: test name, expected script tag, language string, script.
    // The macro only asserts when at least one script tag is produced.
    test_script_from_language!(script_from_language_01, "", "", None);
    test_script_from_language!(script_from_language_02, "", "en", None);
    test_script_from_language!(script_from_language_03, "copt", "en", Some(script::COPTIC));
    test_script_from_language!(script_from_language_04, "", "x-hbsc", None);
    test_script_from_language!(script_from_language_05, "copt", "x-hbsc", Some(script::COPTIC));
    test_script_from_language!(script_from_language_06, "abc ", "x-hbscabc", None);
    test_script_from_language!(script_from_language_07, "deva", "x-hbscdeva", None);
    test_script_from_language!(script_from_language_08, "dev2", "x-hbscdev2", None);
    test_script_from_language!(script_from_language_09, "dev3", "x-hbscdev3", None);
    test_script_from_language!(script_from_language_10, "copt", "x-hbotpap0-hbsccopt", None);
    test_script_from_language!(script_from_language_11, "", "en-x-hbsc", None);
    test_script_from_language!(script_from_language_12, "copt", "en-x-hbsc", Some(script::COPTIC));
    test_script_from_language!(script_from_language_13, "abc ", "en-x-hbscabc", None);
    test_script_from_language!(script_from_language_14, "deva", "en-x-hbscdeva", None);
    test_script_from_language!(script_from_language_15, "dev2", "en-x-hbscdev2", None);
    test_script_from_language!(script_from_language_16, "dev3", "en-x-hbscdev3", None);
    test_script_from_language!(script_from_language_17, "copt", "en-x-hbotpap0-hbsccopt", None);
420
421 #[test]
422 fn script_indic() {
423 fn check(tag1: &str, tag2: &str, tag3: &str, script: Script) {
424 let tag1 = Tag::from_bytes_lossy(tag1.as_bytes());
425 let tag2 = Tag::from_bytes_lossy(tag2.as_bytes());
426 let tag3 = Tag::from_bytes_lossy(tag3.as_bytes());
427
428 let (scripts, _) = tags_from_script_and_language(Some(script), None);
429 assert_eq!(scripts.as_slice(), &[tag1, tag2, tag3]);
430 assert_eq!(tag_to_script(tag1), Some(script));
431 assert_eq!(tag_to_script(tag2), Some(script));
432 assert_eq!(tag_to_script(tag3), Some(script));
433 }
434
435 check("bng3", "bng2", "beng", script::BENGALI);
436 check("dev3", "dev2", "deva", script::DEVANAGARI);
437 check("gjr3", "gjr2", "gujr", script::GUJARATI);
438 check("gur3", "gur2", "guru", script::GURMUKHI);
439 check("knd3", "knd2", "knda", script::KANNADA);
440 check("mlm3", "mlm2", "mlym", script::MALAYALAM);
441 check("ory3", "ory2", "orya", script::ORIYA);
442 check("tml3", "tml2", "taml", script::TAMIL);
443 check("tel3", "tel2", "telu", script::TELUGU);
444 }
445
446 // TODO: swap tag and lang
447 macro_rules! test_tag_from_language {
448 ($name:ident, $tag:expr, $lang:expr) => {
449 #[test]
450 fn $name() {
451 let tag = Tag::from_bytes_lossy($tag.as_bytes());
452 let (_, languages) = tags_from_script_and_language(
453 None, Language::from_str(&$lang.to_lowercase()).ok().as_ref(),
454 );
455 if !languages.is_empty() {
456 assert_eq!(languages[0], tag);
457 }
458 }
459 };
460 }
461
    // Arguments: test name, expected first language tag, language string.
    // The macro lower-cases the language string and only asserts when at
    // least one language tag is produced.
    test_tag_from_language!(tag_from_language_dflt, "dflt", "");
    test_tag_from_language!(tag_from_language_ALT, "ALT", "alt");
    test_tag_from_language!(tag_from_language_ARA, "ARA", "ar");
    test_tag_from_language!(tag_from_language_AZE, "AZE", "az");
    test_tag_from_language!(tag_from_language_az_ir, "AZE", "az-ir");
    test_tag_from_language!(tag_from_language_az_az, "AZE", "az-az");
    test_tag_from_language!(tag_from_language_ENG, "ENG", "en");
    test_tag_from_language!(tag_from_language_en_US, "ENG", "en_US");
    test_tag_from_language!(tag_from_language_CJA, "CJA", "cja"); /* Western Cham */
    test_tag_from_language!(tag_from_language_CJM, "CJM", "cjm"); /* Eastern Cham */
    test_tag_from_language!(tag_from_language_ENV, "EVN", "eve");
    test_tag_from_language!(tag_from_language_HAL, "HAL", "cfm"); /* BCP47 and current ISO639-3 code for Halam/Falam Chin */
    test_tag_from_language!(tag_from_language_flm, "HAL", "flm"); /* Retired ISO639-3 code for Halam/Falam Chin */
    test_tag_from_language!(tag_from_language_hy, "HYE0", "hy");
    test_tag_from_language!(tag_from_language_hyw, "HYE", "hyw");
    test_tag_from_language!(tag_from_language_bgr, "QIN", "bgr"); /* Bawm Chin */
    test_tag_from_language!(tag_from_language_cbl, "QIN", "cbl"); /* Bualkhaw Chin */
    test_tag_from_language!(tag_from_language_cka, "QIN", "cka"); /* Khumi Awa Chin */
    test_tag_from_language!(tag_from_language_cmr, "QIN", "cmr"); /* Mro-Khimi Chin */
    test_tag_from_language!(tag_from_language_cnb, "QIN", "cnb"); /* Chinbon Chin */
    test_tag_from_language!(tag_from_language_cnh, "QIN", "cnh"); /* Hakha Chin */
    test_tag_from_language!(tag_from_language_cnk, "QIN", "cnk"); /* Khumi Chin */
    test_tag_from_language!(tag_from_language_cnw, "QIN", "cnw"); /* Ngawn Chin */
    test_tag_from_language!(tag_from_language_csh, "QIN", "csh"); /* Asho Chin */
    test_tag_from_language!(tag_from_language_csy, "QIN", "csy"); /* Siyin Chin */
    test_tag_from_language!(tag_from_language_ctd, "QIN", "ctd"); /* Tedim Chin */
    test_tag_from_language!(tag_from_language_czt, "QIN", "czt"); /* Zotung Chin */
    test_tag_from_language!(tag_from_language_dao, "QIN", "dao"); /* Daai Chin */
    test_tag_from_language!(tag_from_language_htl, "QIN", "hlt"); /* Matu Chin */
    test_tag_from_language!(tag_from_language_mrh, "QIN", "mrh"); /* Mara Chin */
    test_tag_from_language!(tag_from_language_pck, "QIN", "pck"); /* Paite Chin */
    test_tag_from_language!(tag_from_language_sez, "QIN", "sez"); /* Senthang Chin */
    test_tag_from_language!(tag_from_language_tcp, "QIN", "tcp"); /* Tawr Chin */
    test_tag_from_language!(tag_from_language_tcz, "QIN", "tcz"); /* Thado Chin */
    test_tag_from_language!(tag_from_language_yos, "QIN", "yos"); /* Yos, deprecated by IANA in favor of Zou [zom] */
    test_tag_from_language!(tag_from_language_zom, "QIN", "zom"); /* Zou */
    test_tag_from_language!(tag_from_language_FAR, "FAR", "fa");
    test_tag_from_language!(tag_from_language_fa_IR, "FAR", "fa_IR");
    test_tag_from_language!(tag_from_language_man, "MNK", "man");
    test_tag_from_language!(tag_from_language_SWA, "SWA", "aii"); /* Swadaya Aramaic */
    test_tag_from_language!(tag_from_language_SYR, "SYR", "syr"); /* Syriac [macrolanguage] */
    test_tag_from_language!(tag_from_language_amw, "SYR", "amw"); /* Western Neo-Aramaic */
    test_tag_from_language!(tag_from_language_cld, "SYR", "cld"); /* Chaldean Neo-Aramaic */
    test_tag_from_language!(tag_from_language_syc, "SYR", "syc"); /* Classical Syriac */
    test_tag_from_language!(tag_from_language_TUA, "TUA", "tru"); /* Turoyo Aramaic */
    test_tag_from_language!(tag_from_language_zh, "ZHS", "zh"); /* Chinese */
    test_tag_from_language!(tag_from_language_zh_cn, "ZHS", "zh-cn"); /* Chinese (China) */
    test_tag_from_language!(tag_from_language_zh_sg, "ZHS", "zh-sg"); /* Chinese (Singapore) */
    test_tag_from_language!(tag_from_language_zh_mo, "ZHTM", "zh-mo"); /* Chinese (Macao) */
    test_tag_from_language!(tag_from_language_zh_hant_mo, "ZHTM", "zh-hant-mo"); /* Chinese (Macao) */
    test_tag_from_language!(tag_from_language_zh_hans_mo, "ZHS", "zh-hans-mo"); /* Chinese (Simplified, Macao) */
    test_tag_from_language!(tag_from_language_ZHH, "ZHH", "zh-HK"); /* Chinese (Hong Kong) */
    test_tag_from_language!(tag_from_language_zh_HanT_hK, "ZHH", "zH-HanT-hK"); /* Chinese (Hong Kong) */
    test_tag_from_language!(tag_from_language_zh_HanS_hK, "ZHS", "zH-HanS-hK"); /* Chinese (Simplified, Hong Kong) */
    test_tag_from_language!(tag_from_language_zh_tw, "ZHT", "zh-tw"); /* Chinese (Taiwan) */
    test_tag_from_language!(tag_from_language_ZHS, "ZHS", "zh-Hans"); /* Chinese (Simplified) */
    test_tag_from_language!(tag_from_language_ZHT, "ZHT", "zh-Hant"); /* Chinese (Traditional) */
    test_tag_from_language!(tag_from_language_zh_xx, "ZHS", "zh-xx"); /* Chinese (Other) */
    test_tag_from_language!(tag_from_language_zh_Hans_TW, "ZHS", "zh-Hans-TW");
    test_tag_from_language!(tag_from_language_yue, "ZHH", "yue");
    test_tag_from_language!(tag_from_language_yue_Hant, "ZHH", "yue-Hant");
    test_tag_from_language!(tag_from_language_yue_Hans, "ZHS", "yue-Hans");
    test_tag_from_language!(tag_from_language_ABC, "ABC", "abc");
    test_tag_from_language!(tag_from_language_ABCD, "ABCD", "x-hbotabcd");
    test_tag_from_language!(tag_from_language_asdf_asdf_wer_x_hbotabc_zxc, "ABC", "asdf-asdf-wer-x-hbotabc-zxc");
    test_tag_from_language!(tag_from_language_asdf_asdf_wer_x_hbotabc, "ABC", "asdf-asdf-wer-x-hbotabc");
    test_tag_from_language!(tag_from_language_asdf_asdf_wer_x_hbotabcd, "ABCD", "asdf-asdf-wer-x-hbotabcd");
    test_tag_from_language!(tag_from_language_asdf_asdf_wer_x_hbot_zxc, "dflt", "asdf-asdf-wer-x-hbot-zxc");
    test_tag_from_language!(tag_from_language_xy, "dflt", "xy");
    test_tag_from_language!(tag_from_language_xyz, "XYZ", "xyz"); /* Unknown ISO 639-3 */
    test_tag_from_language!(tag_from_language_xyz_qw, "XYZ", "xyz-qw"); /* Unknown ISO 639-3 */

    /*
     * Invalid input. The precise answer does not matter, as long as it
     * does not crash or get into an infinite loop.
     */
    test_tag_from_language!(tag_from_language__fonipa, "IPPH", "-fonipa");

    /*
     * Tags that contain "-fonipa" as a substring but which do not contain
     * the subtag "fonipa".
     */
    test_tag_from_language!(tag_from_language_en_fonipax, "ENG", "en-fonipax");
    test_tag_from_language!(tag_from_language_en_x_fonipa, "ENG", "en-x-fonipa");
    test_tag_from_language!(tag_from_language_en_a_fonipa, "ENG", "en-a-fonipa");
    test_tag_from_language!(tag_from_language_en_a_qwe_b_fonipa, "ENG", "en-a-qwe-b-fonipa");

    /* International Phonetic Alphabet */
    test_tag_from_language!(tag_from_language_en_fonipa, "IPPH", "en-fonipa");
    test_tag_from_language!(tag_from_language_en_fonipax_fonipa, "IPPH", "en-fonipax-fonipa");
    test_tag_from_language!(tag_from_language_rm_ch_fonipa_sursilv_x_foobar, "IPPH", "rm-CH-fonipa-sursilv-x-foobar");
    test_tag_from_language!(tag_from_language_IPPH, "IPPH", "und-fonipa");
    test_tag_from_language!(tag_from_language_zh_fonipa, "IPPH", "zh-fonipa");

    /* North American Phonetic Alphabet (Americanist Phonetic Notation) */
    test_tag_from_language!(tag_from_language_en_fonnapa, "APPH", "en-fonnapa");
    test_tag_from_language!(tag_from_language_chr_fonnapa, "APPH", "chr-fonnapa");
    test_tag_from_language!(tag_from_language_APPH, "APPH", "und-fonnapa");

    /* Khutsuri Georgian */
    test_tag_from_language!(tag_from_language_ka_geok, "KGE", "ka-Geok");
    test_tag_from_language!(tag_from_language_KGE, "KGE", "und-Geok");

    /* Irish Traditional */
    test_tag_from_language!(tag_from_language_IRT, "IRT", "ga-Latg");

    /* Moldavian */
    test_tag_from_language!(tag_from_language_MOL, "MOL", "ro-MD");

    /* Polytonic Greek */
    test_tag_from_language!(tag_from_language_PGR, "PGR", "el-polyton");
    test_tag_from_language!(tag_from_language_el_CY_polyton, "PGR", "el-CY-polyton");

    /* Estrangela Syriac */
    test_tag_from_language!(tag_from_language_aii_Syre, "SYRE", "aii-Syre");
    test_tag_from_language!(tag_from_language_de_Syre, "SYRE", "de-Syre");
    test_tag_from_language!(tag_from_language_syr_Syre, "SYRE", "syr-Syre");
    test_tag_from_language!(tag_from_language_und_Syre, "SYRE", "und-Syre");

    /* Western Syriac */
    test_tag_from_language!(tag_from_language_aii_Syrj, "SYRJ", "aii-Syrj");
    test_tag_from_language!(tag_from_language_de_Syrj, "SYRJ", "de-Syrj");
    test_tag_from_language!(tag_from_language_syr_Syrj, "SYRJ", "syr-Syrj");
    test_tag_from_language!(tag_from_language_SYRJ, "SYRJ", "und-Syrj");

    /* Eastern Syriac */
    test_tag_from_language!(tag_from_language_aii_Syrn, "SYRN", "aii-Syrn");
    test_tag_from_language!(tag_from_language_de_Syrn, "SYRN", "de-Syrn");
    test_tag_from_language!(tag_from_language_syr_Syrn, "SYRN", "syr-Syrn");
    test_tag_from_language!(tag_from_language_SYRN, "SYRN", "und-Syrn");

    /* Test that x-hbot overrides the base language */
    test_tag_from_language!(tag_from_language_fa_x_hbotabc_zxc, "ABC", "fa-x-hbotabc-zxc");
    test_tag_from_language!(tag_from_language_fa_ir_x_hbotabc_zxc, "ABC", "fa-ir-x-hbotabc-zxc");
    test_tag_from_language!(tag_from_language_zh_x_hbotabc_zxc, "ABC", "zh-x-hbotabc-zxc");
    test_tag_from_language!(tag_from_language_zh_cn_x_hbotabc_zxc, "ABC", "zh-cn-x-hbotabc-zxc");
    test_tag_from_language!(tag_from_language_zh_xy_x_hbotabc_zxc, "ABC", "zh-xy-x-hbotabc-zxc");
    test_tag_from_language!(tag_from_language_xyz_xy_x_hbotabc_zxc, "ABC", "xyz-xy-x-hbotabc-zxc");

    /* Unnormalized BCP 47 tags */
    test_tag_from_language!(tag_from_language_ar_aao, "ARA", "ar-aao");
    test_tag_from_language!(tag_from_language_art_lojban, "JBO", "art-lojban");
    test_tag_from_language!(tag_from_language_kok_gom, "KOK", "kok-gom");
    test_tag_from_language!(tag_from_language_i_lux, "LTZ", "i-lux");
    test_tag_from_language!(tag_from_language_drh, "MNG", "drh");
    test_tag_from_language!(tag_from_language_ar_ary1, "MOR", "ar-ary");
    test_tag_from_language!(tag_from_language_ar_ary_DZ, "MOR", "ar-ary-DZ");
    test_tag_from_language!(tag_from_language_no_bok, "NOR", "no-bok");
    test_tag_from_language!(tag_from_language_no_nyn, "NYN", "no-nyn");
    test_tag_from_language!(tag_from_language_i_hak, "ZHS", "i-hak");
    test_tag_from_language!(tag_from_language_zh_guoyu, "ZHS", "zh-guoyu");
    test_tag_from_language!(tag_from_language_zh_min, "ZHS", "zh-min");
    test_tag_from_language!(tag_from_language_zh_min_nan, "ZHS", "zh-min-nan");
    test_tag_from_language!(tag_from_language_zh_xiang, "ZHS", "zh-xiang");

    /* BCP 47 tags that look similar to unrelated language system tags */
    test_tag_from_language!(tag_from_language_als, "SQI", "als");
    test_tag_from_language!(tag_from_language_far, "dflt", "far");

    /* A UN M.49 region code, not an extended language subtag */
    test_tag_from_language!(tag_from_language_ar_001, "ARA", "ar-001");

    /* An invalid tag */
    test_tag_from_language!(tag_from_language_invalid, "TRK", "tr@foo=bar");
626
627 macro_rules! test_tags {
628 ($name:ident, $script:expr, $lang:expr, $scripts:expr, $langs:expr) => {
629 #[test]
630 fn $name() {
631 let (scripts, languages) = tags_from_script_and_language(
632 $script, Language::from_str($lang).ok().as_ref(),
633 );
634
635 let exp_scripts: Vec<Tag> = $scripts.iter().map(|v| Tag::from_bytes_lossy(*v)).collect();
636 let exp_langs: Vec<Tag> = $langs.iter().map(|v| Tag::from_bytes_lossy(*v)).collect();
637
638 assert_eq!(exp_scripts, scripts.as_slice());
639 assert_eq!(exp_langs, languages.as_slice());
640 }
641 };
642 }
643
    // Arguments: test name, script, language string, expected script tags,
    // expected language tags. Both tag lists are compared in full.
    test_tags!(tag_full_en, None, "en", &[], &[b"ENG"]);
    test_tags!(tag_full_en_x_hbscdflt, None, "en-x-hbscdflt", &[b"DFLT"], &[b"ENG"]);
    test_tags!(tag_full_en_latin, Some(script::LATIN), "en", &[b"latn"], &[b"ENG"]);
    test_tags!(tag_full_und_fonnapa, None, "und-fonnapa", &[], &[b"APPH"]);
    test_tags!(tag_full_en_fonnapa, None, "en-fonnapa", &[], &[b"APPH"]);
    test_tags!(tag_full_x_hbot1234_hbsc5678, None, "x-hbot1234-hbsc5678", &[b"5678"], &[b"1234"]);
    test_tags!(tag_full_x_hbsc5678_hbot1234, None, "x-hbsc5678-hbot1234", &[b"5678"], &[b"1234"]);
    test_tags!(tag_full_ml, Some(script::MALAYALAM), "ml", &[b"mlm3", b"mlm2", b"mlym"], &[b"MAL", b"MLR"]);
    test_tags!(tag_full_xyz, None, "xyz", &[], &[b"XYZ"]);
    test_tags!(tag_full_xy, None, "xy", &[], &[]);
654}
655