tag.rs source code [crates/rustybuzz-0.13.0/src/tag.rs]

1	use core::str::FromStr;
2
3	use smallvec::SmallVec;
4
5	use crate::common::TagExt;
6	use crate::{script, tag_table, Language, Script, Tag};
7
8	type ThreeTags = SmallVec<[Tag; `3`]>;
9
10	trait SmallVecExt {
11	fn left(&self) -> usize;
12	fn is_full(&self) -> bool;
13	}
14
15	impl<A: smallvec::Array> SmallVecExt for SmallVec<A> {
16	fn left(&self) -> usize {
17	self.inline_size() - self.len()
18	}
19
20	fn is_full(&self) -> bool {
21	self.len() == self.inline_size()
22	}
23	}
24
25	/// Converts an `Script` and an `Language` to script and language tags.
26	pub fn tags_from_script_and_language(
27	script: Option<Script>,
28	language: Option<&Language>,
29	) -> (ThreeTags, ThreeTags) {
30	let mut needs_script = `true`;
31	let mut scripts = SmallVec::new();
32	let mut languages = SmallVec::new();
33
34	let mut private_use_subtag = None;
35	let mut prefix = "";
36	if let Some(language) = language {
37	let language = language.as_str();
38	if language.starts_with("x-") {
39	private_use_subtag = Some(language);
40	} else {
41	let bytes = language.as_bytes();
42	let mut i = `1`;
43	while i < bytes.len() {
44	if bytes.get(i - `1`) == Some(&b'-') && bytes.get(i + `1`) == Some(&b'-') {
45	if bytes[i] == b'x' {
46	private_use_subtag = Some(&language[i..]);
47	if prefix.is_empty() {
48	prefix = &language[..i - `1`];
49	}
50
51	break;
52	} else {
53	prefix = &language[..i - `1`];
54	}
55	}
56
57	i += `1`;
58	}
59
60	if prefix.is_empty() {
61	prefix = &language[..i];
62	}
63	}
64
65	needs_script = !parse_private_use_subtag(
66	private_use_subtag,
67	"-hbsc",
68	u8::to_ascii_lowercase,
69	&mut scripts,
70	);
71
72	let needs_language = !parse_private_use_subtag(
73	private_use_subtag,
74	"-hbot",
75	u8::to_ascii_uppercase,
76	&mut languages,
77	);
78
79	if needs_language {
80	if let Ok(prefix) = Language::from_str(prefix) {
81	tags_from_language(&prefix, &mut languages);
82	}
83	}
84	}
85
86	if needs_script {
87	all_tags_from_script(script, &mut scripts);
88	}
89
90	(scripts, languages)
91	}
92
93	fn parse_private_use_subtag(
94	private_use_subtag: Option<&str>,
95	prefix: &str,
96	normalize: fn(&u8) -> u8,
97	tags: &mut ThreeTags,
98	) -> bool {
99	let private_use_subtag = match private_use_subtag {
100	Some(v) => v,
101	None => return `false`,
102	};
103
104	let private_use_subtag = match private_use_subtag.find(prefix) {
105	Some(idx) => &private_use_subtag[idx + prefix.len()..],
106	None => return `false`,
107	};
108
109	let mut tag = SmallVec::<[u8; `4`]>::new();
110	for c in private_use_subtag.bytes().take(`4`) {
111	if c.is_ascii_alphanumeric() {
112	tag.push((normalize)(&c));
113	} else {
114	break;
115	}
116	}
117
118	if tag.is_empty() {
119	return `false`;
120	}
121
122	let mut tag = Tag::from_bytes_lossy(tag.as_slice());
123
124	// Some bits magic from HarfBuzz...
125	if tag.as_u32() & `0xDFDFDFDF` == Tag::default_script().as_u32() {
126	tag = Tag(tag.as_u32() ^ !`0xDFDFDFDF`);
127	}
128
129	tags.push(tag);
130
131	`true`
132	}
133
/// Compares two language strings by their leading subtag.
///
/// The comparison length is the longer of the two first subtags (the text
/// before the first `-`, or the whole string when there is none), clamped to
/// each string's length. Two strings therefore compare equal when they agree
/// on the first subtag, whatever follows it.
///
/// The comparison is done on bytes rather than on `str` slices: the ordering
/// is the same, but a cut that lands inside a multi-byte character no longer
/// panics.
fn lang_cmp(s1: &str, s2: &str) -> core::cmp::Ordering {
    let da = s1.find('-').unwrap_or(s1.len());
    let db = s2.find('-').unwrap_or(s2.len());
    let n = core::cmp::max(da, db);
    let ea = core::cmp::min(n, s1.len());
    let eb = core::cmp::min(n, s2.len());
    s1.as_bytes()[..ea].cmp(&s2.as_bytes()[..eb])
}
142
143	fn tags_from_language(language: &Language, tags: &mut ThreeTags) {
144	let language = language.as_str();
145
146	// Check for matches of multiple subtags.
147	if tag_table::tags_from_complex_language(language, tags) {
148	return;
149	}
150
151	let mut sublang = language;
152
153	// Find a language matching in the first component.
154	if let Some(i) = language.find('-') {
155	// If there is an extended language tag, use it.
156	if language.len() >= `6` {
157	let extlang = match language[i + `1`..].find('-') {
158	Some(idx) => idx == `3`,
159	None => language.len() - i - `1` == `3`,
160	};
161
162	if extlang && language.as_bytes()[i + `1`].is_ascii_alphabetic() {
163	sublang = &language[i + `1`..];
164	}
165	}
166	}
167
168	use tag_table::OPEN_TYPE_LANGUAGES as LANGUAGES;
169
170	if let Ok(mut idx) = LANGUAGES.binary_search_by(\|v\| lang_cmp(v.language, sublang)) {
171	while idx != `0` && LANGUAGES[idx].language == LANGUAGES[idx - `1`].language {
172	idx -= `1`;
173	}
174
175	let len = core::cmp::min(tags.left(), LANGUAGES.len() - idx - `1`);
176	for i in `0`..len {
177	if LANGUAGES[idx + i].language != LANGUAGES[idx].language {
178	break;
179	}
180
181	if LANGUAGES[idx + i].tag.is_null() {
182	break;
183	}
184
185	if tags.is_full() {
186	break;
187	}
188
189	tags.push(LANGUAGES[idx + i].tag);
190	}
191
192	return;
193	}
194
195	if language.len() == `3` {
196	tags.push(Tag::from_bytes_lossy(language.as_bytes()).to_uppercase());
197	}
198	}
199
200	fn all_tags_from_script(script: Option<Script>, tags: &mut ThreeTags) {
201	if let Some(script: Script) = script {
202	if let Some(tag: Tag) = new_tag_from_script(script) {
203	// Script::Myanmar maps to 'mym2', but there is no 'mym3'.
204	if tag != Tag::from_bytes(b"mym2") {
205	let mut tag3: [u8; 4] = tag.to_bytes();
206	tag3[`3`] = b'3';
207	tags.push(Tag::from_bytes(&tag3));
208	}
209
210	if !tags.is_full() {
211	tags.push(tag);
212	}
213	}
214
215	if !tags.is_full() {
216	tags.push(old_tag_from_script(script));
217	}
218	}
219	}
220
221	fn new_tag_from_script(script: Script) -> Option<Tag> {
222	match script {
223	script::BENGALI => Some(Tag::from_bytes(b"bng2")),
224	script::DEVANAGARI => Some(Tag::from_bytes(b"dev2")),
225	script::GUJARATI => Some(Tag::from_bytes(b"gjr2")),
226	script::GURMUKHI => Some(Tag::from_bytes(b"gur2")),
227	script::KANNADA => Some(Tag::from_bytes(b"knd2")),
228	script::MALAYALAM => Some(Tag::from_bytes(b"mlm2")),
229	script::ORIYA => Some(Tag::from_bytes(b"ory2")),
230	script::TAMIL => Some(Tag::from_bytes(b"tml2")),
231	script::TELUGU => Some(Tag::from_bytes(b"tel2")),
232	script::MYANMAR => Some(Tag::from_bytes(b"mym2")),
233	_ => None,
234	}
235	}
236
237	fn old_tag_from_script(script: Script) -> Tag {
238	// This seems to be accurate as of end of 2012.
239	match script {
240	// Katakana and Hiragana both map to 'kana'.
241	script::HIRAGANA => Tag::from_bytes(b"kana"),
242
243	// Spaces at the end are preserved, unlike ISO 15924.
244	script::LAO => Tag::from_bytes(b"lao "),
245	script::YI => Tag::from_bytes(b"yi "),
246	// Unicode-5.0 additions.
247	script::NKO => Tag::from_bytes(b"nko "),
248	// Unicode-5.1 additions.
249	script::VAI => Tag::from_bytes(b"vai "),
250
251	// Else, just change first char to lowercase and return.
252	_ => Tag(script.tag().as_u32() \| `0x20000000`),
253	}
254	}
255
// Unit tests for the script/language to OpenType tag conversion.
#[rustfmt::skip]
#[cfg(test)]
mod tests {
    #![allow(non_snake_case)]

    use super::*;
    use core::str::FromStr;
    use alloc::vec::Vec;

    /// Maps a version 2 script tag back to its script.
    /// Unrecognized tags map to `script::UNKNOWN`.
    fn new_tag_to_script(tag: Tag) -> Option<Script> {
        match &tag.to_bytes() {
            b"bng2" => Some(script::BENGALI),
            b"dev2" => Some(script::DEVANAGARI),
            b"gjr2" => Some(script::GUJARATI),
            b"gur2" => Some(script::GURMUKHI),
            b"knd2" => Some(script::KANNADA),
            b"mlm2" => Some(script::MALAYALAM),
            b"ory2" => Some(script::ORIYA),
            b"tml2" => Some(script::TAMIL),
            b"tel2" => Some(script::TELUGU),
            b"mym2" => Some(script::MYANMAR),
            _ => Some(script::UNKNOWN),
        }
    }

    /// Maps an old-style script tag back to its script.
    /// The default script tag maps to `None`.
    fn old_tag_to_script(tag: Tag) -> Option<Script> {
        if tag == Tag::default_script() {
            return None;
        }

        let mut bytes = tag.to_bytes();

        // This side of the conversion is fully algorithmic.

        // Any spaces at the end of the tag are replaced by repeating the last
        // letter. Eg 'nko ' -> 'Nkoo'
        if bytes[2] == b' ' {
            bytes[2] = bytes[1];
        }
        if bytes[3] == b' ' {
            bytes[3] = bytes[2];
        }

        // Change first char to uppercase.
        bytes[0] = bytes[0].to_ascii_uppercase();

        Some(Script(Tag::from_bytes(&bytes)))
    }

    /// Maps any script tag back to its script. Tags ending in '2' or '3'
    /// are treated as version 2 tags; everything else as an old-style tag.
    fn tag_to_script(tag: Tag) -> Option<Script> {
        let bytes = tag.to_bytes();
        if bytes[3] == b'2' || bytes[3] == b'3' {
            let mut tag2 = bytes;
            tag2[3] = b'2';
            return new_tag_to_script(Tag::from_bytes(&tag2));
        }

        old_tag_to_script(tag)
    }

    /// Checks that `script` converts to `tag` and that `tag` converts back to `script`.
    fn test_simple_tags(tag: &str, script: Script) {
        let tag = Tag::from_bytes_lossy(tag.as_bytes());

        let (scripts, _) = tags_from_script_and_language(Some(script), None);
        if !scripts.is_empty() {
            assert_eq!(tag, scripts[0]);
        } else {
            assert_eq!(tag, Tag::default_script());
        }

        assert_eq!(tag_to_script(tag), Some(script));
    }

    #[test]
    fn tag_to_uppercase() {
        assert_eq!(Tag::from_bytes(b"abcd").to_uppercase(), Tag::from_bytes(b"ABCD"));
        assert_eq!(Tag::from_bytes(b"abc ").to_uppercase(), Tag::from_bytes(b"ABC "));
        assert_eq!(Tag::from_bytes(b"ABCD").to_uppercase(), Tag::from_bytes(b"ABCD"));
    }

    #[test]
    fn tag_to_lowercase() {
        assert_eq!(Tag::from_bytes(b"abcd").to_lowercase(), Tag::from_bytes(b"abcd"));
        assert_eq!(Tag::from_bytes(b"abc ").to_lowercase(), Tag::from_bytes(b"abc "));
        assert_eq!(Tag::from_bytes(b"ABCD").to_lowercase(), Tag::from_bytes(b"abcd"));
    }

    #[test]
    fn script_degenerate() {
        assert_eq!(Tag::from_bytes(b"DFLT"), Tag::default_script());

        // Hiragana and Katakana both map to 'kana'.
        test_simple_tags("kana", script::KATAKANA);

        let (scripts, _) = tags_from_script_and_language(Some(script::HIRAGANA), None);
        assert_eq!(scripts.as_slice(), &[Tag::from_bytes(b"kana")]);

        // Spaces are replaced
        assert_eq!(tag_to_script(Tag::from_bytes(b"be  ")), Script::from_iso15924_tag(Tag::from_bytes(b"Beee")));
    }

    #[test]
    fn script_simple() {
        // Arbitrary non-existent script.
        test_simple_tags("wwyz", Script::from_iso15924_tag(Tag::from_bytes(b"wWyZ")).unwrap());

        // These we don't really care about.
        test_simple_tags("zyyy", script::COMMON);
        test_simple_tags("zinh", script::INHERITED);
        test_simple_tags("zzzz", script::UNKNOWN);

        test_simple_tags("arab", script::ARABIC);
        test_simple_tags("copt", script::COPTIC);
        test_simple_tags("kana", script::KATAKANA);
        test_simple_tags("latn", script::LATIN);

        // These are trickier since their OT script tags have space.
        test_simple_tags("lao ", script::LAO);
        test_simple_tags("yi  ", script::YI);
        // Unicode-5.0 additions.
        test_simple_tags("nko ", script::NKO);
        // Unicode-5.1 additions.
        test_simple_tags("vai ", script::VAI);

        // https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags

        // Unicode-5.2 additions.
        test_simple_tags("mtei", script::MEETEI_MAYEK);
        // Unicode-6.0 additions.
        test_simple_tags("mand", script::MANDAIC);
    }

    // Generates a test that converts `$script` + `$lang` and, when any script
    // tag is produced, expects exactly `$tag`.
    macro_rules! test_script_from_language {
        ($name:ident, $tag:expr, $lang:expr, $script:expr) => {
            #[test]
            fn $name() {
                let tag = Tag::from_bytes_lossy($tag.as_bytes());
                let (scripts, _) = tags_from_script_and_language(
                    $script, Language::from_str($lang).ok().as_ref(),
                );
                if !scripts.is_empty() {
                    assert_eq!(scripts.as_slice(), &[tag]);
                }
            }
        };
    }

    test_script_from_language!(script_from_language_01, "", "", None);
    test_script_from_language!(script_from_language_02, "", "en", None);
    test_script_from_language!(script_from_language_03, "copt", "en", Some(script::COPTIC));
    test_script_from_language!(script_from_language_04, "", "x-hbsc", None);
    test_script_from_language!(script_from_language_05, "copt", "x-hbsc", Some(script::COPTIC));
    test_script_from_language!(script_from_language_06, "abc ", "x-hbscabc", None);
    test_script_from_language!(script_from_language_07, "deva", "x-hbscdeva", None);
    test_script_from_language!(script_from_language_08, "dev2", "x-hbscdev2", None);
    test_script_from_language!(script_from_language_09, "dev3", "x-hbscdev3", None);
    test_script_from_language!(script_from_language_10, "copt", "x-hbotpap0-hbsccopt", None);
    test_script_from_language!(script_from_language_11, "", "en-x-hbsc", None);
    test_script_from_language!(script_from_language_12, "copt", "en-x-hbsc", Some(script::COPTIC));
    test_script_from_language!(script_from_language_13, "abc ", "en-x-hbscabc", None);
    test_script_from_language!(script_from_language_14, "deva", "en-x-hbscdeva", None);
    test_script_from_language!(script_from_language_15, "dev2", "en-x-hbscdev2", None);
    test_script_from_language!(script_from_language_16, "dev3", "en-x-hbscdev3", None);
    test_script_from_language!(script_from_language_17, "copt", "en-x-hbotpap0-hbsccopt", None);

    #[test]
    fn script_indic() {
        fn check(tag1: &str, tag2: &str, tag3: &str, script: Script) {
            let tag1 = Tag::from_bytes_lossy(tag1.as_bytes());
            let tag2 = Tag::from_bytes_lossy(tag2.as_bytes());
            let tag3 = Tag::from_bytes_lossy(tag3.as_bytes());

            let (scripts, _) = tags_from_script_and_language(Some(script), None);
            assert_eq!(scripts.as_slice(), &[tag1, tag2, tag3]);
            assert_eq!(tag_to_script(tag1), Some(script));
            assert_eq!(tag_to_script(tag2), Some(script));
            assert_eq!(tag_to_script(tag3), Some(script));
        }

        check("bng3", "bng2", "beng", script::BENGALI);
        check("dev3", "dev2", "deva", script::DEVANAGARI);
        check("gjr3", "gjr2", "gujr", script::GUJARATI);
        check("gur3", "gur2", "guru", script::GURMUKHI);
        check("knd3", "knd2", "knda", script::KANNADA);
        check("mlm3", "mlm2", "mlym", script::MALAYALAM);
        check("ory3", "ory2", "orya", script::ORIYA);
        check("tml3", "tml2", "taml", script::TAMIL);
        check("tel3", "tel2", "telu", script::TELUGU);
    }

    // Generates a test that converts the lower-cased `$lang` and, when any
    // language tag is produced, expects the first one to be `$tag`.
    // TODO: swap tag and lang
    macro_rules! test_tag_from_language {
        ($name:ident, $tag:expr, $lang:expr) => {
            #[test]
            fn $name() {
                let tag = Tag::from_bytes_lossy($tag.as_bytes());
                let (_, languages) = tags_from_script_and_language(
                    None, Language::from_str(&$lang.to_lowercase()).ok().as_ref(),
                );
                if !languages.is_empty() {
                    assert_eq!(languages[0], tag);
                }
            }
        };
    }

    test_tag_from_language!(tag_from_language_dflt, "dflt", "");
    test_tag_from_language!(tag_from_language_ALT, "ALT", "alt");
    test_tag_from_language!(tag_from_language_ARA, "ARA", "ar");
    test_tag_from_language!(tag_from_language_AZE, "AZE", "az");
    test_tag_from_language!(tag_from_language_az_ir, "AZE", "az-ir");
    test_tag_from_language!(tag_from_language_az_az, "AZE", "az-az");
    test_tag_from_language!(tag_from_language_ENG, "ENG", "en");
    test_tag_from_language!(tag_from_language_en_US, "ENG", "en_US");
    test_tag_from_language!(tag_from_language_CJA, "CJA", "cja"); // Western Cham
    test_tag_from_language!(tag_from_language_CJM, "CJM", "cjm"); // Eastern Cham
    test_tag_from_language!(tag_from_language_ENV, "EVN", "eve");
    test_tag_from_language!(tag_from_language_HAL, "HAL", "cfm"); // BCP47 and current ISO639-3 code for Halam/Falam Chin
    test_tag_from_language!(tag_from_language_flm, "HAL", "flm"); // Retired ISO639-3 code for Halam/Falam Chin
    test_tag_from_language!(tag_from_language_hy, "HYE0", "hy");
    test_tag_from_language!(tag_from_language_hyw, "HYE", "hyw");
    test_tag_from_language!(tag_from_language_bgr, "QIN", "bgr"); // Bawm Chin
    test_tag_from_language!(tag_from_language_cbl, "QIN", "cbl"); // Bualkhaw Chin
    test_tag_from_language!(tag_from_language_cka, "QIN", "cka"); // Khumi Awa Chin
    test_tag_from_language!(tag_from_language_cmr, "QIN", "cmr"); // Mro-Khimi Chin
    test_tag_from_language!(tag_from_language_cnb, "QIN", "cnb"); // Chinbon Chin
    test_tag_from_language!(tag_from_language_cnh, "QIN", "cnh"); // Hakha Chin
    test_tag_from_language!(tag_from_language_cnk, "QIN", "cnk"); // Khumi Chin
    test_tag_from_language!(tag_from_language_cnw, "QIN", "cnw"); // Ngawn Chin
    test_tag_from_language!(tag_from_language_csh, "QIN", "csh"); // Asho Chin
    test_tag_from_language!(tag_from_language_csy, "QIN", "csy"); // Siyin Chin
    test_tag_from_language!(tag_from_language_ctd, "QIN", "ctd"); // Tedim Chin
    test_tag_from_language!(tag_from_language_czt, "QIN", "czt"); // Zotung Chin
    test_tag_from_language!(tag_from_language_dao, "QIN", "dao"); // Daai Chin
    test_tag_from_language!(tag_from_language_htl, "QIN", "hlt"); // Matu Chin
    test_tag_from_language!(tag_from_language_mrh, "QIN", "mrh"); // Mara Chin
    test_tag_from_language!(tag_from_language_pck, "QIN", "pck"); // Paite Chin
    test_tag_from_language!(tag_from_language_sez, "QIN", "sez"); // Senthang Chin
    test_tag_from_language!(tag_from_language_tcp, "QIN", "tcp"); // Tawr Chin
    test_tag_from_language!(tag_from_language_tcz, "QIN", "tcz"); // Thado Chin
    test_tag_from_language!(tag_from_language_yos, "QIN", "yos"); // Yos, deprecated by IANA in favor of Zou [zom]
    test_tag_from_language!(tag_from_language_zom, "QIN", "zom"); // Zou
    test_tag_from_language!(tag_from_language_FAR, "FAR", "fa");
    test_tag_from_language!(tag_from_language_fa_IR, "FAR", "fa_IR");
    test_tag_from_language!(tag_from_language_man, "MNK", "man");
    test_tag_from_language!(tag_from_language_SWA, "SWA", "aii"); // Swadaya Aramaic
    test_tag_from_language!(tag_from_language_SYR, "SYR", "syr"); // Syriac [macrolanguage]
    test_tag_from_language!(tag_from_language_amw, "SYR", "amw"); // Western Neo-Aramaic
    test_tag_from_language!(tag_from_language_cld, "SYR", "cld"); // Chaldean Neo-Aramaic
    test_tag_from_language!(tag_from_language_syc, "SYR", "syc"); // Classical Syriac
    test_tag_from_language!(tag_from_language_TUA, "TUA", "tru"); // Turoyo Aramaic
    test_tag_from_language!(tag_from_language_zh, "ZHS", "zh"); // Chinese
    test_tag_from_language!(tag_from_language_zh_cn, "ZHS", "zh-cn"); // Chinese (China)
    test_tag_from_language!(tag_from_language_zh_sg, "ZHS", "zh-sg"); // Chinese (Singapore)
    test_tag_from_language!(tag_from_language_zh_mo, "ZHTM", "zh-mo"); // Chinese (Macao)
    test_tag_from_language!(tag_from_language_zh_hant_mo, "ZHTM", "zh-hant-mo"); // Chinese (Macao)
    test_tag_from_language!(tag_from_language_zh_hans_mo, "ZHS", "zh-hans-mo"); // Chinese (Simplified, Macao)
    test_tag_from_language!(tag_from_language_ZHH, "ZHH", "zh-HK"); // Chinese (Hong Kong)
    test_tag_from_language!(tag_from_language_zh_HanT_hK, "ZHH", "zH-HanT-hK"); // Chinese (Hong Kong)
    test_tag_from_language!(tag_from_language_zh_HanS_hK, "ZHS", "zH-HanS-hK"); // Chinese (Simplified, Hong Kong)
    test_tag_from_language!(tag_from_language_zh_tw, "ZHT", "zh-tw"); // Chinese (Taiwan)
    test_tag_from_language!(tag_from_language_ZHS, "ZHS", "zh-Hans"); // Chinese (Simplified)
    test_tag_from_language!(tag_from_language_ZHT, "ZHT", "zh-Hant"); // Chinese (Traditional)
    test_tag_from_language!(tag_from_language_zh_xx, "ZHS", "zh-xx"); // Chinese (Other)
    test_tag_from_language!(tag_from_language_zh_Hans_TW, "ZHS", "zh-Hans-TW");
    test_tag_from_language!(tag_from_language_yue, "ZHH", "yue");
    test_tag_from_language!(tag_from_language_yue_Hant, "ZHH", "yue-Hant");
    test_tag_from_language!(tag_from_language_yue_Hans, "ZHS", "yue-Hans");
    test_tag_from_language!(tag_from_language_ABC, "ABC", "abc");
    test_tag_from_language!(tag_from_language_ABCD, "ABCD", "x-hbotabcd");
    test_tag_from_language!(tag_from_language_asdf_asdf_wer_x_hbotabc_zxc, "ABC", "asdf-asdf-wer-x-hbotabc-zxc");
    test_tag_from_language!(tag_from_language_asdf_asdf_wer_x_hbotabc, "ABC", "asdf-asdf-wer-x-hbotabc");
    test_tag_from_language!(tag_from_language_asdf_asdf_wer_x_hbotabcd, "ABCD", "asdf-asdf-wer-x-hbotabcd");
    test_tag_from_language!(tag_from_language_asdf_asdf_wer_x_hbot_zxc, "dflt", "asdf-asdf-wer-x-hbot-zxc");
    test_tag_from_language!(tag_from_language_xy, "dflt", "xy");
    test_tag_from_language!(tag_from_language_xyz, "XYZ", "xyz"); // Unknown ISO 639-3
    test_tag_from_language!(tag_from_language_xyz_qw, "XYZ", "xyz-qw"); // Unknown ISO 639-3

    // Invalid input. The precise answer does not matter, as long as it
    // does not crash or get into an infinite loop.
    test_tag_from_language!(tag_from_language__fonipa, "IPPH", "-fonipa");

    // Tags that contain "-fonipa" as a substring but which do not contain
    // the subtag "fonipa".
    test_tag_from_language!(tag_from_language_en_fonipax, "ENG", "en-fonipax");
    test_tag_from_language!(tag_from_language_en_x_fonipa, "ENG", "en-x-fonipa");
    test_tag_from_language!(tag_from_language_en_a_fonipa, "ENG", "en-a-fonipa");
    test_tag_from_language!(tag_from_language_en_a_qwe_b_fonipa, "ENG", "en-a-qwe-b-fonipa");

    // International Phonetic Alphabet
    test_tag_from_language!(tag_from_language_en_fonipa, "IPPH", "en-fonipa");
    test_tag_from_language!(tag_from_language_en_fonipax_fonipa, "IPPH", "en-fonipax-fonipa");
    test_tag_from_language!(tag_from_language_rm_ch_fonipa_sursilv_x_foobar, "IPPH", "rm-CH-fonipa-sursilv-x-foobar");
    test_tag_from_language!(tag_from_language_IPPH, "IPPH", "und-fonipa");
    test_tag_from_language!(tag_from_language_zh_fonipa, "IPPH", "zh-fonipa");

    // North American Phonetic Alphabet (Americanist Phonetic Notation)
    test_tag_from_language!(tag_from_language_en_fonnapa, "APPH", "en-fonnapa");
    test_tag_from_language!(tag_from_language_chr_fonnapa, "APPH", "chr-fonnapa");
    test_tag_from_language!(tag_from_language_APPH, "APPH", "und-fonnapa");

    // Khutsuri Georgian
    test_tag_from_language!(tag_from_language_ka_geok, "KGE", "ka-Geok");
    test_tag_from_language!(tag_from_language_KGE, "KGE", "und-Geok");

    // Irish Traditional
    test_tag_from_language!(tag_from_language_IRT, "IRT", "ga-Latg");

    // Moldavian
    test_tag_from_language!(tag_from_language_MOL, "MOL", "ro-MD");

    // Polytonic Greek
    test_tag_from_language!(tag_from_language_PGR, "PGR", "el-polyton");
    test_tag_from_language!(tag_from_language_el_CY_polyton, "PGR", "el-CY-polyton");

    // Estrangela Syriac
    test_tag_from_language!(tag_from_language_aii_Syre, "SYRE", "aii-Syre");
    test_tag_from_language!(tag_from_language_de_Syre, "SYRE", "de-Syre");
    test_tag_from_language!(tag_from_language_syr_Syre, "SYRE", "syr-Syre");
    test_tag_from_language!(tag_from_language_und_Syre, "SYRE", "und-Syre");

    // Western Syriac
    test_tag_from_language!(tag_from_language_aii_Syrj, "SYRJ", "aii-Syrj");
    test_tag_from_language!(tag_from_language_de_Syrj, "SYRJ", "de-Syrj");
    test_tag_from_language!(tag_from_language_syr_Syrj, "SYRJ", "syr-Syrj");
    test_tag_from_language!(tag_from_language_SYRJ, "SYRJ", "und-Syrj");

    // Eastern Syriac
    test_tag_from_language!(tag_from_language_aii_Syrn, "SYRN", "aii-Syrn");
    test_tag_from_language!(tag_from_language_de_Syrn, "SYRN", "de-Syrn");
    test_tag_from_language!(tag_from_language_syr_Syrn, "SYRN", "syr-Syrn");
    test_tag_from_language!(tag_from_language_SYRN, "SYRN", "und-Syrn");

    // Test that x-hbot overrides the base language
    test_tag_from_language!(tag_from_language_fa_x_hbotabc_zxc, "ABC", "fa-x-hbotabc-zxc");
    test_tag_from_language!(tag_from_language_fa_ir_x_hbotabc_zxc, "ABC", "fa-ir-x-hbotabc-zxc");
    test_tag_from_language!(tag_from_language_zh_x_hbotabc_zxc, "ABC", "zh-x-hbotabc-zxc");
    test_tag_from_language!(tag_from_language_zh_cn_x_hbotabc_zxc, "ABC", "zh-cn-x-hbotabc-zxc");
    test_tag_from_language!(tag_from_language_zh_xy_x_hbotabc_zxc, "ABC", "zh-xy-x-hbotabc-zxc");
    test_tag_from_language!(tag_from_language_xyz_xy_x_hbotabc_zxc, "ABC", "xyz-xy-x-hbotabc-zxc");

    // Unnormalized BCP 47 tags
    test_tag_from_language!(tag_from_language_ar_aao, "ARA", "ar-aao");
    test_tag_from_language!(tag_from_language_art_lojban, "JBO", "art-lojban");
    test_tag_from_language!(tag_from_language_kok_gom, "KOK", "kok-gom");
    test_tag_from_language!(tag_from_language_i_lux, "LTZ", "i-lux");
    test_tag_from_language!(tag_from_language_drh, "MNG", "drh");
    test_tag_from_language!(tag_from_language_ar_ary1, "MOR", "ar-ary");
    test_tag_from_language!(tag_from_language_ar_ary_DZ, "MOR", "ar-ary-DZ");
    test_tag_from_language!(tag_from_language_no_bok, "NOR", "no-bok");
    test_tag_from_language!(tag_from_language_no_nyn, "NYN", "no-nyn");
    test_tag_from_language!(tag_from_language_i_hak, "ZHS", "i-hak");
    test_tag_from_language!(tag_from_language_zh_guoyu, "ZHS", "zh-guoyu");
    test_tag_from_language!(tag_from_language_zh_min, "ZHS", "zh-min");
    test_tag_from_language!(tag_from_language_zh_min_nan, "ZHS", "zh-min-nan");
    test_tag_from_language!(tag_from_language_zh_xiang, "ZHS", "zh-xiang");

    // BCP 47 tags that look similar to unrelated language system tags
    test_tag_from_language!(tag_from_language_als, "SQI", "als");
    test_tag_from_language!(tag_from_language_far, "dflt", "far");

    // A UN M.49 region code, not an extended language subtag
    test_tag_from_language!(tag_from_language_ar_001, "ARA", "ar-001");

    // An invalid tag
    test_tag_from_language!(tag_from_language_invalid, "TRK", "tr@foo=bar");

    // Generates a test that checks the complete script and language tag lists.
    macro_rules! test_tags {
        ($name:ident, $script:expr, $lang:expr, $scripts:expr, $langs:expr) => {
            #[test]
            fn $name() {
                let (scripts, languages) = tags_from_script_and_language(
                    $script, Language::from_str($lang).ok().as_ref(),
                );

                let exp_scripts: Vec<Tag> = $scripts.iter().map(|v| Tag::from_bytes_lossy(*v)).collect();
                let exp_langs: Vec<Tag> = $langs.iter().map(|v| Tag::from_bytes_lossy(*v)).collect();

                assert_eq!(exp_scripts, scripts.as_slice());
                assert_eq!(exp_langs, languages.as_slice());
            }
        };
    }

    test_tags!(tag_full_en, None, "en", &[], &[b"ENG"]);
    test_tags!(tag_full_en_x_hbscdflt, None, "en-x-hbscdflt", &[b"DFLT"], &[b"ENG"]);
    test_tags!(tag_full_en_latin, Some(script::LATIN), "en", &[b"latn"], &[b"ENG"]);
    test_tags!(tag_full_und_fonnapa, None, "und-fonnapa", &[], &[b"APPH"]);
    test_tags!(tag_full_en_fonnapa, None, "en-fonnapa", &[], &[b"APPH"]);
    test_tags!(tag_full_x_hbot1234_hbsc5678, None, "x-hbot1234-hbsc5678", &[b"5678"], &[b"1234"]);
    test_tags!(tag_full_x_hbsc5678_hbot1234, None, "x-hbsc5678-hbot1234", &[b"5678"], &[b"1234"]);
    test_tags!(tag_full_ml, Some(script::MALAYALAM), "ml", &[b"mlm3", b"mlm2", b"mlym"], &[b"MAL", b"MLR"]);
    test_tags!(tag_full_xyz, None, "xyz", &[], &[b"XYZ"]);
    test_tags!(tag_full_xy, None, "xy", &[], &[]);
}
655