// lib.rs source code [crates/chrono_tz_build/src/lib.rs]

extern crate parse_zoneinfo;
#[cfg(feature = "filter-by-regex")]
extern crate regex;

use std::collections::BTreeSet;
use std::env;
use std::fs::File;
use std::io::{self, BufRead, BufReader, BufWriter, Write};
use std::path::Path;

use parse_zoneinfo::line::{Line, LineParser};
use parse_zoneinfo::structure::{Child, Structure};
use parse_zoneinfo::table::{Table, TableBuilder};
use parse_zoneinfo::transitions::FixedTimespan;
use parse_zoneinfo::transitions::TableTransitions;
16
/// The name of the environment variable which possibly holds the filter regex.
///
/// `main` announces it to Cargo via `cargo:rerun-if-env-changed`, and the
/// `filter-by-regex` flavour of the `filter` module reads its value.
const FILTER_ENV_VAR_NAME: &str = "CHRONO_TZ_TIMEZONE_FILTER";
19
// Cut `line` at the first '#', dropping the '#' and everything after it.
// Lines without a '#' are returned unchanged.
//
// This function is needed until parse_zoneinfo handles comments correctly.
// Technically a '#' symbol could occur between double quotes and should be
// ignored in this case, however this never happens in the tz database as it
// stands.
fn strip_comments(mut line: String) -> String {
    if let Some(pos) = line.find('#') {
        // `pos` is the byte offset of an ASCII '#', so it is always a valid
        // char boundary for `truncate`.
        line.truncate(pos);
    }
    line
}
30
31	// Generate a list of the time zone periods beyond the first that apply
32	// to this zone, as a string representation of a static slice.
33	fn format_rest(rest: Vec<(i64, FixedTimespan)>) -> String {
34	let mut ret = "&[`\n`".to_string();
35	for (
36	start,
37	FixedTimespan {
38	utc_offset,
39	dst_offset,
40	name,
41	},
42	) in rest
43	{
44	let timespan_name = match name.as_ref() {
45	"%z" => None,
46	name => Some(name),
47	};
48	ret.push_str(&format!(
49	" ({start}, FixedTimespan `{{` \
50	utc_offset: {utc}, dst_offset: {dst}, name: {name:?} \
51	`}}`),`\n`",
52	start = start,
53	utc = utc_offset,
54	dst = dst_offset,
55	name = timespan_name,
56	));
57	}
58	ret.push_str(" ]");
59	ret
60	}
61
// Convert all '/' to '__', all '+' to 'Plus' and '-' to 'Minus', unless
// it's a hyphen, in which case remove it. This is so the names can be used
// as rust identifiers.
//
// Whether '-' means "minus" or "hyphen" is decided once, from the character
// that follows the FIRST '-' in the name: if it is numeric every '-' becomes
// "Minus", otherwise every '-' is removed.
fn convert_bad_chars(name: &str) -> String {
    let name = name.replace('/', "__").replace('+', "Plus");
    match name.find('-') {
        // '-' is a single ASCII byte, so `pos + 1` is a valid char boundary
        // (possibly the end of the string, giving an empty tail).
        Some(pos) if name[pos + 1..].starts_with(char::is_numeric) => {
            name.replace('-', "Minus")
        }
        Some(_) => name.replace('-', ""),
        None => name,
    }
}
82
83	// The timezone file contains impls of `Timespans` for all timezones in the
84	// database. The `Wrap` wrapper in the `timezone_impl` module then implements
85	// TimeZone for any contained struct that implements `Timespans`.
86	fn write_timezone_file(timezone_file: &mut File, table: &Table) -> io::Result<()> {
87	let zones = table
88	.zonesets
89	.keys()
90	.chain(table.links.keys())
91	.collect::<BTreeSet<_>>();
92	writeln!(
93	timezone_file,
94	"use core::fmt::`{{`self, Debug, Display, Formatter`}}`;",
95	)?;
96	writeln!(timezone_file, "use core::str::FromStr;`\n`",)?;
97	writeln!(
98	timezone_file,
99	"use crate::timezone_impl::`{{`TimeSpans, FixedTimespanSet, FixedTimespan`}}`;`\n`",
100	)?;
101	writeln!(
102	timezone_file,
103	"/// TimeZones built at compile time from the tz database
104	///
105	/// This implements [`chrono::TimeZone`] so that it may be used in and to
106	/// construct chrono's DateTime type. See the root module documentation
107	/// for details."
108	)?;
109	writeln!(timezone_file, "#[derive(Clone, Copy, PartialEq, Eq, Hash)]")?;
110	writeln!(
111	timezone_file,
112	r#"#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]"#
113	)?;
114	writeln!(timezone_file, "pub enum Tz `{{`")?;
115	for zone in &zones {
116	let zone_name = convert_bad_chars(zone);
117	writeln!(
118	timezone_file,
119	" /// {raw_zone_name}`\n` {zone},",
120	zone = zone_name,
121	raw_zone_name = zone
122	)?;
123	}
124	writeln!(timezone_file, "`}}`")?;
125
126	let mut map = phf_codegen::Map::new();
127	for zone in &zones {
128	map.entry(zone, &format!("Tz::{}", convert_bad_chars(zone)));
129	}
130	writeln!(
131	timezone_file,
132	"static TIMEZONES: ::phf::Map<&'static str, Tz> = `\n`{};",
133	map.build()
134	)?;
135
136	#[cfg(feature = "case-insensitive")]
137	{
138	writeln!(timezone_file, "use uncased::UncasedStr;`\n`",)?;
139	let mut map = phf_codegen::Map::new();
140	for zone in &zones {
141	map.entry(
142	uncased::UncasedStr::new(zone),
143	&format!("Tz::{}", convert_bad_chars(zone)),
144	);
145	}
146	writeln!(
147	timezone_file,
148	"static TIMEZONES_UNCASED: ::phf::Map<&'static uncased::UncasedStr, Tz> = `\n`{};",
149	map.build()
150	)?;
151	}
152
153	writeln!(
154	timezone_file,
155	r#"#[derive(Copy, Clone, Debug, PartialEq, Eq)]
156	pub struct ParseError(());
157
158	impl Display for ParseError `{{`
159	fn fmt(&self, f: &mut Formatter) -> fmt::Result `{{`
160	f.write_str("failed to parse timezone")
161	`}}`
162	`}}`
163
164	#[cfg(feature = "std")]
165	impl std::error::Error for ParseError `{{}}`
166
167	impl FromStr for Tz `{{`
168	type Err = ParseError;
169	fn from_str(s: &str) -> Result<Self, Self::Err> `{{`
170	return TIMEZONES.get(s).cloned().ok_or(ParseError(()));
171	`}}`
172	`}}`
173	"#
174	)?;
175
176	writeln!(
177	timezone_file,
178	"impl Tz `{{`
179	pub fn name(self) -> &'static str `{{`
180	match self `{{`"
181	)?;
182	for zone in &zones {
183	let zone_name = convert_bad_chars(zone);
184	writeln!(
185	timezone_file,
186	" Tz::{zone} => `\"`{raw_zone_name}`\"`,",
187	zone = zone_name,
188	raw_zone_name = zone
189	)?;
190	}
191	writeln!(
192	timezone_file,
193	" `}}`
194	`}}`"
195	)?;
196
197	#[cfg(feature = "case-insensitive")]
198	{
199	writeln!(
200	timezone_file,
201	r#"
202	#[cfg(feature = "case-insensitive")]
203	/// Parses a timezone string in a case-insensitive way
204	pub fn from_str_insensitive(s: &str) -> Result<Self, ParseError> {{
205	return TIMEZONES_UNCASED.get(s.into()).cloned().ok_or(ParseError(()));
206	}}"#
207	)?;
208	}
209
210	writeln!(timezone_file, "`}}`")?;
211
212	writeln!(
213	timezone_file,
214	"impl Debug for Tz `{{`
215	fn fmt(&self, f: &mut Formatter) -> fmt::Result `{{`
216	f.write_str(self.name().as_ref())
217	`}}`
218	`}}\n`"
219	)?;
220	writeln!(
221	timezone_file,
222	"impl Display for Tz `{{`
223	fn fmt(&self, f: &mut Formatter) -> fmt::Result `{{`
224	f.write_str(self.name().as_ref())
225	`}}`
226	`}}\n`"
227	)?;
228	writeln!(
229	timezone_file,
230	"impl TimeSpans for Tz `{{`
231	fn timespans(&self) -> FixedTimespanSet `{{`
232	match *self `{{`"
233	)?;
234	for zone in &zones {
235	let timespans = table.timespans(zone).unwrap();
236	let zone_name = convert_bad_chars(zone);
237	let timespan_name = match timespans.first.name.as_ref() {
238	"%z" => None,
239	name => Some(name),
240	};
241	writeln!(
242	timezone_file,
243	" Tz::{zone} => `{{`
244	const REST: &[(i64, FixedTimespan)] = {rest};
245	FixedTimespanSet `{{`
246	first: FixedTimespan `{{`
247	utc_offset: {utc},
248	dst_offset: {dst},
249	name: {name:?},
250	`}}`,
251	rest: REST
252	`}}`
253	`}}`,`\n`",
254	zone = zone_name,
255	rest = format_rest(timespans.rest),
256	utc = timespans.first.utc_offset,
257	dst = timespans.first.dst_offset,
258	name = timespan_name,
259	)?;
260	}
261	write!(
262	timezone_file,
263	" `}}`
264	`}}`
265	`}}\n`"
266	)?;
267	write!(
268	timezone_file,
269	"/// An array of every known variant
270	///
271	/// Useful for iterating over known timezones:
272	///
273	/// ```
274	/// use chrono_tz::`{{`TZ_VARIANTS, Tz`}}`;
275	/// assert!(TZ_VARIANTS.iter().any(\|v\| *v == Tz::UTC));
276	/// ```
277	pub static TZ_VARIANTS: [Tz; {num}] = [
278	",
279	num = zones.len()
280	)?;
281	for zone in &zones {
282	writeln!(
283	timezone_file,
284	" Tz::{zone},",
285	zone = convert_bad_chars(zone)
286	)?;
287	}
288	write!(timezone_file, "];")?;
289	Ok(())
290	}
291
292	// Create a file containing nice-looking re-exports such as Europe::London
293	// instead of having to use chrono_tz::timezones::Europe__London
294	fn write_directory_file(directory_file: &mut File, table: &Table, version: &str) -> io::Result<()> {
295	// expose the underlying IANA TZDB version
296	writeln!(
297	directory_file,
298	"pub const IANA_TZDB_VERSION : &str = `\"`{version}`\"`;`\n`"
299	)?;
300	// add the `loose' zone definitions first
301	writeln!(directory_file, "use crate::timezones::Tz;`\n`")?;
302	let zones = table
303	.zonesets
304	.keys()
305	.chain(table.links.keys())
306	.filter(\|zone\| !zone.contains('/'))
307	.collect::<BTreeSet<_>>();
308	for zone in zones {
309	let zone = convert_bad_chars(zone);
310	writeln!(
311	directory_file,
312	"pub const {name} : Tz = Tz::{name};",
313	name = zone
314	)?;
315	}
316	writeln!(directory_file)?;
317
318	// now add the `structured' zone names in submodules
319	for entry in table.structure() {
320	if entry.name.contains('/') {
321	continue;
322	}
323	let module_name = convert_bad_chars(entry.name);
324	writeln!(directory_file, "pub mod {name} `{{`", name = module_name)?;
325	writeln!(directory_file, " use crate::timezones::Tz;`\n`",)?;
326	for child in entry.children {
327	match child {
328	Child::Submodule(name) => {
329	let submodule_name = convert_bad_chars(name);
330	writeln!(
331	directory_file,
332	" pub mod {name} `{{`",
333	name = submodule_name
334	)?;
335	writeln!(directory_file, " use crate::timezones::Tz;`\n`",)?;
336	let full_name = entry.name.to_string() + "/" + name;
337	for entry in table.structure() {
338	if entry.name == full_name {
339	for child in entry.children {
340	match child {
341	Child::Submodule(_) => {
342	panic!("Depth of > 3 nested submodules not implemented!")
343	}
344	Child::TimeZone(name) => {
345	let converted_name = convert_bad_chars(name);
346	writeln!(directory_file,
347	" pub const {name} : Tz = Tz::{module_name}__{submodule_name}__{name};",
348	module_name = module_name,
349	submodule_name = submodule_name,
350	name = converted_name,
351	)?;
352	}
353	}
354	}
355	}
356	}
357	writeln!(directory_file, " `}}\n`")?;
358	}
359	Child::TimeZone(name) => {
360	let name = convert_bad_chars(name);
361	writeln!(
362	directory_file,
363	" pub const {name} : Tz = Tz::{module_name}__{name};",
364	module_name = module_name,
365	name = name
366	)?;
367	}
368	}
369	}
370	writeln!(directory_file, "`}}\n`")?;
371	}
372	Ok(())
373	}
374
375	/// Stub module because filter-by-regex feature is not enabled
376	#[cfg(not(feature = "filter-by-regex"))]
377	mod filter {
378	/// stub function because filter-by-regex feature is not enabled
379	pub(crate) fn maybe_filter_timezone_table(_table: &mut super::Table) {}
380	}
381
/// Module containing code supporting filter-by-regex feature
///
/// The "GMT" and "UTC" time zones are always included.
#[cfg(feature = "filter-by-regex")]
mod filter {
    use std::collections::HashSet;
    use std::env;

    use regex::Regex;

    use crate::{Table, FILTER_ENV_VAR_NAME};

    /// Upper bound on the number of closure passes over `table.links`.
    const MAX_CLOSURE_PASSES: u32 = 50;

    /// Filter `table` by applying [`FILTER_ENV_VAR_NAME`].
    ///
    /// Does nothing when the variable is unset, empty or whitespace-only.
    pub(crate) fn maybe_filter_timezone_table(table: &mut Table) {
        if let Some(filter_regex) = get_filter_regex() {
            filter_timezone_table(table, filter_regex);
        }
    }

    /// Checks the `CHRONO_TZ_TIMEZONE_FILTER` environment variable.
    /// Converts it to a regex if set. Panics if the regex is not valid, as we want
    /// to fail the build if that happens.
    ///
    /// Also panics if the value is not valid Unicode. Returns `None` when the
    /// variable is absent or trims to the empty string.
    fn get_filter_regex() -> Option<Regex> {
        match env::var(FILTER_ENV_VAR_NAME) {
            Ok(val) => {
                let val = val.trim();
                if val.is_empty() {
                    return None;
                }
                match Regex::new(val) {
                    Ok(regex) => Some(regex),
                    Err(err) => panic!(
                        "The value '{:?}' for environment variable {} is not a valid regex, err={}",
                        val, FILTER_ENV_VAR_NAME, err
                    ),
                }
            }
            Err(env::VarError::NotPresent) => None,
            Err(env::VarError::NotUnicode(s)) => panic!(
                "The value '{:?}' for environment variable {} is not valid Unicode",
                s, FILTER_ENV_VAR_NAME
            ),
        }
    }

    /// Insert a new name in the list of names to keep. If the name has 3
    /// parts, then also insert the 2-part prefix. If we don't do this we will lose
    /// half of Indiana in `directory.rs`. But we *don't* want to keep one-part names,
    /// otherwise we will inevitably end up with 'America' and include too much as
    /// a consequence.
    fn insert_keep_entry(keep: &mut HashSet<String>, new_value: &str) {
        let mut parts = new_value.split('/');
        // Matches only names with exactly three '/'-separated parts.
        if let (Some(p1), Some(p2), Some(_), None) =
            (parts.next(), parts.next(), parts.next(), parts.next())
        {
            keep.insert(format!("{}/{}", p1, p2));
        }

        keep.insert(new_value.to_string());
    }

    /// Filter `table` by applying `filter_regex`.
    ///
    /// Links are kept when either side ends up in the keep-set; zonesets are
    /// kept when they match the regex directly or start with any kept name.
    fn filter_timezone_table(table: &mut Table, filter_regex: Regex) {
        // Compute the transitive closure of things to keep.
        // Doing this, instead of just filtering `zonesets` and `links` by the
        // regex, helps to keep the `structure()` intact.
        let mut keep = HashSet::new();
        for (k, v) in &table.links {
            if filter_regex.is_match(k) || k == "GMT" || k == "UTC" {
                insert_keep_entry(&mut keep, k);
            }
            // The "GMT"/"UTC" test is on the link name `k`, so the target of
            // an always-kept link is kept as well.
            if filter_regex.is_match(v) || k == "GMT" || k == "UTC" {
                insert_keep_entry(&mut keep, v);
            }
        }

        // Repeat until a pass adds nothing new (fixed point), bounded by
        // MAX_CLOSURE_PASSES.
        let mut n = 0;
        loop {
            let len = keep.len();

            for (k, v) in &table.links {
                if keep.contains(k) && !keep.contains(v) {
                    insert_keep_entry(&mut keep, v);
                }
                if keep.contains(v) && !keep.contains(k) {
                    insert_keep_entry(&mut keep, k);
                }
            }

            if keep.len() == len {
                break;
            }

            n += 1;
            if n == MAX_CLOSURE_PASSES {
                println!("cargo:warning=Recursion limit reached while building filter list");
                break;
            }
        }

        // Actually do the filtering.
        table
            .links
            .retain(|k, v| keep.contains(k) || keep.contains(v));

        table
            .zonesets
            .retain(|k, _| filter_regex.is_match(k) || keep.iter().any(|s| k.starts_with(s)));
    }
}
492
/// Extract the IANA tz database version from `$CARGO_MANIFEST_DIR/tz/NEWS`.
///
/// Scans the file for the first line of the form `Release <version> - <rest>`
/// and returns `<version>`.
///
/// # Panics
///
/// Panics if `CARGO_MANIFEST_DIR` is not set, if the NEWS file cannot be
/// opened or read, or if no matching `Release` line is found.
fn detect_iana_db_version() -> String {
    let root = env::var("CARGO_MANIFEST_DIR").expect("no Cargo build context");
    let path = Path::new(&root).join("tz/NEWS");
    let file = File::open(&path)
        .unwrap_or_else(|e| panic!("failed to open {}: {}", path.display(), e));

    for line in BufReader::new(file).lines() {
        // Report a read failure directly instead of silently ending the scan.
        let line =
            line.unwrap_or_else(|e| panic!("failed to read {}: {}", path.display(), e));

        let version = line
            .strip_prefix("Release ")
            .and_then(|rest| rest.split_once(" - "))
            .map(|(version, _)| version);
        if let Some(version) = version {
            return version.to_owned();
        }
    }

    // Reachable whenever the file has no well-formed "Release" line, so this
    // is a real failure, not an `unreachable!` invariant.
    panic!("no version found in {}", path.display())
}
513
514	pub fn main() {
515	println!("cargo:rerun-if-env-changed={}", FILTER_ENV_VAR_NAME);
516
517	let parser = LineParser::new();
518	let mut table = TableBuilder::new();
519
520	let tzfiles = [
521	"tz/africa",
522	"tz/antarctica",
523	"tz/asia",
524	"tz/australasia",
525	"tz/backward",
526	"tz/etcetera",
527	"tz/europe",
528	"tz/northamerica",
529	"tz/southamerica",
530	];
531
532	let lines = tzfiles
533	.iter()
534	.map(Path::new)
535	.map(\|p\| {
536	Path::new(&env::var("CARGO_MANIFEST_DIR").unwrap_or_else(\|_\| String::new())).join(p)
537	})
538	.map(\|path\| {
539	File::open(&path).unwrap_or_else(\|e\| panic!("cannot open {}: {}", path.display(), e))
540	})
541	.map(BufReader::new)
542	.flat_map(BufRead::lines)
543	.map(Result::unwrap)
544	.map(strip_comments);
545
546	for line in lines {
547	match parser.parse_str(&line).unwrap() {
548	Line::Zone(zone) => table.add_zone_line(zone).unwrap(),
549	Line::Continuation(cont) => table.add_continuation_line(cont).unwrap(),
550	Line::Rule(rule) => table.add_rule_line(rule).unwrap(),
551	Line::Link(link) => table.add_link_line(link).unwrap(),
552	Line::Space => {}
553	}
554	}
555
556	let mut table = table.build();
557	filter::maybe_filter_timezone_table(&mut table);
558
559	let timezone_path = Path::new(&env::var("OUT_DIR").unwrap()).join("timezones.rs");
560	let mut timezone_file = File::create(timezone_path).unwrap();
561	write_timezone_file(&mut timezone_file, &table).unwrap();
562
563	let directory_path = Path::new(&env::var("OUT_DIR").unwrap()).join("directory.rs");
564	let mut directory_file = File::create(directory_path).unwrap();
565	let version = detect_iana_db_version();
566	write_directory_file(&mut directory_file, &table, &version).unwrap();
567	}
568