1 | extern crate parse_zoneinfo; |
2 | #[cfg (feature = "filter-by-regex" )] |
3 | extern crate regex; |
4 | |
5 | use std::collections::BTreeSet; |
6 | use std::env; |
7 | use std::fs::File; |
8 | use std::io::{self, BufRead, BufReader, Write}; |
9 | use std::path::Path; |
10 | |
11 | use parse_zoneinfo::line::{Line, LineParser}; |
12 | use parse_zoneinfo::structure::{Child, Structure}; |
13 | use parse_zoneinfo::table::{Table, TableBuilder}; |
14 | use parse_zoneinfo::transitions::FixedTimespan; |
15 | use parse_zoneinfo::transitions::TableTransitions; |
16 | |
/// The name of the environment variable which possibly holds the filter regex.
///
/// `main` announces this variable to cargo via `rerun-if-env-changed`, and the
/// `filter` module compiled with the `filter-by-regex` feature reads it to
/// decide which time zones to keep.
const FILTER_ENV_VAR_NAME: &str = "CHRONO_TZ_TIMEZONE_FILTER" ;
19 | |
/// Removes a trailing `#` comment from one line of tz database source.
///
/// Everything from the first `#` to the end of the line is dropped; a line
/// without a `#` is returned unchanged.
///
/// This function is needed until `parse_zoneinfo` handles comments correctly.
/// Technically a `#` symbol could occur between double quotes and should be
/// ignored in that case, however this never happens in the tz database as it
/// stands.
fn strip_comments(mut line: String) -> String {
    if let Some(pos) = line.find('#') {
        // `pos` is the byte offset of an ASCII '#', so it is always a valid
        // char boundary for `truncate`.
        line.truncate(pos);
    }
    line
}
30 | |
31 | // Generate a list of the time zone periods beyond the first that apply |
32 | // to this zone, as a string representation of a static slice. |
33 | fn format_rest(rest: Vec<(i64, FixedTimespan)>) -> String { |
34 | let mut ret = "&[ \n" .to_string(); |
35 | for ( |
36 | start, |
37 | FixedTimespan { |
38 | utc_offset, |
39 | dst_offset, |
40 | name, |
41 | }, |
42 | ) in rest |
43 | { |
44 | let timespan_name = match name.as_ref() { |
45 | "%z" => None, |
46 | name => Some(name), |
47 | }; |
48 | ret.push_str(&format!( |
49 | " ( {start}, FixedTimespan {{ \ |
50 | utc_offset: {utc}, dst_offset: {dst}, name: {name:?} \ |
51 | }}), \n" , |
52 | start = start, |
53 | utc = utc_offset, |
54 | dst = dst_offset, |
55 | name = timespan_name, |
56 | )); |
57 | } |
58 | ret.push_str(" ]" ); |
59 | ret |
60 | } |
61 | |
/// Converts a tz database name into text usable as a Rust identifier.
///
/// * every `/` becomes `__`
/// * every `+` becomes `Plus`
/// * if the first `-` is directly followed by a numeric character it is
///   treated as a sign (as in `Etc/GMT-5`) and every `-` becomes `Minus`;
///   otherwise it is treated as a hyphen (as in `Port-au-Prince`) and every
///   `-` is removed
///
/// Only the first `-` is inspected; the decision then applies to all of them.
fn convert_bad_chars(name: &str) -> String {
    let name = name.replace('/', "__").replace('+', "Plus");
    match name.find('-') {
        Some(pos) => {
            // '-' is one byte wide, so `pos + 1` is a valid slice start even
            // when the '-' is the last character (yielding an empty tail).
            let is_sign = matches!(name[pos + 1..].chars().next(), Some(c) if c.is_numeric());
            name.replace('-', if is_sign { "Minus" } else { "" })
        }
        None => name,
    }
}
82 | |
83 | // The timezone file contains impls of `Timespans` for all timezones in the |
84 | // database. The `Wrap` wrapper in the `timezone_impl` module then implements |
85 | // TimeZone for any contained struct that implements `Timespans`. |
86 | fn write_timezone_file(timezone_file: &mut File, table: &Table) -> io::Result<()> { |
87 | let zones = table |
88 | .zonesets |
89 | .keys() |
90 | .chain(table.links.keys()) |
91 | .collect::<BTreeSet<_>>(); |
92 | writeln!( |
93 | timezone_file, |
94 | "use core::fmt:: {{self, Debug, Display, Formatter }};" , |
95 | )?; |
96 | writeln!(timezone_file, "use core::str::FromStr; \n" ,)?; |
97 | writeln!( |
98 | timezone_file, |
99 | "use crate::timezone_impl:: {{TimeSpans, FixedTimespanSet, FixedTimespan }}; \n" , |
100 | )?; |
101 | writeln!( |
102 | timezone_file, |
103 | "/// TimeZones built at compile time from the tz database |
104 | /// |
105 | /// This implements [`chrono::TimeZone`] so that it may be used in and to |
106 | /// construct chrono's DateTime type. See the root module documentation |
107 | /// for details." |
108 | )?; |
109 | writeln!(timezone_file, "#[derive(Clone, Copy, PartialEq, Eq, Hash)]" )?; |
110 | writeln!( |
111 | timezone_file, |
112 | r#"#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]"# |
113 | )?; |
114 | writeln!(timezone_file, "pub enum Tz {{" )?; |
115 | for zone in &zones { |
116 | let zone_name = convert_bad_chars(zone); |
117 | writeln!( |
118 | timezone_file, |
119 | " /// {raw_zone_name}\n {zone}," , |
120 | zone = zone_name, |
121 | raw_zone_name = zone |
122 | )?; |
123 | } |
124 | writeln!(timezone_file, " }}" )?; |
125 | |
126 | let mut map = phf_codegen::Map::new(); |
127 | for zone in &zones { |
128 | map.entry(zone, &format!("Tz:: {}" , convert_bad_chars(zone))); |
129 | } |
130 | writeln!( |
131 | timezone_file, |
132 | "static TIMEZONES: ::phf::Map<&'static str, Tz> = \n{};" , |
133 | map.build() |
134 | )?; |
135 | |
136 | #[cfg (feature = "case-insensitive" )] |
137 | { |
138 | writeln!(timezone_file, "use uncased::UncasedStr; \n" ,)?; |
139 | let mut map = phf_codegen::Map::new(); |
140 | for zone in &zones { |
141 | map.entry( |
142 | uncased::UncasedStr::new(zone), |
143 | &format!("Tz::{}" , convert_bad_chars(zone)), |
144 | ); |
145 | } |
146 | writeln!( |
147 | timezone_file, |
148 | "static TIMEZONES_UNCASED: ::phf::Map<&'static uncased::UncasedStr, Tz> = \n{};" , |
149 | map.build() |
150 | )?; |
151 | } |
152 | |
153 | writeln!( |
154 | timezone_file, |
155 | r#"#[derive(Copy, Clone, Debug, PartialEq, Eq)] |
156 | pub struct ParseError(()); |
157 | |
158 | impl Display for ParseError {{ |
159 | fn fmt(&self, f: &mut Formatter) -> fmt::Result {{ |
160 | f.write_str("failed to parse timezone") |
161 | }} |
162 | }} |
163 | |
164 | #[cfg(feature = "std")] |
165 | impl std::error::Error for ParseError {{}} |
166 | |
167 | impl FromStr for Tz {{ |
168 | type Err = ParseError; |
169 | fn from_str(s: &str) -> Result<Self, Self::Err> {{ |
170 | return TIMEZONES.get(s).cloned().ok_or(ParseError(())); |
171 | }} |
172 | }} |
173 | "# |
174 | )?; |
175 | |
176 | writeln!( |
177 | timezone_file, |
178 | "impl Tz {{ |
179 | pub fn name(self) -> &'static str {{ |
180 | match self {{" |
181 | )?; |
182 | for zone in &zones { |
183 | let zone_name = convert_bad_chars(zone); |
184 | writeln!( |
185 | timezone_file, |
186 | " Tz:: {zone} => \"{raw_zone_name}\"," , |
187 | zone = zone_name, |
188 | raw_zone_name = zone |
189 | )?; |
190 | } |
191 | writeln!( |
192 | timezone_file, |
193 | " }} |
194 | }}" |
195 | )?; |
196 | |
197 | #[cfg (feature = "case-insensitive" )] |
198 | { |
199 | writeln!( |
200 | timezone_file, |
201 | r#" |
202 | #[cfg(feature = "case-insensitive")] |
203 | /// Parses a timezone string in a case-insensitive way |
204 | pub fn from_str_insensitive(s: &str) -> Result<Self, ParseError> {{ |
205 | return TIMEZONES_UNCASED.get(s.into()).cloned().ok_or(ParseError(())); |
206 | }}"# |
207 | )?; |
208 | } |
209 | |
210 | writeln!(timezone_file, " }}" )?; |
211 | |
212 | writeln!( |
213 | timezone_file, |
214 | "impl Debug for Tz {{ |
215 | fn fmt(&self, f: &mut Formatter) -> fmt::Result {{ |
216 | f.write_str(self.name().as_ref()) |
217 | }} |
218 | }}\n" |
219 | )?; |
220 | writeln!( |
221 | timezone_file, |
222 | "impl Display for Tz {{ |
223 | fn fmt(&self, f: &mut Formatter) -> fmt::Result {{ |
224 | f.write_str(self.name().as_ref()) |
225 | }} |
226 | }}\n" |
227 | )?; |
228 | writeln!( |
229 | timezone_file, |
230 | "impl TimeSpans for Tz {{ |
231 | fn timespans(&self) -> FixedTimespanSet {{ |
232 | match *self {{" |
233 | )?; |
234 | for zone in &zones { |
235 | let timespans = table.timespans(zone).unwrap(); |
236 | let zone_name = convert_bad_chars(zone); |
237 | let timespan_name = match timespans.first.name.as_ref() { |
238 | "%z" => None, |
239 | name => Some(name), |
240 | }; |
241 | writeln!( |
242 | timezone_file, |
243 | " Tz:: {zone} => {{ |
244 | const REST: &[(i64, FixedTimespan)] = {rest}; |
245 | FixedTimespanSet {{ |
246 | first: FixedTimespan {{ |
247 | utc_offset: {utc}, |
248 | dst_offset: {dst}, |
249 | name: {name:?}, |
250 | }}, |
251 | rest: REST |
252 | }} |
253 | }}, \n" , |
254 | zone = zone_name, |
255 | rest = format_rest(timespans.rest), |
256 | utc = timespans.first.utc_offset, |
257 | dst = timespans.first.dst_offset, |
258 | name = timespan_name, |
259 | )?; |
260 | } |
261 | write!( |
262 | timezone_file, |
263 | " }} |
264 | }} |
265 | }}\n" |
266 | )?; |
267 | write!( |
268 | timezone_file, |
269 | "/// An array of every known variant |
270 | /// |
271 | /// Useful for iterating over known timezones: |
272 | /// |
273 | /// ``` |
274 | /// use chrono_tz:: {{TZ_VARIANTS, Tz }}; |
275 | /// assert!(TZ_VARIANTS.iter().any(|v| *v == Tz::UTC)); |
276 | /// ``` |
277 | pub static TZ_VARIANTS: [Tz; {num}] = [ |
278 | " , |
279 | num = zones.len() |
280 | )?; |
281 | for zone in &zones { |
282 | writeln!( |
283 | timezone_file, |
284 | " Tz:: {zone}," , |
285 | zone = convert_bad_chars(zone) |
286 | )?; |
287 | } |
288 | write!(timezone_file, "];" )?; |
289 | Ok(()) |
290 | } |
291 | |
292 | // Create a file containing nice-looking re-exports such as Europe::London |
293 | // instead of having to use chrono_tz::timezones::Europe__London |
294 | fn write_directory_file(directory_file: &mut File, table: &Table, version: &str) -> io::Result<()> { |
295 | // expose the underlying IANA TZDB version |
296 | writeln!( |
297 | directory_file, |
298 | "pub const IANA_TZDB_VERSION : &str = \"{version}\"; \n" |
299 | )?; |
300 | // add the `loose' zone definitions first |
301 | writeln!(directory_file, "use crate::timezones::Tz; \n" )?; |
302 | let zones = table |
303 | .zonesets |
304 | .keys() |
305 | .chain(table.links.keys()) |
306 | .filter(|zone| !zone.contains('/' )) |
307 | .collect::<BTreeSet<_>>(); |
308 | for zone in zones { |
309 | let zone = convert_bad_chars(zone); |
310 | writeln!( |
311 | directory_file, |
312 | "pub const {name} : Tz = Tz:: {name};" , |
313 | name = zone |
314 | )?; |
315 | } |
316 | writeln!(directory_file)?; |
317 | |
318 | // now add the `structured' zone names in submodules |
319 | for entry in table.structure() { |
320 | if entry.name.contains('/' ) { |
321 | continue; |
322 | } |
323 | let module_name = convert_bad_chars(entry.name); |
324 | writeln!(directory_file, "pub mod {name} {{" , name = module_name)?; |
325 | writeln!(directory_file, " use crate::timezones::Tz; \n" ,)?; |
326 | for child in entry.children { |
327 | match child { |
328 | Child::Submodule(name) => { |
329 | let submodule_name = convert_bad_chars(name); |
330 | writeln!( |
331 | directory_file, |
332 | " pub mod {name} {{" , |
333 | name = submodule_name |
334 | )?; |
335 | writeln!(directory_file, " use crate::timezones::Tz; \n" ,)?; |
336 | let full_name = entry.name.to_string() + "/" + name; |
337 | for entry in table.structure() { |
338 | if entry.name == full_name { |
339 | for child in entry.children { |
340 | match child { |
341 | Child::Submodule(_) => { |
342 | panic!("Depth of > 3 nested submodules not implemented!" ) |
343 | } |
344 | Child::TimeZone(name) => { |
345 | let converted_name = convert_bad_chars(name); |
346 | writeln!(directory_file, |
347 | " pub const {name} : Tz = Tz:: {module_name}__ {submodule_name}__ {name};" , |
348 | module_name = module_name, |
349 | submodule_name = submodule_name, |
350 | name = converted_name, |
351 | )?; |
352 | } |
353 | } |
354 | } |
355 | } |
356 | } |
357 | writeln!(directory_file, " }}\n" )?; |
358 | } |
359 | Child::TimeZone(name) => { |
360 | let name = convert_bad_chars(name); |
361 | writeln!( |
362 | directory_file, |
363 | " pub const {name} : Tz = Tz:: {module_name}__ {name};" , |
364 | module_name = module_name, |
365 | name = name |
366 | )?; |
367 | } |
368 | } |
369 | } |
370 | writeln!(directory_file, " }}\n" )?; |
371 | } |
372 | Ok(()) |
373 | } |
374 | |
375 | /// Stub module because filter-by-regex feature is not enabled |
376 | #[cfg (not(feature = "filter-by-regex" ))] |
377 | mod filter { |
378 | /// stub function because filter-by-regex feature is not enabled |
379 | pub(crate) fn maybe_filter_timezone_table(_table: &mut super::Table) {} |
380 | } |
381 | |
/// Code backing the `filter-by-regex` feature.
///
/// The "GMT" and "UTC" time zones are always included.
#[cfg(feature = "filter-by-regex")]
mod filter {
    use std::collections::HashSet;
    use std::env;

    use regex::Regex;

    use crate::{Table, FILTER_ENV_VAR_NAME};

    /// Filters `table` with the regex from [`FILTER_ENV_VAR_NAME`], if one is
    /// set; otherwise leaves `table` untouched.
    pub(crate) fn maybe_filter_timezone_table(table: &mut Table) {
        if let Some(regex) = get_filter_regex() {
            filter_timezone_table(table, regex);
        }
    }

    /// Reads the `CHRONO_TZ_TIMEZONE_FILTER` environment variable and
    /// compiles it to a regex.
    ///
    /// Returns `None` when the variable is absent or blank after trimming.
    /// Panics when the value is not valid Unicode or not a valid regex, as we
    /// want the build to fail in that case.
    fn get_filter_regex() -> Option<Regex> {
        let raw = match env::var(FILTER_ENV_VAR_NAME) {
            Ok(raw) => raw,
            Err(env::VarError::NotPresent) => return None,
            Err(env::VarError::NotUnicode(s)) => panic!(
                "The value '{:?}' for environment variable {} is not valid Unicode",
                s, FILTER_ENV_VAR_NAME
            ),
        };

        let val = raw.trim();
        if val.is_empty() {
            return None;
        }

        let regex = Regex::new(val).unwrap_or_else(|err| {
            panic!(
                "The value '{:?}' for environment variable {} is not a valid regex, err={}",
                val, FILTER_ENV_VAR_NAME, err
            )
        });
        Some(regex)
    }

    /// Adds `new_value` to the set of names to keep.
    ///
    /// A name with exactly three `/`-separated parts also contributes its
    /// two-part prefix; without that we would lose half of Indiana in
    /// `directory.rs`. One-part prefixes are deliberately *not* added,
    /// otherwise we would inevitably end up with 'America' and include too
    /// much as a consequence.
    fn insert_keep_entry(keep: &mut HashSet<String>, new_value: &str) {
        let segments: Vec<&str> = new_value.split('/').collect();
        if let [first, second, _] = segments.as_slice() {
            keep.insert(format!("{}/{}", first, second));
        }

        keep.insert(new_value.to_string());
    }

    /// Filters `table` in place by applying `filter_regex`.
    fn filter_timezone_table(table: &mut Table, filter_regex: Regex) {
        // Compute the transitive closure of things to keep. Doing this,
        // instead of just filtering `zonesets` and `links` by the regex,
        // helps to keep the `structure()` intact.
        let mut keep = HashSet::new();

        // Seed: links matched by the regex, plus the always-kept GMT/UTC
        // links together with their targets.
        for (link, target) in &table.links {
            let always_kept = link == "GMT" || link == "UTC";
            if always_kept || filter_regex.is_match(link) {
                insert_keep_entry(&mut keep, link);
            }
            if always_kept || filter_regex.is_match(target) {
                insert_keep_entry(&mut keep, target);
            }
        }

        // Grow the set until a full pass over the links adds nothing, giving
        // up after 50 growing passes.
        let mut passes = 0;
        loop {
            let size_before = keep.len();

            for (link, target) in &table.links {
                if keep.contains(link) && !keep.contains(target) {
                    insert_keep_entry(&mut keep, target);
                }
                if keep.contains(target) && !keep.contains(link) {
                    insert_keep_entry(&mut keep, link);
                }
            }

            if keep.len() == size_before {
                break;
            }

            passes += 1;
            if passes == 50 {
                println!("cargo:warning=Recursion limit reached while building filter list");
                break;
            }
        }

        // Actually do the filtering.
        table
            .links
            .retain(|k, v| keep.contains(k) || keep.contains(v));

        table
            .zonesets
            .retain(|k, _| filter_regex.is_match(k) || keep.iter().any(|s| k.starts_with(s)));
    }
}
492 | |
/// Reads the IANA tz database version from the bundled `tz/NEWS` file.
///
/// The file is located relative to `CARGO_MANIFEST_DIR`. The version is the
/// text between `Release ` and the first ` - ` on the first line that has
/// that shape, e.g. `2024a` for `Release 2024a - 2024-02-01 09:28:56 -0800`.
///
/// # Panics
///
/// Panics if `CARGO_MANIFEST_DIR` is not set (or not valid Unicode), if
/// `tz/NEWS` cannot be opened or read, or if no release line is found.
fn detect_iana_db_version() -> String {
    let root = env::var("CARGO_MANIFEST_DIR").expect("no Cargo build context");
    let path = Path::new(&root).join("tz/NEWS");
    let file = File::open(path).expect("failed to open file");

    BufReader::new(file)
        .lines()
        // Surface read errors instead of treating them as end of file.
        .map(|line| line.expect("failed to read tz/NEWS"))
        .find_map(|line| {
            let release = line.strip_prefix("Release ")?;
            let (version, _) = release.split_once(" - ")?;
            Some(version.to_owned())
        })
        // A NEWS file without a release line is unexpected but possible, so
        // this is an ordinary panic rather than `unreachable!`.
        .expect("no version found")
}
513 | |
514 | pub fn main() { |
515 | println!("cargo:rerun-if-env-changed= {}" , FILTER_ENV_VAR_NAME); |
516 | |
517 | let parser = LineParser::new(); |
518 | let mut table = TableBuilder::new(); |
519 | |
520 | let tzfiles = [ |
521 | "tz/africa" , |
522 | "tz/antarctica" , |
523 | "tz/asia" , |
524 | "tz/australasia" , |
525 | "tz/backward" , |
526 | "tz/etcetera" , |
527 | "tz/europe" , |
528 | "tz/northamerica" , |
529 | "tz/southamerica" , |
530 | ]; |
531 | |
532 | let lines = tzfiles |
533 | .iter() |
534 | .map(Path::new) |
535 | .map(|p| { |
536 | Path::new(&env::var("CARGO_MANIFEST_DIR" ).unwrap_or_else(|_| String::new())).join(p) |
537 | }) |
538 | .map(|path| { |
539 | File::open(&path).unwrap_or_else(|e| panic!("cannot open {}: {}" , path.display(), e)) |
540 | }) |
541 | .map(BufReader::new) |
542 | .flat_map(BufRead::lines) |
543 | .map(Result::unwrap) |
544 | .map(strip_comments); |
545 | |
546 | for line in lines { |
547 | match parser.parse_str(&line).unwrap() { |
548 | Line::Zone(zone) => table.add_zone_line(zone).unwrap(), |
549 | Line::Continuation(cont) => table.add_continuation_line(cont).unwrap(), |
550 | Line::Rule(rule) => table.add_rule_line(rule).unwrap(), |
551 | Line::Link(link) => table.add_link_line(link).unwrap(), |
552 | Line::Space => {} |
553 | } |
554 | } |
555 | |
556 | let mut table = table.build(); |
557 | filter::maybe_filter_timezone_table(&mut table); |
558 | |
559 | let timezone_path = Path::new(&env::var("OUT_DIR" ).unwrap()).join("timezones.rs" ); |
560 | let mut timezone_file = File::create(timezone_path).unwrap(); |
561 | write_timezone_file(&mut timezone_file, &table).unwrap(); |
562 | |
563 | let directory_path = Path::new(&env::var("OUT_DIR" ).unwrap()).join("directory.rs" ); |
564 | let mut directory_file = File::create(directory_path).unwrap(); |
565 | let version = detect_iana_db_version(); |
566 | write_directory_file(&mut directory_file, &table, &version).unwrap(); |
567 | } |
568 | |