1use alloc::{string::String, vec};
2
3use super::{
4 util::{
5 array_str::Abbreviation,
6 error::{err, Error},
7 escape::{Byte, Bytes},
8 itime::{IOffset, ITimestamp},
9 },
10 PosixTimeZone, TzifDateTime, TzifFixed, TzifIndicator, TzifLocalTimeType,
11 TzifOwned, TzifTransitionInfo, TzifTransitionKind, TzifTransitions,
12 TzifTransitionsOwned,
13};
14
// These are Jiff min and max timestamp (in seconds) values.
//
// Interpreted as Unix timestamps, these are `-9999-01-02T01:59:59Z` and
// `9999-12-30T22:00:00Z`. That is, each bound sits `93599` seconds (the
// magnitude of `OFFSET_MIN`/`OFFSET_MAX` below) inside the first/last
// second of years `-9999` and `9999` -- presumably so that applying any
// supported offset still yields a civil datetime within those years.
//
// The TZif parser will clamp timestamps to this range. It's
// not ideal, but Jiff can't handle values outside of this range
// and completely refusing to use TZif data with pathological
// timestamps in typically irrelevant transitions is bad juju.
//
// Ref: https://github.com/BurntSushi/jiff/issues/163
// Ref: https://github.com/BurntSushi/jiff/pull/164
const TIMESTAMP_MIN: i64 = -377705023201;
const TIMESTAMP_MAX: i64 = 253402207200;
26
// Similarly for offsets, although in this case, if we find
// an offset outside of this range, we do actually error. This
// is because it could result in true incorrect datetimes for
// actual transitions.
//
// But our supported offset range is `-25:59:59..=+25:59:59`
// (in seconds: `25*3600 + 59*60 + 59 = 93599`).
// There's no real time zone with offsets even close to those
// boundaries.
//
// If there is pathological data that we should ignore, then
// we should wait for a real bug report in order to determine
// the right way to ignore/clamp it.
const OFFSET_MIN: i32 = -93599;
const OFFSET_MAX: i32 = 93599;
41
// When fattening TZif data, this is the year to go up to.
//
// The bound is exclusive: a generated transition whose UTC year is
// greater than or equal to this value is not added, and fattening stops
// there.
//
// This year was chosen because it's what the "fat" TZif data generated
// by `zic` uses.
const FATTEN_UP_TO_YEAR: i16 = 2038;
47
// This is a "sanity" limit on the maximum number of transitions we'll
// add to TZif data when fattening them up.
//
// This is mostly just a defense-in-depth limit to avoid weird cases
// where a pathological POSIX time zone could be defined to create
// many transitions. It's not clear that this is actually possible,
// but I felt a little uneasy doing unbounded work that isn't linearly
// proportional to the input data. So, this limit is put into place for
// reasons of "good sense."
//
// For "normal" cases, there should be at most two transitions per
// year. So this limit permits 300/2=150 years of transition data.
// (Although we stop before reaching `FATTEN_UP_TO_YEAR` regardless. See
// above.)
const FATTEN_MAX_TRANSITIONS: usize = 300;
62
63impl TzifOwned {
64 /// Parses the given data as a TZif formatted file.
65 ///
66 /// The name given is attached to the `Tzif` value returned, but is
67 /// otherwise not significant.
68 ///
69 /// If the given data is not recognized to be valid TZif, then an error is
70 /// returned.
71 ///
72 /// In general, callers may assume that it is safe to pass arbitrary or
73 /// even untrusted data to this function and count on it not panicking
74 /// or using resources that aren't limited to a small constant factor of
75 /// the size of the data itself. That is, callers can reliably limit the
76 /// resources used by limiting the size of the data given to this parse
77 /// function.
78 pub(crate) fn parse(
79 name: Option<String>,
80 bytes: &[u8],
81 ) -> Result<TzifOwned, Error> {
82 let original = bytes;
83 let name = name.into();
84 let (header32, rest) = Header::parse(4, bytes)
85 .map_err(|e| err!("failed to parse 32-bit header: {e}"))?;
86 let (mut tzif, rest) = if header32.version == 0 {
87 TzifOwned::parse32(name, header32, rest)?
88 } else {
89 TzifOwned::parse64(name, header32, rest)?
90 };
91 tzif.fatten();
92 // This should come after fattening, because fattening may add new
93 // transitions and we want to add civil datetimes to those.
94 tzif.add_civil_datetimes_to_transitions();
95 tzif.verify_posix_time_zone_consistency()?;
96 // Compute the checksum using the entire contents of the TZif data.
97 let tzif_raw_len = (rest.as_ptr() as usize)
98 .checked_sub(original.as_ptr() as usize)
99 .unwrap();
100 let tzif_raw_bytes = &original[..tzif_raw_len];
101 tzif.fixed.checksum = super::crc32::sum(tzif_raw_bytes);
102
103 // Shrink all of our allocs so we don't keep excess capacity around.
104 tzif.fixed.designations.shrink_to_fit();
105 tzif.types.shrink_to_fit();
106 tzif.transitions.timestamps.shrink_to_fit();
107 tzif.transitions.civil_starts.shrink_to_fit();
108 tzif.transitions.civil_ends.shrink_to_fit();
109 tzif.transitions.infos.shrink_to_fit();
110
111 Ok(tzif)
112 }
113
114 fn parse32<'b>(
115 name: Option<String>,
116 header32: Header,
117 bytes: &'b [u8],
118 ) -> Result<(TzifOwned, &'b [u8]), Error> {
119 let mut tzif = TzifOwned {
120 fixed: TzifFixed {
121 name,
122 version: header32.version,
123 // filled in later
124 checksum: 0,
125 designations: String::new(),
126 posix_tz: None,
127 },
128 types: vec![],
129 transitions: TzifTransitions {
130 timestamps: vec![],
131 civil_starts: vec![],
132 civil_ends: vec![],
133 infos: vec![],
134 },
135 };
136 let rest = tzif.parse_transitions(&header32, bytes)?;
137 let rest = tzif.parse_transition_types(&header32, rest)?;
138 let rest = tzif.parse_local_time_types(&header32, rest)?;
139 let rest = tzif.parse_time_zone_designations(&header32, rest)?;
140 let rest = tzif.parse_leap_seconds(&header32, rest)?;
141 let rest = tzif.parse_indicators(&header32, rest)?;
142 Ok((tzif, rest))
143 }
144
145 fn parse64<'b>(
146 name: Option<String>,
147 header32: Header,
148 bytes: &'b [u8],
149 ) -> Result<(TzifOwned, &'b [u8]), Error> {
150 let (_, rest) = try_split_at(
151 "V1 TZif data block",
152 bytes,
153 header32.data_block_len()?,
154 )?;
155 let (header64, rest) = Header::parse(8, rest)
156 .map_err(|e| err!("failed to parse 64-bit header: {e}"))?;
157 let mut tzif = TzifOwned {
158 fixed: TzifFixed {
159 name,
160 version: header64.version,
161 // filled in later
162 checksum: 0,
163 designations: String::new(),
164 posix_tz: None,
165 },
166 types: vec![],
167 transitions: TzifTransitions {
168 timestamps: vec![],
169 civil_starts: vec![],
170 civil_ends: vec![],
171 infos: vec![],
172 },
173 };
174 let rest = tzif.parse_transitions(&header64, rest)?;
175 let rest = tzif.parse_transition_types(&header64, rest)?;
176 let rest = tzif.parse_local_time_types(&header64, rest)?;
177 let rest = tzif.parse_time_zone_designations(&header64, rest)?;
178 let rest = tzif.parse_leap_seconds(&header64, rest)?;
179 let rest = tzif.parse_indicators(&header64, rest)?;
180 let rest = tzif.parse_footer(&header64, rest)?;
181 // Note that we don't check that the TZif data is fully valid. It is
182 // possible for it to contain superfluous information. For example, a
183 // non-zero local time type that is never referenced by a transition.
184 Ok((tzif, rest))
185 }
186
187 fn parse_transitions<'b>(
188 &mut self,
189 header: &Header,
190 bytes: &'b [u8],
191 ) -> Result<&'b [u8], Error> {
192 let (bytes, rest) = try_split_at(
193 "transition times data block",
194 bytes,
195 header.transition_times_len()?,
196 )?;
197 let mut it = bytes.chunks_exact(header.time_size);
198 // RFC 8536 says: "If there are no transitions, local time for all
199 // timestamps is specified by the TZ string in the footer if present
200 // and nonempty; otherwise, it is specified by time type 0."
201 //
202 // RFC 8536 also says: "Local time for timestamps before the first
203 // transition is specified by the first time type (time type
204 // 0)."
205 //
206 // So if there are no transitions, pushing this dummy one will result
207 // in the desired behavior even when it's the only transition.
208 // Similarly, since this is the minimum timestamp value, it will
209 // trigger for any times before the first transition found in the TZif
210 // data.
211 self.transitions.add_with_type_index(TIMESTAMP_MIN, 0);
212 while let Some(chunk) = it.next() {
213 let mut timestamp = if header.is_32bit() {
214 i64::from(from_be_bytes_i32(chunk))
215 } else {
216 from_be_bytes_i64(chunk)
217 };
218 if !(TIMESTAMP_MIN <= timestamp && timestamp <= TIMESTAMP_MAX) {
219 // We really shouldn't error here just because the Unix
220 // timestamp is outside what Jiff supports. Since what Jiff
221 // supports is _somewhat_ arbitrary. But Jiff's supported
222 // range is good enough for all realistic purposes, so we
223 // just clamp an out-of-range Unix timestamp to the Jiff
224 // min or max value.
225 //
226 // This can't result in the sorting order being wrong, but
227 // it can result in a transition that is duplicative with
228 // the dummy transition we inserted above. This should be
229 // fine.
230 let clamped = timestamp.clamp(TIMESTAMP_MIN, TIMESTAMP_MAX);
231 // only-jiff-start
232 warn!(
233 "found Unix timestamp {timestamp} that is outside \
234 Jiff's supported range, clamping to {clamped}",
235 );
236 // only-jiff-end
237 timestamp = clamped;
238 }
239 self.transitions.add(timestamp);
240 }
241 assert!(it.remainder().is_empty());
242 Ok(rest)
243 }
244
245 fn parse_transition_types<'b>(
246 &mut self,
247 header: &Header,
248 bytes: &'b [u8],
249 ) -> Result<&'b [u8], Error> {
250 let (bytes, rest) = try_split_at(
251 "transition types data block",
252 bytes,
253 header.transition_types_len()?,
254 )?;
255 // We skip the first transition because it is our minimum dummy
256 // transition.
257 for (transition_index, &type_index) in (1..).zip(bytes) {
258 if usize::from(type_index) >= header.tzh_typecnt {
259 return Err(err!(
260 "found transition type index {type_index},
261 but there are only {} local time types",
262 header.tzh_typecnt,
263 ));
264 }
265 self.transitions.infos[transition_index].type_index = type_index;
266 }
267 Ok(rest)
268 }
269
270 fn parse_local_time_types<'b>(
271 &mut self,
272 header: &Header,
273 bytes: &'b [u8],
274 ) -> Result<&'b [u8], Error> {
275 let (bytes, rest) = try_split_at(
276 "local time types data block",
277 bytes,
278 header.local_time_types_len()?,
279 )?;
280 let mut it = bytes.chunks_exact(6);
281 while let Some(chunk) = it.next() {
282 let offset = from_be_bytes_i32(&chunk[..4]);
283 if !(OFFSET_MIN <= offset && offset <= OFFSET_MAX) {
284 return Err(err!(
285 "found local time type with out-of-bounds offset: {offset}"
286 ));
287 }
288 let is_dst = chunk[4] == 1;
289 let designation = (chunk[5], chunk[5]);
290 self.types.push(TzifLocalTimeType {
291 offset,
292 is_dst,
293 designation,
294 indicator: TzifIndicator::LocalWall,
295 });
296 }
297 assert!(it.remainder().is_empty());
298 Ok(rest)
299 }
300
301 fn parse_time_zone_designations<'b>(
302 &mut self,
303 header: &Header,
304 bytes: &'b [u8],
305 ) -> Result<&'b [u8], Error> {
306 let (bytes, rest) = try_split_at(
307 "time zone designations data block",
308 bytes,
309 header.time_zone_designations_len()?,
310 )?;
311 self.fixed.designations =
312 String::from_utf8(bytes.to_vec()).map_err(|_| {
313 err!(
314 "time zone designations are not valid UTF-8: {:?}",
315 Bytes(bytes),
316 )
317 })?;
318 // Holy hell, this is brutal. The boundary conditions are crazy.
319 for (i, typ) in self.types.iter_mut().enumerate() {
320 let start = usize::from(typ.designation.0);
321 let Some(suffix) = self.fixed.designations.get(start..) else {
322 return Err(err!(
323 "local time type {i} has designation index of {start}, \
324 but cannot be more than {}",
325 self.fixed.designations.len(),
326 ));
327 };
328 let Some(len) = suffix.find('\x00') else {
329 return Err(err!(
330 "local time type {i} has designation index of {start}, \
331 but could not find NUL terminator after it in \
332 designations: {:?}",
333 self.fixed.designations,
334 ));
335 };
336 let Some(end) = start.checked_add(len) else {
337 return Err(err!(
338 "local time type {i} has designation index of {start}, \
339 but its length {len} is too big",
340 ));
341 };
342 typ.designation.1 = u8::try_from(end).map_err(|_| {
343 err!(
344 "local time type {i} has designation range of \
345 {start}..{end}, but end is too big",
346 )
347 })?;
348 }
349 Ok(rest)
350 }
351
352 /// This parses the leap second corrections in the TZif data.
353 ///
354 /// Note that we only parse and verify them. We don't actually use them.
355 /// Jiff effectively ignores leap seconds.
356 fn parse_leap_seconds<'b>(
357 &mut self,
358 header: &Header,
359 bytes: &'b [u8],
360 ) -> Result<&'b [u8], Error> {
361 let (bytes, rest) = try_split_at(
362 "leap seconds data block",
363 bytes,
364 header.leap_second_len()?,
365 )?;
366 let chunk_len = header
367 .time_size
368 .checked_add(4)
369 .expect("time_size plus 4 fits in usize");
370 let mut it = bytes.chunks_exact(chunk_len);
371 while let Some(chunk) = it.next() {
372 let (occur_bytes, _corr_bytes) = chunk.split_at(header.time_size);
373 let occur = if header.is_32bit() {
374 i64::from(from_be_bytes_i32(occur_bytes))
375 } else {
376 from_be_bytes_i64(occur_bytes)
377 };
378 if !(TIMESTAMP_MIN <= occur && occur <= TIMESTAMP_MAX) {
379 // only-jiff-start
380 warn!(
381 "leap second occurrence {occur} is \
382 not in Jiff's supported range"
383 )
384 // only-jiff-end
385 }
386 }
387 assert!(it.remainder().is_empty());
388 Ok(rest)
389 }
390
391 fn parse_indicators<'b>(
392 &mut self,
393 header: &Header,
394 bytes: &'b [u8],
395 ) -> Result<&'b [u8], Error> {
396 let (std_wall_bytes, rest) = try_split_at(
397 "standard/wall indicators data block",
398 bytes,
399 header.standard_wall_len()?,
400 )?;
401 let (ut_local_bytes, rest) = try_split_at(
402 "UT/local indicators data block",
403 rest,
404 header.ut_local_len()?,
405 )?;
406 if std_wall_bytes.is_empty() && !ut_local_bytes.is_empty() {
407 // This is a weird case, but technically possible only if all
408 // UT/local indicators are 0. If any are 1, then it's an error,
409 // because it would require the corresponding std/wall indicator
410 // to be 1 too. Which it can't be, because there aren't any. So
411 // we just check that they're all zeros.
412 for (i, &byte) in ut_local_bytes.iter().enumerate() {
413 if byte != 0 {
414 return Err(err!(
415 "found UT/local indicator '{byte}' for local time \
416 type {i}, but it must be 0 since all std/wall \
417 indicators are 0",
418 ));
419 }
420 }
421 } else if !std_wall_bytes.is_empty() && ut_local_bytes.is_empty() {
422 for (i, &byte) in std_wall_bytes.iter().enumerate() {
423 // Indexing is OK because Header guarantees that the number of
424 // indicators is 0 or equal to the number of types.
425 self.types[i].indicator = if byte == 0 {
426 TzifIndicator::LocalWall
427 } else if byte == 1 {
428 TzifIndicator::LocalStandard
429 } else {
430 return Err(err!(
431 "found invalid std/wall indicator '{byte}' for \
432 local time type {i}, it must be 0 or 1",
433 ));
434 };
435 }
436 } else if !std_wall_bytes.is_empty() && !ut_local_bytes.is_empty() {
437 assert_eq!(std_wall_bytes.len(), ut_local_bytes.len());
438 let it = std_wall_bytes.iter().zip(ut_local_bytes);
439 for (i, (&stdwall, &utlocal)) in it.enumerate() {
440 // Indexing is OK because Header guarantees that the number of
441 // indicators is 0 or equal to the number of types.
442 self.types[i].indicator = match (stdwall, utlocal) {
443 (0, 0) => TzifIndicator::LocalWall,
444 (1, 0) => TzifIndicator::LocalStandard,
445 (1, 1) => TzifIndicator::UTStandard,
446 (0, 1) => {
447 return Err(err!(
448 "found illegal ut-wall combination for \
449 local time type {i}, only local-wall, \
450 local-standard and ut-standard are allowed",
451 ))
452 }
453 _ => {
454 return Err(err!(
455 "found illegal std/wall or ut/local value for \
456 local time type {i}, each must be 0 or 1",
457 ))
458 }
459 };
460 }
461 } else {
462 // If they're both empty then we don't need to do anything. Every
463 // local time type record already has the correct default for this
464 // case set.
465 debug_assert!(std_wall_bytes.is_empty());
466 debug_assert!(ut_local_bytes.is_empty());
467 }
468 Ok(rest)
469 }
470
471 fn parse_footer<'b>(
472 &mut self,
473 _header: &Header,
474 bytes: &'b [u8],
475 ) -> Result<&'b [u8], Error> {
476 if bytes.is_empty() {
477 return Err(err!(
478 "invalid V2+ TZif footer, expected \\n, \
479 but found unexpected end of data",
480 ));
481 }
482 if bytes[0] != b'\n' {
483 return Err(err!(
484 "invalid V2+ TZif footer, expected {:?}, but found {:?}",
485 Byte(b'\n'),
486 Byte(bytes[0]),
487 ));
488 }
489 let bytes = &bytes[1..];
490 // Only scan up to 1KB for a NUL terminator in case we somehow got
491 // passed a huge block of bytes.
492 let toscan = &bytes[..bytes.len().min(1024)];
493 let Some(nlat) = toscan.iter().position(|&b| b == b'\n') else {
494 return Err(err!(
495 "invalid V2 TZif footer, could not find {:?} \
496 terminator in: {:?}",
497 Byte(b'\n'),
498 Bytes(toscan),
499 ));
500 };
501 let (bytes, rest) = bytes.split_at(nlat);
502 if !bytes.is_empty() {
503 // We could in theory limit TZ strings to their strict POSIX
504 // definition here for TZif V2, but I don't think there is any
505 // harm in allowing the extensions in V2 formatted TZif data. Note
506 // that the GNU tooling allow it via the `TZ` environment variable
507 // even though POSIX doesn't specify it. This all seems okay to me
508 // because the V3+ extension is a strict superset of functionality.
509 let posix_tz =
510 PosixTimeZone::parse(bytes).map_err(|e| err!("{e}"))?;
511 self.fixed.posix_tz = Some(posix_tz);
512 }
513 Ok(&rest[1..])
514 }
515
516 /// Validates that the POSIX TZ string we parsed (if one exists) is
517 /// consistent with the last transition in this time zone. This is
518 /// required by RFC 8536.
519 ///
520 /// RFC 8536 says, "If the string is nonempty and one or more
521 /// transitions appear in the version 2+ data, the string MUST be
522 /// consistent with the last version 2+ transition."
523 fn verify_posix_time_zone_consistency(&self) -> Result<(), Error> {
524 // We need to be a little careful, since we always have at least one
525 // transition (accounting for the dummy `Timestamp::MIN` transition).
526 // So if we only have 1 transition and a POSIX TZ string, then we
527 // should not validate it since it's equivalent to the case of 0
528 // transitions and a POSIX TZ string.
529 if self.transitions.timestamps.len() <= 1 {
530 return Ok(());
531 }
532 let Some(ref tz) = self.fixed.posix_tz else {
533 return Ok(());
534 };
535 let last = self
536 .transitions
537 .timestamps
538 .last()
539 .expect("last transition timestamp");
540 let type_index = self
541 .transitions
542 .infos
543 .last()
544 .expect("last transition info")
545 .type_index;
546 let typ = &self.types[usize::from(type_index)];
547 let (ioff, abbrev, is_dst) =
548 tz.to_offset_info(ITimestamp::from_second(*last));
549 if ioff.second != typ.offset {
550 return Err(err!(
551 "expected last transition to have DST offset \
552 of {expected_offset}, but got {got_offset} \
553 according to POSIX TZ string {tz}",
554 expected_offset = typ.offset,
555 got_offset = ioff.second,
556 tz = tz,
557 ));
558 }
559 if is_dst != typ.is_dst {
560 return Err(err!(
561 "expected last transition to have is_dst={expected_dst}, \
562 but got is_dst={got_dst} according to POSIX TZ \
563 string {tz}",
564 expected_dst = typ.is_dst,
565 got_dst = is_dst,
566 tz = tz,
567 ));
568 }
569 if abbrev != self.designation(&typ) {
570 return Err(err!(
571 "expected last transition to have \
572 designation={expected_abbrev}, \
573 but got designation={got_abbrev} according to POSIX TZ \
574 string {tz}",
575 expected_abbrev = self.designation(&typ),
576 got_abbrev = abbrev,
577 tz = tz,
578 ));
579 }
580 Ok(())
581 }
582
583 /// Add civil datetimes to our transitions.
584 ///
585 /// This isn't strictly necessary, but it speeds up time zone lookups when
586 /// the input is a civil datetime. It lets us do comparisons directly on
587 /// the civil datetime as given, instead of needing to convert the civil
588 /// datetime given to a timestamp first. (Even if we didn't do this, I
589 /// believe we'd still need at least one additional timestamp that is
590 /// offset, because TZ lookups for a civil datetime are done in local time,
591 /// and the timestamps in TZif data are, of course, all in UTC.)
592 fn add_civil_datetimes_to_transitions(&mut self) {
593 fn to_datetime(timestamp: i64, offset: i32) -> TzifDateTime {
594 use crate::shared::util::itime::{IOffset, ITimestamp};
595 let its = ITimestamp { second: timestamp, nanosecond: 0 };
596 let ioff = IOffset { second: offset };
597 let dt = its.to_datetime(ioff);
598 TzifDateTime::new(
599 dt.date.year,
600 dt.date.month,
601 dt.date.day,
602 dt.time.hour,
603 dt.time.minute,
604 dt.time.second,
605 )
606 }
607
608 let trans = &mut self.transitions;
609 for i in 0..trans.timestamps.len() {
610 let timestamp = trans.timestamps[i];
611 let offset = {
612 let type_index = trans.infos[i].type_index;
613 self.types[usize::from(type_index)].offset
614 };
615 let prev_offset = {
616 let type_index = trans.infos[i.saturating_sub(1)].type_index;
617 self.types[usize::from(type_index)].offset
618 };
619
620 if prev_offset == offset {
621 // Equivalent offsets means there can never be any ambiguity.
622 let start = to_datetime(timestamp, prev_offset);
623 trans.infos[i].kind = TzifTransitionKind::Unambiguous;
624 trans.civil_starts[i] = start;
625 } else if prev_offset < offset {
626 // When the offset of the previous transition is less, that
627 // means there is some non-zero amount of time that is
628 // "skipped" when moving to the next transition. Thus, we have
629 // a gap. The start of the gap is the offset which gets us the
630 // earliest time, i.e., the smaller of the two offsets.
631 trans.infos[i].kind = TzifTransitionKind::Gap;
632 trans.civil_starts[i] = to_datetime(timestamp, prev_offset);
633 trans.civil_ends[i] = to_datetime(timestamp, offset);
634 } else {
635 // When the offset of the previous transition is greater, that
636 // means there is some non-zero amount of time that will be
637 // replayed on a wall clock in this time zone. Thus, we have
638 // a fold. The start of the gold is the offset which gets us
639 // the earliest time, i.e., the smaller of the two offsets.
640 assert!(prev_offset > offset);
641 trans.infos[i].kind = TzifTransitionKind::Fold;
642 trans.civil_starts[i] = to_datetime(timestamp, offset);
643 trans.civil_ends[i] = to_datetime(timestamp, prev_offset);
644 }
645 }
646 }
647
648 /// Fatten up this TZif data with additional transitions.
649 ///
650 /// These additional transitions often make time zone lookups faster, and
651 /// they smooth out the performance difference between using "slim" and
652 /// "fat" tzdbs.
653 fn fatten(&mut self) {
654 // Note that this is a crate feature for *both* `jiff` and
655 // `jiff-static`.
656 if !cfg!(feature = "tz-fat") {
657 return;
658 }
659 let Some(posix_tz) = self.fixed.posix_tz.clone() else { return };
660 let last =
661 self.transitions.timestamps.last().expect("last transition");
662 let mut i = 0;
663 let mut prev = ITimestamp::from_second(*last);
664 loop {
665 if i > FATTEN_MAX_TRANSITIONS {
666 // only-jiff-start
667 warn!(
668 "fattening TZif data for `{name:?}` somehow generated \
669 more than {max} transitions, so giving up to avoid \
670 doing too much work",
671 name = self.fixed.name,
672 max = FATTEN_MAX_TRANSITIONS,
673 );
674 // only-jiff-end
675 return;
676 }
677 i += 1;
678 prev = match self.add_transition(&posix_tz, prev) {
679 None => break,
680 Some(next) => next,
681 };
682 }
683 }
684
685 /// If there's a transition strictly after the given timestamp for the
686 /// given POSIX time zone, then add it to this TZif data.
687 fn add_transition(
688 &mut self,
689 posix_tz: &PosixTimeZone<Abbreviation>,
690 prev: ITimestamp,
691 ) -> Option<ITimestamp> {
692 let (its, ioff, abbrev, is_dst) = posix_tz.next_transition(prev)?;
693 if its.to_datetime(IOffset::UTC).date.year >= FATTEN_UP_TO_YEAR {
694 return None;
695 }
696 let type_index =
697 self.find_or_create_local_time_type(ioff, abbrev, is_dst)?;
698 self.transitions.add_with_type_index(its.second, type_index);
699 Some(its)
700 }
701
702 /// Look for a local time type matching the data given.
703 ///
704 /// If one could not be found, then one is created and its index is
705 /// returned.
706 ///
707 /// If one could not be found and one could not be created (e.g., the index
708 /// would overflow `u8`), then `None` is returned.
709 fn find_or_create_local_time_type(
710 &mut self,
711 offset: IOffset,
712 abbrev: &str,
713 is_dst: bool,
714 ) -> Option<u8> {
715 for (i, typ) in self.types.iter().enumerate() {
716 if offset.second == typ.offset
717 && abbrev == self.designation(typ)
718 && is_dst == typ.is_dst
719 {
720 return u8::try_from(i).ok();
721 }
722 }
723 let i = u8::try_from(self.types.len()).ok()?;
724 let designation = self.find_or_create_designation(abbrev)?;
725 self.types.push(TzifLocalTimeType {
726 offset: offset.second,
727 is_dst,
728 designation,
729 // Not really clear if this is correct, but Jiff
730 // ignores this anyway, so ¯\_(ツ)_/¯.
731 indicator: TzifIndicator::LocalWall,
732 });
733 Some(i)
734 }
735
736 /// Look for a designation (i.e., time zone abbreviation) matching the data
737 /// given, and return its range into `self.fixed.designations`.
738 ///
739 /// If one could not be found, then one is created and its range is
740 /// returned.
741 ///
742 /// If one could not be found and one could not be created (e.g., the range
743 /// would overflow `u8`), then `None` is returned.
744 fn find_or_create_designation(
745 &mut self,
746 needle: &str,
747 ) -> Option<(u8, u8)> {
748 let mut start = 0;
749 while let Some(offset) = self.fixed.designations[start..].find('\0') {
750 let end = start + offset;
751 let abbrev = &self.fixed.designations[start..end];
752 if needle == abbrev {
753 return Some((start.try_into().ok()?, end.try_into().ok()?));
754 }
755 start = end + 1;
756 }
757 self.fixed.designations.push_str(needle);
758 self.fixed.designations.push('\0');
759 let end = start + needle.len();
760 Some((start.try_into().ok()?, end.try_into().ok()?))
761 }
762
763 fn designation(&self, typ: &TzifLocalTimeType) -> &str {
764 let range =
765 usize::from(typ.designation.0)..usize::from(typ.designation.1);
766 // OK because we verify that the designation range on every local
767 // time type is a valid range into `self.designations`.
768 &self.fixed.designations[range]
769 }
770}
771
772impl TzifTransitionsOwned {
773 /// Add a single transition with the given timestamp.
774 ///
775 /// This also fills in the other columns (civil starts, civil ends and
776 /// infos) with sensible default values. It is expected that callers will
777 /// later fill them in.
778 fn add(&mut self, timestamp: i64) {
779 self.add_with_type_index(timestamp, type_index:0);
780 }
781
782 /// Like `TzifTransitionsOwned::add`, but let's the caller provide a type
783 /// index if it is known.
784 fn add_with_type_index(&mut self, timestamp: i64, type_index: u8) {
785 self.timestamps.push(timestamp);
786 self.civil_starts.push(TzifDateTime::ZERO);
787 self.civil_ends.push(TzifDateTime::ZERO);
788 self.infos.push(TzifTransitionInfo {
789 type_index,
790 kind: TzifTransitionKind::Unambiguous,
791 });
792 }
793}
794
/// The header for a TZif formatted file.
///
/// V2+ TZif format have two headers: one for V1 data, and then a second
/// following the V1 data block that describes another data block which uses
/// 64-bit timestamps. The two headers both have the same format and both
/// use 32-bit big-endian encoded integers.
///
/// The `tzh_*` counts are what determine the length of each section of the
/// data block that follows the header.
#[derive(Debug)]
struct Header {
    /// The size of the timestamps encoded in the data block.
    ///
    /// This is guaranteed to be either 4 (for V1) or 8 (for the 64-bit header
    /// block in V2+).
    ///
    /// This is not read from the data. It is supplied by the caller of
    /// `Header::parse`.
    time_size: usize,
    /// The file format version.
    ///
    /// Note that this is either a NUL byte (for version 1), or an ASCII byte
    /// corresponding to the version number. That is, `0x32` for `2`, `0x33`
    /// for `3` or `0x34` for `4`. Note also that just because zoneinfo might
    /// have been recently generated does not mean it uses the latest format
    /// version. It seems like newer versions are only compiled by `zic` when
    /// they are needed. For example, `America/New_York` on my system (as of
    /// `2024-03-25`) has version `0x32`, but `Asia/Jerusalem` has version
    /// `0x33`.
    ///
    /// The byte is stored as found. `Header::parse` does not check that it
    /// is one of the values listed above.
    version: u8,
    /// Number of UT/local indicators stored in the file.
    ///
    /// This is checked to be either equal to `0` or equal to `tzh_typecnt`.
    tzh_ttisutcnt: usize,
    /// The number of standard/wall indicators stored in the file.
    ///
    /// This is checked to be either equal to `0` or equal to `tzh_typecnt`.
    tzh_ttisstdcnt: usize,
    /// The number of leap seconds for which data entries are stored in the
    /// file.
    tzh_leapcnt: usize,
    /// The number of transition times for which data entries are stored in
    /// the file.
    ///
    /// This is also the number of transition type indices, since there is
    /// one per transition time.
    tzh_timecnt: usize,
    /// The number of local time types for which data entries are stored in the
    /// file.
    ///
    /// This is checked to be at least `1`.
    tzh_typecnt: usize,
    /// The number of bytes of time zone abbreviation strings stored in the
    /// file.
    ///
    /// This is checked to be at least `1`.
    tzh_charcnt: usize,
}
844
845impl Header {
846 /// Parse the header record from the given bytes.
847 ///
848 /// Upon success, return the header and all bytes after the header.
849 ///
850 /// The given `time_size` must be 4 or 8, corresponding to either the
851 /// V1 header block or the V2+ header block, respectively.
852 fn parse(
853 time_size: usize,
854 bytes: &[u8],
855 ) -> Result<(Header, &[u8]), Error> {
856 assert!(time_size == 4 || time_size == 8, "time size must be 4 or 8");
857 if bytes.len() < 44 {
858 return Err(err!("invalid header: too short"));
859 }
860 let (magic, rest) = bytes.split_at(4);
861 if magic != b"TZif" {
862 return Err(err!("invalid header: magic bytes mismatch"));
863 }
864 let (version, rest) = rest.split_at(1);
865 let (_reserved, rest) = rest.split_at(15);
866
867 let (tzh_ttisutcnt_bytes, rest) = rest.split_at(4);
868 let (tzh_ttisstdcnt_bytes, rest) = rest.split_at(4);
869 let (tzh_leapcnt_bytes, rest) = rest.split_at(4);
870 let (tzh_timecnt_bytes, rest) = rest.split_at(4);
871 let (tzh_typecnt_bytes, rest) = rest.split_at(4);
872 let (tzh_charcnt_bytes, rest) = rest.split_at(4);
873
874 let tzh_ttisutcnt = from_be_bytes_u32_to_usize(tzh_ttisutcnt_bytes)
875 .map_err(|e| err!("failed to parse tzh_ttisutcnt: {e}"))?;
876 let tzh_ttisstdcnt = from_be_bytes_u32_to_usize(tzh_ttisstdcnt_bytes)
877 .map_err(|e| err!("failed to parse tzh_ttisstdcnt: {e}"))?;
878 let tzh_leapcnt = from_be_bytes_u32_to_usize(tzh_leapcnt_bytes)
879 .map_err(|e| err!("failed to parse tzh_leapcnt: {e}"))?;
880 let tzh_timecnt = from_be_bytes_u32_to_usize(tzh_timecnt_bytes)
881 .map_err(|e| err!("failed to parse tzh_timecnt: {e}"))?;
882 let tzh_typecnt = from_be_bytes_u32_to_usize(tzh_typecnt_bytes)
883 .map_err(|e| err!("failed to parse tzh_typecnt: {e}"))?;
884 let tzh_charcnt = from_be_bytes_u32_to_usize(tzh_charcnt_bytes)
885 .map_err(|e| err!("failed to parse tzh_charcnt: {e}"))?;
886
887 if tzh_ttisutcnt != 0 && tzh_ttisutcnt != tzh_typecnt {
888 return Err(err!(
889 "expected tzh_ttisutcnt={tzh_ttisutcnt} to be zero \
890 or equal to tzh_typecnt={tzh_typecnt}",
891 ));
892 }
893 if tzh_ttisstdcnt != 0 && tzh_ttisstdcnt != tzh_typecnt {
894 return Err(err!(
895 "expected tzh_ttisstdcnt={tzh_ttisstdcnt} to be zero \
896 or equal to tzh_typecnt={tzh_typecnt}",
897 ));
898 }
899 if tzh_typecnt < 1 {
900 return Err(err!(
901 "expected tzh_typecnt={tzh_typecnt} to be at least 1",
902 ));
903 }
904 if tzh_charcnt < 1 {
905 return Err(err!(
906 "expected tzh_charcnt={tzh_charcnt} to be at least 1",
907 ));
908 }
909
910 let header = Header {
911 time_size,
912 version: version[0],
913 tzh_ttisutcnt,
914 tzh_ttisstdcnt,
915 tzh_leapcnt,
916 tzh_timecnt,
917 tzh_typecnt,
918 tzh_charcnt,
919 };
920 Ok((header, rest))
921 }
922
923 /// Returns true if this header is for a 32-bit data block.
924 ///
925 /// When false, it is guaranteed that this header is for a 64-bit data
926 /// block.
927 fn is_32bit(&self) -> bool {
928 self.time_size == 4
929 }
930
931 /// Returns the size of the data block, in bytes, for this header.
932 ///
933 /// This returns an error if the arithmetic required to compute the
934 /// length would overflow.
935 ///
936 /// This is useful for, e.g., skipping over the 32-bit V1 data block in
937 /// V2+ TZif formatted files.
938 fn data_block_len(&self) -> Result<usize, Error> {
939 let a = self.transition_times_len()?;
940 let b = self.transition_types_len()?;
941 let c = self.local_time_types_len()?;
942 let d = self.time_zone_designations_len()?;
943 let e = self.leap_second_len()?;
944 let f = self.standard_wall_len()?;
945 let g = self.ut_local_len()?;
946 a.checked_add(b)
947 .and_then(|z| z.checked_add(c))
948 .and_then(|z| z.checked_add(d))
949 .and_then(|z| z.checked_add(e))
950 .and_then(|z| z.checked_add(f))
951 .and_then(|z| z.checked_add(g))
952 .ok_or_else(|| {
953 err!(
954 "length of data block in V{} tzfile is too big",
955 self.version
956 )
957 })
958 }
959
960 fn transition_times_len(&self) -> Result<usize, Error> {
961 self.tzh_timecnt.checked_mul(self.time_size).ok_or_else(|| {
962 err!("tzh_timecnt value {} is too big", self.tzh_timecnt)
963 })
964 }
965
966 fn transition_types_len(&self) -> Result<usize, Error> {
967 Ok(self.tzh_timecnt)
968 }
969
970 fn local_time_types_len(&self) -> Result<usize, Error> {
971 self.tzh_typecnt.checked_mul(6).ok_or_else(|| {
972 err!("tzh_typecnt value {} is too big", self.tzh_typecnt)
973 })
974 }
975
976 fn time_zone_designations_len(&self) -> Result<usize, Error> {
977 Ok(self.tzh_charcnt)
978 }
979
980 fn leap_second_len(&self) -> Result<usize, Error> {
981 let record_len = self
982 .time_size
983 .checked_add(4)
984 .expect("4-or-8 plus 4 always fits in usize");
985 self.tzh_leapcnt.checked_mul(record_len).ok_or_else(|| {
986 err!("tzh_leapcnt value {} is too big", self.tzh_leapcnt)
987 })
988 }
989
990 fn standard_wall_len(&self) -> Result<usize, Error> {
991 Ok(self.tzh_ttisstdcnt)
992 }
993
994 fn ut_local_len(&self) -> Result<usize, Error> {
995 Ok(self.tzh_ttisutcnt)
996 }
997}
998
999/// Splits the given slice of bytes at the index given.
1000///
1001/// If the index is out of range (greater than `bytes.len()`) then an error is
1002/// returned. The error message will include the `what` string given, which is
1003/// meant to describe the thing being split.
1004fn try_split_at<'b>(
1005 what: &'static str,
1006 bytes: &'b [u8],
1007 at: usize,
1008) -> Result<(&'b [u8], &'b [u8]), Error> {
1009 if at > bytes.len() {
1010 Err(err!(
1011 "expected at least {at} bytes for {what}, \
1012 but found only {} bytes",
1013 bytes.len(),
1014 ))
1015 } else {
1016 Ok(bytes.split_at(mid:at))
1017 }
1018}
1019
1020/// Interprets the given slice as an unsigned 32-bit big endian integer,
1021/// attempts to convert it to a `usize` and returns it.
1022///
1023/// # Panics
1024///
1025/// When `bytes.len() != 4`.
1026///
1027/// # Errors
1028///
1029/// This errors if the `u32` parsed from the given bytes cannot fit in a
1030/// `usize`.
1031fn from_be_bytes_u32_to_usize(bytes: &[u8]) -> Result<usize, Error> {
1032 let n: u32 = from_be_bytes_u32(bytes);
1033 usize::try_from(n).map_err(|_| {
1034 err!(
1035 "failed to parse integer {n} (too big, max allowed is {}",
1036 usize::MAX
1037 )
1038 })
1039}
1040
/// Decodes an unsigned 32-bit integer from the given slice, treating the
/// bytes as big endian.
///
/// # Panics
///
/// When `bytes.len() != 4`.
fn from_be_bytes_u32(bytes: &[u8]) -> u32 {
    // The conversion to a fixed-size array fails, and the `unwrap`
    // panics, unless the slice is exactly 4 bytes long.
    let word = <[u8; 4]>::try_from(bytes).unwrap();
    u32::from_be_bytes(word)
}
1050
/// Decodes a signed 32-bit integer from the given slice, treating the
/// bytes as big endian.
///
/// # Panics
///
/// When `bytes.len() != 4`.
fn from_be_bytes_i32(bytes: &[u8]) -> i32 {
    // The conversion to a fixed-size array fails, and the `unwrap`
    // panics, unless the slice is exactly 4 bytes long.
    let word = <[u8; 4]>::try_from(bytes).unwrap();
    i32::from_be_bytes(word)
}
1060
/// Decodes a signed 64-bit integer from the given slice, treating the
/// bytes as big endian.
///
/// # Panics
///
/// When `bytes.len() != 8`.
fn from_be_bytes_i64(bytes: &[u8]) -> i64 {
    // The conversion to a fixed-size array fails, and the `unwrap`
    // panics, unless the slice is exactly 8 bytes long.
    let word = <[u8; 8]>::try_from(bytes).unwrap();
    i64::from_be_bytes(word)
}
1070