use alloc::{string::String, vec};

use super::{
    util::{
        array_str::Abbreviation,
        error::{err, Error},
        escape::{Byte, Bytes},
        itime::{IOffset, ITimestamp},
    },
    PosixTimeZone, TzifDateTime, TzifFixed, TzifIndicator, TzifLocalTimeType,
    TzifOwned, TzifTransitionInfo, TzifTransitionKind, TzifTransitions,
    TzifTransitionsOwned,
};

// These are Jiff's min and max timestamp (in seconds) values.
//
// The TZif parser will clamp timestamps to this range. It's
// not ideal, but Jiff can't handle values outside of this range
// and completely refusing to use TZif data with pathological
// timestamps in typically irrelevant transitions is bad juju.
//
// Ref: https://github.com/BurntSushi/jiff/issues/163
// Ref: https://github.com/BurntSushi/jiff/pull/164
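//
// For reference, these bound Jiff's supported range: in UTC they
// correspond to instants near the very beginning of year -9999 and the
// very end of year 9999, respectively.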
const TIMESTAMP_MIN: i64 = -377705023201;
const TIMESTAMP_MAX: i64 = 253402207200;

// Similarly for offsets, although in this case, if we find
// an offset outside of this range, we do actually error. This
// is because it could result in truly incorrect datetimes for
// actual transitions.
//
// But our supported offset range is `-25:59:59..=+25:59:59`.
// There's no real time zone with offsets even close to those
// boundaries.
//
// If there is pathological data that we should ignore, then
// we should wait for a real bug report in order to determine
// the right way to ignore/clamp it.
const OFFSET_MIN: i32 = -93599;
const OFFSET_MAX: i32 = 93599;

// When fattening TZif data, this is the year to go up to.
//
// This year was chosen because it's what the "fat" TZif data generated
// by `zic` uses.
const FATTEN_UP_TO_YEAR: i16 = 2038;

// This is a "sanity" limit on the maximum number of transitions we'll
// add to TZif data when fattening them up.
//
// This is mostly just a defense-in-depth limit to avoid weird cases
// where a pathological POSIX time zone could be defined to create
// many transitions. It's not clear that this is actually possible,
// but I felt a little uneasy doing unbounded work that isn't linearly
// proportional to the input data. So, this limit is put into place for
// reasons of "good sense."
//
// For "normal" cases, there should be at most two transitions per
// year. So this limit permits 300/2=150 years of transition data.
// (Although we won't generate transitions at or beyond
// `FATTEN_UP_TO_YEAR`. See above.)
const FATTEN_MAX_TRANSITIONS: usize = 300;

impl TzifOwned {
    /// Parses the given data as a TZif formatted file.
    ///
    /// The name given is attached to the `Tzif` value returned, but is
    /// otherwise not significant.
    ///
    /// If the given data is not recognized to be valid TZif, then an error is
    /// returned.
    ///
    /// In general, callers may assume that it is safe to pass arbitrary or
    /// even untrusted data to this function and count on it not panicking
    /// or using resources that aren't limited to a small constant factor of
    /// the size of the data itself. That is, callers can reliably limit the
    /// resources used by limiting the size of the data given to this parse
    /// function.
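    ///
    /// # Example
    ///
    /// A minimal sketch of how this might be called (the path and the
    /// surrounding error handling are illustrative only):
    ///
    /// ```text
    /// let data = std::fs::read("/usr/share/zoneinfo/America/New_York")?;
    /// let tzif = TzifOwned::parse(Some("America/New_York".into()), &data)?;
    /// ```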
    pub(crate) fn parse(
        name: Option<String>,
        bytes: &[u8],
    ) -> Result<TzifOwned, Error> {
        let original = bytes;
        let name = name.into();
        let (header32, rest) = Header::parse(4, bytes)
            .map_err(|e| err!("failed to parse 32-bit header: {e}"))?;
        let (mut tzif, rest) = if header32.version == 0 {
            TzifOwned::parse32(name, header32, rest)?
        } else {
            TzifOwned::parse64(name, header32, rest)?
        };
        tzif.fatten();
        // This should come after fattening, because fattening may add new
        // transitions and we want to add civil datetimes to those.
        tzif.add_civil_datetimes_to_transitions();
        tzif.verify_posix_time_zone_consistency()?;
        // Compute the checksum using the entire contents of the TZif data.
        let tzif_raw_len = (rest.as_ptr() as usize)
            .checked_sub(original.as_ptr() as usize)
            .unwrap();
        let tzif_raw_bytes = &original[..tzif_raw_len];
        tzif.fixed.checksum = super::crc32::sum(tzif_raw_bytes);

        // Shrink all of our allocs so we don't keep excess capacity around.
        tzif.fixed.designations.shrink_to_fit();
        tzif.types.shrink_to_fit();
        tzif.transitions.timestamps.shrink_to_fit();
        tzif.transitions.civil_starts.shrink_to_fit();
        tzif.transitions.civil_ends.shrink_to_fit();
        tzif.transitions.infos.shrink_to_fit();

        Ok(tzif)
    }

    fn parse32<'b>(
        name: Option<String>,
        header32: Header,
        bytes: &'b [u8],
    ) -> Result<(TzifOwned, &'b [u8]), Error> {
        let mut tzif = TzifOwned {
            fixed: TzifFixed {
                name,
                version: header32.version,
                // filled in later
                checksum: 0,
                designations: String::new(),
                posix_tz: None,
            },
            types: vec![],
            transitions: TzifTransitions {
                timestamps: vec![],
                civil_starts: vec![],
                civil_ends: vec![],
                infos: vec![],
            },
        };
        let rest = tzif.parse_transitions(&header32, bytes)?;
        let rest = tzif.parse_transition_types(&header32, rest)?;
        let rest = tzif.parse_local_time_types(&header32, rest)?;
        let rest = tzif.parse_time_zone_designations(&header32, rest)?;
        let rest = tzif.parse_leap_seconds(&header32, rest)?;
        let rest = tzif.parse_indicators(&header32, rest)?;
        Ok((tzif, rest))
    }

    fn parse64<'b>(
        name: Option<String>,
        header32: Header,
        bytes: &'b [u8],
    ) -> Result<(TzifOwned, &'b [u8]), Error> {
        let (_, rest) = try_split_at(
            "V1 TZif data block",
            bytes,
            header32.data_block_len()?,
        )?;
        let (header64, rest) = Header::parse(8, rest)
            .map_err(|e| err!("failed to parse 64-bit header: {e}"))?;
        let mut tzif = TzifOwned {
            fixed: TzifFixed {
                name,
                version: header64.version,
                // filled in later
                checksum: 0,
                designations: String::new(),
                posix_tz: None,
            },
            types: vec![],
            transitions: TzifTransitions {
                timestamps: vec![],
                civil_starts: vec![],
                civil_ends: vec![],
                infos: vec![],
            },
        };
        let rest = tzif.parse_transitions(&header64, rest)?;
        let rest = tzif.parse_transition_types(&header64, rest)?;
        let rest = tzif.parse_local_time_types(&header64, rest)?;
        let rest = tzif.parse_time_zone_designations(&header64, rest)?;
        let rest = tzif.parse_leap_seconds(&header64, rest)?;
        let rest = tzif.parse_indicators(&header64, rest)?;
        let rest = tzif.parse_footer(&header64, rest)?;
        // Note that we don't check that the TZif data is fully valid. It is
        // possible for it to contain superfluous information. For example, a
        // non-zero local time type that is never referenced by a transition.
        Ok((tzif, rest))
    }

    fn parse_transitions<'b>(
        &mut self,
        header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        let (bytes, rest) = try_split_at(
            "transition times data block",
            bytes,
            header.transition_times_len()?,
        )?;
        let mut it = bytes.chunks_exact(header.time_size);
        // RFC 8536 says: "If there are no transitions, local time for all
        // timestamps is specified by the TZ string in the footer if present
        // and nonempty; otherwise, it is specified by time type 0."
        //
        // RFC 8536 also says: "Local time for timestamps before the first
        // transition is specified by the first time type (time type
        // 0)."
        //
        // So if there are no transitions, pushing this dummy one will result
        // in the desired behavior even when it's the only transition.
        // Similarly, since this is the minimum timestamp value, it will
        // trigger for any times before the first transition found in the TZif
        // data.
        self.transitions.add_with_type_index(TIMESTAMP_MIN, 0);
        while let Some(chunk) = it.next() {
            let mut timestamp = if header.is_32bit() {
                i64::from(from_be_bytes_i32(chunk))
            } else {
                from_be_bytes_i64(chunk)
            };
            if !(TIMESTAMP_MIN <= timestamp && timestamp <= TIMESTAMP_MAX) {
                // We really shouldn't error here just because the Unix
                // timestamp is outside what Jiff supports. Since what Jiff
                // supports is _somewhat_ arbitrary. But Jiff's supported
                // range is good enough for all realistic purposes, so we
                // just clamp an out-of-range Unix timestamp to the Jiff
                // min or max value.
                //
                // This can't result in the sorting order being wrong, but
                // it can result in a transition that is duplicative with
                // the dummy transition we inserted above. This should be
                // fine.
                let clamped = timestamp.clamp(TIMESTAMP_MIN, TIMESTAMP_MAX);
                // only-jiff-start
                warn!(
                    "found Unix timestamp {timestamp} that is outside \
                     Jiff's supported range, clamping to {clamped}",
                );
                // only-jiff-end
                timestamp = clamped;
            }
            self.transitions.add(timestamp);
        }
        assert!(it.remainder().is_empty());
        Ok(rest)
    }

    fn parse_transition_types<'b>(
        &mut self,
        header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        let (bytes, rest) = try_split_at(
            "transition types data block",
            bytes,
            header.transition_types_len()?,
        )?;
        // We skip the first transition because it is our minimum dummy
        // transition.
        for (transition_index, &type_index) in (1..).zip(bytes) {
            if usize::from(type_index) >= header.tzh_typecnt {
                return Err(err!(
                    "found transition type index {type_index}, \
                     but there are only {} local time types",
                    header.tzh_typecnt,
                ));
            }
            self.transitions.infos[transition_index].type_index = type_index;
        }
        Ok(rest)
    }

    fn parse_local_time_types<'b>(
        &mut self,
        header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        let (bytes, rest) = try_split_at(
            "local time types data block",
            bytes,
            header.local_time_types_len()?,
        )?;
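        // Each local time type record is 6 bytes: a 4-byte big-endian UT
        // offset in seconds, a 1-byte DST flag and a 1-byte index into the
        // time zone designations string (per RFC 8536).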
        let mut it = bytes.chunks_exact(6);
        while let Some(chunk) = it.next() {
            let offset = from_be_bytes_i32(&chunk[..4]);
            if !(OFFSET_MIN <= offset && offset <= OFFSET_MAX) {
                return Err(err!(
                    "found local time type with out-of-bounds offset: {offset}"
                ));
            }
            let is_dst = chunk[4] == 1;
            let designation = (chunk[5], chunk[5]);
            self.types.push(TzifLocalTimeType {
                offset,
                is_dst,
                designation,
                indicator: TzifIndicator::LocalWall,
            });
        }
        assert!(it.remainder().is_empty());
        Ok(rest)
    }

    fn parse_time_zone_designations<'b>(
        &mut self,
        header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        let (bytes, rest) = try_split_at(
            "time zone designations data block",
            bytes,
            header.time_zone_designations_len()?,
        )?;
        self.fixed.designations =
            String::from_utf8(bytes.to_vec()).map_err(|_| {
                err!(
                    "time zone designations are not valid UTF-8: {:?}",
                    Bytes(bytes),
                )
            })?;
        // Holy hell, this is brutal. The boundary conditions are crazy.
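        //
        // As a worked example (data invented for illustration): if
        // `designations` is "LMT\0EST\0EDT\0" and a local time type has
        // designation index 4, then the suffix is "EST\0EDT\0", the NUL is
        // found 3 bytes in, and the final designation range is 4..7,
        // i.e. "EST".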
        for (i, typ) in self.types.iter_mut().enumerate() {
            let start = usize::from(typ.designation.0);
            let Some(suffix) = self.fixed.designations.get(start..) else {
                return Err(err!(
                    "local time type {i} has designation index of {start}, \
                     but cannot be more than {}",
                    self.fixed.designations.len(),
                ));
            };
            let Some(len) = suffix.find('\x00') else {
                return Err(err!(
                    "local time type {i} has designation index of {start}, \
                     but could not find NUL terminator after it in \
                     designations: {:?}",
                    self.fixed.designations,
                ));
            };
            let Some(end) = start.checked_add(len) else {
                return Err(err!(
                    "local time type {i} has designation index of {start}, \
                     but its length {len} is too big",
                ));
            };
            typ.designation.1 = u8::try_from(end).map_err(|_| {
                err!(
                    "local time type {i} has designation range of \
                     {start}..{end}, but end is too big",
                )
            })?;
        }
        Ok(rest)
    }

    /// This parses the leap second corrections in the TZif data.
    ///
    /// Note that we only parse and verify them. We don't actually use them.
    /// Jiff effectively ignores leap seconds.
    fn parse_leap_seconds<'b>(
        &mut self,
        header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        let (bytes, rest) = try_split_at(
            "leap seconds data block",
            bytes,
            header.leap_second_len()?,
        )?;
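        // Each leap second record is `time_size` bytes for the occurrence
        // timestamp followed by a 4-byte correction (per RFC 8536), hence
        // the `time_size + 4` chunk length below.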
        let chunk_len = header
            .time_size
            .checked_add(4)
            .expect("time_size plus 4 fits in usize");
        let mut it = bytes.chunks_exact(chunk_len);
        while let Some(chunk) = it.next() {
            let (occur_bytes, _corr_bytes) = chunk.split_at(header.time_size);
            let occur = if header.is_32bit() {
                i64::from(from_be_bytes_i32(occur_bytes))
            } else {
                from_be_bytes_i64(occur_bytes)
            };
            if !(TIMESTAMP_MIN <= occur && occur <= TIMESTAMP_MAX) {
                // only-jiff-start
                warn!(
                    "leap second occurrence {occur} is \
                     not in Jiff's supported range"
                )
                // only-jiff-end
            }
        }
        assert!(it.remainder().is_empty());
        Ok(rest)
    }

    fn parse_indicators<'b>(
        &mut self,
        header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        let (std_wall_bytes, rest) = try_split_at(
            "standard/wall indicators data block",
            bytes,
            header.standard_wall_len()?,
        )?;
        let (ut_local_bytes, rest) = try_split_at(
            "UT/local indicators data block",
            rest,
            header.ut_local_len()?,
        )?;
        if std_wall_bytes.is_empty() && !ut_local_bytes.is_empty() {
            // This is a weird case, but technically possible only if all
            // UT/local indicators are 0. If any are 1, then it's an error,
            // because it would require the corresponding std/wall indicator
            // to be 1 too. Which it can't be, because there aren't any. So
            // we just check that they're all zeros.
            for (i, &byte) in ut_local_bytes.iter().enumerate() {
                if byte != 0 {
                    return Err(err!(
                        "found UT/local indicator '{byte}' for local time \
                         type {i}, but it must be 0 since all std/wall \
                         indicators are 0",
                    ));
                }
            }
        } else if !std_wall_bytes.is_empty() && ut_local_bytes.is_empty() {
            for (i, &byte) in std_wall_bytes.iter().enumerate() {
                // Indexing is OK because Header guarantees that the number of
                // indicators is 0 or equal to the number of types.
                self.types[i].indicator = if byte == 0 {
                    TzifIndicator::LocalWall
                } else if byte == 1 {
                    TzifIndicator::LocalStandard
                } else {
                    return Err(err!(
                        "found invalid std/wall indicator '{byte}' for \
                         local time type {i}, it must be 0 or 1",
                    ));
                };
            }
        } else if !std_wall_bytes.is_empty() && !ut_local_bytes.is_empty() {
            assert_eq!(std_wall_bytes.len(), ut_local_bytes.len());
            let it = std_wall_bytes.iter().zip(ut_local_bytes);
            for (i, (&stdwall, &utlocal)) in it.enumerate() {
                // Indexing is OK because Header guarantees that the number of
                // indicators is 0 or equal to the number of types.
                self.types[i].indicator = match (stdwall, utlocal) {
                    (0, 0) => TzifIndicator::LocalWall,
                    (1, 0) => TzifIndicator::LocalStandard,
                    (1, 1) => TzifIndicator::UTStandard,
                    (0, 1) => {
                        return Err(err!(
                            "found illegal ut-wall combination for \
                             local time type {i}, only local-wall, \
                             local-standard and ut-standard are allowed",
                        ))
                    }
                    _ => {
                        return Err(err!(
                            "found illegal std/wall or ut/local value for \
                             local time type {i}, each must be 0 or 1",
                        ))
                    }
                };
            }
        } else {
            // If they're both empty then we don't need to do anything. Every
            // local time type record already has the correct default for this
            // case set.
            debug_assert!(std_wall_bytes.is_empty());
            debug_assert!(ut_local_bytes.is_empty());
        }
        Ok(rest)
    }

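    /// Parses the V2+ footer: a `\n` byte, an optional TZ string and a
    /// terminating `\n` byte.
    ///
    /// For illustration only (not taken from any particular input), a
    /// typical footer body looks something like `EST5EDT,M3.2.0,M11.1.0`,
    /// which is handed off to the POSIX time zone parser below.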
    fn parse_footer<'b>(
        &mut self,
        _header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        if bytes.is_empty() {
            return Err(err!(
                "invalid V2+ TZif footer, expected \\n, \
                 but found unexpected end of data",
            ));
        }
        if bytes[0] != b'\n' {
            return Err(err!(
                "invalid V2+ TZif footer, expected {:?}, but found {:?}",
                Byte(b'\n'),
                Byte(bytes[0]),
            ));
        }
        let bytes = &bytes[1..];
        // Only scan up to 1KB for the `\n` terminator in case we somehow got
        // passed a huge block of bytes.
        let toscan = &bytes[..bytes.len().min(1024)];
        let Some(nlat) = toscan.iter().position(|&b| b == b'\n') else {
            return Err(err!(
                "invalid V2 TZif footer, could not find {:?} \
                 terminator in: {:?}",
                Byte(b'\n'),
                Bytes(toscan),
            ));
        };
        let (bytes, rest) = bytes.split_at(nlat);
        if !bytes.is_empty() {
            // We could in theory limit TZ strings to their strict POSIX
            // definition here for TZif V2, but I don't think there is any
            // harm in allowing the extensions in V2 formatted TZif data. Note
            // that the GNU tooling allows it via the `TZ` environment variable
            // even though POSIX doesn't specify it. This all seems okay to me
            // because the V3+ extension is a strict superset of functionality.
            let posix_tz =
                PosixTimeZone::parse(bytes).map_err(|e| err!("{e}"))?;
            self.fixed.posix_tz = Some(posix_tz);
        }
        Ok(&rest[1..])
    }

    /// Validates that the POSIX TZ string we parsed (if one exists) is
    /// consistent with the last transition in this time zone. This is
    /// required by RFC 8536.
    ///
    /// RFC 8536 says, "If the string is nonempty and one or more
    /// transitions appear in the version 2+ data, the string MUST be
    /// consistent with the last version 2+ transition."
    fn verify_posix_time_zone_consistency(&self) -> Result<(), Error> {
        // We need to be a little careful, since we always have at least one
        // transition (accounting for the dummy `Timestamp::MIN` transition).
        // So if we only have 1 transition and a POSIX TZ string, then we
        // should not validate it since it's equivalent to the case of 0
        // transitions and a POSIX TZ string.
        if self.transitions.timestamps.len() <= 1 {
            return Ok(());
        }
        let Some(ref tz) = self.fixed.posix_tz else {
            return Ok(());
        };
        let last = self
            .transitions
            .timestamps
            .last()
            .expect("last transition timestamp");
        let type_index = self
            .transitions
            .infos
            .last()
            .expect("last transition info")
            .type_index;
        let typ = &self.types[usize::from(type_index)];
        let (ioff, abbrev, is_dst) =
            tz.to_offset_info(ITimestamp::from_second(*last));
        if ioff.second != typ.offset {
            return Err(err!(
                "expected last transition to have DST offset \
                 of {expected_offset}, but got {got_offset} \
                 according to POSIX TZ string {tz}",
                expected_offset = typ.offset,
                got_offset = ioff.second,
                tz = tz,
            ));
        }
        if is_dst != typ.is_dst {
            return Err(err!(
                "expected last transition to have is_dst={expected_dst}, \
                 but got is_dst={got_dst} according to POSIX TZ \
                 string {tz}",
                expected_dst = typ.is_dst,
                got_dst = is_dst,
                tz = tz,
            ));
        }
        if abbrev != self.designation(&typ) {
            return Err(err!(
                "expected last transition to have \
                 designation={expected_abbrev}, \
                 but got designation={got_abbrev} according to POSIX TZ \
                 string {tz}",
                expected_abbrev = self.designation(&typ),
                got_abbrev = abbrev,
                tz = tz,
            ));
        }
        Ok(())
    }

    /// Add civil datetimes to our transitions.
    ///
    /// This isn't strictly necessary, but it speeds up time zone lookups when
    /// the input is a civil datetime. It lets us do comparisons directly on
    /// the civil datetime as given, instead of needing to convert the civil
    /// datetime given to a timestamp first. (Even if we didn't do this, I
    /// believe we'd still need at least one additional timestamp that is
    /// offset, because TZ lookups for a civil datetime are done in local time,
    /// and the timestamps in TZif data are, of course, all in UTC.)
    fn add_civil_datetimes_to_transitions(&mut self) {
        fn to_datetime(timestamp: i64, offset: i32) -> TzifDateTime {
            use crate::shared::util::itime::{IOffset, ITimestamp};
            let its = ITimestamp { second: timestamp, nanosecond: 0 };
            let ioff = IOffset { second: offset };
            let dt = its.to_datetime(ioff);
            TzifDateTime::new(
                dt.date.year,
                dt.date.month,
                dt.date.day,
                dt.time.hour,
                dt.time.minute,
                dt.time.second,
            )
        }

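        // As an illustration (values invented, not from any particular TZif
        // file): for a typical "spring forward" transition where the previous
        // offset is -18000 (UTC-5) and the new offset is -14400 (UTC-4), we
        // have `prev_offset < offset`, so the transition is a gap. Its civil
        // start is the transition instant rendered with the old offset
        // (02:00 local) and its civil end is the same instant rendered with
        // the new offset (03:00 local).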
        let trans = &mut self.transitions;
        for i in 0..trans.timestamps.len() {
            let timestamp = trans.timestamps[i];
            let offset = {
                let type_index = trans.infos[i].type_index;
                self.types[usize::from(type_index)].offset
            };
            let prev_offset = {
                let type_index = trans.infos[i.saturating_sub(1)].type_index;
                self.types[usize::from(type_index)].offset
            };

            if prev_offset == offset {
                // Equivalent offsets means there can never be any ambiguity.
                let start = to_datetime(timestamp, prev_offset);
                trans.infos[i].kind = TzifTransitionKind::Unambiguous;
                trans.civil_starts[i] = start;
            } else if prev_offset < offset {
                // When the offset of the previous transition is less, that
                // means there is some non-zero amount of time that is
                // "skipped" when moving to the next transition. Thus, we have
                // a gap. The start of the gap is the offset which gets us the
                // earliest time, i.e., the smaller of the two offsets.
                trans.infos[i].kind = TzifTransitionKind::Gap;
                trans.civil_starts[i] = to_datetime(timestamp, prev_offset);
                trans.civil_ends[i] = to_datetime(timestamp, offset);
            } else {
                // When the offset of the previous transition is greater, that
                // means there is some non-zero amount of time that will be
                // replayed on a wall clock in this time zone. Thus, we have
                // a fold. The start of the fold is the offset which gets us
                // the earliest time, i.e., the smaller of the two offsets.
                assert!(prev_offset > offset);
                trans.infos[i].kind = TzifTransitionKind::Fold;
                trans.civil_starts[i] = to_datetime(timestamp, offset);
                trans.civil_ends[i] = to_datetime(timestamp, prev_offset);
            }
        }
    }

    /// Fatten up this TZif data with additional transitions.
    ///
    /// These additional transitions often make time zone lookups faster, and
    /// they smooth out the performance difference between using "slim" and
    /// "fat" tzdbs.
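    ///
    /// For example, a "slim" TZif file typically ends with only a handful of
    /// explicit transitions plus a POSIX TZ string (e.g., something like
    /// `EST5EDT,M3.2.0,M11.1.0`); fattening evaluates that string to
    /// synthesize explicit transitions up to (but not beyond)
    /// `FATTEN_UP_TO_YEAR`.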
    fn fatten(&mut self) {
        // Note that this is a crate feature for *both* `jiff` and
        // `jiff-static`.
        if !cfg!(feature = "tz-fat") {
            return;
        }
        let Some(posix_tz) = self.fixed.posix_tz.clone() else { return };
        let last =
            self.transitions.timestamps.last().expect("last transition");
        let mut i = 0;
        let mut prev = ITimestamp::from_second(*last);
        loop {
            if i > FATTEN_MAX_TRANSITIONS {
                // only-jiff-start
                warn!(
                    "fattening TZif data for `{name:?}` somehow generated \
                     more than {max} transitions, so giving up to avoid \
                     doing too much work",
                    name = self.fixed.name,
                    max = FATTEN_MAX_TRANSITIONS,
                );
                // only-jiff-end
                return;
            }
            i += 1;
            prev = match self.add_transition(&posix_tz, prev) {
                None => break,
                Some(next) => next,
            };
        }
    }

    /// If there's a transition strictly after the given timestamp for the
    /// given POSIX time zone, then add it to this TZif data.
    fn add_transition(
        &mut self,
        posix_tz: &PosixTimeZone<Abbreviation>,
        prev: ITimestamp,
    ) -> Option<ITimestamp> {
        let (its, ioff, abbrev, is_dst) = posix_tz.next_transition(prev)?;
        if its.to_datetime(IOffset::UTC).date.year >= FATTEN_UP_TO_YEAR {
            return None;
        }
        let type_index =
            self.find_or_create_local_time_type(ioff, abbrev, is_dst)?;
        self.transitions.add_with_type_index(its.second, type_index);
        Some(its)
    }

    /// Look for a local time type matching the data given.
    ///
    /// If one could not be found, then one is created and its index is
    /// returned.
    ///
    /// If one could not be found and one could not be created (e.g., the index
    /// would overflow `u8`), then `None` is returned.
    fn find_or_create_local_time_type(
        &mut self,
        offset: IOffset,
        abbrev: &str,
        is_dst: bool,
    ) -> Option<u8> {
        for (i, typ) in self.types.iter().enumerate() {
            if offset.second == typ.offset
                && abbrev == self.designation(typ)
                && is_dst == typ.is_dst
            {
                return u8::try_from(i).ok();
            }
        }
        let i = u8::try_from(self.types.len()).ok()?;
        let designation = self.find_or_create_designation(abbrev)?;
        self.types.push(TzifLocalTimeType {
            offset: offset.second,
            is_dst,
            designation,
            // Not really clear if this is correct, but Jiff
            // ignores this anyway, so ¯\_(ツ)_/¯.
            indicator: TzifIndicator::LocalWall,
        });
        Some(i)
    }

    /// Look for a designation (i.e., time zone abbreviation) matching the data
    /// given, and return its range into `self.fixed.designations`.
    ///
    /// If one could not be found, then one is created and its range is
    /// returned.
    ///
    /// If one could not be found and one could not be created (e.g., the range
    /// would overflow `u8`), then `None` is returned.
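    ///
    /// For example (values invented for illustration): if `designations` is
    /// `"LMT\0EST\0"`, looking up `"EST"` returns `(4, 7)`, while looking up
    /// `"EDT"` appends `"EDT\0"` and returns `(8, 11)`.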
    fn find_or_create_designation(
        &mut self,
        needle: &str,
    ) -> Option<(u8, u8)> {
        let mut start = 0;
        while let Some(offset) = self.fixed.designations[start..].find('\0') {
            let end = start + offset;
            let abbrev = &self.fixed.designations[start..end];
            if needle == abbrev {
                return Some((start.try_into().ok()?, end.try_into().ok()?));
            }
            start = end + 1;
        }
        self.fixed.designations.push_str(needle);
        self.fixed.designations.push('\0');
        let end = start + needle.len();
        Some((start.try_into().ok()?, end.try_into().ok()?))
    }

    fn designation(&self, typ: &TzifLocalTimeType) -> &str {
        let range =
            usize::from(typ.designation.0)..usize::from(typ.designation.1);
        // OK because we verify that the designation range on every local
        // time type is a valid range into `self.fixed.designations`.
        &self.fixed.designations[range]
    }
}

impl TzifTransitionsOwned {
    /// Add a single transition with the given timestamp.
    ///
    /// This also fills in the other columns (civil starts, civil ends and
    /// infos) with sensible default values. It is expected that callers will
    /// later fill them in.
    fn add(&mut self, timestamp: i64) {
        self.add_with_type_index(timestamp, 0);
    }

    /// Like `TzifTransitionsOwned::add`, but lets the caller provide a type
    /// index if it is known.
    fn add_with_type_index(&mut self, timestamp: i64, type_index: u8) {
        self.timestamps.push(timestamp);
        self.civil_starts.push(TzifDateTime::ZERO);
        self.civil_ends.push(TzifDateTime::ZERO);
        self.infos.push(TzifTransitionInfo {
            type_index,
            kind: TzifTransitionKind::Unambiguous,
        });
    }
}

/// The header for a TZif formatted file.
///
/// The V2+ TZif format has two headers: one for V1 data, and then a second
/// following the V1 data block that describes another data block which uses
/// 64-bit timestamps. The two headers both have the same format and both
/// use 32-bit big-endian encoded integers.
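///
/// For reference, the fixed 44-byte layout parsed below is: 4 magic bytes
/// (`TZif`), 1 version byte, 15 reserved bytes, and then six 32-bit
/// big-endian counts in this order: `tzh_ttisutcnt`, `tzh_ttisstdcnt`,
/// `tzh_leapcnt`, `tzh_timecnt`, `tzh_typecnt`, `tzh_charcnt`.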
#[derive(Debug)]
struct Header {
    /// The size of the timestamps encoded in the data block.
    ///
    /// This is guaranteed to be either 4 (for V1) or 8 (for the 64-bit header
    /// block in V2+).
    time_size: usize,
    /// The file format version.
    ///
    /// Note that this is either a NUL byte (for version 1), or an ASCII byte
    /// corresponding to the version number. That is, `0x32` for `2`, `0x33`
    /// for `3` or `0x34` for `4`. Note also that just because zoneinfo might
    /// have been recently generated does not mean it uses the latest format
    /// version. It seems like newer versions are only compiled by `zic` when
    /// they are needed. For example, `America/New_York` on my system (as of
    /// `2024-03-25`) has version `0x32`, but `Asia/Jerusalem` has version
    /// `0x33`.
    version: u8,
    /// Number of UT/local indicators stored in the file.
    ///
    /// This is checked to be either equal to `0` or equal to `tzh_typecnt`.
    tzh_ttisutcnt: usize,
    /// The number of standard/wall indicators stored in the file.
    ///
    /// This is checked to be either equal to `0` or equal to `tzh_typecnt`.
    tzh_ttisstdcnt: usize,
    /// The number of leap seconds for which data entries are stored in the
    /// file.
    tzh_leapcnt: usize,
    /// The number of transition times for which data entries are stored in
    /// the file.
    tzh_timecnt: usize,
    /// The number of local time types for which data entries are stored in the
    /// file.
    ///
    /// This is checked to be at least `1`.
    tzh_typecnt: usize,
    /// The number of bytes of time zone abbreviation strings stored in the
    /// file.
    ///
    /// This is checked to be at least `1`.
    tzh_charcnt: usize,
}

impl Header {
    /// Parse the header record from the given bytes.
    ///
    /// Upon success, return the header and all bytes after the header.
    ///
    /// The given `time_size` must be 4 or 8, corresponding to either the
    /// V1 header block or the V2+ header block, respectively.
    fn parse(
        time_size: usize,
        bytes: &[u8],
    ) -> Result<(Header, &[u8]), Error> {
        assert!(time_size == 4 || time_size == 8, "time size must be 4 or 8");
        if bytes.len() < 44 {
            return Err(err!("invalid header: too short"));
        }
        let (magic, rest) = bytes.split_at(4);
        if magic != b"TZif" {
            return Err(err!("invalid header: magic bytes mismatch"));
        }
        let (version, rest) = rest.split_at(1);
        let (_reserved, rest) = rest.split_at(15);

        let (tzh_ttisutcnt_bytes, rest) = rest.split_at(4);
        let (tzh_ttisstdcnt_bytes, rest) = rest.split_at(4);
        let (tzh_leapcnt_bytes, rest) = rest.split_at(4);
        let (tzh_timecnt_bytes, rest) = rest.split_at(4);
        let (tzh_typecnt_bytes, rest) = rest.split_at(4);
        let (tzh_charcnt_bytes, rest) = rest.split_at(4);

        let tzh_ttisutcnt = from_be_bytes_u32_to_usize(tzh_ttisutcnt_bytes)
            .map_err(|e| err!("failed to parse tzh_ttisutcnt: {e}"))?;
        let tzh_ttisstdcnt = from_be_bytes_u32_to_usize(tzh_ttisstdcnt_bytes)
            .map_err(|e| err!("failed to parse tzh_ttisstdcnt: {e}"))?;
        let tzh_leapcnt = from_be_bytes_u32_to_usize(tzh_leapcnt_bytes)
            .map_err(|e| err!("failed to parse tzh_leapcnt: {e}"))?;
        let tzh_timecnt = from_be_bytes_u32_to_usize(tzh_timecnt_bytes)
            .map_err(|e| err!("failed to parse tzh_timecnt: {e}"))?;
        let tzh_typecnt = from_be_bytes_u32_to_usize(tzh_typecnt_bytes)
            .map_err(|e| err!("failed to parse tzh_typecnt: {e}"))?;
        let tzh_charcnt = from_be_bytes_u32_to_usize(tzh_charcnt_bytes)
            .map_err(|e| err!("failed to parse tzh_charcnt: {e}"))?;

        if tzh_ttisutcnt != 0 && tzh_ttisutcnt != tzh_typecnt {
            return Err(err!(
                "expected tzh_ttisutcnt={tzh_ttisutcnt} to be zero \
                 or equal to tzh_typecnt={tzh_typecnt}",
            ));
        }
        if tzh_ttisstdcnt != 0 && tzh_ttisstdcnt != tzh_typecnt {
            return Err(err!(
                "expected tzh_ttisstdcnt={tzh_ttisstdcnt} to be zero \
                 or equal to tzh_typecnt={tzh_typecnt}",
            ));
        }
        if tzh_typecnt < 1 {
            return Err(err!(
                "expected tzh_typecnt={tzh_typecnt} to be at least 1",
            ));
        }
        if tzh_charcnt < 1 {
            return Err(err!(
                "expected tzh_charcnt={tzh_charcnt} to be at least 1",
            ));
        }

        let header = Header {
            time_size,
            version: version[0],
            tzh_ttisutcnt,
            tzh_ttisstdcnt,
            tzh_leapcnt,
            tzh_timecnt,
            tzh_typecnt,
            tzh_charcnt,
        };
        Ok((header, rest))
    }

    /// Returns true if this header is for a 32-bit data block.
    ///
    /// When false, it is guaranteed that this header is for a 64-bit data
    /// block.
    fn is_32bit(&self) -> bool {
        self.time_size == 4
    }

    /// Returns the size of the data block, in bytes, for this header.
    ///
    /// This returns an error if the arithmetic required to compute the
    /// length would overflow.
    ///
    /// This is useful for, e.g., skipping over the 32-bit V1 data block in
    /// V2+ TZif formatted files.
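    ///
    /// Concretely, the total computed below is
    /// `timecnt * time_size + timecnt + typecnt * 6 + charcnt
    /// + leapcnt * (time_size + 4) + isstdcnt + isutcnt`.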
    fn data_block_len(&self) -> Result<usize, Error> {
        let a = self.transition_times_len()?;
        let b = self.transition_types_len()?;
        let c = self.local_time_types_len()?;
        let d = self.time_zone_designations_len()?;
        let e = self.leap_second_len()?;
        let f = self.standard_wall_len()?;
        let g = self.ut_local_len()?;
        a.checked_add(b)
            .and_then(|z| z.checked_add(c))
            .and_then(|z| z.checked_add(d))
            .and_then(|z| z.checked_add(e))
            .and_then(|z| z.checked_add(f))
            .and_then(|z| z.checked_add(g))
            .ok_or_else(|| {
                err!(
                    "length of data block in V{} tzfile is too big",
                    self.version
                )
            })
    }

    fn transition_times_len(&self) -> Result<usize, Error> {
        self.tzh_timecnt.checked_mul(self.time_size).ok_or_else(|| {
            err!("tzh_timecnt value {} is too big", self.tzh_timecnt)
        })
    }

    fn transition_types_len(&self) -> Result<usize, Error> {
        Ok(self.tzh_timecnt)
    }

    fn local_time_types_len(&self) -> Result<usize, Error> {
        self.tzh_typecnt.checked_mul(6).ok_or_else(|| {
            err!("tzh_typecnt value {} is too big", self.tzh_typecnt)
        })
    }

    fn time_zone_designations_len(&self) -> Result<usize, Error> {
        Ok(self.tzh_charcnt)
    }

    fn leap_second_len(&self) -> Result<usize, Error> {
        let record_len = self
            .time_size
            .checked_add(4)
            .expect("4-or-8 plus 4 always fits in usize");
        self.tzh_leapcnt.checked_mul(record_len).ok_or_else(|| {
            err!("tzh_leapcnt value {} is too big", self.tzh_leapcnt)
        })
    }

    fn standard_wall_len(&self) -> Result<usize, Error> {
        Ok(self.tzh_ttisstdcnt)
    }

    fn ut_local_len(&self) -> Result<usize, Error> {
        Ok(self.tzh_ttisutcnt)
    }
}

/// Splits the given slice of bytes at the index given.
///
/// If the index is out of range (greater than `bytes.len()`) then an error is
/// returned. The error message will include the `what` string given, which is
/// meant to describe the thing being split.
fn try_split_at<'b>(
    what: &'static str,
    bytes: &'b [u8],
    at: usize,
) -> Result<(&'b [u8], &'b [u8]), Error> {
    if at > bytes.len() {
        Err(err!(
            "expected at least {at} bytes for {what}, \
             but found only {} bytes",
            bytes.len(),
        ))
    } else {
        Ok(bytes.split_at(at))
    }
}

/// Interprets the given slice as an unsigned 32-bit big endian integer,
/// attempts to convert it to a `usize` and returns it.
///
/// # Panics
///
/// When `bytes.len() != 4`.
///
/// # Errors
///
/// This errors if the `u32` parsed from the given bytes cannot fit in a
/// `usize`.
fn from_be_bytes_u32_to_usize(bytes: &[u8]) -> Result<usize, Error> {
    let n: u32 = from_be_bytes_u32(bytes);
    usize::try_from(n).map_err(|_| {
        err!(
            "failed to parse integer {n} (too big, max allowed is {})",
            usize::MAX
        )
    })
}

/// Interprets the given slice as an unsigned 32-bit big endian integer and
/// returns it.
///
/// # Panics
///
/// When `bytes.len() != 4`.
fn from_be_bytes_u32(bytes: &[u8]) -> u32 {
    u32::from_be_bytes(bytes.try_into().unwrap())
}

/// Interprets the given slice as a signed 32-bit big endian integer and
/// returns it.
///
/// # Panics
///
/// When `bytes.len() != 4`.
fn from_be_bytes_i32(bytes: &[u8]) -> i32 {
    i32::from_be_bytes(bytes.try_into().unwrap())
}

/// Interprets the given slice as a signed 64-bit big endian integer and
/// returns it.
///
/// # Panics
///
/// When `bytes.len() != 8`.
fn from_be_bytes_i64(bytes: &[u8]) -> i64 {
    i64::from_be_bytes(bytes.try_into().unwrap())
}