use alloc::{string::String, vec};

use super::{
    util::{
        array_str::Abbreviation,
        error::{err, Error},
        escape::{Byte, Bytes},
        itime::{IOffset, ITimestamp},
    },
    PosixTimeZone, TzifDateTime, TzifFixed, TzifIndicator, TzifLocalTimeType,
    TzifOwned, TzifTransitionInfo, TzifTransitionKind, TzifTransitions,
    TzifTransitionsOwned,
};

// These are Jiff's min and max timestamp (in seconds) values.
//
// The TZif parser will clamp timestamps to this range. It's
// not ideal, but Jiff can't handle values outside of this range
// and completely refusing to use TZif data with pathological
// timestamps in typically irrelevant transitions is bad juju.
//
// Ref: https://github.com/BurntSushi/jiff/issues/163
// Ref: https://github.com/BurntSushi/jiff/pull/164
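//
// For reference, these bound Jiff's supported range: in UTC they
// correspond to instants near the very beginning of year -9999 and the
// very end of year 9999, respectively.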
const TIMESTAMP_MIN: i64 = -377705023201;
const TIMESTAMP_MAX: i64 = 253402207200;

// Similarly for offsets, although in this case, if we find
// an offset outside of this range, we do actually error. This
// is because it could result in truly incorrect datetimes for
// actual transitions.
//
// But our supported offset range is `-25:59:59..=+25:59:59`.
// There's no real time zone with offsets even close to those
// boundaries.
//
// If there is pathological data that we should ignore, then
// we should wait for a real bug report in order to determine
// the right way to ignore/clamp it.
const OFFSET_MIN: i32 = -93599;
const OFFSET_MAX: i32 = 93599;

// When fattening TZif data, this is the year to go up to.
//
// This year was chosen because it's what the "fat" TZif data generated
// by `zic` uses.
const FATTEN_UP_TO_YEAR: i16 = 2038;

// This is a "sanity" limit on the maximum number of transitions we'll
// add to TZif data when fattening them up.
//
// This is mostly just a defense-in-depth limit to avoid weird cases
// where a pathological POSIX time zone could be defined to create
// many transitions. It's not clear that this is actually possible,
// but I felt a little uneasy doing unbounded work that isn't linearly
// proportional to the input data. So, this limit is put into place for
// reasons of "good sense."
//
// For "normal" cases, there should be at most two transitions per
// year. So this limit permits 300/2=150 years of transition data.
// (Although we won't generate transitions at or beyond
// `FATTEN_UP_TO_YEAR`. See above.)
const FATTEN_MAX_TRANSITIONS: usize = 300;

impl TzifOwned {
    /// Parses the given data as a TZif formatted file.
    ///
    /// The name given is attached to the `Tzif` value returned, but is
    /// otherwise not significant.
    ///
    /// If the given data is not recognized to be valid TZif, then an error is
    /// returned.
    ///
    /// In general, callers may assume that it is safe to pass arbitrary or
    /// even untrusted data to this function and count on it not panicking
    /// or using resources that aren't limited to a small constant factor of
    /// the size of the data itself. That is, callers can reliably limit the
    /// resources used by limiting the size of the data given to this parse
    /// function.
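    ///
    /// # Example
    ///
    /// A minimal sketch of how this might be called (the path and the
    /// surrounding error handling are illustrative only):
    ///
    /// ```text
    /// let data = std::fs::read("/usr/share/zoneinfo/America/New_York")?;
    /// let tzif = TzifOwned::parse(Some("America/New_York".into()), &data)?;
    /// ```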
    pub(crate) fn parse(
        name: Option<String>,
        bytes: &[u8],
    ) -> Result<TzifOwned, Error> {
        let original = bytes;
        let name = name.into();
        let (header32, rest) = Header::parse(4, bytes)
            .map_err(|e| err!("failed to parse 32-bit header: {e}"))?;
        let (mut tzif, rest) = if header32.version == 0 {
            TzifOwned::parse32(name, header32, rest)?
        } else {
            TzifOwned::parse64(name, header32, rest)?
        };
        tzif.fatten();
        // This should come after fattening, because fattening may add new
        // transitions and we want to add civil datetimes to those.
        tzif.add_civil_datetimes_to_transitions();
        tzif.verify_posix_time_zone_consistency()?;
        // Compute the checksum using the entire contents of the TZif data.
        let tzif_raw_len = (rest.as_ptr() as usize)
            .checked_sub(original.as_ptr() as usize)
            .unwrap();
        let tzif_raw_bytes = &original[..tzif_raw_len];
        tzif.fixed.checksum = super::crc32::sum(tzif_raw_bytes);

        // Shrink all of our allocs so we don't keep excess capacity around.
        tzif.fixed.designations.shrink_to_fit();
        tzif.types.shrink_to_fit();
        tzif.transitions.timestamps.shrink_to_fit();
        tzif.transitions.civil_starts.shrink_to_fit();
        tzif.transitions.civil_ends.shrink_to_fit();
        tzif.transitions.infos.shrink_to_fit();

        Ok(tzif)
    }

    fn parse32<'b>(
        name: Option<String>,
        header32: Header,
        bytes: &'b [u8],
    ) -> Result<(TzifOwned, &'b [u8]), Error> {
        let mut tzif = TzifOwned {
            fixed: TzifFixed {
                name,
                version: header32.version,
                // filled in later
                checksum: 0,
                designations: String::new(),
                posix_tz: None,
            },
            types: vec![],
            transitions: TzifTransitions {
                timestamps: vec![],
                civil_starts: vec![],
                civil_ends: vec![],
                infos: vec![],
            },
        };
        let rest = tzif.parse_transitions(&header32, bytes)?;
        let rest = tzif.parse_transition_types(&header32, rest)?;
        let rest = tzif.parse_local_time_types(&header32, rest)?;
        let rest = tzif.parse_time_zone_designations(&header32, rest)?;
        let rest = tzif.parse_leap_seconds(&header32, rest)?;
        let rest = tzif.parse_indicators(&header32, rest)?;
        Ok((tzif, rest))
    }

    fn parse64<'b>(
        name: Option<String>,
        header32: Header,
        bytes: &'b [u8],
    ) -> Result<(TzifOwned, &'b [u8]), Error> {
        let (_, rest) = try_split_at(
            "V1 TZif data block",
            bytes,
            header32.data_block_len()?,
        )?;
        let (header64, rest) = Header::parse(8, rest)
            .map_err(|e| err!("failed to parse 64-bit header: {e}"))?;
        let mut tzif = TzifOwned {
            fixed: TzifFixed {
                name,
                version: header64.version,
                // filled in later
                checksum: 0,
                designations: String::new(),
                posix_tz: None,
            },
            types: vec![],
            transitions: TzifTransitions {
                timestamps: vec![],
                civil_starts: vec![],
                civil_ends: vec![],
                infos: vec![],
            },
        };
        let rest = tzif.parse_transitions(&header64, rest)?;
        let rest = tzif.parse_transition_types(&header64, rest)?;
        let rest = tzif.parse_local_time_types(&header64, rest)?;
        let rest = tzif.parse_time_zone_designations(&header64, rest)?;
        let rest = tzif.parse_leap_seconds(&header64, rest)?;
        let rest = tzif.parse_indicators(&header64, rest)?;
        let rest = tzif.parse_footer(&header64, rest)?;
        // Note that we don't check that the TZif data is fully valid. It is
        // possible for it to contain superfluous information. For example, a
        // non-zero local time type that is never referenced by a transition.
        Ok((tzif, rest))
    }

    fn parse_transitions<'b>(
        &mut self,
        header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        let (bytes, rest) = try_split_at(
            "transition times data block",
            bytes,
            header.transition_times_len()?,
        )?;
        let mut it = bytes.chunks_exact(header.time_size);
        // RFC 8536 says: "If there are no transitions, local time for all
        // timestamps is specified by the TZ string in the footer if present
        // and nonempty; otherwise, it is specified by time type 0."
        //
        // RFC 8536 also says: "Local time for timestamps before the first
        // transition is specified by the first time type (time type
        // 0)."
        //
        // So if there are no transitions, pushing this dummy one will result
        // in the desired behavior even when it's the only transition.
        // Similarly, since this is the minimum timestamp value, it will
        // trigger for any times before the first transition found in the TZif
        // data.
        self.transitions.add_with_type_index(TIMESTAMP_MIN, 0);
        while let Some(chunk) = it.next() {
            let mut timestamp = if header.is_32bit() {
                i64::from(from_be_bytes_i32(chunk))
            } else {
                from_be_bytes_i64(chunk)
            };
            if !(TIMESTAMP_MIN <= timestamp && timestamp <= TIMESTAMP_MAX) {
                // We really shouldn't error here just because the Unix
                // timestamp is outside what Jiff supports. Since what Jiff
                // supports is _somewhat_ arbitrary. But Jiff's supported
                // range is good enough for all realistic purposes, so we
                // just clamp an out-of-range Unix timestamp to the Jiff
                // min or max value.
                //
                // This can't result in the sorting order being wrong, but
                // it can result in a transition that is duplicative with
                // the dummy transition we inserted above. This should be
                // fine.
                let clamped = timestamp.clamp(TIMESTAMP_MIN, TIMESTAMP_MAX);
                // only-jiff-start
                warn!(
                    "found Unix timestamp {timestamp} that is outside \
                     Jiff's supported range, clamping to {clamped}",
                );
                // only-jiff-end
                timestamp = clamped;
            }
            self.transitions.add(timestamp);
        }
        assert!(it.remainder().is_empty());
        Ok(rest)
    }

    fn parse_transition_types<'b>(
        &mut self,
        header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        let (bytes, rest) = try_split_at(
            "transition types data block",
            bytes,
            header.transition_types_len()?,
        )?;
        // We skip the first transition because it is our minimum dummy
        // transition.
        for (transition_index, &type_index) in (1..).zip(bytes) {
            if usize::from(type_index) >= header.tzh_typecnt {
                return Err(err!(
                    "found transition type index {type_index}, \
                     but there are only {} local time types",
                    header.tzh_typecnt,
                ));
            }
            self.transitions.infos[transition_index].type_index = type_index;
        }
        Ok(rest)
    }

    fn parse_local_time_types<'b>(
        &mut self,
        header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        let (bytes, rest) = try_split_at(
            "local time types data block",
            bytes,
            header.local_time_types_len()?,
        )?;
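        // Each local time type record is 6 bytes: a 4-byte big-endian UT
        // offset in seconds, a 1-byte DST flag and a 1-byte index into the
        // time zone designations string (per RFC 8536).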
        let mut it = bytes.chunks_exact(6);
        while let Some(chunk) = it.next() {
            let offset = from_be_bytes_i32(&chunk[..4]);
            if !(OFFSET_MIN <= offset && offset <= OFFSET_MAX) {
                return Err(err!(
                    "found local time type with out-of-bounds offset: {offset}"
                ));
            }
            let is_dst = chunk[4] == 1;
            let designation = (chunk[5], chunk[5]);
            self.types.push(TzifLocalTimeType {
                offset,
                is_dst,
                designation,
                indicator: TzifIndicator::LocalWall,
            });
        }
        assert!(it.remainder().is_empty());
        Ok(rest)
    }

    fn parse_time_zone_designations<'b>(
        &mut self,
        header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        let (bytes, rest) = try_split_at(
            "time zone designations data block",
            bytes,
            header.time_zone_designations_len()?,
        )?;
        self.fixed.designations =
            String::from_utf8(bytes.to_vec()).map_err(|_| {
                err!(
                    "time zone designations are not valid UTF-8: {:?}",
                    Bytes(bytes),
                )
            })?;
        // Holy hell, this is brutal. The boundary conditions are crazy.
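        //
        // As a worked example (data invented for illustration): if
        // `designations` is "LMT\0EST\0EDT\0" and a local time type has
        // designation index 4, then the suffix is "EST\0EDT\0", the NUL is
        // found 3 bytes in, and the final designation range is 4..7,
        // i.e. "EST".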
        for (i, typ) in self.types.iter_mut().enumerate() {
            let start = usize::from(typ.designation.0);
            let Some(suffix) = self.fixed.designations.get(start..) else {
                return Err(err!(
                    "local time type {i} has designation index of {start}, \
                     but cannot be more than {}",
                    self.fixed.designations.len(),
                ));
            };
            let Some(len) = suffix.find('\x00') else {
                return Err(err!(
                    "local time type {i} has designation index of {start}, \
                     but could not find NUL terminator after it in \
                     designations: {:?}",
                    self.fixed.designations,
                ));
            };
            let Some(end) = start.checked_add(len) else {
                return Err(err!(
                    "local time type {i} has designation index of {start}, \
                     but its length {len} is too big",
                ));
            };
            typ.designation.1 = u8::try_from(end).map_err(|_| {
                err!(
                    "local time type {i} has designation range of \
                     {start}..{end}, but end is too big",
                )
            })?;
        }
        Ok(rest)
    }

    /// This parses the leap second corrections in the TZif data.
    ///
    /// Note that we only parse and verify them. We don't actually use them.
    /// Jiff effectively ignores leap seconds.
    fn parse_leap_seconds<'b>(
        &mut self,
        header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        let (bytes, rest) = try_split_at(
            "leap seconds data block",
            bytes,
            header.leap_second_len()?,
        )?;
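        // Each leap second record is `time_size` bytes for the occurrence
        // timestamp followed by a 4-byte correction (per RFC 8536), hence
        // the `time_size + 4` chunk length below.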
        let chunk_len = header
            .time_size
            .checked_add(4)
            .expect("time_size plus 4 fits in usize");
        let mut it = bytes.chunks_exact(chunk_len);
        while let Some(chunk) = it.next() {
            let (occur_bytes, _corr_bytes) = chunk.split_at(header.time_size);
            let occur = if header.is_32bit() {
                i64::from(from_be_bytes_i32(occur_bytes))
            } else {
                from_be_bytes_i64(occur_bytes)
            };
            if !(TIMESTAMP_MIN <= occur && occur <= TIMESTAMP_MAX) {
                // only-jiff-start
                warn!(
                    "leap second occurrence {occur} is \
                     not in Jiff's supported range"
                )
                // only-jiff-end
            }
        }
        assert!(it.remainder().is_empty());
        Ok(rest)
    }

    fn parse_indicators<'b>(
        &mut self,
        header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        let (std_wall_bytes, rest) = try_split_at(
            "standard/wall indicators data block",
            bytes,
            header.standard_wall_len()?,
        )?;
        let (ut_local_bytes, rest) = try_split_at(
            "UT/local indicators data block",
            rest,
            header.ut_local_len()?,
        )?;
        if std_wall_bytes.is_empty() && !ut_local_bytes.is_empty() {
            // This is a weird case, but technically possible only if all
            // UT/local indicators are 0. If any are 1, then it's an error,
            // because it would require the corresponding std/wall indicator
            // to be 1 too. Which it can't be, because there aren't any. So
            // we just check that they're all zeros.
            for (i, &byte) in ut_local_bytes.iter().enumerate() {
                if byte != 0 {
                    return Err(err!(
                        "found UT/local indicator '{byte}' for local time \
                         type {i}, but it must be 0 since all std/wall \
                         indicators are 0",
                    ));
                }
            }
        } else if !std_wall_bytes.is_empty() && ut_local_bytes.is_empty() {
            for (i, &byte) in std_wall_bytes.iter().enumerate() {
                // Indexing is OK because Header guarantees that the number of
                // indicators is 0 or equal to the number of types.
                self.types[i].indicator = if byte == 0 {
                    TzifIndicator::LocalWall
                } else if byte == 1 {
                    TzifIndicator::LocalStandard
                } else {
                    return Err(err!(
                        "found invalid std/wall indicator '{byte}' for \
                         local time type {i}, it must be 0 or 1",
                    ));
                };
            }
        } else if !std_wall_bytes.is_empty() && !ut_local_bytes.is_empty() {
            assert_eq!(std_wall_bytes.len(), ut_local_bytes.len());
            let it = std_wall_bytes.iter().zip(ut_local_bytes);
            for (i, (&stdwall, &utlocal)) in it.enumerate() {
                // Indexing is OK because Header guarantees that the number of
                // indicators is 0 or equal to the number of types.
                self.types[i].indicator = match (stdwall, utlocal) {
                    (0, 0) => TzifIndicator::LocalWall,
                    (1, 0) => TzifIndicator::LocalStandard,
                    (1, 1) => TzifIndicator::UTStandard,
                    (0, 1) => {
                        return Err(err!(
                            "found illegal ut-wall combination for \
                             local time type {i}, only local-wall, \
                             local-standard and ut-standard are allowed",
                        ))
                    }
                    _ => {
                        return Err(err!(
                            "found illegal std/wall or ut/local value for \
                             local time type {i}, each must be 0 or 1",
                        ))
                    }
                };
            }
        } else {
            // If they're both empty then we don't need to do anything. Every
            // local time type record already has the correct default for this
            // case set.
            debug_assert!(std_wall_bytes.is_empty());
            debug_assert!(ut_local_bytes.is_empty());
        }
        Ok(rest)
    }

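    /// Parses the V2+ footer: a `\n` byte, an optional TZ string and a
    /// terminating `\n` byte.
    ///
    /// For illustration only (not taken from any particular input), a
    /// typical footer body looks something like `EST5EDT,M3.2.0,M11.1.0`,
    /// which is handed off to the POSIX time zone parser below.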
    fn parse_footer<'b>(
        &mut self,
        _header: &Header,
        bytes: &'b [u8],
    ) -> Result<&'b [u8], Error> {
        if bytes.is_empty() {
            return Err(err!(
                "invalid V2+ TZif footer, expected \\n, \
                 but found unexpected end of data",
            ));
        }
        if bytes[0] != b'\n' {
            return Err(err!(
                "invalid V2+ TZif footer, expected {:?}, but found {:?}",
                Byte(b'\n'),
                Byte(bytes[0]),
            ));
        }
        let bytes = &bytes[1..];
        // Only scan up to 1KB for the `\n` terminator in case we somehow got
        // passed a huge block of bytes.
        let toscan = &bytes[..bytes.len().min(1024)];
        let Some(nlat) = toscan.iter().position(|&b| b == b'\n') else {
            return Err(err!(
                "invalid V2 TZif footer, could not find {:?} \
                 terminator in: {:?}",
                Byte(b'\n'),
                Bytes(toscan),
            ));
        };
        let (bytes, rest) = bytes.split_at(nlat);
        if !bytes.is_empty() {
            // We could in theory limit TZ strings to their strict POSIX
            // definition here for TZif V2, but I don't think there is any
            // harm in allowing the extensions in V2 formatted TZif data. Note
            // that the GNU tooling allows it via the `TZ` environment variable
            // even though POSIX doesn't specify it. This all seems okay to me
            // because the V3+ extension is a strict superset of functionality.
            let posix_tz =
                PosixTimeZone::parse(bytes).map_err(|e| err!("{e}"))?;
            self.fixed.posix_tz = Some(posix_tz);
        }
        Ok(&rest[1..])
    }

    /// Validates that the POSIX TZ string we parsed (if one exists) is
    /// consistent with the last transition in this time zone. This is
    /// required by RFC 8536.
    ///
    /// RFC 8536 says, "If the string is nonempty and one or more
    /// transitions appear in the version 2+ data, the string MUST be
    /// consistent with the last version 2+ transition."
    fn verify_posix_time_zone_consistency(&self) -> Result<(), Error> {
        // We need to be a little careful, since we always have at least one
        // transition (accounting for the dummy `Timestamp::MIN` transition).
        // So if we only have 1 transition and a POSIX TZ string, then we
        // should not validate it since it's equivalent to the case of 0
        // transitions and a POSIX TZ string.
        if self.transitions.timestamps.len() <= 1 {
            return Ok(());
        }
        let Some(ref tz) = self.fixed.posix_tz else {
            return Ok(());
        };
        let last = self
            .transitions
            .timestamps
            .last()
            .expect("last transition timestamp");
        let type_index = self
            .transitions
            .infos
            .last()
            .expect("last transition info")
            .type_index;
        let typ = &self.types[usize::from(type_index)];
        let (ioff, abbrev, is_dst) =
            tz.to_offset_info(ITimestamp::from_second(*last));
        if ioff.second != typ.offset {
            return Err(err!(
                "expected last transition to have DST offset \
                 of {expected_offset}, but got {got_offset} \
                 according to POSIX TZ string {tz}",
                expected_offset = typ.offset,
                got_offset = ioff.second,
                tz = tz,
            ));
        }
        if is_dst != typ.is_dst {
            return Err(err!(
                "expected last transition to have is_dst={expected_dst}, \
                 but got is_dst={got_dst} according to POSIX TZ \
                 string {tz}",
                expected_dst = typ.is_dst,
                got_dst = is_dst,
                tz = tz,
            ));
        }
        if abbrev != self.designation(&typ) {
            return Err(err!(
                "expected last transition to have \
                 designation={expected_abbrev}, \
                 but got designation={got_abbrev} according to POSIX TZ \
                 string {tz}",
                expected_abbrev = self.designation(&typ),
                got_abbrev = abbrev,
                tz = tz,
            ));
        }
        Ok(())
    }

    /// Add civil datetimes to our transitions.
    ///
    /// This isn't strictly necessary, but it speeds up time zone lookups when
    /// the input is a civil datetime. It lets us do comparisons directly on
    /// the civil datetime as given, instead of needing to convert the civil
    /// datetime given to a timestamp first. (Even if we didn't do this, I
    /// believe we'd still need at least one additional timestamp that is
    /// offset, because TZ lookups for a civil datetime are done in local time,
    /// and the timestamps in TZif data are, of course, all in UTC.)
    fn add_civil_datetimes_to_transitions(&mut self) {
        fn to_datetime(timestamp: i64, offset: i32) -> TzifDateTime {
            use crate::shared::util::itime::{IOffset, ITimestamp};
            let its = ITimestamp { second: timestamp, nanosecond: 0 };
            let ioff = IOffset { second: offset };
            let dt = its.to_datetime(ioff);
            TzifDateTime::new(
                dt.date.year,
                dt.date.month,
                dt.date.day,
                dt.time.hour,
                dt.time.minute,
                dt.time.second,
            )
        }

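        // As an illustration (values invented, not from any particular TZif
        // file): for a typical "spring forward" transition where the previous
        // offset is -18000 (UTC-5) and the new offset is -14400 (UTC-4), we
        // have `prev_offset < offset`, so the transition is a gap. Its civil
        // start is the transition instant rendered with the old offset
        // (02:00 local) and its civil end is the same instant rendered with
        // the new offset (03:00 local).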
        let trans = &mut self.transitions;
        for i in 0..trans.timestamps.len() {
            let timestamp = trans.timestamps[i];
            let offset = {
                let type_index = trans.infos[i].type_index;
                self.types[usize::from(type_index)].offset
            };
            let prev_offset = {
                let type_index = trans.infos[i.saturating_sub(1)].type_index;
                self.types[usize::from(type_index)].offset
            };

            if prev_offset == offset {
                // Equivalent offsets means there can never be any ambiguity.
                let start = to_datetime(timestamp, prev_offset);
                trans.infos[i].kind = TzifTransitionKind::Unambiguous;
                trans.civil_starts[i] = start;
            } else if prev_offset < offset {
                // When the offset of the previous transition is less, that
                // means there is some non-zero amount of time that is
                // "skipped" when moving to the next transition. Thus, we have
                // a gap. The start of the gap is the offset which gets us the
                // earliest time, i.e., the smaller of the two offsets.
                trans.infos[i].kind = TzifTransitionKind::Gap;
                trans.civil_starts[i] = to_datetime(timestamp, prev_offset);
                trans.civil_ends[i] = to_datetime(timestamp, offset);
            } else {
                // When the offset of the previous transition is greater, that
                // means there is some non-zero amount of time that will be
                // replayed on a wall clock in this time zone. Thus, we have
                // a fold. The start of the fold is the offset which gets us
                // the earliest time, i.e., the smaller of the two offsets.
                assert!(prev_offset > offset);
                trans.infos[i].kind = TzifTransitionKind::Fold;
                trans.civil_starts[i] = to_datetime(timestamp, offset);
                trans.civil_ends[i] = to_datetime(timestamp, prev_offset);
            }
        }
    }

    /// Fatten up this TZif data with additional transitions.
    ///
    /// These additional transitions often make time zone lookups faster, and
    /// they smooth out the performance difference between using "slim" and
    /// "fat" tzdbs.
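    ///
    /// For example, a "slim" TZif file typically ends with only a handful of
    /// explicit transitions plus a POSIX TZ string (e.g., something like
    /// `EST5EDT,M3.2.0,M11.1.0`); fattening evaluates that string to
    /// synthesize explicit transitions up to (but not beyond)
    /// `FATTEN_UP_TO_YEAR`.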
    fn fatten(&mut self) {
        // Note that this is a crate feature for *both* `jiff` and
        // `jiff-static`.
        if !cfg!(feature = "tz-fat") {
            return;
        }
        let Some(posix_tz) = self.fixed.posix_tz.clone() else { return };
        let last =
            self.transitions.timestamps.last().expect("last transition");
        let mut i = 0;
        let mut prev = ITimestamp::from_second(*last);
        loop {
            if i > FATTEN_MAX_TRANSITIONS {
                // only-jiff-start
                warn!(
                    "fattening TZif data for `{name:?}` somehow generated \
                     more than {max} transitions, so giving up to avoid \
                     doing too much work",
                    name = self.fixed.name,
                    max = FATTEN_MAX_TRANSITIONS,
                );
                // only-jiff-end
                return;
            }
            i += 1;
            prev = match self.add_transition(&posix_tz, prev) {
                None => break,
                Some(next) => next,
            };
        }
    }

    /// If there's a transition strictly after the given timestamp for the
    /// given POSIX time zone, then add it to this TZif data.
    fn add_transition(
        &mut self,
        posix_tz: &PosixTimeZone<Abbreviation>,
        prev: ITimestamp,
    ) -> Option<ITimestamp> {
        let (its, ioff, abbrev, is_dst) = posix_tz.next_transition(prev)?;
        if its.to_datetime(IOffset::UTC).date.year >= FATTEN_UP_TO_YEAR {
            return None;
        }
        let type_index =
            self.find_or_create_local_time_type(ioff, abbrev, is_dst)?;
        self.transitions.add_with_type_index(its.second, type_index);
        Some(its)
    }

    /// Look for a local time type matching the data given.
    ///
    /// If one could not be found, then one is created and its index is
    /// returned.
    ///
    /// If one could not be found and one could not be created (e.g., the index
    /// would overflow `u8`), then `None` is returned.
    fn find_or_create_local_time_type(
        &mut self,
        offset: IOffset,
        abbrev: &str,
        is_dst: bool,
    ) -> Option<u8> {
        for (i, typ) in self.types.iter().enumerate() {
            if offset.second == typ.offset
                && abbrev == self.designation(typ)
                && is_dst == typ.is_dst
            {
                return u8::try_from(i).ok();
            }
        }
        let i = u8::try_from(self.types.len()).ok()?;
        let designation = self.find_or_create_designation(abbrev)?;
        self.types.push(TzifLocalTimeType {
            offset: offset.second,
            is_dst,
            designation,
            // Not really clear if this is correct, but Jiff
            // ignores this anyway, so ¯\_(ツ)_/¯.
            indicator: TzifIndicator::LocalWall,
        });
        Some(i)
    }

    /// Look for a designation (i.e., time zone abbreviation) matching the data
    /// given, and return its range into `self.fixed.designations`.
    ///
    /// If one could not be found, then one is created and its range is
    /// returned.
    ///
    /// If one could not be found and one could not be created (e.g., the range
    /// would overflow `u8`), then `None` is returned.
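    ///
    /// For example (values invented for illustration): if `designations` is
    /// `"LMT\0EST\0"`, looking up `"EST"` returns `(4, 7)`, while looking up
    /// `"EDT"` appends `"EDT\0"` and returns `(8, 11)`.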
    fn find_or_create_designation(
        &mut self,
        needle: &str,
    ) -> Option<(u8, u8)> {
        let mut start = 0;
        while let Some(offset) = self.fixed.designations[start..].find('\0') {
            let end = start + offset;
            let abbrev = &self.fixed.designations[start..end];
            if needle == abbrev {
                return Some((start.try_into().ok()?, end.try_into().ok()?));
            }
            start = end + 1;
        }
        self.fixed.designations.push_str(needle);
        self.fixed.designations.push('\0');
        let end = start + needle.len();
        Some((start.try_into().ok()?, end.try_into().ok()?))
    }

    fn designation(&self, typ: &TzifLocalTimeType) -> &str {
        let range =
            usize::from(typ.designation.0)..usize::from(typ.designation.1);
        // OK because we verify that the designation range on every local
        // time type is a valid range into `self.fixed.designations`.
        &self.fixed.designations[range]
    }
}

impl TzifTransitionsOwned {
    /// Add a single transition with the given timestamp.
    ///
    /// This also fills in the other columns (civil starts, civil ends and
    /// infos) with sensible default values. It is expected that callers will
    /// later fill them in.
    fn add(&mut self, timestamp: i64) {
        self.add_with_type_index(timestamp, 0);
    }

    /// Like `TzifTransitionsOwned::add`, but lets the caller provide a type
    /// index if it is known.
    fn add_with_type_index(&mut self, timestamp: i64, type_index: u8) {
        self.timestamps.push(timestamp);
        self.civil_starts.push(TzifDateTime::ZERO);
        self.civil_ends.push(TzifDateTime::ZERO);
        self.infos.push(TzifTransitionInfo {
            type_index,
            kind: TzifTransitionKind::Unambiguous,
        });
    }
}

/// The header for a TZif formatted file.
///
/// The V2+ TZif format has two headers: one for V1 data, and then a second
/// following the V1 data block that describes another data block which uses
/// 64-bit timestamps. The two headers both have the same format and both
/// use 32-bit big-endian encoded integers.
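///
/// For reference, the fixed 44-byte layout parsed below is: 4 magic bytes
/// (`TZif`), 1 version byte, 15 reserved bytes, and then six 32-bit
/// big-endian counts in this order: `tzh_ttisutcnt`, `tzh_ttisstdcnt`,
/// `tzh_leapcnt`, `tzh_timecnt`, `tzh_typecnt`, `tzh_charcnt`.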
#[derive(Debug)]
struct Header {
    /// The size of the timestamps encoded in the data block.
    ///
    /// This is guaranteed to be either 4 (for V1) or 8 (for the 64-bit header
    /// block in V2+).
    time_size: usize,
    /// The file format version.
    ///
    /// Note that this is either a NUL byte (for version 1), or an ASCII byte
    /// corresponding to the version number. That is, `0x32` for `2`, `0x33`
    /// for `3` or `0x34` for `4`. Note also that just because zoneinfo might
    /// have been recently generated does not mean it uses the latest format
    /// version. It seems like newer versions are only compiled by `zic` when
    /// they are needed. For example, `America/New_York` on my system (as of
    /// `2024-03-25`) has version `0x32`, but `Asia/Jerusalem` has version
    /// `0x33`.
    version: u8,
    /// Number of UT/local indicators stored in the file.
    ///
    /// This is checked to be either equal to `0` or equal to `tzh_typecnt`.
    tzh_ttisutcnt: usize,
    /// The number of standard/wall indicators stored in the file.
    ///
    /// This is checked to be either equal to `0` or equal to `tzh_typecnt`.
    tzh_ttisstdcnt: usize,
    /// The number of leap seconds for which data entries are stored in the
    /// file.
    tzh_leapcnt: usize,
    /// The number of transition times for which data entries are stored in
    /// the file.
    tzh_timecnt: usize,
    /// The number of local time types for which data entries are stored in the
    /// file.
    ///
    /// This is checked to be at least `1`.
    tzh_typecnt: usize,
    /// The number of bytes of time zone abbreviation strings stored in the
    /// file.
    ///
    /// This is checked to be at least `1`.
    tzh_charcnt: usize,
}

impl Header {
    /// Parse the header record from the given bytes.
    ///
    /// Upon success, return the header and all bytes after the header.
    ///
    /// The given `time_size` must be 4 or 8, corresponding to either the
    /// V1 header block or the V2+ header block, respectively.
    fn parse(
        time_size: usize,
        bytes: &[u8],
    ) -> Result<(Header, &[u8]), Error> {
        assert!(time_size == 4 || time_size == 8, "time size must be 4 or 8");
        if bytes.len() < 44 {
            return Err(err!("invalid header: too short"));
        }
        let (magic, rest) = bytes.split_at(4);
        if magic != b"TZif" {
            return Err(err!("invalid header: magic bytes mismatch"));
        }
        let (version, rest) = rest.split_at(1);
        let (_reserved, rest) = rest.split_at(15);

        let (tzh_ttisutcnt_bytes, rest) = rest.split_at(4);
        let (tzh_ttisstdcnt_bytes, rest) = rest.split_at(4);
        let (tzh_leapcnt_bytes, rest) = rest.split_at(4);
        let (tzh_timecnt_bytes, rest) = rest.split_at(4);
        let (tzh_typecnt_bytes, rest) = rest.split_at(4);
        let (tzh_charcnt_bytes, rest) = rest.split_at(4);

        let tzh_ttisutcnt = from_be_bytes_u32_to_usize(tzh_ttisutcnt_bytes)
            .map_err(|e| err!("failed to parse tzh_ttisutcnt: {e}"))?;
        let tzh_ttisstdcnt = from_be_bytes_u32_to_usize(tzh_ttisstdcnt_bytes)
            .map_err(|e| err!("failed to parse tzh_ttisstdcnt: {e}"))?;
        let tzh_leapcnt = from_be_bytes_u32_to_usize(tzh_leapcnt_bytes)
            .map_err(|e| err!("failed to parse tzh_leapcnt: {e}"))?;
        let tzh_timecnt = from_be_bytes_u32_to_usize(tzh_timecnt_bytes)
            .map_err(|e| err!("failed to parse tzh_timecnt: {e}"))?;
        let tzh_typecnt = from_be_bytes_u32_to_usize(tzh_typecnt_bytes)
            .map_err(|e| err!("failed to parse tzh_typecnt: {e}"))?;
        let tzh_charcnt = from_be_bytes_u32_to_usize(tzh_charcnt_bytes)
            .map_err(|e| err!("failed to parse tzh_charcnt: {e}"))?;

        if tzh_ttisutcnt != 0 && tzh_ttisutcnt != tzh_typecnt {
            return Err(err!(
                "expected tzh_ttisutcnt={tzh_ttisutcnt} to be zero \
                 or equal to tzh_typecnt={tzh_typecnt}",
            ));
        }
        if tzh_ttisstdcnt != 0 && tzh_ttisstdcnt != tzh_typecnt {
            return Err(err!(
                "expected tzh_ttisstdcnt={tzh_ttisstdcnt} to be zero \
                 or equal to tzh_typecnt={tzh_typecnt}",
            ));
        }
        if tzh_typecnt < 1 {
            return Err(err!(
                "expected tzh_typecnt={tzh_typecnt} to be at least 1",
            ));
        }
        if tzh_charcnt < 1 {
            return Err(err!(
                "expected tzh_charcnt={tzh_charcnt} to be at least 1",
            ));
        }

        let header = Header {
            time_size,
            version: version[0],
            tzh_ttisutcnt,
            tzh_ttisstdcnt,
            tzh_leapcnt,
            tzh_timecnt,
            tzh_typecnt,
            tzh_charcnt,
        };
        Ok((header, rest))
    }

    /// Returns true if this header is for a 32-bit data block.
    ///
    /// When false, it is guaranteed that this header is for a 64-bit data
    /// block.
    fn is_32bit(&self) -> bool {
        self.time_size == 4
    }

    /// Returns the size of the data block, in bytes, for this header.
    ///
    /// This returns an error if the arithmetic required to compute the
    /// length would overflow.
    ///
    /// This is useful for, e.g., skipping over the 32-bit V1 data block in
    /// V2+ TZif formatted files.
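    ///
    /// Concretely, the total computed below is
    /// `timecnt * time_size + timecnt + typecnt * 6 + charcnt
    /// + leapcnt * (time_size + 4) + isstdcnt + isutcnt`.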
    fn data_block_len(&self) -> Result<usize, Error> {
        let a = self.transition_times_len()?;
        let b = self.transition_types_len()?;
        let c = self.local_time_types_len()?;
        let d = self.time_zone_designations_len()?;
        let e = self.leap_second_len()?;
        let f = self.standard_wall_len()?;
        let g = self.ut_local_len()?;
        a.checked_add(b)
            .and_then(|z| z.checked_add(c))
            .and_then(|z| z.checked_add(d))
            .and_then(|z| z.checked_add(e))
            .and_then(|z| z.checked_add(f))
            .and_then(|z| z.checked_add(g))
            .ok_or_else(|| {
                err!(
                    "length of data block in V{} tzfile is too big",
                    self.version
                )
            })
    }

    fn transition_times_len(&self) -> Result<usize, Error> {
        self.tzh_timecnt.checked_mul(self.time_size).ok_or_else(|| {
            err!("tzh_timecnt value {} is too big", self.tzh_timecnt)
        })
    }

    fn transition_types_len(&self) -> Result<usize, Error> {
        Ok(self.tzh_timecnt)
    }

    fn local_time_types_len(&self) -> Result<usize, Error> {
        self.tzh_typecnt.checked_mul(6).ok_or_else(|| {
            err!("tzh_typecnt value {} is too big", self.tzh_typecnt)
        })
    }

    fn time_zone_designations_len(&self) -> Result<usize, Error> {
        Ok(self.tzh_charcnt)
    }

    fn leap_second_len(&self) -> Result<usize, Error> {
        let record_len = self
            .time_size
            .checked_add(4)
            .expect("4-or-8 plus 4 always fits in usize");
        self.tzh_leapcnt.checked_mul(record_len).ok_or_else(|| {
            err!("tzh_leapcnt value {} is too big", self.tzh_leapcnt)
        })
    }

    fn standard_wall_len(&self) -> Result<usize, Error> {
        Ok(self.tzh_ttisstdcnt)
    }

    fn ut_local_len(&self) -> Result<usize, Error> {
        Ok(self.tzh_ttisutcnt)
    }
}

/// Splits the given slice of bytes at the index given.
///
/// If the index is out of range (greater than `bytes.len()`) then an error is
/// returned. The error message will include the `what` string given, which is
/// meant to describe the thing being split.
fn try_split_at<'b>(
    what: &'static str,
    bytes: &'b [u8],
    at: usize,
) -> Result<(&'b [u8], &'b [u8]), Error> {
    if at > bytes.len() {
        Err(err!(
            "expected at least {at} bytes for {what}, \
             but found only {} bytes",
            bytes.len(),
        ))
    } else {
        Ok(bytes.split_at(at))
    }
}

/// Interprets the given slice as an unsigned 32-bit big endian integer,
/// attempts to convert it to a `usize` and returns it.
///
/// # Panics
///
/// When `bytes.len() != 4`.
///
/// # Errors
///
/// This errors if the `u32` parsed from the given bytes cannot fit in a
/// `usize`.
fn from_be_bytes_u32_to_usize(bytes: &[u8]) -> Result<usize, Error> {
    let n: u32 = from_be_bytes_u32(bytes);
    usize::try_from(n).map_err(|_| {
        err!(
            "failed to parse integer {n} (too big, max allowed is {})",
            usize::MAX
        )
    })
}

/// Interprets the given slice as an unsigned 32-bit big endian integer and
/// returns it.
///
/// # Panics
///
/// When `bytes.len() != 4`.
fn from_be_bytes_u32(bytes: &[u8]) -> u32 {
    u32::from_be_bytes(bytes.try_into().unwrap())
}

/// Interprets the given slice as a signed 32-bit big endian integer and
/// returns it.
///
/// # Panics
///
/// When `bytes.len() != 4`.
fn from_be_bytes_i32(bytes: &[u8]) -> i32 {
    i32::from_be_bytes(bytes.try_into().unwrap())
}

/// Interprets the given slice as a signed 64-bit big endian integer and
/// returns it.
///
/// # Panics
///
/// When `bytes.len() != 8`.
fn from_be_bytes_i64(bytes: &[u8]) -> i64 {
    i64::from_be_bytes(bytes.try_into().unwrap())
}