1 | use std::io::{self, ErrorKind}; |
2 | use std::iter; |
3 | use std::num::ParseIntError; |
4 | use std::str::{self, FromStr}; |
5 | |
6 | use super::rule::TransitionRule; |
7 | use super::timezone::{LeapSecond, LocalTimeType, TimeZone, Transition}; |
8 | use super::Error; |
9 | |
10 | pub(super) fn parse(bytes: &[u8]) -> Result<TimeZone, Error> { |
11 | let mut cursor = Cursor::new(bytes); |
12 | let state = State::new(&mut cursor, true)?; |
13 | let (state, footer) = match state.header.version { |
14 | Version::V1 => match cursor.is_empty() { |
15 | true => (state, None), |
16 | false => { |
17 | return Err(Error::InvalidTzFile("remaining data after end of TZif v1 data block" )) |
18 | } |
19 | }, |
20 | Version::V2 | Version::V3 => { |
21 | let state = State::new(&mut cursor, false)?; |
22 | (state, Some(cursor.remaining())) |
23 | } |
24 | }; |
25 | |
26 | let mut transitions = Vec::with_capacity(state.header.transition_count); |
27 | for (arr_time, &local_time_type_index) in |
28 | state.transition_times.chunks_exact(state.time_size).zip(state.transition_types) |
29 | { |
30 | let unix_leap_time = |
31 | state.parse_time(&arr_time[0..state.time_size], state.header.version)?; |
32 | let local_time_type_index = local_time_type_index as usize; |
33 | transitions.push(Transition::new(unix_leap_time, local_time_type_index)); |
34 | } |
35 | |
36 | let mut local_time_types = Vec::with_capacity(state.header.type_count); |
37 | for arr in state.local_time_types.chunks_exact(6) { |
38 | let ut_offset = read_be_i32(&arr[..4])?; |
39 | |
40 | let is_dst = match arr[4] { |
41 | 0 => false, |
42 | 1 => true, |
43 | _ => return Err(Error::InvalidTzFile("invalid DST indicator" )), |
44 | }; |
45 | |
46 | let char_index = arr[5] as usize; |
47 | if char_index >= state.header.char_count { |
48 | return Err(Error::InvalidTzFile("invalid time zone name char index" )); |
49 | } |
50 | |
51 | let position = match state.names[char_index..].iter().position(|&c| c == b' \0' ) { |
52 | Some(position) => position, |
53 | None => return Err(Error::InvalidTzFile("invalid time zone name char index" )), |
54 | }; |
55 | |
56 | let name = &state.names[char_index..char_index + position]; |
57 | let name = if !name.is_empty() { Some(name) } else { None }; |
58 | local_time_types.push(LocalTimeType::new(ut_offset, is_dst, name)?); |
59 | } |
60 | |
61 | let mut leap_seconds = Vec::with_capacity(state.header.leap_count); |
62 | for arr in state.leap_seconds.chunks_exact(state.time_size + 4) { |
63 | let unix_leap_time = state.parse_time(&arr[0..state.time_size], state.header.version)?; |
64 | let correction = read_be_i32(&arr[state.time_size..state.time_size + 4])?; |
65 | leap_seconds.push(LeapSecond::new(unix_leap_time, correction)); |
66 | } |
67 | |
68 | let std_walls_iter = state.std_walls.iter().copied().chain(iter::repeat(0)); |
69 | let ut_locals_iter = state.ut_locals.iter().copied().chain(iter::repeat(0)); |
70 | if std_walls_iter.zip(ut_locals_iter).take(state.header.type_count).any(|pair| pair == (0, 1)) { |
71 | return Err(Error::InvalidTzFile( |
72 | "invalid couple of standard/wall and UT/local indicators" , |
73 | )); |
74 | } |
75 | |
76 | let extra_rule = match footer { |
77 | Some(footer) => { |
78 | let footer = str::from_utf8(footer)?; |
79 | if !(footer.starts_with(' \n' ) && footer.ends_with(' \n' )) { |
80 | return Err(Error::InvalidTzFile("invalid footer" )); |
81 | } |
82 | |
83 | let tz_string = footer.trim_matches(|c: char| c.is_ascii_whitespace()); |
84 | if tz_string.starts_with(':' ) || tz_string.contains(' \0' ) { |
85 | return Err(Error::InvalidTzFile("invalid footer" )); |
86 | } |
87 | |
88 | match tz_string.is_empty() { |
89 | true => None, |
90 | false => Some(TransitionRule::from_tz_string( |
91 | tz_string.as_bytes(), |
92 | state.header.version == Version::V3, |
93 | )?), |
94 | } |
95 | } |
96 | None => None, |
97 | }; |
98 | |
99 | TimeZone::new(transitions, local_time_types, leap_seconds, extra_rule) |
100 | } |
101 | |
102 | /// TZif data blocks |
103 | struct State<'a> { |
104 | header: Header, |
105 | /// Time size in bytes |
106 | time_size: usize, |
107 | /// Transition times data block |
108 | transition_times: &'a [u8], |
109 | /// Transition types data block |
110 | transition_types: &'a [u8], |
111 | /// Local time types data block |
112 | local_time_types: &'a [u8], |
113 | /// Time zone names data block |
114 | names: &'a [u8], |
115 | /// Leap seconds data block |
116 | leap_seconds: &'a [u8], |
117 | /// UT/local indicators data block |
118 | std_walls: &'a [u8], |
119 | /// Standard/wall indicators data block |
120 | ut_locals: &'a [u8], |
121 | } |
122 | |
123 | impl<'a> State<'a> { |
124 | /// Read TZif data blocks |
125 | fn new(cursor: &mut Cursor<'a>, first: bool) -> Result<Self, Error> { |
126 | let header = Header::new(cursor)?; |
127 | let time_size = match first { |
128 | true => 4, // We always parse V1 first |
129 | false => 8, |
130 | }; |
131 | |
132 | Ok(Self { |
133 | time_size, |
134 | transition_times: cursor.read_exact(header.transition_count * time_size)?, |
135 | transition_types: cursor.read_exact(header.transition_count)?, |
136 | local_time_types: cursor.read_exact(header.type_count * 6)?, |
137 | names: cursor.read_exact(header.char_count)?, |
138 | leap_seconds: cursor.read_exact(header.leap_count * (time_size + 4))?, |
139 | std_walls: cursor.read_exact(header.std_wall_count)?, |
140 | ut_locals: cursor.read_exact(header.ut_local_count)?, |
141 | header, |
142 | }) |
143 | } |
144 | |
145 | /// Parse time values |
146 | fn parse_time(&self, arr: &[u8], version: Version) -> Result<i64, Error> { |
147 | match version { |
148 | Version::V1 => Ok(read_be_i32(&arr[..4])?.into()), |
149 | Version::V2 | Version::V3 => read_be_i64(arr), |
150 | } |
151 | } |
152 | } |
153 | |
154 | /// TZif header |
155 | #[derive (Debug)] |
156 | struct Header { |
157 | /// TZif version |
158 | version: Version, |
159 | /// Number of UT/local indicators |
160 | ut_local_count: usize, |
161 | /// Number of standard/wall indicators |
162 | std_wall_count: usize, |
163 | /// Number of leap-second records |
164 | leap_count: usize, |
165 | /// Number of transition times |
166 | transition_count: usize, |
167 | /// Number of local time type records |
168 | type_count: usize, |
169 | /// Number of time zone names bytes |
170 | char_count: usize, |
171 | } |
172 | |
173 | impl Header { |
174 | fn new(cursor: &mut Cursor) -> Result<Self, Error> { |
175 | let magic = cursor.read_exact(4)?; |
176 | if magic != *b"TZif" { |
177 | return Err(Error::InvalidTzFile("invalid magic number" )); |
178 | } |
179 | |
180 | let version = match cursor.read_exact(1)? { |
181 | [0x00] => Version::V1, |
182 | [0x32] => Version::V2, |
183 | [0x33] => Version::V3, |
184 | _ => return Err(Error::UnsupportedTzFile("unsupported TZif version" )), |
185 | }; |
186 | |
187 | cursor.read_exact(15)?; |
188 | let ut_local_count = cursor.read_be_u32()?; |
189 | let std_wall_count = cursor.read_be_u32()?; |
190 | let leap_count = cursor.read_be_u32()?; |
191 | let transition_count = cursor.read_be_u32()?; |
192 | let type_count = cursor.read_be_u32()?; |
193 | let char_count = cursor.read_be_u32()?; |
194 | |
195 | if !(type_count != 0 |
196 | && char_count != 0 |
197 | && (ut_local_count == 0 || ut_local_count == type_count) |
198 | && (std_wall_count == 0 || std_wall_count == type_count)) |
199 | { |
200 | return Err(Error::InvalidTzFile("invalid header" )); |
201 | } |
202 | |
203 | Ok(Self { |
204 | version, |
205 | ut_local_count: ut_local_count as usize, |
206 | std_wall_count: std_wall_count as usize, |
207 | leap_count: leap_count as usize, |
208 | transition_count: transition_count as usize, |
209 | type_count: type_count as usize, |
210 | char_count: char_count as usize, |
211 | }) |
212 | } |
213 | } |
214 | |
215 | /// A `Cursor` contains a slice of a buffer and a read count. |
216 | #[derive (Debug, Eq, PartialEq)] |
217 | pub(crate) struct Cursor<'a> { |
218 | /// Slice representing the remaining data to be read |
219 | remaining: &'a [u8], |
220 | /// Number of already read bytes |
221 | read_count: usize, |
222 | } |
223 | |
224 | impl<'a> Cursor<'a> { |
225 | /// Construct a new `Cursor` from remaining data |
226 | pub(crate) const fn new(remaining: &'a [u8]) -> Self { |
227 | Self { remaining, read_count: 0 } |
228 | } |
229 | |
230 | pub(crate) fn peek(&self) -> Option<&u8> { |
231 | self.remaining().first() |
232 | } |
233 | |
234 | /// Returns remaining data |
235 | pub(crate) const fn remaining(&self) -> &'a [u8] { |
236 | self.remaining |
237 | } |
238 | |
239 | /// Returns `true` if data is remaining |
240 | pub(crate) const fn is_empty(&self) -> bool { |
241 | self.remaining.is_empty() |
242 | } |
243 | |
244 | pub(crate) fn read_be_u32(&mut self) -> Result<u32, Error> { |
245 | let mut buf = [0; 4]; |
246 | buf.copy_from_slice(self.read_exact(4)?); |
247 | Ok(u32::from_be_bytes(buf)) |
248 | } |
249 | |
250 | /// Read exactly `count` bytes, reducing remaining data and incrementing read count |
251 | pub(crate) fn read_exact(&mut self, count: usize) -> Result<&'a [u8], io::Error> { |
252 | match (self.remaining.get(..count), self.remaining.get(count..)) { |
253 | (Some(result), Some(remaining)) => { |
254 | self.remaining = remaining; |
255 | self.read_count += count; |
256 | Ok(result) |
257 | } |
258 | _ => Err(io::Error::from(ErrorKind::UnexpectedEof)), |
259 | } |
260 | } |
261 | |
262 | /// Read bytes and compare them to the provided tag |
263 | pub(crate) fn read_tag(&mut self, tag: &[u8]) -> Result<(), io::Error> { |
264 | if self.read_exact(tag.len())? == tag { |
265 | Ok(()) |
266 | } else { |
267 | Err(io::Error::from(ErrorKind::InvalidData)) |
268 | } |
269 | } |
270 | |
271 | /// Read bytes if the remaining data is prefixed by the provided tag |
272 | pub(crate) fn read_optional_tag(&mut self, tag: &[u8]) -> Result<bool, io::Error> { |
273 | if self.remaining.starts_with(tag) { |
274 | self.read_exact(tag.len())?; |
275 | Ok(true) |
276 | } else { |
277 | Ok(false) |
278 | } |
279 | } |
280 | |
281 | /// Read bytes as long as the provided predicate is true |
282 | pub(crate) fn read_while<F: Fn(&u8) -> bool>(&mut self, f: F) -> Result<&'a [u8], io::Error> { |
283 | match self.remaining.iter().position(|x| !f(x)) { |
284 | None => self.read_exact(self.remaining.len()), |
285 | Some(position) => self.read_exact(position), |
286 | } |
287 | } |
288 | |
289 | // Parse an integer out of the ASCII digits |
290 | pub(crate) fn read_int<T: FromStr<Err = ParseIntError>>(&mut self) -> Result<T, Error> { |
291 | let bytes = self.read_while(u8::is_ascii_digit)?; |
292 | Ok(str::from_utf8(bytes)?.parse()?) |
293 | } |
294 | |
295 | /// Read bytes until the provided predicate is true |
296 | pub(crate) fn read_until<F: Fn(&u8) -> bool>(&mut self, f: F) -> Result<&'a [u8], io::Error> { |
297 | match self.remaining.iter().position(f) { |
298 | None => self.read_exact(self.remaining.len()), |
299 | Some(position) => self.read_exact(position), |
300 | } |
301 | } |
302 | } |
303 | |
304 | pub(crate) fn read_be_i32(bytes: &[u8]) -> Result<i32, Error> { |
305 | if bytes.len() != 4 { |
306 | return Err(Error::InvalidSlice("too short for i32" )); |
307 | } |
308 | |
309 | let mut buf: [u8; 4] = [0; 4]; |
310 | buf.copy_from_slice(src:bytes); |
311 | Ok(i32::from_be_bytes(buf)) |
312 | } |
313 | |
314 | pub(crate) fn read_be_i64(bytes: &[u8]) -> Result<i64, Error> { |
315 | if bytes.len() != 8 { |
316 | return Err(Error::InvalidSlice("too short for i64" )); |
317 | } |
318 | |
319 | let mut buf: [u8; 8] = [0; 8]; |
320 | buf.copy_from_slice(src:bytes); |
321 | Ok(i64::from_be_bytes(buf)) |
322 | } |
323 | |
/// Version of the TZif format announced by a header
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Version {
    /// TZif version 1
    V1,
    /// TZif version 2
    V2,
    /// TZif version 3
    V3,
}
334 | |