| 1 | use std::io::{self, ErrorKind}; |
| 2 | use std::iter; |
| 3 | use std::num::ParseIntError; |
| 4 | use std::str::{self, FromStr}; |
| 5 | |
| 6 | use super::rule::TransitionRule; |
| 7 | use super::timezone::{LeapSecond, LocalTimeType, TimeZone, Transition}; |
| 8 | use super::Error; |
| 9 | |
| 10 | pub(super) fn parse(bytes: &[u8]) -> Result<TimeZone, Error> { |
| 11 | let mut cursor = Cursor::new(bytes); |
| 12 | let state = State::new(&mut cursor, true)?; |
| 13 | let (state, footer) = match state.header.version { |
| 14 | Version::V1 => match cursor.is_empty() { |
| 15 | true => (state, None), |
| 16 | false => { |
| 17 | return Err(Error::InvalidTzFile("remaining data after end of TZif v1 data block" )) |
| 18 | } |
| 19 | }, |
| 20 | Version::V2 | Version::V3 => { |
| 21 | let state = State::new(&mut cursor, false)?; |
| 22 | (state, Some(cursor.remaining())) |
| 23 | } |
| 24 | }; |
| 25 | |
| 26 | let mut transitions = Vec::with_capacity(state.header.transition_count); |
| 27 | for (arr_time, &local_time_type_index) in |
| 28 | state.transition_times.chunks_exact(state.time_size).zip(state.transition_types) |
| 29 | { |
| 30 | let unix_leap_time = |
| 31 | state.parse_time(&arr_time[0..state.time_size], state.header.version)?; |
| 32 | let local_time_type_index = local_time_type_index as usize; |
| 33 | transitions.push(Transition::new(unix_leap_time, local_time_type_index)); |
| 34 | } |
| 35 | |
| 36 | let mut local_time_types = Vec::with_capacity(state.header.type_count); |
| 37 | for arr in state.local_time_types.chunks_exact(6) { |
| 38 | let ut_offset = read_be_i32(&arr[..4])?; |
| 39 | |
| 40 | let is_dst = match arr[4] { |
| 41 | 0 => false, |
| 42 | 1 => true, |
| 43 | _ => return Err(Error::InvalidTzFile("invalid DST indicator" )), |
| 44 | }; |
| 45 | |
| 46 | let char_index = arr[5] as usize; |
| 47 | if char_index >= state.header.char_count { |
| 48 | return Err(Error::InvalidTzFile("invalid time zone name char index" )); |
| 49 | } |
| 50 | |
| 51 | let position = match state.names[char_index..].iter().position(|&c| c == b' \0' ) { |
| 52 | Some(position) => position, |
| 53 | None => return Err(Error::InvalidTzFile("invalid time zone name char index" )), |
| 54 | }; |
| 55 | |
| 56 | let name = &state.names[char_index..char_index + position]; |
| 57 | let name = if !name.is_empty() { Some(name) } else { None }; |
| 58 | local_time_types.push(LocalTimeType::new(ut_offset, is_dst, name)?); |
| 59 | } |
| 60 | |
| 61 | let mut leap_seconds = Vec::with_capacity(state.header.leap_count); |
| 62 | for arr in state.leap_seconds.chunks_exact(state.time_size + 4) { |
| 63 | let unix_leap_time = state.parse_time(&arr[0..state.time_size], state.header.version)?; |
| 64 | let correction = read_be_i32(&arr[state.time_size..state.time_size + 4])?; |
| 65 | leap_seconds.push(LeapSecond::new(unix_leap_time, correction)); |
| 66 | } |
| 67 | |
| 68 | let std_walls_iter = state.std_walls.iter().copied().chain(iter::repeat(0)); |
| 69 | let ut_locals_iter = state.ut_locals.iter().copied().chain(iter::repeat(0)); |
| 70 | if std_walls_iter.zip(ut_locals_iter).take(state.header.type_count).any(|pair| pair == (0, 1)) { |
| 71 | return Err(Error::InvalidTzFile( |
| 72 | "invalid couple of standard/wall and UT/local indicators" , |
| 73 | )); |
| 74 | } |
| 75 | |
| 76 | let extra_rule = match footer { |
| 77 | Some(footer) => { |
| 78 | let footer = str::from_utf8(footer)?; |
| 79 | if !(footer.starts_with(' \n' ) && footer.ends_with(' \n' )) { |
| 80 | return Err(Error::InvalidTzFile("invalid footer" )); |
| 81 | } |
| 82 | |
| 83 | let tz_string = footer.trim_matches(|c: char| c.is_ascii_whitespace()); |
| 84 | if tz_string.starts_with(':' ) || tz_string.contains(' \0' ) { |
| 85 | return Err(Error::InvalidTzFile("invalid footer" )); |
| 86 | } |
| 87 | |
| 88 | match tz_string.is_empty() { |
| 89 | true => None, |
| 90 | false => Some(TransitionRule::from_tz_string( |
| 91 | tz_string.as_bytes(), |
| 92 | state.header.version == Version::V3, |
| 93 | )?), |
| 94 | } |
| 95 | } |
| 96 | None => None, |
| 97 | }; |
| 98 | |
| 99 | TimeZone::new(transitions, local_time_types, leap_seconds, extra_rule) |
| 100 | } |
| 101 | |
| 102 | /// Undecoded data blocks read after one TZif header |
| 103 | struct State<'a> { |
| 104 | header: Header, |
| 105 | /// Size in bytes of one time value: 4 for the first block, 8 for the second |
| 106 | time_size: usize, |
| 107 | /// Transition times data block |
| 108 | transition_times: &'a [u8], |
| 109 | /// Transition types data block |
| 110 | transition_types: &'a [u8], |
| 111 | /// Local time types data block |
| 112 | local_time_types: &'a [u8], |
| 113 | /// Time zone names data block |
| 114 | names: &'a [u8], |
| 115 | /// Leap seconds data block |
| 116 | leap_seconds: &'a [u8], |
| 117 | /// Standard/wall indicators data block |
| 118 | std_walls: &'a [u8], |
| 119 | /// UT/local indicators data block |
| 120 | ut_locals: &'a [u8], |
| 121 | } |
| 122 | |
| 123 | impl<'a> State<'a> { |
| 124 | /// Read TZif data blocks |
| 125 | fn new(cursor: &mut Cursor<'a>, first: bool) -> Result<Self, Error> { |
| 126 | let header = Header::new(cursor)?; |
| 127 | let time_size = match first { |
| 128 | true => 4, // We always parse V1 first |
| 129 | false => 8, |
| 130 | }; |
| 131 | |
| 132 | Ok(Self { |
| 133 | time_size, |
| 134 | transition_times: cursor.read_exact(header.transition_count * time_size)?, |
| 135 | transition_types: cursor.read_exact(header.transition_count)?, |
| 136 | local_time_types: cursor.read_exact(header.type_count * 6)?, |
| 137 | names: cursor.read_exact(header.char_count)?, |
| 138 | leap_seconds: cursor.read_exact(header.leap_count * (time_size + 4))?, |
| 139 | std_walls: cursor.read_exact(header.std_wall_count)?, |
| 140 | ut_locals: cursor.read_exact(header.ut_local_count)?, |
| 141 | header, |
| 142 | }) |
| 143 | } |
| 144 | |
| 145 | /// Parse time values |
| 146 | fn parse_time(&self, arr: &[u8], version: Version) -> Result<i64, Error> { |
| 147 | match version { |
| 148 | Version::V1 => Ok(read_be_i32(&arr[..4])?.into()), |
| 149 | Version::V2 | Version::V3 => read_be_i64(arr), |
| 150 | } |
| 151 | } |
| 152 | } |
| 153 | |
| 154 | /// Decoded TZif header: the version and the record counts of the following data blocks |
| 155 | #[derive (Debug)] |
| 156 | struct Header { |
| 157 | /// TZif version |
| 158 | version: Version, |
| 159 | /// Number of UT/local indicators (validated to be 0 or equal to `type_count`) |
| 160 | ut_local_count: usize, |
| 161 | /// Number of standard/wall indicators (validated to be 0 or equal to `type_count`) |
| 162 | std_wall_count: usize, |
| 163 | /// Number of leap-second records |
| 164 | leap_count: usize, |
| 165 | /// Number of transition times |
| 166 | transition_count: usize, |
| 167 | /// Number of local time type records (validated to be non-zero) |
| 168 | type_count: usize, |
| 169 | /// Number of time zone names bytes (validated to be non-zero) |
| 170 | char_count: usize, |
| 171 | } |
| 172 | |
| 173 | impl Header { |
| 174 | fn new(cursor: &mut Cursor) -> Result<Self, Error> { |
| 175 | let magic = cursor.read_exact(4)?; |
| 176 | if magic != *b"TZif" { |
| 177 | return Err(Error::InvalidTzFile("invalid magic number" )); |
| 178 | } |
| 179 | |
| 180 | let version = match cursor.read_exact(1)? { |
| 181 | [0x00] => Version::V1, |
| 182 | [0x32] => Version::V2, |
| 183 | [0x33] => Version::V3, |
| 184 | _ => return Err(Error::UnsupportedTzFile("unsupported TZif version" )), |
| 185 | }; |
| 186 | |
| 187 | cursor.read_exact(15)?; |
| 188 | let ut_local_count = cursor.read_be_u32()?; |
| 189 | let std_wall_count = cursor.read_be_u32()?; |
| 190 | let leap_count = cursor.read_be_u32()?; |
| 191 | let transition_count = cursor.read_be_u32()?; |
| 192 | let type_count = cursor.read_be_u32()?; |
| 193 | let char_count = cursor.read_be_u32()?; |
| 194 | |
| 195 | if !(type_count != 0 |
| 196 | && char_count != 0 |
| 197 | && (ut_local_count == 0 || ut_local_count == type_count) |
| 198 | && (std_wall_count == 0 || std_wall_count == type_count)) |
| 199 | { |
| 200 | return Err(Error::InvalidTzFile("invalid header" )); |
| 201 | } |
| 202 | |
| 203 | Ok(Self { |
| 204 | version, |
| 205 | ut_local_count: ut_local_count as usize, |
| 206 | std_wall_count: std_wall_count as usize, |
| 207 | leap_count: leap_count as usize, |
| 208 | transition_count: transition_count as usize, |
| 209 | type_count: type_count as usize, |
| 210 | char_count: char_count as usize, |
| 211 | }) |
| 212 | } |
| 213 | } |
| 214 | |
| 215 | /// A forward-only reader over a byte slice that tracks how many bytes were consumed. |
| 216 | #[derive (Debug, Eq, PartialEq)] |
| 217 | pub(crate) struct Cursor<'a> { |
| 218 | /// Slice holding the data that has not been read yet |
| 219 | remaining: &'a [u8], |
| 220 | /// Number of bytes consumed so far |
| 221 | read_count: usize, |
| 222 | } |
| 223 | |
| 224 | impl<'a> Cursor<'a> { |
| 225 | /// Construct a new `Cursor` from remaining data |
| 226 | pub(crate) const fn new(remaining: &'a [u8]) -> Self { |
| 227 | Self { remaining, read_count: 0 } |
| 228 | } |
| 229 | |
| 230 | pub(crate) fn peek(&self) -> Option<&u8> { |
| 231 | self.remaining().first() |
| 232 | } |
| 233 | |
| 234 | /// Returns remaining data |
| 235 | pub(crate) const fn remaining(&self) -> &'a [u8] { |
| 236 | self.remaining |
| 237 | } |
| 238 | |
| 239 | /// Returns `true` if data is remaining |
| 240 | pub(crate) const fn is_empty(&self) -> bool { |
| 241 | self.remaining.is_empty() |
| 242 | } |
| 243 | |
| 244 | pub(crate) fn read_be_u32(&mut self) -> Result<u32, Error> { |
| 245 | let mut buf = [0; 4]; |
| 246 | buf.copy_from_slice(self.read_exact(4)?); |
| 247 | Ok(u32::from_be_bytes(buf)) |
| 248 | } |
| 249 | |
| 250 | #[cfg (target_env = "ohos" )] |
| 251 | pub(crate) fn seek_after(&mut self, offset: usize) -> Result<usize, io::Error> { |
| 252 | if offset < self.read_count { |
| 253 | return Err(io::Error::from(ErrorKind::UnexpectedEof)); |
| 254 | } |
| 255 | match self.remaining.get((offset - self.read_count)..) { |
| 256 | Some(remaining) => { |
| 257 | self.remaining = remaining; |
| 258 | self.read_count = offset; |
| 259 | Ok(offset) |
| 260 | } |
| 261 | _ => Err(io::Error::from(ErrorKind::UnexpectedEof)), |
| 262 | } |
| 263 | } |
| 264 | |
| 265 | /// Read exactly `count` bytes, reducing remaining data and incrementing read count |
| 266 | pub(crate) fn read_exact(&mut self, count: usize) -> Result<&'a [u8], io::Error> { |
| 267 | match (self.remaining.get(..count), self.remaining.get(count..)) { |
| 268 | (Some(result), Some(remaining)) => { |
| 269 | self.remaining = remaining; |
| 270 | self.read_count += count; |
| 271 | Ok(result) |
| 272 | } |
| 273 | _ => Err(io::Error::from(ErrorKind::UnexpectedEof)), |
| 274 | } |
| 275 | } |
| 276 | |
| 277 | /// Read bytes and compare them to the provided tag |
| 278 | pub(crate) fn read_tag(&mut self, tag: &[u8]) -> Result<(), io::Error> { |
| 279 | if self.read_exact(tag.len())? == tag { |
| 280 | Ok(()) |
| 281 | } else { |
| 282 | Err(io::Error::from(ErrorKind::InvalidData)) |
| 283 | } |
| 284 | } |
| 285 | |
| 286 | /// Read bytes if the remaining data is prefixed by the provided tag |
| 287 | pub(crate) fn read_optional_tag(&mut self, tag: &[u8]) -> Result<bool, io::Error> { |
| 288 | if self.remaining.starts_with(tag) { |
| 289 | self.read_exact(tag.len())?; |
| 290 | Ok(true) |
| 291 | } else { |
| 292 | Ok(false) |
| 293 | } |
| 294 | } |
| 295 | |
| 296 | /// Read bytes as long as the provided predicate is true |
| 297 | pub(crate) fn read_while<F: Fn(&u8) -> bool>(&mut self, f: F) -> Result<&'a [u8], io::Error> { |
| 298 | match self.remaining.iter().position(|x| !f(x)) { |
| 299 | None => self.read_exact(self.remaining.len()), |
| 300 | Some(position) => self.read_exact(position), |
| 301 | } |
| 302 | } |
| 303 | |
| 304 | // Parse an integer out of the ASCII digits |
| 305 | pub(crate) fn read_int<T: FromStr<Err = ParseIntError>>(&mut self) -> Result<T, Error> { |
| 306 | let bytes = self.read_while(u8::is_ascii_digit)?; |
| 307 | Ok(str::from_utf8(bytes)?.parse()?) |
| 308 | } |
| 309 | |
| 310 | /// Read bytes until the provided predicate is true |
| 311 | pub(crate) fn read_until<F: Fn(&u8) -> bool>(&mut self, f: F) -> Result<&'a [u8], io::Error> { |
| 312 | match self.remaining.iter().position(f) { |
| 313 | None => self.read_exact(self.remaining.len()), |
| 314 | Some(position) => self.read_exact(position), |
| 315 | } |
| 316 | } |
| 317 | } |
| 318 | |
| 319 | pub(crate) fn read_be_i32(bytes: &[u8]) -> Result<i32, Error> { |
| 320 | if bytes.len() != 4 { |
| 321 | return Err(Error::InvalidSlice("too short for i32" )); |
| 322 | } |
| 323 | |
| 324 | let mut buf: [u8; 4] = [0; 4]; |
| 325 | buf.copy_from_slice(src:bytes); |
| 326 | Ok(i32::from_be_bytes(buf)) |
| 327 | } |
| 328 | |
| 329 | pub(crate) fn read_be_i64(bytes: &[u8]) -> Result<i64, Error> { |
| 330 | if bytes.len() != 8 { |
| 331 | return Err(Error::InvalidSlice("too short for i64" )); |
| 332 | } |
| 333 | |
| 334 | let mut buf: [u8; 8] = [0; 8]; |
| 335 | buf.copy_from_slice(src:bytes); |
| 336 | Ok(i64::from_be_bytes(buf)) |
| 337 | } |
| 338 | |
| 339 | /// TZif format version, as identified by the version byte of the header |
| 340 | #[derive (Debug, Copy, Clone, Eq, PartialEq)] |
| 341 | enum Version { |
| 342 | /// Version 1 (version byte `0x00`) |
| 343 | V1, |
| 344 | /// Version 2 (version byte `0x32`, ASCII `2`) |
| 345 | V2, |
| 346 | /// Version 3 (version byte `0x33`, ASCII `3`) |
| 347 | V3, |
| 348 | } |
| 349 | |