| 1 | use crate::{ |
| 2 | error::{err, Error}, |
| 3 | util::escape::{Byte, Bytes}, |
| 4 | }; |
| 5 | |
| 6 | /// Parses an `i64` number from the beginning to the end of the given slice of |
| 7 | /// ASCII digit characters. |
| 8 | /// |
| 9 | /// If any byte in the given slice is not `[0-9]`, then this returns an error. |
| 10 | /// Similarly, if the number parsed does not fit into a `i64`, then this |
| 11 | /// returns an error. Notably, this routine does not permit parsing a negative |
| 12 | /// integer. (We use `i64` because everything in this crate uses signed |
| 13 | /// integers, and because a higher level routine might want to parse the sign |
| 14 | /// and then apply it to the result of this routine.) |
| 15 | #[cfg_attr (feature = "perf-inline" , inline(always))] |
| 16 | pub(crate) fn i64(bytes: &[u8]) -> Result<i64, Error> { |
| 17 | if bytes.is_empty() { |
| 18 | return Err(err!("invalid number, no digits found" )); |
| 19 | } |
| 20 | let mut n: i64 = 0; |
| 21 | for &byte in bytes { |
| 22 | let digit = match byte.checked_sub(b'0' ) { |
| 23 | None => { |
| 24 | return Err(err!( |
| 25 | "invalid digit, expected 0-9 but got {}" , |
| 26 | Byte(byte), |
| 27 | )); |
| 28 | } |
| 29 | Some(digit) if digit > 9 => { |
| 30 | return Err(err!( |
| 31 | "invalid digit, expected 0-9 but got {}" , |
| 32 | Byte(byte), |
| 33 | )) |
| 34 | } |
| 35 | Some(digit) => { |
| 36 | debug_assert!((0..=9).contains(&digit)); |
| 37 | i64::from(digit) |
| 38 | } |
| 39 | }; |
| 40 | n = n.checked_mul(10).and_then(|n| n.checked_add(digit)).ok_or_else( |
| 41 | || { |
| 42 | err!( |
| 43 | "number ' {}' too big to parse into 64-bit integer" , |
| 44 | Bytes(bytes), |
| 45 | ) |
| 46 | }, |
| 47 | )?; |
| 48 | } |
| 49 | Ok(n) |
| 50 | } |
| 51 | |
| 52 | /// Parses an `i64` fractional number from the beginning to the end of the |
| 53 | /// given slice of ASCII digit characters. |
| 54 | /// |
| 55 | /// The fraction's maximum precision must be provided. The returned integer |
| 56 | /// will always be in units of `10^{max_precision}`. For example, to parse a |
| 57 | /// fractional amount of seconds with a maximum precision of nanoseconds, then |
| 58 | /// use `max_precision=9`. |
| 59 | /// |
| 60 | /// If any byte in the given slice is not `[0-9]`, then this returns an error. |
| 61 | /// Similarly, if the fraction parsed does not fit into a `i64`, then this |
| 62 | /// returns an error. Notably, this routine does not permit parsing a negative |
| 63 | /// integer. (We use `i64` because everything in this crate uses signed |
| 64 | /// integers, and because a higher level routine might want to parse the sign |
| 65 | /// and then apply it to the result of this routine.) |
| 66 | pub(crate) fn fraction( |
| 67 | bytes: &[u8], |
| 68 | max_precision: usize, |
| 69 | ) -> Result<i64, Error> { |
| 70 | if bytes.is_empty() { |
| 71 | return Err(err!("invalid fraction, no digits found" )); |
| 72 | } else if bytes.len() > max_precision { |
| 73 | return Err(err!( |
| 74 | "invalid fraction, too many digits \ |
| 75 | (at most {max_precision} are allowed" |
| 76 | )); |
| 77 | } |
| 78 | let mut n: i64 = 0; |
| 79 | for &byte in bytes { |
| 80 | let digit = match byte.checked_sub(b'0' ) { |
| 81 | None => { |
| 82 | return Err(err!( |
| 83 | "invalid fractional digit, expected 0-9 but got {}" , |
| 84 | Byte(byte), |
| 85 | )); |
| 86 | } |
| 87 | Some(digit) if digit > 9 => { |
| 88 | return Err(err!( |
| 89 | "invalid fractional digit, expected 0-9 but got {}" , |
| 90 | Byte(byte), |
| 91 | )) |
| 92 | } |
| 93 | Some(digit) => { |
| 94 | debug_assert!((0..=9).contains(&digit)); |
| 95 | i64::from(digit) |
| 96 | } |
| 97 | }; |
| 98 | n = n.checked_mul(10).and_then(|n| n.checked_add(digit)).ok_or_else( |
| 99 | || { |
| 100 | err!( |
| 101 | "fractional ' {}' too big to parse into 64-bit integer" , |
| 102 | Bytes(bytes), |
| 103 | ) |
| 104 | }, |
| 105 | )?; |
| 106 | } |
| 107 | for _ in bytes.len()..max_precision { |
| 108 | n = n.checked_mul(10).ok_or_else(|| { |
| 109 | err!( |
| 110 | "fractional ' {}' too big to parse into 64-bit integer \ |
| 111 | (too much precision supported)" , |
| 112 | Bytes(bytes) |
| 113 | ) |
| 114 | })?; |
| 115 | } |
| 116 | Ok(n) |
| 117 | } |
| 118 | |
/// Borrows the given `OsStr`-like value as a `&str`, for callers that cannot
/// easily get at a `&[u8]`.
///
/// This behaves like `OsStr::to_str`, except that a failed conversion is
/// reported as an error with a descriptive message instead of `None`.
///
/// # Errors
///
/// This returns an error if the value is not valid UTF-8.
#[cfg(feature = "tzdb-zoneinfo")]
pub(crate) fn os_str_utf8<'o, O>(os_str: &'o O) -> Result<&'o str, Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    // The shadowed name is what the error message's `{os_str:?}` refers to.
    let os_str = os_str.as_ref();
    match os_str.to_str() {
        Some(string) => Ok(string),
        None => Err(err!("environment value {os_str:?} is not valid UTF-8")),
    }
}
| 133 | |
/// Borrows the given `OsStr`-like value as raw bytes, for callers that cannot
/// easily get at a `&[u8]`.
///
/// Unlike `OsStr::to_str`, on Unix platforms this hands back the underlying
/// bytes directly without any validation. On every other platform, the value
/// is first checked to be valid UTF-8 and then its bytes are returned.
///
/// # Errors
///
/// On non-Unix platforms, this returns an error if the value is not valid
/// UTF-8. On Unix, this never fails.
#[cfg(feature = "tz-system")]
pub(crate) fn os_str_bytes<'o, O>(os_str: &'o O) -> Result<&'o [u8], Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    // The shadowed name is what the error message's `{os_str:?}` refers to.
    let os_str = os_str.as_ref();
    #[cfg(unix)]
    {
        Ok(std::os::unix::ffi::OsStrExt::as_bytes(os_str))
    }
    #[cfg(not(unix))]
    {
        // The UTF-8 validation done here is discarded once we return bytes,
        // so a caller that ultimately wants a `&str` will validate a second
        // time. Should that ever matter, a sibling API returning `&str` can
        // be added. In practice it likely doesn't, since an `OsStr` in this
        // crate is usually just an environment variable.
        match os_str.to_str() {
            Some(string) => Ok(string.as_bytes()),
            None => {
                Err(err!("environment value {os_str:?} is not valid UTF-8"))
            }
        }
    }
}
| 164 | |
/// Splits the given input into two slices at the given position.
///
/// On success, the first slice is `input[..at]` and the second slice is
/// `input[at..]`. Splitting at `input.len()` is permitted and yields an empty
/// second slice.
///
/// If the position is greater than the length of the slice given, then this
/// returns `None` instead of panicking.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn split(input: &[u8], at: usize) -> Option<(&[u8], &[u8])> {
    if at > input.len() {
        None
    } else {
        // In bounds by the check above, so `split_at` cannot panic here.
        Some(input.split_at(at))
    }
}
| 177 | |
/// Builds a closure that reports how far into `start` another slice begins.
///
/// The returned closure takes an ending point and subtracts the address of
/// `start`'s first byte from the address of the ending point's first byte.
/// When the ending point is a suffix of `start`, the result is its byte
/// offset relative to the beginning of `start`.
///
/// This is useful as a helper function in parsing routines that use slices
/// but want to report offsets.
///
/// # Panics
///
/// This may panic if the ending point is not a suffix slice of `start`. In
/// particular, an ending point located before `start` makes the subtraction
/// underflow.
pub(crate) fn offseter<'a>(
    start: &'a [u8],
) -> impl Fn(&'a [u8]) -> usize + 'a {
    // The base address never changes, so compute it once up front.
    let base = start.as_ptr() as usize;
    move |end: &'a [u8]| {
        let end_addr = end.as_ptr() as usize;
        end_addr - base
    }
}
| 196 | |
| 197 | /// Returns a function that converts two slices to the slice between them. |
| 198 | /// |
| 199 | /// This takes a starting point as input and returns a function that, when |
| 200 | /// given an ending point (greater than or equal to the starting point), it |
| 201 | /// returns a slice beginning at the starting point and ending just at the |
| 202 | /// ending point. |
| 203 | /// |
| 204 | /// This is useful as a helper function in parsing routines. |
| 205 | /// |
| 206 | /// # Panics |
| 207 | /// |
| 208 | /// This may panic if the ending point is not a suffix slice of `start`. |
| 209 | pub(crate) fn slicer<'a>( |
| 210 | start: &'a [u8], |
| 211 | ) -> impl Fn(&'a [u8]) -> &'a [u8] + 'a { |
| 212 | let mkoffset: impl Fn(&[u8]) -> usize = offseter(start); |
| 213 | move |end: &'a [u8]| { |
| 214 | let offset: usize = mkoffset(end); |
| 215 | &start[..offset] |
| 216 | } |
| 217 | } |
| 218 | |