1//! Defining custom `Reader`s quickly.
2
3use alloc::borrow::Cow;
4use alloc::rc::Rc;
5use alloc::string::String;
6use alloc::sync::Arc;
7use core::fmt::Debug;
8use core::hash::{Hash, Hasher};
9use core::ops::{Deref, Index, Range, RangeFrom, RangeTo};
10use core::slice;
11use core::str;
12use stable_deref_trait::CloneStableDeref;
13
14use crate::endianity::Endianity;
15use crate::read::{Error, Reader, ReaderOffsetId, Result};
16
17/// A reference counted, non-thread-safe slice of bytes and associated
18/// endianity.
19///
20/// ```
21/// # #[cfg(feature = "std")] {
22/// use std::rc::Rc;
23///
24/// let buf = Rc::from(&[1, 2, 3, 4][..]);
25/// let reader = gimli::EndianRcSlice::new(buf, gimli::NativeEndian);
26/// # let _ = reader;
27/// # }
28/// ```
29pub type EndianRcSlice<Endian> = EndianReader<Endian, Rc<[u8]>>;
30
31/// An atomically reference counted, thread-safe slice of bytes and associated
32/// endianity.
33///
34/// ```
35/// # #[cfg(feature = "std")] {
36/// use std::sync::Arc;
37///
38/// let buf = Arc::from(&[1, 2, 3, 4][..]);
39/// let reader = gimli::EndianArcSlice::new(buf, gimli::NativeEndian);
40/// # let _ = reader;
41/// # }
42/// ```
43pub type EndianArcSlice<Endian> = EndianReader<Endian, Arc<[u8]>>;
44
45/// An easy way to define a custom `Reader` implementation with a reference to a
46/// generic buffer of bytes and an associated endianity.
47///
48/// Note that the whole original buffer is kept alive in memory even if there is
49/// only one reader that references only a handful of bytes from that original
50/// buffer. That is, `EndianReader` will not do any copying, moving, or
51/// compacting in order to free up unused regions of the original buffer. If you
52/// require this kind of behavior, it is up to you to implement `Reader`
53/// directly by-hand.
54///
55/// # Example
56///
57/// Say you have an `mmap`ed file that you want to serve as a `gimli::Reader`.
58/// You can wrap that `mmap`ed file up in a `MmapFile` type and use
59/// `EndianReader<Rc<MmapFile>>` or `EndianReader<Arc<MmapFile>>` as readers as
60/// long as `MmapFile` dereferences to the underlying `[u8]` data.
61///
62/// ```
63/// use std::io;
64/// use std::ops::Deref;
65/// use std::path::Path;
66/// use std::slice;
67/// use std::sync::Arc;
68///
69/// /// A type that represents an `mmap`ed file.
70/// #[derive(Debug)]
71/// pub struct MmapFile {
72/// ptr: *const u8,
73/// len: usize,
74/// }
75///
76/// impl MmapFile {
77/// pub fn new(path: &Path) -> io::Result<MmapFile> {
78/// // Call `mmap` and check for errors and all that...
79/// # unimplemented!()
80/// }
81/// }
82///
83/// impl Drop for MmapFile {
84/// fn drop(&mut self) {
85/// // Call `munmap` to clean up after ourselves...
86/// # unimplemented!()
87/// }
88/// }
89///
90/// // And `MmapFile` can deref to a slice of the `mmap`ed region of memory.
91/// impl Deref for MmapFile {
92/// type Target = [u8];
93/// fn deref(&self) -> &[u8] {
94/// unsafe {
95/// slice::from_raw_parts(self.ptr, self.len)
96/// }
97/// }
98/// }
99///
100/// /// A type that represents a shared `mmap`ed file.
101/// #[derive(Debug, Clone)]
102/// pub struct ArcMmapFile(Arc<MmapFile>);
103///
104/// // And `ArcMmapFile` can deref to a slice of the `mmap`ed region of memory.
105/// impl Deref for ArcMmapFile {
106/// type Target = [u8];
107/// fn deref(&self) -> &[u8] {
108/// &self.0
109/// }
110/// }
111///
112/// // These are both valid for any `Rc` or `Arc`.
113/// unsafe impl gimli::StableDeref for ArcMmapFile {}
114/// unsafe impl gimli::CloneStableDeref for ArcMmapFile {}
115///
116/// /// A `gimli::Reader` that is backed by an `mmap`ed file!
117/// pub type MmapFileReader<Endian> = gimli::EndianReader<Endian, ArcMmapFile>;
118/// # fn test(_: &MmapFileReader<gimli::NativeEndian>) { }
119/// ```
120#[derive(Debug, Clone, Copy)]
121pub struct EndianReader<Endian, T>
122where
123 Endian: Endianity,
124 T: CloneStableDeref<Target = [u8]> + Debug,
125{
126 range: SubRange<T>,
127 endian: Endian,
128}
129
130impl<Endian, T1, T2> PartialEq<EndianReader<Endian, T2>> for EndianReader<Endian, T1>
131where
132 Endian: Endianity,
133 T1: CloneStableDeref<Target = [u8]> + Debug,
134 T2: CloneStableDeref<Target = [u8]> + Debug,
135{
136 fn eq(&self, rhs: &EndianReader<Endian, T2>) -> bool {
137 self.bytes() == rhs.bytes()
138 }
139}
140
141impl<Endian, T> Eq for EndianReader<Endian, T>
142where
143 Endian: Endianity,
144 T: CloneStableDeref<Target = [u8]> + Debug,
145{
146}
147
148impl<Endian, T> Hash for EndianReader<Endian, T>
149where
150 Endian: Endianity,
151 T: CloneStableDeref<Target = [u8]> + Debug,
152{
153 fn hash<H: Hasher>(&self, state: &mut H) {
154 // This must match the `PartialEq` implementation.
155 self.bytes().hash(state);
156 }
157}
158
159// This is separated out from `EndianReader` so that we can avoid running afoul
160// of borrowck. We need to `read_slice(&mut self, ...) -> &[u8]` and then call
161// `self.endian.read_whatever` on the result. The problem is that the returned
162// slice keeps the `&mut self` borrow active, so we wouldn't be able to access
163// `self.endian`. Splitting the sub-range out from the endian lets us work
164// around this, making it so that only the `self.range` borrow is held active,
165// not all of `self`.
166//
167// This also serves to encapsulate the unsafe code concerning `CloneStableDeref`.
168// The `bytes` member is held so that the bytes live long enough, and the
169// `CloneStableDeref` ensures these bytes never move. The `ptr` and `len`
170// members point inside `bytes`, and are updated during read operations.
171#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
172struct SubRange<T>
173where
174 T: CloneStableDeref<Target = [u8]> + Debug,
175{
176 bytes: T,
177 ptr: *const u8,
178 len: usize,
179}
180
181unsafe impl<T> Send for SubRange<T> where T: CloneStableDeref<Target = [u8]> + Debug + Send {}
182
183unsafe impl<T> Sync for SubRange<T> where T: CloneStableDeref<Target = [u8]> + Debug + Sync {}
184
185impl<T> SubRange<T>
186where
187 T: CloneStableDeref<Target = [u8]> + Debug,
188{
189 #[inline]
190 fn new(bytes: T) -> Self {
191 let ptr = bytes.as_ptr();
192 let len = bytes.len();
193 SubRange { bytes, ptr, len }
194 }
195
196 #[inline]
197 fn bytes(&self) -> &[u8] {
198 // Safe because `T` implements `CloneStableDeref`, `bytes` can't be modified,
199 // and all operations that modify `ptr` and `len` ensure they stay in range.
200 unsafe { slice::from_raw_parts(self.ptr, self.len) }
201 }
202
203 #[inline]
204 fn len(&self) -> usize {
205 self.len
206 }
207
208 #[inline]
209 fn truncate(&mut self, len: usize) {
210 assert!(len <= self.len);
211 self.len = len;
212 }
213
214 #[inline]
215 fn skip(&mut self, len: usize) {
216 assert!(len <= self.len);
217 self.ptr = unsafe { self.ptr.add(len) };
218 self.len -= len;
219 }
220
221 #[inline]
222 fn read_slice(&mut self, len: usize) -> Option<&[u8]> {
223 if self.len() < len {
224 None
225 } else {
226 // Same as for `bytes()`.
227 let bytes = unsafe { slice::from_raw_parts(self.ptr, len) };
228 self.skip(len);
229 Some(bytes)
230 }
231 }
232}
233
234impl<Endian, T> EndianReader<Endian, T>
235where
236 Endian: Endianity,
237 T: CloneStableDeref<Target = [u8]> + Debug,
238{
239 /// Construct a new `EndianReader` with the given bytes.
240 #[inline]
241 pub fn new(bytes: T, endian: Endian) -> EndianReader<Endian, T> {
242 EndianReader {
243 range: SubRange::new(bytes),
244 endian,
245 }
246 }
247
248 /// Return a reference to the raw bytes underlying this reader.
249 #[inline]
250 pub fn bytes(&self) -> &[u8] {
251 self.range.bytes()
252 }
253}
254
255/// # Range Methods
256///
257/// Unfortunately, `std::ops::Index` *must* return a reference, so we can't
258/// implement `Index<Range<usize>>` to return a new `EndianReader` the way we
259/// would like to. Instead, we abandon fancy indexing operators and have these
260/// plain old methods.
261impl<Endian, T> EndianReader<Endian, T>
262where
263 Endian: Endianity,
264 T: CloneStableDeref<Target = [u8]> + Debug,
265{
266 /// Take the given `start..end` range of the underlying buffer and return a
267 /// new `EndianReader`.
268 ///
269 /// ```
270 /// # #[cfg(feature = "std")] {
271 /// use gimli::{EndianReader, LittleEndian};
272 /// use std::sync::Arc;
273 ///
274 /// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]);
275 /// let reader = EndianReader::new(buf.clone(), LittleEndian);
276 /// assert_eq!(reader.range(1..3),
277 /// EndianReader::new(&buf[1..3], LittleEndian));
278 /// # }
279 /// ```
280 ///
281 /// # Panics
282 ///
283 /// Panics if the range is out of bounds.
284 pub fn range(&self, idx: Range<usize>) -> EndianReader<Endian, T> {
285 let mut r = self.clone();
286 r.range.skip(idx.start);
287 r.range.truncate(idx.len());
288 r
289 }
290
291 /// Take the given `start..` range of the underlying buffer and return a new
292 /// `EndianReader`.
293 ///
294 /// ```
295 /// # #[cfg(feature = "std")] {
296 /// use gimli::{EndianReader, LittleEndian};
297 /// use std::sync::Arc;
298 ///
299 /// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]);
300 /// let reader = EndianReader::new(buf.clone(), LittleEndian);
301 /// assert_eq!(reader.range_from(2..),
302 /// EndianReader::new(&buf[2..], LittleEndian));
303 /// # }
304 /// ```
305 ///
306 /// # Panics
307 ///
308 /// Panics if the range is out of bounds.
309 pub fn range_from(&self, idx: RangeFrom<usize>) -> EndianReader<Endian, T> {
310 let mut r = self.clone();
311 r.range.skip(idx.start);
312 r
313 }
314
315 /// Take the given `..end` range of the underlying buffer and return a new
316 /// `EndianReader`.
317 ///
318 /// ```
319 /// # #[cfg(feature = "std")] {
320 /// use gimli::{EndianReader, LittleEndian};
321 /// use std::sync::Arc;
322 ///
323 /// let buf = Arc::<[u8]>::from(&[0x01, 0x02, 0x03, 0x04][..]);
324 /// let reader = EndianReader::new(buf.clone(), LittleEndian);
325 /// assert_eq!(reader.range_to(..3),
326 /// EndianReader::new(&buf[..3], LittleEndian));
327 /// # }
328 /// ```
329 ///
330 /// # Panics
331 ///
332 /// Panics if the range is out of bounds.
333 pub fn range_to(&self, idx: RangeTo<usize>) -> EndianReader<Endian, T> {
334 let mut r = self.clone();
335 r.range.truncate(idx.end);
336 r
337 }
338}
339
340impl<Endian, T> Index<usize> for EndianReader<Endian, T>
341where
342 Endian: Endianity,
343 T: CloneStableDeref<Target = [u8]> + Debug,
344{
345 type Output = u8;
346 fn index(&self, idx: usize) -> &Self::Output {
347 &self.bytes()[idx]
348 }
349}
350
351impl<Endian, T> Index<RangeFrom<usize>> for EndianReader<Endian, T>
352where
353 Endian: Endianity,
354 T: CloneStableDeref<Target = [u8]> + Debug,
355{
356 type Output = [u8];
357 fn index(&self, idx: RangeFrom<usize>) -> &Self::Output {
358 &self.bytes()[idx]
359 }
360}
361
362impl<Endian, T> Deref for EndianReader<Endian, T>
363where
364 Endian: Endianity,
365 T: CloneStableDeref<Target = [u8]> + Debug,
366{
367 type Target = [u8];
368 fn deref(&self) -> &Self::Target {
369 self.bytes()
370 }
371}
372
373impl<Endian, T> Reader for EndianReader<Endian, T>
374where
375 Endian: Endianity,
376 T: CloneStableDeref<Target = [u8]> + Debug,
377{
378 type Endian = Endian;
379 type Offset = usize;
380
381 #[inline]
382 fn endian(&self) -> Endian {
383 self.endian
384 }
385
386 #[inline]
387 fn len(&self) -> usize {
388 self.range.len()
389 }
390
391 #[inline]
392 fn empty(&mut self) {
393 self.range.truncate(0);
394 }
395
396 #[inline]
397 fn truncate(&mut self, len: usize) -> Result<()> {
398 if self.len() < len {
399 Err(Error::UnexpectedEof(self.offset_id()))
400 } else {
401 self.range.truncate(len);
402 Ok(())
403 }
404 }
405
406 #[inline]
407 fn offset_from(&self, base: &EndianReader<Endian, T>) -> usize {
408 let base_ptr = base.bytes().as_ptr() as usize;
409 let ptr = self.bytes().as_ptr() as usize;
410 debug_assert!(base_ptr <= ptr);
411 debug_assert!(ptr + self.bytes().len() <= base_ptr + base.bytes().len());
412 ptr - base_ptr
413 }
414
415 #[inline]
416 fn offset_id(&self) -> ReaderOffsetId {
417 ReaderOffsetId(self.bytes().as_ptr() as u64)
418 }
419
420 #[inline]
421 fn lookup_offset_id(&self, id: ReaderOffsetId) -> Option<Self::Offset> {
422 let id = id.0;
423 let self_id = self.bytes().as_ptr() as u64;
424 let self_len = self.bytes().len() as u64;
425 if id >= self_id && id <= self_id + self_len {
426 Some((id - self_id) as usize)
427 } else {
428 None
429 }
430 }
431
432 #[inline]
433 fn find(&self, byte: u8) -> Result<usize> {
434 self.bytes()
435 .iter()
436 .position(|x| *x == byte)
437 .ok_or_else(|| Error::UnexpectedEof(self.offset_id()))
438 }
439
440 #[inline]
441 fn skip(&mut self, len: usize) -> Result<()> {
442 if self.len() < len {
443 Err(Error::UnexpectedEof(self.offset_id()))
444 } else {
445 self.range.skip(len);
446 Ok(())
447 }
448 }
449
450 #[inline]
451 fn split(&mut self, len: usize) -> Result<Self> {
452 if self.len() < len {
453 Err(Error::UnexpectedEof(self.offset_id()))
454 } else {
455 let mut r = self.clone();
456 r.range.truncate(len);
457 self.range.skip(len);
458 Ok(r)
459 }
460 }
461
462 #[inline]
463 fn to_slice(&self) -> Result<Cow<'_, [u8]>> {
464 Ok(self.bytes().into())
465 }
466
467 #[inline]
468 fn to_string(&self) -> Result<Cow<'_, str>> {
469 match str::from_utf8(self.bytes()) {
470 Ok(s) => Ok(s.into()),
471 _ => Err(Error::BadUtf8),
472 }
473 }
474
475 #[inline]
476 fn to_string_lossy(&self) -> Result<Cow<'_, str>> {
477 Ok(String::from_utf8_lossy(self.bytes()))
478 }
479
480 #[inline]
481 fn read_slice(&mut self, buf: &mut [u8]) -> Result<()> {
482 match self.range.read_slice(buf.len()) {
483 Some(slice) => {
484 buf.copy_from_slice(slice);
485 Ok(())
486 }
487 None => Err(Error::UnexpectedEof(self.offset_id())),
488 }
489 }
490}
491
#[cfg(test)]
mod tests {
    use super::*;
    use crate::endianity::NativeEndian;
    use crate::read::Reader;

    /// Build a native-endian reader over `bytes`.
    fn native_reader<T: CloneStableDeref<Target = [u8]> + Debug>(
        bytes: T,
    ) -> EndianReader<NativeEndian, T> {
        EndianReader::new(bytes, NativeEndian)
    }

    /// Shared fixture: ten distinct byte values.
    const BUF: &[u8] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 0];

    #[test]
    fn test_reader_split() {
        let mut rest = native_reader(BUF);
        let head = rest.split(3).unwrap();
        // The split-off part holds the prefix; the original keeps the remainder.
        assert_eq!(head, native_reader(&BUF[..3]));
        assert_eq!(rest, native_reader(&BUF[3..]));
    }

    #[test]
    fn test_reader_split_out_of_bounds() {
        let mut reader = native_reader(BUF);
        let outcome = reader.split(30);
        assert!(outcome.is_err());
    }

    #[test]
    fn bytes_and_len_and_range_and_eq() {
        // The full reader mirrors the fixture.
        let whole = native_reader(BUF);
        assert_eq!(whole.len(), BUF.len());
        assert_eq!(whole.bytes(), BUF);
        assert_eq!(whole, native_reader(BUF));

        // `range` narrows on both ends.
        let middle = whole.range(2..8);
        let expected_middle = &BUF[2..8];
        assert_eq!(middle.len(), expected_middle.len());
        assert_eq!(middle.bytes(), expected_middle);
        assert_ne!(middle, native_reader(BUF));
        assert_eq!(middle, native_reader(expected_middle));

        // `range_from` is relative to the already narrowed reader.
        let tail = middle.range_from(1..);
        let expected_tail = &expected_middle[1..];
        assert_eq!(tail.len(), expected_tail.len());
        assert_eq!(tail.bytes(), expected_tail);
        assert_ne!(tail, native_reader(BUF));
        assert_eq!(tail, native_reader(expected_tail));

        // `range_to` likewise.
        let head = tail.range_to(..4);
        let expected_head = &expected_tail[..4];
        assert_eq!(head.len(), expected_head.len());
        assert_eq!(head.bytes(), expected_head);
        assert_ne!(head, native_reader(BUF));
        assert_eq!(head, native_reader(expected_head));
    }

    #[test]
    fn find() {
        let mut reader = native_reader(BUF);
        reader.skip(2).unwrap();
        // The reported index is relative to the reader's current position.
        let expected = BUF[2..].iter().position(|x| *x == 5).unwrap();
        assert_eq!(reader.find(5), Ok(expected));
    }

    #[test]
    fn indexing() {
        let mut reader = native_reader(BUF);
        reader.skip(2).unwrap();
        assert_eq!(reader[0], BUF[2]);
    }

    #[test]
    #[should_panic]
    fn indexing_out_of_bounds() {
        let mut reader = native_reader(BUF);
        reader.skip(2).unwrap();
        let _ = reader[900];
    }

    #[test]
    fn endian() {
        let reader = native_reader(BUF);
        assert_eq!(reader.endian(), NativeEndian);
    }

    #[test]
    fn empty() {
        let mut reader = native_reader(BUF);
        assert!(!reader.is_empty());

        reader.empty();

        assert!(reader.is_empty());
        assert!(reader.bytes().is_empty());
    }

    #[test]
    fn truncate() {
        let mut window = native_reader(BUF).range(2..8);
        window.truncate(2).unwrap();
        assert_eq!(window.bytes(), &BUF[2..4]);
    }

    #[test]
    fn offset_from() {
        let base = native_reader(BUF);
        let inner = base.range(2..8);
        assert_eq!(inner.offset_from(&base), 2);
    }

    #[test]
    fn skip() {
        let mut reader = native_reader(BUF);
        reader.skip(2).unwrap();
        assert_eq!(reader.bytes(), &BUF[2..]);
    }

    #[test]
    fn to_slice() {
        let window = native_reader(BUF).range(2..5);
        assert_eq!(window.to_slice(), Ok(Cow::from(&BUF[2..5])));
    }

    #[test]
    fn to_string_ok() {
        let text = b"hello, world!";
        let tail = native_reader(&text[..]).range_from(7..);
        assert_eq!(tail.to_string(), Ok(Cow::from("world!")));
    }

    // The rocket emoji (🚀 = [0xf0, 0x9f, 0x9a, 0x80]) rotated left by one
    // byte, which makes it invalid UTF-8.
    const BAD_UTF8: &[u8] = &[0x9f, 0x9a, 0x80, 0xf0];

    #[test]
    fn to_string_err() {
        let reader = native_reader(BAD_UTF8);
        let outcome = reader.to_string();
        assert!(outcome.is_err());
    }

    #[test]
    fn to_string_lossy() {
        let reader = native_reader(BAD_UTF8);
        assert_eq!(reader.to_string_lossy(), Ok(Cow::from("����")));
    }

    #[test]
    fn read_u8_array() {
        let mut reader = native_reader(BAD_UTF8);
        reader.skip(1).unwrap();
        let pair: [u8; 2] = reader.read_u8_array().unwrap();
        assert_eq!(pair, &BAD_UTF8[1..3]);
        // Reading consumed two more bytes after the initial skip.
        assert_eq!(reader.bytes(), &BAD_UTF8[3..]);
    }
}
652