1// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
2// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
3// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
4// option. This file may not be copied, modified, or distributed
5// except according to those terms.
6
7use std::borrow::Borrow;
8use std::cell::{Cell, UnsafeCell};
9use std::cmp::Ordering;
10use std::default::Default;
11use std::fmt as strfmt;
12use std::iter::FromIterator;
13use std::marker::PhantomData;
14use std::num::NonZeroUsize;
15use std::ops::{Deref, DerefMut};
16use std::sync::atomic::Ordering as AtomicOrdering;
17use std::sync::atomic::{self, AtomicUsize};
18use std::{hash, io, mem, ptr, str, u32};
19
20#[cfg(feature = "encoding")]
21use encoding::{self, DecoderTrap, EncoderTrap, EncodingRef};
22
23use buf32::{self, Buf32};
24use fmt::imp::Fixup;
25use fmt::{self, Slice};
26use util::{copy_and_advance, copy_lifetime, copy_lifetime_mut, unsafe_slice, unsafe_slice_mut};
27use OFLOW;
28
// Longest byte length that is stored in-line, i.e. without a heap buffer.
const MAX_INLINE_LEN: usize = 8;
// Largest `ptr` value that still denotes an inline tendril; any larger
// value is treated as a (possibly tagged) pointer to a heap header.
const MAX_INLINE_TAG: usize = 0xF;
// `ptr` tag of the empty tendril. It stands in for length 0 because the
// tag is stored in a `NonZeroUsize`.
const EMPTY_TAG: usize = 0xF;
32
33#[inline(always)]
34fn inline_tag(len: u32) -> NonZeroUsize {
35 debug_assert!(len <= MAX_INLINE_LEN as u32);
36 unsafe { NonZeroUsize::new_unchecked(if len == 0 { EMPTY_TAG } else { len as usize }) }
37}
38
/// The multithreadedness of a tendril.
///
/// Exactly two types implement this trait:
///
/// - `Atomic`: use this in your tendril and you will have a `Send` tendril which works
///   across threads; this is akin to `Arc`.
///
/// - `NonAtomic`: use this in your tendril and you will have a tendril which is neither
///   `Send` nor `Sync` but should be a tad faster; this is akin to `Rc`.
///
/// The layout of this trait is also mandated to be that of a `usize`,
/// for it is used for reference counting.
///
/// # Safety
///
/// Implementors must have the layout of a `usize`: tendrils are transmuted
/// between atomicities, which reinterprets the reference count in place.
pub unsafe trait Atomicity: 'static {
    /// Create a reference count for a buffer with a single owner.
    #[doc(hidden)]
    fn new() -> Self;

    /// Add one reference, returning the count as it was before the call.
    #[doc(hidden)]
    fn increment(&self) -> usize;

    /// Remove one reference, returning the count as it was before the call
    /// (so a return value of 1 means the last reference was just released).
    #[doc(hidden)]
    fn decrement(&self) -> usize;

    /// Issue whatever acquire fence this kind of counter needs before the
    /// buffer is destroyed; a no-op for the non-atomic counter.
    #[doc(hidden)]
    fn fence_acquire();
}
64
/// A marker of a non-atomic tendril.
///
/// This is the default for the second type parameter of a `Tendril`
/// and so doesn't typically need to be written.
///
/// This is akin to using `Rc` for reference counting.
///
/// The single field is the reference count itself, kept in a `Cell` so it
/// can be updated through a shared reference.
#[repr(C)]
pub struct NonAtomic(Cell<usize>);
73
74unsafe impl Atomicity for NonAtomic {
75 #[inline]
76 fn new() -> Self {
77 NonAtomic(Cell::new(1))
78 }
79
80 #[inline]
81 fn increment(&self) -> usize {
82 let value: usize = self.0.get();
83 self.0.set(val:value.checked_add(1).expect(msg:OFLOW));
84 value
85 }
86
87 #[inline]
88 fn decrement(&self) -> usize {
89 let value: usize = self.0.get();
90 self.0.set(val:value - 1);
91 value
92 }
93
94 #[inline]
95 fn fence_acquire() {}
96}
97
/// A marker of an atomic (and hence concurrent) tendril.
///
/// This is used as the second, optional type parameter of a `Tendril`;
/// `Tendril<F, Atomic>` thus implements `Send`.
///
/// This is akin to using `Arc` for reference counting.
///
/// The single field is the reference count itself.
pub struct Atomic(AtomicUsize);
105
106unsafe impl Atomicity for Atomic {
107 #[inline]
108 fn new() -> Self {
109 Atomic(AtomicUsize::new(1))
110 }
111
112 #[inline]
113 fn increment(&self) -> usize {
114 // Relaxed is OK because we have a reference already.
115 self.0.fetch_add(val:1, order:AtomicOrdering::Relaxed)
116 }
117
118 #[inline]
119 fn decrement(&self) -> usize {
120 self.0.fetch_sub(val:1, order:AtomicOrdering::Release)
121 }
122
123 #[inline]
124 fn fence_acquire() {
125 atomic::fence(order:AtomicOrdering::Acquire);
126 }
127}
128
/// Header stored at the start of a heap buffer (it is the header type of
/// the `Buf32` that backs a non-inline tendril).
#[repr(C)] // Preserve field order for cross-atomicity transmutes
struct Header<A: Atomicity> {
    /// Number of tendrils referring to this buffer.
    refcount: A,
    /// Buffer capacity; written when the buffer becomes shared, because a
    /// shared tendril uses its own `aux` field for the offset instead.
    cap: u32,
}
134
135impl<A> Header<A>
136where
137 A: Atomicity,
138{
139 #[inline(always)]
140 unsafe fn new() -> Header<A> {
141 Header {
142 refcount: A::new(),
143 cap: 0,
144 }
145 }
146}
147
/// Errors that can occur when slicing a `Tendril`.
#[derive(Copy, Clone, Hash, Debug, PartialEq, Eq)]
pub enum SubtendrilError {
    /// The requested offset/length does not lie within the tendril.
    OutOfBounds,
    /// The requested range is in bounds, but the resulting bytes are not
    /// valid for the tendril's format.
    ValidationFailed,
}
154
/// Compact string type for zero-copy parsing.
///
/// `Tendril`s have the semantics of owned strings, but are sometimes views
/// into shared buffers. When you mutate a `Tendril`, an owned copy is made
/// if necessary. Further mutations occur in-place until the string becomes
/// shared, e.g. with `clone()` or `subtendril()`.
///
/// Buffer sharing is accomplished through reference counting. By default
/// the count is thread-local (non-atomic), which has very low overhead, and
/// the Rust type system will prevent you at compile time from sending such
/// a `Tendril` between threads. Use the `Atomic` marker (see the type
/// parameter `A` below) or `SendTendril` when a tendril has to cross
/// threads.
///
/// Whereas `String` allocates in the heap for any non-empty string, `Tendril`
/// can store small strings (up to 8 bytes) in-line, without a heap allocation.
/// `Tendril` is also smaller than `String` on 64-bit platforms — 16 bytes
/// versus 24.
///
/// The type parameter `F` specifies the format of the tendril, for example
/// UTF-8 text or uninterpreted bytes. The parameter will be instantiated
/// with one of the marker types from `tendril::fmt`. See the `StrTendril`
/// and `ByteTendril` type aliases for two examples.
///
/// The type parameter `A` indicates the atomicity of the tendril; it is by
/// default `NonAtomic`, but can be specified as `Atomic` to get a tendril
/// which implements `Send` (viz. a thread-safe tendril).
///
/// The maximum length of a `Tendril` is 4 GB. The library will panic if
/// you attempt to go over the limit.
#[repr(C)]
pub struct Tendril<F, A = NonAtomic>
where
    F: fmt::Format,
    A: Atomicity,
{
    // Values up to `MAX_INLINE_TAG` are inline tags (the length, or
    // `EMPTY_TAG`); larger values are the address of the heap `Header`,
    // with the lowest bit set when the buffer is shared.
    ptr: Cell<NonZeroUsize>,
    // Either the inline bytes or the heap `len`/`aux` pair, depending on `ptr`.
    buf: UnsafeCell<Buffer>,
    marker: PhantomData<*mut F>,
    refcount_marker: PhantomData<A>,
}
194
/// Second word of a `Tendril`; which variant is live is decided by the
/// tendril's `ptr` tag.
#[repr(C)]
union Buffer {
    /// Length and auxiliary word of a heap-backed tendril.
    heap: Heap,
    /// Bytes of an inline tendril (at most `MAX_INLINE_LEN` of them).
    inline: [u8; 8],
}
200
/// Per-tendril bookkeeping for a heap-backed tendril.
#[derive(Copy, Clone)]
#[repr(C)]
struct Heap {
    /// Length of this tendril in bytes.
    len: u32,
    /// Buffer capacity while the tendril is owned; byte offset of this
    /// tendril within the buffer while it is shared.
    aux: u32,
}
207
// A tendril may be sent to another thread only when its reference count
// type is `Sync`. Of the two `Atomicity` implementors in this file that
// admits `Atomic` (an `AtomicUsize`) and excludes `NonAtomic` (a `Cell`).
unsafe impl<F, A> Send for Tendril<F, A>
where
    F: fmt::Format,
    A: Atomicity + Sync,
{
}
214
/// `Tendril` for storing native Rust strings.
///
/// Uses the default, non-atomic reference counting.
pub type StrTendril = Tendril<fmt::UTF8>;

/// `Tendril` for storing binary data.
///
/// Uses the default, non-atomic reference counting.
pub type ByteTendril = Tendril<fmt::Bytes>;
220
221impl<F, A> Clone for Tendril<F, A>
222where
223 F: fmt::Format,
224 A: Atomicity,
225{
226 #[inline]
227 fn clone(&self) -> Tendril<F, A> {
228 unsafe {
229 if self.ptr.get().get() > MAX_INLINE_TAG {
230 self.make_buf_shared();
231 self.incref();
232 }
233
234 ptr::read(self)
235 }
236 }
237}
238
239impl<F, A> Drop for Tendril<F, A>
240where
241 F: fmt::Format,
242 A: Atomicity,
243{
244 #[inline]
245 fn drop(&mut self) {
246 unsafe {
247 let p: usize = self.ptr.get().get();
248 if p <= MAX_INLINE_TAG {
249 return;
250 }
251
252 let (buf: Buf32>, shared: bool, _) = self.assume_buf();
253 if shared {
254 let header: *mut Header = self.header();
255 if (*header).refcount.decrement() == 1 {
256 A::fence_acquire();
257 buf.destroy();
258 }
259 } else {
260 buf.destroy();
261 }
262 }
263 }
264}
265
// Expands to a `FromIterator::from_iter` body for items of type `$ty`:
// start from an empty value and feed the iterator to the matching `Extend`
// implementation.
macro_rules! from_iter_method {
    ($ty:ty) => {
        #[inline]
        fn from_iter<I>(iterable: I) -> Self
        where
            I: IntoIterator<Item = $ty>,
        {
            let mut collected = Self::new();
            collected.extend(iterable);
            collected
        }
    };
}
279
280impl<A> Extend<char> for Tendril<fmt::UTF8, A>
281where
282 A: Atomicity,
283{
284 #[inline]
285 fn extend<I>(&mut self, iterable: I)
286 where
287 I: IntoIterator<Item = char>,
288 {
289 let iterator: ::IntoIter = iterable.into_iter();
290 self.force_reserve(additional:iterator.size_hint().0 as u32);
291 for c: char in iterator {
292 self.push_char(c);
293 }
294 }
295}
296
297impl<A> FromIterator<char> for Tendril<fmt::UTF8, A>
298where
299 A: Atomicity,
300{
301 from_iter_method!(char);
302}
303
304impl<A> Extend<u8> for Tendril<fmt::Bytes, A>
305where
306 A: Atomicity,
307{
308 #[inline]
309 fn extend<I>(&mut self, iterable: I)
310 where
311 I: IntoIterator<Item = u8>,
312 {
313 let iterator: ::IntoIter = iterable.into_iter();
314 self.force_reserve(additional:iterator.size_hint().0 as u32);
315 for b: u8 in iterator {
316 self.push_slice(&[b]);
317 }
318 }
319}
320
321impl<A> FromIterator<u8> for Tendril<fmt::Bytes, A>
322where
323 A: Atomicity,
324{
325 from_iter_method!(u8);
326}
327
328impl<'a, A> Extend<&'a u8> for Tendril<fmt::Bytes, A>
329where
330 A: Atomicity,
331{
332 #[inline]
333 fn extend<I>(&mut self, iterable: I)
334 where
335 I: IntoIterator<Item = &'a u8>,
336 {
337 let iterator: ::IntoIter = iterable.into_iter();
338 self.force_reserve(additional:iterator.size_hint().0 as u32);
339 for &b: u8 in iterator {
340 self.push_slice(&[b]);
341 }
342 }
343}
344
345impl<'a, A> FromIterator<&'a u8> for Tendril<fmt::Bytes, A>
346where
347 A: Atomicity,
348{
349 from_iter_method!(&'a u8);
350}
351
352impl<'a, A> Extend<&'a str> for Tendril<fmt::UTF8, A>
353where
354 A: Atomicity,
355{
356 #[inline]
357 fn extend<I>(&mut self, iterable: I)
358 where
359 I: IntoIterator<Item = &'a str>,
360 {
361 for s: &str in iterable {
362 self.push_slice(s);
363 }
364 }
365}
366
367impl<'a, A> FromIterator<&'a str> for Tendril<fmt::UTF8, A>
368where
369 A: Atomicity,
370{
371 from_iter_method!(&'a str);
372}
373
374impl<'a, A> Extend<&'a [u8]> for Tendril<fmt::Bytes, A>
375where
376 A: Atomicity,
377{
378 #[inline]
379 fn extend<I>(&mut self, iterable: I)
380 where
381 I: IntoIterator<Item = &'a [u8]>,
382 {
383 for s: &[u8] in iterable {
384 self.push_slice(s);
385 }
386 }
387}
388
389impl<'a, A> FromIterator<&'a [u8]> for Tendril<fmt::Bytes, A>
390where
391 A: Atomicity,
392{
393 from_iter_method!(&'a [u8]);
394}
395
396impl<'a, F, A> Extend<&'a Tendril<F, A>> for Tendril<F, A>
397where
398 F: fmt::Format + 'a,
399 A: Atomicity,
400{
401 #[inline]
402 fn extend<I>(&mut self, iterable: I)
403 where
404 I: IntoIterator<Item = &'a Tendril<F, A>>,
405 {
406 for t: &Tendril in iterable {
407 self.push_tendril(t);
408 }
409 }
410}
411
412impl<'a, F, A> FromIterator<&'a Tendril<F, A>> for Tendril<F, A>
413where
414 F: fmt::Format + 'a,
415 A: Atomicity,
416{
417 from_iter_method!(&'a Tendril<F, A>);
418}
419
420impl<F, A> Deref for Tendril<F, A>
421where
422 F: fmt::SliceFormat,
423 A: Atomicity,
424{
425 type Target = F::Slice;
426
427 #[inline]
428 fn deref(&self) -> &F::Slice {
429 unsafe { F::Slice::from_bytes(self.as_byte_slice()) }
430 }
431}
432
433impl<F, A> DerefMut for Tendril<F, A>
434where
435 F: fmt::SliceFormat,
436 A: Atomicity,
437{
438 #[inline]
439 fn deref_mut(&mut self) -> &mut F::Slice {
440 unsafe { F::Slice::from_mut_bytes(self.as_mut_byte_slice()) }
441 }
442}
443
444impl<F, A> Borrow<[u8]> for Tendril<F, A>
445where
446 F: fmt::SliceFormat,
447 A: Atomicity,
448{
449 fn borrow(&self) -> &[u8] {
450 self.as_byte_slice()
451 }
452}
453
454// Why not impl Borrow<str> for Tendril<fmt::UTF8>? str and [u8] hash differently,
455// and so a HashMap<StrTendril, _> would silently break if we indexed by str. Ick.
456// https://github.com/rust-lang/rust/issues/27108
457
458impl<F, A> PartialEq for Tendril<F, A>
459where
460 F: fmt::Format,
461 A: Atomicity,
462{
463 #[inline]
464 fn eq(&self, other: &Self) -> bool {
465 self.as_byte_slice() == other.as_byte_slice()
466 }
467
468 #[inline]
469 fn ne(&self, other: &Self) -> bool {
470 self.as_byte_slice() != other.as_byte_slice()
471 }
472}
473
// Equality is plain byte-slice equality (see `PartialEq`), which is a
// total equivalence relation.
impl<F, A> Eq for Tendril<F, A>
where
    F: fmt::Format,
    A: Atomicity,
{
}
480
481impl<F, A> PartialOrd for Tendril<F, A>
482where
483 F: fmt::SliceFormat,
484 <F as fmt::SliceFormat>::Slice: PartialOrd,
485 A: Atomicity,
486{
487 #[inline]
488 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
489 PartialOrd::partial_cmp(&**self, &**other)
490 }
491}
492
493impl<F, A> Ord for Tendril<F, A>
494where
495 F: fmt::SliceFormat,
496 <F as fmt::SliceFormat>::Slice: Ord,
497 A: Atomicity,
498{
499 #[inline]
500 fn cmp(&self, other: &Self) -> Ordering {
501 Ord::cmp(&**self, &**other)
502 }
503}
504
505impl<F, A> Default for Tendril<F, A>
506where
507 F: fmt::Format,
508 A: Atomicity,
509{
510 #[inline(always)]
511 fn default() -> Tendril<F, A> {
512 Tendril::new()
513 }
514}
515
516impl<F, A> strfmt::Debug for Tendril<F, A>
517where
518 F: fmt::SliceFormat + Default + strfmt::Debug,
519 <F as fmt::SliceFormat>::Slice: strfmt::Debug,
520 A: Atomicity,
521{
522 #[inline]
523 fn fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result {
524 let kind: &str = match self.ptr.get().get() {
525 p: usize if p <= MAX_INLINE_TAG => "inline",
526 p: usize if p & 1 == 1 => "shared",
527 _ => "owned",
528 };
529
530 write!(f, "Tendril<{:?}>({}: ", <F as Default>::default(), kind)?;
531 <<F as fmt::SliceFormat>::Slice as strfmt::Debug>::fmt(&**self, f)?;
532 write!(f, ")")
533 }
534}
535
536impl<F, A> hash::Hash for Tendril<F, A>
537where
538 F: fmt::Format,
539 A: Atomicity,
540{
541 #[inline]
542 fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
543 self.as_byte_slice().hash(state:hasher)
544 }
545}
546
547impl<F, A> Tendril<F, A>
548where
549 F: fmt::Format,
550 A: Atomicity,
551{
552 /// Create a new, empty `Tendril` in any format.
553 #[inline(always)]
554 pub fn new() -> Tendril<F, A> {
555 unsafe { Tendril::inline(&[]) }
556 }
557
558 /// Create a new, empty `Tendril` with a specified capacity.
559 #[inline]
560 pub fn with_capacity(capacity: u32) -> Tendril<F, A> {
561 let mut t: Tendril<F, A> = Tendril::new();
562 if capacity > MAX_INLINE_LEN as u32 {
563 unsafe {
564 t.make_owned_with_capacity(capacity);
565 }
566 }
567 t
568 }
569
570 /// Reserve space for additional bytes.
571 ///
572 /// This is only a suggestion. There are cases where `Tendril` will
573 /// decline to allocate until the buffer is actually modified.
574 #[inline]
575 pub fn reserve(&mut self, additional: u32) {
576 if !self.is_shared() {
577 // Don't grow a shared tendril because we'd have to copy
578 // right away.
579 self.force_reserve(additional);
580 }
581 }
582
583 /// Reserve space for additional bytes, even for shared buffers.
584 #[inline]
585 fn force_reserve(&mut self, additional: u32) {
586 let new_len = self.len32().checked_add(additional).expect(OFLOW);
587 if new_len > MAX_INLINE_LEN as u32 {
588 unsafe {
589 self.make_owned_with_capacity(new_len);
590 }
591 }
592 }
593
594 /// Get the length of the `Tendril`.
595 ///
596 /// This is named not to conflict with `len()` on the underlying
597 /// slice, if any.
598 #[inline(always)]
599 pub fn len32(&self) -> u32 {
600 match self.ptr.get().get() {
601 EMPTY_TAG => 0,
602 n if n <= MAX_INLINE_LEN => n as u32,
603 _ => unsafe { self.raw_len() },
604 }
605 }
606
607 /// Is the backing buffer shared?
608 #[inline]
609 pub fn is_shared(&self) -> bool {
610 let n = self.ptr.get().get();
611
612 (n > MAX_INLINE_TAG) && ((n & 1) == 1)
613 }
614
615 /// Is the backing buffer shared with this other `Tendril`?
616 #[inline]
617 pub fn is_shared_with(&self, other: &Tendril<F, A>) -> bool {
618 let n = self.ptr.get().get();
619
620 (n > MAX_INLINE_TAG) && (n == other.ptr.get().get())
621 }
622
623 /// Truncate to length 0 without discarding any owned storage.
624 #[inline]
625 pub fn clear(&mut self) {
626 if self.ptr.get().get() <= MAX_INLINE_TAG {
627 self.ptr
628 .set(unsafe { NonZeroUsize::new_unchecked(EMPTY_TAG) });
629 } else {
630 let (_, shared, _) = unsafe { self.assume_buf() };
631 if shared {
632 // No need to keep a reference alive for a 0-size slice.
633 *self = Tendril::new();
634 } else {
635 unsafe { self.set_len(0) };
636 }
637 }
638 }
639
640 /// Build a `Tendril` by copying a byte slice, if it conforms to the format.
641 #[inline]
642 pub fn try_from_byte_slice(x: &[u8]) -> Result<Tendril<F, A>, ()> {
643 match F::validate(x) {
644 true => Ok(unsafe { Tendril::from_byte_slice_without_validating(x) }),
645 false => Err(()),
646 }
647 }
648
649 /// View as uninterpreted bytes.
650 #[inline(always)]
651 pub fn as_bytes(&self) -> &Tendril<fmt::Bytes, A> {
652 unsafe { mem::transmute(self) }
653 }
654
655 /// Convert into uninterpreted bytes.
656 #[inline(always)]
657 pub fn into_bytes(self) -> Tendril<fmt::Bytes, A> {
658 unsafe { mem::transmute(self) }
659 }
660
661 /// Convert `self` into a type which is `Send`.
662 ///
663 /// If the tendril is owned or inline, this is free,
664 /// but if it's shared this will entail a copy of the contents.
665 #[inline]
666 pub fn into_send(mut self) -> SendTendril<F> {
667 self.make_owned();
668 SendTendril {
669 // This changes the header.refcount from A to NonAtomic, but that's
670 // OK because we have defined the format of A as a usize.
671 tendril: unsafe { mem::transmute(self) },
672 }
673 }
674
675 /// View as a superset format, for free.
676 #[inline(always)]
677 pub fn as_superset<Super>(&self) -> &Tendril<Super, A>
678 where
679 F: fmt::SubsetOf<Super>,
680 Super: fmt::Format,
681 {
682 unsafe { mem::transmute(self) }
683 }
684
685 /// Convert into a superset format, for free.
686 #[inline(always)]
687 pub fn into_superset<Super>(self) -> Tendril<Super, A>
688 where
689 F: fmt::SubsetOf<Super>,
690 Super: fmt::Format,
691 {
692 unsafe { mem::transmute(self) }
693 }
694
695 /// View as a subset format, if the `Tendril` conforms to that subset.
696 #[inline]
697 pub fn try_as_subset<Sub>(&self) -> Result<&Tendril<Sub, A>, ()>
698 where
699 Sub: fmt::SubsetOf<F>,
700 {
701 match Sub::revalidate_subset(self.as_byte_slice()) {
702 true => Ok(unsafe { mem::transmute(self) }),
703 false => Err(()),
704 }
705 }
706
707 /// Convert into a subset format, if the `Tendril` conforms to that subset.
708 #[inline]
709 pub fn try_into_subset<Sub>(self) -> Result<Tendril<Sub, A>, Self>
710 where
711 Sub: fmt::SubsetOf<F>,
712 {
713 match Sub::revalidate_subset(self.as_byte_slice()) {
714 true => Ok(unsafe { mem::transmute(self) }),
715 false => Err(self),
716 }
717 }
718
719 /// View as another format, if the bytes of the `Tendril` are valid for
720 /// that format.
721 #[inline]
722 pub fn try_reinterpret_view<Other>(&self) -> Result<&Tendril<Other, A>, ()>
723 where
724 Other: fmt::Format,
725 {
726 match Other::validate(self.as_byte_slice()) {
727 true => Ok(unsafe { mem::transmute(self) }),
728 false => Err(()),
729 }
730 }
731
732 /// Convert into another format, if the `Tendril` conforms to that format.
733 ///
734 /// This only re-validates the existing bytes under the new format. It
735 /// will *not* change the byte content of the tendril!
736 ///
737 /// See the `encode` and `decode` methods for character encoding conversion.
738 #[inline]
739 pub fn try_reinterpret<Other>(self) -> Result<Tendril<Other, A>, Self>
740 where
741 Other: fmt::Format,
742 {
743 match Other::validate(self.as_byte_slice()) {
744 true => Ok(unsafe { mem::transmute(self) }),
745 false => Err(self),
746 }
747 }
748
749 /// Push some bytes onto the end of the `Tendril`, if they conform to the
750 /// format.
751 #[inline]
752 pub fn try_push_bytes(&mut self, buf: &[u8]) -> Result<(), ()> {
753 match F::validate(buf) {
754 true => unsafe {
755 self.push_bytes_without_validating(buf);
756 Ok(())
757 },
758 false => Err(()),
759 }
760 }
761
762 /// Push another `Tendril` onto the end of this one.
763 #[inline]
764 pub fn push_tendril(&mut self, other: &Tendril<F, A>) {
765 let new_len = self.len32().checked_add(other.len32()).expect(OFLOW);
766
767 unsafe {
768 if (self.ptr.get().get() > MAX_INLINE_TAG) && (other.ptr.get().get() > MAX_INLINE_TAG) {
769 let (self_buf, self_shared, _) = self.assume_buf();
770 let (other_buf, other_shared, _) = other.assume_buf();
771
772 if self_shared
773 && other_shared
774 && (self_buf.data_ptr() == other_buf.data_ptr())
775 && other.aux() == self.aux() + self.raw_len()
776 {
777 self.set_len(new_len);
778 return;
779 }
780 }
781
782 self.push_bytes_without_validating(other.as_byte_slice())
783 }
784 }
785
786 /// Attempt to slice this `Tendril` as a new `Tendril`.
787 ///
788 /// This will share the buffer when possible. Mutating a shared buffer
789 /// will copy the contents.
790 ///
791 /// The offset and length are in bytes. The function will return
792 /// `Err` if these are out of bounds, or if the resulting slice
793 /// does not conform to the format.
794 #[inline]
795 pub fn try_subtendril(
796 &self,
797 offset: u32,
798 length: u32,
799 ) -> Result<Tendril<F, A>, SubtendrilError> {
800 let self_len = self.len32();
801 if offset > self_len || length > (self_len - offset) {
802 return Err(SubtendrilError::OutOfBounds);
803 }
804
805 unsafe {
806 let byte_slice = unsafe_slice(self.as_byte_slice(), offset as usize, length as usize);
807 if !F::validate_subseq(byte_slice) {
808 return Err(SubtendrilError::ValidationFailed);
809 }
810
811 Ok(self.unsafe_subtendril(offset, length))
812 }
813 }
814
815 /// Slice this `Tendril` as a new `Tendril`.
816 ///
817 /// Panics on bounds or validity check failure.
818 #[inline]
819 pub fn subtendril(&self, offset: u32, length: u32) -> Tendril<F, A> {
820 self.try_subtendril(offset, length).unwrap()
821 }
822
823 /// Try to drop `n` bytes from the front.
824 ///
825 /// Returns `Err` if the bytes are not available, or the suffix fails
826 /// validation.
827 #[inline]
828 pub fn try_pop_front(&mut self, n: u32) -> Result<(), SubtendrilError> {
829 if n == 0 {
830 return Ok(());
831 }
832 let old_len = self.len32();
833 if n > old_len {
834 return Err(SubtendrilError::OutOfBounds);
835 }
836 let new_len = old_len - n;
837
838 unsafe {
839 if !F::validate_suffix(unsafe_slice(
840 self.as_byte_slice(),
841 n as usize,
842 new_len as usize,
843 )) {
844 return Err(SubtendrilError::ValidationFailed);
845 }
846
847 self.unsafe_pop_front(n);
848 Ok(())
849 }
850 }
851
852 /// Drop `n` bytes from the front.
853 ///
854 /// Panics if the bytes are not available, or the suffix fails
855 /// validation.
856 #[inline]
857 pub fn pop_front(&mut self, n: u32) {
858 self.try_pop_front(n).unwrap()
859 }
860
861 /// Drop `n` bytes from the back.
862 ///
863 /// Returns `Err` if the bytes are not available, or the prefix fails
864 /// validation.
865 #[inline]
866 pub fn try_pop_back(&mut self, n: u32) -> Result<(), SubtendrilError> {
867 if n == 0 {
868 return Ok(());
869 }
870 let old_len = self.len32();
871 if n > old_len {
872 return Err(SubtendrilError::OutOfBounds);
873 }
874 let new_len = old_len - n;
875
876 unsafe {
877 if !F::validate_prefix(unsafe_slice(self.as_byte_slice(), 0, new_len as usize)) {
878 return Err(SubtendrilError::ValidationFailed);
879 }
880
881 self.unsafe_pop_back(n);
882 Ok(())
883 }
884 }
885
886 /// Drop `n` bytes from the back.
887 ///
888 /// Panics if the bytes are not available, or the prefix fails
889 /// validation.
890 #[inline]
891 pub fn pop_back(&mut self, n: u32) {
892 self.try_pop_back(n).unwrap()
893 }
894
895 /// View as another format, without validating.
896 #[inline(always)]
897 pub unsafe fn reinterpret_view_without_validating<Other>(&self) -> &Tendril<Other, A>
898 where
899 Other: fmt::Format,
900 {
901 mem::transmute(self)
902 }
903
904 /// Convert into another format, without validating.
905 #[inline(always)]
906 pub unsafe fn reinterpret_without_validating<Other>(self) -> Tendril<Other, A>
907 where
908 Other: fmt::Format,
909 {
910 mem::transmute(self)
911 }
912
913 /// Build a `Tendril` by copying a byte slice, without validating.
914 #[inline]
915 pub unsafe fn from_byte_slice_without_validating(x: &[u8]) -> Tendril<F, A> {
916 assert!(x.len() <= buf32::MAX_LEN);
917 if x.len() <= MAX_INLINE_LEN {
918 Tendril::inline(x)
919 } else {
920 Tendril::owned_copy(x)
921 }
922 }
923
924 /// Push some bytes onto the end of the `Tendril`, without validating.
925 #[inline]
926 pub unsafe fn push_bytes_without_validating(&mut self, buf: &[u8]) {
927 assert!(buf.len() <= buf32::MAX_LEN);
928
929 let Fixup {
930 drop_left,
931 drop_right,
932 insert_len,
933 insert_bytes,
934 } = F::fixup(self.as_byte_slice(), buf);
935
936 // FIXME: think more about overflow
937 let adj_len = self.len32() + insert_len - drop_left;
938
939 let new_len = adj_len.checked_add(buf.len() as u32).expect(OFLOW) - drop_right;
940
941 let drop_left = drop_left as usize;
942 let drop_right = drop_right as usize;
943
944 if new_len <= MAX_INLINE_LEN as u32 {
945 let mut tmp = [0_u8; MAX_INLINE_LEN];
946 {
947 let old = self.as_byte_slice();
948 let mut dest = tmp.as_mut_ptr();
949 copy_and_advance(&mut dest, unsafe_slice(old, 0, old.len() - drop_left));
950 copy_and_advance(
951 &mut dest,
952 unsafe_slice(&insert_bytes, 0, insert_len as usize),
953 );
954 copy_and_advance(
955 &mut dest,
956 unsafe_slice(buf, drop_right, buf.len() - drop_right),
957 );
958 }
959 *self = Tendril::inline(&tmp[..new_len as usize]);
960 } else {
961 self.make_owned_with_capacity(new_len);
962 let (owned, _, _) = self.assume_buf();
963 let mut dest = owned
964 .data_ptr()
965 .offset((owned.len as usize - drop_left) as isize);
966 copy_and_advance(
967 &mut dest,
968 unsafe_slice(&insert_bytes, 0, insert_len as usize),
969 );
970 copy_and_advance(
971 &mut dest,
972 unsafe_slice(buf, drop_right, buf.len() - drop_right),
973 );
974 self.set_len(new_len);
975 }
976 }
977
978 /// Slice this `Tendril` as a new `Tendril`.
979 ///
980 /// Does not check validity or bounds!
981 #[inline]
982 pub unsafe fn unsafe_subtendril(&self, offset: u32, length: u32) -> Tendril<F, A> {
983 if length <= MAX_INLINE_LEN as u32 {
984 Tendril::inline(unsafe_slice(
985 self.as_byte_slice(),
986 offset as usize,
987 length as usize,
988 ))
989 } else {
990 self.make_buf_shared();
991 self.incref();
992 let (buf, _, _) = self.assume_buf();
993 Tendril::shared(buf, self.aux() + offset, length)
994 }
995 }
996
997 /// Drop `n` bytes from the front.
998 ///
999 /// Does not check validity or bounds!
1000 #[inline]
1001 pub unsafe fn unsafe_pop_front(&mut self, n: u32) {
1002 let new_len = self.len32() - n;
1003 if new_len <= MAX_INLINE_LEN as u32 {
1004 *self = Tendril::inline(unsafe_slice(
1005 self.as_byte_slice(),
1006 n as usize,
1007 new_len as usize,
1008 ));
1009 } else {
1010 self.make_buf_shared();
1011 self.set_aux(self.aux() + n);
1012 let len = self.raw_len();
1013 self.set_len(len - n);
1014 }
1015 }
1016
1017 /// Drop `n` bytes from the back.
1018 ///
1019 /// Does not check validity or bounds!
1020 #[inline]
1021 pub unsafe fn unsafe_pop_back(&mut self, n: u32) {
1022 let new_len = self.len32() - n;
1023 if new_len <= MAX_INLINE_LEN as u32 {
1024 *self = Tendril::inline(unsafe_slice(self.as_byte_slice(), 0, new_len as usize));
1025 } else {
1026 self.make_buf_shared();
1027 let len = self.raw_len();
1028 self.set_len(len - n);
1029 }
1030 }
1031
1032 #[inline]
1033 unsafe fn incref(&self) {
1034 (*self.header()).refcount.increment();
1035 }
1036
1037 #[inline]
1038 unsafe fn make_buf_shared(&self) {
1039 let p = self.ptr.get().get();
1040 if p & 1 == 0 {
1041 let header = p as *mut Header<A>;
1042 (*header).cap = self.aux();
1043
1044 self.ptr.set(NonZeroUsize::new_unchecked(p | 1));
1045 self.set_aux(0);
1046 }
1047 }
1048
1049 // This is not public as it is of no practical value to users.
1050 // By and large they shouldn't need to worry about the distinction at all,
1051 // and going out of your way to make it owned is pointless.
1052 #[inline]
1053 fn make_owned(&mut self) {
1054 unsafe {
1055 let ptr = self.ptr.get().get();
1056 if ptr <= MAX_INLINE_TAG || (ptr & 1) == 1 {
1057 *self = Tendril::owned_copy(self.as_byte_slice());
1058 }
1059 }
1060 }
1061
1062 #[inline]
1063 unsafe fn make_owned_with_capacity(&mut self, cap: u32) {
1064 self.make_owned();
1065 let mut buf = self.assume_buf().0;
1066 buf.grow(cap);
1067 self.ptr.set(NonZeroUsize::new_unchecked(buf.ptr as usize));
1068 self.set_aux(buf.cap);
1069 }
1070
1071 #[inline(always)]
1072 unsafe fn header(&self) -> *mut Header<A> {
1073 (self.ptr.get().get() & !1) as *mut Header<A>
1074 }
1075
1076 #[inline]
1077 unsafe fn assume_buf(&self) -> (Buf32<Header<A>>, bool, u32) {
1078 let ptr = self.ptr.get().get();
1079 let header = self.header();
1080 let shared = (ptr & 1) == 1;
1081 let (cap, offset) = match shared {
1082 true => ((*header).cap, self.aux()),
1083 false => (self.aux(), 0),
1084 };
1085
1086 (
1087 Buf32 {
1088 ptr: header,
1089 len: offset + self.len32(),
1090 cap: cap,
1091 },
1092 shared,
1093 offset,
1094 )
1095 }
1096
1097 #[inline]
1098 unsafe fn inline(x: &[u8]) -> Tendril<F, A> {
1099 let len = x.len();
1100 let t = Tendril {
1101 ptr: Cell::new(inline_tag(len as u32)),
1102 buf: UnsafeCell::new(Buffer { inline: [0; 8] }),
1103 marker: PhantomData,
1104 refcount_marker: PhantomData,
1105 };
1106 ptr::copy_nonoverlapping(x.as_ptr(), (*t.buf.get()).inline.as_mut_ptr(), len);
1107 t
1108 }
1109
1110 #[inline]
1111 unsafe fn owned(x: Buf32<Header<A>>) -> Tendril<F, A> {
1112 Tendril {
1113 ptr: Cell::new(NonZeroUsize::new_unchecked(x.ptr as usize)),
1114 buf: UnsafeCell::new(Buffer {
1115 heap: Heap {
1116 len: x.len,
1117 aux: x.cap,
1118 },
1119 }),
1120 marker: PhantomData,
1121 refcount_marker: PhantomData,
1122 }
1123 }
1124
1125 #[inline]
1126 unsafe fn owned_copy(x: &[u8]) -> Tendril<F, A> {
1127 let len32 = x.len() as u32;
1128 let mut b = Buf32::with_capacity(len32, Header::new());
1129 ptr::copy_nonoverlapping(x.as_ptr(), b.data_ptr(), x.len());
1130 b.len = len32;
1131 Tendril::owned(b)
1132 }
1133
1134 #[inline]
1135 unsafe fn shared(buf: Buf32<Header<A>>, off: u32, len: u32) -> Tendril<F, A> {
1136 Tendril {
1137 ptr: Cell::new(NonZeroUsize::new_unchecked((buf.ptr as usize) | 1)),
1138 buf: UnsafeCell::new(Buffer {
1139 heap: Heap { len, aux: off },
1140 }),
1141 marker: PhantomData,
1142 refcount_marker: PhantomData,
1143 }
1144 }
1145
1146 #[inline]
1147 fn as_byte_slice<'a>(&'a self) -> &'a [u8] {
1148 unsafe {
1149 match self.ptr.get().get() {
1150 EMPTY_TAG => &[],
1151 n if n <= MAX_INLINE_LEN => (*self.buf.get()).inline.get_unchecked(..n),
1152 _ => {
1153 let (buf, _, offset) = self.assume_buf();
1154 copy_lifetime(
1155 self,
1156 unsafe_slice(buf.data(), offset as usize, self.len32() as usize),
1157 )
1158 }
1159 }
1160 }
1161 }
1162
1163 // There's no need to worry about locking on an atomic Tendril, because it makes it unique as
1164 // soon as you do that.
1165 #[inline]
1166 fn as_mut_byte_slice<'a>(&'a mut self) -> &'a mut [u8] {
1167 unsafe {
1168 match self.ptr.get().get() {
1169 EMPTY_TAG => &mut [],
1170 n if n <= MAX_INLINE_LEN => (*self.buf.get()).inline.get_unchecked_mut(..n),
1171 _ => {
1172 self.make_owned();
1173 let (mut buf, _, offset) = self.assume_buf();
1174 let len = self.len32() as usize;
1175 copy_lifetime_mut(self, unsafe_slice_mut(buf.data_mut(), offset as usize, len))
1176 }
1177 }
1178 }
1179 }
1180
1181 unsafe fn raw_len(&self) -> u32 {
1182 (*self.buf.get()).heap.len
1183 }
1184
1185 unsafe fn set_len(&mut self, len: u32) {
1186 (*self.buf.get()).heap.len = len;
1187 }
1188
1189 unsafe fn aux(&self) -> u32 {
1190 (*self.buf.get()).heap.aux
1191 }
1192
1193 unsafe fn set_aux(&self, aux: u32) {
1194 (*self.buf.get()).heap.aux = aux;
1195 }
1196}
1197
1198impl<F, A> Tendril<F, A>
1199where
1200 F: fmt::SliceFormat,
1201 A: Atomicity,
1202{
1203 /// Build a `Tendril` by copying a slice.
1204 #[inline]
1205 pub fn from_slice(x: &F::Slice) -> Tendril<F, A> {
1206 unsafe { Tendril::from_byte_slice_without_validating(x.as_bytes()) }
1207 }
1208
1209 /// Push a slice onto the end of the `Tendril`.
1210 #[inline]
1211 pub fn push_slice(&mut self, x: &F::Slice) {
1212 unsafe { self.push_bytes_without_validating(buf:x.as_bytes()) }
1213 }
1214}
1215
1216/// A simple wrapper to make `Tendril` `Send`.
1217///
1218/// Although there is a certain subset of the operations on a `Tendril` that a `SendTendril` could
1219/// reasonably implement, in order to clearly separate concerns this type is deliberately
1220/// minimalist, acting as a safe encapsulation around the invariants which permit `Send`ness and
1221/// behaving as an opaque object.
1222///
1223/// A `SendTendril` may be produced by `Tendril.into_send()` or `SendTendril::from(tendril)`,
1224/// and may be returned to a `Tendril` by `Tendril::from(self)`.
1225#[derive(Clone)]
1226pub struct SendTendril<F>
1227where
1228 F: fmt::Format,
1229{
1230 tendril: Tendril<F>,
1231}
1232
1233unsafe impl<F> Send for SendTendril<F> where F: fmt::Format {}
1234
1235impl<F, A> From<Tendril<F, A>> for SendTendril<F>
1236where
1237 F: fmt::Format,
1238 A: Atomicity,
1239{
1240 #[inline]
1241 fn from(tendril: Tendril<F, A>) -> SendTendril<F> {
1242 tendril.into_send()
1243 }
1244}
1245
1246impl<F, A> From<SendTendril<F>> for Tendril<F, A>
1247where
1248 F: fmt::Format,
1249 A: Atomicity,
1250{
1251 #[inline]
1252 fn from(send: SendTendril<F>) -> Tendril<F, A> {
1253 unsafe { mem::transmute(src:send.tendril) }
1254 // header.refcount may have been initialised as an Atomic or a NonAtomic, but the value
1255 // will be the same (1) regardless, because the layout is defined.
1256 // Thus we don't need to fiddle about resetting it or anything like that.
1257 }
1258}
1259
1260/// `Tendril`-related methods for Rust slices.
1261pub trait SliceExt<F>: fmt::Slice
1262where
1263 F: fmt::SliceFormat<Slice = Self>,
1264{
1265 /// Make a `Tendril` from this slice.
1266 #[inline]
1267 fn to_tendril(&self) -> Tendril<F> {
1268 // It should be done thusly, but at the time of writing the defaults don't help inference:
1269 //fn to_tendril<A = NonAtomic>(&self) -> Tendril<Self::Format, A>
1270 // where A: Atomicity,
1271 //{
1272 Tendril::from_slice(self)
1273 }
1274}
1275
1276impl SliceExt<fmt::UTF8> for str {}
1277impl SliceExt<fmt::Bytes> for [u8] {}
1278
1279impl<F, A> Tendril<F, A>
1280where
1281 F: for<'a> fmt::CharFormat<'a>,
1282 A: Atomicity,
1283{
1284 /// Remove and return the first character, if any.
1285 #[inline]
1286 pub fn pop_front_char<'a>(&'a mut self) -> Option<char> {
1287 unsafe {
1288 let next_char; // first char in iterator
1289 let mut skip = 0; // number of bytes to skip, or 0 to clear
1290
1291 {
1292 // <--+
1293 // | Creating an iterator borrows self, so introduce a
1294 // +- scope to contain the borrow (that way we can mutate
1295 // self below, after this scope exits).
1296
1297 let mut iter = F::char_indices(self.as_byte_slice());
1298 match iter.next() {
1299 Some((_, c)) => {
1300 next_char = Some(c);
1301 if let Some((n, _)) = iter.next() {
1302 skip = n as u32;
1303 }
1304 }
1305 None => {
1306 next_char = None;
1307 }
1308 }
1309 }
1310
1311 if skip != 0 {
1312 self.unsafe_pop_front(skip);
1313 } else {
1314 self.clear();
1315 }
1316
1317 next_char
1318 }
1319 }
1320
1321 /// Remove and return a run of characters at the front of the `Tendril`
1322 /// which are classified the same according to the function `classify`.
1323 ///
1324 /// Returns `None` on an empty string.
1325 #[inline]
1326 pub fn pop_front_char_run<'a, C, R>(&'a mut self, mut classify: C) -> Option<(Tendril<F, A>, R)>
1327 where
1328 C: FnMut(char) -> R,
1329 R: PartialEq,
1330 {
1331 let (class, first_mismatch);
1332 {
1333 let mut chars = unsafe { F::char_indices(self.as_byte_slice()) };
1334 let (_, first) = unwrap_or_return!(chars.next(), None);
1335 class = classify(first);
1336 first_mismatch = chars.find(|&(_, ch)| &classify(ch) != &class);
1337 }
1338
1339 match first_mismatch {
1340 Some((idx, _)) => unsafe {
1341 let t = self.unsafe_subtendril(0, idx as u32);
1342 self.unsafe_pop_front(idx as u32);
1343 Some((t, class))
1344 },
1345 None => {
1346 let t = self.clone();
1347 self.clear();
1348 Some((t, class))
1349 }
1350 }
1351 }
1352
1353 /// Push a character, if it can be represented in this format.
1354 #[inline]
1355 pub fn try_push_char(&mut self, c: char) -> Result<(), ()> {
1356 F::encode_char(c, |b| unsafe {
1357 self.push_bytes_without_validating(b);
1358 })
1359 }
1360}
1361
1362/// Extension trait for `io::Read`.
1363pub trait ReadExt: io::Read {
1364 fn read_to_tendril<A>(&mut self, buf: &mut Tendril<fmt::Bytes, A>) -> io::Result<usize>
1365 where
1366 A: Atomicity;
1367}
1368
1369impl<T> ReadExt for T
1370where
1371 T: io::Read,
1372{
1373 /// Read all bytes until EOF.
1374 fn read_to_tendril<A>(&mut self, buf: &mut Tendril<fmt::Bytes, A>) -> io::Result<usize>
1375 where
1376 A: Atomicity,
1377 {
1378 // Adapted from libstd/io/mod.rs.
1379 const DEFAULT_BUF_SIZE: u32 = 64 * 1024;
1380
1381 let start_len = buf.len();
1382 let mut len = start_len;
1383 let mut new_write_size = 16;
1384 let ret;
1385 loop {
1386 if len == buf.len() {
1387 if new_write_size < DEFAULT_BUF_SIZE {
1388 new_write_size *= 2;
1389 }
1390 // FIXME: this exposes uninitialized bytes to a generic R type
1391 // this is fine for R=File which never reads these bytes,
1392 // but user-defined types might.
1393 // The standard library pushes zeros to `Vec<u8>` for that reason.
1394 unsafe {
1395 buf.push_uninitialized(new_write_size);
1396 }
1397 }
1398
1399 match self.read(&mut buf[len..]) {
1400 Ok(0) => {
1401 ret = Ok(len - start_len);
1402 break;
1403 }
1404 Ok(n) => len += n,
1405 Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
1406 Err(e) => {
1407 ret = Err(e);
1408 break;
1409 }
1410 }
1411 }
1412
1413 let buf_len = buf.len32();
1414 buf.pop_back(buf_len - (len as u32));
1415 ret
1416 }
1417}
1418
1419impl<A> io::Write for Tendril<fmt::Bytes, A>
1420where
1421 A: Atomicity,
1422{
1423 #[inline]
1424 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
1425 self.push_slice(buf);
1426 Ok(buf.len())
1427 }
1428
1429 #[inline]
1430 fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
1431 self.push_slice(buf);
1432 Ok(())
1433 }
1434
1435 #[inline(always)]
1436 fn flush(&mut self) -> io::Result<()> {
1437 Ok(())
1438 }
1439}
1440
/// Lets the `encoding` crate write encoder output straight into a byte tendril.
#[cfg(feature = "encoding")]
impl<A: Atomicity> encoding::ByteWriter for Tendril<fmt::Bytes, A> {
    #[inline]
    fn write_byte(&mut self, b: u8) {
        let single = [b];
        self.push_slice(&single);
    }

    #[inline]
    fn write_bytes(&mut self, v: &[u8]) {
        self.push_slice(v);
    }

    #[inline]
    fn writer_hint(&mut self, additional: usize) {
        // `reserve` takes a `u32`, so clamp hints larger than `u32::MAX`.
        let capped = additional.min(u32::MAX as usize) as u32;
        self.reserve(capped);
    }
}
1461
1462impl<F, A> Tendril<F, A>
1463where
1464 A: Atomicity,
1465 F: fmt::SliceFormat<Slice = [u8]>,
1466{
1467 /// Decode from some character encoding into UTF-8.
1468 ///
1469 /// See the [rust-encoding docs](https://lifthrasiir.github.io/rust-encoding/encoding/)
1470 /// for more information.
1471 #[inline]
1472 #[cfg(feature = "encoding")]
1473 pub fn decode(
1474 &self,
1475 encoding: EncodingRef,
1476 trap: DecoderTrap,
1477 ) -> Result<Tendril<fmt::UTF8, A>, ::std::borrow::Cow<'static, str>> {
1478 let mut ret = Tendril::new();
1479 encoding.decode_to(&*self, trap, &mut ret).map(|_| ret)
1480 }
1481
1482 /// Push "uninitialized bytes" onto the end.
1483 ///
1484 /// Really, this grows the tendril without writing anything to the new area.
1485 /// It's only defined for byte tendrils because it's only useful if you
1486 /// plan to then mutate the buffer.
1487 #[inline]
1488 pub unsafe fn push_uninitialized(&mut self, n: u32) {
1489 let new_len = self.len32().checked_add(n).expect(OFLOW);
1490 if new_len <= MAX_INLINE_LEN as u32 && self.ptr.get().get() <= MAX_INLINE_TAG {
1491 self.ptr.set(inline_tag(new_len))
1492 } else {
1493 self.make_owned_with_capacity(new_len);
1494 self.set_len(new_len);
1495 }
1496 }
1497}
1498
1499impl<A> strfmt::Display for Tendril<fmt::UTF8, A>
1500where
1501 A: Atomicity,
1502{
1503 #[inline]
1504 fn fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result {
1505 <str as strfmt::Display>::fmt(&**self, f)
1506 }
1507}
1508
1509impl<A> str::FromStr for Tendril<fmt::UTF8, A>
1510where
1511 A: Atomicity,
1512{
1513 type Err = ();
1514
1515 #[inline]
1516 fn from_str(s: &str) -> Result<Self, ()> {
1517 Ok(Tendril::from_slice(s))
1518 }
1519}
1520
1521impl<A> strfmt::Write for Tendril<fmt::UTF8, A>
1522where
1523 A: Atomicity,
1524{
1525 #[inline]
1526 fn write_str(&mut self, s: &str) -> strfmt::Result {
1527 self.push_slice(s);
1528 Ok(())
1529 }
1530}
1531
/// Lets the `encoding` crate write decoder output straight into a UTF-8 tendril.
#[cfg(feature = "encoding")]
impl<A: Atomicity> encoding::StringWriter for Tendril<fmt::UTF8, A> {
    #[inline]
    fn write_char(&mut self, c: char) {
        Tendril::push_char(self, c);
    }

    #[inline]
    fn write_str(&mut self, s: &str) {
        Tendril::push_slice(self, s);
    }

    #[inline]
    fn writer_hint(&mut self, additional: usize) {
        // `reserve` takes a `u32`, so clamp hints larger than `u32::MAX`.
        let capped = additional.min(u32::MAX as usize) as u32;
        self.reserve(capped);
    }
}
1552
1553impl<A> Tendril<fmt::UTF8, A>
1554where
1555 A: Atomicity,
1556{
1557 /// Encode from UTF-8 into some other character encoding.
1558 ///
1559 /// See the [rust-encoding docs](https://lifthrasiir.github.io/rust-encoding/encoding/)
1560 /// for more information.
1561 #[inline]
1562 #[cfg(feature = "encoding")]
1563 pub fn encode(
1564 &self,
1565 encoding: EncodingRef,
1566 trap: EncoderTrap,
1567 ) -> Result<Tendril<fmt::Bytes, A>, ::std::borrow::Cow<'static, str>> {
1568 let mut ret = Tendril::new();
1569 encoding.encode_to(&*self, trap, &mut ret).map(|_| ret)
1570 }
1571
1572 /// Push a character onto the end.
1573 #[inline]
1574 pub fn push_char(&mut self, c: char) {
1575 unsafe {
1576 self.push_bytes_without_validating(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
1577 }
1578 }
1579
1580 /// Create a `Tendril` from a single character.
1581 #[inline]
1582 pub fn from_char(c: char) -> Tendril<fmt::UTF8, A> {
1583 let mut t: Tendril<fmt::UTF8, A> = Tendril::new();
1584 t.push_char(c);
1585 t
1586 }
1587
1588 /// Helper for the `format_tendril!` macro.
1589 #[inline]
1590 pub fn format(args: strfmt::Arguments) -> Tendril<fmt::UTF8, A> {
1591 use std::fmt::Write;
1592 let mut output: Tendril<fmt::UTF8, A> = Tendril::new();
1593 let _ = write!(&mut output, "{}", args);
1594 output
1595 }
1596}
1597
/// Build a `StrTendril` from a format string and arguments.
///
/// Accepts the same input as the standard `format!` macro.
#[macro_export]
macro_rules! format_tendril {
    ($($tokens:tt)*) => {
        $crate::StrTendril::format(format_args!($($tokens)*))
    };
}
1605
1606impl<'a, F, A> From<&'a F::Slice> for Tendril<F, A>
1607where
1608 F: fmt::SliceFormat,
1609 A: Atomicity,
1610{
1611 #[inline]
1612 fn from(input: &F::Slice) -> Tendril<F, A> {
1613 Tendril::from_slice(input)
1614 }
1615}
1616
1617impl<A> From<String> for Tendril<fmt::UTF8, A>
1618where
1619 A: Atomicity,
1620{
1621 #[inline]
1622 fn from(input: String) -> Tendril<fmt::UTF8, A> {
1623 Tendril::from_slice(&*input)
1624 }
1625}
1626
1627impl<F, A> AsRef<F::Slice> for Tendril<F, A>
1628where
1629 F: fmt::SliceFormat,
1630 A: Atomicity,
1631{
1632 #[inline]
1633 fn as_ref(&self) -> &F::Slice {
1634 &**self
1635 }
1636}
1637
1638impl<A> From<Tendril<fmt::UTF8, A>> for String
1639where
1640 A: Atomicity,
1641{
1642 #[inline]
1643 fn from(input: Tendril<fmt::UTF8, A>) -> String {
1644 String::from(&*input)
1645 }
1646}
1647
1648impl<'a, A> From<&'a Tendril<fmt::UTF8, A>> for String
1649where
1650 A: Atomicity,
1651{
1652 #[inline]
1653 fn from(input: &'a Tendril<fmt::UTF8, A>) -> String {
1654 String::from(&**input)
1655 }
1656}
1657
1658#[cfg(all(test, feature = "bench"))]
1659#[path = "bench.rs"]
1660mod bench;
1661
1662#[cfg(test)]
1663mod test {
1664 use super::{
1665 Atomic, ByteTendril, Header, NonAtomic, ReadExt, SendTendril, SliceExt, StrTendril, Tendril,
1666 };
1667 use fmt;
1668 use std::iter;
1669 use std::thread;
1670
// Compile-time check: instantiating this requires `T: Send`.
fn assert_send<T>()
where
    T: Send,
{
}
1672
#[test]
fn smoke_test() {
    // String tendrils read back as the text they were built from.
    let empty_str = "".to_tendril();
    let short_str = "abc".to_tendril();
    let long_str = "Hello, world!".to_tendril();
    assert_eq!("", &*empty_str);
    assert_eq!("abc", &*short_str);
    assert_eq!("Hello, world!", &*long_str);

    // Same for byte tendrils.
    let empty_bytes = b"".to_tendril();
    let short_bytes = b"abc".to_tendril();
    let long_bytes = b"Hello, world!".to_tendril();
    assert_eq!(b"", &*empty_bytes);
    assert_eq!(b"abc", &*short_bytes);
    assert_eq!(b"Hello, world!", &*long_bytes);
}
1683
#[test]
fn assert_sizes() {
    use std::mem::size_of;

    // A zero-sized type with a `Drop` impl only has a non-zero size if the
    // compiler stores drop flags inline.
    struct EmptyWithDrop;
    impl Drop for EmptyWithDrop {
        fn drop(&mut self) {}
    }
    let drop_flag_bytes = if size_of::<EmptyWithDrop>() > 0 { 1 } else { 0 };

    let pointer_bytes = size_of::<*const ()>();
    let expected = pointer_bytes + 8 + drop_flag_bytes;

    assert_eq!(expected, size_of::<ByteTendril>());
    assert_eq!(expected, size_of::<StrTendril>());

    // Wrapping in `Option` must not add any bytes.
    assert_eq!(expected, size_of::<Option<ByteTendril>>());
    assert_eq!(expected, size_of::<Option<StrTendril>>());

    // The header is two pointers wide, whichever atomicity is used.
    assert_eq!(pointer_bytes * 2, size_of::<Header<Atomic>>());
    assert_eq!(size_of::<Header<Atomic>>(), size_of::<Header<NonAtomic>>());
}
1716
#[test]
fn validate_utf8() {
    // Byte tendrils accept anything.
    assert!(ByteTendril::try_from_byte_slice(b"\xFF").is_ok());

    // String tendrils reject invalid or truncated UTF-8.
    let rejected: [&[u8]; 4] = [
        b"\xFF",
        b"\xEA\x99\xFF",
        b"\xEA\x99",
        b"\xEA\x99\xAE\xEA",
    ];
    for &bytes in rejected.iter() {
        assert!(StrTendril::try_from_byte_slice(bytes).is_err());
    }

    let valid = StrTendril::try_from_byte_slice(b"\xEA\x99\xAE").unwrap();
    assert_eq!("\u{a66e}", &*valid);

    // Pushing is validated too: each fragment alone fails, the whole sequence succeeds.
    let mut pushed = StrTendril::new();
    assert!(pushed.try_push_bytes(b"\xEA\x99").is_err());
    assert!(pushed.try_push_bytes(b"\xAE").is_err());
    assert!(pushed.try_push_bytes(b"\xEA\x99\xAE").is_ok());
    assert_eq!("\u{a66e}", &*pushed);
}
1735
#[test]
fn share_and_unshare() {
    let original = b"foobarbaz".to_tendril();
    assert_eq!(b"foobarbaz", &*original);
    assert!(!original.is_shared());

    // Cloning shares the buffer between both handles.
    let mut copy = original.clone();
    assert_eq!(original.as_ptr(), copy.as_ptr());
    assert!(original.is_shared());
    assert!(copy.is_shared());

    // Pushing onto the copy gives it a buffer of its own.
    copy.push_slice(b"quux");
    assert_eq!(b"foobarbaz", &*original);
    assert_eq!(b"foobarbazquux", &*copy);
    assert!(original.as_ptr() != copy.as_ptr());
    assert!(!copy.is_shared());
}
1753
#[test]
fn format_display() {
    let rendered = format!("{}", "foobar".to_tendril());
    assert_eq!("foobar", &*rendered);

    let mut head = "foo".to_tendril();
    assert_eq!("foo", &*format!("{}", head));

    // A clone displays the same text as its source.
    let snapshot = head.clone();
    assert_eq!("foo", &*format!("{}", head));
    assert_eq!("foo", &*format!("{}", snapshot));

    // Growing one handle leaves the other untouched.
    head.push_slice("barbaz!");
    assert_eq!("foobarbaz!", &*format!("{}", head));
    assert_eq!("foo", &*format!("{}", snapshot));
}
1769
#[test]
fn format_debug() {
    // Short contents are reported as inline.
    let inline_str = format!("{:?}", "foobar".to_tendril());
    assert_eq!(r#"Tendril<UTF8>(inline: "foobar")"#, &*inline_str);

    let inline_bytes = format!("{:?}", b"foobar".to_tendril());
    assert_eq!(
        r#"Tendril<Bytes>(inline: [102, 111, 111, 98, 97, 114])"#,
        &*inline_bytes
    );

    // Longer contents start out owned ...
    let long = "anextralongstring".to_tendril();
    let owned_repr = format!("{:?}", long);
    assert_eq!(r#"Tendril<UTF8>(owned: "anextralongstring")"#, &*owned_repr);

    // ... and are reported as shared once a clone has been taken.
    let _ = long.clone();
    let shared_repr = format!("{:?}", long);
    assert_eq!(r#"Tendril<UTF8>(shared: "anextralongstring")"#, &*shared_repr);
}
1792
#[test]
fn subtendril() {
    // Short source string.
    let short = "foo-bar";
    assert_eq!("foo".to_tendril(), short.to_tendril().subtendril(0, 3));
    assert_eq!("bar".to_tendril(), short.to_tendril().subtendril(4, 3));

    let mut trimmed = short.to_tendril();
    trimmed.pop_front(2);
    assert_eq!("o-bar".to_tendril(), trimmed);
    trimmed.pop_back(1);
    assert_eq!("o-ba".to_tendril(), trimmed);

    // Longer source string.
    let long = "foo-a-longer-string-bar-baz";
    assert_eq!("foo".to_tendril(), long.to_tendril().subtendril(0, 3));
    assert_eq!("oo-a-".to_tendril(), long.to_tendril().subtendril(1, 5));
    assert_eq!("bar".to_tendril(), long.to_tendril().subtendril(20, 3));

    // Trimming a long tendril at both ends leaves it flagged as shared.
    let mut trimmed = "another rather long string".to_tendril();
    trimmed.pop_front(2);
    assert!(trimmed.starts_with("other rather"));
    trimmed.pop_back(1);
    assert_eq!("other rather long strin".to_tendril(), trimmed);
    assert!(trimmed.is_shared());
}
1824
#[test]
fn subtendril_invalid() {
    // Ranges that would cut a three-byte character.
    for &(offset, length) in &[(0, 2), (1, 2)] {
        assert!("\u{a66e}"
            .to_tendril()
            .try_subtendril(offset, length)
            .is_err());
    }

    // Ranges that would cut a four-byte character.
    let four_byte_cuts = [
        (0, 3),
        (0, 2),
        (0, 1),
        (1, 3),
        (1, 2),
        (1, 1),
        (2, 2),
        (2, 1),
        (3, 1),
    ];
    for &(offset, length) in four_byte_cuts.iter() {
        assert!("\u{1f4a9}"
            .to_tendril()
            .try_subtendril(offset, length)
            .is_err());
    }

    // Popping from the front only works on the character boundary.
    let mut front = "\u{1f4a9}zzzzzz".to_tendril();
    for n in 1..4 {
        assert!(front.try_pop_front(n).is_err());
    }
    assert!(front.try_pop_front(4).is_ok());
    assert_eq!("zzzzzz", &*front);

    // Likewise from the back.
    let mut back = "zzzzzz\u{1f4a9}".to_tendril();
    for n in 1..4 {
        assert!(back.try_pop_back(n).is_err());
    }
    assert!(back.try_pop_back(4).is_ok());
    assert_eq!("zzzzzz", &*back);
}
1854
#[test]
fn conversion() {
    // str tendril -> bytes, by reference and by value.
    let foo_bytes = [0x66, 0x6F, 0x6F].to_tendril();
    assert_eq!(&foo_bytes, "foo".to_tendril().as_bytes());
    assert_eq!(foo_bytes, "foo".to_tendril().into_bytes());

    // bytes -> ASCII, then back up to the superset format.
    let ascii: Tendril<fmt::ASCII> = b"hello".to_tendril().try_reinterpret().unwrap();
    assert_eq!(&"hello".to_tendril(), ascii.as_superset());
    assert_eq!("hello".to_tendril(), ascii.clone().into_superset());

    let non_ascii_bytes = b"\xFF".to_tendril();
    assert!(non_ascii_bytes.try_reinterpret::<fmt::ASCII>().is_err());

    // str -> ASCII subset, by reference.
    let hello = "hello".to_tendril();
    let ascii_view: &Tendril<fmt::ASCII> = hello.try_as_subset().unwrap();
    assert_eq!(b"hello", &**ascii_view.as_bytes());

    assert!("ő"
        .to_tendril()
        .try_reinterpret_view::<fmt::ASCII>()
        .is_err());
    assert!("ő".to_tendril().try_as_subset::<fmt::ASCII>().is_err());

    // str -> ASCII subset, by value.
    let ascii_owned: Tendril<fmt::ASCII> = "hello".to_tendril().try_into_subset().unwrap();
    assert_eq!(b"hello", &**ascii_owned.as_bytes());

    assert!("ő".to_tendril().try_reinterpret::<fmt::ASCII>().is_err());
    assert!("ő".to_tendril().try_into_subset::<fmt::ASCII>().is_err());
}
1891
#[test]
fn clear() {
    // Short tendril.
    let mut short = "foo-".to_tendril();
    short.clear();
    assert_eq!(short.len(), 0);
    assert_eq!(short.len32(), 0);
    assert_eq!(&*short, "");

    // Longer tendril with a live clone: clearing one leaves the other intact.
    let mut long = "much longer".to_tendril();
    let keeper = long.clone();
    long.clear();
    assert_eq!(long.len(), 0);
    assert_eq!(long.len32(), 0);
    assert_eq!(&*long, "");
    assert_eq!(&*keeper, "much longer");
}
1908
#[test]
fn push_tendril() {
    let mut joined = "abc".to_tendril();
    let suffix = "xyz".to_tendril();
    joined.push_tendril(&suffix);
    assert_eq!("abcxyz", &*joined);
}
1915
#[test]
fn wtf8() {
    type Wtf8Tendril = Tendril<fmt::WTF8>;

    // Each lone surrogate is accepted; the two back to back are not.
    assert!(Wtf8Tendril::try_from_byte_slice(b"\xED\xA0\xBD").is_ok());
    assert!(Wtf8Tendril::try_from_byte_slice(b"\xED\xB2\xA9").is_ok());
    assert!(Wtf8Tendril::try_from_byte_slice(b"\xED\xA0\xBD\xED\xB2\xA9").is_err());

    let mixed: Wtf8Tendril = Tendril::try_from_byte_slice(b"\xED\xA0\xBD\xEA\x99\xAE").unwrap();
    assert!(b"\xED\xA0\xBD".to_tendril().try_reinterpret().unwrap() == mixed.subtendril(0, 3));
    assert!(b"\xEA\x99\xAE".to_tendril().try_reinterpret().unwrap() == mixed.subtendril(3, 3));
    assert!(mixed.try_reinterpret_view::<fmt::UTF8>().is_err());

    // Ranges that split either of the two three-byte sequences are rejected.
    for &(offset, length) in &[(0, 1), (0, 2), (1, 1), (3, 1), (3, 2), (4, 1)] {
        assert!(mixed.try_subtendril(offset, length).is_err());
    }

    // paired surrogates
    let mut paired: Wtf8Tendril = Tendril::try_from_byte_slice(b"\xED\xA0\xBD").unwrap();
    assert!(paired.try_push_bytes(b"\xED\xB2\xA9").is_ok());
    assert_eq!(b"\xF0\x9F\x92\xA9", paired.as_byte_slice());
    assert!(paired.try_reinterpret_view::<fmt::UTF8>().is_ok());

    // unpaired surrogates
    let mut unpaired: Wtf8Tendril = Tendril::try_from_byte_slice(b"\xED\xA0\xBB").unwrap();
    let fragments: [&[u8]; 3] = [b"\xED\xA0", b"\xED", b"\xA0"];
    for &fragment in fragments.iter() {
        assert!(unpaired.try_push_bytes(fragment).is_err());
    }
    assert!(unpaired.try_push_bytes(b"\xED\xA0\xBD").is_ok());
    assert_eq!(b"\xED\xA0\xBB\xED\xA0\xBD", unpaired.as_byte_slice());
    assert!(unpaired.try_push_bytes(b"\xED\xB2\xA9").is_ok());
    assert_eq!(b"\xED\xA0\xBB\xF0\x9F\x92\xA9", unpaired.as_byte_slice());
    assert!(unpaired.try_reinterpret_view::<fmt::UTF8>().is_err());
}
1953
#[test]
fn front_char() {
    // Popping from an empty tendril keeps returning `None`.
    let mut empty = "".to_tendril();
    assert_eq!(None, empty.pop_front_char());
    assert_eq!(None, empty.pop_front_char());

    // Characters come out in order, then `None` forever.
    let mut abc = "abc".to_tendril();
    for expected in "abc".chars() {
        assert_eq!(Some(expected), abc.pop_front_char());
    }
    assert_eq!(None, abc.pop_front_char());
    assert_eq!(None, abc.pop_front_char());

    // Long tendril containing a two-byte character.
    let mut long = "főo-a-longer-string-bar-baz".to_tendril();
    assert_eq!(28, long.len());
    for &expected in &['f', 'ő', 'o', '-'] {
        assert_eq!(Some(expected), long.pop_front_char());
    }
    assert_eq!(23, long.len());
}
1975
#[test]
fn char_run() {
    // (input, expected (run, is_whitespace) or None for empty input)
    let cases = [
        ("", None),
        (" ", Some((" ", true))),
        ("x", Some(("x", false))),
        (" \t \n", Some((" \t \n", true))),
        ("xyzzy", Some(("xyzzy", false))),
        (" xyzzy", Some((" ", true))),
        ("xyzzy ", Some(("xyzzy", false))),
        (" xyzzy ", Some((" ", true))),
        ("xyzzy hi", Some(("xyzzy", false))),
        ("中 ", Some(("中", false))),
        (" 中 ", Some((" ", true))),
        (" 中 ", Some((" ", true))),
        (" 中 ", Some((" ", true))),
    ];

    for &(input, expected) in cases.iter() {
        let mut tendril = input.to_tendril();
        let popped = tendril.pop_front_char_run(char::is_whitespace);
        match (expected, popped) {
            (None, got) => assert!(got.is_none()),
            (Some((want_run, want_class)), got) => {
                let (run, class) = got.unwrap();
                assert_eq!(want_run, &*run);
                assert_eq!(want_class, class);
            }
        }
    }
}
2005
#[test]
fn deref_mut_inline() {
    let mut bytes = "xyő".to_tendril().into_bytes();

    // Corrupt the second byte of the two-byte character ...
    bytes[3] = 0xff;
    assert_eq!(b"xy\xC5\xFF", &*bytes);
    assert!(bytes.try_reinterpret_view::<fmt::UTF8>().is_err());

    // ... then turn it into a different valid character.
    bytes[3] = 0x8b;
    assert_eq!("xyŋ", &**bytes.try_reinterpret_view::<fmt::UTF8>().unwrap());

    unsafe {
        // Grow by three bytes and fill them in by hand.
        bytes.push_uninitialized(3);
        bytes[4] = 0xEA;
        bytes[5] = 0x99;
        bytes[6] = 0xAE;
        assert_eq!(
            "xyŋ\u{a66e}",
            &**bytes.try_reinterpret_view::<fmt::UTF8>().unwrap()
        );

        // Growing past the inline size and shrinking back keeps the contents.
        bytes.push_uninitialized(20);
        bytes.pop_back(20);
        assert_eq!(
            "xyŋ\u{a66e}",
            &**bytes.try_reinterpret_view::<fmt::UTF8>().unwrap()
        );
    }
}
2032
#[test]
fn deref_mut() {
    let mut digits = b"0123456789".to_tendril();
    let untouched = digits.clone();
    assert!(digits.is_shared());

    // Writing through `DerefMut` unshares the written handle only.
    digits[9] = 0xff;
    assert!(!digits.is_shared());
    assert_eq!(b"0123456789", &*untouched);
    assert_eq!(b"012345678\xff", &*digits);
}
2043
#[test]
fn push_char() {
    let mut text = "xyz".to_tendril();

    // Push characters of 1, 2, 3 and 4 UTF-8 bytes in turn.
    text.push_char('o');
    assert_eq!("xyzo", &*text);

    text.push_char('ő');
    assert_eq!("xyzoő", &*text);

    text.push_char('\u{a66e}');
    assert_eq!("xyzoő\u{a66e}", &*text);

    text.push_char('\u{1f4a9}');
    assert_eq!("xyzoő\u{a66e}\u{1f4a9}", &*text);

    assert_eq!(text.len(), 13);
}
2057
#[test]
#[cfg(feature = "encoding")]
fn encode() {
    use encoding::{all, EncoderTrap};

    // Strict encoding of fully representable text.
    let korean = "안녕하세요 러스트".to_tendril();
    let korean_bytes = korean.encode(all::WINDOWS_949, EncoderTrap::Strict).unwrap();
    assert_eq!(
        b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\xbf\xe4\x20\xb7\xaf\xbd\xba\xc6\xae",
        &*korean_bytes
    );

    // With the `Replace` trap the unrepresentable character becomes `?`.
    let russian = "Энергия пробуждения ия-я-я! \u{a66e}".to_tendril();
    let russian_bytes = russian.encode(all::KOI8_U, EncoderTrap::Replace).unwrap();
    assert_eq!(
        b"\xfc\xce\xc5\xd2\xc7\xc9\xd1 \xd0\xd2\xcf\xc2\xd5\xd6\xc4\xc5\xce\
          \xc9\xd1 \xc9\xd1\x2d\xd1\x2d\xd1\x21 ?",
        &*russian_bytes
    );

    // With the `Strict` trap an unrepresentable character is an error.
    let emoji = "\u{1f4a9}".to_tendril();
    assert!(emoji.encode(all::WINDOWS_1252, EncoderTrap::Strict).is_err());
}
2079
#[test]
#[cfg(feature = "encoding")]
fn decode() {
    use encoding::{all, DecoderTrap};

    let korean = b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\
                   \xbf\xe4\x20\xb7\xaf\xbd\xba\xc6\xae"
        .to_tendril();
    let korean_text = korean.decode(all::WINDOWS_949, DecoderTrap::Strict).unwrap();
    assert_eq!("안녕하세요 러스트", &*korean_text);

    let russian = b"\xfc\xce\xc5\xd2\xc7\xc9\xd1 \xd0\xd2\xcf\xc2\xd5\xd6\xc4\xc5\xce\
                    \xc9\xd1 \xc9\xd1\x2d\xd1\x2d\xd1\x21"
        .to_tendril();
    let russian_text = russian.decode(all::KOI8_U, DecoderTrap::Replace).unwrap();
    assert_eq!("Энергия пробуждения ия-я-я!", &*russian_text);

    // An invalid byte is an error under `Strict` ...
    let invalid = b"x \xff y".to_tendril();
    assert!(invalid.decode(all::UTF_8, DecoderTrap::Strict).is_err());

    // ... and becomes U+FFFD under `Replace`.
    let invalid = b"x \xff y".to_tendril();
    let replaced = invalid.decode(all::UTF_8, DecoderTrap::Replace).unwrap();
    assert_eq!("x \u{fffd} y", &*replaced);
}
2110
#[test]
fn ascii() {
    // Build an ASCII tendril from raw bytes.
    fn mk(x: &[u8]) -> Tendril<fmt::ASCII> {
        let bytes = x.to_tendril();
        bytes.try_reinterpret().unwrap()
    }

    let mut letters = mk(b"xyz");
    for &expected in &['x', 'y', 'z'] {
        assert_eq!(Some(expected), letters.pop_front_char());
    }
    assert_eq!(None, letters.pop_front_char());

    let mut mixed = mk(b" \t xyz");
    assert!(Some((mk(b" \t "), true)) == mixed.pop_front_char_run(char::is_whitespace));
    assert!(Some((mk(b"xyz"), false)) == mixed.pop_front_char_run(char::is_whitespace));
    assert!(mixed.pop_front_char_run(char::is_whitespace).is_none());

    // Only characters representable in ASCII can be pushed.
    let mut pushed = Tendril::<fmt::ASCII>::new();
    assert!(pushed.try_push_char('x').is_ok());
    assert!(pushed.try_push_char('\0').is_ok());
    assert!(pushed.try_push_char('\u{a0}').is_err());
    assert_eq!(b"x\0", pushed.as_byte_slice());
}
2134
#[test]
fn latin1() {
    // Build a Latin-1 tendril from raw bytes.
    fn mk(x: &[u8]) -> Tendril<fmt::Latin1> {
        let bytes = x.to_tendril();
        bytes.try_reinterpret().unwrap()
    }

    let mut letters = mk(b"\xd8_\xd8");
    for &expected in &['Ø', '_', 'Ø'] {
        assert_eq!(Some(expected), letters.pop_front_char());
    }
    assert_eq!(None, letters.pop_front_char());

    let mut mixed = mk(b" \t \xfe\xa7z");
    assert!(Some((mk(b" \t "), true)) == mixed.pop_front_char_run(char::is_whitespace));
    assert!(Some((mk(b"\xfe\xa7z"), false)) == mixed.pop_front_char_run(char::is_whitespace));
    assert!(mixed.pop_front_char_run(char::is_whitespace).is_none());

    // Characters representable in Latin-1 are accepted ...
    let mut pushed = Tendril::<fmt::Latin1>::new();
    for &accepted in &['x', '\0', '\u{a0}'] {
        assert!(pushed.try_push_char(accepted).is_ok());
    }
    // ... everything else is rejected and leaves the contents alone.
    for &refused in &['ő', 'я', '\u{a66e}', '\u{1f4a9}'] {
        assert!(pushed.try_push_char(refused).is_err());
    }
    assert_eq!(b"x\0\xa0", pushed.as_byte_slice());
}
2162
#[test]
fn format() {
    let empty = format_tendril!("");
    assert_eq!("", &*empty);

    let sentence = format_tendril!("two and two make {}", 2 + 2);
    assert_eq!("two and two make 4", &*sentence);
}
2171
#[test]
fn merge_shared() {
    let whole = "012345678901234567890123456789".to_tendril();

    let tail = whole.subtendril(10, 20);
    assert!(tail.is_shared());
    assert_eq!("01234567890123456789", &*tail);

    let mut head = whole.subtendril(0, 10);
    assert!(head.is_shared());
    assert_eq!("0123456789", &*head);

    // Appending the adjacent slice keeps everything on the one shared buffer.
    head.push_tendril(&tail);
    assert!(head.is_shared());
    assert!(tail.is_shared());
    assert!(tail.is_shared_with(&head));
    assert!(head.is_shared_with(&tail));
    assert_eq!("012345678901234567890123456789", &*head);

    assert!(whole.is_shared());
    assert!(whole.is_shared_with(&tail));
    assert!(whole.is_shared_with(&head));
}
2193
#[test]
fn merge_cant_share() {
    let whole = "012345678901234567890123456789".to_tendril();
    let mut head = whole.subtendril(0, 10);
    assert!(head.is_shared());
    assert_eq!("0123456789", &*head);

    // Appending unrelated data leaves the result unshared.
    let unrelated = "abcd".to_tendril();
    head.push_tendril(&unrelated);
    assert!(!head.is_shared());
    assert_eq!("0123456789abcd", &*head);
}
2205
#[test]
fn shared_doesnt_reserve() {
    let mut whole = "012345678901234567890123456789".to_tendril();
    let slice = whole.subtendril(1, 10);

    // Reserving on a shared tendril leaves it shared.
    assert!(whole.is_shared());
    whole.reserve(10);
    assert!(whole.is_shared());

    let _ = slice;
}
2217
#[test]
fn out_of_bounds() {
    // Subtendril ranges reaching past the end are rejected.
    assert!("".to_tendril().try_subtendril(0, 1).is_err());
    for &(offset, length) in &[(0, 4), (3, 1), (7, 1)] {
        assert!("abc".to_tendril().try_subtendril(offset, length).is_err());
    }

    // Nothing can be popped from an empty tendril.
    let mut empty = "".to_tendril();
    for &n in &[1, 5, 500] {
        assert!(empty.try_pop_front(n).is_err());
    }
    for &n in &[1, 5, 500] {
        assert!(empty.try_pop_back(n).is_err());
    }

    // Popping more than what is left fails; popping within bounds succeeds.
    let mut abcd = "abcd".to_tendril();
    assert!(abcd.try_pop_front(1).is_ok());
    assert!(abcd.try_pop_front(4).is_err());
    assert!(abcd.try_pop_front(500).is_err());
    assert!(abcd.try_pop_back(1).is_ok());
    assert!(abcd.try_pop_back(3).is_err());
    assert!(abcd.try_pop_back(500).is_err());
}
2241
#[test]
fn compare() {
    let left_words = [
        "indiscretions",
        "validity",
        "hallucinogenics",
        "timelessness",
        "original",
        "microcosms",
        "boilers",
        "mammoth",
    ];
    let right_words = [
        "intrepidly",
        "frigid",
        "spa",
        "cardigans",
        "guileful",
        "evaporated",
        "unenthusiastic",
        "legitimate",
    ];

    // Every comparison on tendrils must agree with the same comparison on `str`.
    for &a in left_words.iter() {
        for &b in right_words.iter() {
            let ta = a.to_tendril();
            let tb = b.to_tendril();

            assert_eq!(a.eq(b), ta.eq(&tb));
            assert_eq!(a.ne(b), ta.ne(&tb));
            assert_eq!(a.lt(b), ta.lt(&tb));
            assert_eq!(a.le(b), ta.le(&tb));
            assert_eq!(a.gt(b), ta.gt(&tb));
            assert_eq!(a.ge(b), ta.ge(&tb));
            assert_eq!(a.partial_cmp(b), ta.partial_cmp(&tb));
            assert_eq!(a.cmp(b), ta.cmp(&tb));
        }
    }
}
2278
#[test]
fn extend_and_from_iterator() {
    // Exercises Extend<T> and FromIterator<T> for each supported item type T.

    // Tendril<F>
    let mut greeting = "Hello".to_tendril();
    greeting.extend(None::<&Tendril<_>>.into_iter());
    assert_eq!("Hello", &*greeting);
    greeting.extend(&[", ".to_tendril(), "world".to_tendril(), "!".to_tendril()]);
    assert_eq!("Hello, world!", &*greeting);
    let tendril_parts = [
        "Hello".to_tendril(),
        ", ".to_tendril(),
        "world".to_tendril(),
        "!".to_tendril(),
    ];
    assert_eq!(
        "Hello, world!",
        &*tendril_parts.iter().collect::<StrTendril>()
    );

    // &str
    let mut greeting = "Hello".to_tendril();
    greeting.extend(None::<&str>.into_iter());
    assert_eq!("Hello", &*greeting);
    greeting.extend([", ", "world", "!"].iter().cloned());
    assert_eq!("Hello, world!", &*greeting);
    let str_parts = ["Hello", ", ", "world", "!"];
    assert_eq!(
        "Hello, world!",
        &*str_parts.iter().cloned().collect::<StrTendril>()
    );

    // &[u8]
    let mut greeting = b"Hello".to_tendril();
    greeting.extend(None::<&[u8]>.into_iter());
    assert_eq!(b"Hello", &*greeting);
    let byte_tail = [b", ".as_ref(), b"world".as_ref(), b"!".as_ref()];
    greeting.extend(byte_tail.iter().cloned());
    assert_eq!(b"Hello, world!", &*greeting);
    let byte_parts = [
        b"Hello".as_ref(),
        b", ".as_ref(),
        b"world".as_ref(),
        b"!".as_ref(),
    ];
    assert_eq!(
        b"Hello, world!",
        &*byte_parts.iter().cloned().collect::<ByteTendril>()
    );

    let string = "the quick brown fox jumps over the lazy dog";
    let string_expected = string.to_tendril();
    let bytes = string.as_bytes();
    let bytes_expected = bytes.to_tendril();

    // char
    let collected: StrTendril = string.chars().collect();
    assert_eq!(string_expected, collected);
    let mut extended = StrTendril::new();
    extended.extend(string.chars());
    assert_eq!(string_expected, extended);

    // &u8
    let collected: ByteTendril = bytes.iter().collect();
    assert_eq!(bytes_expected, collected);
    let mut extended = ByteTendril::new();
    extended.extend(bytes);
    assert_eq!(bytes_expected, extended);

    // u8
    let collected: ByteTendril = bytes.iter().cloned().collect();
    assert_eq!(bytes_expected, collected);
    let mut extended = ByteTendril::new();
    extended.extend(bytes.iter().cloned());
    assert_eq!(bytes_expected, extended);
}
2361
2362 #[test]
2363 fn from_str() {
2364 use std::str::FromStr;
2365 let t: Tendril<_> = FromStr::from_str("foo bar baz").unwrap();
2366 assert_eq!("foo bar baz", &*t);
2367 }
2368
2369 #[test]
2370 fn from_char() {
2371 assert_eq!("o", &*StrTendril::from_char('o'));
2372 assert_eq!("ő", &*StrTendril::from_char('ő'));
2373 assert_eq!("\u{a66e}", &*StrTendril::from_char('\u{a66e}'));
2374 assert_eq!("\u{1f4a9}", &*StrTendril::from_char('\u{1f4a9}'));
2375 }
2376
2377 #[test]
2378 #[cfg_attr(miri, ignore)] // slow
2379 fn read() {
2380 fn check(x: &[u8]) {
2381 use std::io::Cursor;
2382 let mut t = ByteTendril::new();
2383 assert_eq!(x.len(), Cursor::new(x).read_to_tendril(&mut t).unwrap());
2384 assert_eq!(x, &*t);
2385 }
2386
2387 check(b"");
2388 check(b"abcd");
2389
2390 let long: Vec<u8> = iter::repeat(b'x').take(1_000_000).collect();
2391 check(&long);
2392 }
2393
2394 #[test]
2395 fn hash_map_key() {
2396 use std::collections::HashMap;
2397
2398 // As noted with Borrow, indexing on HashMap<StrTendril, _> is byte-based because of
2399 // https://github.com/rust-lang/rust/issues/27108.
2400 let mut map = HashMap::new();
2401 map.insert("foo".to_tendril(), 1);
2402 assert_eq!(map.get(b"foo".as_ref()), Some(&1));
2403 assert_eq!(map.get(b"bar".as_ref()), None);
2404
2405 let mut map = HashMap::new();
2406 map.insert(b"foo".to_tendril(), 1);
2407 assert_eq!(map.get(b"foo".as_ref()), Some(&1));
2408 assert_eq!(map.get(b"bar".as_ref()), None);
2409 }
2410
2411 #[test]
2412 fn atomic() {
2413 assert_send::<Tendril<fmt::UTF8, Atomic>>();
2414 let s: Tendril<fmt::UTF8, Atomic> = Tendril::from_slice("this is a string");
2415 assert!(!s.is_shared());
2416 let mut t = s.clone();
2417 assert!(s.is_shared());
2418 let sp = s.as_ptr() as usize;
2419 thread::spawn(move || {
2420 assert!(t.is_shared());
2421 t.push_slice(" extended");
2422 assert_eq!("this is a string extended", &*t);
2423 assert!(t.as_ptr() as usize != sp);
2424 assert!(!t.is_shared());
2425 })
2426 .join()
2427 .unwrap();
2428 assert!(s.is_shared());
2429 assert_eq!("this is a string", &*s);
2430 }
2431
2432 #[test]
2433 fn send() {
2434 assert_send::<SendTendril<fmt::UTF8>>();
2435 let s = "this is a string".to_tendril();
2436 let t = s.clone();
2437 let s2 = s.into_send();
2438 thread::spawn(move || {
2439 let s = StrTendril::from(s2);
2440 assert!(!s.is_shared());
2441 assert_eq!("this is a string", &*s);
2442 })
2443 .join()
2444 .unwrap();
2445 assert_eq!("this is a string", &*t);
2446 }
2447
2448 /// https://github.com/servo/tendril/issues/58
2449 #[test]
2450 fn issue_58() {
2451 let data = "<p><i>Hello!</p>, World!</i>";
2452 let s: Tendril<fmt::UTF8, NonAtomic> = data.into();
2453 assert_eq!(&*s, data);
2454 let s: Tendril<fmt::UTF8, Atomic> = s.into_send().into();
2455 assert_eq!(&*s, data);
2456 }
2457
2458 #[test]
2459 fn inline_send() {
2460 let s = "x".to_tendril();
2461 let t = s.clone();
2462 let s2 = s.into_send();
2463 thread::spawn(move || {
2464 let s = StrTendril::from(s2);
2465 assert!(!s.is_shared());
2466 assert_eq!("x", &*s);
2467 })
2468 .join()
2469 .unwrap();
2470 assert_eq!("x", &*t);
2471 }
2472}
2473