| 1 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 2 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 3 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
| 4 | // option. This file may not be copied, modified, or distributed |
| 5 | // except according to those terms. |
| 6 | |
| 7 | use std::borrow::Borrow; |
| 8 | use std::cell::{Cell, UnsafeCell}; |
| 9 | use std::cmp::Ordering; |
| 10 | use std::default::Default; |
| 11 | use std::fmt as strfmt; |
| 12 | use std::iter::FromIterator; |
| 13 | use std::marker::PhantomData; |
| 14 | use std::num::NonZeroUsize; |
| 15 | use std::ops::{Deref, DerefMut}; |
| 16 | use std::sync::atomic::Ordering as AtomicOrdering; |
| 17 | use std::sync::atomic::{self, AtomicUsize}; |
| 18 | use std::{hash, io, mem, ptr, str, u32}; |
| 19 | |
| 20 | #[cfg (feature = "encoding" )] |
| 21 | use encoding::{self, DecoderTrap, EncoderTrap, EncodingRef}; |
| 22 | |
| 23 | use buf32::{self, Buf32}; |
| 24 | use fmt::imp::Fixup; |
| 25 | use fmt::{self, Slice}; |
| 26 | use util::{copy_and_advance, copy_lifetime, copy_lifetime_mut, unsafe_slice, unsafe_slice_mut}; |
| 27 | use OFLOW; |
| 28 | |
/// Largest number of bytes a tendril stores inline.
const MAX_INLINE_LEN: usize = 8;
/// Largest value of the tag word that is treated as an inline tag.
const MAX_INLINE_TAG: usize = 0xF;
/// Tag word used for an empty tendril (a zero length cannot be stored in a
/// `NonZeroUsize`).
const EMPTY_TAG: usize = 0xF;

/// Compute the tag word for an inline tendril holding `len` bytes.
///
/// A length of zero maps to `EMPTY_TAG`; any other length is used as the tag
/// directly. In debug builds, `len` is asserted not to exceed
/// `MAX_INLINE_LEN`.
#[inline(always)]
fn inline_tag(len: u32) -> NonZeroUsize {
    debug_assert!(len <= MAX_INLINE_LEN as u32);
    let tag = match len {
        0 => EMPTY_TAG,
        n => n as usize,
    };
    // SAFETY: `tag` is either `EMPTY_TAG` (0xF) or a length that is not zero.
    unsafe { NonZeroUsize::new_unchecked(tag) }
}
| 38 | |
/// The multithreadedness of a tendril.
///
/// Exactly two types implement this trait:
///
/// - `Atomic`: use this in your tendril and you will have a `Send` tendril which works
///   across threads; this is akin to `Arc`.
///
/// - `NonAtomic`: use this in your tendril and you will have a tendril which is neither
///   `Send` nor `Sync` but should be a tad faster; this is akin to `Rc`.
///
/// # Safety
///
/// Every type implementing this trait is mandated to have the layout of a
/// `usize`, for it is used for reference counting.
pub unsafe trait Atomicity: 'static {
    /// Create a new reference count. Both implementations in this file start
    /// the count at 1.
    #[doc (hidden)]
    fn new() -> Self;

    /// Add one reference. Both implementations in this file return the count
    /// as it was *before* the increment.
    #[doc (hidden)]
    fn increment(&self) -> usize;

    /// Remove one reference. Both implementations in this file return the
    /// count as it was *before* the decrement.
    #[doc (hidden)]
    fn decrement(&self) -> usize;

    /// Acquire fence used after the last reference is released and before the
    /// buffer is destroyed; a no-op for `NonAtomic`.
    #[doc (hidden)]
    fn fence_acquire();
}
| 64 | |
| 65 | /// A marker of a non-atomic tendril. |
| 66 | /// |
| 67 | /// This is the default for the second type parameter of a `Tendril` |
| 68 | /// and so doesn't typically need to be written. |
| 69 | /// |
| 70 | /// This is akin to using `Rc` for reference counting. |
| 71 | #[repr (C)] |
| 72 | pub struct NonAtomic(Cell<usize>); |
| 73 | |
| 74 | unsafe impl Atomicity for NonAtomic { |
| 75 | #[inline ] |
| 76 | fn new() -> Self { |
| 77 | NonAtomic(Cell::new(1)) |
| 78 | } |
| 79 | |
| 80 | #[inline ] |
| 81 | fn increment(&self) -> usize { |
| 82 | let value: usize = self.0.get(); |
| 83 | self.0.set(val:value.checked_add(1).expect(msg:OFLOW)); |
| 84 | value |
| 85 | } |
| 86 | |
| 87 | #[inline ] |
| 88 | fn decrement(&self) -> usize { |
| 89 | let value: usize = self.0.get(); |
| 90 | self.0.set(val:value - 1); |
| 91 | value |
| 92 | } |
| 93 | |
| 94 | #[inline ] |
| 95 | fn fence_acquire() {} |
| 96 | } |
| 97 | |
| 98 | /// A marker of an atomic (and hence concurrent) tendril. |
| 99 | /// |
| 100 | /// This is used as the second, optional type parameter of a `Tendril`; |
| 101 | /// `Tendril<F, Atomic>` thus implements`Send`. |
| 102 | /// |
| 103 | /// This is akin to using `Arc` for reference counting. |
| 104 | pub struct Atomic(AtomicUsize); |
| 105 | |
| 106 | unsafe impl Atomicity for Atomic { |
| 107 | #[inline ] |
| 108 | fn new() -> Self { |
| 109 | Atomic(AtomicUsize::new(1)) |
| 110 | } |
| 111 | |
| 112 | #[inline ] |
| 113 | fn increment(&self) -> usize { |
| 114 | // Relaxed is OK because we have a reference already. |
| 115 | self.0.fetch_add(val:1, order:AtomicOrdering::Relaxed) |
| 116 | } |
| 117 | |
| 118 | #[inline ] |
| 119 | fn decrement(&self) -> usize { |
| 120 | self.0.fetch_sub(val:1, order:AtomicOrdering::Release) |
| 121 | } |
| 122 | |
| 123 | #[inline ] |
| 124 | fn fence_acquire() { |
| 125 | atomic::fence(order:AtomicOrdering::Acquire); |
| 126 | } |
| 127 | } |
| 128 | |
| 129 | #[repr (C)] // Preserve field order for cross-atomicity transmutes |
| 130 | struct Header<A: Atomicity> { |
| 131 | refcount: A, |
| 132 | cap: u32, |
| 133 | } |
| 134 | |
| 135 | impl<A> Header<A> |
| 136 | where |
| 137 | A: Atomicity, |
| 138 | { |
| 139 | #[inline (always)] |
| 140 | unsafe fn new() -> Header<A> { |
| 141 | Header { |
| 142 | refcount: A::new(), |
| 143 | cap: 0, |
| 144 | } |
| 145 | } |
| 146 | } |
| 147 | |
/// Errors that can occur when slicing a `Tendril`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SubtendrilError {
    /// The requested range does not lie within the tendril.
    OutOfBounds,
    /// The requested range is in bounds but does not conform to the format.
    ValidationFailed,
}
| 154 | |
| 155 | /// Compact string type for zero-copy parsing. |
| 156 | /// |
| 157 | /// `Tendril`s have the semantics of owned strings, but are sometimes views |
| 158 | /// into shared buffers. When you mutate a `Tendril`, an owned copy is made |
| 159 | /// if necessary. Further mutations occur in-place until the string becomes |
| 160 | /// shared, e.g. with `clone()` or `subtendril()`. |
| 161 | /// |
| 162 | /// Buffer sharing is accomplished through thread-local (non-atomic) reference |
| 163 | /// counting, which has very low overhead. The Rust type system will prevent |
| 164 | /// you at compile time from sending a `Tendril` between threads. We plan to |
| 165 | /// relax this restriction in the future; see `README.md`. |
| 166 | /// |
| 167 | /// Whereas `String` allocates in the heap for any non-empty string, `Tendril` |
| 168 | /// can store small strings (up to 8 bytes) in-line, without a heap allocation. |
| 169 | /// `Tendril` is also smaller than `String` on 64-bit platforms — 16 bytes |
| 170 | /// versus 24. |
| 171 | /// |
| 172 | /// The type parameter `F` specifies the format of the tendril, for example |
| 173 | /// UTF-8 text or uninterpreted bytes. The parameter will be instantiated |
| 174 | /// with one of the marker types from `tendril::fmt`. See the `StrTendril` |
| 175 | /// and `ByteTendril` type aliases for two examples. |
| 176 | /// |
| 177 | /// The type parameter `A` indicates the atomicity of the tendril; it is by |
| 178 | /// default `NonAtomic`, but can be specified as `Atomic` to get a tendril |
| 179 | /// which implements `Send` (viz. a thread-safe tendril). |
| 180 | /// |
| 181 | /// The maximum length of a `Tendril` is 4 GB. The library will panic if |
| 182 | /// you attempt to go over the limit. |
| 183 | #[repr (C)] |
| 184 | pub struct Tendril<F, A = NonAtomic> |
| 185 | where |
| 186 | F: fmt::Format, |
| 187 | A: Atomicity, |
| 188 | { |
| 189 | ptr: Cell<NonZeroUsize>, |
| 190 | buf: UnsafeCell<Buffer>, |
| 191 | marker: PhantomData<*mut F>, |
| 192 | refcount_marker: PhantomData<A>, |
| 193 | } |
| 194 | |
/// Payload word of a `Tendril`: interpreted as `heap` bookkeeping or as the
/// `inline` bytes themselves.
#[repr (C)]
union Buffer {
    // Length and auxiliary word of a heap-backed tendril.
    heap: Heap,
    // Raw bytes of an inline tendril.
    inline: [u8; 8],
}
| 200 | |
/// Bookkeeping stored in the `Buffer` union as its `heap` field.
#[derive (Copy, Clone)]
#[repr (C)]
struct Heap {
    // Length in bytes.
    len: u32,
    // Auxiliary word. NOTE(review): appears to hold the capacity while the
    // buffer is owned and the start offset once it is shared — confirm
    // against the accessor methods.
    aux: u32,
}
| 207 | |
| 208 | unsafe impl<F, A> Send for Tendril<F, A> |
| 209 | where |
| 210 | F: fmt::Format, |
| 211 | A: Atomicity + Sync, |
| 212 | { |
| 213 | } |
| 214 | |
/// `Tendril` for storing native Rust strings.
///
/// Uses the `fmt::UTF8` format and the default atomicity parameter.
pub type StrTendril = Tendril<fmt::UTF8>;

/// `Tendril` for storing binary data.
///
/// Uses the `fmt::Bytes` format and the default atomicity parameter.
pub type ByteTendril = Tendril<fmt::Bytes>;
| 220 | |
| 221 | impl<F, A> Clone for Tendril<F, A> |
| 222 | where |
| 223 | F: fmt::Format, |
| 224 | A: Atomicity, |
| 225 | { |
| 226 | #[inline ] |
| 227 | fn clone(&self) -> Tendril<F, A> { |
| 228 | unsafe { |
| 229 | if self.ptr.get().get() > MAX_INLINE_TAG { |
| 230 | self.make_buf_shared(); |
| 231 | self.incref(); |
| 232 | } |
| 233 | |
| 234 | ptr::read(self) |
| 235 | } |
| 236 | } |
| 237 | } |
| 238 | |
| 239 | impl<F, A> Drop for Tendril<F, A> |
| 240 | where |
| 241 | F: fmt::Format, |
| 242 | A: Atomicity, |
| 243 | { |
| 244 | #[inline ] |
| 245 | fn drop(&mut self) { |
| 246 | unsafe { |
| 247 | let p: usize = self.ptr.get().get(); |
| 248 | if p <= MAX_INLINE_TAG { |
| 249 | return; |
| 250 | } |
| 251 | |
| 252 | let (buf: Buf32, shared: bool, _) = self.assume_buf(); |
| 253 | if shared { |
| 254 | let header: *mut Header = self.header(); |
| 255 | if (*header).refcount.decrement() == 1 { |
| 256 | A::fence_acquire(); |
| 257 | buf.destroy(); |
| 258 | } |
| 259 | } else { |
| 260 | buf.destroy(); |
| 261 | } |
| 262 | } |
| 263 | } |
| 264 | } |
| 265 | |
// Expands to a `from_iter` method that creates an empty value with
// `Self::new()` and fills it through `extend`.
macro_rules! from_iter_method {
    ($item:ty) => {
        #[inline]
        fn from_iter<I: IntoIterator<Item = $item>>(iterable: I) -> Self {
            let mut collected = Self::new();
            collected.extend(iterable);
            collected
        }
    };
}
| 279 | |
| 280 | impl<A> Extend<char> for Tendril<fmt::UTF8, A> |
| 281 | where |
| 282 | A: Atomicity, |
| 283 | { |
| 284 | #[inline ] |
| 285 | fn extend<I>(&mut self, iterable: I) |
| 286 | where |
| 287 | I: IntoIterator<Item = char>, |
| 288 | { |
| 289 | let iterator: ::IntoIter = iterable.into_iter(); |
| 290 | self.force_reserve(additional:iterator.size_hint().0 as u32); |
| 291 | for c: char in iterator { |
| 292 | self.push_char(c); |
| 293 | } |
| 294 | } |
| 295 | } |
| 296 | |
| 297 | impl<A> FromIterator<char> for Tendril<fmt::UTF8, A> |
| 298 | where |
| 299 | A: Atomicity, |
| 300 | { |
| 301 | from_iter_method!(char); |
| 302 | } |
| 303 | |
| 304 | impl<A> Extend<u8> for Tendril<fmt::Bytes, A> |
| 305 | where |
| 306 | A: Atomicity, |
| 307 | { |
| 308 | #[inline ] |
| 309 | fn extend<I>(&mut self, iterable: I) |
| 310 | where |
| 311 | I: IntoIterator<Item = u8>, |
| 312 | { |
| 313 | let iterator: ::IntoIter = iterable.into_iter(); |
| 314 | self.force_reserve(additional:iterator.size_hint().0 as u32); |
| 315 | for b: u8 in iterator { |
| 316 | self.push_slice(&[b]); |
| 317 | } |
| 318 | } |
| 319 | } |
| 320 | |
| 321 | impl<A> FromIterator<u8> for Tendril<fmt::Bytes, A> |
| 322 | where |
| 323 | A: Atomicity, |
| 324 | { |
| 325 | from_iter_method!(u8); |
| 326 | } |
| 327 | |
| 328 | impl<'a, A> Extend<&'a u8> for Tendril<fmt::Bytes, A> |
| 329 | where |
| 330 | A: Atomicity, |
| 331 | { |
| 332 | #[inline ] |
| 333 | fn extend<I>(&mut self, iterable: I) |
| 334 | where |
| 335 | I: IntoIterator<Item = &'a u8>, |
| 336 | { |
| 337 | let iterator: ::IntoIter = iterable.into_iter(); |
| 338 | self.force_reserve(additional:iterator.size_hint().0 as u32); |
| 339 | for &b: u8 in iterator { |
| 340 | self.push_slice(&[b]); |
| 341 | } |
| 342 | } |
| 343 | } |
| 344 | |
| 345 | impl<'a, A> FromIterator<&'a u8> for Tendril<fmt::Bytes, A> |
| 346 | where |
| 347 | A: Atomicity, |
| 348 | { |
| 349 | from_iter_method!(&'a u8); |
| 350 | } |
| 351 | |
| 352 | impl<'a, A> Extend<&'a str> for Tendril<fmt::UTF8, A> |
| 353 | where |
| 354 | A: Atomicity, |
| 355 | { |
| 356 | #[inline ] |
| 357 | fn extend<I>(&mut self, iterable: I) |
| 358 | where |
| 359 | I: IntoIterator<Item = &'a str>, |
| 360 | { |
| 361 | for s: &'a str in iterable { |
| 362 | self.push_slice(s); |
| 363 | } |
| 364 | } |
| 365 | } |
| 366 | |
| 367 | impl<'a, A> FromIterator<&'a str> for Tendril<fmt::UTF8, A> |
| 368 | where |
| 369 | A: Atomicity, |
| 370 | { |
| 371 | from_iter_method!(&'a str); |
| 372 | } |
| 373 | |
| 374 | impl<'a, A> Extend<&'a [u8]> for Tendril<fmt::Bytes, A> |
| 375 | where |
| 376 | A: Atomicity, |
| 377 | { |
| 378 | #[inline ] |
| 379 | fn extend<I>(&mut self, iterable: I) |
| 380 | where |
| 381 | I: IntoIterator<Item = &'a [u8]>, |
| 382 | { |
| 383 | for s: &'a [u8] in iterable { |
| 384 | self.push_slice(s); |
| 385 | } |
| 386 | } |
| 387 | } |
| 388 | |
| 389 | impl<'a, A> FromIterator<&'a [u8]> for Tendril<fmt::Bytes, A> |
| 390 | where |
| 391 | A: Atomicity, |
| 392 | { |
| 393 | from_iter_method!(&'a [u8]); |
| 394 | } |
| 395 | |
| 396 | impl<'a, F, A> Extend<&'a Tendril<F, A>> for Tendril<F, A> |
| 397 | where |
| 398 | F: fmt::Format + 'a, |
| 399 | A: Atomicity, |
| 400 | { |
| 401 | #[inline ] |
| 402 | fn extend<I>(&mut self, iterable: I) |
| 403 | where |
| 404 | I: IntoIterator<Item = &'a Tendril<F, A>>, |
| 405 | { |
| 406 | for t: &'a Tendril in iterable { |
| 407 | self.push_tendril(t); |
| 408 | } |
| 409 | } |
| 410 | } |
| 411 | |
| 412 | impl<'a, F, A> FromIterator<&'a Tendril<F, A>> for Tendril<F, A> |
| 413 | where |
| 414 | F: fmt::Format + 'a, |
| 415 | A: Atomicity, |
| 416 | { |
| 417 | from_iter_method!(&'a Tendril<F, A>); |
| 418 | } |
| 419 | |
| 420 | impl<F, A> Deref for Tendril<F, A> |
| 421 | where |
| 422 | F: fmt::SliceFormat, |
| 423 | A: Atomicity, |
| 424 | { |
| 425 | type Target = F::Slice; |
| 426 | |
| 427 | #[inline ] |
| 428 | fn deref(&self) -> &F::Slice { |
| 429 | unsafe { F::Slice::from_bytes(self.as_byte_slice()) } |
| 430 | } |
| 431 | } |
| 432 | |
| 433 | impl<F, A> DerefMut for Tendril<F, A> |
| 434 | where |
| 435 | F: fmt::SliceFormat, |
| 436 | A: Atomicity, |
| 437 | { |
| 438 | #[inline ] |
| 439 | fn deref_mut(&mut self) -> &mut F::Slice { |
| 440 | unsafe { F::Slice::from_mut_bytes(self.as_mut_byte_slice()) } |
| 441 | } |
| 442 | } |
| 443 | |
| 444 | impl<F, A> Borrow<[u8]> for Tendril<F, A> |
| 445 | where |
| 446 | F: fmt::SliceFormat, |
| 447 | A: Atomicity, |
| 448 | { |
| 449 | fn borrow(&self) -> &[u8] { |
| 450 | self.as_byte_slice() |
| 451 | } |
| 452 | } |
| 453 | |
| 454 | // Why not impl Borrow<str> for Tendril<fmt::UTF8>? str and [u8] hash differently, |
| 455 | // and so a HashMap<StrTendril, _> would silently break if we indexed by str. Ick. |
| 456 | // https://github.com/rust-lang/rust/issues/27108 |
| 457 | |
| 458 | impl<F, A> PartialEq for Tendril<F, A> |
| 459 | where |
| 460 | F: fmt::Format, |
| 461 | A: Atomicity, |
| 462 | { |
| 463 | #[inline ] |
| 464 | fn eq(&self, other: &Self) -> bool { |
| 465 | self.as_byte_slice() == other.as_byte_slice() |
| 466 | } |
| 467 | |
| 468 | #[inline ] |
| 469 | fn ne(&self, other: &Self) -> bool { |
| 470 | self.as_byte_slice() != other.as_byte_slice() |
| 471 | } |
| 472 | } |
| 473 | |
| 474 | impl<F, A> Eq for Tendril<F, A> |
| 475 | where |
| 476 | F: fmt::Format, |
| 477 | A: Atomicity, |
| 478 | { |
| 479 | } |
| 480 | |
| 481 | impl<F, A> PartialOrd for Tendril<F, A> |
| 482 | where |
| 483 | F: fmt::SliceFormat, |
| 484 | <F as fmt::SliceFormat>::Slice: PartialOrd, |
| 485 | A: Atomicity, |
| 486 | { |
| 487 | #[inline ] |
| 488 | fn partial_cmp(&self, other: &Self) -> Option<Ordering> { |
| 489 | PartialOrd::partial_cmp(&**self, &**other) |
| 490 | } |
| 491 | } |
| 492 | |
| 493 | impl<F, A> Ord for Tendril<F, A> |
| 494 | where |
| 495 | F: fmt::SliceFormat, |
| 496 | <F as fmt::SliceFormat>::Slice: Ord, |
| 497 | A: Atomicity, |
| 498 | { |
| 499 | #[inline ] |
| 500 | fn cmp(&self, other: &Self) -> Ordering { |
| 501 | Ord::cmp(&**self, &**other) |
| 502 | } |
| 503 | } |
| 504 | |
| 505 | impl<F, A> Default for Tendril<F, A> |
| 506 | where |
| 507 | F: fmt::Format, |
| 508 | A: Atomicity, |
| 509 | { |
| 510 | #[inline (always)] |
| 511 | fn default() -> Tendril<F, A> { |
| 512 | Tendril::new() |
| 513 | } |
| 514 | } |
| 515 | |
| 516 | impl<F, A> strfmt::Debug for Tendril<F, A> |
| 517 | where |
| 518 | F: fmt::SliceFormat + Default + strfmt::Debug, |
| 519 | <F as fmt::SliceFormat>::Slice: strfmt::Debug, |
| 520 | A: Atomicity, |
| 521 | { |
| 522 | #[inline ] |
| 523 | fn fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result { |
| 524 | let kind: &'static str = match self.ptr.get().get() { |
| 525 | p: usize if p <= MAX_INLINE_TAG => "inline" , |
| 526 | p: usize if p & 1 == 1 => "shared" , |
| 527 | _ => "owned" , |
| 528 | }; |
| 529 | |
| 530 | write!(f, "Tendril< {:?}>( {}: " , <F as Default>::default(), kind)?; |
| 531 | <<F as fmt::SliceFormat>::Slice as strfmt::Debug>::fmt(&**self, f)?; |
| 532 | write!(f, ")" ) |
| 533 | } |
| 534 | } |
| 535 | |
| 536 | impl<F, A> hash::Hash for Tendril<F, A> |
| 537 | where |
| 538 | F: fmt::Format, |
| 539 | A: Atomicity, |
| 540 | { |
| 541 | #[inline ] |
| 542 | fn hash<H: hash::Hasher>(&self, hasher: &mut H) { |
| 543 | self.as_byte_slice().hash(state:hasher) |
| 544 | } |
| 545 | } |
| 546 | |
| 547 | impl<F, A> Tendril<F, A> |
| 548 | where |
| 549 | F: fmt::Format, |
| 550 | A: Atomicity, |
| 551 | { |
| 552 | /// Create a new, empty `Tendril` in any format. |
| 553 | #[inline (always)] |
| 554 | pub fn new() -> Tendril<F, A> { |
| 555 | unsafe { Tendril::inline(&[]) } |
| 556 | } |
| 557 | |
| 558 | /// Create a new, empty `Tendril` with a specified capacity. |
| 559 | #[inline ] |
| 560 | pub fn with_capacity(capacity: u32) -> Tendril<F, A> { |
| 561 | let mut t: Tendril<F, A> = Tendril::new(); |
| 562 | if capacity > MAX_INLINE_LEN as u32 { |
| 563 | unsafe { |
| 564 | t.make_owned_with_capacity(capacity); |
| 565 | } |
| 566 | } |
| 567 | t |
| 568 | } |
| 569 | |
| 570 | /// Reserve space for additional bytes. |
| 571 | /// |
| 572 | /// This is only a suggestion. There are cases where `Tendril` will |
| 573 | /// decline to allocate until the buffer is actually modified. |
| 574 | #[inline ] |
| 575 | pub fn reserve(&mut self, additional: u32) { |
| 576 | if !self.is_shared() { |
| 577 | // Don't grow a shared tendril because we'd have to copy |
| 578 | // right away. |
| 579 | self.force_reserve(additional); |
| 580 | } |
| 581 | } |
| 582 | |
| 583 | /// Reserve space for additional bytes, even for shared buffers. |
| 584 | #[inline ] |
| 585 | fn force_reserve(&mut self, additional: u32) { |
| 586 | let new_len = self.len32().checked_add(additional).expect(OFLOW); |
| 587 | if new_len > MAX_INLINE_LEN as u32 { |
| 588 | unsafe { |
| 589 | self.make_owned_with_capacity(new_len); |
| 590 | } |
| 591 | } |
| 592 | } |
| 593 | |
| 594 | /// Get the length of the `Tendril`. |
| 595 | /// |
| 596 | /// This is named not to conflict with `len()` on the underlying |
| 597 | /// slice, if any. |
| 598 | #[inline (always)] |
| 599 | pub fn len32(&self) -> u32 { |
| 600 | match self.ptr.get().get() { |
| 601 | EMPTY_TAG => 0, |
| 602 | n if n <= MAX_INLINE_LEN => n as u32, |
| 603 | _ => unsafe { self.raw_len() }, |
| 604 | } |
| 605 | } |
| 606 | |
| 607 | /// Is the backing buffer shared? |
| 608 | #[inline ] |
| 609 | pub fn is_shared(&self) -> bool { |
| 610 | let n = self.ptr.get().get(); |
| 611 | |
| 612 | (n > MAX_INLINE_TAG) && ((n & 1) == 1) |
| 613 | } |
| 614 | |
| 615 | /// Is the backing buffer shared with this other `Tendril`? |
| 616 | #[inline ] |
| 617 | pub fn is_shared_with(&self, other: &Tendril<F, A>) -> bool { |
| 618 | let n = self.ptr.get().get(); |
| 619 | |
| 620 | (n > MAX_INLINE_TAG) && (n == other.ptr.get().get()) |
| 621 | } |
| 622 | |
| 623 | /// Truncate to length 0 without discarding any owned storage. |
| 624 | #[inline ] |
| 625 | pub fn clear(&mut self) { |
| 626 | if self.ptr.get().get() <= MAX_INLINE_TAG { |
| 627 | self.ptr |
| 628 | .set(unsafe { NonZeroUsize::new_unchecked(EMPTY_TAG) }); |
| 629 | } else { |
| 630 | let (_, shared, _) = unsafe { self.assume_buf() }; |
| 631 | if shared { |
| 632 | // No need to keep a reference alive for a 0-size slice. |
| 633 | *self = Tendril::new(); |
| 634 | } else { |
| 635 | unsafe { self.set_len(0) }; |
| 636 | } |
| 637 | } |
| 638 | } |
| 639 | |
| 640 | /// Build a `Tendril` by copying a byte slice, if it conforms to the format. |
| 641 | #[inline ] |
| 642 | pub fn try_from_byte_slice(x: &[u8]) -> Result<Tendril<F, A>, ()> { |
| 643 | match F::validate(x) { |
| 644 | true => Ok(unsafe { Tendril::from_byte_slice_without_validating(x) }), |
| 645 | false => Err(()), |
| 646 | } |
| 647 | } |
| 648 | |
| 649 | /// View as uninterpreted bytes. |
| 650 | #[inline (always)] |
| 651 | pub fn as_bytes(&self) -> &Tendril<fmt::Bytes, A> { |
| 652 | unsafe { mem::transmute(self) } |
| 653 | } |
| 654 | |
| 655 | /// Convert into uninterpreted bytes. |
| 656 | #[inline (always)] |
| 657 | pub fn into_bytes(self) -> Tendril<fmt::Bytes, A> { |
| 658 | unsafe { mem::transmute(self) } |
| 659 | } |
| 660 | |
| 661 | /// Convert `self` into a type which is `Send`. |
| 662 | /// |
| 663 | /// If the tendril is owned or inline, this is free, |
| 664 | /// but if it's shared this will entail a copy of the contents. |
| 665 | #[inline ] |
| 666 | pub fn into_send(mut self) -> SendTendril<F> { |
| 667 | self.make_owned(); |
| 668 | SendTendril { |
| 669 | // This changes the header.refcount from A to NonAtomic, but that's |
| 670 | // OK because we have defined the format of A as a usize. |
| 671 | tendril: unsafe { mem::transmute(self) }, |
| 672 | } |
| 673 | } |
| 674 | |
| 675 | /// View as a superset format, for free. |
| 676 | #[inline (always)] |
| 677 | pub fn as_superset<Super>(&self) -> &Tendril<Super, A> |
| 678 | where |
| 679 | F: fmt::SubsetOf<Super>, |
| 680 | Super: fmt::Format, |
| 681 | { |
| 682 | unsafe { mem::transmute(self) } |
| 683 | } |
| 684 | |
| 685 | /// Convert into a superset format, for free. |
| 686 | #[inline (always)] |
| 687 | pub fn into_superset<Super>(self) -> Tendril<Super, A> |
| 688 | where |
| 689 | F: fmt::SubsetOf<Super>, |
| 690 | Super: fmt::Format, |
| 691 | { |
| 692 | unsafe { mem::transmute(self) } |
| 693 | } |
| 694 | |
| 695 | /// View as a subset format, if the `Tendril` conforms to that subset. |
| 696 | #[inline ] |
| 697 | pub fn try_as_subset<Sub>(&self) -> Result<&Tendril<Sub, A>, ()> |
| 698 | where |
| 699 | Sub: fmt::SubsetOf<F>, |
| 700 | { |
| 701 | match Sub::revalidate_subset(self.as_byte_slice()) { |
| 702 | true => Ok(unsafe { mem::transmute(self) }), |
| 703 | false => Err(()), |
| 704 | } |
| 705 | } |
| 706 | |
| 707 | /// Convert into a subset format, if the `Tendril` conforms to that subset. |
| 708 | #[inline ] |
| 709 | pub fn try_into_subset<Sub>(self) -> Result<Tendril<Sub, A>, Self> |
| 710 | where |
| 711 | Sub: fmt::SubsetOf<F>, |
| 712 | { |
| 713 | match Sub::revalidate_subset(self.as_byte_slice()) { |
| 714 | true => Ok(unsafe { mem::transmute(self) }), |
| 715 | false => Err(self), |
| 716 | } |
| 717 | } |
| 718 | |
| 719 | /// View as another format, if the bytes of the `Tendril` are valid for |
| 720 | /// that format. |
| 721 | #[inline ] |
| 722 | pub fn try_reinterpret_view<Other>(&self) -> Result<&Tendril<Other, A>, ()> |
| 723 | where |
| 724 | Other: fmt::Format, |
| 725 | { |
| 726 | match Other::validate(self.as_byte_slice()) { |
| 727 | true => Ok(unsafe { mem::transmute(self) }), |
| 728 | false => Err(()), |
| 729 | } |
| 730 | } |
| 731 | |
| 732 | /// Convert into another format, if the `Tendril` conforms to that format. |
| 733 | /// |
| 734 | /// This only re-validates the existing bytes under the new format. It |
| 735 | /// will *not* change the byte content of the tendril! |
| 736 | /// |
| 737 | /// See the `encode` and `decode` methods for character encoding conversion. |
| 738 | #[inline ] |
| 739 | pub fn try_reinterpret<Other>(self) -> Result<Tendril<Other, A>, Self> |
| 740 | where |
| 741 | Other: fmt::Format, |
| 742 | { |
| 743 | match Other::validate(self.as_byte_slice()) { |
| 744 | true => Ok(unsafe { mem::transmute(self) }), |
| 745 | false => Err(self), |
| 746 | } |
| 747 | } |
| 748 | |
| 749 | /// Push some bytes onto the end of the `Tendril`, if they conform to the |
| 750 | /// format. |
| 751 | #[inline ] |
| 752 | pub fn try_push_bytes(&mut self, buf: &[u8]) -> Result<(), ()> { |
| 753 | match F::validate(buf) { |
| 754 | true => unsafe { |
| 755 | self.push_bytes_without_validating(buf); |
| 756 | Ok(()) |
| 757 | }, |
| 758 | false => Err(()), |
| 759 | } |
| 760 | } |
| 761 | |
| 762 | /// Push another `Tendril` onto the end of this one. |
| 763 | #[inline ] |
| 764 | pub fn push_tendril(&mut self, other: &Tendril<F, A>) { |
| 765 | let new_len = self.len32().checked_add(other.len32()).expect(OFLOW); |
| 766 | |
| 767 | unsafe { |
| 768 | if (self.ptr.get().get() > MAX_INLINE_TAG) && (other.ptr.get().get() > MAX_INLINE_TAG) { |
| 769 | let (self_buf, self_shared, _) = self.assume_buf(); |
| 770 | let (other_buf, other_shared, _) = other.assume_buf(); |
| 771 | |
| 772 | if self_shared |
| 773 | && other_shared |
| 774 | && (self_buf.data_ptr() == other_buf.data_ptr()) |
| 775 | && other.aux() == self.aux() + self.raw_len() |
| 776 | { |
| 777 | self.set_len(new_len); |
| 778 | return; |
| 779 | } |
| 780 | } |
| 781 | |
| 782 | self.push_bytes_without_validating(other.as_byte_slice()) |
| 783 | } |
| 784 | } |
| 785 | |
| 786 | /// Attempt to slice this `Tendril` as a new `Tendril`. |
| 787 | /// |
| 788 | /// This will share the buffer when possible. Mutating a shared buffer |
| 789 | /// will copy the contents. |
| 790 | /// |
| 791 | /// The offset and length are in bytes. The function will return |
| 792 | /// `Err` if these are out of bounds, or if the resulting slice |
| 793 | /// does not conform to the format. |
| 794 | #[inline ] |
| 795 | pub fn try_subtendril( |
| 796 | &self, |
| 797 | offset: u32, |
| 798 | length: u32, |
| 799 | ) -> Result<Tendril<F, A>, SubtendrilError> { |
| 800 | let self_len = self.len32(); |
| 801 | if offset > self_len || length > (self_len - offset) { |
| 802 | return Err(SubtendrilError::OutOfBounds); |
| 803 | } |
| 804 | |
| 805 | unsafe { |
| 806 | let byte_slice = unsafe_slice(self.as_byte_slice(), offset as usize, length as usize); |
| 807 | if !F::validate_subseq(byte_slice) { |
| 808 | return Err(SubtendrilError::ValidationFailed); |
| 809 | } |
| 810 | |
| 811 | Ok(self.unsafe_subtendril(offset, length)) |
| 812 | } |
| 813 | } |
| 814 | |
| 815 | /// Slice this `Tendril` as a new `Tendril`. |
| 816 | /// |
| 817 | /// Panics on bounds or validity check failure. |
| 818 | #[inline ] |
| 819 | pub fn subtendril(&self, offset: u32, length: u32) -> Tendril<F, A> { |
| 820 | self.try_subtendril(offset, length).unwrap() |
| 821 | } |
| 822 | |
| 823 | /// Try to drop `n` bytes from the front. |
| 824 | /// |
| 825 | /// Returns `Err` if the bytes are not available, or the suffix fails |
| 826 | /// validation. |
| 827 | #[inline ] |
| 828 | pub fn try_pop_front(&mut self, n: u32) -> Result<(), SubtendrilError> { |
| 829 | if n == 0 { |
| 830 | return Ok(()); |
| 831 | } |
| 832 | let old_len = self.len32(); |
| 833 | if n > old_len { |
| 834 | return Err(SubtendrilError::OutOfBounds); |
| 835 | } |
| 836 | let new_len = old_len - n; |
| 837 | |
| 838 | unsafe { |
| 839 | if !F::validate_suffix(unsafe_slice( |
| 840 | self.as_byte_slice(), |
| 841 | n as usize, |
| 842 | new_len as usize, |
| 843 | )) { |
| 844 | return Err(SubtendrilError::ValidationFailed); |
| 845 | } |
| 846 | |
| 847 | self.unsafe_pop_front(n); |
| 848 | Ok(()) |
| 849 | } |
| 850 | } |
| 851 | |
| 852 | /// Drop `n` bytes from the front. |
| 853 | /// |
| 854 | /// Panics if the bytes are not available, or the suffix fails |
| 855 | /// validation. |
| 856 | #[inline ] |
| 857 | pub fn pop_front(&mut self, n: u32) { |
| 858 | self.try_pop_front(n).unwrap() |
| 859 | } |
| 860 | |
| 861 | /// Drop `n` bytes from the back. |
| 862 | /// |
| 863 | /// Returns `Err` if the bytes are not available, or the prefix fails |
| 864 | /// validation. |
| 865 | #[inline ] |
| 866 | pub fn try_pop_back(&mut self, n: u32) -> Result<(), SubtendrilError> { |
| 867 | if n == 0 { |
| 868 | return Ok(()); |
| 869 | } |
| 870 | let old_len = self.len32(); |
| 871 | if n > old_len { |
| 872 | return Err(SubtendrilError::OutOfBounds); |
| 873 | } |
| 874 | let new_len = old_len - n; |
| 875 | |
| 876 | unsafe { |
| 877 | if !F::validate_prefix(unsafe_slice(self.as_byte_slice(), 0, new_len as usize)) { |
| 878 | return Err(SubtendrilError::ValidationFailed); |
| 879 | } |
| 880 | |
| 881 | self.unsafe_pop_back(n); |
| 882 | Ok(()) |
| 883 | } |
| 884 | } |
| 885 | |
| 886 | /// Drop `n` bytes from the back. |
| 887 | /// |
| 888 | /// Panics if the bytes are not available, or the prefix fails |
| 889 | /// validation. |
| 890 | #[inline ] |
| 891 | pub fn pop_back(&mut self, n: u32) { |
| 892 | self.try_pop_back(n).unwrap() |
| 893 | } |
| 894 | |
| 895 | /// View as another format, without validating. |
| 896 | #[inline (always)] |
| 897 | pub unsafe fn reinterpret_view_without_validating<Other>(&self) -> &Tendril<Other, A> |
| 898 | where |
| 899 | Other: fmt::Format, |
| 900 | { |
| 901 | mem::transmute(self) |
| 902 | } |
| 903 | |
| 904 | /// Convert into another format, without validating. |
| 905 | #[inline (always)] |
| 906 | pub unsafe fn reinterpret_without_validating<Other>(self) -> Tendril<Other, A> |
| 907 | where |
| 908 | Other: fmt::Format, |
| 909 | { |
| 910 | mem::transmute(self) |
| 911 | } |
| 912 | |
| 913 | /// Build a `Tendril` by copying a byte slice, without validating. |
| 914 | #[inline ] |
| 915 | pub unsafe fn from_byte_slice_without_validating(x: &[u8]) -> Tendril<F, A> { |
| 916 | assert!(x.len() <= buf32::MAX_LEN); |
| 917 | if x.len() <= MAX_INLINE_LEN { |
| 918 | Tendril::inline(x) |
| 919 | } else { |
| 920 | Tendril::owned_copy(x) |
| 921 | } |
| 922 | } |
| 923 | |
| 924 | /// Push some bytes onto the end of the `Tendril`, without validating. |
| 925 | #[inline ] |
| 926 | pub unsafe fn push_bytes_without_validating(&mut self, buf: &[u8]) { |
| 927 | assert!(buf.len() <= buf32::MAX_LEN); |
| 928 | |
| 929 | let Fixup { |
| 930 | drop_left, |
| 931 | drop_right, |
| 932 | insert_len, |
| 933 | insert_bytes, |
| 934 | } = F::fixup(self.as_byte_slice(), buf); |
| 935 | |
| 936 | // FIXME: think more about overflow |
| 937 | let adj_len = self.len32() + insert_len - drop_left; |
| 938 | |
| 939 | let new_len = adj_len.checked_add(buf.len() as u32).expect(OFLOW) - drop_right; |
| 940 | |
| 941 | let drop_left = drop_left as usize; |
| 942 | let drop_right = drop_right as usize; |
| 943 | |
| 944 | if new_len <= MAX_INLINE_LEN as u32 { |
| 945 | let mut tmp = [0_u8; MAX_INLINE_LEN]; |
| 946 | { |
| 947 | let old = self.as_byte_slice(); |
| 948 | let mut dest = tmp.as_mut_ptr(); |
| 949 | copy_and_advance(&mut dest, unsafe_slice(old, 0, old.len() - drop_left)); |
| 950 | copy_and_advance( |
| 951 | &mut dest, |
| 952 | unsafe_slice(&insert_bytes, 0, insert_len as usize), |
| 953 | ); |
| 954 | copy_and_advance( |
| 955 | &mut dest, |
| 956 | unsafe_slice(buf, drop_right, buf.len() - drop_right), |
| 957 | ); |
| 958 | } |
| 959 | *self = Tendril::inline(&tmp[..new_len as usize]); |
| 960 | } else { |
| 961 | self.make_owned_with_capacity(new_len); |
| 962 | let (owned, _, _) = self.assume_buf(); |
| 963 | let mut dest = owned |
| 964 | .data_ptr() |
| 965 | .offset((owned.len as usize - drop_left) as isize); |
| 966 | copy_and_advance( |
| 967 | &mut dest, |
| 968 | unsafe_slice(&insert_bytes, 0, insert_len as usize), |
| 969 | ); |
| 970 | copy_and_advance( |
| 971 | &mut dest, |
| 972 | unsafe_slice(buf, drop_right, buf.len() - drop_right), |
| 973 | ); |
| 974 | self.set_len(new_len); |
| 975 | } |
| 976 | } |
| 977 | |
| 978 | /// Slice this `Tendril` as a new `Tendril`. |
| 979 | /// |
| 980 | /// Does not check validity or bounds! |
| 981 | #[inline ] |
| 982 | pub unsafe fn unsafe_subtendril(&self, offset: u32, length: u32) -> Tendril<F, A> { |
| 983 | if length <= MAX_INLINE_LEN as u32 { |
| 984 | Tendril::inline(unsafe_slice( |
| 985 | self.as_byte_slice(), |
| 986 | offset as usize, |
| 987 | length as usize, |
| 988 | )) |
| 989 | } else { |
| 990 | self.make_buf_shared(); |
| 991 | self.incref(); |
| 992 | let (buf, _, _) = self.assume_buf(); |
| 993 | Tendril::shared(buf, self.aux() + offset, length) |
| 994 | } |
| 995 | } |
| 996 | |
| 997 | /// Drop `n` bytes from the front. |
| 998 | /// |
| 999 | /// Does not check validity or bounds! |
| 1000 | #[inline ] |
| 1001 | pub unsafe fn unsafe_pop_front(&mut self, n: u32) { |
| 1002 | let new_len = self.len32() - n; |
| 1003 | if new_len <= MAX_INLINE_LEN as u32 { |
| 1004 | *self = Tendril::inline(unsafe_slice( |
| 1005 | self.as_byte_slice(), |
| 1006 | n as usize, |
| 1007 | new_len as usize, |
| 1008 | )); |
| 1009 | } else { |
| 1010 | self.make_buf_shared(); |
| 1011 | self.set_aux(self.aux() + n); |
| 1012 | let len = self.raw_len(); |
| 1013 | self.set_len(len - n); |
| 1014 | } |
| 1015 | } |
| 1016 | |
| 1017 | /// Drop `n` bytes from the back. |
| 1018 | /// |
| 1019 | /// Does not check validity or bounds! |
| 1020 | #[inline ] |
| 1021 | pub unsafe fn unsafe_pop_back(&mut self, n: u32) { |
| 1022 | let new_len = self.len32() - n; |
| 1023 | if new_len <= MAX_INLINE_LEN as u32 { |
| 1024 | *self = Tendril::inline(unsafe_slice(self.as_byte_slice(), 0, new_len as usize)); |
| 1025 | } else { |
| 1026 | self.make_buf_shared(); |
| 1027 | let len = self.raw_len(); |
| 1028 | self.set_len(len - n); |
| 1029 | } |
| 1030 | } |
| 1031 | |
| 1032 | #[inline ] |
| 1033 | unsafe fn incref(&self) { |
| 1034 | (*self.header()).refcount.increment(); |
| 1035 | } |
| 1036 | |
| 1037 | #[inline ] |
| 1038 | unsafe fn make_buf_shared(&self) { |
| 1039 | let p = self.ptr.get().get(); |
| 1040 | if p & 1 == 0 { |
| 1041 | let header = p as *mut Header<A>; |
| 1042 | (*header).cap = self.aux(); |
| 1043 | |
| 1044 | self.ptr.set(NonZeroUsize::new_unchecked(p | 1)); |
| 1045 | self.set_aux(0); |
| 1046 | } |
| 1047 | } |
| 1048 | |
| 1049 | // This is not public as it is of no practical value to users. |
| 1050 | // By and large they shouldn't need to worry about the distinction at all, |
| 1051 | // and going out of your way to make it owned is pointless. |
| 1052 | #[inline ] |
| 1053 | fn make_owned(&mut self) { |
| 1054 | unsafe { |
| 1055 | let ptr = self.ptr.get().get(); |
| 1056 | if ptr <= MAX_INLINE_TAG || (ptr & 1) == 1 { |
| 1057 | *self = Tendril::owned_copy(self.as_byte_slice()); |
| 1058 | } |
| 1059 | } |
| 1060 | } |
| 1061 | |
| 1062 | #[inline ] |
| 1063 | unsafe fn make_owned_with_capacity(&mut self, cap: u32) { |
| 1064 | self.make_owned(); |
| 1065 | let mut buf = self.assume_buf().0; |
| 1066 | buf.grow(cap); |
| 1067 | self.ptr.set(NonZeroUsize::new_unchecked(buf.ptr as usize)); |
| 1068 | self.set_aux(buf.cap); |
| 1069 | } |
| 1070 | |
| 1071 | #[inline (always)] |
| 1072 | unsafe fn header(&self) -> *mut Header<A> { |
| 1073 | (self.ptr.get().get() & !1) as *mut Header<A> |
| 1074 | } |
| 1075 | |
| 1076 | #[inline ] |
| 1077 | unsafe fn assume_buf(&self) -> (Buf32<Header<A>>, bool, u32) { |
| 1078 | let ptr = self.ptr.get().get(); |
| 1079 | let header = self.header(); |
| 1080 | let shared = (ptr & 1) == 1; |
| 1081 | let (cap, offset) = match shared { |
| 1082 | true => ((*header).cap, self.aux()), |
| 1083 | false => (self.aux(), 0), |
| 1084 | }; |
| 1085 | |
| 1086 | ( |
| 1087 | Buf32 { |
| 1088 | ptr: header, |
| 1089 | len: offset + self.len32(), |
| 1090 | cap: cap, |
| 1091 | }, |
| 1092 | shared, |
| 1093 | offset, |
| 1094 | ) |
| 1095 | } |
| 1096 | |
| 1097 | #[inline ] |
| 1098 | unsafe fn inline(x: &[u8]) -> Tendril<F, A> { |
| 1099 | let len = x.len(); |
| 1100 | let t = Tendril { |
| 1101 | ptr: Cell::new(inline_tag(len as u32)), |
| 1102 | buf: UnsafeCell::new(Buffer { inline: [0; 8] }), |
| 1103 | marker: PhantomData, |
| 1104 | refcount_marker: PhantomData, |
| 1105 | }; |
| 1106 | ptr::copy_nonoverlapping(x.as_ptr(), (*t.buf.get()).inline.as_mut_ptr(), len); |
| 1107 | t |
| 1108 | } |
| 1109 | |
| 1110 | #[inline ] |
| 1111 | unsafe fn owned(x: Buf32<Header<A>>) -> Tendril<F, A> { |
| 1112 | Tendril { |
| 1113 | ptr: Cell::new(NonZeroUsize::new_unchecked(x.ptr as usize)), |
| 1114 | buf: UnsafeCell::new(Buffer { |
| 1115 | heap: Heap { |
| 1116 | len: x.len, |
| 1117 | aux: x.cap, |
| 1118 | }, |
| 1119 | }), |
| 1120 | marker: PhantomData, |
| 1121 | refcount_marker: PhantomData, |
| 1122 | } |
| 1123 | } |
| 1124 | |
| 1125 | #[inline ] |
| 1126 | unsafe fn owned_copy(x: &[u8]) -> Tendril<F, A> { |
| 1127 | let len32 = x.len() as u32; |
| 1128 | let mut b = Buf32::with_capacity(len32, Header::new()); |
| 1129 | ptr::copy_nonoverlapping(x.as_ptr(), b.data_ptr(), x.len()); |
| 1130 | b.len = len32; |
| 1131 | Tendril::owned(b) |
| 1132 | } |
| 1133 | |
| 1134 | #[inline ] |
| 1135 | unsafe fn shared(buf: Buf32<Header<A>>, off: u32, len: u32) -> Tendril<F, A> { |
| 1136 | Tendril { |
| 1137 | ptr: Cell::new(NonZeroUsize::new_unchecked((buf.ptr as usize) | 1)), |
| 1138 | buf: UnsafeCell::new(Buffer { |
| 1139 | heap: Heap { len, aux: off }, |
| 1140 | }), |
| 1141 | marker: PhantomData, |
| 1142 | refcount_marker: PhantomData, |
| 1143 | } |
| 1144 | } |
| 1145 | |
| 1146 | #[inline ] |
| 1147 | fn as_byte_slice<'a>(&'a self) -> &'a [u8] { |
| 1148 | unsafe { |
| 1149 | match self.ptr.get().get() { |
| 1150 | EMPTY_TAG => &[], |
| 1151 | n if n <= MAX_INLINE_LEN => (*self.buf.get()).inline.get_unchecked(..n), |
| 1152 | _ => { |
| 1153 | let (buf, _, offset) = self.assume_buf(); |
| 1154 | copy_lifetime( |
| 1155 | self, |
| 1156 | unsafe_slice(buf.data(), offset as usize, self.len32() as usize), |
| 1157 | ) |
| 1158 | } |
| 1159 | } |
| 1160 | } |
| 1161 | } |
| 1162 | |
| 1163 | // There's no need to worry about locking on an atomic Tendril, because it makes it unique as |
| 1164 | // soon as you do that. |
| 1165 | #[inline ] |
| 1166 | fn as_mut_byte_slice<'a>(&'a mut self) -> &'a mut [u8] { |
| 1167 | unsafe { |
| 1168 | match self.ptr.get().get() { |
| 1169 | EMPTY_TAG => &mut [], |
| 1170 | n if n <= MAX_INLINE_LEN => (*self.buf.get()).inline.get_unchecked_mut(..n), |
| 1171 | _ => { |
| 1172 | self.make_owned(); |
| 1173 | let (mut buf, _, offset) = self.assume_buf(); |
| 1174 | let len = self.len32() as usize; |
| 1175 | copy_lifetime_mut(self, unsafe_slice_mut(buf.data_mut(), offset as usize, len)) |
| 1176 | } |
| 1177 | } |
| 1178 | } |
| 1179 | } |
| 1180 | |
| 1181 | unsafe fn raw_len(&self) -> u32 { |
| 1182 | (*self.buf.get()).heap.len |
| 1183 | } |
| 1184 | |
| 1185 | unsafe fn set_len(&mut self, len: u32) { |
| 1186 | (*self.buf.get()).heap.len = len; |
| 1187 | } |
| 1188 | |
| 1189 | unsafe fn aux(&self) -> u32 { |
| 1190 | (*self.buf.get()).heap.aux |
| 1191 | } |
| 1192 | |
| 1193 | unsafe fn set_aux(&self, aux: u32) { |
| 1194 | (*self.buf.get()).heap.aux = aux; |
| 1195 | } |
| 1196 | } |
| 1197 | |
| 1198 | impl<F, A> Tendril<F, A> |
| 1199 | where |
| 1200 | F: fmt::SliceFormat, |
| 1201 | A: Atomicity, |
| 1202 | { |
| 1203 | /// Build a `Tendril` by copying a slice. |
| 1204 | #[inline ] |
| 1205 | pub fn from_slice(x: &F::Slice) -> Tendril<F, A> { |
| 1206 | unsafe { Tendril::from_byte_slice_without_validating(x.as_bytes()) } |
| 1207 | } |
| 1208 | |
| 1209 | /// Push a slice onto the end of the `Tendril`. |
| 1210 | #[inline ] |
| 1211 | pub fn push_slice(&mut self, x: &F::Slice) { |
| 1212 | unsafe { self.push_bytes_without_validating(buf:x.as_bytes()) } |
| 1213 | } |
| 1214 | } |
| 1215 | |
/// A simple wrapper to make `Tendril` `Send`.
///
/// Although there is a certain subset of the operations on a `Tendril` that a `SendTendril` could
/// reasonably implement, in order to clearly separate concerns this type is deliberately
/// minimalist, acting as a safe encapsulation around the invariants which permit `Send`ness and
/// behaving as an opaque object.
///
/// A `SendTendril` may be produced by `Tendril.into_send()` or `SendTendril::from(tendril)`,
/// and may be returned to a `Tendril` by `Tendril::from(self)`.
#[derive (Clone)]
pub struct SendTendril<F>
where
    F: fmt::Format,
{
    // The wrapped tendril. The field is private, so it is only reachable
    // through the conversions described above.
    tendril: Tendril<F>,
}

// NOTE(review): soundness presumably depends on `into_send()` only wrapping
// tendrils that are safe to move across threads — confirm against `into_send`.
unsafe impl<F> Send for SendTendril<F> where F: fmt::Format {}
| 1234 | |
| 1235 | impl<F, A> From<Tendril<F, A>> for SendTendril<F> |
| 1236 | where |
| 1237 | F: fmt::Format, |
| 1238 | A: Atomicity, |
| 1239 | { |
| 1240 | #[inline ] |
| 1241 | fn from(tendril: Tendril<F, A>) -> SendTendril<F> { |
| 1242 | tendril.into_send() |
| 1243 | } |
| 1244 | } |
| 1245 | |
| 1246 | impl<F, A> From<SendTendril<F>> for Tendril<F, A> |
| 1247 | where |
| 1248 | F: fmt::Format, |
| 1249 | A: Atomicity, |
| 1250 | { |
| 1251 | #[inline ] |
| 1252 | fn from(send: SendTendril<F>) -> Tendril<F, A> { |
| 1253 | unsafe { mem::transmute(src:send.tendril) } |
| 1254 | // header.refcount may have been initialised as an Atomic or a NonAtomic, but the value |
| 1255 | // will be the same (1) regardless, because the layout is defined. |
| 1256 | // Thus we don't need to fiddle about resetting it or anything like that. |
| 1257 | } |
| 1258 | } |
| 1259 | |
| 1260 | /// `Tendril`-related methods for Rust slices. |
| 1261 | pub trait SliceExt<F>: fmt::Slice |
| 1262 | where |
| 1263 | F: fmt::SliceFormat<Slice = Self>, |
| 1264 | { |
| 1265 | /// Make a `Tendril` from this slice. |
| 1266 | #[inline ] |
| 1267 | fn to_tendril(&self) -> Tendril<F> { |
| 1268 | // It should be done thusly, but at the time of writing the defaults don't help inference: |
| 1269 | //fn to_tendril<A = NonAtomic>(&self) -> Tendril<Self::Format, A> |
| 1270 | // where A: Atomicity, |
| 1271 | //{ |
| 1272 | Tendril::from_slice(self) |
| 1273 | } |
| 1274 | } |
| 1275 | |
// `str` gets `to_tendril()` producing a tendril with format `fmt::UTF8`.
impl SliceExt<fmt::UTF8> for str {}
// `[u8]` gets `to_tendril()` producing a tendril with format `fmt::Bytes`.
impl SliceExt<fmt::Bytes> for [u8] {}
| 1278 | |
| 1279 | impl<F, A> Tendril<F, A> |
| 1280 | where |
| 1281 | F: for<'a> dynfmt::CharFormat<'a>, |
| 1282 | A: Atomicity, |
| 1283 | { |
| 1284 | /// Remove and return the first character, if any. |
| 1285 | #[inline ] |
| 1286 | pub fn pop_front_char<'a>(&'a mut self) -> Option<char> { |
| 1287 | unsafe { |
| 1288 | let next_char; // first char in iterator |
| 1289 | let mut skip = 0; // number of bytes to skip, or 0 to clear |
| 1290 | |
| 1291 | { |
| 1292 | // <--+ |
| 1293 | // | Creating an iterator borrows self, so introduce a |
| 1294 | // +- scope to contain the borrow (that way we can mutate |
| 1295 | // self below, after this scope exits). |
| 1296 | |
| 1297 | let mut iter = F::char_indices(self.as_byte_slice()); |
| 1298 | match iter.next() { |
| 1299 | Some((_, c)) => { |
| 1300 | next_char = Some(c); |
| 1301 | if let Some((n, _)) = iter.next() { |
| 1302 | skip = n as u32; |
| 1303 | } |
| 1304 | } |
| 1305 | None => { |
| 1306 | next_char = None; |
| 1307 | } |
| 1308 | } |
| 1309 | } |
| 1310 | |
| 1311 | if skip != 0 { |
| 1312 | self.unsafe_pop_front(skip); |
| 1313 | } else { |
| 1314 | self.clear(); |
| 1315 | } |
| 1316 | |
| 1317 | next_char |
| 1318 | } |
| 1319 | } |
| 1320 | |
| 1321 | /// Remove and return a run of characters at the front of the `Tendril` |
| 1322 | /// which are classified the same according to the function `classify`. |
| 1323 | /// |
| 1324 | /// Returns `None` on an empty string. |
| 1325 | #[inline ] |
| 1326 | pub fn pop_front_char_run<'a, C, R>(&'a mut self, mut classify: C) -> Option<(Tendril<F, A>, R)> |
| 1327 | where |
| 1328 | C: FnMut(char) -> R, |
| 1329 | R: PartialEq, |
| 1330 | { |
| 1331 | let (class, first_mismatch); |
| 1332 | { |
| 1333 | let mut chars = unsafe { F::char_indices(self.as_byte_slice()) }; |
| 1334 | let (_, first) = unwrap_or_return!(chars.next(), None); |
| 1335 | class = classify(first); |
| 1336 | first_mismatch = chars.find(|&(_, ch)| &classify(ch) != &class); |
| 1337 | } |
| 1338 | |
| 1339 | match first_mismatch { |
| 1340 | Some((idx, _)) => unsafe { |
| 1341 | let t = self.unsafe_subtendril(0, idx as u32); |
| 1342 | self.unsafe_pop_front(idx as u32); |
| 1343 | Some((t, class)) |
| 1344 | }, |
| 1345 | None => { |
| 1346 | let t = self.clone(); |
| 1347 | self.clear(); |
| 1348 | Some((t, class)) |
| 1349 | } |
| 1350 | } |
| 1351 | } |
| 1352 | |
| 1353 | /// Push a character, if it can be represented in this format. |
| 1354 | #[inline ] |
| 1355 | pub fn try_push_char(&mut self, c: char) -> Result<(), ()> { |
| 1356 | F::encode_char(c, |b| unsafe { |
| 1357 | self.push_bytes_without_validating(b); |
| 1358 | }) |
| 1359 | } |
| 1360 | } |
| 1361 | |
/// Extension trait for `io::Read`.
pub trait ReadExt: io::Read {
    /// Read from `self` into the byte tendril `buf`.
    ///
    /// The implementation for `T: io::Read` in this file reads until EOF,
    /// appends the bytes to `buf`, and returns how many bytes were appended.
    fn read_to_tendril<A>(&mut self, buf: &mut Tendril<fmt::Bytes, A>) -> io::Result<usize>
    where
        A: Atomicity;
}
| 1368 | |
| 1369 | impl<T> ReadExt for T |
| 1370 | where |
| 1371 | T: io::Read, |
| 1372 | { |
| 1373 | /// Read all bytes until EOF. |
| 1374 | fn read_to_tendril<A>(&mut self, buf: &mut Tendril<fmt::Bytes, A>) -> io::Result<usize> |
| 1375 | where |
| 1376 | A: Atomicity, |
| 1377 | { |
| 1378 | // Adapted from libstd/io/mod.rs. |
| 1379 | const DEFAULT_BUF_SIZE: u32 = 64 * 1024; |
| 1380 | |
| 1381 | let start_len = buf.len(); |
| 1382 | let mut len = start_len; |
| 1383 | let mut new_write_size = 16; |
| 1384 | let ret; |
| 1385 | loop { |
| 1386 | if len == buf.len() { |
| 1387 | if new_write_size < DEFAULT_BUF_SIZE { |
| 1388 | new_write_size *= 2; |
| 1389 | } |
| 1390 | // FIXME: this exposes uninitialized bytes to a generic R type |
| 1391 | // this is fine for R=File which never reads these bytes, |
| 1392 | // but user-defined types might. |
| 1393 | // The standard library pushes zeros to `Vec<u8>` for that reason. |
| 1394 | unsafe { |
| 1395 | buf.push_uninitialized(new_write_size); |
| 1396 | } |
| 1397 | } |
| 1398 | |
| 1399 | match self.read(&mut buf[len..]) { |
| 1400 | Ok(0) => { |
| 1401 | ret = Ok(len - start_len); |
| 1402 | break; |
| 1403 | } |
| 1404 | Ok(n) => len += n, |
| 1405 | Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} |
| 1406 | Err(e) => { |
| 1407 | ret = Err(e); |
| 1408 | break; |
| 1409 | } |
| 1410 | } |
| 1411 | } |
| 1412 | |
| 1413 | let buf_len = buf.len32(); |
| 1414 | buf.pop_back(buf_len - (len as u32)); |
| 1415 | ret |
| 1416 | } |
| 1417 | } |
| 1418 | |
| 1419 | impl<A> io::Write for Tendril<fmt::Bytes, A> |
| 1420 | where |
| 1421 | A: Atomicity, |
| 1422 | { |
| 1423 | #[inline ] |
| 1424 | fn write(&mut self, buf: &[u8]) -> io::Result<usize> { |
| 1425 | self.push_slice(buf); |
| 1426 | Ok(buf.len()) |
| 1427 | } |
| 1428 | |
| 1429 | #[inline ] |
| 1430 | fn write_all(&mut self, buf: &[u8]) -> io::Result<()> { |
| 1431 | self.push_slice(buf); |
| 1432 | Ok(()) |
| 1433 | } |
| 1434 | |
| 1435 | #[inline (always)] |
| 1436 | fn flush(&mut self) -> io::Result<()> { |
| 1437 | Ok(()) |
| 1438 | } |
| 1439 | } |
| 1440 | |
#[cfg(feature = "encoding")]
impl<A> encoding::ByteWriter for Tendril<fmt::Bytes, A>
where
    A: Atomicity,
{
    /// Append a single byte.
    #[inline]
    fn write_byte(&mut self, b: u8) {
        let single = [b];
        self.push_slice(&single);
    }

    /// Append a run of bytes.
    #[inline]
    fn write_bytes(&mut self, v: &[u8]) {
        self.push_slice(v);
    }

    /// Reserve room for `additional` more bytes, clamped to `u32::MAX`.
    #[inline]
    fn writer_hint(&mut self, additional: usize) {
        let limit = u32::MAX as usize;
        let clamped = if additional < limit { additional } else { limit };
        self.reserve(clamped as u32);
    }
}
| 1461 | |
| 1462 | impl<F, A> Tendril<F, A> |
| 1463 | where |
| 1464 | A: Atomicity, |
| 1465 | F: fmt::SliceFormat<Slice = [u8]>, |
| 1466 | { |
| 1467 | /// Decode from some character encoding into UTF-8. |
| 1468 | /// |
| 1469 | /// See the [rust-encoding docs](https://lifthrasiir.github.io/rust-encoding/encoding/) |
| 1470 | /// for more information. |
| 1471 | #[inline ] |
| 1472 | #[cfg (feature = "encoding" )] |
| 1473 | pub fn decode( |
| 1474 | &self, |
| 1475 | encoding: EncodingRef, |
| 1476 | trap: DecoderTrap, |
| 1477 | ) -> Result<Tendril<fmt::UTF8, A>, ::std::borrow::Cow<'static, str>> { |
| 1478 | let mut ret = Tendril::new(); |
| 1479 | encoding.decode_to(&*self, trap, &mut ret).map(|_| ret) |
| 1480 | } |
| 1481 | |
| 1482 | /// Push "uninitialized bytes" onto the end. |
| 1483 | /// |
| 1484 | /// Really, this grows the tendril without writing anything to the new area. |
| 1485 | /// It's only defined for byte tendrils because it's only useful if you |
| 1486 | /// plan to then mutate the buffer. |
| 1487 | #[inline ] |
| 1488 | pub unsafe fn push_uninitialized(&mut self, n: u32) { |
| 1489 | let new_len = self.len32().checked_add(n).expect(OFLOW); |
| 1490 | if new_len <= MAX_INLINE_LEN as u32 && self.ptr.get().get() <= MAX_INLINE_TAG { |
| 1491 | self.ptr.set(inline_tag(new_len)) |
| 1492 | } else { |
| 1493 | self.make_owned_with_capacity(new_len); |
| 1494 | self.set_len(new_len); |
| 1495 | } |
| 1496 | } |
| 1497 | } |
| 1498 | |
| 1499 | impl<A> strfmt::Display for Tendril<fmt::UTF8, A> |
| 1500 | where |
| 1501 | A: Atomicity, |
| 1502 | { |
| 1503 | #[inline ] |
| 1504 | fn fmt(&self, f: &mut strfmt::Formatter) -> strfmt::Result { |
| 1505 | <str as strfmt::Display>::fmt(&**self, f) |
| 1506 | } |
| 1507 | } |
| 1508 | |
| 1509 | impl<A> str::FromStr for Tendril<fmt::UTF8, A> |
| 1510 | where |
| 1511 | A: Atomicity, |
| 1512 | { |
| 1513 | type Err = (); |
| 1514 | |
| 1515 | #[inline ] |
| 1516 | fn from_str(s: &str) -> Result<Self, ()> { |
| 1517 | Ok(Tendril::from_slice(s)) |
| 1518 | } |
| 1519 | } |
| 1520 | |
| 1521 | impl<A> strfmt::Write for Tendril<fmt::UTF8, A> |
| 1522 | where |
| 1523 | A: Atomicity, |
| 1524 | { |
| 1525 | #[inline ] |
| 1526 | fn write_str(&mut self, s: &str) -> strfmt::Result { |
| 1527 | self.push_slice(s); |
| 1528 | Ok(()) |
| 1529 | } |
| 1530 | } |
| 1531 | |
#[cfg(feature = "encoding")]
impl<A> encoding::StringWriter for Tendril<fmt::UTF8, A>
where
    A: Atomicity,
{
    /// Append a single character.
    #[inline]
    fn write_char(&mut self, c: char) {
        Tendril::push_char(self, c);
    }

    /// Append a string slice.
    #[inline]
    fn write_str(&mut self, s: &str) {
        Tendril::push_slice(self, s);
    }

    /// Reserve room for `additional` more bytes, clamped to `u32::MAX`.
    #[inline]
    fn writer_hint(&mut self, additional: usize) {
        let limit = u32::MAX as usize;
        let clamped = if additional < limit { additional } else { limit };
        self.reserve(clamped as u32);
    }
}
| 1552 | |
| 1553 | impl<A> Tendril<fmt::UTF8, A> |
| 1554 | where |
| 1555 | A: Atomicity, |
| 1556 | { |
| 1557 | /// Encode from UTF-8 into some other character encoding. |
| 1558 | /// |
| 1559 | /// See the [rust-encoding docs](https://lifthrasiir.github.io/rust-encoding/encoding/) |
| 1560 | /// for more information. |
| 1561 | #[inline ] |
| 1562 | #[cfg (feature = "encoding" )] |
| 1563 | pub fn encode( |
| 1564 | &self, |
| 1565 | encoding: EncodingRef, |
| 1566 | trap: EncoderTrap, |
| 1567 | ) -> Result<Tendril<fmt::Bytes, A>, ::std::borrow::Cow<'static, str>> { |
| 1568 | let mut ret = Tendril::new(); |
| 1569 | encoding.encode_to(&*self, trap, &mut ret).map(|_| ret) |
| 1570 | } |
| 1571 | |
| 1572 | /// Push a character onto the end. |
| 1573 | #[inline ] |
| 1574 | pub fn push_char(&mut self, c: char) { |
| 1575 | unsafe { |
| 1576 | self.push_bytes_without_validating(c.encode_utf8(&mut [0_u8; 4]).as_bytes()); |
| 1577 | } |
| 1578 | } |
| 1579 | |
| 1580 | /// Create a `Tendril` from a single character. |
| 1581 | #[inline ] |
| 1582 | pub fn from_char(c: char) -> Tendril<fmt::UTF8, A> { |
| 1583 | let mut t: Tendril<fmt::UTF8, A> = Tendril::new(); |
| 1584 | t.push_char(c); |
| 1585 | t |
| 1586 | } |
| 1587 | |
| 1588 | /// Helper for the `format_tendril!` macro. |
| 1589 | #[inline ] |
| 1590 | pub fn format(args: strfmt::Arguments) -> Tendril<fmt::UTF8, A> { |
| 1591 | use std::fmt::Write; |
| 1592 | let mut output: Tendril<fmt::UTF8, A> = Tendril::new(); |
| 1593 | let _ = write!(&mut output, " {}" , args); |
| 1594 | output |
| 1595 | } |
| 1596 | } |
| 1597 | |
/// Build a `StrTendril` through string formatting.
///
/// Takes the same arguments as the standard `format!` macro.
#[macro_export]
macro_rules! format_tendril {
    ($($tokens:tt)*) => {
        $crate::StrTendril::format(format_args!($($tokens)*))
    };
}
| 1605 | |
| 1606 | impl<'a, F, A> From<&'a F::Slice> for Tendril<F, A> |
| 1607 | where |
| 1608 | F: fmt::SliceFormat, |
| 1609 | A: Atomicity, |
| 1610 | { |
| 1611 | #[inline ] |
| 1612 | fn from(input: &F::Slice) -> Tendril<F, A> { |
| 1613 | Tendril::from_slice(input) |
| 1614 | } |
| 1615 | } |
| 1616 | |
| 1617 | impl<A> From<String> for Tendril<fmt::UTF8, A> |
| 1618 | where |
| 1619 | A: Atomicity, |
| 1620 | { |
| 1621 | #[inline ] |
| 1622 | fn from(input: String) -> Tendril<fmt::UTF8, A> { |
| 1623 | Tendril::from_slice(&*input) |
| 1624 | } |
| 1625 | } |
| 1626 | |
| 1627 | impl<F, A> AsRef<F::Slice> for Tendril<F, A> |
| 1628 | where |
| 1629 | F: fmt::SliceFormat, |
| 1630 | A: Atomicity, |
| 1631 | { |
| 1632 | #[inline ] |
| 1633 | fn as_ref(&self) -> &F::Slice { |
| 1634 | &**self |
| 1635 | } |
| 1636 | } |
| 1637 | |
| 1638 | impl<A> From<Tendril<fmt::UTF8, A>> for String |
| 1639 | where |
| 1640 | A: Atomicity, |
| 1641 | { |
| 1642 | #[inline ] |
| 1643 | fn from(input: Tendril<fmt::UTF8, A>) -> String { |
| 1644 | String::from(&*input) |
| 1645 | } |
| 1646 | } |
| 1647 | |
| 1648 | impl<'a, A> From<&'a Tendril<fmt::UTF8, A>> for String |
| 1649 | where |
| 1650 | A: Atomicity, |
| 1651 | { |
| 1652 | #[inline ] |
| 1653 | fn from(input: &'a Tendril<fmt::UTF8, A>) -> String { |
| 1654 | String::from(&**input) |
| 1655 | } |
| 1656 | } |
| 1657 | |
| 1658 | #[cfg (all(test, feature = "bench" ))] |
| 1659 | #[path = "bench.rs" ] |
| 1660 | mod bench; |
| 1661 | |
| 1662 | #[cfg (test)] |
| 1663 | mod test { |
| 1664 | use super::{ |
| 1665 | Atomic, ByteTendril, Header, NonAtomic, ReadExt, SendTendril, SliceExt, StrTendril, Tendril, |
| 1666 | }; |
| 1667 | use fmt; |
| 1668 | use std::iter; |
| 1669 | use std::thread; |
| 1670 | |
// Compile-time check: instantiating this with `T` only type-checks when `T: Send`.
fn assert_send<T>()
where
    T: Send,
{
}
| 1672 | |
// Empty, inline-sized and longer inputs round-trip through `to_tendril`.
#[test]
fn smoke_test() {
    let strings = ["", "abc", "Hello, world!"];
    for text in strings.iter() {
        assert_eq!(*text, &*text.to_tendril());
    }

    let byte_strings: [&[u8]; 3] = [b"", b"abc", b"Hello, world!"];
    for bytes in byte_strings.iter() {
        assert_eq!(*bytes, &*bytes.to_tendril());
    }
}
| 1683 | |
// Checks the in-memory sizes of the tendril types and their headers.
#[test]
fn assert_sizes() {
    use std::mem::size_of;

    // A zero-field type with a `Drop` impl only has non-zero size when the
    // compiler stores a drop flag inside the value.
    struct EmptyWithDrop;
    impl Drop for EmptyWithDrop {
        fn drop(&mut self) {}
    }
    let drop_flag_bytes = if size_of::<EmptyWithDrop>() > 0 { 1 } else { 0 };

    let pointer_bytes = size_of::<*const ()>();
    let correct = pointer_bytes + 8 + drop_flag_bytes;

    assert_eq!(correct, size_of::<ByteTendril>());
    assert_eq!(correct, size_of::<StrTendril>());

    // Wrapping in `Option` must not add any space.
    assert_eq!(correct, size_of::<Option<ByteTendril>>());
    assert_eq!(correct, size_of::<Option<StrTendril>>());

    // The header is two pointers wide, whatever the atomicity.
    assert_eq!(pointer_bytes * 2, size_of::<Header<Atomic>>());
    assert_eq!(size_of::<Header<Atomic>>(), size_of::<Header<NonAtomic>>());
}
| 1716 | |
// Byte tendrils accept arbitrary bytes; string tendrils reject input that
// is not valid UTF-8, both on construction and on push.
#[test ]
fn validate_utf8() {
    // 0xFF never appears in valid UTF-8, but is fine as raw bytes.
    assert!(ByteTendril::try_from_byte_slice(b" \xFF" ).is_ok());
    assert!(StrTendril::try_from_byte_slice(b" \xFF" ).is_err());
    // Broken or truncated multi-byte sequences are rejected.
    assert!(StrTendril::try_from_byte_slice(b" \xEA\x99\xFF" ).is_err());
    assert!(StrTendril::try_from_byte_slice(b" \xEA\x99" ).is_err());
    assert!(StrTendril::try_from_byte_slice(b" \xEA\x99\xAE\xEA" ).is_err());
    // A complete three-byte sequence decodes to U+A66E.
    assert_eq!(
        " \u{a66e}" ,
        &*StrTendril::try_from_byte_slice(b" \xEA\x99\xAE" ).unwrap()
    );

    // The same validation applies to `try_push_bytes`.
    let mut t = StrTendril::new();
    assert!(t.try_push_bytes(b" \xEA\x99" ).is_err());
    assert!(t.try_push_bytes(b" \xAE" ).is_err());
    assert!(t.try_push_bytes(b" \xEA\x99\xAE" ).is_ok());
    assert_eq!(" \u{a66e}" , &*t);
}
| 1735 | |
// Cloning shares the buffer; pushing to one clone gives it its own buffer.
#[test ]
fn share_and_unshare() {
    let s = b"foobarbaz" .to_tendril();
    assert_eq!(b"foobarbaz" , &*s);
    // A freshly built tendril is not shared.
    assert!(!s.is_shared());

    // After a clone both handles point at the same bytes and report shared.
    let mut t = s.clone();
    assert_eq!(s.as_ptr(), t.as_ptr());
    assert!(s.is_shared());
    assert!(t.is_shared());

    // Pushing to `t` must leave `s` untouched and move `t` to other storage.
    t.push_slice(b"quux" );
    assert_eq!(b"foobarbaz" , &*s);
    assert_eq!(b"foobarbazquux" , &*t);
    assert!(s.as_ptr() != t.as_ptr());
    assert!(!t.is_shared());
}
| 1753 | |
// `Display` output equals the tendril's text, before and after cloning and
// pushing.
#[test ]
fn format_display() {
    assert_eq!("foobar" , &*format!("{}" , "foobar" .to_tendril()));

    let mut s = "foo" .to_tendril();
    assert_eq!("foo" , &*format!("{}" , s));

    // A clone formats the same as the original.
    let t = s.clone();
    assert_eq!("foo" , &*format!("{}" , s));
    assert_eq!("foo" , &*format!("{}" , t));

    // Pushing to `s` changes only `s`.
    s.push_slice("barbaz!" );
    assert_eq!("foobarbaz!" , &*format!("{}" , s));
    assert_eq!("foo" , &*format!("{}" , t));
}
| 1769 | |
// `Debug` output names the format and the storage kind
// (inline / owned / shared) ahead of the contents.
#[test ]
fn format_debug() {
    // Short contents are reported as inline.
    assert_eq!(
        r#"Tendril<UTF8>(inline: "foobar")"# ,
        &*format!("{:?}" , "foobar" .to_tendril())
    );
    assert_eq!(
        r#"Tendril<Bytes>(inline: [102, 111, 111, 98, 97, 114])"# ,
        &*format!("{:?}" , b"foobar" .to_tendril())
    );

    // Longer contents start out owned ...
    let t = "anextralongstring" .to_tendril();
    assert_eq!(
        r#"Tendril<UTF8>(owned: "anextralongstring")"# ,
        &*format!("{:?}" , t)
    );
    // ... and are reported as shared once cloned, even though the clone is
    // dropped immediately.
    let _ = t.clone();
    assert_eq!(
        r#"Tendril<UTF8>(shared: "anextralongstring")"# ,
        &*format!("{:?}" , t)
    );
}
| 1792 | |
// `subtendril`, `pop_front` and `pop_back` on short and long strings.
#[test ]
fn subtendril() {
    // Short source string.
    assert_eq!("foo" .to_tendril(), "foo-bar" .to_tendril().subtendril(0, 3));
    assert_eq!("bar" .to_tendril(), "foo-bar" .to_tendril().subtendril(4, 3));

    let mut t = "foo-bar" .to_tendril();
    t.pop_front(2);
    assert_eq!("o-bar" .to_tendril(), t);
    t.pop_back(1);
    assert_eq!("o-ba" .to_tendril(), t);

    // Longer source string, short results.
    assert_eq!(
        "foo" .to_tendril(),
        "foo-a-longer-string-bar-baz" .to_tendril().subtendril(0, 3)
    );
    assert_eq!(
        "oo-a-" .to_tendril(),
        "foo-a-longer-string-bar-baz" .to_tendril().subtendril(1, 5)
    );
    assert_eq!(
        "bar" .to_tendril(),
        "foo-a-longer-string-bar-baz" .to_tendril().subtendril(20, 3)
    );

    // Popping from a long string leaves a long result, which is reported as
    // shared.
    let mut t = "another rather long string" .to_tendril();
    t.pop_front(2);
    assert!(t.starts_with("other rather" ));
    t.pop_back(1);
    assert_eq!("other rather long strin" .to_tendril(), t);
    assert!(t.is_shared());
}
| 1824 | |
// Slicing or popping in the middle of a multi-byte character must fail.
//
// The literals hold exactly the characters under test. Any padding byte
// around them would make some of these cuts land on a valid boundary and
// the `is_err` assertions false.
#[test]
fn subtendril_invalid() {
    // U+A66E is three bytes long: no proper sub-range is valid.
    assert!("\u{a66e}".to_tendril().try_subtendril(0, 2).is_err());
    assert!("\u{a66e}".to_tendril().try_subtendril(1, 2).is_err());

    // U+1F4A9 is four bytes long: no proper sub-range is valid.
    assert!("\u{1f4a9}".to_tendril().try_subtendril(0, 3).is_err());
    assert!("\u{1f4a9}".to_tendril().try_subtendril(0, 2).is_err());
    assert!("\u{1f4a9}".to_tendril().try_subtendril(0, 1).is_err());
    assert!("\u{1f4a9}".to_tendril().try_subtendril(1, 3).is_err());
    assert!("\u{1f4a9}".to_tendril().try_subtendril(1, 2).is_err());
    assert!("\u{1f4a9}".to_tendril().try_subtendril(1, 1).is_err());
    assert!("\u{1f4a9}".to_tendril().try_subtendril(2, 2).is_err());
    assert!("\u{1f4a9}".to_tendril().try_subtendril(2, 1).is_err());
    assert!("\u{1f4a9}".to_tendril().try_subtendril(3, 1).is_err());

    // Popping from the front only succeeds on the character boundary.
    let mut t = "\u{1f4a9}zzzzzz".to_tendril();
    assert!(t.try_pop_front(1).is_err());
    assert!(t.try_pop_front(2).is_err());
    assert!(t.try_pop_front(3).is_err());
    assert!(t.try_pop_front(4).is_ok());
    assert_eq!("zzzzzz", &*t);

    // Same from the back.
    let mut t = "zzzzzz\u{1f4a9}".to_tendril();
    assert!(t.try_pop_back(1).is_err());
    assert!(t.try_pop_back(2).is_err());
    assert!(t.try_pop_back(3).is_err());
    assert!(t.try_pop_back(4).is_ok());
    assert_eq!("zzzzzz", &*t);
}
| 1854 | |
// Conversions between byte, UTF-8 and ASCII tendrils.
#[test ]
fn conversion() {
    // UTF-8 to bytes, by reference and by value.
    assert_eq!(
        &[0x66, 0x6F, 0x6F].to_tendril(),
        "foo" .to_tendril().as_bytes()
    );
    assert_eq!(
        [0x66, 0x6F, 0x6F].to_tendril(),
        "foo" .to_tendril().into_bytes()
    );

    // Bytes reinterpreted as ASCII, then widened to its superset.
    let ascii: Tendril<fmt::ASCII> = b"hello" .to_tendril().try_reinterpret().unwrap();
    assert_eq!(&"hello" .to_tendril(), ascii.as_superset());
    assert_eq!("hello" .to_tendril(), ascii.clone().into_superset());

    // A non-ASCII byte cannot be reinterpreted as ASCII.
    assert!(b" \xFF"
        .to_tendril()
        .try_reinterpret::<fmt::ASCII>()
        .is_err());

    // UTF-8 narrowed to ASCII by reference.
    let t = "hello" .to_tendril();
    let ascii: &Tendril<fmt::ASCII> = t.try_as_subset().unwrap();
    assert_eq!(b"hello" , &**ascii.as_bytes());

    // A non-ASCII character blocks the by-reference conversions.
    assert!("ő"
        .to_tendril()
        .try_reinterpret_view::<fmt::ASCII>()
        .is_err());
    assert!("ő" .to_tendril().try_as_subset::<fmt::ASCII>().is_err());

    // UTF-8 narrowed to ASCII by value.
    let ascii: Tendril<fmt::ASCII> = "hello" .to_tendril().try_into_subset().unwrap();
    assert_eq!(b"hello" , &**ascii.as_bytes());

    // A non-ASCII character blocks the by-value conversions too.
    assert!("ő" .to_tendril().try_reinterpret::<fmt::ASCII>().is_err());
    assert!("ő" .to_tendril().try_into_subset::<fmt::ASCII>().is_err());
}
| 1891 | |
// `clear` empties a tendril without disturbing a clone of it.
#[test]
fn clear() {
    let mut short = "foo-".to_tendril();
    short.clear();
    assert_eq!(short.len(), 0);
    assert_eq!(short.len32(), 0);
    assert_eq!(&*short, "");

    let mut long = "much longer".to_tendril();
    let copy = long.clone();
    long.clear();
    assert_eq!(long.len(), 0);
    assert_eq!(long.len32(), 0);
    assert_eq!(&*long, "");
    // The clone still holds the original text.
    assert_eq!(&*copy, "much longer");
}
| 1908 | |
// `push_tendril` appends the contents of another tendril.
#[test]
fn push_tendril() {
    let suffix = "xyz".to_tendril();
    let mut joined = "abc".to_tendril();
    joined.push_tendril(&suffix);
    assert_eq!("abcxyz", &*joined);
}
| 1915 | |
// WTF-8 allows lone surrogates but not a lead/trail pair encoded as two
// separate three-byte sequences.
//
// The byte literals hold exactly the sequences under test. A padding byte
// between or before them would stop the surrogates from being adjacent and
// shift every offset used below.
#[test]
fn wtf8() {
    // Lone lead and lone trail surrogates are each accepted ...
    assert!(Tendril::<fmt::WTF8>::try_from_byte_slice(b"\xED\xA0\xBD").is_ok());
    assert!(Tendril::<fmt::WTF8>::try_from_byte_slice(b"\xED\xB2\xA9").is_ok());
    // ... but the two back to back are rejected.
    assert!(Tendril::<fmt::WTF8>::try_from_byte_slice(b"\xED\xA0\xBD\xED\xB2\xA9").is_err());

    // A lone lead surrogate followed by U+A66E.
    let t: Tendril<fmt::WTF8> =
        Tendril::try_from_byte_slice(b"\xED\xA0\xBD\xEA\x99\xAE").unwrap();
    assert!(b"\xED\xA0\xBD".to_tendril().try_reinterpret().unwrap() == t.subtendril(0, 3));
    assert!(b"\xEA\x99\xAE".to_tendril().try_reinterpret().unwrap() == t.subtendril(3, 3));
    assert!(t.try_reinterpret_view::<fmt::UTF8>().is_err());

    // Cutting inside the surrogate's three bytes fails.
    assert!(t.try_subtendril(0, 1).is_err());
    assert!(t.try_subtendril(0, 2).is_err());
    assert!(t.try_subtendril(1, 1).is_err());

    // Cutting inside the ordinary character's three bytes fails too.
    assert!(t.try_subtendril(3, 1).is_err());
    assert!(t.try_subtendril(3, 2).is_err());
    assert!(t.try_subtendril(4, 1).is_err());

    // paired surrogates: pushing the trail after the lead merges them into
    // the four-byte encoding of U+1F4A9, which is valid UTF-8.
    let mut t: Tendril<fmt::WTF8> = Tendril::try_from_byte_slice(b"\xED\xA0\xBD").unwrap();
    assert!(t.try_push_bytes(b"\xED\xB2\xA9").is_ok());
    assert_eq!(b"\xF0\x9F\x92\xA9", t.as_byte_slice());
    assert!(t.try_reinterpret_view::<fmt::UTF8>().is_ok());

    // unpaired surrogates: two lead surrogates stay separate, and only the
    // second one merges with a following trail surrogate.
    let mut t: Tendril<fmt::WTF8> = Tendril::try_from_byte_slice(b"\xED\xA0\xBB").unwrap();
    assert!(t.try_push_bytes(b"\xED\xA0").is_err());
    assert!(t.try_push_bytes(b"\xED").is_err());
    assert!(t.try_push_bytes(b"\xA0").is_err());
    assert!(t.try_push_bytes(b"\xED\xA0\xBD").is_ok());
    assert_eq!(b"\xED\xA0\xBB\xED\xA0\xBD", t.as_byte_slice());
    assert!(t.try_push_bytes(b"\xED\xB2\xA9").is_ok());
    assert_eq!(b"\xED\xA0\xBB\xF0\x9F\x92\xA9", t.as_byte_slice());
    assert!(t.try_reinterpret_view::<fmt::UTF8>().is_err());
}
| 1953 | |
/// `pop_front_char` yields characters front to back and keeps returning `None` once empty.
#[test]
fn front_char() {
    let mut empty = "".to_tendril();
    for _ in 0..2 {
        assert_eq!(None, empty.pop_front_char());
    }

    let mut abc = "abc".to_tendril();
    for &want in &[Some('a'), Some('b'), Some('c'), None, None] {
        assert_eq!(want, abc.pop_front_char());
    }

    // 'ő' takes two bytes, so four popped characters remove five bytes (28 -> 23).
    let mut long = "főo-a-longer-string-bar-baz".to_tendril();
    assert_eq!(28, long.len());
    for &want in &['f', 'ő', 'o', '-'] {
        assert_eq!(Some(want), long.pop_front_char());
    }
    assert_eq!(23, long.len());
}
| 1975 | |
/// Table-driven check of `pop_front_char_run` with `char::is_whitespace` as the classifier.
///
/// Each row is `(input, expected)`, where `expected` is the popped run together with the
/// classifier's value for that run, or `None` when nothing can be popped.
#[test]
fn char_run() {
    // NOTE(review): the last three rows are identical as written; confirm whether they were
    // meant to differ in the amount of whitespace.
    let cases = [
        ("", None),
        (" ", Some((" ", true))),
        ("x", Some(("x", false))),
        (" \t \n", Some((" \t \n", true))),
        ("xyzzy", Some(("xyzzy", false))),
        (" xyzzy", Some((" ", true))),
        ("xyzzy ", Some(("xyzzy", false))),
        (" xyzzy ", Some((" ", true))),
        ("xyzzy hi", Some(("xyzzy", false))),
        ("中 ", Some(("中", false))),
        (" 中 ", Some((" ", true))),
        (" 中 ", Some((" ", true))),
        (" 中 ", Some((" ", true))),
    ];

    for &(input, expected) in cases.iter() {
        let mut tendril = input.to_tendril();
        let popped = tendril.pop_front_char_run(char::is_whitespace);
        if let Some((want_run, want_class)) = expected {
            let (got_run, got_class) = popped.unwrap();
            assert_eq!(want_run, &*got_run);
            assert_eq!(want_class, got_class);
        } else {
            assert!(popped.is_none());
        }
    }
}
| 2005 | |
/// Mutating bytes through indexing on a short byte tendril, then growing it with
/// `push_uninitialized` and shrinking it again with `pop_back`.
///
/// "xyő" is four bytes (`x`, `y`, `\xC5`, `\x91`), so index 3 is the second byte of 'ő'.
#[test]
fn deref_mut_inline() {
    let mut t = "xyő".to_tendril().into_bytes();
    // Overwriting the continuation byte with 0xFF makes the buffer invalid UTF-8.
    t[3] = 0xff;
    assert_eq!(b"xy\xC5\xFF", &*t);
    assert!(t.try_reinterpret_view::<fmt::UTF8>().is_err());
    // `\xC5\x8B` is 'ŋ', so the buffer is valid UTF-8 again.
    t[3] = 0x8b;
    assert_eq!("xyŋ", &**t.try_reinterpret_view::<fmt::UTF8>().unwrap());

    // SAFETY: the three bytes added by the first `push_uninitialized` are all written
    // (indices 4..=6) before the buffer is read, and the twenty bytes added by the second
    // call are popped again without ever being read.
    unsafe {
        t.push_uninitialized(3);
        t[4] = 0xEA;
        t[5] = 0x99;
        t[6] = 0xAE;
        assert_eq!(
            "xyŋ\u{a66e}",
            &**t.try_reinterpret_view::<fmt::UTF8>().unwrap()
        );
        // Growing well past the current length and shrinking back must preserve the contents.
        t.push_uninitialized(20);
        t.pop_back(20);
        assert_eq!(
            "xyŋ\u{a66e}",
            &**t.try_reinterpret_view::<fmt::UTF8>().unwrap()
        );
    }
}
| 2032 | |
/// Writing through an index on a shared tendril must un-share it and leave the other
/// handle's contents untouched.
#[test]
fn deref_mut() {
    let mut t = b"0123456789".to_tendril();
    let u = t.clone();
    assert!(t.is_shared());
    // Replaces only the last of the ten bytes.
    t[9] = 0xff;
    assert!(!t.is_shared());
    assert_eq!(b"0123456789", &*u);
    assert_eq!(b"012345678\xff", &*t);
}
| 2043 | |
/// Pushes characters of one, two, three and four UTF-8 bytes and checks the accumulated
/// string after each push.
#[test]
fn push_char() {
    let mut t = "xyz".to_tendril();
    t.push_char('o');
    assert_eq!("xyzo", &*t);
    t.push_char('ő');
    assert_eq!("xyzoő", &*t);
    t.push_char('\u{a66e}');
    assert_eq!("xyzoő\u{a66e}", &*t);
    t.push_char('\u{1f4a9}');
    assert_eq!("xyzoő\u{a66e}\u{1f4a9}", &*t);
    // 3 ("xyz") + 1 + 2 + 3 + 4 bytes.
    assert_eq!(t.len(), 13);
}
| 2057 | |
/// Checks `encode` against known byte sequences for WINDOWS-949 and KOI8-U, and that
/// encoding with `EncoderTrap::Strict` fails when a character cannot be encoded.
#[test]
#[cfg(feature = "encoding")]
fn encode() {
    use encoding::{all, EncoderTrap};

    // Eight Hangul syllables and one ASCII space map to the 17 expected bytes.
    let t = "안녕하세요 러스트".to_tendril();
    assert_eq!(
        b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\xbf\xe4\x20\xb7\xaf\xbd\xba\xc6\xae",
        &*t.encode(all::WINDOWS_949, EncoderTrap::Strict).unwrap()
    );

    // With `EncoderTrap::Replace` the expected output carries `?` in place of U+A66E.
    let t = "Энергия пробуждения ия-я-я! \u{a66e}".to_tendril();
    assert_eq!(
        b"\xfc\xce\xc5\xd2\xc7\xc9\xd1 \xd0\xd2\xcf\xc2\xd5\xd6\xc4\xc5\xce\
          \xc9\xd1 \xc9\xd1\x2d\xd1\x2d\xd1\x21 ?",
        &*t.encode(all::KOI8_U, EncoderTrap::Replace).unwrap()
    );

    let t = "\u{1f4a9}".to_tendril();
    assert!(t.encode(all::WINDOWS_1252, EncoderTrap::Strict).is_err());
}
| 2079 | |
/// Checks `decode` against known text for WINDOWS-949 and KOI8-U input, and the behaviour of
/// the UTF-8 decoder on an invalid byte under both `Strict` and `Replace` traps.
#[test]
#[cfg(feature = "encoding")]
fn decode() {
    use encoding::{all, DecoderTrap};

    let t = b"\xbe\xc8\xb3\xe7\xc7\xcf\xbc\xbc\xbf\xe4\x20\xb7\xaf\xbd\xba\xc6\xae".to_tendril();
    assert_eq!(
        "안녕하세요 러스트",
        &*t.decode(all::WINDOWS_949, DecoderTrap::Strict).unwrap()
    );

    let t = b"\xfc\xce\xc5\xd2\xc7\xc9\xd1 \xd0\xd2\xcf\xc2\xd5\xd6\xc4\xc5\xce\
              \xc9\xd1 \xc9\xd1\x2d\xd1\x2d\xd1\x21"
        .to_tendril();
    assert_eq!(
        "Энергия пробуждения ия-я-я!",
        &*t.decode(all::KOI8_U, DecoderTrap::Replace).unwrap()
    );

    // `\xff` is rejected under `Strict`...
    let t = b"x \xff y".to_tendril();
    assert!(t.decode(all::UTF_8, DecoderTrap::Strict).is_err());

    // ...and becomes U+FFFD under `Replace`.
    let t = b"x \xff y".to_tendril();
    assert_eq!(
        "x \u{fffd} y",
        &*t.decode(all::UTF_8, DecoderTrap::Replace).unwrap()
    );
}
| 2110 | |
/// Character-level operations on `Tendril<fmt::ASCII>`: popping single characters, popping
/// runs classified by `char::is_whitespace`, and pushing characters inside and outside the
/// accepted range.
#[test]
fn ascii() {
    /// Builds an ASCII tendril from bytes, panicking if reinterpretation fails.
    fn mk(x: &[u8]) -> Tendril<fmt::ASCII> {
        x.to_tendril().try_reinterpret().unwrap()
    }

    let mut t = mk(b"xyz");
    assert_eq!(Some('x'), t.pop_front_char());
    assert_eq!(Some('y'), t.pop_front_char());
    assert_eq!(Some('z'), t.pop_front_char());
    assert_eq!(None, t.pop_front_char());

    let mut t = mk(b" \t xyz");
    assert!(Some((mk(b" \t "), true)) == t.pop_front_char_run(char::is_whitespace));
    assert!(Some((mk(b"xyz"), false)) == t.pop_front_char_run(char::is_whitespace));
    assert!(t.pop_front_char_run(char::is_whitespace).is_none());

    let mut t = Tendril::<fmt::ASCII>::new();
    assert!(t.try_push_char('x').is_ok());
    assert!(t.try_push_char('\0').is_ok());
    // U+00A0 is rejected, so only the two accepted characters remain.
    assert!(t.try_push_char('\u{a0}').is_err());
    assert_eq!(b"x\0", t.as_byte_slice());
}
| 2134 | |
/// Character-level operations on `Tendril<fmt::Latin1>`: popping single characters, popping
/// runs classified by `char::is_whitespace`, and pushing characters inside and outside the
/// accepted range.
#[test]
fn latin1() {
    /// Builds a Latin-1 tendril from bytes, panicking if reinterpretation fails.
    fn mk(x: &[u8]) -> Tendril<fmt::Latin1> {
        x.to_tendril().try_reinterpret().unwrap()
    }

    // Byte 0xD8 is expected to pop as 'Ø'.
    let mut t = mk(b"\xd8_\xd8");
    assert_eq!(Some('Ø'), t.pop_front_char());
    assert_eq!(Some('_'), t.pop_front_char());
    assert_eq!(Some('Ø'), t.pop_front_char());
    assert_eq!(None, t.pop_front_char());

    let mut t = mk(b" \t \xfe\xa7z");
    assert!(Some((mk(b" \t "), true)) == t.pop_front_char_run(char::is_whitespace));
    assert!(Some((mk(b"\xfe\xa7z"), false)) == t.pop_front_char_run(char::is_whitespace));
    assert!(t.pop_front_char_run(char::is_whitespace).is_none());

    let mut t = Tendril::<fmt::Latin1>::new();
    assert!(t.try_push_char('x').is_ok());
    assert!(t.try_push_char('\0').is_ok());
    assert!(t.try_push_char('\u{a0}').is_ok());
    // Everything above U+00FF is rejected and must not alter the contents.
    assert!(t.try_push_char('ő').is_err());
    assert!(t.try_push_char('я').is_err());
    assert!(t.try_push_char('\u{a66e}').is_err());
    assert!(t.try_push_char('\u{1f4a9}').is_err());
    assert_eq!(b"x\0\xa0", t.as_byte_slice());
}
| 2162 | |
/// `format_tendril!` with an empty format string and with one interpolated argument.
#[test]
fn format() {
    let empty = format_tendril!("");
    assert_eq!("", &*empty);

    let sentence = format_tendril!("two and two make {}", 2 + 2);
    assert_eq!("two and two make 4", &*sentence);
}
| 2171 | |
/// Pushing a subtendril onto the subtendril that directly precedes it in the same parent
/// must leave all three handles shared with one another.
#[test]
fn merge_shared() {
    let whole = "012345678901234567890123456789".to_tendril();

    let tail = whole.subtendril(10, 20);
    assert!(tail.is_shared());
    assert_eq!("01234567890123456789", &*tail);

    let mut head = whole.subtendril(0, 10);
    assert!(head.is_shared());
    assert_eq!("0123456789", &*head);

    // `tail` starts at offset 10, exactly where `head` ends.
    head.push_tendril(&tail);
    assert!(head.is_shared());
    assert!(tail.is_shared());
    assert!(tail.is_shared_with(&head));
    assert!(head.is_shared_with(&tail));
    assert_eq!("012345678901234567890123456789", &*head);

    assert!(whole.is_shared());
    assert!(whole.is_shared_with(&tail));
    assert!(whole.is_shared_with(&head));
}
| 2193 | |
/// Pushing an unrelated tendril onto a shared subtendril must produce an unshared result.
#[test]
fn merge_cant_share() {
    let whole = "012345678901234567890123456789".to_tendril();

    let mut head = whole.subtendril(0, 10);
    assert!(head.is_shared());
    assert_eq!("0123456789", &*head);

    let unrelated = "abcd".to_tendril();
    head.push_tendril(&unrelated);
    assert!(!head.is_shared());
    assert_eq!("0123456789abcd", &*head);
}
| 2205 | |
/// Calling `reserve` on a shared tendril must leave it shared.
#[test]
fn shared_doesnt_reserve() {
    let mut whole = "012345678901234567890123456789".to_tendril();
    let view = whole.subtendril(1, 10);

    assert!(whole.is_shared());
    whole.reserve(10);
    assert!(whole.is_shared());

    // `view` has to stay alive until here so that `whole` has something to share with.
    drop(view);
}
| 2217 | |
/// The `try_*` slicing and popping methods must report an error instead of panicking when
/// asked for more than the tendril holds.
#[test]
fn out_of_bounds() {
    assert!("".to_tendril().try_subtendril(0, 1).is_err());
    for &(offset, len) in &[(0, 4), (3, 1), (7, 1)] {
        assert!("abc".to_tendril().try_subtendril(offset, len).is_err());
    }

    // Nothing can be popped from either end of an empty tendril.
    let mut empty = "".to_tendril();
    for &n in &[1, 5, 500] {
        assert!(empty.try_pop_front(n).is_err());
    }
    for &n in &[1, 5, 500] {
        assert!(empty.try_pop_back(n).is_err());
    }

    // Order matters below: each successful pop shortens the tendril by one.
    let mut abcd = "abcd".to_tendril();
    assert!(abcd.try_pop_front(1).is_ok());
    assert!(abcd.try_pop_front(4).is_err());
    assert!(abcd.try_pop_front(500).is_err());
    assert!(abcd.try_pop_back(1).is_ok());
    assert!(abcd.try_pop_back(3).is_err());
    assert!(abcd.try_pop_back(500).is_err());
}
| 2241 | |
/// Every comparison on a pair of tendrils must agree with the same comparison on the
/// `&str` values they were built from.
#[test]
fn compare() {
    let left_words = [
        "indiscretions",
        "validity",
        "hallucinogenics",
        "timelessness",
        "original",
        "microcosms",
        "boilers",
        "mammoth",
    ];
    let right_words = [
        "intrepidly",
        "frigid",
        "spa",
        "cardigans",
        "guileful",
        "evaporated",
        "unenthusiastic",
        "legitimate",
    ];

    for &a in left_words.iter() {
        for &b in right_words.iter() {
            let ta = a.to_tendril();
            let tb = b.to_tendril();

            // The operators desugar to `eq`, `ne`, `lt`, `le`, `gt` and `ge` respectively.
            assert_eq!(a == b, ta == tb);
            assert_eq!(a != b, ta != tb);
            assert_eq!(a < b, ta < tb);
            assert_eq!(a <= b, ta <= tb);
            assert_eq!(a > b, ta > tb);
            assert_eq!(a >= b, ta >= tb);
            assert_eq!(a.partial_cmp(b), ta.partial_cmp(&tb));
            assert_eq!(a.cmp(b), ta.cmp(&tb));
        }
    }
}
| 2278 | |
/// Covers `Extend<T>` and `FromIterator<T>` for each supported element type `T`.
///
/// Every section extends with an empty iterator first (which must change nothing), then with
/// real items, and finally builds the same value through `collect`.
#[test]
fn extend_and_from_iterator() {
    // T = &Tendril<F>
    let mut greeting = "Hello".to_tendril();
    greeting.extend(None::<&Tendril<_>>.into_iter());
    assert_eq!("Hello", &*greeting);
    let rest = [", ".to_tendril(), "world".to_tendril(), "!".to_tendril()];
    greeting.extend(&rest);
    assert_eq!("Hello, world!", &*greeting);
    let pieces = [
        "Hello".to_tendril(),
        ", ".to_tendril(),
        "world".to_tendril(),
        "!".to_tendril(),
    ];
    assert_eq!("Hello, world!", &*pieces.iter().collect::<StrTendril>());

    // T = &str
    let mut greeting = "Hello".to_tendril();
    greeting.extend(None::<&str>.into_iter());
    assert_eq!("Hello", &*greeting);
    let rest = [", ", "world", "!"];
    greeting.extend(rest.iter().cloned());
    assert_eq!("Hello, world!", &*greeting);
    let pieces = ["Hello", ", ", "world", "!"];
    assert_eq!(
        "Hello, world!",
        &*pieces.iter().cloned().collect::<StrTendril>()
    );

    // T = &[u8]
    let mut greeting = b"Hello".to_tendril();
    greeting.extend(None::<&[u8]>.into_iter());
    assert_eq!(b"Hello", &*greeting);
    let rest = [b", ".as_ref(), b"world".as_ref(), b"!".as_ref()];
    greeting.extend(rest.iter().cloned());
    assert_eq!(b"Hello, world!", &*greeting);
    let pieces = [
        b"Hello".as_ref(),
        b", ".as_ref(),
        b"world".as_ref(),
        b"!".as_ref(),
    ];
    assert_eq!(
        b"Hello, world!",
        &*pieces.iter().cloned().collect::<ByteTendril>()
    );

    // Shared fixtures for the per-character and per-byte sections.
    let text = "the quick brown fox jumps over the lazy dog";
    let text_expected = text.to_tendril();
    let raw = text.as_bytes();
    let raw_expected = raw.to_tendril();

    // T = char
    assert_eq!(text_expected, text.chars().collect());
    let mut built = StrTendril::new();
    built.extend(text.chars());
    assert_eq!(text_expected, built);

    // T = &u8
    assert_eq!(raw_expected, raw.iter().collect());
    let mut built = ByteTendril::new();
    built.extend(raw);
    assert_eq!(raw_expected, built);

    // T = u8
    assert_eq!(raw_expected, raw.iter().cloned().collect());
    let mut built = ByteTendril::new();
    built.extend(raw.iter().cloned());
    assert_eq!(raw_expected, built);
}
| 2361 | |
/// A tendril can be produced through the `FromStr` implementation.
#[test]
fn from_str() {
    // `str::parse` dispatches to `FromStr::from_str` for the annotated type.
    let parsed: Tendril<_> = "foo bar baz".parse().unwrap();
    assert_eq!("foo bar baz", &*parsed);
}
| 2368 | |
/// `StrTendril::from_char` must yield a tendril holding exactly the given character, for
/// characters of one, two, three and four UTF-8 bytes.
#[test]
fn from_char() {
    assert_eq!("o", &*StrTendril::from_char('o'));
    assert_eq!("ő", &*StrTendril::from_char('ő'));
    assert_eq!("\u{a66e}", &*StrTendril::from_char('\u{a66e}'));
    assert_eq!("\u{1f4a9}", &*StrTendril::from_char('\u{1f4a9}'));
}
| 2376 | |
/// `read_to_tendril` must report the number of bytes read and fill the tendril with exactly
/// the reader's contents, for empty, short and one-megabyte inputs.
#[test]
#[cfg_attr(miri, ignore)] // slow
fn read() {
    /// Reads `expected` through a `Cursor` into a fresh tendril and compares the result.
    fn check(expected: &[u8]) {
        use std::io::Cursor;

        let mut sink = ByteTendril::new();
        let mut reader = Cursor::new(expected);
        assert_eq!(expected.len(), reader.read_to_tendril(&mut sink).unwrap());
        assert_eq!(expected, &*sink);
    }

    check(b"");
    check(b"abcd");

    let megabyte: Vec<u8> = iter::repeat(b'x').take(1_000_000).collect();
    check(&megabyte);
}
| 2393 | |
/// Tendrils used as `HashMap` keys can be looked up with a borrowed byte slice.
#[test]
fn hash_map_key() {
    use std::collections::HashMap;

    // As noted with Borrow, indexing on HashMap<StrTendril, _> is byte-based because of
    // https://github.com/rust-lang/rust/issues/27108.
    let mut by_str = HashMap::new();
    by_str.insert("foo".to_tendril(), 1);
    assert_eq!(by_str.get(b"foo".as_ref()), Some(&1));
    assert_eq!(by_str.get(b"bar".as_ref()), None);

    // Same lookups against a map keyed by byte tendrils.
    let mut by_bytes = HashMap::new();
    by_bytes.insert(b"foo".to_tendril(), 1);
    assert_eq!(by_bytes.get(b"foo".as_ref()), Some(&1));
    assert_eq!(by_bytes.get(b"bar".as_ref()), None);
}
| 2410 | |
/// An `Atomic` tendril is `Send`: a clone can be moved to another thread and mutated there
/// without affecting the handle that stays behind.
#[test]
fn atomic() {
    assert_send::<Tendril<fmt::UTF8, Atomic>>();

    let original: Tendril<fmt::UTF8, Atomic> = Tendril::from_slice("this is a string");
    assert!(!original.is_shared());
    let mut copy = original.clone();
    assert!(original.is_shared());

    // Carried into the thread as a plain integer, for comparison after the push.
    let original_addr = original.as_ptr() as usize;
    let worker = thread::spawn(move || {
        assert!(copy.is_shared());
        copy.push_slice(" extended");
        assert_eq!("this is a string extended", &*copy);
        assert!(copy.as_ptr() as usize != original_addr);
        assert!(!copy.is_shared());
    });
    worker.join().unwrap();

    assert!(original.is_shared());
    assert_eq!("this is a string", &*original);
}
| 2431 | |
/// A tendril converted with `into_send` can cross a thread boundary and be turned back into
/// an unshared `StrTendril`, while a clone kept on this thread stays intact.
#[test]
fn send() {
    assert_send::<SendTendril<fmt::UTF8>>();

    let source = "this is a string".to_tendril();
    let kept = source.clone();
    let sendable = source.into_send();

    let worker = thread::spawn(move || {
        let received = StrTendril::from(sendable);
        assert!(!received.is_shared());
        assert_eq!("this is a string", &*received);
    });
    worker.join().unwrap();

    assert_eq!("this is a string", &*kept);
}
| 2447 | |
/// Regression test for https://github.com/servo/tendril/issues/58: a `NonAtomic` tendril
/// converted to `Atomic` by way of `into_send` must keep its contents.
#[test]
fn issue_58() {
    let data = "<p><i>Hello!</p>, World!</i>";

    let non_atomic: Tendril<fmt::UTF8, NonAtomic> = data.into();
    assert_eq!(&*non_atomic, data);

    let atomic: Tendril<fmt::UTF8, Atomic> = non_atomic.into_send().into();
    assert_eq!(&*atomic, data);
}
| 2457 | |
/// Same round trip as `send`, but with a one-byte string (no longer than `MAX_INLINE_LEN`).
#[test]
fn inline_send() {
    let source = "x".to_tendril();
    let kept = source.clone();
    let sendable = source.into_send();

    let worker = thread::spawn(move || {
        let received = StrTendril::from(sendable);
        assert!(!received.is_shared());
        assert_eq!("x", &*received);
    });
    worker.join().unwrap();

    assert_eq!("x", &*kept);
}
| 2472 | } |
| 2473 | |