1#![no_std]
2extern crate alloc;
3
4use alloc::{borrow::Cow, boxed::Box, string::String, sync::Arc};
5use core::{
6 borrow::Borrow,
7 cmp::{self, Ordering},
8 convert::Infallible,
9 fmt, hash, iter,
10 mem::transmute,
11 ops::Deref,
12 str::FromStr,
13};
14
/// A `SmolStr` is a string type that has the following properties:
///
/// * `size_of::<SmolStr>() == 24` (therefore `== size_of::<String>()` on 64 bit platforms)
/// * `Clone` is `O(1)`
/// * Strings are stack-allocated if they are:
///     * Up to 23 bytes long
///     * Longer than 23 bytes, but substrings of `WS` (see below). Such strings consist
///       solely of consecutive newlines, followed by consecutive spaces
/// * If a string does not satisfy the aforementioned conditions, it is heap-allocated
/// * Additionally, a `SmolStr` can be explicitly created from a `&'static str` without allocation
///
/// Unlike `String`, however, `SmolStr` is immutable. The primary use case for
/// `SmolStr` is a good enough default storage for tokens of typical programming
/// languages. Strings consisting of a series of newlines, followed by a series of
/// spaces are a typical pattern in computer programs because of indentation.
/// Note that a specialized interner might be a better solution for some use cases.
///
/// `WS`: A string of 32 newlines followed by 128 spaces.
#[derive(Clone)]
pub struct SmolStr(Repr);
35
36impl SmolStr {
37 #[deprecated = "Use `new_inline` instead"]
38 pub const fn new_inline_from_ascii(len: usize, bytes: &[u8]) -> SmolStr {
39 assert!(len <= INLINE_CAP);
40
41 const ZEROS: &[u8] = &[0; INLINE_CAP];
42
43 let mut buf = [0; INLINE_CAP];
44 macro_rules! s {
45 ($($idx:literal),*) => ( $(s!(set $idx);)* );
46 (set $idx:literal) => ({
47 let src: &[u8] = [ZEROS, bytes][($idx < len) as usize];
48 let byte = src[$idx];
49 let _is_ascii = [(); 128][byte as usize];
50 buf[$idx] = byte
51 });
52 }
53 s!(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22);
54 SmolStr(Repr::Inline {
55 len: unsafe { transmute(len as u8) },
56 buf,
57 })
58 }
59
60 /// Constructs inline variant of `SmolStr`.
61 ///
62 /// Panics if `text.len() > 23`.
63 #[inline]
64 pub const fn new_inline(text: &str) -> SmolStr {
65 assert!(text.len() <= INLINE_CAP); // avoids checks in loop
66
67 let mut buf = [0; INLINE_CAP];
68 let mut i = 0;
69 while i < text.len() {
70 buf[i] = text.as_bytes()[i];
71 i += 1
72 }
73 SmolStr(Repr::Inline {
74 len: unsafe { transmute(text.len() as u8) },
75 buf,
76 })
77 }
78
79 /// Constructs a `SmolStr` from a statically allocated string.
80 ///
81 /// This never allocates.
82 #[inline(always)]
83 pub const fn new_static(text: &'static str) -> SmolStr {
84 // NOTE: this never uses the inline storage; if a canonical
85 // representation is needed, we could check for `len() < INLINE_CAP`
86 // and call `new_inline`, but this would mean an extra branch.
87 SmolStr(Repr::Static(text))
88 }
89
90 pub fn new<T>(text: T) -> SmolStr
91 where
92 T: AsRef<str>,
93 {
94 SmolStr(Repr::new(text))
95 }
96
97 #[inline(always)]
98 pub fn as_str(&self) -> &str {
99 self.0.as_str()
100 }
101
102 #[allow(clippy::inherent_to_string_shadow_display)]
103 #[inline(always)]
104 pub fn to_string(&self) -> String {
105 use alloc::borrow::ToOwned;
106
107 self.as_str().to_owned()
108 }
109
110 #[inline(always)]
111 pub fn len(&self) -> usize {
112 self.0.len()
113 }
114
115 #[inline(always)]
116 pub fn is_empty(&self) -> bool {
117 self.0.is_empty()
118 }
119
120 #[inline(always)]
121 pub const fn is_heap_allocated(&self) -> bool {
122 matches!(self.0, Repr::Heap(..))
123 }
124
125 fn from_char_iter<I: iter::Iterator<Item = char>>(mut iter: I) -> SmolStr {
126 let (min_size, _) = iter.size_hint();
127 if min_size > INLINE_CAP {
128 let heap: String = iter.collect();
129 return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
130 }
131 let mut len = 0;
132 let mut buf = [0u8; INLINE_CAP];
133 while let Some(ch) = iter.next() {
134 let size = ch.len_utf8();
135 if size + len > INLINE_CAP {
136 let (min_remaining, _) = iter.size_hint();
137 let mut heap = String::with_capacity(size + len + min_remaining);
138 heap.push_str(core::str::from_utf8(&buf[..len]).unwrap());
139 heap.push(ch);
140 heap.extend(iter);
141 return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
142 }
143 ch.encode_utf8(&mut buf[len..]);
144 len += size;
145 }
146 SmolStr(Repr::Inline {
147 len: unsafe { transmute(len as u8) },
148 buf,
149 })
150 }
151}
152
153impl Default for SmolStr {
154 #[inline(always)]
155 fn default() -> SmolStr {
156 SmolStr(Repr::Inline {
157 len: InlineSize::_V0,
158 buf: [0; INLINE_CAP],
159 })
160 }
161}
162
163impl Deref for SmolStr {
164 type Target = str;
165
166 #[inline(always)]
167 fn deref(&self) -> &str {
168 self.as_str()
169 }
170}
171
172impl PartialEq<SmolStr> for SmolStr {
173 fn eq(&self, other: &SmolStr) -> bool {
174 self.as_str() == other.as_str()
175 }
176}
177
// Marker impl: `SmolStr` equality is plain `str` equality, which is a full equivalence relation.
impl Eq for SmolStr {}
179
180impl PartialEq<str> for SmolStr {
181 fn eq(&self, other: &str) -> bool {
182 self.as_str() == other
183 }
184}
185
186impl PartialEq<SmolStr> for str {
187 fn eq(&self, other: &SmolStr) -> bool {
188 other == self
189 }
190}
191
192impl<'a> PartialEq<&'a str> for SmolStr {
193 fn eq(&self, other: &&'a str) -> bool {
194 self == *other
195 }
196}
197
198impl<'a> PartialEq<SmolStr> for &'a str {
199 fn eq(&self, other: &SmolStr) -> bool {
200 *self == other
201 }
202}
203
204impl PartialEq<String> for SmolStr {
205 fn eq(&self, other: &String) -> bool {
206 self.as_str() == other
207 }
208}
209
210impl PartialEq<SmolStr> for String {
211 fn eq(&self, other: &SmolStr) -> bool {
212 other == self
213 }
214}
215
216impl<'a> PartialEq<&'a String> for SmolStr {
217 fn eq(&self, other: &&'a String) -> bool {
218 self == *other
219 }
220}
221
222impl<'a> PartialEq<SmolStr> for &'a String {
223 fn eq(&self, other: &SmolStr) -> bool {
224 *self == other
225 }
226}
227
228impl Ord for SmolStr {
229 fn cmp(&self, other: &SmolStr) -> Ordering {
230 self.as_str().cmp(other.as_str())
231 }
232}
233
234impl PartialOrd for SmolStr {
235 fn partial_cmp(&self, other: &SmolStr) -> Option<Ordering> {
236 Some(self.cmp(other))
237 }
238}
239
240impl hash::Hash for SmolStr {
241 fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
242 self.as_str().hash(state:hasher);
243 }
244}
245
246impl fmt::Debug for SmolStr {
247 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
248 fmt::Debug::fmt(self.as_str(), f)
249 }
250}
251
252impl fmt::Display for SmolStr {
253 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
254 fmt::Display::fmt(self.as_str(), f)
255 }
256}
257
258impl iter::FromIterator<char> for SmolStr {
259 fn from_iter<I: iter::IntoIterator<Item = char>>(iter: I) -> SmolStr {
260 let iter: ::IntoIter = iter.into_iter();
261 Self::from_char_iter(iter)
262 }
263}
264
265fn build_from_str_iter<T>(mut iter: impl Iterator<Item = T>) -> SmolStr
266where
267 T: AsRef<str>,
268 String: iter::Extend<T>,
269{
270 let mut len: usize = 0;
271 let mut buf: [u8; 23] = [0u8; INLINE_CAP];
272 while let Some(slice: T) = iter.next() {
273 let slice: &str = slice.as_ref();
274 let size: usize = slice.len();
275 if size + len > INLINE_CAP {
276 let mut heap: String = String::with_capacity(size + len);
277 heap.push_str(string:core::str::from_utf8(&buf[..len]).unwrap());
278 heap.push_str(string:slice);
279 heap.extend(iter);
280 return SmolStr(Repr::Heap(heap.into_boxed_str().into()));
281 }
282 buf[len..][..size].copy_from_slice(src:slice.as_bytes());
283 len += size;
284 }
285 SmolStr(Repr::Inline {
286 len: unsafe { transmute(src:len as u8) },
287 buf,
288 })
289}
290
291impl iter::FromIterator<String> for SmolStr {
292 fn from_iter<I: iter::IntoIterator<Item = String>>(iter: I) -> SmolStr {
293 build_from_str_iter(iter.into_iter())
294 }
295}
296
297impl<'a> iter::FromIterator<&'a String> for SmolStr {
298 fn from_iter<I: iter::IntoIterator<Item = &'a String>>(iter: I) -> SmolStr {
299 SmolStr::from_iter(iter.into_iter().map(|x: &String| x.as_str()))
300 }
301}
302
303impl<'a> iter::FromIterator<&'a str> for SmolStr {
304 fn from_iter<I: iter::IntoIterator<Item = &'a str>>(iter: I) -> SmolStr {
305 build_from_str_iter(iter.into_iter())
306 }
307}
308
309impl AsRef<str> for SmolStr {
310 #[inline(always)]
311 fn as_ref(&self) -> &str {
312 self.as_str()
313 }
314}
315
316impl From<&str> for SmolStr {
317 #[inline]
318 fn from(s: &str) -> SmolStr {
319 SmolStr::new(text:s)
320 }
321}
322
323impl From<&mut str> for SmolStr {
324 #[inline]
325 fn from(s: &mut str) -> SmolStr {
326 SmolStr::new(text:s)
327 }
328}
329
330impl From<&String> for SmolStr {
331 #[inline]
332 fn from(s: &String) -> SmolStr {
333 SmolStr::new(text:s)
334 }
335}
336
337impl From<String> for SmolStr {
338 #[inline(always)]
339 fn from(text: String) -> Self {
340 Self::new(text)
341 }
342}
343
344impl From<Box<str>> for SmolStr {
345 #[inline]
346 fn from(s: Box<str>) -> SmolStr {
347 SmolStr::new(text:s)
348 }
349}
350
351impl From<Arc<str>> for SmolStr {
352 #[inline]
353 fn from(s: Arc<str>) -> SmolStr {
354 let repr: Repr = Repr::new_on_stack(text:s.as_ref()).unwrap_or_else(|| Repr::Heap(s));
355 Self(repr)
356 }
357}
358
359impl<'a> From<Cow<'a, str>> for SmolStr {
360 #[inline]
361 fn from(s: Cow<'a, str>) -> SmolStr {
362 SmolStr::new(text:s)
363 }
364}
365
366impl From<SmolStr> for Arc<str> {
367 #[inline(always)]
368 fn from(text: SmolStr) -> Self {
369 match text.0 {
370 Repr::Heap(data: Arc) => data,
371 _ => text.as_str().into(),
372 }
373 }
374}
375
376impl From<SmolStr> for String {
377 #[inline(always)]
378 fn from(text: SmolStr) -> Self {
379 text.as_str().into()
380 }
381}
382
383impl Borrow<str> for SmolStr {
384 #[inline(always)]
385 fn borrow(&self) -> &str {
386 self.as_str()
387 }
388}
389
390impl FromStr for SmolStr {
391 type Err = Infallible;
392
393 #[inline]
394 fn from_str(s: &str) -> Result<SmolStr, Self::Err> {
395 Ok(SmolStr::from(s))
396 }
397}
398
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for SmolStr {
    /// Generates an arbitrary `&str` from `u` and copies it into a `SmolStr`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> Result<Self, arbitrary::Error> {
        let text: &str = <&str>::arbitrary(u)?;
        let value = SmolStr::new(text);
        Ok(value)
    }
}
406
/// Maximum number of bytes stored inline.
const INLINE_CAP: usize = 23;
/// Number of leading newlines in `WS`.
const N_NEWLINES: usize = 32;
/// Number of trailing spaces in `WS`.
const N_SPACES: usize = 128;
/// `N_NEWLINES` newlines followed by `N_SPACES` spaces.
///
/// Fix: the literal had collapsed to 32 newlines plus a single space, which
/// fails the length assertion below. It is rebuilt from short chunks
/// (4 x 8 newlines, 8 x 16 spaces) so the counts are easy to audit.
const WS: &str = concat!(
    "\n\n\n\n\n\n\n\n",
    "\n\n\n\n\n\n\n\n",
    "\n\n\n\n\n\n\n\n",
    "\n\n\n\n\n\n\n\n",
    "                ",
    "                ",
    "                ",
    "                ",
    "                ",
    "                ",
    "                ",
    "                ",
);
// Compile-time check that `WS` matches the advertised layout.
const _: () = {
    assert!(WS.len() == N_NEWLINES + N_SPACES);
    assert!(WS.as_bytes()[N_NEWLINES - 1] == b'\n');
    assert!(WS.as_bytes()[N_NEWLINES] == b' ');
};
417
/// Length of an inline string, one variant per value in `0..=23`.
///
/// Each discriminant equals the length it stands for, so a `u8` in that range
/// has the same bit pattern as the matching variant.
#[derive(Clone, Copy, Debug)]
#[repr(u8)]
enum InlineSize {
    _V0 = 0,
    _V1 = 1,
    _V2 = 2,
    _V3 = 3,
    _V4 = 4,
    _V5 = 5,
    _V6 = 6,
    _V7 = 7,
    _V8 = 8,
    _V9 = 9,
    _V10 = 10,
    _V11 = 11,
    _V12 = 12,
    _V13 = 13,
    _V14 = 14,
    _V15 = 15,
    _V16 = 16,
    _V17 = 17,
    _V18 = 18,
    _V19 = 19,
    _V20 = 20,
    _V21 = 21,
    _V22 = 22,
    _V23 = 23,
}
446
/// Internal storage of a `SmolStr`.
#[derive(Clone, Debug)]
enum Repr {
    /// String data held in a shared `Arc<str>`.
    Heap(Arc<str>),
    /// A borrowed `'static` string, including substrings of `WS`.
    Static(&'static str),
    /// Bytes stored directly in the value; only the first `len` bytes of
    /// `buf` are read back as text.
    Inline {
        len: InlineSize,
        buf: [u8; INLINE_CAP],
    },
}
456
457impl Repr {
458 /// This function tries to create a new Repr::Inline or Repr::Static
459 /// If it isn't possible, this function returns None
460 fn new_on_stack<T>(text: T) -> Option<Self>
461 where
462 T: AsRef<str>,
463 {
464 let text = text.as_ref();
465
466 let len = text.len();
467 if len <= INLINE_CAP {
468 let mut buf = [0; INLINE_CAP];
469 buf[..len].copy_from_slice(text.as_bytes());
470 return Some(Repr::Inline {
471 len: unsafe { transmute(len as u8) },
472 buf,
473 });
474 }
475
476 if len <= N_NEWLINES + N_SPACES {
477 let bytes = text.as_bytes();
478 let possible_newline_count = cmp::min(len, N_NEWLINES);
479 let newlines = bytes[..possible_newline_count]
480 .iter()
481 .take_while(|&&b| b == b'\n')
482 .count();
483 let possible_space_count = len - newlines;
484 if possible_space_count <= N_SPACES && bytes[newlines..].iter().all(|&b| b == b' ') {
485 let spaces = possible_space_count;
486 let substring = &WS[N_NEWLINES - newlines..N_NEWLINES + spaces];
487 return Some(Repr::Static(substring));
488 }
489 }
490 None
491 }
492
493 fn new<T>(text: T) -> Self
494 where
495 T: AsRef<str>,
496 {
497 Self::new_on_stack(text.as_ref()).unwrap_or_else(|| Repr::Heap(text.as_ref().into()))
498 }
499
500 #[inline(always)]
501 fn len(&self) -> usize {
502 match self {
503 Repr::Heap(data) => data.len(),
504 Repr::Static(data) => data.len(),
505 Repr::Inline { len, .. } => *len as usize,
506 }
507 }
508
509 #[inline(always)]
510 fn is_empty(&self) -> bool {
511 match self {
512 Repr::Heap(data) => data.is_empty(),
513 Repr::Static(data) => data.is_empty(),
514 Repr::Inline { len, .. } => *len as u8 == 0,
515 }
516 }
517
518 #[inline]
519 fn as_str(&self) -> &str {
520 match self {
521 Repr::Heap(data) => data,
522 Repr::Static(data) => data,
523 Repr::Inline { len, buf } => {
524 let len = *len as usize;
525 let buf = &buf[..len];
526 unsafe { ::core::str::from_utf8_unchecked(buf) }
527 }
528 }
529 }
530}
531
/// Convert value to [`SmolStr`] using [`fmt::Display`], potentially without allocating.
///
/// Almost identical to [`ToString`], but converts to `SmolStr` instead.
pub trait ToSmolStr {
    /// Renders `self` into a [`SmolStr`].
    fn to_smolstr(&self) -> SmolStr;
}
538
/// Formats arguments to a [`SmolStr`], potentially without allocating.
///
/// See [`alloc::format!`] or [`format_args!`] for syntax documentation.
///
/// # Panics
///
/// Panics if a formatting trait implementation returns an error.
#[macro_export]
macro_rules! format_smolstr {
    ($($tt:tt)*) => {{
        // `write_fmt` is a method of `fmt::Write`, so the trait must be in scope.
        use ::core::fmt::Write;
        let mut w = $crate::Writer::new();
        w.write_fmt(format_args!($($tt)*)).expect("a formatting trait implementation returned an error");
        $crate::SmolStr::from(w)
    }};
}
551
/// Output buffer used by `format_smolstr!`: text is gathered inline first and
/// moved to `heap` once it no longer fits.
#[doc(hidden)]
pub struct Writer {
    // Inline storage, used while the total written length is at most `INLINE_CAP`.
    inline: [u8; INLINE_CAP],
    // Holds the complete text once the output has outgrown `inline`.
    heap: String,
    // Bytes written so far while inline; once the output spills it stays above
    // `INLINE_CAP` and only marks heap mode.
    len: usize,
}
558
559impl Writer {
560 #[must_use]
561 pub const fn new() -> Self {
562 Writer {
563 inline: [0; INLINE_CAP],
564 heap: String::new(),
565 len: 0,
566 }
567 }
568}
569
570impl fmt::Write for Writer {
571 fn write_str(&mut self, s: &str) -> fmt::Result {
572 // if currently on the stack
573 if self.len <= INLINE_CAP {
574 let old_len = self.len;
575 self.len += s.len();
576
577 // if the new length will fit on the stack (even if it fills it entirely)
578 if self.len <= INLINE_CAP {
579 self.inline[old_len..self.len].copy_from_slice(s.as_bytes());
580
581 return Ok(()); // skip the heap push below
582 }
583
584 self.heap.reserve(self.len);
585
586 // copy existing inline bytes over to the heap
587 unsafe {
588 self.heap
589 .as_mut_vec()
590 .extend_from_slice(&self.inline[..old_len]);
591 }
592 }
593
594 self.heap.push_str(s);
595
596 Ok(())
597 }
598}
599
600impl From<Writer> for SmolStr {
601 fn from(value: Writer) -> Self {
602 SmolStr(if value.len <= INLINE_CAP {
603 Repr::Inline {
604 len: unsafe { transmute(src:value.len as u8) },
605 buf: value.inline,
606 }
607 } else {
608 Repr::new(text:value.heap)
609 })
610 }
611}
612
613impl<T> ToSmolStr for T
614where
615 T: fmt::Display + ?Sized,
616{
617 fn to_smolstr(&self) -> SmolStr {
618 format_smolstr!("{}", self)
619 }
620}
621
#[cfg(feature = "serde")]
mod serde {
    use alloc::{string::String, vec::Vec};
    use core::fmt;

    use serde::de::{Deserializer, Error, Unexpected, Visitor};

    use crate::SmolStr;

    // https://github.com/serde-rs/serde/blob/629802f2abfd1a54a6072992888fea7ca5bc209f/serde/src/private/de.rs#L56-L125
    /// Deserializes a `SmolStr` from string-like or UTF-8 byte input.
    fn smol_str<'de: 'a, 'a, D>(deserializer: D) -> Result<SmolStr, D::Error>
    where
        D: Deserializer<'de>,
    {
        /// Visitor accepting strings and UTF-8 bytes, borrowed or owned.
        struct SmolStrVisitor;

        impl<'a> Visitor<'a> for SmolStrVisitor {
            type Value = SmolStr;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a string")
            }

            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(SmolStr::from(v))
            }

            fn visit_borrowed_str<E>(self, v: &'a str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(SmolStr::from(v))
            }

            fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(SmolStr::from(v))
            }

            // Byte inputs must be valid UTF-8; otherwise the offending bytes
            // are reported through `invalid_value`.
            fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
            where
                E: Error,
            {
                core::str::from_utf8(v)
                    .map(SmolStr::from)
                    .map_err(|_| E::invalid_value(Unexpected::Bytes(v), &self))
            }

            fn visit_borrowed_bytes<E>(self, v: &'a [u8]) -> Result<Self::Value, E>
            where
                E: Error,
            {
                core::str::from_utf8(v)
                    .map(SmolStr::from)
                    .map_err(|_| E::invalid_value(Unexpected::Bytes(v), &self))
            }

            fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
            where
                E: Error,
            {
                String::from_utf8(v).map(SmolStr::from).map_err(|e| {
                    // Recover the rejected bytes from the error for the report.
                    E::invalid_value(Unexpected::Bytes(&e.into_bytes()), &self)
                })
            }
        }

        deserializer.deserialize_str(SmolStrVisitor)
    }

    impl serde::Serialize for SmolStr {
        /// Serializes the contents as a plain string.
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: serde::Serializer,
        {
            serializer.serialize_str(self.as_str())
        }
    }

    impl<'de> serde::Deserialize<'de> for SmolStr {
        /// Deserializes through the string visitor in `smol_str`.
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            smol_str(deserializer)
        }
    }
}
721