| 1 | // Copyright 2014 The Servo Project Developers. See the COPYRIGHT |
| 2 | // file at the top-level directory of this distribution. |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 5 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 6 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| 7 | // option. This file may not be copied, modified, or distributed |
| 8 | // except according to those terms. |
| 9 | |
| 10 | //! |
| 11 | //! A library for interning things that are `AsRef<str>`. |
| 12 | //! |
| 13 | //! Some strings may be interned at compile time using the `string-cache-codegen` crate; otherwise |
| 14 | //! use `EmptyStaticAtomSet`, which has no compile-time interned strings. An `Atom` is an |
| 15 | //! interned string for a given set (either `EmptyStaticAtomSet` or a generated `StaticAtomSet`). |
| 16 | //! |
| 17 | //! Generated `Atom`s will have associated macros to intern static strings at compile-time. |
| 18 | //! |
| 19 | //! # Examples |
| 20 | //! |
| 21 | //! Here are two examples, one with compile-time `Atom`s, and one without. |
| 22 | //! |
| 23 | //! ## With compile-time atoms |
| 24 | //! |
| 25 | //! In `Cargo.toml`: |
| 26 | //! ```toml |
| 27 | //! [dependencies] |
| 28 | //! string_cache = "0.8" |
| 29 | //! |
| 30 | //! [build-dependencies] |
| 31 | //! string_cache_codegen = "0.5" |
| 32 | //! ``` |
| 33 | //! |
| 34 | //! In `build.rs`: |
| 35 | //! |
| 36 | //! ```ignore |
| 37 | //! extern crate string_cache_codegen; |
| 38 | //! |
| 39 | //! use std::env; |
| 40 | //! use std::path::Path; |
| 41 | //! |
| 42 | //! fn main() { |
| 43 | //! string_cache_codegen::AtomType::new("foo::FooAtom" , "foo_atom!" ) |
| 44 | //! .atoms(&["foo" , "bar" ]) |
| 45 | //! .write_to_file(&Path::new(&env::var("OUT_DIR" ).unwrap()).join("foo_atom.rs" )) |
| 46 | //! .unwrap() |
| 47 | //! } |
| 48 | //! ``` |
| 49 | //! |
| 50 | //! In `lib.rs`: |
| 51 | //! |
| 52 | //! ```ignore |
| 53 | //! extern crate string_cache; |
| 54 | //! |
| 55 | //! mod foo { |
| 56 | //! include!(concat!(env!("OUT_DIR" ), "/foo_atom.rs" )); |
| 57 | //! } |
| 58 | //! |
| 59 | //! fn use_the_atom(t: &str) { |
| 60 | //! match *t { |
| 61 | //! foo_atom!("foo" ) => println!("Found foo!" ), |
| 62 | //! foo_atom!("bar" ) => println!("Found bar!" ), |
| 63 | //! // foo_atom!("baz") => println!("Found baz!"), - would be a compile time error |
| 64 | //! _ => { |
| 65 | //! println!("String not interned" ); |
| 66 | //! // We can intern strings at runtime as well |
| 67 | //! foo::FooAtom::from(t) |
| 68 | //! } |
| 69 | //! } |
| 70 | //! } |
| 71 | //! ``` |
| 72 | //! |
| 73 | //! ## No compile-time atoms |
| 74 | //! |
| 75 | //! ``` |
| 76 | //! # extern crate string_cache; |
| 77 | //! use string_cache::DefaultAtom; |
| 78 | //! |
| 79 | //! # fn main() { |
| 80 | //! let mut interned_stuff = Vec::new(); |
| 81 | //! let text = "here is a sentence of text that will be tokenised and |
| 82 | //! interned and some repeated tokens is of text and" ; |
| 83 | //! for word in text.split_whitespace() { |
| 84 | //! let seen_before = interned_stuff.iter() |
| 85 | //! // We can use impl PartialEq<T> where T is anything string-like |
| 86 | //! // to compare interned strings to either other interned strings, |
| 87 | //! // or actual strings. Comparing two interned strings is very fast |
| 88 | //! // (normally a single cpu operation). |
| 89 | //! .filter(|interned_word| interned_word == &word) |
| 90 | //! .count(); |
| 91 | //! if seen_before > 0 { |
| 92 | //! println!(r#"Seen the word "{}" {} times"# , word, seen_before); |
| 93 | //! } else { |
| 94 | //! println!(r#"Not seen the word "{}" before"# , word); |
| 95 | //! } |
| 96 | //! // We use the impl From<(Cow<'a, str>, or &'a str, or String)> for |
| 97 | //! // Atom<Static> to intern a new string. |
| 98 | //! interned_stuff.push(DefaultAtom::from(word)); |
| 99 | //! } |
| 100 | //! # } |
| 101 | //! ``` |
| 102 | //! |
| 103 | |
| 104 | #![cfg_attr (test, deny(warnings))] |
| 105 | |
| 106 | // Types, such as Atom, that impl Hash must follow the hash invariant: if two objects match |
| 107 | // with PartialEq, they must also have the same Hash. Clippy warns on types that derive one while |
| 108 | // manually impl-ing the other, because it seems easy for the two to drift apart, causing the |
| 109 | // invariant to be violated. |
| 110 | // |
| 111 | // But Atom is a newtype over NonZeroU64, and probably always will be, since cheap comparisons and |
| 112 | // copying are this library's purpose. So we know what the PartialEq comparison is going to do. |
| 113 | // |
| 114 | // The `get_hash` function, seen in `atom.rs`, consults that number, plus the global string interner |
| 115 | // tables. The only way for the resulting hash for two Atoms with the same inner 64-bit number to |
| 116 | // differ would be if the table entry changed between invocations, and that would be really bad. |
| 117 | #![allow (clippy::derive_hash_xor_eq)] |
| 118 | |
| 119 | mod atom; |
| 120 | mod dynamic_set; |
| 121 | mod static_sets; |
| 122 | mod trivial_impls; |
| 123 | |
| 124 | pub use atom::Atom; |
| 125 | pub use static_sets::{EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; |
| 126 | |
| 127 | /// Use this if you don’t care about static atoms: an `Atom` over the `EmptyStaticAtomSet`. |
| 128 | pub type DefaultAtom = Atom<EmptyStaticAtomSet>; |
| 129 | |
| 130 | // Some minor tests of internal layout here. |
| 131 | // See ../integration-tests for much more. |
| 132 | |
/// Layout regression check: a `DefaultAtom` must stay exactly 8 bytes wide, and
/// wrapping it in `Option` must not change that size.
#[test]
fn assert_sizes() {
    use std::mem;

    let atom_size = mem::size_of::<DefaultAtom>();
    let optional_atom_size = mem::size_of::<Option<DefaultAtom>>();

    assert_eq!(atom_size, 8);
    assert_eq!(optional_atom_size, atom_size);
}
| 140 | |