1 | // Copyright 2014 The Servo Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
5 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
6 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | // option. This file may not be copied, modified, or distributed |
8 | // except according to those terms. |
9 | |
10 | //! |
11 | //! A library for interning things that are `AsRef<str>`. |
12 | //! |
//! Some strings may be interned at compile time using the `string-cache-codegen` crate.
//! Alternatively, `EmptyStaticAtomSet`, which has no compile-time interned strings, may be used.
//! An `Atom` is an interned string for a given set (either `EmptyStaticAtomSet` or a generated
//! `StaticAtomSet`).
16 | //! |
//! Generated `Atom`s will have associated macros to intern static strings at compile-time.
18 | //! |
19 | //! # Examples |
20 | //! |
21 | //! Here are two examples, one with compile-time `Atom`s, and one without. |
22 | //! |
23 | //! ## With compile-time atoms |
24 | //! |
25 | //! In `Cargo.toml`: |
26 | //! ```toml |
27 | //! [dependencies] |
28 | //! string_cache = "0.8" |
29 | //! |
//! [build-dependencies]
31 | //! string_cache_codegen = "0.5" |
32 | //! ``` |
33 | //! |
34 | //! In `build.rs`: |
35 | //! |
36 | //! ```ignore |
37 | //! extern crate string_cache_codegen; |
38 | //! |
39 | //! use std::env; |
40 | //! use std::path::Path; |
41 | //! |
42 | //! fn main() { |
43 | //! string_cache_codegen::AtomType::new("foo::FooAtom" , "foo_atom!" ) |
44 | //! .atoms(&["foo" , "bar" ]) |
45 | //! .write_to_file(&Path::new(&env::var("OUT_DIR" ).unwrap()).join("foo_atom.rs" )) |
46 | //! .unwrap() |
47 | //! } |
48 | //! ``` |
49 | //! |
50 | //! In `lib.rs`: |
51 | //! |
52 | //! ```ignore |
53 | //! extern crate string_cache; |
54 | //! |
55 | //! mod foo { |
56 | //! include!(concat!(env!("OUT_DIR" ), "/foo_atom.rs" )); |
57 | //! } |
58 | //! |
59 | //! fn use_the_atom(t: &str) { |
60 | //! match *t { |
61 | //! foo_atom!("foo" ) => println!("Found foo!" ), |
62 | //! foo_atom!("bar" ) => println!("Found bar!" ), |
63 | //! // foo_atom!("baz") => println!("Found baz!"), - would be a compile time error |
64 | //! _ => { |
65 | //! println!("String not interned" ); |
66 | //! // We can intern strings at runtime as well |
67 | //! foo::FooAtom::from(t) |
68 | //! } |
69 | //! } |
70 | //! } |
71 | //! ``` |
72 | //! |
73 | //! ## No compile-time atoms |
74 | //! |
75 | //! ``` |
76 | //! # extern crate string_cache; |
77 | //! use string_cache::DefaultAtom; |
78 | //! |
79 | //! # fn main() { |
80 | //! let mut interned_stuff = Vec::new(); |
81 | //! let text = "here is a sentence of text that will be tokenised and |
82 | //! interned and some repeated tokens is of text and" ; |
83 | //! for word in text.split_whitespace() { |
84 | //! let seen_before = interned_stuff.iter() |
//! // We can use impl PartialEq<T>, where T is anything string-like,
//! // to compare interned strings either to other interned strings
//! // or to actual strings. Comparing two interned strings is very fast
//! // (normally a single CPU operation).
89 | //! .filter(|interned_word| interned_word == &word) |
90 | //! .count(); |
91 | //! if seen_before > 0 { |
92 | //! println!(r#"Seen the word "{}" {} times"# , word, seen_before); |
93 | //! } else { |
94 | //! println!(r#"Not seen the word "{}" before"# , word); |
95 | //! } |
96 | //! // We use the impl From<(Cow<'a, str>, or &'a str, or String)> for |
97 | //! // Atom<Static> to intern a new string. |
98 | //! interned_stuff.push(DefaultAtom::from(word)); |
99 | //! } |
100 | //! # } |
101 | //! ``` |
102 | //! |
103 | |
104 | #![cfg_attr (test, deny(warnings))] |
105 | |
106 | // Types, such as Atom, that impl Hash must follow the hash invariant: if two objects match |
107 | // with PartialEq, they must also have the same Hash. Clippy warns on types that derive one while |
108 | // manually impl-ing the other, because it seems easy for the two to drift apart, causing the |
109 | // invariant to be violated. |
110 | // |
111 | // But Atom is a newtype over NonZeroU64, and probably always will be, since cheap comparisons and |
112 | // copying are this library's purpose. So we know what the PartialEq comparison is going to do. |
113 | // |
114 | // The `get_hash` function, seen in `atom.rs`, consults that number, plus the global string interner |
115 | // tables. The only way for the resulting hash for two Atoms with the same inner 64-bit number to |
116 | // differ would be if the table entry changed between invocations, and that would be really bad. |
117 | #![allow (clippy::derive_hash_xor_eq)] |
118 | |
119 | mod atom; |
120 | mod dynamic_set; |
121 | mod static_sets; |
122 | mod trivial_impls; |
123 | |
124 | pub use atom::Atom; |
125 | pub use static_sets::{EmptyStaticAtomSet, PhfStrSet, StaticAtomSet}; |
126 | |
/// Use this if you don’t care about static atoms.
///
/// This is [`Atom`] parameterized with [`EmptyStaticAtomSet`], the static set
/// that has no compile-time interned strings. Values are created at runtime
/// through the `From` conversions, e.g. `DefaultAtom::from("word")`.
pub type DefaultAtom = Atom<EmptyStaticAtomSet>;
129 | |
130 | // Some minor tests of internal layout here. |
131 | // See ../integration-tests for much more. |
132 | |
/// Layout regression test: a `DefaultAtom` must stay exactly eight bytes wide,
/// and wrapping it in `Option` must not make it any larger.
#[test]
fn assert_sizes() {
    use std::mem;

    let atom_bytes = mem::size_of::<DefaultAtom>();
    let optional_atom_bytes = mem::size_of::<Option<DefaultAtom>>();

    // The atom itself is expected to occupy a single 64-bit word.
    assert_eq!(atom_bytes, 8);
    // `Option<DefaultAtom>` must be no bigger than the atom it wraps.
    assert_eq!(optional_atom_bytes, atom_bytes);
}
140 | |