| 1 | // Copyright 2016 The Servo Project Developers. See the COPYRIGHT |
| 2 | // file at the top-level directory of this distribution. |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 5 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 6 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| 7 | // option. This file may not be copied, modified, or distributed |
| 8 | // except according to those terms. |
| 9 | //! A crate to create static string caches at compile time. |
| 10 | //! |
| 11 | //! # Examples |
| 12 | //! |
| 13 | //! With static atoms: |
| 14 | //! |
| 15 | //! In `Cargo.toml`: |
| 16 | //! |
| 17 | //! ```toml |
| 18 | //! [package] |
| 19 | //! build = "build.rs" |
| 20 | //! |
| 21 | //! [dependencies] |
| 22 | //! string_cache = "0.8" |
| 23 | //! |
| 24 | //! [build-dependencies] |
| 25 | //! string_cache_codegen = "0.5" |
| 26 | //! ``` |
| 27 | //! |
| 28 | //! In `build.rs`: |
| 29 | //! |
| 30 | //! ```no_run |
| 31 | //! extern crate string_cache_codegen; |
| 32 | //! |
| 33 | //! use std::env; |
| 34 | //! use std::path::Path; |
| 35 | //! |
| 36 | //! fn main() { |
| 37 | //! string_cache_codegen::AtomType::new("foo::FooAtom" , "foo_atom!" ) |
| 38 | //! .atoms(&["foo" , "bar" ]) |
| 39 | //! .write_to_file(&Path::new(&env::var("OUT_DIR" ).unwrap()).join("foo_atom.rs" )) |
| 40 | //! .unwrap() |
| 41 | //! } |
| 42 | //! ``` |
| 43 | //! |
| 44 | //! In `lib.rs`: |
| 45 | //! |
| 46 | //! ```ignore |
| 47 | //! extern crate string_cache; |
| 48 | //! |
| 49 | //! mod foo { |
| 50 | //! include!(concat!(env!("OUT_DIR" ), "/foo_atom.rs" )); |
| 51 | //! } |
| 52 | //! ``` |
| 53 | //! |
| 54 | //! The generated code will define a `FooAtom` type and a `foo_atom!` macro. |
| 55 | //! The macro can be used in expressions or patterns, with strings listed in `build.rs`. |
| 56 | //! For example: |
| 57 | //! |
| 58 | //! ```ignore |
| 59 | //! fn compute_something(input: &foo::FooAtom) -> u32 { |
| 60 | //! match *input { |
| 61 | //! foo_atom!("foo" ) => 1, |
| 62 | //! foo_atom!("bar" ) => 2, |
| 63 | //! _ => 3, |
| 64 | //! } |
| 65 | //! } |
| 66 | //! ``` |
| 67 | //! |
| 68 | |
| 69 | #![recursion_limit = "128" ] |
| 70 | |
| 71 | use quote::quote; |
| 72 | use std::collections::BTreeSet; |
| 73 | use std::fs::File; |
| 74 | use std::io::{self, BufWriter, Write}; |
| 75 | use std::path::Path; |
| 76 | |
/// A builder for a static atom set and relevant macros
///
/// Create one with [`AtomType::new`], register strings with [`AtomType::atom`] or
/// [`AtomType::atoms`], optionally attach documentation, then emit the generated
/// code with [`AtomType::write_to`] or [`AtomType::write_to_file`].
#[derive(Debug, Clone)]
pub struct AtomType {
    /// Path of the generated atom type within the crate, e.g. `"foo::FooAtom"`.
    path: String,
    /// Optional documentation attached to the generated atom type alias.
    atom_doc: Option<String>,
    /// Optional documentation attached to the generated static set struct.
    static_set_doc: Option<String>,
    /// Name of the generated macro, stored without the trailing `!`.
    macro_name: String,
    /// Optional documentation attached to the generated macro.
    macro_doc: Option<String>,
    /// Strings registered so far; a set, so duplicates collapse and iteration is ordered.
    atoms: BTreeSet<String>,
}
| 86 | |
| 87 | impl AtomType { |
| 88 | /// Constructs a new static atom set builder |
| 89 | /// |
| 90 | /// `path` is a path within a crate of the atom type that will be created. |
| 91 | /// e.g. `"FooAtom"` at the crate root or `"foo::Atom"` if the generated code |
| 92 | /// is included in a `foo` module. |
| 93 | /// |
| 94 | /// `macro_name` must end with `!`. |
| 95 | /// |
| 96 | /// For example, `AtomType::new("foo::FooAtom", "foo_atom!")` will generate: |
| 97 | /// |
| 98 | /// ```ignore |
| 99 | /// pub type FooAtom = ::string_cache::Atom<FooAtomStaticSet>; |
| 100 | /// pub struct FooAtomStaticSet; |
| 101 | /// impl ::string_cache::StaticAtomSet for FooAtomStaticSet { |
| 102 | /// // ... |
| 103 | /// } |
| 104 | /// #[macro_export] |
| 105 | /// macro_rules foo_atom { |
| 106 | /// // Expands to: $crate::foo::FooAtom { … } |
| 107 | /// } |
| 108 | /// ``` |
| 109 | pub fn new(path: &str, macro_name: &str) -> Self { |
| 110 | assert!(macro_name.ends_with("!" ), "`macro_name` must end with '!'" ); |
| 111 | AtomType { |
| 112 | path: path.to_owned(), |
| 113 | macro_name: macro_name[..macro_name.len() - "!" .len()].to_owned(), |
| 114 | atom_doc: None, |
| 115 | static_set_doc: None, |
| 116 | macro_doc: None, |
| 117 | atoms: BTreeSet::new(), |
| 118 | } |
| 119 | } |
| 120 | |
| 121 | /// Add some documentation to the generated Atom type alias. |
| 122 | /// |
| 123 | /// This can help the user know that the type uses interned strings. |
| 124 | /// |
| 125 | /// Note that `docs` should not contain the `///` at the front of normal docs. |
| 126 | pub fn with_atom_doc(&mut self, docs: &str) -> &mut Self { |
| 127 | self.atom_doc = Some(docs.to_owned()); |
| 128 | self |
| 129 | } |
| 130 | |
| 131 | /// Add some documentation to the generated static set. |
| 132 | /// |
| 133 | /// This can help the user know that this type is zero-sized and just references a static |
| 134 | /// lookup table, or point them to the `Atom` type alias for more info. |
| 135 | /// |
| 136 | /// Note that `docs` should not contain the `///` at the front of normal docs. |
| 137 | pub fn with_static_set_doc(&mut self, docs: &str) -> &mut Self { |
| 138 | self.static_set_doc = Some(docs.to_owned()); |
| 139 | self |
| 140 | } |
| 141 | |
| 142 | /// Add some documentation to the generated macro. |
| 143 | /// |
| 144 | /// Note that `docs` should not contain the `///` at the front of normal docs. |
| 145 | pub fn with_macro_doc(&mut self, docs: &str) -> &mut Self { |
| 146 | self.macro_doc = Some(docs.to_owned()); |
| 147 | self |
| 148 | } |
| 149 | |
| 150 | /// Adds an atom to the builder |
| 151 | pub fn atom(&mut self, s: &str) -> &mut Self { |
| 152 | self.atoms.insert(s.to_owned()); |
| 153 | self |
| 154 | } |
| 155 | |
| 156 | /// Adds multiple atoms to the builder |
| 157 | pub fn atoms<I>(&mut self, iter: I) -> &mut Self |
| 158 | where |
| 159 | I: IntoIterator, |
| 160 | I::Item: AsRef<str>, |
| 161 | { |
| 162 | self.atoms |
| 163 | .extend(iter.into_iter().map(|s| s.as_ref().to_owned())); |
| 164 | self |
| 165 | } |
| 166 | |
| 167 | /// Write generated code to `destination`. |
| 168 | pub fn write_to<W>(&mut self, mut destination: W) -> io::Result<()> |
| 169 | where |
| 170 | W: Write, |
| 171 | { |
| 172 | destination.write_all( |
| 173 | self.to_tokens() |
| 174 | .to_string() |
| 175 | // Insert some newlines to make the generated code slightly easier to read. |
| 176 | .replace(" [ \"" , "[ \n\"" ) |
| 177 | .replace(" \" , " , " \", \n" ) |
| 178 | .replace(" ( \"" , " \n( \"" ) |
| 179 | .replace("; " , "; \n" ) |
| 180 | .as_bytes(), |
| 181 | ) |
| 182 | } |
| 183 | |
#[cfg(test)]
/// Write generated code to destination [`Vec<u8>`] and return it as [`String`]
///
/// Used mostly for testing or displaying a value. The generated code is
/// appended to whatever bytes `destination` already holds.
///
/// # Errors
///
/// Returns any error from [`AtomType::write_to`], or an
/// [`io::ErrorKind::InvalidData`] error if the resulting buffer is not valid
/// UTF-8 (only possible when `destination` was passed in with non-UTF-8 bytes).
pub fn write_to_string(&mut self, mut destination: Vec<u8>) -> io::Result<String> {
    // Delegate to `write_to` so the two output paths cannot drift apart.
    self.write_to(&mut destination)?;
    String::from_utf8(destination).map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))
}
| 203 | |
| 204 | fn to_tokens(&mut self) -> proc_macro2::TokenStream { |
| 205 | // `impl Default for Atom` requires the empty string to be in the static set. |
| 206 | // This also makes sure the set in non-empty, |
| 207 | // which would cause divisions by zero in rust-phf. |
| 208 | self.atoms.insert(String::new()); |
| 209 | |
| 210 | // Strings over 7 bytes + empty string added to static set. |
| 211 | // Otherwise stored inline. |
| 212 | let (static_strs, inline_strs): (Vec<_>, Vec<_>) = self |
| 213 | .atoms |
| 214 | .iter() |
| 215 | .map(String::as_str) |
| 216 | .partition(|s| s.len() > 7 || s.is_empty()); |
| 217 | |
| 218 | // Static strings |
| 219 | let hash_state = phf_generator::generate_hash(&static_strs); |
| 220 | let phf_generator::HashState { key, disps, map } = hash_state; |
| 221 | let (disps0, disps1): (Vec<_>, Vec<_>) = disps.into_iter().unzip(); |
| 222 | let atoms: Vec<&str> = map.iter().map(|&idx| static_strs[idx]).collect(); |
| 223 | let empty_string_index = atoms.iter().position(|s| s.is_empty()).unwrap() as u32; |
| 224 | let indices = 0..atoms.len() as u32; |
| 225 | |
| 226 | let hashes: Vec<u32> = atoms |
| 227 | .iter() |
| 228 | .map(|string| { |
| 229 | let hash = phf_shared::hash(string, &key); |
| 230 | (hash.g ^ hash.f1) as u32 |
| 231 | }) |
| 232 | .collect(); |
| 233 | |
| 234 | let mut path_parts = self.path.rsplitn(2, "::" ); |
| 235 | let type_name = path_parts.next().unwrap(); |
| 236 | let module = match path_parts.next() { |
| 237 | Some(m) => format!("$crate:: {}" , m), |
| 238 | None => format!("$crate" ), |
| 239 | }; |
| 240 | let atom_doc = match self.atom_doc { |
| 241 | Some(ref doc) => quote!(#[doc = #doc]), |
| 242 | None => quote!(), |
| 243 | }; |
| 244 | let static_set_doc = match self.static_set_doc { |
| 245 | Some(ref doc) => quote!(#[doc = #doc]), |
| 246 | None => quote!(), |
| 247 | }; |
| 248 | let macro_doc = match self.macro_doc { |
| 249 | Some(ref doc) => quote!(#[doc = #doc]), |
| 250 | None => quote!(), |
| 251 | }; |
| 252 | let new_term = |
| 253 | |string: &str| proc_macro2::Ident::new(string, proc_macro2::Span::call_site()); |
| 254 | let static_set_name = new_term(&format!(" {}StaticSet" , type_name)); |
| 255 | let type_name = new_term(type_name); |
| 256 | let macro_name = new_term(&*self.macro_name); |
| 257 | let module = module.parse::<proc_macro2::TokenStream>().unwrap(); |
| 258 | let atom_prefix = format!("ATOM_ {}_" , type_name.to_string().to_uppercase()); |
| 259 | let new_const_name = |atom: &str| { |
| 260 | let mut name = atom_prefix.clone(); |
| 261 | for c in atom.chars() { |
| 262 | name.push_str(&format!("_ {:02X}" , c as u32)) |
| 263 | } |
| 264 | new_term(&name) |
| 265 | }; |
| 266 | let const_names: Vec<_> = atoms.iter().copied().map(new_const_name).collect(); |
| 267 | |
| 268 | // Inline strings |
| 269 | let (inline_const_names, inline_values_and_lengths): (Vec<_>, Vec<_>) = inline_strs |
| 270 | .iter() |
| 271 | .map(|s| { |
| 272 | let const_name = new_const_name(s); |
| 273 | |
| 274 | let mut value = 0u64; |
| 275 | for (index, c) in s.bytes().enumerate() { |
| 276 | value = value | ((c as u64) << (index * 8 + 8)); |
| 277 | } |
| 278 | |
| 279 | let len = s.len() as u8; |
| 280 | |
| 281 | (const_name, (value, len)) |
| 282 | }) |
| 283 | .unzip(); |
| 284 | let (inline_values, inline_lengths): (Vec<_>, Vec<_>) = |
| 285 | inline_values_and_lengths.into_iter().unzip(); |
| 286 | |
| 287 | quote! { |
| 288 | #atom_doc |
| 289 | pub type #type_name = ::string_cache::Atom<#static_set_name>; |
| 290 | |
| 291 | #static_set_doc |
| 292 | #[derive(PartialEq, Eq, PartialOrd, Ord)] |
| 293 | pub struct #static_set_name; |
| 294 | |
| 295 | impl ::string_cache::StaticAtomSet for #static_set_name { |
| 296 | fn get() -> &'static ::string_cache::PhfStrSet { |
| 297 | static SET: ::string_cache::PhfStrSet = ::string_cache::PhfStrSet { |
| 298 | key: #key, |
| 299 | disps: &[#((#disps0, #disps1)),*], |
| 300 | atoms: &[#(#atoms),*], |
| 301 | hashes: &[#(#hashes),*] |
| 302 | }; |
| 303 | &SET |
| 304 | } |
| 305 | fn empty_string_index() -> u32 { |
| 306 | #empty_string_index |
| 307 | } |
| 308 | } |
| 309 | |
| 310 | #( |
| 311 | pub const #const_names: #type_name = #type_name::pack_static(#indices); |
| 312 | )* |
| 313 | #( |
| 314 | pub const #inline_const_names: #type_name = #type_name::pack_inline(#inline_values, #inline_lengths); |
| 315 | )* |
| 316 | |
| 317 | #macro_doc |
| 318 | #[macro_export] |
| 319 | macro_rules! #macro_name { |
| 320 | #( |
| 321 | (#atoms) => { #module::#const_names }; |
| 322 | )* |
| 323 | #( |
| 324 | (#inline_strs) => { #module::#inline_const_names }; |
| 325 | )* |
| 326 | } |
| 327 | } |
| 328 | } |
| 329 | |
| 330 | /// Create a new file at `path` and write generated code there. |
| 331 | /// |
| 332 | /// Typical usage: |
| 333 | /// `.write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join("foo_atom.rs"))` |
| 334 | pub fn write_to_file(&mut self, path: &Path) -> io::Result<()> { |
| 335 | self.write_to(BufWriter::new(File::create(path)?)) |
| 336 | } |
| 337 | } |
| 338 | |
/// Generating code twice from the same atoms must give byte-identical output.
#[test]
fn test_iteration_order() {
    // Build a fresh generator each time so no state is shared between runs.
    let generate = |failure_message: &str| {
        crate::AtomType::new("foo::Atom", "foo_atom!")
            .atoms(&["x", "xlink", "svg", "test"])
            .write_to_string(Vec::new())
            .expect(failure_message)
    };

    let x1 = generate("write to string cache x1");
    let x2 = generate("write to string cache x2");

    assert_eq!(x1, x2);
}