lib.rs source code [crates/heck/src/lib.rs]

1	//! **heck** is a case conversion library.
2	//!
3	//! This library exists to provide case conversion between common cases like
4	//! CamelCase and snake_case. It is intended to be unicode aware, internally
5	//! consistent, and reasonably well performing.
6	//!
7	//! ## Definition of a word boundary
8	//!
9	//! Word boundaries are defined as the "unicode words" defined in the
10	//! `unicode_segmentation` library, as well as within those words in this
11	//! manner:
12	//!
13	//! 1. All underscore characters are considered word boundaries.
14	//! 2. If an uppercase character is followed by lowercase letters, a word
15	//! boundary is considered to be just prior to that uppercase character.
16	//! 3. If multiple uppercase characters are consecutive, they are considered to
17	//! be within a single word, except that the last will be part of the next word
18	//! if it is followed by lowercase characters (see rule 2).
19	//!
20	//! That is, "HelloWorld" is segmented `Hello|World` whereas "XMLHttpRequest" is
21	//! segmented `XML|Http|Request`.
22	//!
23	//! Characters not within words (such as spaces, punctuation, and underscores)
24	//! are not included in the output string except as they are a part of the case
25	//! being converted to. Multiple adjacent word boundaries (such as a series of
26	//! underscores) are folded into one. ("hello__world" in snake case is therefore
27	//! "hello_world", not the exact same string). Leading or trailing word boundary
28	//! indicators are dropped, except insofar as CamelCase capitalizes the first
29	//! word.
30	//!
31	//! ### Cases contained in this library:
32	//!
33	//! 1. UpperCamelCase
34	//! 2. lowerCamelCase
35	//! 3. snake_case
36	//! 4. kebab-case
37	//! 5. SHOUTY_SNAKE_CASE
38	//! 6. Title Case
39	//! 7. SHOUTY-KEBAB-CASE
40	//! 8. Train-Case
41	#![deny(missing_docs)]
42	#![forbid(unsafe_code)]
43
44	mod kebab;
45	mod lower_camel;
46	mod shouty_kebab;
47	mod shouty_snake;
48	mod snake;
49	mod title;
50	mod train;
51	mod upper_camel;
52
53	pub use kebab::{AsKebabCase, ToKebabCase};
54	pub use lower_camel::{AsLowerCamelCase, ToLowerCamelCase};
55	pub use shouty_kebab::{AsShoutyKebabCase, ToShoutyKebabCase};
56	pub use shouty_snake::{
57	AsShoutySnakeCase, AsShoutySnakeCase as AsShoutySnekCase, ToShoutySnakeCase, ToShoutySnekCase,
58	};
59	pub use snake::{AsSnakeCase, AsSnakeCase as AsSnekCase, ToSnakeCase, ToSnekCase};
60	pub use title::{AsTitleCase, ToTitleCase};
61	pub use train::{AsTrainCase, ToTrainCase};
62	pub use upper_camel::{
63	AsUpperCamelCase, AsUpperCamelCase as AsPascalCase, ToPascalCase, ToUpperCamelCase,
64	};
65
66	use std::fmt;
67
/// Yields the candidate words of `s` when the `unicode` feature is enabled.
///
/// Segmentation is delegated entirely to the `unicode_words` method of the
/// `unicode_segmentation` crate; no extra filtering is applied here.
#[cfg(feature = "unicode")]
fn get_iterator(s: &str) -> unicode_segmentation::UnicodeWords {
    unicode_segmentation::UnicodeSegmentation::unicode_words(s)
}
/// Yields the candidate words of `s` when the `unicode` feature is disabled.
///
/// The input is split at every character that is not an ASCII letter or
/// digit. Adjacent separators therefore produce empty string slices, which
/// are yielded as-is.
#[cfg(not(feature = "unicode"))]
fn get_iterator(s: &str) -> impl Iterator<Item = &str> {
    /// Returns `true` for any character that is not ASCII alphanumeric.
    fn is_separator(ch: char) -> bool {
        !ch.is_ascii_alphanumeric()
    }

    s.split(is_separator)
}
77
78	fn transform<F, G>(
79	s: &str,
80	mut with_word: F,
81	mut boundary: G,
82	f: &mut fmt::Formatter,
83	) -> fmt::Result
84	where
85	F: FnMut(&str, &mut fmt::Formatter) -> fmt::Result,
86	G: FnMut(&mut fmt::Formatter) -> fmt::Result,
87	{
88	/// Tracks the current 'mode' of the transformation algorithm as it scans
89	/// the input string.
90	///
91	/// The mode is a tri-state which tracks the case of the last cased
92	/// character of the current word. If there is no cased character
93	/// (either lowercase or uppercase) since the previous word boundary,
94	/// than the mode is `Boundary`. If the last cased character is lowercase,
95	/// then the mode is `Lowercase`. Othertherwise, the mode is
96	/// `Uppercase`.
97	#[derive(Clone, Copy, PartialEq)]
98	enum WordMode {
99	/// There have been no lowercase or uppercase characters in the current
100	/// word.
101	Boundary,
102	/// The previous cased character in the current word is lowercase.
103	Lowercase,
104	/// The previous cased character in the current word is uppercase.
105	Uppercase,
106	}
107
108	let mut first_word = `true`;
109
110	for word in get_iterator(s) {
111	let mut char_indices = word.char_indices().peekable();
112	let mut init = `0`;
113	let mut mode = WordMode::Boundary;
114
115	while let Some((i, c)) = char_indices.next() {
116	// Skip underscore characters
117	if c == '_' {
118	if init == i {
119	init += `1`;
120	}
121	continue;
122	}
123
124	if let Some(&(next_i, next)) = char_indices.peek() {
125	// The mode including the current character, assuming the
126	// current character does not result in a word boundary.
127	let next_mode = if c.is_lowercase() {
128	WordMode::Lowercase
129	} else if c.is_uppercase() {
130	WordMode::Uppercase
131	} else {
132	mode
133	};
134
135	// Word boundary after if next is underscore or current is
136	// not uppercase and next is uppercase
137	if next == '_' \|\| (next_mode == WordMode::Lowercase && next.is_uppercase()) {
138	if !first_word {
139	boundary(f)?;
140	}
141	with_word(&word[init..next_i], f)?;
142	first_word = `false`;
143	init = next_i;
144	mode = WordMode::Boundary;
145
146	// Otherwise if current and previous are uppercase and next
147	// is lowercase, word boundary before
148	} else if mode == WordMode::Uppercase && c.is_uppercase() && next.is_lowercase() {
149	if !first_word {
150	boundary(f)?;
151	} else {
152	first_word = `false`;
153	}
154	with_word(&word[init..i], f)?;
155	init = i;
156	mode = WordMode::Boundary;
157
158	// Otherwise no word boundary, just update the mode
159	} else {
160	mode = next_mode;
161	}
162	} else {
163	// Collect trailing characters as a word
164	if !first_word {
165	boundary(f)?;
166	} else {
167	first_word = `false`;
168	}
169	with_word(&word[init..], f)?;
170	break;
171	}
172	}
173	}
174
175	Ok(())
176	}
177
/// Writes `s` to `f` with every character converted to lowercase.
///
/// A capital sigma (`Σ`) that is the very last character of `s` is written
/// as the final-form sigma `ς`. Every other character goes through
/// `char::to_lowercase`.
fn lowercase(s: &str, f: &mut fmt::Formatter) -> fmt::Result {
    let mut rest = s.chars();
    while let Some(ch) = rest.next() {
        match ch {
            // Nothing left after this sigma, so it sits in word-final position.
            'Σ' if rest.as_str().is_empty() => f.write_str("ς")?,
            _ => write!(f, "{}", ch.to_lowercase())?,
        }
    }

    Ok(())
}
190
/// Writes `s` to `f` with every character converted to uppercase.
///
/// Each character goes through `char::to_uppercase`, so one input character
/// may expand to several output characters. Writing stops at the first
/// formatter error, which is returned to the caller.
fn uppercase(s: &str, f: &mut fmt::Formatter) -> fmt::Result {
    s.chars()
        .try_for_each(|ch| write!(f, "{}", ch.to_uppercase()))
}
198
199	fn capitalize(s: &str, f: &mut fmt::Formatter) -> fmt::Result {
200	let mut char_indices: CharIndices<'_> = s.char_indices();
201	if let Some((_, c: char)) = char_indices.next() {
202	write!(f, "{}", c.to_uppercase())?;
203	if let Some((i: usize, _)) = char_indices.next() {
204	lowercase(&s[i..], f)?;
205	}
206	}
207
208	Ok(())
209	}
210