boundary.rs source code [crates/convert_case/src/boundary.rs]

1	use unicode_segmentation::UnicodeSegmentation;
2
/// Returns `true` when every `char` of the grapheme is an ASCII digit (`0`-`9`).
///
/// The argument is `&&str` so the function can be handed directly to
/// `Option::map` on the result of a lookup into a `&[&str]` slice.
/// An empty string is vacuously reported as a digit, because `all` over an
/// empty iterator is `true`.
fn grapheme_is_digit(c: &&str) -> bool {
    c.chars().all(|ch| ch.is_ascii_digit())
}
6
/// Returns `true` when the grapheme is cased and equal to its own uppercase form.
///
/// A grapheme is "cased" when its uppercase and lowercase mappings differ, which
/// rules out digits, punctuation and other caseless text. Checking
/// `c == upper && c != lower` is equivalent to `upper != lower && c == upper`,
/// but it skips the lowercase allocation whenever the first test already fails.
fn grapheme_is_uppercase(c: &&str) -> bool {
    *c == c.to_uppercase() && *c != c.to_lowercase()
}
10
/// Returns `true` when the grapheme is cased and equal to its own lowercase form.
///
/// A grapheme is "cased" when its uppercase and lowercase mappings differ, which
/// rules out digits, punctuation and other caseless text. Checking
/// `c == lower && c != upper` is equivalent to `upper != lower && c == lower`,
/// but it skips the uppercase allocation whenever the first test already fails.
fn grapheme_is_lowercase(c: &&str) -> bool {
    *c == c.to_lowercase() && *c != c.to_uppercase()
}
14
/// How an identifier is split into words.
///
/// Some boundaries, `HYPHEN`, `UNDERSCORE`, and `SPACE`, consume the character they
/// split on, whereas the other boundaries do not.
///
/// `Boundary` includes methods that return useful groups of boundaries. It also
/// contains the [`defaults_from`](Boundary::defaults_from) method which will generate a subset
/// of default boundaries based on the boundaries present in a string.
///
/// You can also create custom delimiter boundaries using the [`from_delim`](Boundary::from_delim)
/// method or directly instantiate `Boundary` for complex boundary conditions.
///
/// Two boundaries are equal, and hash identically, exactly when their
/// [`name`](Boundary::name) fields are equal; the other fields are ignored.
///
/// ```
/// use convert_case::{Boundary, Case, Casing, Converter};
///
/// assert_eq!(
///     "transformations_in_3d",
///     "TransformationsIn3D"
///         .from_case(Case::Camel)
///         .without_boundaries(&Boundary::digit_letter())
///         .to_case(Case::Snake)
/// );
///
/// let conv = Converter::new()
///     .set_boundaries(&Boundary::defaults_from("aA "))
///     .to_case(Case::Title);
/// assert_eq!("7empest By Tool", conv.convert("7empest byTool"));
/// ```
#[derive(Debug, Eq, Clone, Copy)]
pub struct Boundary {
    /// A unique name used for comparison and hashing.
    pub name: &'static str,
    /// A function that determines if this boundary is present at the start
    /// of the given graphemes. Second argument is the `arg` field.
    pub condition: fn(&[&str], Option<&'static str>) -> bool,
    /// An optional string passed to `condition` at runtime. Used
    /// internally for [`Boundary::from_delim`] method.
    pub arg: Option<&'static str>,
    /// Where the beginning of the boundary is, counted in graphemes from the
    /// position at which `condition` matched.
    pub start: usize,
    /// The length of the boundary. This is the number of graphemes that
    /// are removed when splitting.
    pub len: usize,
}

impl PartialEq for Boundary {
    fn eq(&self, other: &Self) -> bool {
        self.name == other.name
    }
}

// `Hash` must agree with `PartialEq`: values that compare equal have to produce
// the same hash. Equality only looks at `name`, so hashing does too. A derived
// `Hash` would also feed in `condition`, `arg`, `start` and `len`.
impl core::hash::Hash for Boundary {
    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
        core::hash::Hash::hash(self.name, state);
    }
}
64
65	impl Boundary {
66	/// Splits on space, consuming the character on segmentation.
67	/// ```
68	/// # use convert_case::Boundary;
69	/// assert_eq!(
70	/// vec![Boundary::SPACE],
71	/// Boundary::defaults_from(" ")
72	/// );
73	/// ```
74	pub const SPACE: Boundary = Boundary {
75	name: "Space",
76	condition: \|s, _\| s.get(`0`) == Some(&" "),
77	arg: None,
78	start: `0`,
79	len: `1`,
80	};
81
82	/// Splits on `-`, consuming the character on segmentation.
83	/// ```
84	/// # use convert_case::Boundary;
85	/// assert_eq!(
86	/// vec![Boundary::HYPHEN],
87	/// Boundary::defaults_from("-")
88	/// );
89	/// ```
90	pub const HYPHEN: Boundary = Boundary {
91	name: "Hyphen",
92	condition: \|s, _\| s.get(`0`) == Some(&"-"),
93	arg: None,
94	start: `0`,
95	len: `1`,
96	};
97
98	/// Splits on `_`, consuming the character on segmentation.
99	/// ```
100	/// # use convert_case::Boundary;
101	/// assert_eq!(
102	/// vec![Boundary::UNDERSCORE],
103	/// Boundary::defaults_from("_")
104	/// );
105	/// ```
106	pub const UNDERSCORE: Boundary = Boundary {
107	name: "Underscore",
108	condition: \|s, _\| s.get(`0`) == Some(&"_"),
109	arg: None,
110	start: `0`,
111	len: `1`,
112	};
113
114	/// Splits where a lowercase letter is followed by an uppercase letter.
115	/// ```
116	/// # use convert_case::Boundary;
117	/// assert_eq!(
118	/// vec![Boundary::LOWER_UPPER],
119	/// Boundary::defaults_from("aA")
120	/// );
121	/// ```
122	pub const LOWER_UPPER: Boundary = Boundary {
123	name: "LowerUpper",
124	condition: \|s, _\| {
125	s.get(`0`).map(grapheme_is_lowercase) == Some(`true`)
126	&& s.get(`1`).map(grapheme_is_uppercase) == Some(`true`)
127	},
128	arg: None,
129	start: `1`,
130	len: `0`,
131	};
132	/// Splits where an uppercase letter is followed by a lowercase letter. This is seldom used,
133	/// and is not* included in the* [defaults](Boundary::defaults).
134	/// ```
135	/// # use convert_case::Boundary;
136	/// assert!(
137	/// Boundary::defaults_from("Aa").len() == `0`
138	/// );
139	/// ```
140	pub const UPPER_LOWER: Boundary = Boundary {
141	name: "UpperLower",
142	condition: \|s, _\| {
143	s.get(`0`).map(grapheme_is_uppercase) == Some(`true`)
144	&& s.get(`1`).map(grapheme_is_lowercase) == Some(`true`)
145	},
146	arg: None,
147	start: `1`,
148	len: `0`,
149	};
150
151	/// Acronyms are identified by two uppercase letters followed by a lowercase letter.
152	/// The word boundary is between the two uppercase letters. For example, "HTTPRequest"
153	/// would have an acronym boundary identified at "PRe" and split into "HTTP" and "Request".
154	/// ```
155	/// # use convert_case::Boundary;
156	/// assert_eq!(
157	/// vec![Boundary::ACRONYM],
158	/// Boundary::defaults_from("AAa")
159	/// );
160	/// ```
161	pub const ACRONYM: Boundary = Boundary {
162	name: "Acronym",
163	condition: \|s, _\| {
164	s.get(`0`).map(grapheme_is_uppercase) == Some(`true`)
165	&& s.get(`1`).map(grapheme_is_uppercase) == Some(`true`)
166	&& s.get(`2`).map(grapheme_is_lowercase) == Some(`true`)
167	},
168	arg: None,
169	start: `1`,
170	len: `0`,
171	};
172
173	/// Splits where a lowercase letter is followed by a digit.
174	/// ```
175	/// # use convert_case::Boundary;
176	/// assert_eq!(
177	/// vec![Boundary::LOWER_DIGIT],
178	/// Boundary::defaults_from("a1")
179	/// );
180	/// ```
181	pub const LOWER_DIGIT: Boundary = Boundary {
182	name: "LowerDigit",
183	condition: \|s, _\| {
184	s.get(`0`).map(grapheme_is_lowercase) == Some(`true`)
185	&& s.get(`1`).map(grapheme_is_digit) == Some(`true`)
186	},
187	arg: None,
188	start: `1`,
189	len: `0`,
190	};
191
192	/// Splits where an uppercase letter is followed by a digit.
193	/// ```
194	/// # use convert_case::Boundary;
195	/// assert_eq!(
196	/// vec![Boundary::UPPER_DIGIT],
197	/// Boundary::defaults_from("A1")
198	/// );
199	/// ```
200	pub const UPPER_DIGIT: Boundary = Boundary {
201	name: "UpperDigit",
202	condition: \|s, _\| {
203	s.get(`0`).map(grapheme_is_uppercase) == Some(`true`)
204	&& s.get(`1`).map(grapheme_is_digit) == Some(`true`)
205	},
206	arg: None,
207	start: `1`,
208	len: `0`,
209	};
210
211	/// Splits where digit is followed by a lowercase letter.
212	/// ```
213	/// # use convert_case::Boundary;
214	/// assert_eq!(
215	/// vec![Boundary::DIGIT_LOWER],
216	/// Boundary::defaults_from("1a")
217	/// );
218	/// ```
219	pub const DIGIT_LOWER: Boundary = Boundary {
220	name: "DigitLower",
221	condition: \|s, _\| {
222	s.get(`0`).map(grapheme_is_digit) == Some(`true`)
223	&& s.get(`1`).map(grapheme_is_lowercase) == Some(`true`)
224	},
225	arg: None,
226	start: `1`,
227	len: `0`,
228	};
229
230	/// Splits where digit is followed by an uppercase letter.
231	/// ```
232	/// # use convert_case::Boundary;
233	/// assert_eq!(
234	/// vec![Boundary::DIGIT_UPPER],
235	/// Boundary::defaults_from("1A")
236	/// );
237	/// ```
238	pub const DIGIT_UPPER: Boundary = Boundary {
239	name: "DigitUpper",
240	condition: \|s, _\| {
241	s.get(`0`).map(grapheme_is_digit) == Some(`true`)
242	&& s.get(`1`).map(grapheme_is_uppercase) == Some(`true`)
243	},
244	arg: None,
245	start: `1`,
246	len: `0`,
247	};
248
249	/// Create a new boundary based on a delimiter.
250	/// ```
251	/// # use convert_case::{Case, Converter, Boundary};
252	/// let conv = Converter::new()
253	/// .set_boundaries(&[Boundary::from_delim("::")])
254	/// .to_case(Case::Camel);
255	/// assert_eq!(
256	/// "myVarName",
257	/// conv.convert("my::var::name")
258	/// )
259	/// ```
260	pub const fn from_delim(delim: &'static str) -> Boundary {
261	Boundary {
262	name: delim,
263	arg: Some(delim),
264	condition: \|s, arg\| s.join("").starts_with(arg.unwrap()),
265	start: `0`,
266	len: delim.len(),
267	}
268	}
269
270	/// The default list of boundaries used when `Casing::to_case` is called directly
271	/// and in a `Converter` generated from `Converter::new()`.
272	/// ```
273	/// # use convert_case::Boundary;
274	/// assert_eq!(
275	/// [
276	/// Boundary::SPACE,
277	/// Boundary::HYPHEN,
278	/// Boundary::UNDERSCORE,
279	/// Boundary::LOWER_UPPER,
280	/// Boundary::ACRONYM,
281	/// Boundary::LOWER_DIGIT,
282	/// Boundary::UPPER_DIGIT,
283	/// Boundary::DIGIT_LOWER,
284	/// Boundary::DIGIT_UPPER,
285	/// ],
286	/// Boundary::defaults()
287	/// );
288	/// ```
289	pub const fn defaults() -> [Boundary; `9`] {
290	[
291	Boundary::SPACE,
292	Boundary::HYPHEN,
293	Boundary::UNDERSCORE,
294	Boundary::LOWER_UPPER,
295	Boundary::ACRONYM,
296	Boundary::LOWER_DIGIT,
297	Boundary::UPPER_DIGIT,
298	Boundary::DIGIT_LOWER,
299	Boundary::DIGIT_UPPER,
300	]
301	}
302
303	/// Returns the boundaries that involve digits.
304	/// `LowerDigit`.
305	/// ```
306	/// # use convert_case::Boundary;
307	/// assert_eq!(
308	/// [
309	/// Boundary::LOWER_DIGIT,
310	/// Boundary::UPPER_DIGIT,
311	/// Boundary::DIGIT_LOWER,
312	/// Boundary::DIGIT_UPPER,
313	/// ],
314	/// Boundary::digits()
315	/// );
316	/// ```
317	pub const fn digits() -> [Boundary; `4`] {
318	[
319	Boundary::LOWER_DIGIT,
320	Boundary::UPPER_DIGIT,
321	Boundary::DIGIT_LOWER,
322	Boundary::DIGIT_UPPER,
323	]
324	}
325
326	/// Returns the boundaries that are letters followed by digits.
327	/// ```
328	/// # use convert_case::Boundary;
329	/// assert_eq!(
330	/// [
331	/// Boundary::LOWER_DIGIT,
332	/// Boundary::UPPER_DIGIT,
333	/// ],
334	/// Boundary::letter_digit()
335	/// );
336	/// ```
337	pub const fn letter_digit() -> [Boundary; `2`] {
338	[Boundary::LOWER_DIGIT, Boundary::UPPER_DIGIT]
339	}
340
341	/// Returns the boundaries that are digits followed by letters.
342	/// ```
343	/// # use convert_case::Boundary;
344	/// assert_eq!(
345	/// [
346	/// Boundary::DIGIT_LOWER,
347	/// Boundary::DIGIT_UPPER
348	/// ],
349	/// Boundary::digit_letter()
350	/// );
351	/// ```
352	pub fn digit_letter() -> [Boundary; `2`] {
353	[Boundary::DIGIT_LOWER, Boundary::DIGIT_UPPER]
354	}
355
356	/// Returns a list of all boundaries that are identified within the given string.
357	/// Could be a short of writing out all the boundaries in a list directly. This will not
358	/// identify boundary `UpperLower` if it also used as part of `Acronym`.
359	///
360	/// If you want to be very explicit and not overlap boundaries, it is recommended to use a colon
361	/// character.
362	/// ```
363	/// # use convert_case::Boundary;
364	/// assert_eq!(
365	/// vec![
366	/// Boundary::SPACE,
367	/// Boundary::HYPHEN,
368	/// Boundary::LOWER_UPPER,
369	/// Boundary::UPPER_DIGIT,
370	/// Boundary::DIGIT_LOWER,
371	/// ],
372	/// Boundary::defaults_from("aA8a -")
373	/// );
374	/// assert_eq!(
375	/// vec![
376	/// Boundary::UNDERSCORE,
377	/// Boundary::LOWER_UPPER,
378	/// Boundary::ACRONYM,
379	/// Boundary::DIGIT_UPPER,
380	/// ],
381	/// Boundary::defaults_from("bD:0B:_:AAa")
382	/// );
383	/// ```
384	pub fn defaults_from(pattern: &str) -> Vec<Boundary> {
385	let mut boundaries = Vec::new();
386	for boundary in Boundary::defaults() {
387	let parts = split(&pattern, &[boundary]);
388	if parts.len() > `1` \|\| parts.len() == `0` \|\| parts[`0`] != pattern {
389	boundaries.push(boundary);
390	}
391	}
392	boundaries
393	}
394	}
395
396	/// Split an identifier into a list of words using the list of boundaries.
397	///
398	/// This is used internally for splitting an identifier before mutating by
399	/// a pattern and joining again with a delimiter.
400	/// ```
401	/// use convert_case::{Boundary, split};
402	/// assert_eq!(
403	/// vec!["one", "two", "three.four"],
404	/// split(&"one_two-three.four", &[Boundary::UNDERSCORE, Boundary::HYPHEN]),
405	/// )
406	/// ```
407	pub fn split<'s, T>(s: &'s T, boundaries: &[Boundary]) -> Vec<&'s str>
408	where
409	T: AsRef<str>,
410	{
411	let s = s.as_ref();
412
413	if s.len() == `0` {
414	return vec![];
415	}
416
417	let mut words = Vec::new();
418	let mut last_boundary_end = `0`;
419
420	let (indices, graphemes): (Vec<_>, Vec<_>) = s.grapheme_indices(`true`).unzip();
421	let grapheme_length = indices[graphemes.len() - `1`] + graphemes[graphemes.len() - `1`].len();
422
423	for i in `0`..graphemes.len() {
424	for boundary in boundaries {
425	//let byte_index = indices[i];
426
427	if (boundary.condition)(&graphemes[i..], boundary.arg) {
428	// What if we find a condition at the end of the array?
429	// Maybe we can stop early based on length
430	// To do this, need to switch the loops
431	// TODO
432	let boundary_byte_start: usize =
433	*indices.get(i + boundary.start).unwrap_or(&grapheme_length);
434	let boundary_byte_end: usize = *indices
435	.get(i + boundary.start + boundary.len)
436	.unwrap_or(&grapheme_length);
437
438	// todo clean this up a bit
439	words.push(&s[last_boundary_end..boundary_byte_start]);
440	last_boundary_end = boundary_byte_end;
441	break;
442	}
443	}
444	}
445	words.push(&s[last_boundary_end..]);
446	words.into_iter().filter(\|s\| !s.is_empty()).collect()
447	}
448
// ASCII-only version of `split`, kept for reference. It indexes the string by
// byte and calls `condition` with a single argument, so it does not match the
// current `Boundary::condition` signature.
//
//pub fn split<'s, T>(s: &'s T, boundaries: &[Boundary]) -> Vec<&'s str>
//where
//    T: AsRef<str>,
//{
//    let s = s.as_ref();
//
//    let mut words = Vec::new();
//    let mut last_end = 0;
//    for i in 0..s.len() {
//        for boundary in boundaries {
//            if (boundary.condition)(&s[i..]) {
//                words.push(&s[last_end..i + boundary.start]);
//                last_end = i + boundary.start + boundary.len;
//                break;
//            }
//        }
//    }
//    words.push(&s[last_end..]);
//    words
//}

470
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn hyphen() {
        let s = "a-b-c";
        let v = split(&s, &[Boundary::HYPHEN]);
        assert_eq!(v, vec!["a", "b", "c"]);
    }

    #[test]
    fn underscore() {
        let s = "a_b_c";
        let v = split(&s, &[Boundary::UNDERSCORE]);
        assert_eq!(v, vec!["a", "b", "c"]);
    }

    #[test]
    fn space() {
        let s = "a b c";
        let v = split(&s, &[Boundary::SPACE]);
        assert_eq!(v, vec!["a", "b", "c"]);
    }

    #[test]
    fn delimiters() {
        let s = "aaa-bbb_ccc ddd ddd-eee";
        let v = split(
            &s,
            &[Boundary::SPACE, Boundary::UNDERSCORE, Boundary::HYPHEN],
        );
        assert_eq!(v, vec!["aaa", "bbb", "ccc", "ddd", "ddd", "eee"]);
    }

    #[test]
    fn lower_upper() {
        let s = "lowerUpperUpper";
        let v = split(&s, &[Boundary::LOWER_UPPER]);
        assert_eq!(v, vec!["lower", "Upper", "Upper"]);
    }

    #[test]
    fn upper_lower() {
        // The split happens after the uppercase letter, not before it.
        let s = "AbcDef";
        let v = split(&s, &[Boundary::UPPER_LOWER]);
        assert_eq!(v, vec!["A", "bcD", "ef"]);
    }

    #[test]
    fn acronym() {
        let s = "XMLRequest";
        let v = split(&s, &[Boundary::ACRONYM]);
        assert_eq!(v, vec!["XML", "Request"]);
    }

    #[test]
    fn lower_digit() {
        let s = "test123";
        let v = split(&s, &[Boundary::LOWER_DIGIT]);
        assert_eq!(v, vec!["test", "123"]);
    }

    #[test]
    fn upper_digit() {
        let s = "TEST123";
        let v = split(&s, &[Boundary::UPPER_DIGIT]);
        assert_eq!(v, vec!["TEST", "123"]);
    }

    #[test]
    fn digit_lower() {
        let s = "123test";
        let v = split(&s, &[Boundary::DIGIT_LOWER]);
        assert_eq!(v, vec!["123", "test"]);
    }

    #[test]
    fn digit_upper() {
        let s = "123Test";
        let v = split(&s, &[Boundary::DIGIT_UPPER]);
        assert_eq!(v, vec!["123", "Test"]);
    }

    #[test]
    fn empty_input_has_no_words() {
        let s = "";
        assert!(split(&s, &Boundary::defaults()).is_empty());
    }

    #[test]
    fn repeated_delimiters_yield_no_empty_words() {
        let s = "_a__b_";
        let v = split(&s, &[Boundary::UNDERSCORE]);
        assert_eq!(v, vec!["a", "b"]);
    }

    #[test]
    fn non_ascii_letters() {
        let s = "\u{00F1}and\u{00FA}Grande";
        let v = split(&s, &[Boundary::LOWER_UPPER]);
        assert_eq!(v, vec!["\u{00F1}and\u{00FA}", "Grande"]);
    }

    #[test]
    fn boundaries_found_in_string() {
        // upper lower is no longer a default
        assert_eq!(Vec::<Boundary>::new(), Boundary::defaults_from(".Aaaa"));
        assert_eq!(
            vec![Boundary::LOWER_UPPER, Boundary::LOWER_DIGIT,],
            Boundary::defaults_from("a8.Aa.aA")
        );
        assert_eq!(
            Boundary::digits().to_vec(),
            Boundary::defaults_from("b1B1b")
        );
        assert_eq!(
            vec![
                Boundary::SPACE,
                Boundary::HYPHEN,
                Boundary::UNDERSCORE,
                Boundary::ACRONYM,
            ],
            Boundary::defaults_from("AAa -_")
        );
    }

    #[test]
    fn boundary_consts_same() {
        assert_eq!(Boundary::SPACE, Boundary::SPACE);
    }

    #[test]
    fn from_delim_dot() {
        let boundary = Boundary::from_delim(".");
        let s = "lower.Upper.Upper";
        let v = split(&s, &[boundary]);
        assert_eq!(vec!["lower", "Upper", "Upper"], v)
    }

    #[test]
    fn from_delim_double_colon() {
        let boundary = Boundary::from_delim("::");
        let s = "lower::lowerUpper::Upper";
        let v = split(&s, &[boundary]);
        assert_eq!(vec!["lower", "lowerUpper", "Upper"], v)
    }
}
566