1 | use unicode_segmentation::UnicodeSegmentation; |
2 | |
/// Returns `true` when every byte of the grapheme is an ASCII digit (`0`-`9`).
///
/// Non-ASCII digits are encoded with bytes outside the ASCII range and are
/// therefore rejected. An empty string is vacuously accepted.
fn grapheme_is_digit(c: &&str) -> bool {
    c.bytes().all(|byte| byte.is_ascii_digit())
}
6 | |
/// Returns `true` when the grapheme is cased and already in its uppercase form.
///
/// A grapheme counts as cased only if its uppercase and lowercase mappings
/// differ, which rules out digits, punctuation and caseless scripts.
fn grapheme_is_uppercase(c: &&str) -> bool {
    let upper = c.to_uppercase();
    upper.as_str() == *c && upper != c.to_lowercase()
}
10 | |
/// Returns `true` when the grapheme is cased and already in its lowercase form.
///
/// A grapheme counts as cased only if its uppercase and lowercase mappings
/// differ, which rules out digits, punctuation and caseless scripts.
fn grapheme_is_lowercase(c: &&str) -> bool {
    let lower = c.to_lowercase();
    lower.as_str() == *c && lower != c.to_uppercase()
}
14 | |
/// How an identifier is split into words.
///
/// Some boundaries, `HYPHEN`, `UNDERSCORE`, and `SPACE`, consume the character they
/// split on, whereas the other boundaries do not.
///
/// `Boundary` includes methods that return useful groups of boundaries.  It also
/// contains the [`defaults_from`](Boundary::defaults_from) method which will generate a subset
/// of default boundaries based on the boundaries present in a string.
///
/// You can also create custom delimiter boundaries using the [`from_delim`](Boundary::from_delim)
/// method or directly instantiate `Boundary` for complex boundary conditions.
///
/// Two boundaries compare equal, and hash identically, exactly when their `name`s match;
/// no other field takes part in comparison or hashing.
/// ```
/// use convert_case::{Boundary, Case, Casing, Converter};
///
/// assert_eq!(
///     "transformations_in_3d",
///     "TransformationsIn3D"
///         .from_case(Case::Camel)
///         .without_boundaries(&Boundary::digit_letter())
///         .to_case(Case::Snake)
/// );
///
/// let conv = Converter::new()
///     .set_boundaries(&Boundary::defaults_from("aA "))
///     .to_case(Case::Title);
/// assert_eq!("7empest By Tool", conv.convert("7empest byTool"));
/// ```
#[derive(Debug, Eq, Clone, Copy)]
pub struct Boundary {
    /// A unique name used for comparison and hashing.
    pub name: &'static str,
    /// A function that determines if this boundary is present at the start
    /// of the given graphemes.  Second argument is the `arg` field.
    pub condition: fn(&[&str], Option<&'static str>) -> bool,
    /// An optional string passed to `condition` at runtime.  Used
    /// internally for [`Boundary::from_delim`] method.
    pub arg: Option<&'static str>,
    /// Where the beginning of the boundary is, as a grapheme offset from the
    /// position at which `condition` matched.
    pub start: usize,
    /// The length of the boundary.  This is the number of graphemes that
    /// are removed when splitting.
    pub len: usize,
}

impl PartialEq for Boundary {
    fn eq(&self, other: &Self) -> bool {
        self.name == other.name
    }
}

// `Hash` is written by hand so that it agrees with `PartialEq`: values that compare
// equal must produce the same hash, and equality only looks at `name`.  Deriving it
// would also hash `condition`, `arg`, `start` and `len`.
impl std::hash::Hash for Boundary {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        std::hash::Hash::hash(self.name, state);
    }
}
64 | |
65 | impl Boundary { |
66 | /// Splits on space, consuming the character on segmentation. |
67 | /// ``` |
68 | /// # use convert_case::Boundary; |
69 | /// assert_eq!( |
70 | /// vec![Boundary::SPACE], |
71 | /// Boundary::defaults_from(" " ) |
72 | /// ); |
73 | /// ``` |
74 | pub const SPACE: Boundary = Boundary { |
75 | name: "Space" , |
76 | condition: |s, _| s.get(0) == Some(&" " ), |
77 | arg: None, |
78 | start: 0, |
79 | len: 1, |
80 | }; |
81 | |
82 | /// Splits on `-`, consuming the character on segmentation. |
83 | /// ``` |
84 | /// # use convert_case::Boundary; |
85 | /// assert_eq!( |
86 | /// vec![Boundary::HYPHEN], |
87 | /// Boundary::defaults_from("-" ) |
88 | /// ); |
89 | /// ``` |
90 | pub const HYPHEN: Boundary = Boundary { |
91 | name: "Hyphen" , |
92 | condition: |s, _| s.get(0) == Some(&"-" ), |
93 | arg: None, |
94 | start: 0, |
95 | len: 1, |
96 | }; |
97 | |
98 | /// Splits on `_`, consuming the character on segmentation. |
99 | /// ``` |
100 | /// # use convert_case::Boundary; |
101 | /// assert_eq!( |
102 | /// vec![Boundary::UNDERSCORE], |
103 | /// Boundary::defaults_from("_" ) |
104 | /// ); |
105 | /// ``` |
106 | pub const UNDERSCORE: Boundary = Boundary { |
107 | name: "Underscore" , |
108 | condition: |s, _| s.get(0) == Some(&"_" ), |
109 | arg: None, |
110 | start: 0, |
111 | len: 1, |
112 | }; |
113 | |
114 | /// Splits where a lowercase letter is followed by an uppercase letter. |
115 | /// ``` |
116 | /// # use convert_case::Boundary; |
117 | /// assert_eq!( |
118 | /// vec![Boundary::LOWER_UPPER], |
119 | /// Boundary::defaults_from("aA" ) |
120 | /// ); |
121 | /// ``` |
122 | pub const LOWER_UPPER: Boundary = Boundary { |
123 | name: "LowerUpper" , |
124 | condition: |s, _| { |
125 | s.get(0).map(grapheme_is_lowercase) == Some(true) |
126 | && s.get(1).map(grapheme_is_uppercase) == Some(true) |
127 | }, |
128 | arg: None, |
129 | start: 1, |
130 | len: 0, |
131 | }; |
132 | /// Splits where an uppercase letter is followed by a lowercase letter. This is seldom used, |
133 | /// and is **not** included in the [defaults](Boundary::defaults). |
134 | /// ``` |
135 | /// # use convert_case::Boundary; |
136 | /// assert!( |
137 | /// Boundary::defaults_from("Aa" ).len() == 0 |
138 | /// ); |
139 | /// ``` |
140 | pub const UPPER_LOWER: Boundary = Boundary { |
141 | name: "UpperLower" , |
142 | condition: |s, _| { |
143 | s.get(0).map(grapheme_is_uppercase) == Some(true) |
144 | && s.get(1).map(grapheme_is_lowercase) == Some(true) |
145 | }, |
146 | arg: None, |
147 | start: 1, |
148 | len: 0, |
149 | }; |
150 | |
151 | /// Acronyms are identified by two uppercase letters followed by a lowercase letter. |
152 | /// The word boundary is between the two uppercase letters. For example, "HTTPRequest" |
153 | /// would have an acronym boundary identified at "PRe" and split into "HTTP" and "Request". |
154 | /// ``` |
155 | /// # use convert_case::Boundary; |
156 | /// assert_eq!( |
157 | /// vec![Boundary::ACRONYM], |
158 | /// Boundary::defaults_from("AAa" ) |
159 | /// ); |
160 | /// ``` |
161 | pub const ACRONYM: Boundary = Boundary { |
162 | name: "Acronym" , |
163 | condition: |s, _| { |
164 | s.get(0).map(grapheme_is_uppercase) == Some(true) |
165 | && s.get(1).map(grapheme_is_uppercase) == Some(true) |
166 | && s.get(2).map(grapheme_is_lowercase) == Some(true) |
167 | }, |
168 | arg: None, |
169 | start: 1, |
170 | len: 0, |
171 | }; |
172 | |
173 | /// Splits where a lowercase letter is followed by a digit. |
174 | /// ``` |
175 | /// # use convert_case::Boundary; |
176 | /// assert_eq!( |
177 | /// vec![Boundary::LOWER_DIGIT], |
178 | /// Boundary::defaults_from("a1" ) |
179 | /// ); |
180 | /// ``` |
181 | pub const LOWER_DIGIT: Boundary = Boundary { |
182 | name: "LowerDigit" , |
183 | condition: |s, _| { |
184 | s.get(0).map(grapheme_is_lowercase) == Some(true) |
185 | && s.get(1).map(grapheme_is_digit) == Some(true) |
186 | }, |
187 | arg: None, |
188 | start: 1, |
189 | len: 0, |
190 | }; |
191 | |
192 | /// Splits where an uppercase letter is followed by a digit. |
193 | /// ``` |
194 | /// # use convert_case::Boundary; |
195 | /// assert_eq!( |
196 | /// vec![Boundary::UPPER_DIGIT], |
197 | /// Boundary::defaults_from("A1" ) |
198 | /// ); |
199 | /// ``` |
200 | pub const UPPER_DIGIT: Boundary = Boundary { |
201 | name: "UpperDigit" , |
202 | condition: |s, _| { |
203 | s.get(0).map(grapheme_is_uppercase) == Some(true) |
204 | && s.get(1).map(grapheme_is_digit) == Some(true) |
205 | }, |
206 | arg: None, |
207 | start: 1, |
208 | len: 0, |
209 | }; |
210 | |
211 | /// Splits where digit is followed by a lowercase letter. |
212 | /// ``` |
213 | /// # use convert_case::Boundary; |
214 | /// assert_eq!( |
215 | /// vec![Boundary::DIGIT_LOWER], |
216 | /// Boundary::defaults_from("1a" ) |
217 | /// ); |
218 | /// ``` |
219 | pub const DIGIT_LOWER: Boundary = Boundary { |
220 | name: "DigitLower" , |
221 | condition: |s, _| { |
222 | s.get(0).map(grapheme_is_digit) == Some(true) |
223 | && s.get(1).map(grapheme_is_lowercase) == Some(true) |
224 | }, |
225 | arg: None, |
226 | start: 1, |
227 | len: 0, |
228 | }; |
229 | |
230 | /// Splits where digit is followed by an uppercase letter. |
231 | /// ``` |
232 | /// # use convert_case::Boundary; |
233 | /// assert_eq!( |
234 | /// vec![Boundary::DIGIT_UPPER], |
235 | /// Boundary::defaults_from("1A" ) |
236 | /// ); |
237 | /// ``` |
238 | pub const DIGIT_UPPER: Boundary = Boundary { |
239 | name: "DigitUpper" , |
240 | condition: |s, _| { |
241 | s.get(0).map(grapheme_is_digit) == Some(true) |
242 | && s.get(1).map(grapheme_is_uppercase) == Some(true) |
243 | }, |
244 | arg: None, |
245 | start: 1, |
246 | len: 0, |
247 | }; |
248 | |
249 | /// Create a new boundary based on a delimiter. |
250 | /// ``` |
251 | /// # use convert_case::{Case, Converter, Boundary}; |
252 | /// let conv = Converter::new() |
253 | /// .set_boundaries(&[Boundary::from_delim("::" )]) |
254 | /// .to_case(Case::Camel); |
255 | /// assert_eq!( |
256 | /// "myVarName" , |
257 | /// conv.convert("my::var::name" ) |
258 | /// ) |
259 | /// ``` |
260 | pub const fn from_delim(delim: &'static str) -> Boundary { |
261 | Boundary { |
262 | name: delim, |
263 | arg: Some(delim), |
264 | condition: |s, arg| s.join("" ).starts_with(arg.unwrap()), |
265 | start: 0, |
266 | len: delim.len(), |
267 | } |
268 | } |
269 | |
270 | /// The default list of boundaries used when `Casing::to_case` is called directly |
271 | /// and in a `Converter` generated from `Converter::new()`. |
272 | /// ``` |
273 | /// # use convert_case::Boundary; |
274 | /// assert_eq!( |
275 | /// [ |
276 | /// Boundary::SPACE, |
277 | /// Boundary::HYPHEN, |
278 | /// Boundary::UNDERSCORE, |
279 | /// Boundary::LOWER_UPPER, |
280 | /// Boundary::ACRONYM, |
281 | /// Boundary::LOWER_DIGIT, |
282 | /// Boundary::UPPER_DIGIT, |
283 | /// Boundary::DIGIT_LOWER, |
284 | /// Boundary::DIGIT_UPPER, |
285 | /// ], |
286 | /// Boundary::defaults() |
287 | /// ); |
288 | /// ``` |
289 | pub const fn defaults() -> [Boundary; 9] { |
290 | [ |
291 | Boundary::SPACE, |
292 | Boundary::HYPHEN, |
293 | Boundary::UNDERSCORE, |
294 | Boundary::LOWER_UPPER, |
295 | Boundary::ACRONYM, |
296 | Boundary::LOWER_DIGIT, |
297 | Boundary::UPPER_DIGIT, |
298 | Boundary::DIGIT_LOWER, |
299 | Boundary::DIGIT_UPPER, |
300 | ] |
301 | } |
302 | |
303 | /// Returns the boundaries that involve digits. |
304 | /// `LowerDigit`. |
305 | /// ``` |
306 | /// # use convert_case::Boundary; |
307 | /// assert_eq!( |
308 | /// [ |
309 | /// Boundary::LOWER_DIGIT, |
310 | /// Boundary::UPPER_DIGIT, |
311 | /// Boundary::DIGIT_LOWER, |
312 | /// Boundary::DIGIT_UPPER, |
313 | /// ], |
314 | /// Boundary::digits() |
315 | /// ); |
316 | /// ``` |
317 | pub const fn digits() -> [Boundary; 4] { |
318 | [ |
319 | Boundary::LOWER_DIGIT, |
320 | Boundary::UPPER_DIGIT, |
321 | Boundary::DIGIT_LOWER, |
322 | Boundary::DIGIT_UPPER, |
323 | ] |
324 | } |
325 | |
326 | /// Returns the boundaries that are letters followed by digits. |
327 | /// ``` |
328 | /// # use convert_case::Boundary; |
329 | /// assert_eq!( |
330 | /// [ |
331 | /// Boundary::LOWER_DIGIT, |
332 | /// Boundary::UPPER_DIGIT, |
333 | /// ], |
334 | /// Boundary::letter_digit() |
335 | /// ); |
336 | /// ``` |
337 | pub const fn letter_digit() -> [Boundary; 2] { |
338 | [Boundary::LOWER_DIGIT, Boundary::UPPER_DIGIT] |
339 | } |
340 | |
341 | /// Returns the boundaries that are digits followed by letters. |
342 | /// ``` |
343 | /// # use convert_case::Boundary; |
344 | /// assert_eq!( |
345 | /// [ |
346 | /// Boundary::DIGIT_LOWER, |
347 | /// Boundary::DIGIT_UPPER |
348 | /// ], |
349 | /// Boundary::digit_letter() |
350 | /// ); |
351 | /// ``` |
352 | pub fn digit_letter() -> [Boundary; 2] { |
353 | [Boundary::DIGIT_LOWER, Boundary::DIGIT_UPPER] |
354 | } |
355 | |
356 | /// Returns a list of all boundaries that are identified within the given string. |
357 | /// Could be a short of writing out all the boundaries in a list directly. This will not |
358 | /// identify boundary `UpperLower` if it also used as part of `Acronym`. |
359 | /// |
360 | /// If you want to be very explicit and not overlap boundaries, it is recommended to use a colon |
361 | /// character. |
362 | /// ``` |
363 | /// # use convert_case::Boundary; |
364 | /// assert_eq!( |
365 | /// vec![ |
366 | /// Boundary::SPACE, |
367 | /// Boundary::HYPHEN, |
368 | /// Boundary::LOWER_UPPER, |
369 | /// Boundary::UPPER_DIGIT, |
370 | /// Boundary::DIGIT_LOWER, |
371 | /// ], |
372 | /// Boundary::defaults_from("aA8a -" ) |
373 | /// ); |
374 | /// assert_eq!( |
375 | /// vec![ |
376 | /// Boundary::UNDERSCORE, |
377 | /// Boundary::LOWER_UPPER, |
378 | /// Boundary::ACRONYM, |
379 | /// Boundary::DIGIT_UPPER, |
380 | /// ], |
381 | /// Boundary::defaults_from("bD:0B:_:AAa" ) |
382 | /// ); |
383 | /// ``` |
384 | pub fn defaults_from(pattern: &str) -> Vec<Boundary> { |
385 | let mut boundaries = Vec::new(); |
386 | for boundary in Boundary::defaults() { |
387 | let parts = split(&pattern, &[boundary]); |
388 | if parts.len() > 1 || parts.len() == 0 || parts[0] != pattern { |
389 | boundaries.push(boundary); |
390 | } |
391 | } |
392 | boundaries |
393 | } |
394 | } |
395 | |
396 | /// Split an identifier into a list of words using the list of boundaries. |
397 | /// |
398 | /// This is used internally for splitting an identifier before mutating by |
399 | /// a pattern and joining again with a delimiter. |
400 | /// ``` |
401 | /// use convert_case::{Boundary, split}; |
402 | /// assert_eq!( |
403 | /// vec!["one" , "two" , "three.four" ], |
404 | /// split(&"one_two-three.four" , &[Boundary::UNDERSCORE, Boundary::HYPHEN]), |
405 | /// ) |
406 | /// ``` |
407 | pub fn split<'s, T>(s: &'s T, boundaries: &[Boundary]) -> Vec<&'s str> |
408 | where |
409 | T: AsRef<str>, |
410 | { |
411 | let s = s.as_ref(); |
412 | |
413 | if s.len() == 0 { |
414 | return vec![]; |
415 | } |
416 | |
417 | let mut words = Vec::new(); |
418 | let mut last_boundary_end = 0; |
419 | |
420 | let (indices, graphemes): (Vec<_>, Vec<_>) = s.grapheme_indices(true).unzip(); |
421 | let grapheme_length = indices[graphemes.len() - 1] + graphemes[graphemes.len() - 1].len(); |
422 | |
423 | for i in 0..graphemes.len() { |
424 | for boundary in boundaries { |
425 | //let byte_index = indices[i]; |
426 | |
427 | if (boundary.condition)(&graphemes[i..], boundary.arg) { |
428 | // What if we find a condition at the end of the array? |
429 | // Maybe we can stop early based on length |
430 | // To do this, need to switch the loops |
431 | // TODO |
432 | let boundary_byte_start: usize = |
433 | *indices.get(i + boundary.start).unwrap_or(&grapheme_length); |
434 | let boundary_byte_end: usize = *indices |
435 | .get(i + boundary.start + boundary.len) |
436 | .unwrap_or(&grapheme_length); |
437 | |
438 | // todo clean this up a bit |
439 | words.push(&s[last_boundary_end..boundary_byte_start]); |
440 | last_boundary_end = boundary_byte_end; |
441 | break; |
442 | } |
443 | } |
444 | } |
445 | words.push(&s[last_boundary_end..]); |
446 | words.into_iter().filter(|s| !s.is_empty()).collect() |
447 | } |
448 | |
449 | // ascii version |
450 | //pub fn split<'s, T>(s: &'s T, boundaries: &[Boundary]) -> Vec<&'s str> |
451 | //where |
452 | // T: AsRef<str>, |
453 | //{ |
454 | // let s = s.as_ref(); |
455 | // |
456 | // let mut words = Vec::new(); |
457 | // let mut last_end = 0; |
458 | // for i in 0..s.len() { |
459 | // for boundary in boundaries { |
460 | // if (boundary.condition)(&s[i..]) { |
461 | // words.push(&s[last_end..i + boundary.start]); |
462 | // last_end = i + boundary.start + boundary.len; |
463 | // break; |
464 | // } |
465 | // } |
466 | // } |
467 | // words.push(&s[last_end..]); |
468 | // words |
469 | //} |
470 | |
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn hyphen() {
        let s = "a-b-c";
        let v = split(&s, &[Boundary::HYPHEN]);
        assert_eq!(v, vec!["a", "b", "c"]);
    }

    #[test]
    fn underscore() {
        let s = "a_b_c";
        let v = split(&s, &[Boundary::UNDERSCORE]);
        assert_eq!(v, vec!["a", "b", "c"]);
    }

    #[test]
    fn space() {
        let s = "a b c";
        let v = split(&s, &[Boundary::SPACE]);
        assert_eq!(v, vec!["a", "b", "c"]);
    }

    #[test]
    fn delimiters() {
        let s = "aaa-bbb_ccc ddd ddd-eee";
        let v = split(
            &s,
            &[Boundary::SPACE, Boundary::UNDERSCORE, Boundary::HYPHEN],
        );
        assert_eq!(v, vec!["aaa", "bbb", "ccc", "ddd", "ddd", "eee"]);
    }

    #[test]
    fn lower_upper() {
        let s = "lowerUpperUpper";
        let v = split(&s, &[Boundary::LOWER_UPPER]);
        assert_eq!(v, vec!["lower", "Upper", "Upper"]);
    }

    #[test]
    fn upper_lower() {
        // The split lands between the uppercase and the lowercase letter.
        let s = "Ab";
        let v = split(&s, &[Boundary::UPPER_LOWER]);
        assert_eq!(v, vec!["A", "b"]);
    }

    #[test]
    fn acronym() {
        let s = "XMLRequest";
        let v = split(&s, &[Boundary::ACRONYM]);
        assert_eq!(v, vec!["XML", "Request"]);
    }

    #[test]
    fn lower_digit() {
        let s = "abc123def";
        let v = split(&s, &[Boundary::LOWER_DIGIT]);
        assert_eq!(v, vec!["abc", "123def"]);
    }

    #[test]
    fn upper_digit() {
        let s = "ABC123";
        let v = split(&s, &[Boundary::UPPER_DIGIT]);
        assert_eq!(v, vec!["ABC", "123"]);
    }

    #[test]
    fn digit_lower() {
        let s = "123abc";
        let v = split(&s, &[Boundary::DIGIT_LOWER]);
        assert_eq!(v, vec!["123", "abc"]);
    }

    #[test]
    fn digit_upper() {
        let s = "123ABC";
        let v = split(&s, &[Boundary::DIGIT_UPPER]);
        assert_eq!(v, vec!["123", "ABC"]);
    }

    #[test]
    fn all_digit_boundaries() {
        let s = "a1B2c";
        let v = split(&s, &Boundary::digits());
        assert_eq!(v, vec!["a", "1", "B", "2", "c"]);
    }

    #[test]
    fn empty_string() {
        let s = "";
        let v = split(&s, &Boundary::defaults());
        assert!(v.is_empty());
    }

    #[test]
    fn non_ascii_words() {
        // Words containing multi-byte characters must be sliced on grapheme edges.
        let s = "ñandú_café";
        let v = split(&s, &[Boundary::UNDERSCORE]);
        assert_eq!(v, vec!["ñandú", "café"]);
    }

    #[test]
    fn boundaries_found_in_string() {
        // upper lower is no longer a default
        assert_eq!(Vec::<Boundary>::new(), Boundary::defaults_from(".Aaaa"));
        assert_eq!(
            vec![Boundary::LOWER_UPPER, Boundary::LOWER_DIGIT,],
            Boundary::defaults_from("a8.Aa.aA")
        );
        assert_eq!(
            Boundary::digits().to_vec(),
            Boundary::defaults_from("b1B1b")
        );
        assert_eq!(
            vec![
                Boundary::SPACE,
                Boundary::HYPHEN,
                Boundary::UNDERSCORE,
                Boundary::ACRONYM,
            ],
            Boundary::defaults_from("AAa -_")
        );
    }

    #[test]
    fn boundary_consts_same() {
        assert_eq!(Boundary::SPACE, Boundary::SPACE);
    }

    #[test]
    fn from_delim_dot() {
        let boundary = Boundary::from_delim(".");
        let s = "lower.Upper.Upper";
        let v = split(&s, &[boundary]);
        assert_eq!(vec!["lower", "Upper", "Upper"], v)
    }

    #[test]
    fn from_delim_double_colon() {
        let boundary = Boundary::from_delim("::");
        let s = "lower::lowerUpper::Upper";
        let v = split(&s, &[boundary]);
        assert_eq!(vec!["lower", "lowerUpper", "Upper"], v)
    }
}
566 | |