unichar.rs source code [crates/glib-0.18.5/src/unichar.rs]

1	// Take a look at the license at the top of the repository in the LICENSE file.
2
3	use std::mem::MaybeUninit;
4
5	use crate::{
6	translate::{from_glib, IntoGlib, UnsafeFrom},
7	UnicodeBreakType, UnicodeScript, UnicodeType,
8	};
9
mod sealed {
    // Private marker trait: because this module is not exported, code outside
    // this file cannot name `Sealed` and therefore cannot satisfy a
    // `sealed::Sealed` supertrait bound for new types.
    pub trait Sealed {}

    impl Sealed for char {}
}
14
15	impl UnsafeFrom<u32> for char {
16	#[inline]
17	unsafe fn unsafe_from(t: u32) -> Self {
18	debug_assert!(
19	char::try_from(t).is_ok(),
20	"glib returned an invalid Unicode codepoint"
21	);
22	unsafe { char::from_u32_unchecked(t) }
23	}
24	}
25
// rustdoc-stripper-ignore-next
/// Selects which kind of Unicode decomposition should be performed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DecompositionKind {
    // rustdoc-stripper-ignore-next
    /// Perform a compatibility decomposition.
    Compatibility,

    // rustdoc-stripper-ignore-next
    /// Perform a canonical decomposition.
    Canonical,
}
38
// rustdoc-stripper-ignore-next
/// The result of a single step of the Unicode canonical decomposition algorithm
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum CharacterDecomposition {
    // rustdoc-stripper-ignore-next
    /// The character could not be decomposed further
    NoDecomposition,
    // rustdoc-stripper-ignore-next
    /// A 'singleton' decomposition, which means the character was replaced by another
    Singleton(char),
    // rustdoc-stripper-ignore-next
    /// The first character may decompose further, but the second cannot
    Pair(char, char),
}
53
54	// rustdoc-stripper-ignore-next
55	/// This trait provides access to Unicode character classification and manipulations functions
56	/// provided by GLib that do not exist in the standard library
57	#[doc(alias = "g_unichar")]
58	pub trait Unichar: sealed::Sealed + Copy + Into<u32> + UnsafeFrom<u32> {
59	#[doc(alias = "g_unichar_type")]
60	#[doc(alias = "unichar_type")]
61	#[inline]
62	fn unicode_type(self) -> UnicodeType {
63	unsafe { from_glib(ffi::g_unichar_type(self.into())) }
64	}
65
66	#[doc(alias = "g_unichar_break_type")]
67	#[doc(alias = "unichar_break_type")]
68	#[inline]
69	fn break_type(self) -> UnicodeBreakType {
70	unsafe { from_glib(ffi::g_unichar_break_type(self.into())) }
71	}
72
73	#[doc(alias = "g_unichar_get_script")]
74	#[doc(alias = "unichar_get_script")]
75	#[inline]
76	fn script(self) -> UnicodeScript {
77	unsafe { from_glib(ffi::g_unichar_get_script(self.into())) }
78	}
79
80	#[doc(alias = "g_unichar_combining_class")]
81	#[doc(alias = "unichar_combining_class")]
82	#[inline]
83	fn combining_class(self) -> u8 {
84	// UAX #44 § 5.7.4: The character property invariants regarding Canonical_Combining_Class
85	// guarantee that [...] all values used will be in the range 0..254.
86	// So this cast is fine
87	unsafe { ffi::g_unichar_combining_class(self.into()) as u8 }
88	}
89
90	#[doc(alias = "g_unichar_ismark")]
91	#[doc(alias = "unichar_ismark")]
92	#[inline]
93	fn is_mark(self) -> bool {
94	unsafe { from_glib(ffi::g_unichar_ismark(self.into())) }
95	}
96
97	#[doc(alias = "g_unichar_isgraph")]
98	#[doc(alias = "unichar_isgraph")]
99	#[inline]
100	fn is_graphical(self) -> bool {
101	unsafe { from_glib(ffi::g_unichar_isgraph(self.into())) }
102	}
103
104	#[doc(alias = "g_unichar_ispunct")]
105	#[doc(alias = "unichar_ispunct")]
106	#[inline]
107	fn is_punctuation(self) -> bool {
108	unsafe { from_glib(ffi::g_unichar_ispunct(self.into())) }
109	}
110
111	#[doc(alias = "g_unichar_istitle")]
112	#[doc(alias = "unichar_istitle")]
113	#[inline]
114	fn is_titlecase(self) -> bool {
115	unsafe { from_glib(ffi::g_unichar_istitle(self.into())) }
116	}
117
118	#[doc(alias = "g_unichar_isdefined")]
119	#[doc(alias = "unichar_isdefined")]
120	#[inline]
121	fn is_defined(self) -> bool {
122	unsafe { from_glib(ffi::g_unichar_isdefined(self.into())) }
123	}
124
125	#[doc(alias = "g_unichar_iswide")]
126	#[doc(alias = "unichar_iswide")]
127	#[inline]
128	fn is_wide(self) -> bool {
129	unsafe { from_glib(ffi::g_unichar_iswide(self.into())) }
130	}
131
132	#[doc(alias = "g_unichar_iswide_cjk")]
133	#[doc(alias = "unichar_iswide_cjk")]
134	#[inline]
135	fn is_wide_cjk(self) -> bool {
136	unsafe { from_glib(ffi::g_unichar_iswide_cjk(self.into())) }
137	}
138
139	#[doc(alias = "g_unichar_iszerowidth")]
140	#[doc(alias = "unichar_iszerowidth")]
141	#[inline]
142	fn is_zero_width(self) -> bool {
143	unsafe { from_glib(ffi::g_unichar_iszerowidth(self.into())) }
144	}
145
146	#[doc(alias = "g_unichar_totitle")]
147	#[doc(alias = "unichar_totitle")]
148	#[inline]
149	fn to_titlecase(self) -> Self {
150	unsafe { Self::unsafe_from(ffi::g_unichar_totitle(self.into())) }
151	}
152
153	#[doc(alias = "g_unichar_get_mirror_char")]
154	#[doc(alias = "unichar_get_mirror_char")]
155	#[inline]
156	fn mirror_char(self) -> Option<Self> {
157	// SAFETY: If g_unichar_get_mirror_char returns true, it will initialize `mirrored`
158	unsafe {
159	let mut mirrored = MaybeUninit::uninit();
160	let res = from_glib(ffi::g_unichar_get_mirror_char(
161	self.into(),
162	mirrored.as_mut_ptr(),
163	));
164	if res {
165	Some(Self::unsafe_from(mirrored.assume_init()))
166	} else {
167	None
168	}
169	}
170	}
171
172	#[doc(alias = "g_unichar_fully_decompose")]
173	#[doc(alias = "unichar_fully_decompose")]
174	#[inline]
175	fn fully_decompose(self, decomposition_kind: DecompositionKind) -> Vec<Self> {
176	let compat = match decomposition_kind {
177	DecompositionKind::Compatibility => `true`,
178	DecompositionKind::Canonical => `false`,
179	};
180	let buffer_len = ffi::G_UNICHAR_MAX_DECOMPOSITION_LENGTH as usize;
181
182	// SAFETY: We assume glib only ever writes valid Unicode codepoints in the provided buffer
183	// and that it does not lie about the
184	unsafe {
185	let mut buffer = Vec::<Self>::with_capacity(buffer_len);
186	let decomposition_length = ffi::g_unichar_fully_decompose(
187	self.into(),
188	compat.into_glib(),
189	buffer.as_mut_ptr().cast(),
190	buffer_len,
191	);
192	debug_assert!(decomposition_length <= buffer_len);
193	buffer.set_len(decomposition_length);
194	buffer
195	}
196	}
197
198	#[doc(alias = "g_unichar_decompose")]
199	#[doc(alias = "unichar_decompose")]
200	#[inline]
201	fn decompose(self) -> CharacterDecomposition {
202	// SAFETY: `a` and `b` will always be init after the g_unichar_decompose call returns
203	unsafe {
204	let mut a = MaybeUninit::uninit();
205	let mut b = MaybeUninit::uninit();
206	let res = from_glib(ffi::g_unichar_decompose(
207	self.into(),
208	a.as_mut_ptr(),
209	b.as_mut_ptr(),
210	));
211
212	if res {
213	let (a, b) = (a.assume_init(), b.assume_init());
214	if b == `0` {
215	CharacterDecomposition::Singleton(char::unsafe_from(a))
216	} else {
217	CharacterDecomposition::Pair(char::unsafe_from(a), char::unsafe_from(b))
218	}
219	} else {
220	CharacterDecomposition::NoDecomposition
221	}
222	}
223	}
224
225	#[doc(alias = "g_unichar_compose")]
226	#[doc(alias = "unichar_compose")]
227	#[inline]
228	fn compose(a: char, b: char) -> Option<Self> {
229	// SAFETY: If g_unichar_compose returns true, it will initialize `out`
230	unsafe {
231	let mut out = MaybeUninit::uninit();
232	let res = from_glib(ffi::g_unichar_compose(a.into(), b.into(), out.as_mut_ptr()));
233
234	if res {
235	Some(Self::unsafe_from(out.assume_init()))
236	} else {
237	None
238	}
239	}
240	}
241	}
242
243	impl Unichar for char {}
244