unichar.rs source code [crates/glib/src/unichar.rs]

1	// Take a look at the license at the top of the repository in the LICENSE file.
2
3	use std::mem::MaybeUninit;
4
5	use crate::{
6	ffi,
7	translate::{from_glib, IntoGlib, UnsafeFrom},
8	UnicodeBreakType, UnicodeScript, UnicodeType,
9	};
10
// Private module: because `sealed` itself is not `pub`, code outside this file cannot name
// `Sealed`, and therefore cannot implement any trait that lists it as a supertrait.
mod sealed {
    // rustdoc-stripper-ignore-next
    /// Marker trait used as a supertrait bound to restrict which types may implement
    /// the public trait that depends on it.
    pub trait Sealed {}

    // `char` is the only type admitted.
    impl Sealed for char {}
}
15
16	impl UnsafeFrom<u32> for char {
17	#[inline]
18	unsafe fn unsafe_from(t: u32) -> Self {
19	debug_assert!(
20	char::try_from(t).is_ok(),
21	"glib returned an invalid Unicode codepoint"
22	);
23	unsafe { char::from_u32_unchecked(t) }
24	}
25	}
26
// rustdoc-stripper-ignore-next
/// Selects which kind of decomposition is performed when fully decomposing a character
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DecompositionKind {
    // rustdoc-stripper-ignore-next
    /// Perform a compatibility decomposition
    Compatibility,

    // rustdoc-stripper-ignore-next
    /// Perform a canonical decomposition
    Canonical,
}
39
// rustdoc-stripper-ignore-next
/// The result of a single step of the Unicode canonical decomposition algorithm
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum CharacterDecomposition {
    // rustdoc-stripper-ignore-next
    /// The character could not be decomposed further
    NoDecomposition,
    // rustdoc-stripper-ignore-next
    /// A 'singleton' decomposition, which means the character was replaced by another
    Singleton(char),
    // rustdoc-stripper-ignore-next
    /// The first character may decompose further, but the second cannot
    Pair(char, char),
}
54
55	// rustdoc-stripper-ignore-next
56	/// This trait provides access to Unicode character classification and manipulations functions
57	/// provided by GLib that do not exist in the standard library
58	#[doc(alias = "g_unichar")]
59	pub trait Unichar: sealed::Sealed + Copy + Into<u32> + UnsafeFrom<u32> {
60	#[doc(alias = "g_unichar_type")]
61	#[doc(alias = "unichar_type")]
62	#[inline]
63	fn unicode_type(self) -> UnicodeType {
64	unsafe { from_glib(ffi::g_unichar_type(self.into())) }
65	}
66
67	#[doc(alias = "g_unichar_break_type")]
68	#[doc(alias = "unichar_break_type")]
69	#[inline]
70	fn break_type(self) -> UnicodeBreakType {
71	unsafe { from_glib(ffi::g_unichar_break_type(self.into())) }
72	}
73
74	#[doc(alias = "g_unichar_get_script")]
75	#[doc(alias = "unichar_get_script")]
76	#[inline]
77	fn script(self) -> UnicodeScript {
78	unsafe { from_glib(ffi::g_unichar_get_script(self.into())) }
79	}
80
81	#[doc(alias = "g_unichar_combining_class")]
82	#[doc(alias = "unichar_combining_class")]
83	#[inline]
84	fn combining_class(self) -> u8 {
85	// UAX #44 § 5.7.4: The character property invariants regarding Canonical_Combining_Class
86	// guarantee that [...] all values used will be in the range 0..254.
87	// So this cast is fine
88	unsafe { ffi::g_unichar_combining_class(self.into()) as u8 }
89	}
90
91	#[doc(alias = "g_unichar_ismark")]
92	#[doc(alias = "unichar_ismark")]
93	#[inline]
94	fn is_mark(self) -> bool {
95	unsafe { from_glib(ffi::g_unichar_ismark(self.into())) }
96	}
97
98	#[doc(alias = "g_unichar_isgraph")]
99	#[doc(alias = "unichar_isgraph")]
100	#[inline]
101	fn is_graphical(self) -> bool {
102	unsafe { from_glib(ffi::g_unichar_isgraph(self.into())) }
103	}
104
105	#[doc(alias = "g_unichar_ispunct")]
106	#[doc(alias = "unichar_ispunct")]
107	#[inline]
108	fn is_punctuation(self) -> bool {
109	unsafe { from_glib(ffi::g_unichar_ispunct(self.into())) }
110	}
111
112	#[doc(alias = "g_unichar_istitle")]
113	#[doc(alias = "unichar_istitle")]
114	#[inline]
115	fn is_titlecase(self) -> bool {
116	unsafe { from_glib(ffi::g_unichar_istitle(self.into())) }
117	}
118
119	#[doc(alias = "g_unichar_isdefined")]
120	#[doc(alias = "unichar_isdefined")]
121	#[inline]
122	fn is_defined(self) -> bool {
123	unsafe { from_glib(ffi::g_unichar_isdefined(self.into())) }
124	}
125
126	#[doc(alias = "g_unichar_iswide")]
127	#[doc(alias = "unichar_iswide")]
128	#[inline]
129	fn is_wide(self) -> bool {
130	unsafe { from_glib(ffi::g_unichar_iswide(self.into())) }
131	}
132
133	#[doc(alias = "g_unichar_iswide_cjk")]
134	#[doc(alias = "unichar_iswide_cjk")]
135	#[inline]
136	fn is_wide_cjk(self) -> bool {
137	unsafe { from_glib(ffi::g_unichar_iswide_cjk(self.into())) }
138	}
139
140	#[doc(alias = "g_unichar_iszerowidth")]
141	#[doc(alias = "unichar_iszerowidth")]
142	#[inline]
143	fn is_zero_width(self) -> bool {
144	unsafe { from_glib(ffi::g_unichar_iszerowidth(self.into())) }
145	}
146
147	#[doc(alias = "g_unichar_totitle")]
148	#[doc(alias = "unichar_totitle")]
149	#[inline]
150	fn to_titlecase(self) -> Self {
151	unsafe { Self::unsafe_from(ffi::g_unichar_totitle(self.into())) }
152	}
153
154	#[doc(alias = "g_unichar_get_mirror_char")]
155	#[doc(alias = "unichar_get_mirror_char")]
156	#[inline]
157	fn mirror_char(self) -> Option<Self> {
158	// SAFETY: If g_unichar_get_mirror_char returns true, it will initialize `mirrored`
159	unsafe {
160	let mut mirrored = MaybeUninit::uninit();
161	let res = from_glib(ffi::g_unichar_get_mirror_char(
162	self.into(),
163	mirrored.as_mut_ptr(),
164	));
165	if res {
166	Some(Self::unsafe_from(mirrored.assume_init()))
167	} else {
168	None
169	}
170	}
171	}
172
173	#[doc(alias = "g_unichar_fully_decompose")]
174	#[doc(alias = "unichar_fully_decompose")]
175	#[inline]
176	fn fully_decompose(self, decomposition_kind: DecompositionKind) -> Vec<Self> {
177	let compat = match decomposition_kind {
178	DecompositionKind::Compatibility => `true`,
179	DecompositionKind::Canonical => `false`,
180	};
181	let buffer_len = ffi::G_UNICHAR_MAX_DECOMPOSITION_LENGTH as usize;
182
183	// SAFETY: We assume glib only ever writes valid Unicode codepoints in the provided buffer
184	// and that it does not lie about the
185	unsafe {
186	let mut buffer = Vec::<Self>::with_capacity(buffer_len);
187	let decomposition_length = ffi::g_unichar_fully_decompose(
188	self.into(),
189	compat.into_glib(),
190	buffer.as_mut_ptr().cast(),
191	buffer_len,
192	);
193	debug_assert!(decomposition_length <= buffer_len);
194	buffer.set_len(decomposition_length);
195	buffer
196	}
197	}
198
199	#[doc(alias = "g_unichar_decompose")]
200	#[doc(alias = "unichar_decompose")]
201	#[inline]
202	fn decompose(self) -> CharacterDecomposition {
203	// SAFETY: `a` and `b` will always be init after the g_unichar_decompose call returns
204	unsafe {
205	let mut a = MaybeUninit::uninit();
206	let mut b = MaybeUninit::uninit();
207	let res = from_glib(ffi::g_unichar_decompose(
208	self.into(),
209	a.as_mut_ptr(),
210	b.as_mut_ptr(),
211	));
212
213	if res {
214	let (a, b) = (a.assume_init(), b.assume_init());
215	if b == `0` {
216	CharacterDecomposition::Singleton(char::unsafe_from(a))
217	} else {
218	CharacterDecomposition::Pair(char::unsafe_from(a), char::unsafe_from(b))
219	}
220	} else {
221	CharacterDecomposition::NoDecomposition
222	}
223	}
224	}
225
226	#[doc(alias = "g_unichar_compose")]
227	#[doc(alias = "unichar_compose")]
228	#[inline]
229	fn compose(a: char, b: char) -> Option<Self> {
230	// SAFETY: If g_unichar_compose returns true, it will initialize `out`
231	unsafe {
232	let mut out = MaybeUninit::uninit();
233	let res = from_glib(ffi::g_unichar_compose(a.into(), b.into(), out.as_mut_ptr()));
234
235	if res {
236	Some(Self::unsafe_from(out.assume_init()))
237	} else {
238	None
239	}
240	}
241	}
242	}
243
244	impl Unichar for char {}
245