1// Take a look at the license at the top of the repository in the LICENSE file.
2
3use std::mem::MaybeUninit;
4
5use crate::{
6 translate::{from_glib, IntoGlib, UnsafeFrom},
7 UnicodeBreakType, UnicodeScript, UnicodeType,
8};
9
// Private helper module used to seal `Unichar`: the `Sealed` trait below is declared `pub` so it
// can be named in `Unichar`'s supertrait list, while the module itself is not `pub`, so code
// outside the enclosing module cannot name `Sealed` and therefore cannot implement `Unichar`.
mod sealed {
    // Marker trait without any items; it is only used as a supertrait bound.
    pub trait Sealed {}
    // The only `Sealed` implementor declared in this module.
    impl Sealed for char {}
}
14
15impl UnsafeFrom<u32> for char {
16 #[inline]
17 unsafe fn unsafe_from(t: u32) -> Self {
18 debug_assert!(
19 char::try_from(t).is_ok(),
20 "glib returned an invalid Unicode codepoint"
21 );
22 unsafe { char::from_u32_unchecked(t) }
23 }
24}
25
// rustdoc-stripper-ignore-next
/// Selects which flavour of Unicode decomposition should be performed
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DecompositionKind {
    // rustdoc-stripper-ignore-next
    /// Request the compatibility decomposition
    Compatibility,

    // rustdoc-stripper-ignore-next
    /// Request the canonical decomposition
    Canonical,
}
38
// rustdoc-stripper-ignore-next
/// The result of a single step of the Unicode canonical decomposition algorithm
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum CharacterDecomposition {
    // rustdoc-stripper-ignore-next
    /// The character could not be decomposed further
    NoDecomposition,
    // rustdoc-stripper-ignore-next
    /// A 'singleton' decomposition, which means the character was replaced by another
    Singleton(char),
    // rustdoc-stripper-ignore-next
    /// The first character may decompose further, but the second cannot
    Pair(char, char),
}
53
54// rustdoc-stripper-ignore-next
55/// This trait provides access to Unicode character classification and manipulations functions
56/// provided by GLib that do not exist in the standard library
57#[doc(alias = "g_unichar")]
58pub trait Unichar: sealed::Sealed + Copy + Into<u32> + UnsafeFrom<u32> {
59 #[doc(alias = "g_unichar_type")]
60 #[doc(alias = "unichar_type")]
61 #[inline]
62 fn unicode_type(self) -> UnicodeType {
63 unsafe { from_glib(ffi::g_unichar_type(self.into())) }
64 }
65
66 #[doc(alias = "g_unichar_break_type")]
67 #[doc(alias = "unichar_break_type")]
68 #[inline]
69 fn break_type(self) -> UnicodeBreakType {
70 unsafe { from_glib(ffi::g_unichar_break_type(self.into())) }
71 }
72
73 #[doc(alias = "g_unichar_get_script")]
74 #[doc(alias = "unichar_get_script")]
75 #[inline]
76 fn script(self) -> UnicodeScript {
77 unsafe { from_glib(ffi::g_unichar_get_script(self.into())) }
78 }
79
80 #[doc(alias = "g_unichar_combining_class")]
81 #[doc(alias = "unichar_combining_class")]
82 #[inline]
83 fn combining_class(self) -> u8 {
84 // UAX #44 ยง 5.7.4: The character property invariants regarding Canonical_Combining_Class
85 // guarantee that [...] all values used will be in the range 0..254.
86 // So this cast is fine
87 unsafe { ffi::g_unichar_combining_class(self.into()) as u8 }
88 }
89
90 #[doc(alias = "g_unichar_ismark")]
91 #[doc(alias = "unichar_ismark")]
92 #[inline]
93 fn is_mark(self) -> bool {
94 unsafe { from_glib(ffi::g_unichar_ismark(self.into())) }
95 }
96
97 #[doc(alias = "g_unichar_isgraph")]
98 #[doc(alias = "unichar_isgraph")]
99 #[inline]
100 fn is_graphical(self) -> bool {
101 unsafe { from_glib(ffi::g_unichar_isgraph(self.into())) }
102 }
103
104 #[doc(alias = "g_unichar_ispunct")]
105 #[doc(alias = "unichar_ispunct")]
106 #[inline]
107 fn is_punctuation(self) -> bool {
108 unsafe { from_glib(ffi::g_unichar_ispunct(self.into())) }
109 }
110
111 #[doc(alias = "g_unichar_istitle")]
112 #[doc(alias = "unichar_istitle")]
113 #[inline]
114 fn is_titlecase(self) -> bool {
115 unsafe { from_glib(ffi::g_unichar_istitle(self.into())) }
116 }
117
118 #[doc(alias = "g_unichar_isdefined")]
119 #[doc(alias = "unichar_isdefined")]
120 #[inline]
121 fn is_defined(self) -> bool {
122 unsafe { from_glib(ffi::g_unichar_isdefined(self.into())) }
123 }
124
125 #[doc(alias = "g_unichar_iswide")]
126 #[doc(alias = "unichar_iswide")]
127 #[inline]
128 fn is_wide(self) -> bool {
129 unsafe { from_glib(ffi::g_unichar_iswide(self.into())) }
130 }
131
132 #[doc(alias = "g_unichar_iswide_cjk")]
133 #[doc(alias = "unichar_iswide_cjk")]
134 #[inline]
135 fn is_wide_cjk(self) -> bool {
136 unsafe { from_glib(ffi::g_unichar_iswide_cjk(self.into())) }
137 }
138
139 #[doc(alias = "g_unichar_iszerowidth")]
140 #[doc(alias = "unichar_iszerowidth")]
141 #[inline]
142 fn is_zero_width(self) -> bool {
143 unsafe { from_glib(ffi::g_unichar_iszerowidth(self.into())) }
144 }
145
146 #[doc(alias = "g_unichar_totitle")]
147 #[doc(alias = "unichar_totitle")]
148 #[inline]
149 fn to_titlecase(self) -> Self {
150 unsafe { Self::unsafe_from(ffi::g_unichar_totitle(self.into())) }
151 }
152
153 #[doc(alias = "g_unichar_get_mirror_char")]
154 #[doc(alias = "unichar_get_mirror_char")]
155 #[inline]
156 fn mirror_char(self) -> Option<Self> {
157 // SAFETY: If g_unichar_get_mirror_char returns true, it will initialize `mirrored`
158 unsafe {
159 let mut mirrored = MaybeUninit::uninit();
160 let res = from_glib(ffi::g_unichar_get_mirror_char(
161 self.into(),
162 mirrored.as_mut_ptr(),
163 ));
164 if res {
165 Some(Self::unsafe_from(mirrored.assume_init()))
166 } else {
167 None
168 }
169 }
170 }
171
172 #[doc(alias = "g_unichar_fully_decompose")]
173 #[doc(alias = "unichar_fully_decompose")]
174 #[inline]
175 fn fully_decompose(self, decomposition_kind: DecompositionKind) -> Vec<Self> {
176 let compat = match decomposition_kind {
177 DecompositionKind::Compatibility => true,
178 DecompositionKind::Canonical => false,
179 };
180 let buffer_len = ffi::G_UNICHAR_MAX_DECOMPOSITION_LENGTH as usize;
181
182 // SAFETY: We assume glib only ever writes valid Unicode codepoints in the provided buffer
183 // and that it does not lie about the
184 unsafe {
185 let mut buffer = Vec::<Self>::with_capacity(buffer_len);
186 let decomposition_length = ffi::g_unichar_fully_decompose(
187 self.into(),
188 compat.into_glib(),
189 buffer.as_mut_ptr().cast(),
190 buffer_len,
191 );
192 debug_assert!(decomposition_length <= buffer_len);
193 buffer.set_len(decomposition_length);
194 buffer
195 }
196 }
197
198 #[doc(alias = "g_unichar_decompose")]
199 #[doc(alias = "unichar_decompose")]
200 #[inline]
201 fn decompose(self) -> CharacterDecomposition {
202 // SAFETY: `a` and `b` will always be init after the g_unichar_decompose call returns
203 unsafe {
204 let mut a = MaybeUninit::uninit();
205 let mut b = MaybeUninit::uninit();
206 let res = from_glib(ffi::g_unichar_decompose(
207 self.into(),
208 a.as_mut_ptr(),
209 b.as_mut_ptr(),
210 ));
211
212 if res {
213 let (a, b) = (a.assume_init(), b.assume_init());
214 if b == 0 {
215 CharacterDecomposition::Singleton(char::unsafe_from(a))
216 } else {
217 CharacterDecomposition::Pair(char::unsafe_from(a), char::unsafe_from(b))
218 }
219 } else {
220 CharacterDecomposition::NoDecomposition
221 }
222 }
223 }
224
225 #[doc(alias = "g_unichar_compose")]
226 #[doc(alias = "unichar_compose")]
227 #[inline]
228 fn compose(a: char, b: char) -> Option<Self> {
229 // SAFETY: If g_unichar_compose returns true, it will initialize `out`
230 unsafe {
231 let mut out = MaybeUninit::uninit();
232 let res = from_glib(ffi::g_unichar_compose(a.into(), b.into(), out.as_mut_ptr()));
233
234 if res {
235 Some(Self::unsafe_from(out.assume_init()))
236 } else {
237 None
238 }
239 }
240 }
241}
242
// `char` gets every `Unichar` method from the trait's default implementations; nothing is
// overridden here.
impl Unichar for char {}
244