1mod tables;
2
3use crate::unicode::tables::*;
4use alloc::string::String;
5
/// Mask that keeps the low six bits of a byte (`0b0011_1111`), which is where a UTF-8
/// continuation byte carries its payload.
const CONT_MASK: u8 = 0x3F;
7
8#[inline(always)]
9fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
10 (ch << 6) | (byte & CONT_MASK) as u32
11}
12
13pub fn decode_utf16(bytes: &[u8]) -> String {
14 let mut output: String = String::new();
15 let mut offset: usize = 0;
16 while offset < bytes.len() {
17 output.push(ch:read_utf16(bytes, &mut offset));
18 }
19 output
20}
21
/// Decodes one code point from big-endian UTF-16 `bytes`, starting at `*offset`.
///
/// `*offset` is advanced past everything that was consumed: two bytes for a single code unit and
/// four bytes for a well-formed surrogate pair.
///
/// Malformed input is decoded lossily instead of producing an invalid `char`:
/// * an unpaired surrogate yields U+FFFD and consumes only that code unit, so the unit that
///   follows it is decoded by the next call;
/// * a single dangling byte at the end of the buffer is consumed and yields U+FFFD.
///
/// # Panics
///
/// Panics if `*offset` is not a valid index into `bytes`, i.e. when there is nothing left to
/// read.
pub fn read_utf16(bytes: &[u8], offset: &mut usize) -> char {
    const REPLACEMENT: char = '\u{FFFD}';

    // Reads the big-endian code unit starting at `at`, if both of its bytes are present.
    fn code_unit(bytes: &[u8], at: usize) -> Option<u16> {
        let pair = bytes.get(at..)?.get(..2)?;
        Some(((pair[0] as u16) << 8) | pair[1] as u16)
    }

    let first = match code_unit(bytes, *offset) {
        Some(unit) => unit,
        None => {
            // Indexing keeps the out-of-bounds panic for callers that read past the end. If we
            // get past it, exactly one byte is left: it cannot form a code unit, so consume it
            // and report it as malformed.
            let _dangling = bytes[*offset];
            *offset += 1;
            return REPLACEMENT;
        }
    };
    *offset += 2;

    match first {
        // High surrogate: only meaningful when immediately followed by a low surrogate.
        0xD800..=0xDBFF => match code_unit(bytes, *offset) {
            Some(second @ 0xDC00..=0xDFFF) => {
                *offset += 2;
                let high = ((first - 0xD800) as u32) << 10;
                let low = (second - 0xDC00) as u32;
                core::char::from_u32((high | low) + 0x1_0000).unwrap_or(REPLACEMENT)
            }
            // Missing or mismatched partner: leave it in place for the next call.
            _ => REPLACEMENT,
        },
        // Low surrogate without a preceding high surrogate.
        0xDC00..=0xDFFF => REPLACEMENT,
        // Any other code unit is a scalar value on its own.
        _ => core::char::from_u32(first as u32).unwrap_or(REPLACEMENT),
    }
}
34
35/// Returns (length, character). Cannot be run at the end of the string.
36pub fn read_utf8(bytes: &[u8], byte_offset: &mut usize) -> char {
37 let x: u8 = bytes[*byte_offset];
38 *byte_offset += 1;
39 if x < 128 {
40 return unsafe { core::char::from_u32_unchecked(x as u32) };
41 }
42 let init: u32 = (x & (0x7F >> 2)) as u32;
43 let y: u8 = bytes[*byte_offset];
44 *byte_offset += 1;
45 let mut ch: u32 = utf8_acc_cont_byte(ch:init, byte:y);
46 if x >= 0xE0 {
47 let z: u8 = bytes[*byte_offset];
48 *byte_offset += 1;
49 let y_z: u32 = utf8_acc_cont_byte((y & CONT_MASK) as u32, byte:z);
50 ch = init << 12 | y_z;
51 if x >= 0xF0 {
52 let w: u8 = bytes[*byte_offset];
53 *byte_offset += 1;
54 ch = (init & 7) << 18 | utf8_acc_cont_byte(ch:y_z, byte:w);
55 }
56 }
57 unsafe { core::char::from_u32_unchecked(ch) }
58}
59
/// Bit flags describing a line break, or a mask of the break kinds that should be honoured.
///
/// Ordering is based on linebreak priority. Ordering is Hard > Soft > None.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct LinebreakData {
    bits: u8,
}

/// No line break.
pub const LINEBREAK_NONE: LinebreakData = LinebreakData::new(LinebreakData::NONE);
/// A soft line break.
pub const LINEBREAK_SOFT: LinebreakData = LinebreakData::new(LinebreakData::SOFT);
/// A hard line break.
pub const LINEBREAK_HARD: LinebreakData = LinebreakData::new(LinebreakData::HARD);

impl LinebreakData {
    const NONE: u8 = 0b0000_0000;
    const SOFT: u8 = 0b0000_0001;
    const HARD: u8 = 0b0000_0010;

    /// Wraps raw flag bits.
    const fn new(bits: u8) -> LinebreakData {
        LinebreakData { bits }
    }

    /// Builds a mask of the break kinds that are enabled.
    ///
    /// The hard bit is set when `wrap_hard_breaks` is true. The soft bit is set only when both
    /// `wrap_soft_breaks` and `has_width` are true.
    pub fn from_mask(wrap_soft_breaks: bool, wrap_hard_breaks: bool, has_width: bool) -> LinebreakData {
        let hard = if wrap_hard_breaks { Self::HARD } else { Self::NONE };
        let soft = if wrap_soft_breaks && has_width { Self::SOFT } else { Self::NONE };
        Self::new(hard | soft)
    }

    /// True when the value is exactly the hard break flag. A combined hard + soft mask is not
    /// considered hard.
    pub fn is_hard(&self) -> bool {
        Self::HARD == self.bits
    }

    /// True when the value is exactly the soft break flag. A combined hard + soft mask is not
    /// considered soft.
    pub fn is_soft(&self) -> bool {
        Self::SOFT == self.bits
    }

    /// Keeps only the flags that are set in both `self` and `other`.
    pub fn mask(&self, other: LinebreakData) -> LinebreakData {
        LinebreakData {
            bits: other.bits & self.bits,
        }
    }
}
106
107#[derive(Debug, Copy, Clone)]
108pub struct Linebreaker {
109 state: u8,
110}
111
112impl Linebreaker {
113 pub fn new() -> Linebreaker {
114 Linebreaker {
115 state: 0,
116 }
117 }
118
119 pub fn reset(&mut self) {
120 self.state = 0;
121 }
122
123 // [See license/xi-editor/xi-unicode] Copyright 2016 The xi-editor Authors
124 pub fn next(&mut self, codepoint: char) -> LinebreakData {
125 let cp = codepoint as usize;
126 let lb = if cp < 0x800 {
127 LINEBREAK_1_2[cp]
128 } else if cp < 0x10000 {
129 let child = LINEBREAK_3_ROOT[cp >> 6];
130 LINEBREAK_3_CHILD[(child as usize) * 0x40 + (cp & 0x3f)]
131 } else {
132 let mid = LINEBREAK_4_ROOT[cp >> 12];
133 let leaf = LINEBREAK_4_MID[(mid as usize) * 0x40 + ((cp >> 6) & 0x3f)];
134 LINEBREAK_4_LEAVES[(leaf as usize) * 0x40 + (cp & 0x3f)]
135 };
136 let i = (self.state as usize) * N_LINEBREAK_CATEGORIES + (lb as usize);
137 let new = LINEBREAK_STATE_MACHINE[i];
138 if (new as i8) < 0 {
139 self.state = new & 0x3f;
140 if new >= 0xc0 {
141 LINEBREAK_HARD
142 } else {
143 LINEBREAK_SOFT
144 }
145 } else {
146 self.state = new;
147 LINEBREAK_NONE
148 }
149 }
150}
151
/// Miscellaneous metadata associated with a character to assist in layout.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct CharacterData {
    bits: u8,
}

impl CharacterData {
    const WHITESPACE: u8 = 0b0000_0001;
    const CONTROL: u8 = 0b0000_0010;
    const MISSING: u8 = 0b0000_0100;

    /// Classifies a character given its index in the font.
    ///
    /// An `index` of 0 marks the character as missing. Tab, line feed, form feed, carriage
    /// return and space are marked as whitespace. `'\0'..='\x1F'` and `'\x7F'` are marked as
    /// control characters. A character can carry several of these flags at once.
    pub fn classify(c: char, index: u16) -> CharacterData {
        let missing = if index == 0 { Self::MISSING } else { 0 };
        let whitespace = match c {
            '\t' | '\n' | '\x0C' | '\r' | ' ' => Self::WHITESPACE,
            _ => 0,
        };
        let control = match c {
            '\0'..='\x1F' | '\x7F' => Self::CONTROL,
            _ => 0,
        };
        CharacterData {
            bits: missing | whitespace | control,
        }
    }

    /// Tests whether `flag` is set.
    fn has(&self, flag: u8) -> bool {
        (self.bits & flag) != 0
    }

    /// A heuristic for whether the glyph this was classified from should be rasterized. Missing
    /// glyphs, whitespace, and control characters will return false.
    pub fn rasterize(&self) -> bool {
        self.bits == 0
    }

    /// Marks if the character is an ASCII whitespace character.
    pub fn is_whitespace(&self) -> bool {
        self.has(Self::WHITESPACE)
    }

    /// Marks if the character is an ASCII control character.
    pub fn is_control(&self) -> bool {
        self.has(Self::CONTROL)
    }

    /// Marks if the character is missing from its associated font.
    pub fn is_missing(&self) -> bool {
        self.has(Self::MISSING)
    }
}
203

// Provided by KDAB · Privacy Policy (code browser page footer; not part of this module)