// Copyright 2015 Google Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

//! CommonMark punctuation set based on the spec and Unicode properties.

// Autogenerated by mk_puncttable.py

/// Punctuation bitmasks for the ASCII range, one `u16` per row of 16 code
/// points.
///
/// Row `r` covers `U+00r0..=U+00rF`; bit `n` (least-significant bit is 0) is
/// set when code point `r * 16 + n` is punctuation. Looked up by
/// `is_ascii_punctuation` as `PUNCT_MASKS_ASCII[c / 16] & (1 << (c & 15))`.
const PUNCT_MASKS_ASCII: [u16; 8] = [
    0x0000, // U+0000...U+000F
    0x0000, // U+0010...U+001F
    0xfffe, // U+0020...U+002F
    0xfc00, // U+0030...U+003F
    0x0001, // U+0040...U+004F
    0xf800, // U+0050...U+005F
    0x0001, // U+0060...U+006F
    0x7800, // U+0070...U+007F
];

/// Row indices (`code_point / 16`) of the non-ASCII 16-code-point rows that
/// have an entry in `PUNCT_MASKS`.
///
/// `is_punctuation` locates a row with `binary_search`, so the entries must
/// stay strictly increasing. The entry at position `i` pairs with
/// `PUNCT_MASKS[i]`; both arrays therefore have the same length.
const PUNCT_TAB: [u16; 132] = [
    10, // U+00A0...U+00AF
    11, // U+00B0...U+00BF
    55, // U+0370...U+037F
    56, // U+0380...U+038F
    85, // U+0550...U+055F
    88, // U+0580...U+058F
    91, // U+05B0...U+05BF
    92, // U+05C0...U+05CF
    95, // U+05F0...U+05FF
    96, // U+0600...U+060F
    97, // U+0610...U+061F
    102, // U+0660...U+066F
    109, // U+06D0...U+06DF
    112, // U+0700...U+070F
    127, // U+07F0...U+07FF
    131, // U+0830...U+083F
    133, // U+0850...U+085F
    150, // U+0960...U+096F
    151, // U+0970...U+097F
    175, // U+0AF0...U+0AFF
    223, // U+0DF0...U+0DFF
    228, // U+0E40...U+0E4F
    229, // U+0E50...U+0E5F
    240, // U+0F00...U+0F0F
    241, // U+0F10...U+0F1F
    243, // U+0F30...U+0F3F
    248, // U+0F80...U+0F8F
    253, // U+0FD0...U+0FDF
    260, // U+1040...U+104F
    271, // U+10F0...U+10FF
    310, // U+1360...U+136F
    320, // U+1400...U+140F
    358, // U+1660...U+166F
    361, // U+1690...U+169F
    366, // U+16E0...U+16EF
    371, // U+1730...U+173F
    381, // U+17D0...U+17DF
    384, // U+1800...U+180F
    404, // U+1940...U+194F
    417, // U+1A10...U+1A1F
    426, // U+1AA0...U+1AAF
    437, // U+1B50...U+1B5F
    438, // U+1B60...U+1B6F
    447, // U+1BF0...U+1BFF
    451, // U+1C30...U+1C3F
    455, // U+1C70...U+1C7F
    460, // U+1CC0...U+1CCF
    461, // U+1CD0...U+1CDF
    513, // U+2010...U+201F
    514, // U+2020...U+202F
    515, // U+2030...U+203F
    516, // U+2040...U+204F
    517, // U+2050...U+205F
    519, // U+2070...U+207F
    520, // U+2080...U+208F
    560, // U+2300...U+230F
    562, // U+2320...U+232F
    630, // U+2760...U+276F
    631, // U+2770...U+277F
    636, // U+27C0...U+27CF
    638, // U+27E0...U+27EF
    664, // U+2980...U+298F
    665, // U+2990...U+299F
    669, // U+29D0...U+29DF
    671, // U+29F0...U+29FF
    719, // U+2CF0...U+2CFF
    727, // U+2D70...U+2D7F
    736, // U+2E00...U+2E0F
    737, // U+2E10...U+2E1F
    738, // U+2E20...U+2E2F
    739, // U+2E30...U+2E3F
    740, // U+2E40...U+2E4F
    768, // U+3000...U+300F
    769, // U+3010...U+301F
    771, // U+3030...U+303F
    778, // U+30A0...U+30AF
    783, // U+30F0...U+30FF
    2639, // U+A4F0...U+A4FF
    2656, // U+A600...U+A60F
    2663, // U+A670...U+A67F
    2671, // U+A6F0...U+A6FF
    2695, // U+A870...U+A87F
    2700, // U+A8C0...U+A8CF
    2703, // U+A8F0...U+A8FF
    2706, // U+A920...U+A92F
    2709, // U+A950...U+A95F
    2716, // U+A9C0...U+A9CF
    2717, // U+A9D0...U+A9DF
    2725, // U+AA50...U+AA5F
    2733, // U+AAD0...U+AADF
    2735, // U+AAF0...U+AAFF
    2750, // U+ABE0...U+ABEF
    4051, // U+FD30...U+FD3F
    4065, // U+FE10...U+FE1F
    4067, // U+FE30...U+FE3F
    4068, // U+FE40...U+FE4F
    4069, // U+FE50...U+FE5F
    4070, // U+FE60...U+FE6F
    4080, // U+FF00...U+FF0F
    4081, // U+FF10...U+FF1F
    4082, // U+FF20...U+FF2F
    4083, // U+FF30...U+FF3F
    4085, // U+FF50...U+FF5F
    4086, // U+FF60...U+FF6F
    4112, // U+10100...U+1010F
    4153, // U+10390...U+1039F
    4157, // U+103D0...U+103DF
    4182, // U+10560...U+1056F
    4229, // U+10850...U+1085F
    4241, // U+10910...U+1091F
    4243, // U+10930...U+1093F
    4261, // U+10A50...U+10A5F
    4263, // U+10A70...U+10A7F
    4271, // U+10AF0...U+10AFF
    4275, // U+10B30...U+10B3F
    4281, // U+10B90...U+10B9F
    4356, // U+11040...U+1104F
    4363, // U+110B0...U+110BF
    4364, // U+110C0...U+110CF
    4372, // U+11140...U+1114F
    4375, // U+11170...U+1117F
    4380, // U+111C0...U+111CF
    4387, // U+11230...U+1123F
    4428, // U+114C0...U+114CF
    4444, // U+115C0...U+115CF
    4452, // U+11640...U+1164F
    4679, // U+12470...U+1247F
    5798, // U+16A60...U+16A6F
    5807, // U+16AF0...U+16AFF
    5811, // U+16B30...U+16B3F
    5812, // U+16B40...U+16B4F
    7113, // U+1BC90...U+1BC9F
];

/// Punctuation bitmasks for the non-ASCII rows listed in `PUNCT_TAB`.
///
/// `PUNCT_MASKS[i]` belongs to the row whose index is `PUNCT_TAB[i]`. Bit `n`
/// (least-significant bit is 0) is set when code point `row * 16 + n` is
/// punctuation; `is_punctuation` tests it with `1 << (cp & 15)`.
const PUNCT_MASKS: [u16; 132] = [
    0x0882, // U+00A0...U+00AF
    0x88c0, // U+00B0...U+00BF
    0x4000, // U+0370...U+037F
    0x0080, // U+0380...U+038F
    0xfc00, // U+0550...U+055F
    0x0600, // U+0580...U+058F
    0x4000, // U+05B0...U+05BF
    0x0049, // U+05C0...U+05CF
    0x0018, // U+05F0...U+05FF
    0x3600, // U+0600...U+060F
    0xc800, // U+0610...U+061F
    0x3c00, // U+0660...U+066F
    0x0010, // U+06D0...U+06DF
    0x3fff, // U+0700...U+070F
    0x0380, // U+07F0...U+07FF
    0x7fff, // U+0830...U+083F
    0x4000, // U+0850...U+085F
    0x0030, // U+0960...U+096F
    0x0001, // U+0970...U+097F
    0x0001, // U+0AF0...U+0AFF
    0x0010, // U+0DF0...U+0DFF
    0x8000, // U+0E40...U+0E4F
    0x0c00, // U+0E50...U+0E5F
    0xfff0, // U+0F00...U+0F0F
    0x0017, // U+0F10...U+0F1F
    0x3c00, // U+0F30...U+0F3F
    0x0020, // U+0F80...U+0F8F
    0x061f, // U+0FD0...U+0FDF
    0xfc00, // U+1040...U+104F
    0x0800, // U+10F0...U+10FF
    0x01ff, // U+1360...U+136F
    0x0001, // U+1400...U+140F
    0x6000, // U+1660...U+166F
    0x1800, // U+1690...U+169F
    0x3800, // U+16E0...U+16EF
    0x0060, // U+1730...U+173F
    0x0770, // U+17D0...U+17DF
    0x07ff, // U+1800...U+180F
    0x0030, // U+1940...U+194F
    0xc000, // U+1A10...U+1A1F
    0x3f7f, // U+1AA0...U+1AAF
    0xfc00, // U+1B50...U+1B5F
    0x0001, // U+1B60...U+1B6F
    0xf000, // U+1BF0...U+1BFF
    0xf800, // U+1C30...U+1C3F
    0xc000, // U+1C70...U+1C7F
    0x00ff, // U+1CC0...U+1CCF
    0x0008, // U+1CD0...U+1CDF
    0xffff, // U+2010...U+201F
    0x00ff, // U+2020...U+202F
    0xffff, // U+2030...U+203F
    0xffef, // U+2040...U+204F
    0x7ffb, // U+2050...U+205F
    0x6000, // U+2070...U+207F
    0x6000, // U+2080...U+208F
    0x0f00, // U+2300...U+230F
    0x0600, // U+2320...U+232F
    0xff00, // U+2760...U+276F
    0x003f, // U+2770...U+277F
    0x0060, // U+27C0...U+27CF
    0xffc0, // U+27E0...U+27EF
    0xfff8, // U+2980...U+298F
    0x01ff, // U+2990...U+299F
    0x0f00, // U+29D0...U+29DF
    0x3000, // U+29F0...U+29FF
    0xde00, // U+2CF0...U+2CFF
    0x0001, // U+2D70...U+2D7F
    0xffff, // U+2E00...U+2E0F
    0xffff, // U+2E10...U+2E1F
    0x7fff, // U+2E20...U+2E2F
    0xffff, // U+2E30...U+2E3F
    0x0007, // U+2E40...U+2E4F
    0xff0e, // U+3000...U+300F
    0xfff3, // U+3010...U+301F
    0x2001, // U+3030...U+303F
    0x0001, // U+30A0...U+30AF
    0x0800, // U+30F0...U+30FF
    0xc000, // U+A4F0...U+A4FF
    0xe000, // U+A600...U+A60F
    0x4008, // U+A670...U+A67F
    0x00fc, // U+A6F0...U+A6FF
    0x00f0, // U+A870...U+A87F
    0xc000, // U+A8C0...U+A8CF
    0x0700, // U+A8F0...U+A8FF
    0xc000, // U+A920...U+A92F
    0x8000, // U+A950...U+A95F
    0x3ffe, // U+A9C0...U+A9CF
    0xc000, // U+A9D0...U+A9DF
    0xf000, // U+AA50...U+AA5F
    0xc000, // U+AAD0...U+AADF
    0x0003, // U+AAF0...U+AAFF
    0x0800, // U+ABE0...U+ABEF
    0xc000, // U+FD30...U+FD3F
    0x03ff, // U+FE10...U+FE1F
    0xffff, // U+FE30...U+FE3F
    0xffff, // U+FE40...U+FE4F
    0xfff7, // U+FE50...U+FE5F
    0x0d0b, // U+FE60...U+FE6F
    0xf7ee, // U+FF00...U+FF0F
    0x8c00, // U+FF10...U+FF1F
    0x0001, // U+FF20...U+FF2F
    0xb800, // U+FF30...U+FF3F
    0xa800, // U+FF50...U+FF5F
    0x003f, // U+FF60...U+FF6F
    0x0007, // U+10100...U+1010F
    0x8000, // U+10390...U+1039F
    0x0001, // U+103D0...U+103DF
    0x8000, // U+10560...U+1056F
    0x0080, // U+10850...U+1085F
    0x8000, // U+10910...U+1091F
    0x8000, // U+10930...U+1093F
    0x01ff, // U+10A50...U+10A5F
    0x8000, // U+10A70...U+10A7F
    0x007f, // U+10AF0...U+10AFF
    0xfe00, // U+10B30...U+10B3F
    0x1e00, // U+10B90...U+10B9F
    0x3f80, // U+11040...U+1104F
    0xd800, // U+110B0...U+110BF
    0x0003, // U+110C0...U+110CF
    0x000f, // U+11140...U+1114F
    0x0030, // U+11170...U+1117F
    0x21e0, // U+111C0...U+111CF
    0x3f00, // U+11230...U+1123F
    0x0040, // U+114C0...U+114CF
    0x03fe, // U+115C0...U+115CF
    0x000e, // U+11640...U+1164F
    0x001f, // U+12470...U+1247F
    0xc000, // U+16A60...U+16A6F
    0x0020, // U+16AF0...U+16AFF
    0x0f80, // U+16B30...U+16B3F
    0x0010, // U+16B40...U+16B4F
    0x8000, // U+1BC90...U+1BC9F
];

306pub(crate) fn is_ascii_punctuation(c: u8) -> bool {
307 c < 128 && (PUNCT_MASKS_ASCII[(c / 16) as usize] & (1 << (c & 15))) != 0
308}
309
310pub(crate) fn is_punctuation(c: char) -> bool {
311 let cp: u32 = c as u32;
312 if cp < 128 {
313 return is_ascii_punctuation(cp as u8);
314 }
315 if cp > 0x1BC9F {
316 return false;
317 }
318 let high: u16 = (cp / 16) as u16;
319 match PUNCT_TAB.binary_search(&high) {
320 Ok(index: usize) => (PUNCT_MASKS[index] & (1 << (cp & 15))) != 0,
321 _ => false,
322 }
323}
324
#[cfg(test)]
mod tests {
    use super::{is_ascii_punctuation, is_punctuation, PUNCT_TAB};

    /// Spot-checks the ASCII table, including a byte outside the ASCII range.
    #[test]
    fn test_ascii() {
        assert!(is_ascii_punctuation(b'!'));
        assert!(is_ascii_punctuation(b'@'));
        assert!(is_ascii_punctuation(b'~'));
        assert!(!is_ascii_punctuation(b' '));
        assert!(!is_ascii_punctuation(b'0'));
        assert!(!is_ascii_punctuation(b'A'));
        assert!(!is_ascii_punctuation(0xA1));
    }

    /// Spot-checks the full lookup: the ASCII fast path, a few non-ASCII
    /// rows, and both sides of the upper bound of the table.
    #[test]
    fn test_unicode() {
        assert!(is_punctuation('~'));
        assert!(!is_punctuation(' '));

        assert!(is_punctuation('\u{00A1}'));
        assert!(is_punctuation('\u{060C}'));
        assert!(is_punctuation('\u{FF65}'));
        assert!(is_punctuation('\u{1BC9F}'));
        assert!(!is_punctuation('\u{1BCA0}'));
    }

    /// `is_punctuation` uses `binary_search` on `PUNCT_TAB`, which is only
    /// meaningful when the rows are strictly increasing.
    #[test]
    fn test_table_sorted() {
        assert!(PUNCT_TAB.windows(2).all(|w| w[0] < w[1]));
    }
}