1 | /* Pango |
2 | * pango-emoji.c: Emoji handling |
3 | * |
4 | * Copyright (C) 2017 Google, Inc. |
5 | * |
6 | * This library is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Library General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2 of the License, or (at your option) any later version. |
10 | * |
11 | * This library is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Library General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Library General Public |
17 | * License along with this library; if not, write to the |
18 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
19 | * Boston, MA 02111-1307, USA. |
20 | * |
21 | * Implementation of pango_emoji_iter is based on Chromium's Ragel-based |
22 | * parser: |
23 | * |
24 | * https://chromium-review.googlesource.com/c/chromium/src/+/1264577 |
25 | * |
26 | * The grammar file emoji_presentation_scanner.rl was modified only to |
27 | * adapt the function signature and variables to our use case. The |
28 | * grammar itself was NOT modified: |
29 | * |
30 | * https://chromium-review.googlesource.com/c/chromium/src/+/1264577/3/third_party/blink/renderer/platform/fonts/emoji_presentation_scanner.rl |
31 | * |
32 | * The emoji_presentation_scanner.c is generated from .rl file by |
33 | * running ragel on it. |
34 | * |
35 | * The categorization is also based on: |
36 | * |
37 | * https://chromium-review.googlesource.com/c/chromium/src/+/1264577/3/third_party/blink/renderer/platform/fonts/utf16_ragel_iterator.h |
38 | * |
39 | * The iterator next() is based on: |
40 | * |
41 | * https://chromium-review.googlesource.com/c/chromium/src/+/1264577/3/third_party/blink/renderer/platform/fonts/symbols_iterator.cc |
42 | * |
43 | * // Copyright 2015 The Chromium Authors. All rights reserved. |
44 | * // Use of this source code is governed by a BSD-style license that can be |
45 | * // found in the LICENSE file. |
46 | */ |
47 | |
48 | #include "config.h" |
49 | #include <stdlib.h> |
50 | #include <string.h> |
51 | |
52 | #include "pango-emoji-private.h" |
53 | #include "pango-emoji-table.h" |
54 | |
55 | static inline gboolean |
56 | bsearch_interval (gunichar c, |
57 | const struct Interval table[], |
58 | guint n) |
59 | { |
60 | guint lower = 0; |
61 | guint upper = n - 1; |
62 | |
63 | while (lower <= upper) |
64 | { |
65 | int mid = (lower + upper) / 2; |
66 | |
67 | if (c < table[mid].start) |
68 | upper = mid - 1; |
69 | else if (c > table[mid].end) |
70 | lower = mid + 1; |
71 | else |
72 | return TRUE; |
73 | } |
74 | |
75 | return FALSE; |
76 | } |
77 | |
78 | #define DEFINE_pango_Is_(name) \ |
79 | static inline gboolean \ |
80 | _pango_Is_##name (gunichar ch) \ |
81 | { \ |
82 | return ch >= _pango_##name##_table[0].start && \ |
83 | bsearch_interval (ch, \ |
84 | _pango_##name##_table, \ |
85 | G_N_ELEMENTS (_pango_##name##_table)); \ |
86 | } |
87 | |
88 | DEFINE_pango_Is_(Emoji) |
89 | DEFINE_pango_Is_(Emoji_Presentation) |
90 | DEFINE_pango_Is_(Emoji_Modifier) |
91 | DEFINE_pango_Is_(Emoji_Modifier_Base) |
92 | DEFINE_pango_Is_(Extended_Pictographic) |
93 | |
94 | gboolean |
95 | _pango_Is_Emoji_Base_Character (gunichar ch) |
96 | { |
97 | return _pango_Is_Emoji (ch); |
98 | } |
99 | |
100 | gboolean |
101 | _pango_Is_Emoji_Extended_Pictographic (gunichar ch) |
102 | { |
103 | return _pango_Is_Extended_Pictographic (ch); |
104 | } |
105 | |
106 | static inline gboolean |
107 | _pango_Is_Emoji_Emoji_Default (gunichar ch) |
108 | { |
109 | return _pango_Is_Emoji_Presentation (ch); |
110 | } |
111 | |
112 | static inline gboolean |
113 | _pango_Is_Emoji_Keycap_Base (gunichar ch) |
114 | { |
115 | return (ch >= '0' && ch <= '9') || ch == '#' || ch == '*'; |
116 | } |
117 | |
118 | static inline gboolean |
119 | _pango_Is_Regional_Indicator (gunichar ch) |
120 | { |
121 | return (ch >= 0x1F1E6 && ch <= 0x1F1FF); |
122 | } |
123 | |
124 | |
/* Individual code points that get a scanner category of their own in
 * _pango_EmojiSegmentationCategory(). */
#define kCombiningEnclosingCircleBackslashCharacter 0x20E0 /* U+20E0 */
#define kCombiningEnclosingKeycapCharacter          0x20E3 /* U+20E3 */
#define kVariationSelector15Character               0xFE0E /* U+FE0E, VS15 */
#define kVariationSelector16Character               0xFE0F /* U+FE0F, VS16 */
#define kZeroWidthJoinerCharacter                   0x200D /* U+200D, ZWJ */
130 | |
/*
 * Per-character categories produced by _pango_EmojiSegmentationCategory()
 * and stored, one byte per character, in the array that is handed to
 * scan_emoji_presentation().
 *
 * NOTE(review): the numeric values presumably have to match the ones the
 * generated scanner (emoji_presentation_scanner.c) was built with, so they
 * are spelled out explicitly and should not be renumbered -- confirm
 * against the .rl grammar before changing anything here.
 */
enum PangoEmojiScannerCategory {
  EMOJI                                = 0,
  EMOJI_TEXT_PRESENTATION              = 1,  /* in Emoji table, not in Emoji_Presentation */
  EMOJI_EMOJI_PRESENTATION             = 2,  /* in Emoji_Presentation table */
  EMOJI_MODIFIER_BASE                  = 3,  /* in Emoji_Modifier_Base table */
  EMOJI_MODIFIER                       = 4,  /* in Emoji_Modifier table */
  EMOJI_VS_BASE                        = 5,
  REGIONAL_INDICATOR                   = 6,  /* U+1F1E6..U+1F1FF */
  KEYCAP_BASE                          = 7,  /* '0'..'9', '#', '*' */
  COMBINING_ENCLOSING_KEYCAP           = 8,  /* U+20E3 */
  COMBINING_ENCLOSING_CIRCLE_BACKSLASH = 9,  /* U+20E0 */
  ZWJ                                  = 10, /* U+200D */
  VS15                                 = 11, /* U+FE0E */
  VS16                                 = 12, /* U+FE0F */
  TAG_BASE                             = 13, /* U+1F3F4 */
  TAG_SEQUENCE                         = 14, /* U+E0030..U+E0039, U+E0061..U+E007A */
  TAG_TERM                             = 15, /* U+E007F */
  kMaxEmojiScannerCategory             = 16  /* everything else ("any") */
};
150 | |
151 | static inline unsigned char |
152 | _pango_EmojiSegmentationCategory (gunichar codepoint) |
153 | { |
154 | /* Specific ones first. */ |
155 | if (('a' <= codepoint && codepoint <= 'z') || |
156 | ('A' <= codepoint && codepoint <= 'Z') || |
157 | codepoint == ' ') |
158 | return kMaxEmojiScannerCategory; |
159 | |
160 | if ('0' <= codepoint && codepoint <= '9') |
161 | return KEYCAP_BASE; |
162 | |
163 | switch (codepoint) |
164 | { |
165 | case kCombiningEnclosingKeycapCharacter: |
166 | return COMBINING_ENCLOSING_KEYCAP; |
167 | case kCombiningEnclosingCircleBackslashCharacter: |
168 | return COMBINING_ENCLOSING_CIRCLE_BACKSLASH; |
169 | case kZeroWidthJoinerCharacter: |
170 | return ZWJ; |
171 | case kVariationSelector15Character: |
172 | return VS15; |
173 | case kVariationSelector16Character: |
174 | return VS16; |
175 | case 0x1F3F4: |
176 | return TAG_BASE; |
177 | case 0xE007F: |
178 | return TAG_TERM; |
179 | default: ; |
180 | } |
181 | |
182 | if ((0xE0030 <= codepoint && codepoint <= 0xE0039) || |
183 | (0xE0061 <= codepoint && codepoint <= 0xE007A)) |
184 | return TAG_SEQUENCE; |
185 | |
186 | if (_pango_Is_Emoji_Modifier_Base (ch: codepoint)) |
187 | return EMOJI_MODIFIER_BASE; |
188 | if (_pango_Is_Emoji_Modifier (ch: codepoint)) |
189 | return EMOJI_MODIFIER; |
190 | if (_pango_Is_Regional_Indicator (ch: codepoint)) |
191 | return REGIONAL_INDICATOR; |
192 | if (_pango_Is_Emoji_Keycap_Base (ch: codepoint)) |
193 | return KEYCAP_BASE; |
194 | if (_pango_Is_Emoji_Emoji_Default (ch: codepoint)) |
195 | return EMOJI_EMOJI_PRESENTATION; |
196 | if (_pango_Is_Emoji (ch: codepoint)) |
197 | return EMOJI_TEXT_PRESENTATION; |
198 | |
199 | /* Ragel state machine will interpret unknown category as "any". */ |
200 | return kMaxEmojiScannerCategory; |
201 | } |
202 | |
203 | |
204 | typedef gboolean bool; |
205 | enum { false = FALSE, true = TRUE }; |
206 | typedef unsigned char *emoji_text_iter_t; |
207 | |
208 | #pragma GCC diagnostic push |
209 | #pragma GCC diagnostic ignored "-Wswitch-default" |
210 | #include "emoji_presentation_scanner.c" |
211 | #pragma GCC diagnostic pop |
212 | |
213 | |
214 | PangoEmojiIter * |
215 | _pango_emoji_iter_init (PangoEmojiIter *iter, |
216 | const char *text, |
217 | int length) |
218 | { |
219 | unsigned int n_chars = g_utf8_strlen (p: text, max: length); |
220 | unsigned char *types = g_malloc (n_bytes: n_chars); |
221 | unsigned int i; |
222 | const char *p; |
223 | |
224 | p = text; |
225 | for (i = 0; i < n_chars; i++) |
226 | { |
227 | types[i] = _pango_EmojiSegmentationCategory (codepoint: g_utf8_get_char (p)); |
228 | p = g_utf8_next_char (p); |
229 | } |
230 | |
231 | iter->text_start = iter->start = iter->end = text; |
232 | if (length >= 0) |
233 | iter->text_end = text + length; |
234 | else |
235 | iter->text_end = text + strlen (s: text); |
236 | iter->is_emoji = FALSE; |
237 | |
238 | iter->types = types; |
239 | iter->n_chars = n_chars; |
240 | iter->cursor = 0; |
241 | |
242 | _pango_emoji_iter_next (iter); |
243 | |
244 | return iter; |
245 | } |
246 | |
247 | void |
248 | _pango_emoji_iter_fini (PangoEmojiIter *iter) |
249 | { |
250 | g_free (mem: iter->types); |
251 | } |
252 | |
253 | gboolean |
254 | _pango_emoji_iter_next (PangoEmojiIter *iter) |
255 | { |
256 | unsigned int old_cursor, cursor; |
257 | gboolean is_emoji; |
258 | |
259 | if (iter->end >= iter->text_end) |
260 | return FALSE; |
261 | |
262 | iter->start = iter->end; |
263 | |
264 | old_cursor = cursor = iter->cursor; |
265 | cursor = scan_emoji_presentation (p: iter->types + cursor, |
266 | pe: iter->types + iter->n_chars, |
267 | is_emoji: &is_emoji) - iter->types; |
268 | do |
269 | { |
270 | iter->cursor = cursor; |
271 | iter->is_emoji = is_emoji; |
272 | |
273 | if (cursor == iter->n_chars) |
274 | break; |
275 | |
276 | cursor = scan_emoji_presentation (p: iter->types + cursor, |
277 | pe: iter->types + iter->n_chars, |
278 | is_emoji: &is_emoji) - iter->types; |
279 | } |
280 | while (iter->is_emoji == is_emoji); |
281 | |
282 | iter->end = g_utf8_offset_to_pointer (str: iter->start, offset: iter->cursor - old_cursor); |
283 | |
284 | return TRUE; |
285 | } |
286 | |
287 | |
288 | /********************************************************** |
289 | * End of code from Chromium |
290 | **********************************************************/ |
291 | |