1/* Pango
2 * pango-emoji.c: Emoji handling
3 *
4 * Copyright (C) 2017 Google, Inc.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public
17 * License along with this library; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
20 *
21 * Implementation of pango_emoji_iter is based on Chromium's Ragel-based
22 * parser:
23 *
24 * https://chromium-review.googlesource.com/c/chromium/src/+/1264577
25 *
 * The grammar file emoji_presentation_scanner.rl was modified only to
 * adapt the function signature and variables to our use case. The
 * grammar itself was NOT modified:
29 *
30 * https://chromium-review.googlesource.com/c/chromium/src/+/1264577/3/third_party/blink/renderer/platform/fonts/emoji_presentation_scanner.rl
31 *
 * emoji_presentation_scanner.c is generated from that .rl file by
 * running Ragel on it.
34 *
35 * The categorization is also based on:
36 *
37 * https://chromium-review.googlesource.com/c/chromium/src/+/1264577/3/third_party/blink/renderer/platform/fonts/utf16_ragel_iterator.h
38 *
39 * The iterator next() is based on:
40 *
41 * https://chromium-review.googlesource.com/c/chromium/src/+/1264577/3/third_party/blink/renderer/platform/fonts/symbols_iterator.cc
42 *
43 * // Copyright 2015 The Chromium Authors. All rights reserved.
44 * // Use of this source code is governed by a BSD-style license that can be
45 * // found in the LICENSE file.
46 */
47
48#include "config.h"
49#include <stdlib.h>
50#include <string.h>
51
52#include "pango-emoji-private.h"
53#include "pango-emoji-table.h"
54
55static inline gboolean
56bsearch_interval (gunichar c,
57 const struct Interval table[],
58 guint n)
59{
60 guint lower = 0;
61 guint upper = n - 1;
62
63 while (lower <= upper)
64 {
65 int mid = (lower + upper) / 2;
66
67 if (c < table[mid].start)
68 upper = mid - 1;
69 else if (c > table[mid].end)
70 lower = mid + 1;
71 else
72 return TRUE;
73 }
74
75 return FALSE;
76}
77
78#define DEFINE_pango_Is_(name) \
79static inline gboolean \
80_pango_Is_##name (gunichar ch) \
81{ \
82 return ch >= _pango_##name##_table[0].start && \
83 bsearch_interval (ch, \
84 _pango_##name##_table, \
85 G_N_ELEMENTS (_pango_##name##_table)); \
86}
87
88DEFINE_pango_Is_(Emoji)
89DEFINE_pango_Is_(Emoji_Presentation)
90DEFINE_pango_Is_(Emoji_Modifier)
91DEFINE_pango_Is_(Emoji_Modifier_Base)
92DEFINE_pango_Is_(Extended_Pictographic)
93
94gboolean
95_pango_Is_Emoji_Base_Character (gunichar ch)
96{
97 return _pango_Is_Emoji (ch);
98}
99
100gboolean
101_pango_Is_Emoji_Extended_Pictographic (gunichar ch)
102{
103 return _pango_Is_Extended_Pictographic (ch);
104}
105
106static inline gboolean
107_pango_Is_Emoji_Emoji_Default (gunichar ch)
108{
109 return _pango_Is_Emoji_Presentation (ch);
110}
111
112static inline gboolean
113_pango_Is_Emoji_Keycap_Base (gunichar ch)
114{
115 return (ch >= '0' && ch <= '9') || ch == '#' || ch == '*';
116}
117
118static inline gboolean
119_pango_Is_Regional_Indicator (gunichar ch)
120{
121 return (ch >= 0x1F1E6 && ch <= 0x1F1FF);
122}
123
124
/* Individual code points that get a scanner category of their own. */
#define kCombiningEnclosingCircleBackslashCharacter 0x20E0 /* U+20E0 */
#define kCombiningEnclosingKeycapCharacter          0x20E3 /* U+20E3 */
#define kVariationSelector15Character               0xFE0E /* U+FE0E */
#define kVariationSelector16Character               0xFE0F /* U+FE0F */
#define kZeroWidthJoinerCharacter                   0x200D /* U+200D */
130
/*
 * Per-character categories stored in the byte array handed to
 * scan_emoji_presentation().
 *
 * NOTE(review): the numeric values presumably have to stay in sync with
 * the alphabet of emoji_presentation_scanner.rl -- confirm before
 * renumbering anything here.
 */
enum PangoEmojiScannerCategory {
  EMOJI                                =  0,
  EMOJI_TEXT_PRESENTATION              =  1,
  EMOJI_EMOJI_PRESENTATION             =  2,
  EMOJI_MODIFIER_BASE                  =  3,
  EMOJI_MODIFIER                       =  4,
  EMOJI_VS_BASE                        =  5,
  REGIONAL_INDICATOR                   =  6,
  KEYCAP_BASE                          =  7,
  COMBINING_ENCLOSING_KEYCAP           =  8,
  COMBINING_ENCLOSING_CIRCLE_BACKSLASH =  9,
  ZWJ                                  = 10,
  VS15                                 = 11,
  VS16                                 = 12,
  TAG_BASE                             = 13,
  TAG_SEQUENCE                         = 14,
  TAG_TERM                             = 15,

  /* One past the last real category; also used as the "no category" value. */
  kMaxEmojiScannerCategory             = 16
};
150
151static inline unsigned char
152_pango_EmojiSegmentationCategory (gunichar codepoint)
153{
154 /* Specific ones first. */
155 if (('a' <= codepoint && codepoint <= 'z') ||
156 ('A' <= codepoint && codepoint <= 'Z') ||
157 codepoint == ' ')
158 return kMaxEmojiScannerCategory;
159
160 if ('0' <= codepoint && codepoint <= '9')
161 return KEYCAP_BASE;
162
163 switch (codepoint)
164 {
165 case kCombiningEnclosingKeycapCharacter:
166 return COMBINING_ENCLOSING_KEYCAP;
167 case kCombiningEnclosingCircleBackslashCharacter:
168 return COMBINING_ENCLOSING_CIRCLE_BACKSLASH;
169 case kZeroWidthJoinerCharacter:
170 return ZWJ;
171 case kVariationSelector15Character:
172 return VS15;
173 case kVariationSelector16Character:
174 return VS16;
175 case 0x1F3F4:
176 return TAG_BASE;
177 case 0xE007F:
178 return TAG_TERM;
179 default: ;
180 }
181
182 if ((0xE0030 <= codepoint && codepoint <= 0xE0039) ||
183 (0xE0061 <= codepoint && codepoint <= 0xE007A))
184 return TAG_SEQUENCE;
185
186 if (_pango_Is_Emoji_Modifier_Base (ch: codepoint))
187 return EMOJI_MODIFIER_BASE;
188 if (_pango_Is_Emoji_Modifier (ch: codepoint))
189 return EMOJI_MODIFIER;
190 if (_pango_Is_Regional_Indicator (ch: codepoint))
191 return REGIONAL_INDICATOR;
192 if (_pango_Is_Emoji_Keycap_Base (ch: codepoint))
193 return KEYCAP_BASE;
194 if (_pango_Is_Emoji_Emoji_Default (ch: codepoint))
195 return EMOJI_EMOJI_PRESENTATION;
196 if (_pango_Is_Emoji (ch: codepoint))
197 return EMOJI_TEXT_PRESENTATION;
198
199 /* Ragel state machine will interpret unknown category as "any". */
200 return kMaxEmojiScannerCategory;
201}
202
203
204typedef gboolean bool;
205enum { false = FALSE, true = TRUE };
206typedef unsigned char *emoji_text_iter_t;
207
208#pragma GCC diagnostic push
209#pragma GCC diagnostic ignored "-Wswitch-default"
210#include "emoji_presentation_scanner.c"
211#pragma GCC diagnostic pop
212
213
214PangoEmojiIter *
215_pango_emoji_iter_init (PangoEmojiIter *iter,
216 const char *text,
217 int length)
218{
219 unsigned int n_chars = g_utf8_strlen (p: text, max: length);
220 unsigned char *types = g_malloc (n_bytes: n_chars);
221 unsigned int i;
222 const char *p;
223
224 p = text;
225 for (i = 0; i < n_chars; i++)
226 {
227 types[i] = _pango_EmojiSegmentationCategory (codepoint: g_utf8_get_char (p));
228 p = g_utf8_next_char (p);
229 }
230
231 iter->text_start = iter->start = iter->end = text;
232 if (length >= 0)
233 iter->text_end = text + length;
234 else
235 iter->text_end = text + strlen (s: text);
236 iter->is_emoji = FALSE;
237
238 iter->types = types;
239 iter->n_chars = n_chars;
240 iter->cursor = 0;
241
242 _pango_emoji_iter_next (iter);
243
244 return iter;
245}
246
247void
248_pango_emoji_iter_fini (PangoEmojiIter *iter)
249{
250 g_free (mem: iter->types);
251}
252
253gboolean
254_pango_emoji_iter_next (PangoEmojiIter *iter)
255{
256 unsigned int old_cursor, cursor;
257 gboolean is_emoji;
258
259 if (iter->end >= iter->text_end)
260 return FALSE;
261
262 iter->start = iter->end;
263
264 old_cursor = cursor = iter->cursor;
265 cursor = scan_emoji_presentation (p: iter->types + cursor,
266 pe: iter->types + iter->n_chars,
267 is_emoji: &is_emoji) - iter->types;
268 do
269 {
270 iter->cursor = cursor;
271 iter->is_emoji = is_emoji;
272
273 if (cursor == iter->n_chars)
274 break;
275
276 cursor = scan_emoji_presentation (p: iter->types + cursor,
277 pe: iter->types + iter->n_chars,
278 is_emoji: &is_emoji) - iter->types;
279 }
280 while (iter->is_emoji == is_emoji);
281
282 iter->end = g_utf8_offset_to_pointer (str: iter->start, offset: iter->cursor - old_cursor);
283
284 return TRUE;
285}
286
287
288/**********************************************************
289 * End of code from Chromium
290 **********************************************************/
291

/* source code of gtk/subprojects/pango/pango/pango-emoji.c */