1/* Pango
 * pango-segmentation.c: Show text segmentation as determined by Pango
3 *
4 * Copyright (C) 2021 Red Hat, Inc
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public
17 * License along with this library; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
20 */
21
22#include <glib.h>
23#include <pango/pangocairo.h>
24#include <string.h>
25#include <stdlib.h>
26#include <locale.h>
27
28#ifndef G_OS_WIN32
29#include <unistd.h>
30#endif
31
/* The kinds of text boundary this tool can display.
 *
 * GRAPHEME is deliberately the zero value: it is what kind_from_string()
 * yields when it is handed an unrecognized name.
 */
typedef enum
{
  GRAPHEME = 0,   /* cursor positions */
  WORD     = 1,   /* word boundaries */
  LINE     = 2,   /* line break opportunities */
  SENTENCE = 3    /* sentence boundaries */
} BreakKind;
38
39static BreakKind
40kind_from_string (const char *str)
41{
42 if (strcmp (s1: str, s2: "grapheme") == 0)
43 return GRAPHEME;
44 else if (strcmp (s1: str, s2: "word") == 0)
45 return WORD;
46 else if (strcmp (s1: str, s2: "line") == 0)
47 return LINE;
48 else if (strcmp (s1: str, s2: "sentence") == 0)
49 return SENTENCE;
50 else
51 {
52 g_printerr (format: "Not a segmentation: %s", str);
53 return 0;
54 }
55}
56
57static gboolean
58show_segmentation (const char *input,
59 BreakKind kind)
60{
61 GString *string;
62 PangoContext *context;
63 gsize length;
64 GError *error = NULL;
65 PangoLogAttr *attrs;
66 int len;
67 char *p;
68 int i;
69 char *text;
70 PangoAttrList *attributes;
71 PangoLayout *layout;
72
73 context = pango_font_map_create_context (fontmap: pango_cairo_font_map_get_default ());
74
75 string = g_string_new (init: "");
76
77 length = strlen (s: input);
78 len = g_utf8_strlen (p: input, max: -1) + 1;
79
80 pango_parse_markup (markup_text: input, length: -1, accel_marker: 0, attr_list: &attributes, text: &text, NULL, error: &error);
81 g_assert_no_error (error);
82
83 layout = pango_layout_new (context);
84 pango_layout_set_text (layout, text, length);
85 pango_layout_set_attributes (layout, attrs: attributes);
86
87 pango_layout_get_log_attrs (layout, attrs: &attrs, n_attrs: &len);
88
89 for (i = 0, p = text; i < len; i++, p = g_utf8_next_char (p))
90 {
91 PangoLogAttr log = attrs[i];
92 gboolean is_break = FALSE;
93
94 switch (kind)
95 {
96 case GRAPHEME:
97 is_break = log.is_cursor_position;
98 break;
99 case WORD:
100 is_break = log.is_word_boundary;
101 break;
102 case LINE:
103 is_break = log.is_line_break;
104 break;
105 case SENTENCE:
106 is_break = log.is_sentence_boundary;
107 break;
108 default:
109 g_assert_not_reached ();
110 }
111
112 if (is_break)
113 g_string_append (string, val: "|");
114
115 if (i < len - 1)
116 {
117 gunichar ch = g_utf8_get_char (p);
118 if (ch == 0x20)
119 g_string_append (string, val: " ");
120 else if (g_unichar_isgraph (c: ch) &&
121 !(g_unichar_type (c: ch) == G_UNICODE_LINE_SEPARATOR ||
122 g_unichar_type (c: ch) == G_UNICODE_PARAGRAPH_SEPARATOR))
123 g_string_append_unichar (string, wc: ch);
124 else
125 g_string_append_printf (string, format: "[%#04x]", ch);
126 }
127 }
128
129 g_object_unref (object: layout);
130 g_free (mem: attrs);
131 g_free (mem: text);
132 pango_attr_list_unref (list: attributes);
133
134 g_print (format: "%s\n", string->str);
135
136 g_string_free (string, TRUE);
137
138 return TRUE;
139}
140
141int
142main (int argc, char *argv[])
143{
144 const char *opt_kind = "grapheme";
145 const char *opt_text = NULL;
146 gboolean opt_version = FALSE;
147 GOptionEntry entries[] = {
148 { "kind", 0, 0, G_OPTION_ARG_STRING, &opt_kind, "Kind of boundary (grapheme/word/line/sentence)", "KIND" },
149 { "text", 0, 0, G_OPTION_ARG_STRING, &opt_text, "Text to display", "STRING" },
150 { "version", 0, 0, G_OPTION_ARG_NONE, &opt_version, "Show version" },
151 { NULL, },
152 };
153 GOptionContext *context;
154 GError *error = NULL;
155 char *text;
156 gsize len;
157
158 g_set_prgname (prgname: "pango-segmentation");
159 setlocale (LC_ALL, locale: "");
160
161 context = g_option_context_new (parameter_string: "[FILE]");
162 g_option_context_add_main_entries (context, entries, NULL);
163 g_option_context_set_description (context,
164 description: "Show text segmentation as determined by Pango.");
165 if (!g_option_context_parse (context, argc: &argc, argv: &argv, error: &error))
166 {
167 g_printerr (format: "%s\n", error->message);
168 exit (status: 1);
169 }
170
171 if (opt_version)
172 {
173 g_print (format: "%s (%s) %s\n", g_get_prgname (), PACKAGE_NAME, PACKAGE_VERSION);
174 exit (status: 0);
175 }
176
177 if (opt_text)
178 {
179 text = (char *)opt_text;
180 }
181 else if (argc > 1)
182 {
183 if (!g_file_get_contents (filename: argv[1], contents: &text, length: &len, error: &error))
184 {
185 g_printerr (format: "%s\n", error->message);
186 exit (status: 1);
187 }
188 }
189 else
190 {
191 g_printerr (format: "Usage: pango-segmentation [OPTIONS…] FILE\n");
192 exit (status: 1);
193 }
194
195 show_segmentation (input: text, kind: kind_from_string (str: opt_kind));
196
197 return 0;
198}
199

/* Source: gtk/subprojects/pango/utils/pango-segmentation.c */