1 | /* Pango |
2 | * pango-segmentation.c: Test Pango line breaking |
3 | * |
4 | * Copyright (C) 2021 Red Hat, Inc |
5 | * |
6 | * This library is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Library General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2 of the License, or (at your option) any later version. |
10 | * |
11 | * This library is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Library General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Library General Public |
17 | * License along with this library; if not, write to the |
18 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
19 | * Boston, MA 02111-1307, USA. |
20 | */ |
21 | |
22 | #include <glib.h> |
23 | #include <pango/pangocairo.h> |
24 | #include <string.h> |
25 | #include <stdlib.h> |
26 | #include <locale.h> |
27 | |
28 | #ifndef G_OS_WIN32 |
29 | #include <unistd.h> |
30 | #endif |
31 | |
/* The kinds of text boundary this tool can display.
 *
 * GRAPHEME is deliberately the zero value: kind_from_string() falls
 * back to it when given an unrecognized name.
 */
typedef enum
{
  GRAPHEME = 0,   /* PangoLogAttr.is_cursor_position */
  WORD     = 1,   /* PangoLogAttr.is_word_boundary */
  LINE     = 2,   /* PangoLogAttr.is_line_break */
  SENTENCE = 3    /* PangoLogAttr.is_sentence_boundary */
} BreakKind;
38 | |
39 | static BreakKind |
40 | kind_from_string (const char *str) |
41 | { |
42 | if (strcmp (s1: str, s2: "grapheme" ) == 0) |
43 | return GRAPHEME; |
44 | else if (strcmp (s1: str, s2: "word" ) == 0) |
45 | return WORD; |
46 | else if (strcmp (s1: str, s2: "line" ) == 0) |
47 | return LINE; |
48 | else if (strcmp (s1: str, s2: "sentence" ) == 0) |
49 | return SENTENCE; |
50 | else |
51 | { |
52 | g_printerr (format: "Not a segmentation: %s" , str); |
53 | return 0; |
54 | } |
55 | } |
56 | |
57 | static gboolean |
58 | show_segmentation (const char *input, |
59 | BreakKind kind) |
60 | { |
61 | GString *string; |
62 | PangoContext *context; |
63 | gsize length; |
64 | GError *error = NULL; |
65 | PangoLogAttr *attrs; |
66 | int len; |
67 | char *p; |
68 | int i; |
69 | char *text; |
70 | PangoAttrList *attributes; |
71 | PangoLayout *layout; |
72 | |
73 | context = pango_font_map_create_context (fontmap: pango_cairo_font_map_get_default ()); |
74 | |
75 | string = g_string_new (init: "" ); |
76 | |
77 | length = strlen (s: input); |
78 | len = g_utf8_strlen (p: input, max: -1) + 1; |
79 | |
80 | pango_parse_markup (markup_text: input, length: -1, accel_marker: 0, attr_list: &attributes, text: &text, NULL, error: &error); |
81 | g_assert_no_error (error); |
82 | |
83 | layout = pango_layout_new (context); |
84 | pango_layout_set_text (layout, text, length); |
85 | pango_layout_set_attributes (layout, attrs: attributes); |
86 | |
87 | pango_layout_get_log_attrs (layout, attrs: &attrs, n_attrs: &len); |
88 | |
89 | for (i = 0, p = text; i < len; i++, p = g_utf8_next_char (p)) |
90 | { |
91 | PangoLogAttr log = attrs[i]; |
92 | gboolean is_break = FALSE; |
93 | |
94 | switch (kind) |
95 | { |
96 | case GRAPHEME: |
97 | is_break = log.is_cursor_position; |
98 | break; |
99 | case WORD: |
100 | is_break = log.is_word_boundary; |
101 | break; |
102 | case LINE: |
103 | is_break = log.is_line_break; |
104 | break; |
105 | case SENTENCE: |
106 | is_break = log.is_sentence_boundary; |
107 | break; |
108 | default: |
109 | g_assert_not_reached (); |
110 | } |
111 | |
112 | if (is_break) |
113 | g_string_append (string, val: "|" ); |
114 | |
115 | if (i < len - 1) |
116 | { |
117 | gunichar ch = g_utf8_get_char (p); |
118 | if (ch == 0x20) |
119 | g_string_append (string, val: " " ); |
120 | else if (g_unichar_isgraph (c: ch) && |
121 | !(g_unichar_type (c: ch) == G_UNICODE_LINE_SEPARATOR || |
122 | g_unichar_type (c: ch) == G_UNICODE_PARAGRAPH_SEPARATOR)) |
123 | g_string_append_unichar (string, wc: ch); |
124 | else |
125 | g_string_append_printf (string, format: "[%#04x]" , ch); |
126 | } |
127 | } |
128 | |
129 | g_object_unref (object: layout); |
130 | g_free (mem: attrs); |
131 | g_free (mem: text); |
132 | pango_attr_list_unref (list: attributes); |
133 | |
134 | g_print (format: "%s\n" , string->str); |
135 | |
136 | g_string_free (string, TRUE); |
137 | |
138 | return TRUE; |
139 | } |
140 | |
141 | int |
142 | main (int argc, char *argv[]) |
143 | { |
144 | const char *opt_kind = "grapheme" ; |
145 | const char *opt_text = NULL; |
146 | gboolean opt_version = FALSE; |
147 | GOptionEntry entries[] = { |
148 | { "kind" , 0, 0, G_OPTION_ARG_STRING, &opt_kind, "Kind of boundary (grapheme/word/line/sentence)" , "KIND" }, |
149 | { "text" , 0, 0, G_OPTION_ARG_STRING, &opt_text, "Text to display" , "STRING" }, |
150 | { "version" , 0, 0, G_OPTION_ARG_NONE, &opt_version, "Show version" }, |
151 | { NULL, }, |
152 | }; |
153 | GOptionContext *context; |
154 | GError *error = NULL; |
155 | char *text; |
156 | gsize len; |
157 | |
158 | g_set_prgname (prgname: "pango-segmentation" ); |
159 | setlocale (LC_ALL, locale: "" ); |
160 | |
161 | context = g_option_context_new (parameter_string: "[FILE]" ); |
162 | g_option_context_add_main_entries (context, entries, NULL); |
163 | g_option_context_set_description (context, |
164 | description: "Show text segmentation as determined by Pango." ); |
165 | if (!g_option_context_parse (context, argc: &argc, argv: &argv, error: &error)) |
166 | { |
167 | g_printerr (format: "%s\n" , error->message); |
168 | exit (status: 1); |
169 | } |
170 | |
171 | if (opt_version) |
172 | { |
173 | g_print (format: "%s (%s) %s\n" , g_get_prgname (), PACKAGE_NAME, PACKAGE_VERSION); |
174 | exit (status: 0); |
175 | } |
176 | |
177 | if (opt_text) |
178 | { |
179 | text = (char *)opt_text; |
180 | } |
181 | else if (argc > 1) |
182 | { |
183 | if (!g_file_get_contents (filename: argv[1], contents: &text, length: &len, error: &error)) |
184 | { |
185 | g_printerr (format: "%s\n" , error->message); |
186 | exit (status: 1); |
187 | } |
188 | } |
189 | else |
190 | { |
191 | g_printerr (format: "Usage: pango-segmentation [OPTIONS…] FILE\n" ); |
192 | exit (status: 1); |
193 | } |
194 | |
195 | show_segmentation (input: text, kind: kind_from_string (str: opt_kind)); |
196 | |
197 | return 0; |
198 | } |
199 | |