1 | /* GLIB - Library of useful routines for C programming |
2 | * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald |
3 | * |
4 | * This library is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU Lesser General Public |
6 | * License as published by the Free Software Foundation; either |
7 | * version 2.1 of the License, or (at your option) any later version. |
8 | * |
9 | * This library is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | * Lesser General Public License for more details. |
13 | * |
14 | * You should have received a copy of the GNU Lesser General Public |
15 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
16 | */ |
17 | |
18 | /* |
19 | * Modified by the GLib Team and others 1997-2000. See the AUTHORS |
20 | * file for a list of people on the GLib Team. See the ChangeLog |
21 | * files for a list of changes. These files are distributed with |
22 | * GLib at ftp://ftp.gtk.org/pub/gtk/. |
23 | */ |
24 | |
25 | #include <string.h> |
26 | #include <glib.h> |
27 | |
28 | /* Test conversions between offsets and pointers */ |
29 | |
30 | static void test_utf8 (gconstpointer d) |
31 | { |
32 | gint num_chars; |
33 | const gchar **p; |
34 | gint i, j; |
35 | const gchar *string = d; |
36 | |
37 | g_assert (g_utf8_validate (string, -1, NULL)); |
38 | |
39 | num_chars = g_utf8_strlen (p: string, max: -1); |
40 | |
41 | p = (const gchar **) g_malloc (n_bytes: num_chars * sizeof (gchar *)); |
42 | |
43 | p[0] = string; |
44 | for (i = 1; i < num_chars; i++) |
45 | p[i] = g_utf8_next_char (p[i-1]); |
46 | |
47 | for (i = 0; i < num_chars; i++) |
48 | for (j = 0; j < num_chars; j++) |
49 | { |
50 | g_assert (g_utf8_offset_to_pointer (p[i], j - i) == p[j]); |
51 | g_assert (g_utf8_pointer_to_offset (p[i], p[j]) == j - i); |
52 | } |
53 | |
54 | g_free (mem: p); |
55 | } |
56 | |
57 | gchar *longline = "asdasdas dsaf asfd as fdasdf asfd asdf as dfas dfasdf a" |
58 | "asd fasdf asdf asdf asd fasfd as fdasfd asdf as fdççççççççças ffsd asfd as fdASASASAs As" |
59 | "Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdfg sdfg sdfg sdf gsdfg sdfg sd" |
60 | "asd fasdf asdf asdf asd fasfd as fdaèèèèèèè òòòòòòòòòòòòsfd asdf as fdas ffsd asfd as fdASASASAs D" |
61 | "Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdfg sdfgùùùùùùùùùùùùùù sdfg sdf gsdfg sdfg sd" |
62 | "asd fasdf asdf asdf asd fasfd as fdasfd asd@@@@@@@f as fdas ffsd asfd as fdASASASAs D " |
63 | "Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdf€€€€€€€€€€€€€€€€€€g sdfg sdfg sdf gsdfg sdfg sd" |
64 | "asd fasdf asdf asdf asd fasfd as fdasfd asdf as fdas ffsd asfd as fdASASASAs D" |
65 | "Asfdsf sdfg sdfg dsfg dfg sdfgsdfgsdfgsdfg sdfgsdfg sdfg sdfg sdf gsdfg sdfg sd\n\nlalala\n" ; |
66 | |
67 | static void |
68 | test_length (void) |
69 | { |
70 | g_assert (g_utf8_strlen ("1234" , -1) == 4); |
71 | g_assert (g_utf8_strlen ("1234" , 0) == 0); |
72 | g_assert (g_utf8_strlen ("1234" , 1) == 1); |
73 | g_assert (g_utf8_strlen ("1234" , 2) == 2); |
74 | g_assert (g_utf8_strlen ("1234" , 3) == 3); |
75 | g_assert (g_utf8_strlen ("1234" , 4) == 4); |
76 | g_assert (g_utf8_strlen ("1234" , 5) == 4); |
77 | |
78 | g_assert (g_utf8_strlen (longline, -1) == 762); |
79 | g_assert (g_utf8_strlen (longline, strlen (longline)) == 762); |
80 | g_assert (g_utf8_strlen (longline, 1024) == 762); |
81 | |
82 | g_assert (g_utf8_strlen (NULL, 0) == 0); |
83 | |
84 | g_assert (g_utf8_strlen ("a\340\250\201c" , -1) == 3); |
85 | g_assert (g_utf8_strlen ("a\340\250\201c" , 1) == 1); |
86 | g_assert (g_utf8_strlen ("a\340\250\201c" , 2) == 1); |
87 | g_assert (g_utf8_strlen ("a\340\250\201c" , 3) == 1); |
88 | g_assert (g_utf8_strlen ("a\340\250\201c" , 4) == 2); |
89 | g_assert (g_utf8_strlen ("a\340\250\201c" , 5) == 3); |
90 | } |
91 | |
92 | static void |
93 | test_find (void) |
94 | { |
95 | /* U+0B0B Oriya Letter Vocalic R (\340\254\213) |
96 | * U+10900 Phoenician Letter Alf (\360\220\244\200) |
97 | * U+0041 Latin Capital Letter A (\101) |
98 | * U+1EB6 Latin Capital Letter A With Breve And Dot Below (\341\272\266) |
99 | */ |
100 | #define TEST_STR "\340\254\213\360\220\244\200\101\341\272\266\0\101" |
101 | const gsize str_size = sizeof TEST_STR; |
102 | const gchar *str = TEST_STR; |
103 | const gchar str_array[] = TEST_STR; |
104 | const gchar * volatile str_volatile = TEST_STR; |
105 | #undef TEST_STR |
106 | gchar *str_copy = g_malloc (n_bytes: str_size); |
107 | const gchar *p; |
108 | const gchar *q; |
109 | memcpy (dest: str_copy, src: str, n: str_size); |
110 | |
111 | #define TEST_SET(STR) \ |
112 | G_STMT_START { \ |
113 | p = STR + (str_size - 1); \ |
114 | \ |
115 | q = g_utf8_find_prev_char (STR, p); \ |
116 | g_assert (q == STR + 12); \ |
117 | q = g_utf8_find_prev_char (STR, q); \ |
118 | g_assert (q == STR + 11); \ |
119 | q = g_utf8_find_prev_char (STR, q); \ |
120 | g_assert (q == STR + 8); \ |
121 | q = g_utf8_find_prev_char (STR, q); \ |
122 | g_assert (q == STR + 7); \ |
123 | q = g_utf8_find_prev_char (STR, q); \ |
124 | g_assert (q == STR + 3); \ |
125 | q = g_utf8_find_prev_char (STR, q); \ |
126 | g_assert (q == STR); \ |
127 | q = g_utf8_find_prev_char (STR, q); \ |
128 | g_assert_null (q); \ |
129 | \ |
130 | p = STR + 4; \ |
131 | q = g_utf8_find_prev_char (STR, p); \ |
132 | g_assert (q == STR + 3); \ |
133 | \ |
134 | p = STR + 2; \ |
135 | q = g_utf8_find_prev_char (STR, p); \ |
136 | g_assert (q == STR); \ |
137 | \ |
138 | p = STR + 2; \ |
139 | q = g_utf8_find_next_char (p, NULL); \ |
140 | g_assert (q == STR + 3); \ |
141 | q = g_utf8_find_next_char (q, NULL); \ |
142 | g_assert (q == STR + 7); \ |
143 | \ |
144 | q = g_utf8_find_next_char (p, STR + 6); \ |
145 | g_assert (q == STR + 3); \ |
146 | q = g_utf8_find_next_char (q, STR + 6); \ |
147 | g_assert_null (q); \ |
148 | \ |
149 | q = g_utf8_find_next_char (STR, STR); \ |
150 | g_assert_null (q); \ |
151 | \ |
152 | q = g_utf8_find_next_char (STR + strlen (STR), NULL); \ |
153 | g_assert (q == STR + strlen (STR) + 1); \ |
154 | \ |
155 | /* Check return values when reaching the end of the string, \ |
156 | * with @end set and unset. */ \ |
157 | q = g_utf8_find_next_char (STR + 10, NULL); \ |
158 | g_assert_nonnull (q); \ |
159 | g_assert (*q == '\0'); \ |
160 | \ |
161 | q = g_utf8_find_next_char (STR + 10, STR + 11); \ |
162 | g_assert_null (q); \ |
163 | } G_STMT_END |
164 | |
165 | TEST_SET(str_array); |
166 | TEST_SET(str_copy); |
167 | TEST_SET(str_volatile); |
168 | /* Starting with GCC 8 tests on @str with "-O2 -flto" in CFLAGS fail due to |
169 | * (incorrect?) constant propagation of @str into @g_utf8_find_prev_char. It |
170 | * doesn't happen if @TEST_STR doesn't contain \0 in the middle but the tests |
171 | * should cover all corner cases. |
172 | * For instance, see https://gitlab.gnome.org/GNOME/glib/issues/1917 */ |
173 | |
174 | #undef TEST_SET |
175 | |
176 | g_free (mem: str_copy); |
177 | } |
178 | |
179 | int main (int argc, char *argv[]) |
180 | { |
181 | g_test_init (argc: &argc, argv: &argv, NULL); |
182 | |
183 | g_test_add_data_func (testpath: "/utf8/offsets" , test_data: longline, test_func: test_utf8); |
184 | g_test_add_func (testpath: "/utf8/lengths" , test_func: test_length); |
185 | g_test_add_func (testpath: "/utf8/find" , test_func: test_find); |
186 | |
187 | return g_test_run (); |
188 | } |
189 | |