1 | /* |
2 | * Copyright © 2010 Codethink Limited |
3 | * |
4 | * This library is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU Lesser General Public |
6 | * License as published by the Free Software Foundation; either |
7 | * version 2.1 of the License, or (at your option) any later version. |
8 | * |
9 | * This library is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | * Lesser General Public License for more details. |
13 | * |
14 | * You should have received a copy of the GNU Lesser General Public |
15 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
16 | * |
17 | * Author: Ryan Lortie <desrt@desrt.ca> |
18 | */ |
19 | |
20 | #include <string.h> |
21 | #include <glib.h> |
22 | |
23 | /* |
24 | * The string info map is an efficient data structure designed to be |
25 | * used with a small set of items. It is used by GSettings schemas for |
26 | * three purposes: |
27 | * |
28 | * 1) Implement <choices> with a list of valid strings |
29 | * |
30 | * 2) Implement <alias> by mapping one string to another |
31 | * |
32 | * 3) Implement enumerated types by mapping strings to integer values |
33 | * (and back). |
34 | * |
35 | * The map is made out of an array of uint32s. Each entry in the array |
36 | * is an integer value, followed by a specially formatted string value: |
37 | * |
38 | * The string starts with the byte 0xff or 0xfe, followed by the |
39 | * content of the string, followed by a nul byte, followed by |
40 | * additional nul bytes for padding, followed by a 0xff byte. |
41 | * |
42 | * Padding is added so that the entire formatted string takes up a |
43 | * multiple of 4 bytes, and not less than 8 bytes. The requirement |
44 | * for a string to take up 8 bytes is so that the scanner doesn't lose |
45 | * synch and mistake a string for an integer value. |
46 | * |
47 | * The first byte of the formatted string depends on if the integer is |
48 | * an enum value (0xff) or an alias (0xfe). If it is an alias then the |
49 | * number refers to the word offset within the info map at which the |
50 | * integer corresponding to the "target" value is stored. |
51 | * |
52 | * For example, consider the case of the string info map representing an |
53 | * enumerated type of 'foo' (value 1) and 'bar' (value 2) and 'baz' |
54 | * (alias for 'bar'). Note that string info maps are always little |
55 | * endian. |
56 | * |
57 | * x01 x00 x00 x00 xff 'f' 'o' 'o' x00 x00 x00 xff x02 x00 x00 x00 |
58 | * xff 'b' 'a' 'r' x00 x00 x00 xff x03 x00 x00 x00 xfe 'b' 'a' 'z' |
59 | * x00 x00 x00 xff |
60 | * |
61 | * |
62 | * The operations that someone may want to perform with the map: |
63 | * |
64 | * - look up if a string is valid (and not an alias) |
65 | * - look up the integer value for a enum 'nick' |
66 | * - look up the integer value for the target of an alias |
67 | * - look up an alias and convert it to its target string |
68 | * - look up the enum nick for a given value |
69 | * |
70 | * In order to look up if a string is valid, it is padded on either side |
71 | * (as described) and scanned for in the array. For example, you might |
72 | * look for "foo": |
73 | * |
74 | * xff 'f' 'o' 'o' x00 x00 x00 xff |
75 | * |
76 | * In order to look up the integer value for a nick, the string is padded |
77 | * on either side and scanned for in the array, as above. Instead of |
78 | * merely succeeding, we look at the integer value to the left of the |
79 | * match. This is the enum value. |
80 | * |
81 | * In order to look up an alias and convert it to its target enum value, |
82 | * the string is padded on either side (as described, with 0xfe) and |
83 | * scanned for. For example, you might look for "baz": |
84 | * |
85 | * xfe 'b' 'a' 'z' x00 x00 x00 xff |
86 | * |
87 | * The integer immediately preceding the match then contains the offset |
88 | * of the integer value of the target. In our example, that's '3'. |
89 | * This index is dereferenced to find the enum value of '2'. |
90 | * |
91 | * To convert the alias to its target string, 5 bytes just need to be |
92 | * added past the start of the integer value to find the start of the |
93 | * string. |
94 | * |
95 | * To look up the enum nick for a given value, the value is searched for |
96 | * in the array. To ensure that the value isn't matching the inside of a |
97 | * string, we must check that it is either the first item in the array or |
98 | * immediately preceded by the byte 0xff. It must also be immediately |
99 | * followed by the byte 0xff. |
100 | * |
101 | * Because strings always take up a minimum of 2 words, because 0xff or |
102 | * 0xfe never appear inside of a utf-8 string and because no two integer |
103 | * values ever appear in sequence, the only way we can have the |
104 | * sequence: |
105 | * |
106 | * xff __ __ __ __ xff (or 0xfe) |
107 | * |
108 | * is in the event of an integer nested between two strings. |
109 | * |
110 | * For implementation simplicity/efficiency, strings may not be more |
111 | * than 65 characters in length (ie: 17 32bit words after padding). |
112 | * |
113 | * In the event that we are doing <choices> (ie: not an enum type) then |
114 | * the value of each choice is set to zero and ignored. |
115 | */ |
116 | |
117 | #define STRINFO_MAX_WORDS 17 |
118 | G_GNUC_UNUSED static guint |
119 | strinfo_string_to_words (const gchar *string, |
120 | guint32 *words, |
121 | gboolean alias) |
122 | { |
123 | guint n_words; |
124 | gsize size; |
125 | |
126 | size = strlen (s: string); |
127 | |
128 | n_words = MAX (2, (size + 6) >> 2); |
129 | |
130 | if (n_words > STRINFO_MAX_WORDS) |
131 | return FALSE; |
132 | |
133 | words[0] = GUINT32_TO_LE (alias ? 0xfe : 0xff); |
134 | words[n_words - 1] = GUINT32_TO_BE (0xff); |
135 | memcpy (dest: ((gchar *) words) + 1, src: string, n: size + 1); |
136 | |
137 | return n_words; |
138 | } |
139 | |
140 | G_GNUC_UNUSED static gint |
141 | strinfo_scan (const guint32 *strinfo, |
142 | guint length, |
143 | const guint32 *words, |
144 | guint n_words) |
145 | { |
146 | guint i = 0; |
147 | |
148 | if (length < n_words) |
149 | return -1; |
150 | |
151 | while (i <= length - n_words) |
152 | { |
153 | guint j = 0; |
154 | |
155 | for (j = 0; j < n_words; j++) |
156 | if (strinfo[i + j] != words[j]) |
157 | break; |
158 | |
159 | if (j == n_words) |
160 | return i; /* match */ |
161 | |
162 | /* skip at least one word, continue */ |
163 | i += j ? j : 1; |
164 | } |
165 | |
166 | return -1; |
167 | } |
168 | |
169 | G_GNUC_UNUSED static gint |
170 | strinfo_find_string (const guint32 *strinfo, |
171 | guint length, |
172 | const gchar *string, |
173 | gboolean alias) |
174 | { |
175 | guint32 words[STRINFO_MAX_WORDS]; |
176 | guint n_words; |
177 | |
178 | if (length == 0) |
179 | return -1; |
180 | |
181 | n_words = strinfo_string_to_words (string, words, alias); |
182 | |
183 | return strinfo_scan (strinfo: strinfo + 1, length: length - 1, words, n_words); |
184 | } |
185 | |
186 | G_GNUC_UNUSED static gint |
187 | strinfo_find_integer (const guint32 *strinfo, |
188 | guint length, |
189 | guint32 value) |
190 | { |
191 | guint i; |
192 | |
193 | for (i = 0; i < length; i++) |
194 | if (strinfo[i] == GUINT32_TO_LE (value)) |
195 | { |
196 | const guchar *charinfo = (const guchar *) &strinfo[i]; |
197 | |
198 | /* make sure it has 0xff on either side */ |
199 | if ((i == 0 || charinfo[-1] == 0xff) && charinfo[4] == 0xff) |
200 | return i; |
201 | } |
202 | |
203 | return -1; |
204 | } |
205 | |
206 | G_GNUC_UNUSED static gboolean |
207 | strinfo_is_string_valid (const guint32 *strinfo, |
208 | guint length, |
209 | const gchar *string) |
210 | { |
211 | return strinfo_find_string (strinfo, length, string, FALSE) != -1; |
212 | } |
213 | |
214 | G_GNUC_UNUSED static gboolean |
215 | strinfo_enum_from_string (const guint32 *strinfo, |
216 | guint length, |
217 | const gchar *string, |
218 | guint *result) |
219 | { |
220 | gint index; |
221 | |
222 | index = strinfo_find_string (strinfo, length, string, FALSE); |
223 | |
224 | if (index < 0) |
225 | return FALSE; |
226 | |
227 | *result = GUINT32_FROM_LE (strinfo[index]); |
228 | return TRUE; |
229 | } |
230 | |
231 | G_GNUC_UNUSED static const gchar * |
232 | strinfo_string_from_enum (const guint32 *strinfo, |
233 | guint length, |
234 | guint value) |
235 | { |
236 | gint index; |
237 | |
238 | index = strinfo_find_integer (strinfo, length, value); |
239 | |
240 | if (index < 0) |
241 | return NULL; |
242 | |
243 | return 1 + (const gchar *) &strinfo[index + 1]; |
244 | } |
245 | |
246 | G_GNUC_UNUSED static const gchar * |
247 | strinfo_string_from_alias (const guint32 *strinfo, |
248 | guint length, |
249 | const gchar *alias) |
250 | { |
251 | gint index; |
252 | |
253 | index = strinfo_find_string (strinfo, length, string: alias, TRUE); |
254 | |
255 | if (index < 0) |
256 | return NULL; |
257 | |
258 | return 1 + (const gchar *) &strinfo[GUINT32_TO_LE (strinfo[index]) + 1]; |
259 | } |
260 | |
261 | G_GNUC_UNUSED static GVariant * |
262 | strinfo_enumerate (const guint32 *strinfo, |
263 | guint length) |
264 | { |
265 | GVariantBuilder builder; |
266 | const gchar *ptr, *end; |
267 | |
268 | ptr = (gpointer) strinfo; |
269 | end = ptr + 4 * length; |
270 | |
271 | ptr += 4; |
272 | |
273 | g_variant_builder_init (builder: &builder, G_VARIANT_TYPE_STRING_ARRAY); |
274 | |
275 | while (ptr < end) |
276 | { |
277 | /* don't include aliases */ |
278 | if (*ptr == '\xff') |
279 | g_variant_builder_add (builder: &builder, format_string: "s" , ptr + 1); |
280 | |
281 | /* find the end of this string */ |
282 | ptr = memchr (s: ptr, c: '\xff', n: end - ptr); |
283 | g_assert (ptr != NULL); |
284 | |
285 | /* skip over the int to the next string */ |
286 | ptr += 5; |
287 | } |
288 | |
289 | return g_variant_builder_end (builder: &builder); |
290 | } |
291 | |
292 | G_GNUC_UNUSED static void |
293 | strinfo_builder_append_item (GString *builder, |
294 | const gchar *string, |
295 | guint value) |
296 | { |
297 | guint32 words[STRINFO_MAX_WORDS]; |
298 | guint n_words; |
299 | |
300 | value = GUINT32_TO_LE (value); |
301 | |
302 | n_words = strinfo_string_to_words (string, words, FALSE); |
303 | g_string_append_len (string: builder, val: (void *) &value, len: sizeof value); |
304 | g_string_append_len (string: builder, val: (void *) words, len: 4 * n_words); |
305 | } |
306 | |
307 | G_GNUC_UNUSED static gboolean |
308 | strinfo_builder_append_alias (GString *builder, |
309 | const gchar *alias, |
310 | const gchar *target) |
311 | { |
312 | guint32 words[STRINFO_MAX_WORDS]; |
313 | guint n_words; |
314 | guint value; |
315 | gint index; |
316 | |
317 | index = strinfo_find_string (strinfo: (const guint32 *) builder->str, |
318 | length: builder->len / 4, string: target, FALSE); |
319 | |
320 | if (index == -1) |
321 | return FALSE; |
322 | |
323 | value = GUINT32_TO_LE (index); |
324 | |
325 | n_words = strinfo_string_to_words (string: alias, words, TRUE); |
326 | g_string_append_len (string: builder, val: (void *) &value, len: sizeof value); |
327 | g_string_append_len (string: builder, val: (void *) words, len: 4 * n_words); |
328 | |
329 | return TRUE; |
330 | } |
331 | |
332 | G_GNUC_UNUSED static gboolean |
333 | strinfo_builder_contains (GString *builder, |
334 | const gchar *string) |
335 | { |
336 | return strinfo_find_string (strinfo: (const guint32 *) builder->str, |
337 | length: builder->len / 4, string, FALSE) != -1 || |
338 | strinfo_find_string (strinfo: (const guint32 *) builder->str, |
339 | length: builder->len / 4, string, TRUE) != -1; |
340 | } |
341 | |
342 | G_GNUC_UNUSED static gboolean |
343 | strinfo_builder_contains_value (GString *builder, |
344 | guint value) |
345 | { |
346 | return strinfo_string_from_enum (strinfo: (const guint32 *) builder->str, |
347 | length: builder->len / 4, value) != NULL; |
348 | } |
349 | |