1 | /* grefstring.c: Reference counted strings |
2 | * |
3 | * Copyright 2018 Emmanuele Bassi |
4 | * |
5 | * This library is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU Lesser General Public |
7 | * License as published by the Free Software Foundation; either |
8 | * version 2.1 of the License, or (at your option) any later version. |
9 | * |
10 | * This library is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | * Lesser General Public License for more details. |
14 | * |
15 | * You should have received a copy of the GNU Lesser General Public |
16 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
17 | */ |
18 | |
19 | /** |
20 | * SECTION:refstring |
21 | * @Title: Reference counted strings |
22 | * @Short_description: Strings with reference counted memory management |
23 | * |
24 | * Reference counted strings are normal C strings that have been augmented |
25 | * with a reference counter to manage their resources. You allocate a new |
26 | * reference counted string and acquire and release references as needed, |
27 | * instead of copying the string among callers; when the last reference on |
28 | * the string is released, the resources allocated for it are freed. |
29 | * |
30 | * Typically, reference counted strings can be used when parsing data from |
31 | * files and storing them into data structures that are passed to various |
32 | * callers: |
33 | * |
34 | * |[<!-- language="C" --> |
35 | * PersonDetails * |
36 | * person_details_from_data (const char *data) |
37 | * { |
38 | * // Use g_autoptr() to simplify error cases |
39 | * g_autoptr(GRefString) full_name = NULL; |
40 | * g_autoptr(GRefString) address = NULL; |
41 | * g_autoptr(GRefString) city = NULL; |
42 | * g_autoptr(GRefString) state = NULL; |
43 | * g_autoptr(GRefString) zip_code = NULL; |
44 | * |
45 | * // parse_person_details() is defined elsewhere; returns refcounted strings |
46 | * if (!parse_person_details (data, &full_name, &address, &city, &state, &zip_code)) |
47 | * return NULL; |
48 | * |
49 | * if (!validate_zip_code (zip_code)) |
50 | * return NULL; |
51 | * |
52 | * // add_address_to_cache() and add_full_name_to_cache() are defined |
53 | * // elsewhere; they add strings to various caches, using refcounted |
54 | * // strings to avoid copying data over and over again |
55 | * add_address_to_cache (address, city, state, zip_code); |
56 | * add_full_name_to_cache (full_name); |
57 | * |
58 | * // person_details_new() is defined elsewhere; it takes a reference |
59 | * // on each string |
60 | * PersonDetails *res = person_details_new (full_name, |
61 | * address, |
62 | * city, |
63 | * state, |
64 | * zip_code); |
65 | * |
66 | * return res; |
67 | * } |
68 | * ]| |
69 | * |
70 | * In the example above, we have multiple functions taking the same strings |
71 | * for different uses; with typical C strings, we'd have to copy the strings |
72 | * every time the life time rules of the data differ from the life time of |
73 | * the string parsed from the original buffer. With reference counted strings, |
74 | * each caller can take a reference on the data, and keep it as long as it |
75 | * needs to own the string. |
76 | * |
77 | * Reference counted strings can also be "interned" inside a global table |
78 | * owned by GLib; while an interned string has at least a reference, creating |
79 | * a new interned reference counted string with the same contents will return |
80 | * a reference to the existing string instead of creating a new reference |
81 | * counted string instance. Once the string loses its last reference, it will |
82 | * be automatically removed from the global interned strings table. |
83 | * |
84 | * Since: 2.58 |
85 | */ |
86 | |
87 | #include "config.h" |
88 | |
89 | #include "grefstring.h" |
90 | |
91 | #include "ghash.h" |
92 | #include "gmessages.h" |
93 | #include "grcbox.h" |
94 | #include "gthread.h" |
95 | |
96 | #include <string.h> |
97 | |
/* A global table of refcounted strings; the hash table does not own
 * the strings, just a pointer to them. Strings are interned as long
 * as they are alive; once their reference count drops to zero, they
 * are removed from the table.
 *
 * The table is created on demand by g_ref_string_new_intern() and is
 * destroyed again by remove_if_interned() as soon as it becomes empty,
 * so the pointer may be NULL. Every access to the table in this file
 * happens while holding the interned_ref_strings lock.
 */
G_LOCK_DEFINE_STATIC (interned_ref_strings);
static GHashTable *interned_ref_strings;
105 | |
106 | /** |
107 | * g_ref_string_new: |
108 | * @str: (not nullable): a NUL-terminated string |
109 | * |
110 | * Creates a new reference counted string and copies the contents of @str |
111 | * into it. |
112 | * |
113 | * Returns: (transfer full) (not nullable): the newly created reference counted string |
114 | * |
115 | * Since: 2.58 |
116 | */ |
117 | char * |
118 | g_ref_string_new (const char *str) |
119 | { |
120 | char *res; |
121 | gsize len; |
122 | |
123 | g_return_val_if_fail (str != NULL, NULL); |
124 | |
125 | len = strlen (s: str); |
126 | |
127 | res = (char *) g_atomic_rc_box_dup (sizeof (char) * len + 1, str); |
128 | |
129 | return res; |
130 | } |
131 | |
132 | /** |
133 | * g_ref_string_new_len: |
134 | * @str: (not nullable): a string |
135 | * @len: length of @str to use, or -1 if @str is nul-terminated |
136 | * |
137 | * Creates a new reference counted string and copies the contents of @str |
138 | * into it, up to @len bytes. |
139 | * |
140 | * Since this function does not stop at nul bytes, it is the caller's |
141 | * responsibility to ensure that @str has at least @len addressable bytes. |
142 | * |
143 | * Returns: (transfer full) (not nullable): the newly created reference counted string |
144 | * |
145 | * Since: 2.58 |
146 | */ |
147 | char * |
148 | g_ref_string_new_len (const char *str, gssize len) |
149 | { |
150 | char *res; |
151 | |
152 | g_return_val_if_fail (str != NULL, NULL); |
153 | |
154 | if (len < 0) |
155 | return g_ref_string_new (str); |
156 | |
157 | /* allocate then copy as str[len] may not be readable */ |
158 | res = (char *) g_atomic_rc_box_alloc (block_size: (gsize) len + 1); |
159 | memcpy (dest: res, src: str, n: len); |
160 | res[len] = '\0'; |
161 | |
162 | return res; |
163 | } |
164 | |
165 | /* interned_str_equal: variant of g_str_equal() that compares |
166 | * pointers as well as contents; this avoids running strcmp() |
167 | * on arbitrarily long strings, as it's more likely to have |
168 | * g_ref_string_new_intern() being called on the same refcounted |
169 | * string instance, than on a different string with the same |
170 | * contents |
171 | */ |
172 | static gboolean |
173 | interned_str_equal (gconstpointer v1, |
174 | gconstpointer v2) |
175 | { |
176 | const char *str1 = v1; |
177 | const char *str2 = v2; |
178 | |
179 | if (v1 == v2) |
180 | return TRUE; |
181 | |
182 | return strcmp (s1: str1, s2: str2) == 0; |
183 | } |
184 | |
185 | /** |
186 | * g_ref_string_new_intern: |
187 | * @str: (not nullable): a NUL-terminated string |
188 | * |
189 | * Creates a new reference counted string and copies the content of @str |
190 | * into it. |
191 | * |
192 | * If you call this function multiple times with the same @str, or with |
193 | * the same contents of @str, it will return a new reference, instead of |
194 | * creating a new string. |
195 | * |
196 | * Returns: (transfer full) (not nullable): the newly created reference |
197 | * counted string, or a new reference to an existing string |
198 | * |
199 | * Since: 2.58 |
200 | */ |
201 | char * |
202 | g_ref_string_new_intern (const char *str) |
203 | { |
204 | char *res; |
205 | |
206 | g_return_val_if_fail (str != NULL, NULL); |
207 | |
208 | G_LOCK (interned_ref_strings); |
209 | |
210 | if (G_UNLIKELY (interned_ref_strings == NULL)) |
211 | interned_ref_strings = g_hash_table_new (hash_func: g_str_hash, key_equal_func: interned_str_equal); |
212 | |
213 | res = g_hash_table_lookup (hash_table: interned_ref_strings, key: str); |
214 | if (res != NULL) |
215 | { |
216 | /* We acquire the reference while holding the lock, to |
217 | * avoid a potential race between releasing the lock on |
218 | * the hash table and another thread releasing the reference |
219 | * on the same string |
220 | */ |
221 | g_atomic_rc_box_acquire (res); |
222 | G_UNLOCK (interned_ref_strings); |
223 | return res; |
224 | } |
225 | |
226 | res = g_ref_string_new (str); |
227 | g_hash_table_add (hash_table: interned_ref_strings, key: res); |
228 | G_UNLOCK (interned_ref_strings); |
229 | |
230 | return res; |
231 | } |
232 | |
233 | /** |
234 | * g_ref_string_acquire: |
235 | * @str: a reference counted string |
236 | * |
237 | * Acquires a reference on a string. |
238 | * |
239 | * Returns: the given string, with its reference count increased |
240 | * |
241 | * Since: 2.58 |
242 | */ |
243 | char * |
244 | g_ref_string_acquire (char *str) |
245 | { |
246 | g_return_val_if_fail (str != NULL, NULL); |
247 | |
248 | return g_atomic_rc_box_acquire (str); |
249 | } |
250 | |
251 | static void |
252 | remove_if_interned (gpointer data) |
253 | { |
254 | char *str = data; |
255 | |
256 | G_LOCK (interned_ref_strings); |
257 | |
258 | if (G_LIKELY (interned_ref_strings != NULL)) |
259 | { |
260 | g_hash_table_remove (hash_table: interned_ref_strings, key: str); |
261 | |
262 | if (g_hash_table_size (hash_table: interned_ref_strings) == 0) |
263 | g_clear_pointer (&interned_ref_strings, g_hash_table_destroy); |
264 | } |
265 | |
266 | G_UNLOCK (interned_ref_strings); |
267 | } |
268 | |
269 | /** |
270 | * g_ref_string_release: |
271 | * @str: a reference counted string |
272 | * |
273 | * Releases a reference on a string; if it was the last reference, the |
274 | * resources allocated by the string are freed as well. |
275 | * |
276 | * Since: 2.58 |
277 | */ |
278 | void |
279 | g_ref_string_release (char *str) |
280 | { |
281 | g_return_if_fail (str != NULL); |
282 | |
283 | g_atomic_rc_box_release_full (mem_block: str, clear_func: remove_if_interned); |
284 | } |
285 | |
286 | /** |
287 | * g_ref_string_length: |
288 | * @str: a reference counted string |
289 | * |
290 | * Retrieves the length of @str. |
291 | * |
292 | * Returns: the length of the given string, in bytes |
293 | * |
294 | * Since: 2.58 |
295 | */ |
296 | gsize |
297 | g_ref_string_length (char *str) |
298 | { |
299 | g_return_val_if_fail (str != NULL, 0); |
300 | |
301 | return g_atomic_rc_box_get_size (mem_block: str) - 1; |
302 | } |
303 | |