ghostutils.c source code [gtk/subprojects/glib/glib/ghostutils.c]

/* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*- */
2
/* GLIB - Library of useful routines for C programming
 * Copyright (C) 2008 Red Hat, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General
 * Public License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
19
20	#include "config.h"
21	#include "glibconfig.h"
22
23	#include <string.h>
24
25	#ifdef G_OS_UNIX
26	#include <unistd.h>
27	#endif
28
29	#include "ghostutils.h"
30
31	#include "garray.h"
32	#include "gmem.h"
33	#include "gstring.h"
34	#include "gstrfuncs.h"
35	#include "glibintl.h"
36
37	#ifdef G_PLATFORM_WIN32
38	#include <windows.h>
39	#endif
40
41
/**
 * SECTION:ghostutils
 * @short_description: Internet hostname utilities
 *
 * Functions for manipulating internet hostnames; in particular, for
 * converting between Unicode and ASCII-encoded forms of
 * Internationalized Domain Names (IDNs).
 *
 * The
 * [Internationalized Domain Names for Applications (IDNA)](http://www.ietf.org/rfc/rfc3490.txt)
 * standards allow for the use
 * of Unicode domain names in applications, while providing
 * backward-compatibility with the old ASCII-only DNS, by defining an
 * ASCII-Compatible Encoding of any given Unicode name, which can be
 * used with non-IDN-aware applications and protocols. (For example,
 * "Παν語.org" maps to "xn--4wa8awb4637h.org".)
 **/
59
/* Prefix that marks a label as ASCII-compatible-encoded (ACE), and its
 * length in bytes (not counting the terminating NUL).
 */
#define IDNA_ACE_PREFIX "xn--"
#define IDNA_ACE_PREFIX_LEN 4

/* Punycode constants, from RFC 3492. */

#define PUNYCODE_BASE 36
#define PUNYCODE_TMIN 1
#define PUNYCODE_TMAX 26
#define PUNYCODE_SKEW 38
#define PUNYCODE_DAMP 700
#define PUNYCODE_INITIAL_BIAS 72
#define PUNYCODE_INITIAL_N 0x80

/* A "basic" code point is one below 0x80, i.e. in the ASCII range. */
#define PUNYCODE_IS_BASIC(cp) ((guint)(cp) < 0x80)
74
75	/ Encode/decode a single base-36 digit /
76	static inline gchar
77	encode_digit (guint dig)
78	{
79	if (dig < `26`)
80	return dig + `'a'`;
81	else
82	return dig - `26` + `'0'`;
83	}
84
85	static inline guint
86	decode_digit (gchar dig)
87	{
88	if (dig >= `'A'` && dig <= `'Z'`)
89	return dig - `'A'`;
90	else if (dig >= `'a'` && dig <= `'z'`)
91	return dig - `'a'`;
92	else if (dig >= `'0'` && dig <= `'9'`)
93	return dig - `'0'` + `26`;
94	else
95	return G_MAXUINT;
96	}
97
98	/ Punycode bias adaptation algorithm, RFC 3492 section 6.1 /
99	static guint
100	adapt (guint delta,
101	guint numpoints,
102	gboolean firsttime)
103	{
104	guint k;
105
106	delta = firsttime ? delta / PUNYCODE_DAMP : delta / `2`;
107	delta += delta / numpoints;
108
109	k = `0`;
110	while (delta > ((PUNYCODE_BASE - PUNYCODE_TMIN) * PUNYCODE_TMAX) / `2`)
111	{
112	delta /= PUNYCODE_BASE - PUNYCODE_TMIN;
113	k += PUNYCODE_BASE;
114	}
115
116	return k + ((PUNYCODE_BASE - PUNYCODE_TMIN + `1`) * delta /
117	(delta + PUNYCODE_SKEW));
118	}
119
120	/ Punycode encoder, RFC 3492 section 6.3. The algorithm is*
121	* sufficiently bizarre that it's not really worth trying to explain
122	* here.
123	*/
124	static gboolean
125	punycode_encode (const gchar *input_utf8,
126	gsize input_utf8_length,
127	GString *output)
128	{
129	guint delta, handled_chars, num_basic_chars, bias, j, q, k, t, digit;
130	gunichar n, m, *input;
131	glong input_length;
132	gboolean success = FALSE;
133
134	/ Convert from UTF-8 to Unicode code points /
135	input = g_utf8_to_ucs4 (str: input_utf8, len: input_utf8_length, NULL,
136	items_written: &input_length, NULL);
137	if (!input)
138	return FALSE;
139
140	/ Copy basic chars /
141	for (j = num_basic_chars = `0`; j < input_length; j++)
142	{
143	if (PUNYCODE_IS_BASIC (input[j]))
144	{
145	g_string_append_c (output, g_ascii_tolower (input[j]));
146	num_basic_chars++;
147	}
148	}
149	if (num_basic_chars)
150	g_string_append_c (output, `'-'`);
151
152	handled_chars = num_basic_chars;
153
154	/ Encode non-basic chars /
155	delta = `0`;
156	bias = PUNYCODE_INITIAL_BIAS;
157	n = PUNYCODE_INITIAL_N;
158	while (handled_chars < input_length)
159	{
160	/ let m = the minimum {non-basic} code point >= n in the input /
161	for (m = G_MAXUINT, j = `0`; j < input_length; j++)
162	{
163	if (input[j] >= n && input[j] < m)
164	m = input[j];
165	}
166
167	if (m - n > (G_MAXUINT - delta) / (handled_chars + `1`))
168	goto fail;
169	delta += (m - n) * (handled_chars + `1`);
170	n = m;
171
172	for (j = `0`; j < input_length; j++)
173	{
174	if (input[j] < n)
175	{
176	if (++delta == `0`)
177	goto fail;
178	}
179	else if (input[j] == n)
180	{
181	q = delta;
182	for (k = PUNYCODE_BASE; ; k += PUNYCODE_BASE)
183	{
184	if (k <= bias)
185	t = PUNYCODE_TMIN;
186	else if (k >= bias + PUNYCODE_TMAX)
187	t = PUNYCODE_TMAX;
188	else
189	t = k - bias;
190	if (q < t)
191	break;
192	digit = t + (q - t) % (PUNYCODE_BASE - t);
193	g_string_append_c (output, encode_digit (digit));
194	q = (q - t) / (PUNYCODE_BASE - t);
195	}
196
197	g_string_append_c (output, encode_digit (q));
198	bias = adapt (delta, numpoints: handled_chars + `1`, firsttime: handled_chars == num_basic_chars);
199	delta = `0`;
200	handled_chars++;
201	}
202	}
203
204	delta++;
205	n++;
206	}
207
208	success = TRUE;
209
210	fail:
211	g_free (mem: input);
212	return success;
213	}
214
/* Code points that are "mapped to nothing" before further processing.
 * From RFC 3454, Table B.1. Note that @ch is evaluated more than once.
 */
#define idna_is_junk(ch) ( \
  (ch) == 0x00AD || (ch) == 0x1806 || (ch) == 0x200B || \
  (ch) == 0x2060 || (ch) == 0xFEFF || (ch) == 0x034F || \
  (ch) == 0x180B || (ch) == 0x180C || (ch) == 0x180D || \
  (ch) == 0x200C || (ch) == 0x200D || \
  ((ch) >= 0xFE00 && (ch) <= 0xFE0F) )
217
218	/ Scan @str for "junk" and return a cleaned-up string if any junk*
219	* is found. Else return %NULL.
220	*/
221	static gchar *
222	remove_junk (const gchar *str,
223	gint len)
224	{
225	GString *cleaned = NULL;
226	const gchar *p;
227	gunichar ch;
228
229	for (p = str; len == -`1` ? *p : p < str + len; p = g_utf8_next_char (p))
230	{
231	ch = g_utf8_get_char (p);
232	if (idna_is_junk (ch))
233	{
234	if (!cleaned)
235	{
236	cleaned = g_string_new (NULL);
237	g_string_append_len (string: cleaned, val: str, len: p - str);
238	}
239	}
240	else if (cleaned)
241	g_string_append_unichar (string: cleaned, wc: ch);
242	}
243
244	if (cleaned)
245	return g_string_free (string: cleaned, FALSE);
246	else
247	return NULL;
248	}
249
250	static inline gboolean
251	contains_uppercase_letters (const gchar *str,
252	gint len)
253	{
254	const gchar *p;
255
256	for (p = str; len == -`1` ? *p : p < str + len; p = g_utf8_next_char (p))
257	{
258	if (g_unichar_isupper (c: g_utf8_get_char (p)))
259	return TRUE;
260	}
261	return FALSE;
262	}
263
264	static inline gboolean
265	contains_non_ascii (const gchar *str,
266	gint len)
267	{
268	const gchar *p;
269
270	for (p = str; len == -`1` ? *p : p < str + len; p++)
271	{
272	if ((guchar)*p > `0x80`)
273	return TRUE;
274	}
275	return FALSE;
276	}
277
278	/ RFC 3454, Appendix C. ish. /
279	static inline gboolean
280	idna_is_prohibited (gunichar ch)
281	{
282	switch (g_unichar_type (c: ch))
283	{
284	case G_UNICODE_CONTROL:
285	case G_UNICODE_FORMAT:
286	case G_UNICODE_UNASSIGNED:
287	case G_UNICODE_PRIVATE_USE:
288	case G_UNICODE_SURROGATE:
289	case G_UNICODE_LINE_SEPARATOR:
290	case G_UNICODE_PARAGRAPH_SEPARATOR:
291	case G_UNICODE_SPACE_SEPARATOR:
292	return TRUE;
293
294	case G_UNICODE_OTHER_SYMBOL:
295	if (ch == `0xFFFC` \|\| ch == `0xFFFD` \|\|
296	(ch >= `0x2FF0` && ch <= `0x2FFB`))
297	return TRUE;
298	return FALSE;
299
300	case G_UNICODE_NON_SPACING_MARK:
301	if (ch == `0x0340` \|\| ch == `0x0341`)
302	return TRUE;
303	return FALSE;
304
305	default:
306	return FALSE;
307	}
308	}
309
310	/ RFC 3491 IDN cleanup algorithm. /
311	static gchar *
312	nameprep (const gchar *hostname,
313	gint len,
314	gboolean *is_unicode)
315	{
316	gchar name, tmp = NULL, *p;
317
318	/ It would be nice if we could do this without repeatedly*
319	* allocating strings and converting back and forth between
320	* gunichars and UTF-8... The code does at least avoid doing most of
321	* the sub-operations when they would just be equivalent to a
322	* g_strdup().
323	*/
324
325	/ Remove presentation-only characters /
326	name = remove_junk (str: hostname, len);
327	if (name)
328	{
329	tmp = name;
330	len = -`1`;
331	}
332	else
333	name = (gchar *)hostname;
334
335	/ Convert to lowercase /
336	if (contains_uppercase_letters (str: name, len))
337	{
338	name = g_utf8_strdown (str: name, len);
339	g_free (mem: tmp);
340	tmp = name;
341	len = -`1`;
342	}
343
344	/ If there are no UTF8 characters, we're done. /
345	if (!contains_non_ascii (str: name, len))
346	{
347	*is_unicode = FALSE;
348	if (name == (gchar *)hostname)
349	return len == -`1` ? g_strdup (str: hostname) : g_strndup (str: hostname, n: len);
350	else
351	return name;
352	}
353
354	*is_unicode = TRUE;
355
356	/ Normalize /
357	name = g_utf8_normalize (str: name, len, mode: G_NORMALIZE_NFKC);
358	g_free (mem: tmp);
359	tmp = name;
360
361	if (!name)
362	return NULL;
363
364	/ KC normalization may have created more capital letters (eg,*
365	* angstrom -> capital A with ring). So we have to lowercasify a
366	* second time. (This is more-or-less how the nameprep algorithm
367	* does it. If tolower(nfkc(tolower(X))) is guaranteed to be the
368	* same as tolower(nfkc(X)), then we could skip the first tolower,
369	* but I'm not sure it is.)
370	*/
371	if (contains_uppercase_letters (str: name, len: -`1`))
372	{
373	name = g_utf8_strdown (str: name, len: -`1`);
374	g_free (mem: tmp);
375	tmp = name;
376	}
377
378	/ Check for prohibited characters /
379	for (p = name; *p; p = g_utf8_next_char (p))
380	{
381	if (idna_is_prohibited (ch: g_utf8_get_char (p)))
382	{
383	name = NULL;
384	g_free (mem: tmp);
385	goto done;
386	}
387	}
388
389	/ FIXME: We're supposed to verify certain constraints on bidi*
390	* characters, but glib does not appear to have that information.
391	*/
392
393	done:
394	return name;
395	}
396
/* RFC 3490, section 3.1 says '.', 0x3002, 0xFF0E, and 0xFF61 count as
 * label-separating dots. The last three are matched by their UTF-8
 * encodings: E3 80 82, EF BC 8E and EF BD A1 respectively.
 *
 * @str must be '\0'-terminated: up to three bytes are inspected, and
 * only the terminator stops the comparison from reading further.
 */
#define idna_is_dot(str) (                          \
  ((guchar)(str)[0] == '.') ||                      \
  ((guchar)(str)[0] == 0xE3 &&                      \
   (guchar)(str)[1] == 0x80 &&                      \
   (guchar)(str)[2] == 0x82) ||                     \
  ((guchar)(str)[0] == 0xEF &&                      \
   (guchar)(str)[1] == 0xBC &&                      \
   (guchar)(str)[2] == 0x8E) ||                     \
  ((guchar)(str)[0] == 0xEF &&                      \
   (guchar)(str)[1] == 0xBD &&                      \
   (guchar)(str)[2] == 0xA1) )
405
406	static const gchar *
407	idna_end_of_label (const gchar *str)
408	{
409	for (; *str; str = g_utf8_next_char (str))
410	{
411	if (idna_is_dot (str))
412	return str;
413	}
414	return str;
415	}
416
417	static gsize
418	get_hostname_max_length_bytes (void)
419	{
420	#if defined(G_OS_WIN32)
421	wchar_t tmp[MAX_COMPUTERNAME_LENGTH];
422	return sizeof (tmp) / sizeof (tmp[`0`]);
423	#elif defined(_SC_HOST_NAME_MAX)
424	glong max = sysconf (_SC_HOST_NAME_MAX);
425	if (max > `0`)
426	return (gsize) max;
427
428	#ifdef HOST_NAME_MAX
429	return HOST_NAME_MAX;
430	#else
431	return _POSIX_HOST_NAME_MAX;
432	#endif /* HOST_NAME_MAX */
433	#else
434	/ Fallback to some reasonable value*
435	* See https://stackoverflow.com/questions/8724954/what-is-the-maximum-number-of-characters-for-a-host-name-in-unix/28918017#28918017 */
436	return `255`;
437	#endif
438	}
439
440	/ Returns %TRUE if `strlen (str) > comparison_length`, but without actually*
441	* running `strlen(str)`, as that would take a very long time for long
442	* (untrusted) input strings. */
443	static gboolean
444	strlen_greater_than (const gchar *str,
445	gsize comparison_length)
446	{
447	gsize i;
448
449	for (i = `0`; str[i] != `'\0'`; i++)
450	if (i > comparison_length)
451	return TRUE;
452
453	return FALSE;
454	}
455
456	/**
457	* g_hostname_to_ascii:
458	* @hostname: a valid UTF-8 or ASCII hostname
459	*
460	* Converts @hostname to its canonical ASCII form; an ASCII-only
461	* string containing no uppercase letters and not ending with a
462	* trailing dot.
463	*
464	* Returns: (nullable) (transfer full): an ASCII hostname, which must be freed,
465	* or %NULL if @hostname is in some way invalid.
466	*
467	* Since: 2.22
468	**/
469	gchar *
470	g_hostname_to_ascii (const gchar *hostname)
471	{
472	gchar name, label, *p;
473	GString *out;
474	gssize llen, oldlen;
475	gboolean unicode;
476	gsize hostname_max_length_bytes = get_hostname_max_length_bytes ();
477
478	/ Do an initial check on the hostname length, as overlong hostnames take a*
479	* long time in the IDN cleanup algorithm in nameprep(). The ultimate
480	* restriction is that the IDN-decoded (i.e. pure ASCII) hostname cannot be
481	* longer than 255 bytes. That’s the least restrictive limit on hostname
482	* length of all the ways hostnames can be interpreted. Typically, the
483	* hostname will be an FQDN, which is limited to 253 bytes long. POSIX
484	* hostnames are limited to `get_hostname_max_length_bytes()` (typically 255
485	* bytes).
486	*
487	* See https://stackoverflow.com/a/28918017/2931197
488	*
489	* It’s possible for a hostname to be %-encoded, in which case its decoded
490	* length will be as much as 3× shorter.
491	*
492	* It’s also possible for a hostname to use overlong UTF-8 encodings, in which
493	* case its decoded length will be as much as 4× shorter.
494	*
495	* Note: This check is not intended as an absolute guarantee that a hostname
496	* is the right length and will be accepted by other systems. It’s intended to
497	* stop wildly-invalid hostnames from taking forever in nameprep().
498	*/
499	if (hostname_max_length_bytes <= G_MAXSIZE / `4` &&
500	strlen_greater_than (str: hostname, comparison_length: `4` * MAX (`255`, hostname_max_length_bytes)))
501	return NULL;
502
503	label = name = nameprep (hostname, len: -`1`, is_unicode: &unicode);
504	if (!name \|\| !unicode)
505	return name;
506
507	out = g_string_new (NULL);
508
509	do
510	{
511	unicode = FALSE;
512	for (p = label; *p && !idna_is_dot (p); p++)
513	{
514	if ((guchar)*p > `0x80`)
515	unicode = TRUE;
516	}
517
518	oldlen = out->len;
519	llen = p - label;
520	if (unicode)
521	{
522	if (!strncmp (s1: label, IDNA_ACE_PREFIX, IDNA_ACE_PREFIX_LEN))
523	goto fail;
524
525	g_string_append (string: out, IDNA_ACE_PREFIX);
526	if (!punycode_encode (input_utf8: label, input_utf8_length: llen, output: out))
527	goto fail;
528	}
529	else
530	g_string_append_len (string: out, val: label, len: llen);
531
532	if (out->len - oldlen > `63`)
533	goto fail;
534
535	label += llen;
536	if (*label)
537	label = g_utf8_next_char (label);
538	if (*label)
539	g_string_append_c (out, `'.'`);
540	}
541	while (*label);
542
543	g_free (mem: name);
544	return g_string_free (string: out, FALSE);
545
546	fail:
547	g_free (mem: name);
548	g_string_free (string: out, TRUE);
549	return NULL;
550	}
551
/**
 * g_hostname_is_non_ascii:
 * @hostname: a hostname
 *
 * Tests if @hostname contains Unicode characters. If this returns
 * %TRUE, you need to encode the hostname with g_hostname_to_ascii()
 * before using it in non-IDN-aware contexts.
 *
 * Note that a hostname might contain a mix of encoded and unencoded
 * segments, and so it is possible for g_hostname_is_non_ascii() and
 * g_hostname_is_ascii_encoded() to both return %TRUE for a name.
 *
 * Returns: %TRUE if @hostname contains any non-ASCII characters
 *
 * Since: 2.22
 **/
gboolean
g_hostname_is_non_ascii (const gchar *hostname)
{
  /* A length of -1 makes the helper scan up to the terminating NUL. */
  return contains_non_ascii (hostname, -1);
}
573
574	/ Punycode decoder, RFC 3492 section 6.2. As with punycode_encode(),*
575	* read the RFC if you want to understand what this is actually doing.
576	*/
577	static gboolean
578	punycode_decode (const gchar *input,
579	gsize input_length,
580	GString *output)
581	{
582	GArray *output_chars;
583	gunichar n;
584	guint i, bias;
585	guint oldi, w, k, digit, t;
586	const gchar *split;
587
588	n = PUNYCODE_INITIAL_N;
589	i = `0`;
590	bias = PUNYCODE_INITIAL_BIAS;
591
592	split = input + input_length - `1`;
593	while (split > input && *split != `'-'`)
594	split--;
595	if (split > input)
596	{
597	output_chars = g_array_sized_new (FALSE, FALSE, element_size: sizeof (gunichar),
598	reserved_size: split - input);
599	input_length -= (split - input) + `1`;
600	while (input < split)
601	{
602	gunichar ch = (gunichar)*input++;
603	if (!PUNYCODE_IS_BASIC (ch))
604	goto fail;
605	g_array_append_val (output_chars, ch);
606	}
607	input++;
608	}
609	else
610	output_chars = g_array_new (FALSE, FALSE, element_size: sizeof (gunichar));
611
612	while (input_length)
613	{
614	oldi = i;
615	w = `1`;
616	for (k = PUNYCODE_BASE; ; k += PUNYCODE_BASE)
617	{
618	if (!input_length--)
619	goto fail;
620	digit = decode_digit (dig: *input++);
621	if (digit >= PUNYCODE_BASE)
622	goto fail;
623	if (digit > (G_MAXUINT - i) / w)
624	goto fail;
625	i += digit * w;
626	if (k <= bias)
627	t = PUNYCODE_TMIN;
628	else if (k >= bias + PUNYCODE_TMAX)
629	t = PUNYCODE_TMAX;
630	else
631	t = k - bias;
632	if (digit < t)
633	break;
634	if (w > G_MAXUINT / (PUNYCODE_BASE - t))
635	goto fail;
636	w *= (PUNYCODE_BASE - t);
637	}
638
639	bias = adapt (delta: i - oldi, numpoints: output_chars->len + `1`, firsttime: oldi == `0`);
640
641	if (i / (output_chars->len + `1`) > G_MAXUINT - n)
642	goto fail;
643	n += i / (output_chars->len + `1`);
644	i %= (output_chars->len + `1`);
645
646	g_array_insert_val (output_chars, i++, n);
647	}
648
649	for (i = `0`; i < output_chars->len; i++)
650	g_string_append_unichar (string: output, g_array_index (output_chars, gunichar, i));
651	g_array_free (array: output_chars, TRUE);
652	return TRUE;
653
654	fail:
655	g_array_free (array: output_chars, TRUE);
656	return FALSE;
657	}
658
659	/**
660	* g_hostname_to_unicode:
661	* @hostname: a valid UTF-8 or ASCII hostname
662	*
663	* Converts @hostname to its canonical presentation form; a UTF-8
664	* string in Unicode normalization form C, containing no uppercase
665	* letters, no forbidden characters, and no ASCII-encoded segments,
666	* and not ending with a trailing dot.
667	*
668	* Of course if @hostname is not an internationalized hostname, then
669	* the canonical presentation form will be entirely ASCII.
670	*
671	* Returns: (nullable) (transfer full): a UTF-8 hostname, which must be freed,
672	* or %NULL if @hostname is in some way invalid.
673	*
674	* Since: 2.22
675	**/
676	gchar *
677	g_hostname_to_unicode (const gchar *hostname)
678	{
679	GString *out;
680	gssize llen;
681	gsize hostname_max_length_bytes = get_hostname_max_length_bytes ();
682
683	/ See the comment at the top of g_hostname_to_ascii(). /
684	if (hostname_max_length_bytes <= G_MAXSIZE / `4` &&
685	strlen_greater_than (str: hostname, comparison_length: `4` * MAX (`255`, hostname_max_length_bytes)))
686	return NULL;
687
688	out = g_string_new (NULL);
689
690	do
691	{
692	llen = idna_end_of_label (str: hostname) - hostname;
693	if (!g_ascii_strncasecmp (s1: hostname, IDNA_ACE_PREFIX, IDNA_ACE_PREFIX_LEN))
694	{
695	hostname += IDNA_ACE_PREFIX_LEN;
696	llen -= IDNA_ACE_PREFIX_LEN;
697	if (!punycode_decode (input: hostname, input_length: llen, output: out))
698	{
699	g_string_free (string: out, TRUE);
700	return NULL;
701	}
702	}
703	else
704	{
705	gboolean unicode;
706	gchar *canonicalized = nameprep (hostname, len: llen, is_unicode: &unicode);
707
708	if (!canonicalized)
709	{
710	g_string_free (string: out, TRUE);
711	return NULL;
712	}
713	g_string_append (string: out, val: canonicalized);
714	g_free (mem: canonicalized);
715	}
716
717	hostname += llen;
718	if (*hostname)
719	hostname = g_utf8_next_char (hostname);
720	if (*hostname)
721	g_string_append_c (out, `'.'`);
722	}
723	while (*hostname);
724
725	return g_string_free (string: out, FALSE);
726	}
727
728	/**
729	* g_hostname_is_ascii_encoded:
730	* @hostname: a hostname
731	*
732	* Tests if @hostname contains segments with an ASCII-compatible
733	* encoding of an Internationalized Domain Name. If this returns
734	* %TRUE, you should decode the hostname with g_hostname_to_unicode()
735	* before displaying it to the user.
736	*
737	* Note that a hostname might contain a mix of encoded and unencoded
738	* segments, and so it is possible for g_hostname_is_non_ascii() and
739	* g_hostname_is_ascii_encoded() to both return %TRUE for a name.
740	*
741	* Returns: %TRUE if @hostname contains any ASCII-encoded
742	* segments.
743	*
744	* Since: 2.22
745	**/
746	gboolean
747	g_hostname_is_ascii_encoded (const gchar *hostname)
748	{
749	while (`1`)
750	{
751	if (!g_ascii_strncasecmp (s1: hostname, IDNA_ACE_PREFIX, IDNA_ACE_PREFIX_LEN))
752	return TRUE;
753	hostname = idna_end_of_label (str: hostname);
754	if (*hostname)
755	hostname = g_utf8_next_char (hostname);
756	if (!*hostname)
757	return FALSE;
758	}
759	}
760
761	/**
762	* g_hostname_is_ip_address:
763	* @hostname: a hostname (or IP address in string form)
764	*
765	* Tests if @hostname is the string form of an IPv4 or IPv6 address.
766	* (Eg, "192.168.0.1".)
767	*
768	* Since 2.66, IPv6 addresses with a zone-id are accepted (RFC6874).
769	*
770	* Returns: %TRUE if @hostname is an IP address
771	*
772	* Since: 2.22
773	**/
774	gboolean
775	g_hostname_is_ip_address (const gchar *hostname)
776	{
777	gchar p, end;
778	gint nsegments, octet;
779
780	/ On Linux we could implement this using inet_pton, but the Windows*
781	* equivalent of that requires linking against winsock, so we just
782	* figure this out ourselves. Tested by tests/hostutils.c.
783	*/
784
785	p = (char *)hostname;
786
787	if (strchr (s: p, c: `':'`))
788	{
789	gboolean skipped;
790
791	/ If it contains a ':', it's an IPv6 address (assuming it's an*
792	* IP address at all). This consists of eight ':'-separated
793	* segments, each containing a 1-4 digit hex number, except that
794	* optionally: (a) the last two segments can be replaced by an
795	* IPv4 address, and (b) a single span of 1 to 8 "0000" segments
796	* can be replaced with just "::".
797	*/
798
799	nsegments = `0`;
800	skipped = FALSE;
801	while (p && p != `'%'` && nsegments < `8`)
802	{
803	/ Each segment after the first must be preceded by a ':'.*
804	* (We also handle half of the "string starts with ::" case
805	* here.)
806	*/
807	if (p != (char *)hostname \|\| (p[`0`] == `':'` && p[`1`] == `':'`))
808	{
809	if (*p != `':'`)
810	return FALSE;
811	p++;
812	}
813
814	/ If there's another ':', it means we're skipping some segments /
815	if (*p == `':'` && !skipped)
816	{
817	skipped = TRUE;
818	nsegments++;
819
820	/ Handle the "string ends with ::" case /
821	if (!p[`1`])
822	p++;
823
824	continue;
825	}
826
827	/ Read the segment, make sure it's valid. /
828	for (end = p; g_ascii_isxdigit (*end); end++)
829	;
830	if (end == p \|\| end > p + `4`)
831	return FALSE;
832
833	if (*end == `'.'`)
834	{
835	if ((nsegments == `6` && !skipped) \|\| (nsegments <= `6` && skipped))
836	goto parse_ipv4;
837	else
838	return FALSE;
839	}
840
841	nsegments++;
842	p = end;
843	}
844
845	return (!*p \|\| (p[`0`] == `'%'` && p[`1`])) && (nsegments == `8` \|\| skipped);
846	}
847
848	parse_ipv4:
849
850	/ Parse IPv4: N.N.N.N, where each N <= 255 and doesn't have leading 0s. /
851	for (nsegments = `0`; nsegments < `4`; nsegments++)
852	{
853	if (nsegments != `0`)
854	{
855	if (*p != `'.'`)
856	return FALSE;
857	p++;
858	}
859
860	/ Check the segment; a little tricker than the IPv6 case since*
861	* we can't allow extra leading 0s, and we can't assume that all
862	* strings of valid length are within range.
863	*/
864	octet = `0`;
865	if (*p == `'0'`)
866	end = p + `1`;
867	else
868	{
869	for (end = p; g_ascii_isdigit (*end); end++)
870	{
871	octet = `10` * octet + (*end - `'0'`);
872
873	if (octet > `255`)
874	break;
875	}
876	}
877	if (end == p \|\| end > p + `3` \|\| octet > `255`)
878	return FALSE;
879
880	p = end;
881	}
882
883	/ If there's nothing left to parse, then it's ok. /
884	return !*p;
885	}
886

source code of gtk/subprojects/glib/glib/ghostutils.c