unicode.c source code [linux/fs/hfsplus/unicode.c]

// SPDX-License-Identifier: GPL-2.0
/*
 * linux/fs/hfsplus/unicode.c
 *
 * Copyright (C) 2001
 * Brad Boyer (flar@allandria.com)
 * (C) 2003 Ardis Technologies <roman@ardistech.com>
 *
 * Handler routines for unicode strings
 */
11
#include <linux/types.h>
#include <linux/nls.h>
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
16
17	/ Fold the case of a unicode char, given the 16 bit value /
18	/ Returns folded char, or 0 if ignorable /
19	static inline u16 case_fold(u16 c)
20	{
21	u16 tmp;
22
23	tmp = hfsplus_case_fold_table[c >> `8`];
24	if (tmp)
25	tmp = hfsplus_case_fold_table[tmp + (c & `0xff`)];
26	else
27	tmp = c;
28	return tmp;
29	}
30
31	/ Compare unicode strings, return values like normal strcmp /
32	int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
33	const struct hfsplus_unistr *s2)
34	{
35	u16 len1, len2, c1, c2;
36	const hfsplus_unichr p1, p2;
37
38	len1 = be16_to_cpu(s1->length);
39	len2 = be16_to_cpu(s2->length);
40	p1 = s1->unicode;
41	p2 = s2->unicode;
42
43	while (`1`) {
44	c1 = c2 = `0`;
45
46	while (len1 && !c1) {
47	c1 = case_fold(be16_to_cpu(*p1));
48	p1++;
49	len1--;
50	}
51	while (len2 && !c2) {
52	c2 = case_fold(be16_to_cpu(*p2));
53	p2++;
54	len2--;
55	}
56
57	if (c1 != c2)
58	return (c1 < c2) ? -`1` : `1`;
59	if (!c1 && !c2)
60	return `0`;
61	}
62	}
63
64	/ Compare names as a sequence of 16-bit unsigned integers /
65	int hfsplus_strcmp(const struct hfsplus_unistr *s1,
66	const struct hfsplus_unistr *s2)
67	{
68	u16 len1, len2, c1, c2;
69	const hfsplus_unichr p1, p2;
70	int len;
71
72	len1 = be16_to_cpu(s1->length);
73	len2 = be16_to_cpu(s2->length);
74	p1 = s1->unicode;
75	p2 = s2->unicode;
76
77	for (len = min(len1, len2); len > `0`; len--) {
78	c1 = be16_to_cpu(*p1);
79	c2 = be16_to_cpu(*p2);
80	if (c1 != c2)
81	return c1 < c2 ? -`1` : `1`;
82	p1++;
83	p2++;
84	}
85
86	return len1 < len2 ? -`1` :
87	len1 > len2 ? `1` : `0`;
88	}
89
90
/*
 * Constants for arithmetic Hangul syllable (de)composition.
 *
 * SBase is the first precomposed syllable; LBase and VBase are the first
 * leading-consonant and vowel jamo.  TBase is one below the first trailing
 * consonant, so a trailing index of 0 means "no trailing consonant".
 */
#define Hangul_SBase	0xac00
#define Hangul_LBase	0x1100
#define Hangul_VBase	0x1161
#define Hangul_TBase	0x11a7
#define Hangul_SCount	11172
#define Hangul_LCount	19
#define Hangul_VCount	21
#define Hangul_TCount	28
#define Hangul_NCount	(Hangul_VCount * Hangul_TCount)
100
101
102	static u16 hfsplus_compose_lookup(u16 p, u16 cc)
103	{
104	int i, s, e;
105
106	s = `1`;
107	e = p[`1`];
108	if (!e \|\| cc < p[s * `2`] \|\| cc > p[e * `2`])
109	return NULL;
110	do {
111	i = (s + e) / `2`;
112	if (cc > p[i * `2`])
113	s = i + `1`;
114	else if (cc < p[i * `2`])
115	e = i - `1`;
116	else
117	return hfsplus_compose_table + p[i * `2` + `1`];
118	} while (s <= e);
119	return NULL;
120	}
121
122	int hfsplus_uni2asc(struct super_block *sb,
123	const struct hfsplus_unistr *ustr,
124	char astr, int* *len_p)
125	{
126	const hfsplus_unichr *ip;
127	struct nls_table *nls = HFSPLUS_SB(sb)->nls;
128	u8 *op;
129	u16 cc, c0, c1;
130	u16 ce1, ce2;
131	int i, len, ustrlen, res, compose;
132
133	op = astr;
134	ip = ustr->unicode;
135	ustrlen = be16_to_cpu(ustr->length);
136	len = *len_p;
137	ce1 = NULL;
138	compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
139
140	while (ustrlen > `0`) {
141	c0 = be16_to_cpu(*ip++);
142	ustrlen--;
143	/ search for single decomposed char /
144	if (likely(compose))
145	ce1 = hfsplus_compose_lookup(p: hfsplus_compose_table, cc: c0);
146	if (ce1)
147	cc = ce1[`0`];
148	else
149	cc = `0`;
150	if (cc) {
151	/ start of a possibly decomposed Hangul char /
152	if (cc != `0xffff`)
153	goto done;
154	if (!ustrlen)
155	goto same;
156	c1 = be16_to_cpu(*ip) - Hangul_VBase;
157	if (c1 < Hangul_VCount) {
158	/ compose the Hangul char /
159	cc = (c0 - Hangul_LBase) * Hangul_VCount;
160	cc = (cc + c1) * Hangul_TCount;
161	cc += Hangul_SBase;
162	ip++;
163	ustrlen--;
164	if (!ustrlen)
165	goto done;
166	c1 = be16_to_cpu(*ip) - Hangul_TBase;
167	if (c1 > `0` && c1 < Hangul_TCount) {
168	cc += c1;
169	ip++;
170	ustrlen--;
171	}
172	goto done;
173	}
174	}
175	while (`1`) {
176	/ main loop for common case of not composed chars /
177	if (!ustrlen)
178	goto same;
179	c1 = be16_to_cpu(*ip);
180	if (likely(compose))
181	ce1 = hfsplus_compose_lookup(
182	p: hfsplus_compose_table, cc: c1);
183	if (ce1)
184	break;
185	switch (c0) {
186	case `0`:
187	c0 = `0x2400`;
188	break;
189	case `'/'`:
190	c0 = `':'`;
191	break;
192	}
193	res = nls->uni2char(c0, op, len);
194	if (res < `0`) {
195	if (res == -ENAMETOOLONG)
196	goto out;
197	*op = `'?'`;
198	res = `1`;
199	}
200	op += res;
201	len -= res;
202	c0 = c1;
203	ip++;
204	ustrlen--;
205	}
206	ce2 = hfsplus_compose_lookup(p: ce1, cc: c0);
207	if (ce2) {
208	i = `1`;
209	while (i < ustrlen) {
210	ce1 = hfsplus_compose_lookup(p: ce2,
211	be16_to_cpu(ip[i]));
212	if (!ce1)
213	break;
214	i++;
215	ce2 = ce1;
216	}
217	cc = ce2[`0`];
218	if (cc) {
219	ip += i;
220	ustrlen -= i;
221	goto done;
222	}
223	}
224	same:
225	switch (c0) {
226	case `0`:
227	cc = `0x2400`;
228	break;
229	case `'/'`:
230	cc = `':'`;
231	break;
232	default:
233	cc = c0;
234	}
235	done:
236	res = nls->uni2char(cc, op, len);
237	if (res < `0`) {
238	if (res == -ENAMETOOLONG)
239	goto out;
240	*op = `'?'`;
241	res = `1`;
242	}
243	op += res;
244	len -= res;
245	}
246	res = `0`;
247	out:
248	len_p = (char* *)op - astr;
249	return res;
250	}
251
252	/*
253	* Convert one or more ASCII characters into a single unicode character.
254	* Returns the number of ASCII characters corresponding to the unicode char.
255	*/
256	static inline int asc2unichar(struct super_block sb, const* char astr, int* len,
257	wchar_t *uc)
258	{
259	int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
260	if (size <= `0`) {
261	*uc = `'?'`;
262	size = `1`;
263	}
264	switch (*uc) {
265	case `0x2400`:
266	*uc = `0`;
267	break;
268	case `':'`:
269	*uc = `'/'`;
270	break;
271	}
272	return size;
273	}
274
275	/ Decomposes a non-Hangul unicode character. /
276	static u16 hfsplus_decompose_nonhangul(wchar_t uc, int* *size)
277	{
278	int off;
279
280	off = hfsplus_decompose_table[(uc >> `12`) & `0xf`];
281	if (off == `0` \|\| off == `0xffff`)
282	return NULL;
283
284	off = hfsplus_decompose_table[off + ((uc >> `8`) & `0xf`)];
285	if (!off)
286	return NULL;
287
288	off = hfsplus_decompose_table[off + ((uc >> `4`) & `0xf`)];
289	if (!off)
290	return NULL;
291
292	off = hfsplus_decompose_table[off + (uc & `0xf`)];
293	*size = off & `3`;
294	if (*size == `0`)
295	return NULL;
296	return hfsplus_decompose_table + (off / `4`);
297	}
298
299	/*
300	* Try to decompose a unicode character as Hangul. Return 0 if @uc is not
301	* precomposed Hangul, otherwise return the length of the decomposition.
302	*
303	* This function was adapted from sample code from the Unicode Standard
304	* Annex #15: Unicode Normalization Forms, version 3.2.0.
305	*
306	* Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed
307	* under the Terms of Use in http://www.unicode.org/copyright.html.
308	*/
309	static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result)
310	{
311	int index;
312	int l, v, t;
313
314	index = uc - Hangul_SBase;
315	if (index < `0` \|\| index >= Hangul_SCount)
316	return `0`;
317
318	l = Hangul_LBase + index / Hangul_NCount;
319	v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount;
320	t = Hangul_TBase + index % Hangul_TCount;
321
322	result[`0`] = l;
323	result[`1`] = v;
324	if (t != Hangul_TBase) {
325	result[`2`] = t;
326	return `3`;
327	}
328	return `2`;
329	}
330
331	/ Decomposes a single unicode character. /
332	static u16 decompose_unichar(wchar_t uc, int* size, u16 hangul_buffer)
333	{
334	u16 *result;
335
336	/ Hangul is handled separately /
337	result = hangul_buffer;
338	*size = hfsplus_try_decompose_hangul(uc, result);
339	if (*size == `0`)
340	result = hfsplus_decompose_nonhangul(uc, size);
341	return result;
342	}
343
344	int hfsplus_asc2uni(struct super_block *sb,
345	struct hfsplus_unistr ustr, int* max_unistr_len,
346	const char astr, int* len)
347	{
348	int size, dsize, decompose;
349	u16 *dstr, outlen = `0`;
350	wchar_t c;
351	u16 dhangul[`3`];
352
353	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
354	while (outlen < max_unistr_len && len > `0`) {
355	size = asc2unichar(sb, astr, len, uc: &c);
356
357	if (decompose)
358	dstr = decompose_unichar(uc: c, size: &dsize, hangul_buffer: dhangul);
359	else
360	dstr = NULL;
361	if (dstr) {
362	if (outlen + dsize > max_unistr_len)
363	break;
364	do {
365	ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
366	} while (--dsize > `0`);
367	} else
368	ustr->unicode[outlen++] = cpu_to_be16(c);
369
370	astr += size;
371	len -= size;
372	}
373	ustr->length = cpu_to_be16(outlen);
374	if (len > `0`)
375	return -ENAMETOOLONG;
376	return `0`;
377	}
378
379	/*
380	* Hash a string to an integer as appropriate for the HFS+ filesystem.
381	* Composed unicode characters are decomposed and case-folding is performed
382	* if the appropriate bits are (un)set on the superblock.
383	*/
384	int hfsplus_hash_dentry(const struct dentry dentry, struct* qstr *str)
385	{
386	struct super_block *sb = dentry->d_sb;
387	const char *astr;
388	const u16 *dstr;
389	int casefold, decompose, size, len;
390	unsigned long hash;
391	wchar_t c;
392	u16 c2;
393	u16 dhangul[`3`];
394
395	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
396	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
397	hash = init_name_hash(dentry);
398	astr = str->name;
399	len = str->len;
400	while (len > `0`) {
401	int dsize;
402	size = asc2unichar(sb, astr, len, uc: &c);
403	astr += size;
404	len -= size;
405
406	if (decompose)
407	dstr = decompose_unichar(uc: c, size: &dsize, hangul_buffer: dhangul);
408	else
409	dstr = NULL;
410	if (dstr) {
411	do {
412	c2 = *dstr++;
413	if (casefold)
414	c2 = case_fold(c: c2);
415	if (!casefold \|\| c2)
416	hash = partial_name_hash(c: c2, prevhash: hash);
417	} while (--dsize > `0`);
418	} else {
419	c2 = c;
420	if (casefold)
421	c2 = case_fold(c: c2);
422	if (!casefold \|\| c2)
423	hash = partial_name_hash(c: c2, prevhash: hash);
424	}
425	}
426	str->hash = end_name_hash(hash);
427
428	return `0`;
429	}
430
431	/*
432	* Compare strings with HFS+ filename ordering.
433	* Composed unicode characters are decomposed and case-folding is performed
434	* if the appropriate bits are (un)set on the superblock.
435	*/
436	int hfsplus_compare_dentry(const struct dentry *dentry,
437	unsigned int len, const char str, const* struct qstr *name)
438	{
439	struct super_block *sb = dentry->d_sb;
440	int casefold, decompose, size;
441	int dsize1, dsize2, len1, len2;
442	const u16 dstr1, dstr2;
443	const char astr1, astr2;
444	u16 c1, c2;
445	wchar_t c;
446	u16 dhangul_1[`3`], dhangul_2[`3`];
447
448	casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
449	decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
450	astr1 = str;
451	len1 = len;
452	astr2 = name->name;
453	len2 = name->len;
454	dsize1 = dsize2 = `0`;
455	dstr1 = dstr2 = NULL;
456
457	while (len1 > `0` && len2 > `0`) {
458	if (!dsize1) {
459	size = asc2unichar(sb, astr: astr1, len: len1, uc: &c);
460	astr1 += size;
461	len1 -= size;
462
463	if (decompose)
464	dstr1 = decompose_unichar(uc: c, size: &dsize1,
465	hangul_buffer: dhangul_1);
466	if (!decompose \|\| !dstr1) {
467	c1 = c;
468	dstr1 = &c1;
469	dsize1 = `1`;
470	}
471	}
472
473	if (!dsize2) {
474	size = asc2unichar(sb, astr: astr2, len: len2, uc: &c);
475	astr2 += size;
476	len2 -= size;
477
478	if (decompose)
479	dstr2 = decompose_unichar(uc: c, size: &dsize2,
480	hangul_buffer: dhangul_2);
481	if (!decompose \|\| !dstr2) {
482	c2 = c;
483	dstr2 = &c2;
484	dsize2 = `1`;
485	}
486	}
487
488	c1 = *dstr1;
489	c2 = *dstr2;
490	if (casefold) {
491	c1 = case_fold(c: c1);
492	if (!c1) {
493	dstr1++;
494	dsize1--;
495	continue;
496	}
497	c2 = case_fold(c: c2);
498	if (!c2) {
499	dstr2++;
500	dsize2--;
501	continue;
502	}
503	}
504	if (c1 < c2)
505	return -`1`;
506	else if (c1 > c2)
507	return `1`;
508
509	dstr1++;
510	dsize1--;
511	dstr2++;
512	dsize2--;
513	}
514
515	if (len1 < len2)
516	return -`1`;
517	if (len1 > len2)
518	return `1`;
519	return `0`;
520	}
521

source code of linux/fs/hfsplus/unicode.c