1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
2 | /* |
3 | * Copyright (c) 2014 SGI. |
4 | * All rights reserved. |
5 | */ |
6 | |
7 | #ifndef UTF8NORM_H |
8 | #define UTF8NORM_H |
9 | |
10 | #include <linux/types.h> |
11 | #include <linux/export.h> |
12 | #include <linux/string.h> |
13 | #include <linux/module.h> |
14 | #include <linux/unicode.h> |
15 | |
/*
 * Query @um about the Unicode @version passed in.
 * NOTE(review): judging by the name, this reports whether @version is
 * supported; the exact return convention (non-zero vs. 0) is not visible
 * in this header -- confirm against the definition.
 */
int utf8version_is_supported(const struct unicode_map *um, unsigned int version);
17 | |
/*
 * Determine the length of the normalized form of the string,
 * excluding any terminating NUL byte.
 *
 * @um:  unicode map to use.
 * @n:   which normalization to apply.
 * @s:   input string.
 * @len: length of @s (NOTE(review): presumably in bytes -- confirm).
 *
 * Returns 0 if only ignorable code points are present.
 * Returns -1 if the input is not valid UTF-8; this in-band error value
 * is why the result type is the signed ssize_t.
 */
ssize_t utf8nlen(const struct unicode_map *um, enum utf8_normalization n,
		 const char *s, size_t len);
26 | |
/*
 * Size, in bytes, of the hangul[] scratch array embedded in
 * struct utf8cursor below; it must be defined before that struct.
 */
#define UTF8HANGULLEAF (12)
29 | |
30 | /* |
31 | * Cursor structure used by the normalizer. |
32 | */ |
33 | struct utf8cursor { |
34 | const struct unicode_map *um; |
35 | enum utf8_normalization n; |
36 | const char *s; |
37 | const char *p; |
38 | const char *ss; |
39 | const char *sp; |
40 | unsigned int len; |
41 | unsigned int slen; |
42 | short int ccc; |
43 | short int nccc; |
44 | unsigned char hangul[UTF8HANGULLEAF]; |
45 | }; |
46 | |
/*
 * Initialize a utf8cursor to normalize a string.
 *
 * @u8c: cursor to initialize.
 * @um:  unicode map to use.
 * @n:   which normalization to apply.
 * @s:   string to normalize.
 * @len: length of @s (NOTE(review): presumably in bytes -- confirm).
 *
 * Returns 0 on success.
 * Returns -1 on failure.
 */
int utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um,
		enum utf8_normalization n, const char *s, size_t len);
54 | |
/*
 * Get the next byte in the normalization.
 *
 * @u8c: cursor to read from (NOTE(review): presumably one previously set
 *       up with utf8ncursor() -- confirm).
 *
 * Returns a value > 0 && < 256 on success.
 * Returns 0 when the end of the normalization is reached.
 * Returns -1 if the string being normalized is not valid UTF-8.
 *
 * Declared without "extern", like the other prototypes in this header:
 * the keyword is redundant on a function declaration.
 */
int utf8byte(struct utf8cursor *u8c);
62 | |
/*
 * Element type of the utf8nfdicfdata and utf8nfdidata arrays referenced
 * from struct utf8data_table.
 * NOTE(review): maxage presumably is the newest Unicode age the entry
 * covers and offset a position inside the utf8data blob -- confirm.
 */
struct utf8data {
	unsigned int maxage, offset;
};
67 | |
/*
 * Collection of pointers to the read-only tables behind the normalizer.
 * NOTE(review): each *_size member presumably holds the number of
 * elements in the array declared directly above it -- confirm against
 * the code that fills in this structure.
 */
struct utf8data_table {
	/* Array of unsigned int values and its size. */
	const unsigned int *utf8agetab;
	int utf8agetab_size;

	/* Array of struct utf8data and its size (presumably NFD + casefold). */
	const struct utf8data *utf8nfdicfdata;
	int utf8nfdicfdata_size;

	/* Array of struct utf8data and its size (presumably NFD only). */
	const struct utf8data *utf8nfdidata;
	int utf8nfdidata_size;

	/* Raw byte table; no size is recorded for it in this structure. */
	const unsigned char *utf8data;
};
80 | |
/*
 * The single utf8data_table instance; only declared here, the definition
 * lives in another translation unit.
 */
extern struct utf8data_table utf8_data_table;
82 | |
83 | #endif /* UTF8NORM_H */ |
84 | |