1 | #include "unicode-names.h" |
2 | |
3 | const char * |
4 | get_unicode_type_name (GUnicodeType type) |
5 | { |
6 | const char *names[] = { |
7 | "Other, Control" , |
8 | "Other, Format" , |
9 | "Other, Not Assigned" , |
10 | "Other, Private Use" , |
11 | "Other, Surrogate" , |
12 | "Letter, Lowercase" , |
13 | "Letter, Modifier" , |
14 | "Letter, Other" , |
15 | "Letter, Titlecase" , |
16 | "Letter, Uppercase" , |
17 | "Mark, Spacing" , |
18 | "Mark, Enclosing" , |
19 | "Mark, Nonspacing" , |
20 | "Number, Decimal Digit" , |
21 | "Number, Letter" , |
22 | "Number, Other" , |
23 | "Punctuation, Connector" , |
24 | "Punctuation, Dash" , |
25 | "Punctuation, Close" , |
26 | "Punctuation, Final quote" , |
27 | "Punctuation, Initial quote" , |
28 | "Punctuation, Other" , |
29 | "Punctuation, Open" , |
30 | "Symbol, Currency" , |
31 | "Symbol, Modifier" , |
32 | "Symbol, Math" , |
33 | "Symbol, Other" , |
34 | "Separator, Line" , |
35 | "Separator, Paragraph" , |
36 | "Separator, Space" , |
37 | }; |
38 | |
39 | if (type < G_N_ELEMENTS (names)) |
40 | return names[type]; |
41 | |
42 | return "Unknown" ; |
43 | } |
44 | |
45 | const char * |
46 | get_break_type_name (GUnicodeBreakType type) |
47 | { |
48 | const char *names[] = { |
49 | "Mandatory Break" , |
50 | "Carriage Return" , |
51 | "Line Feed" , |
52 | "Attached Characters and Combining Marks" , |
53 | "Surrogates" , |
54 | "Zero Width Space" , |
55 | "Inseparable" , |
56 | "Non-breaking (\"Glue\")" , |
57 | "Contingent Break Opportunity" , |
58 | "Space" , |
59 | "Break Opportunity After" , |
60 | "Break Opportunity Before" , |
61 | "Break Opportunity Before and After" , |
62 | "Hyphen" , |
63 | "Nonstarter" , |
64 | "Opening Punctuation" , |
65 | "Closing Punctuation" , |
66 | "Ambiguous Quotation" , |
67 | "Exclamation/Interrogation" , |
68 | "Ideographic" , |
69 | "Numeric" , |
70 | "Infix Separator (Numeric)" , |
71 | "Symbols Allowing Break After" , |
72 | "Ordinary Alphabetic and Symbol Characters" , |
73 | "Prefix (Numeric)" , |
74 | "Postfix (Numeric)" , |
75 | "Complex Content Dependent (South East Asian)" , |
76 | "Ambiguous (Alphabetic or Ideographic)" , |
77 | "Unknown" , |
78 | "Next Line" , |
79 | "Word Joiner" , |
80 | "Hangul L Jamo" , |
81 | "Hangul V Jamo" , |
82 | "Hangul T Jamo" , |
83 | "Hangul LV Syllable" , |
84 | "Hangul LVT Syllable" , |
85 | "Closing Parenthesis" , |
86 | "Conditional Japanese Starter" , |
87 | "Hebrew Letter" , |
88 | "Regional Indicator" , |
89 | "Emoji Base" , |
90 | "Emoji Modifier" , |
91 | "Zero Width Joiner" , |
92 | }; |
93 | |
94 | if (type < G_N_ELEMENTS (names)) |
95 | return names[type]; |
96 | |
97 | return "Unknown" ; |
98 | } |
99 | |
/*
 * get_combining_class_name:
 * @cclass: a combining class number
 *
 * Looks up a human-readable label for the combining class @cclass.
 * Only some of the 256 possible class numbers have a label; the rest
 * of the table is left NULL.
 *
 * Returns: a string with static storage duration; the caller must not
 *   modify or free it.  "Unknown" is returned when @cclass is negative,
 *   greater than 255, or has no label in the table.
 */
const char *
get_combining_class_name (int cclass)
{
  enum { N_CLASSES = 256 };

  /* Sparse table: entries not listed here are implicitly NULL.
   * static const so it is not zeroed and refilled on every call. */
  static const char * const classes[N_CLASSES] = {
    [0] = "Not Reordered",
    [1] = "Overlay",
    [7] = "Nukta",
    [8] = "Kana Voicing",
    [9] = "Virama",

    [10] = "CCC10 (Hebrew)",
    [11] = "CCC11 (Hebrew)",
    [12] = "CCC12 (Hebrew)",
    [13] = "CCC13 (Hebrew)",
    [14] = "CCC14 (Hebrew)",
    [15] = "CCC15 (Hebrew)",
    [16] = "CCC16 (Hebrew)",
    [17] = "CCC17 (Hebrew)",
    [18] = "CCC18 (Hebrew)",
    [19] = "CCC19 (Hebrew)",
    [20] = "CCC20 (Hebrew)",
    [21] = "CCC21 (Hebrew)",
    [22] = "CCC22 (Hebrew)",
    [23] = "CCC23 (Hebrew)",
    [24] = "CCC24 (Hebrew)",
    [25] = "CCC25 (Hebrew)",
    [26] = "CCC26 (Hebrew)",

    [27] = "CCC27 (Arabic)",
    [28] = "CCC28 (Arabic)",
    [29] = "CCC29 (Arabic)",
    [30] = "CCC30 (Arabic)",
    [31] = "CCC31 (Arabic)",
    [32] = "CCC32 (Arabic)",
    [33] = "CCC33 (Arabic)",
    [34] = "CCC34 (Arabic)",
    [35] = "CCC35 (Arabic)",

    [36] = "CCC36 (Syriac)",

    [84] = "CCC84 (Telugu)",
    [85] = "CCC85 (Telugu)",

    [103] = "CCC103 (Thai)",
    [107] = "CCC107 (Thai)",

    [118] = "CCC118 (Lao)",
    [122] = "CCC122 (Lao)",

    [129] = "CCC129 (Tibetan)",
    [130] = "CCC130 (Tibetan)",
    [133] = "CCC133 (Tibetan)",

    [200] = "Attached Below Left",
    [202] = "Attached Below",
    [214] = "Attached Above",
    [216] = "Attached Above Right",
    [218] = "Below Left",
    [220] = "Below",
    [222] = "Below Right",
    [224] = "Left",
    [226] = "Right",
    [228] = "Above Left",
    [230] = "Above",
    [232] = "Above Right",
    [233] = "Double Below",
    [234] = "Double Above",
    [240] = "Iota Subscript",
    [255] = "Invalid",
  };

  /* @cclass is signed: reject negative values as well as values past the
   * end of the table before indexing, otherwise the read is out of bounds. */
  if (cclass < 0 || cclass >= N_CLASSES)
    return "Unknown";

  /* In range but unlabelled slots are NULL. */
  if (classes[cclass] == NULL)
    return "Unknown";

  return classes[cclass];
}
175 | |