1 | /* |
2 | * Copyright (C) 2006 George Staikos <staikos@kde.org> |
3 | * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> |
4 | * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. |
5 | * |
6 | * This library is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Library General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2 of the License, or (at your option) any later version. |
10 | * |
11 | * This library is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Library General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Library General Public License |
17 | * along with this library; see the file COPYING.LIB. If not, write to |
18 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
19 | * Boston, MA 02110-1301, USA. |
20 | * |
21 | */ |
22 | |
23 | #ifndef WTF_UNICODE_QT4_H |
24 | #define WTF_UNICODE_QT4_H |
25 | |
26 | #include <QtCore/qchar.h> |
27 | #include <QtCore/qstring.h> |
28 | |
29 | #include <config.h> |
30 | |
31 | #include <stdint.h> |
32 | |
33 | // ugly hack to make UChar compatible with JSChar in API/JSStringRef.h |
34 | #if defined(Q_OS_WIN) || COMPILER(WINSCW) || COMPILER(RVCT) |
35 | typedef wchar_t UChar; |
36 | #else |
37 | typedef uint16_t UChar; |
38 | #endif |
39 | typedef int32_t UChar32; |
40 | |
// A handful of UTF-16 helper macros taken from ICU.

// True when c is in 0xD800..0xDBFF (lead surrogate).
#define U16_IS_LEAD(c) (((c) & 0xfffffc00) == 0xd800)
// True when c is in 0xDC00..0xDFFF (trail surrogate).
#define U16_IS_TRAIL(c) (((c) & 0xfffffc00) == 0xdc00)
// Constant removed by U16_GET_SUPPLEMENTARY after it computes (lead << 10) + trail.
#define U16_SURROGATE_OFFSET ((0xd800 << 10UL) + 0xdc00 - 0x10000)
// Combines a lead/trail surrogate pair into one supplementary code point.
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    (((UChar32)(lead) << 10UL) + (UChar32)(trail) - U16_SURROGATE_OFFSET)

// Lead / trail surrogate code units for a supplementary code point.
#define U16_LEAD(supplementary) (UChar)(((supplementary) >> 10) + 0xd7c0)
#define U16_TRAIL(supplementary) (UChar)(((supplementary) & 0x3ff) | 0xdc00)

// True when c is in 0xD800..0xDFFF (any surrogate).
#define U_IS_SURROGATE(c) (((c) & 0xfffff800) == 0xd800)
// True when c is not a surrogate, i.e. it encodes a code point on its own.
#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
// Only tests bit 0x400, so the answer is meaningful only for a value that is
// already known to be a surrogate: bit clear means lead, bit set means trail.
#define U16_IS_SURROGATE_LEAD(c) (((c) & 0x400) == 0)
56 | |
// Reads the code point that starts at s[i] into c and moves i past it.
// When s[i] is a lead surrogate and the next unit (still below index `length`)
// is a trail surrogate, both are consumed and c becomes the combined
// supplementary code point; otherwise c is the single unit that was read.
// Arguments are evaluated more than once.
#define U16_NEXT(s, i, length, c) { \
    (c) = (s)[(i)++]; \
    if (U16_IS_LEAD(c)) { \
        uint16_t __trail; \
        if ((i) < (length) && U16_IS_TRAIL(__trail = (s)[(i)])) { \
            ++(i); \
            (c) = U16_GET_SUPPLEMENTARY((c), __trail); \
        } \
    } \
}
67 | |
// Steps i back and reads the code point that ends just before the old i into c.
// When the unit read is a trail surrogate and the unit before it (not below
// index `start`) is a lead surrogate, both are consumed and c becomes the
// combined supplementary code point; otherwise c is the single unit read.
// Arguments are evaluated more than once.
#define U16_PREV(s, start, i, c) { \
    (c) = (s)[--(i)]; \
    if (U16_IS_TRAIL(c)) { \
        uint16_t __lead; \
        if ((i) > (start) && U16_IS_LEAD(__lead = (s)[(i) - 1])) { \
            --(i); \
            (c) = U16_GET_SUPPLEMENTARY(__lead, (c)); \
        } \
    } \
}
78 | |
// Single-bit 32-bit mask with bit number x set.
#define U_MASK(x) (((uint32_t)1) << (x))
80 | |
81 | namespace WTF { |
82 | namespace Unicode { |
83 | |
84 | QT_USE_NAMESPACE |
85 | |
86 | enum Direction { |
87 | LeftToRight = QChar::DirL, |
88 | RightToLeft = QChar::DirR, |
89 | EuropeanNumber = QChar::DirEN, |
90 | EuropeanNumberSeparator = QChar::DirES, |
91 | EuropeanNumberTerminator = QChar::DirET, |
92 | ArabicNumber = QChar::DirAN, |
93 | CommonNumberSeparator = QChar::DirCS, |
94 | BlockSeparator = QChar::DirB, |
95 | SegmentSeparator = QChar::DirS, |
96 | WhiteSpaceNeutral = QChar::DirWS, |
97 | OtherNeutral = QChar::DirON, |
98 | LeftToRightEmbedding = QChar::DirLRE, |
99 | LeftToRightOverride = QChar::DirLRO, |
100 | RightToLeftArabic = QChar::DirAL, |
101 | RightToLeftEmbedding = QChar::DirRLE, |
102 | RightToLeftOverride = QChar::DirRLO, |
103 | PopDirectionalFormat = QChar::DirPDF, |
104 | NonSpacingMark = QChar::DirNSM, |
105 | BoundaryNeutral = QChar::DirBN |
106 | }; |
107 | |
108 | enum DecompositionType { |
109 | DecompositionNone = QChar::NoDecomposition, |
110 | DecompositionCanonical = QChar::Canonical, |
111 | DecompositionCompat = QChar::Compat, |
112 | DecompositionCircle = QChar::Circle, |
113 | DecompositionFinal = QChar::Final, |
114 | DecompositionFont = QChar::Font, |
115 | DecompositionFraction = QChar::Fraction, |
116 | DecompositionInitial = QChar::Initial, |
117 | DecompositionIsolated = QChar::Isolated, |
118 | DecompositionMedial = QChar::Medial, |
119 | DecompositionNarrow = QChar::Narrow, |
120 | DecompositionNoBreak = QChar::NoBreak, |
121 | DecompositionSmall = QChar::Small, |
122 | DecompositionSquare = QChar::Square, |
123 | DecompositionSub = QChar::Sub, |
124 | DecompositionSuper = QChar::Super, |
125 | DecompositionVertical = QChar::Vertical, |
126 | DecompositionWide = QChar::Wide |
127 | }; |
128 | |
129 | enum CharCategory { |
130 | Mark_NonSpacing = U_MASK(QChar::Mark_NonSpacing), |
131 | Mark_SpacingCombining = U_MASK(QChar::Mark_SpacingCombining), |
132 | Mark_Enclosing = U_MASK(QChar::Mark_Enclosing), |
133 | Number_DecimalDigit = U_MASK(QChar::Number_DecimalDigit), |
134 | Number_Letter = U_MASK(QChar::Number_Letter), |
135 | Number_Other = U_MASK(QChar::Number_Other), |
136 | Separator_Space = U_MASK(QChar::Separator_Space), |
137 | Separator_Line = U_MASK(QChar::Separator_Line), |
138 | Separator_Paragraph = U_MASK(QChar::Separator_Paragraph), |
139 | Other_Control = U_MASK(QChar::Other_Control), |
140 | Other_Format = U_MASK(QChar::Other_Format), |
141 | Other_Surrogate = U_MASK(QChar::Other_Surrogate), |
142 | Other_PrivateUse = U_MASK(QChar::Other_PrivateUse), |
143 | Other_NotAssigned = U_MASK(QChar::Other_NotAssigned), |
144 | Letter_Uppercase = U_MASK(QChar::Letter_Uppercase), |
145 | Letter_Lowercase = U_MASK(QChar::Letter_Lowercase), |
146 | Letter_Titlecase = U_MASK(QChar::Letter_Titlecase), |
147 | Letter_Modifier = U_MASK(QChar::Letter_Modifier), |
148 | Letter_Other = U_MASK(QChar::Letter_Other), |
149 | Punctuation_Connector = U_MASK(QChar::Punctuation_Connector), |
150 | Punctuation_Dash = U_MASK(QChar::Punctuation_Dash), |
151 | Punctuation_Open = U_MASK(QChar::Punctuation_Open), |
152 | Punctuation_Close = U_MASK(QChar::Punctuation_Close), |
153 | Punctuation_InitialQuote = U_MASK(QChar::Punctuation_InitialQuote), |
154 | Punctuation_FinalQuote = U_MASK(QChar::Punctuation_FinalQuote), |
155 | Punctuation_Other = U_MASK(QChar::Punctuation_Other), |
156 | Symbol_Math = U_MASK(QChar::Symbol_Math), |
157 | Symbol_Currency = U_MASK(QChar::Symbol_Currency), |
158 | Symbol_Modifier = U_MASK(QChar::Symbol_Modifier), |
159 | Symbol_Other = U_MASK(QChar::Symbol_Other) |
160 | }; |
161 | |
162 | |
163 | // FIXME: handle surrogates correctly in all methods |
164 | |
165 | inline UChar32 toLower(UChar32 ch) |
166 | { |
167 | return QChar::toLower(ucs4: ch); |
168 | } |
169 | |
170 | inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) |
171 | { |
172 | QString s = QString::fromRawData(reinterpret_cast<const QChar *>(src), size: srcLength); |
173 | |
174 | s = s.toLower(); |
175 | |
176 | *error = resultLength < s.size(); |
177 | |
178 | if (!*error && result) { |
179 | const ushort *p = reinterpret_cast<const ushort *>(s.constData()); |
180 | ushort *pp = reinterpret_cast<ushort *>(result); |
181 | memcpy(dest: pp, src: p, n: s.size() * sizeof(ushort)); |
182 | |
183 | if (resultLength > s.size()) |
184 | pp[s.size()] = 0; |
185 | } |
186 | |
187 | return s.size(); |
188 | } |
189 | |
190 | inline UChar32 toUpper(UChar32 ch) |
191 | { |
192 | return QChar::toUpper(ucs4: ch); |
193 | } |
194 | |
195 | inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) |
196 | { |
197 | QString s = QString::fromRawData(reinterpret_cast<const QChar *>(src), size: srcLength); |
198 | |
199 | s = s.toUpper(); |
200 | |
201 | *error = resultLength < s.size(); |
202 | |
203 | if (!*error && result) { |
204 | const ushort *p = reinterpret_cast<const ushort *>(s.constData()); |
205 | ushort *pp = reinterpret_cast<ushort *>(result); |
206 | memcpy(dest: pp, src: p, n: s.size() * sizeof(ushort)); |
207 | |
208 | if (resultLength > s.size()) |
209 | pp[s.size()] = 0; |
210 | } |
211 | |
212 | return s.size(); |
213 | } |
214 | |
215 | inline UChar32 toTitleCase(UChar32 c) |
216 | { |
217 | return QChar::toTitleCase(ucs4: c); |
218 | } |
219 | |
220 | inline UChar32 foldCase(UChar32 c) |
221 | { |
222 | return QChar::toCaseFolded(ucs4: c); |
223 | } |
224 | |
225 | inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error) |
226 | { |
227 | QString s = QString::fromRawData(reinterpret_cast<const QChar *>(src), size: srcLength); |
228 | |
229 | s = s.toCaseFolded(); |
230 | |
231 | *error = resultLength < s.size(); |
232 | |
233 | if (!*error && result) { |
234 | const ushort *p = reinterpret_cast<const ushort *>(s.constData()); |
235 | ushort *pp = reinterpret_cast<ushort *>(result); |
236 | memcpy(dest: pp, src: p, n: s.size() * sizeof(ushort)); |
237 | |
238 | if (resultLength > s.size()) |
239 | pp[s.size()] = 0; |
240 | } |
241 | |
242 | return s.size(); |
243 | } |
244 | |
245 | inline bool isArabicChar(UChar32 c) |
246 | { |
247 | return c >= 0x0600 && c <= 0x06FF; |
248 | } |
249 | |
250 | inline bool isPrintableChar(UChar32 c) |
251 | { |
252 | return QChar::isPrint(ucs4: c); |
253 | } |
254 | |
255 | inline bool isSeparatorSpace(UChar32 c) |
256 | { |
257 | return QChar::category(ucs4: c) == QChar::Separator_Space; |
258 | } |
259 | |
260 | inline bool isPunct(UChar32 c) |
261 | { |
262 | return QChar::isPunct(ucs4: c); |
263 | } |
264 | |
265 | inline bool isLower(UChar32 c) |
266 | { |
267 | return QChar::isLower(ucs4: c); |
268 | } |
269 | |
270 | inline bool hasLineBreakingPropertyComplexContext(UChar32) |
271 | { |
272 | // FIXME: Implement this to return whether the character has line breaking property SA (Complex Context). |
273 | return false; |
274 | } |
275 | |
276 | inline UChar32 mirroredChar(UChar32 c) |
277 | { |
278 | return QChar::mirroredChar(ucs4: c); |
279 | } |
280 | |
281 | inline uint8_t combiningClass(UChar32 c) |
282 | { |
283 | return QChar::combiningClass(ucs4: c); |
284 | } |
285 | |
286 | inline DecompositionType decompositionType(UChar32 c) |
287 | { |
288 | return (DecompositionType)QChar::decompositionTag(ucs4: c); |
289 | } |
290 | |
291 | inline int umemcasecmp(const UChar* a, const UChar* b, int len) |
292 | { |
293 | // handle surrogates correctly |
294 | for (int i = 0; i < len; ++i) { |
295 | uint c1 = QChar::toCaseFolded(ucs4: ushort(a[i])); |
296 | uint c2 = QChar::toCaseFolded(ucs4: ushort(b[i])); |
297 | if (c1 != c2) |
298 | return c1 - c2; |
299 | } |
300 | return 0; |
301 | } |
302 | |
303 | inline Direction direction(UChar32 c) |
304 | { |
305 | return (Direction)QChar::direction(ucs4: c); |
306 | } |
307 | |
308 | inline CharCategory category(UChar32 c) |
309 | { |
310 | return (CharCategory) U_MASK(QChar::category(c)); |
311 | } |
312 | |
313 | } } |
314 | |
315 | #endif // WTF_UNICODE_QT4_H |
316 | |