| 1 | // © 2016 and later: Unicode, Inc. and others. | 
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
| 3 | /* | 
| 4 | ******************************************************************************* | 
| 5 | * | 
| 6 | *   Copyright (C) 1999-2012, International Business Machines | 
| 7 | *   Corporation and others.  All Rights Reserved. | 
| 8 | * | 
| 9 | ******************************************************************************* | 
| 10 | *   file name:  utf16.h | 
| 11 | *   encoding:   UTF-8 | 
| 12 | *   tab size:   8 (not used) | 
| 13 | *   indentation:4 | 
| 14 | * | 
| 15 | *   created on: 1999sep09 | 
| 16 | *   created by: Markus W. Scherer | 
| 17 | */ | 
| 18 |  | 
| 19 | /** | 
| 20 |  * \file | 
| 21 |  * \brief C API: 16-bit Unicode handling macros | 
| 22 |  *  | 
| 23 |  * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings. | 
| 24 |  * | 
| 25 |  * For more information see utf.h and the ICU User Guide Strings chapter | 
| 26 |  * (https://unicode-org.github.io/icu/userguide/strings). | 
| 27 |  * | 
| 28 |  * <em>Usage:</em> | 
| 29 |  * ICU coding guidelines for if() statements should be followed when using these macros. | 
 * Compound statements (curly braces {}) must be used for if-else-while...
 * bodies and all macro statements should be terminated with a semicolon.
| 32 |  */ | 
| 33 |  | 
| 34 | #ifndef __UTF16_H__ | 
| 35 | #define __UTF16_H__ | 
| 36 |  | 
| 37 | #include <stdbool.h> | 
| 38 | #include "unicode/umachine.h" | 
| 39 | #ifndef __UTF_H__ | 
| 40 | #   include "unicode/utf.h" | 
| 41 | #endif | 
| 42 |  | 
| 43 | /* single-code point definitions -------------------------------------------- */ | 
| 44 |  | 
/**
 * Tests whether one 16-bit code unit encodes a complete code point by itself,
 * i.e. whether it is a BMP code unit that is not a surrogate.
 *
 * The expansion is fully parenthesized, like the other expression macros in
 * this file, so that it always behaves as a single primary expression.
 *
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_SINGLE(c) (!U_IS_SURROGATE(c))
| 52 |  | 
/**
 * Tests whether a code unit is a lead (high) surrogate, U+d800..U+dbff.
 * All bits above the low ten are compared, so values outside the
 * 16-bit range never match.
 *
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_LEAD(c) (0xd800==((c)&0xfffffc00))
| 60 |  | 
/**
 * Tests whether a code unit is a trail (low) surrogate, U+dc00..U+dfff.
 * All bits above the low ten are compared, so values outside the
 * 16-bit range never match.
 *
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_TRAIL(c) (0xdc00==((c)&0xfffffc00))
| 68 |  | 
/**
 * Is this code unit a surrogate (U+d800..U+dfff)?
 * This is a UTF-16-named alias: it expands directly to U_IS_SURROGATE(c),
 * so its result and argument handling are exactly those of that macro.
 *
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
| 76 |  | 
/**
 * Given that c is already known to be a surrogate (U16_IS_SURROGATE(c)),
 * tests whether it is a lead surrogate.
 * Only bit 10 (0x400) is inspected; it is clear for lead surrogates.
 * The answer is meaningless for values that are not surrogates.
 *
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)!=0x400)
| 85 |  | 
/**
 * Given that c is already known to be a surrogate (U16_IS_SURROGATE(c)),
 * tests whether it is a trail surrogate.
 * Only bit 10 (0x400) is inspected; it is set for trail surrogates.
 * The answer is meaningless for values that are not surrogates.
 *
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 4.2
 */
#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)==0x400)
| 94 |  | 
/**
 * Helper constant for U16_GET_SUPPLEMENTARY.
 * It is the value that has to be subtracted from (lead<<10)+trail to obtain
 * the code point: the contribution of the lowest lead surrogate (0xd800<<10)
 * plus the lowest trail surrogate (0xdc00), minus the first supplementary
 * code point (0x10000).
 * @internal
 */
#define U16_SURROGATE_OFFSET ((0xd800<<10)+(0xdc00-0x10000))
| 100 |  | 
/**
 * Combines a lead and a trail surrogate into the supplementary
 * code point (U+10000..U+10ffff) that the pair encodes.
 * No validation is performed: the result is undefined unless the
 * arguments really are a lead and a trail surrogate.
 * Each argument is evaluated exactly once.
 *
 * @param lead lead surrogate (U+d800..U+dbff)
 * @param trail trail surrogate (U+dc00..U+dfff)
 * @return supplementary code point (U+10000..U+10ffff)
 * @stable ICU 2.4
 */
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    ((((UChar32)(lead))<<10)-U16_SURROGATE_OFFSET+((UChar32)(trail)))
| 114 |  | 
| 115 |  | 
/**
 * Get the lead surrogate (0xd800..0xdbff) for a
 * supplementary code point (0x10000..0x10ffff).
 * The input is not validated; the result is only meaningful for
 * supplementary code points.
 *
 * The whole expansion is parenthesized so that the cast expression cannot
 * be mis-parsed next to an adjacent operator (for example
 * `sizeof U16_LEAD(c)` or a postfix operator).
 *
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return lead surrogate (U+d800..U+dbff) for supplementary
 * @stable ICU 2.4
 */
#define U16_LEAD(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))
| 124 |  | 
/**
 * Get the trail surrogate (0xdc00..0xdfff) for a
 * supplementary code point (0x10000..0x10ffff).
 * The input is not validated; only the low ten bits of the argument
 * contribute to the result.
 *
 * The whole expansion is parenthesized so that the cast expression cannot
 * be mis-parsed next to an adjacent operator (for example
 * `sizeof U16_TRAIL(c)` or a postfix operator).
 *
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return trail surrogate (U+dc00..U+dfff) for supplementary
 * @stable ICU 2.4
 */
#define U16_TRAIL(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))
| 133 |  | 
/**
 * Number of 16-bit code units needed to encode a code point in UTF-16:
 * 1 for values up to U+ffff, 2 for everything above.
 * The argument is compared as an unsigned 32-bit value and is not validated,
 * so the result is not defined for values that are not Unicode code points
 * (U+0000..U+10ffff).
 *
 * @param c 32-bit code point
 * @return 1 or 2
 * @stable ICU 2.4
 */
#define U16_LENGTH(c) ((uint32_t)(c)>0xffff ? 2 : 1)
| 142 |  | 
/**
 * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
 * This equals the largest value that U16_LENGTH can yield.
 * @return 2
 * @stable ICU 2.4
 */
#define U16_MAX_LENGTH 2
| 149 |  | 
/**
 * Random-access read of the code point at offset i; the offset itself
 * is left unchanged.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * i may address either half of a surrogate pair: for a lead surrogate the
 * following unit is read as well, for a trail surrogate the preceding one.
 * Neither neighbor is bounds-checked or validated, so the result is
 * undefined if i addresses a single, unpaired surrogate.
 * For sequential access U16_NEXT_UNSAFE or U16_NEXT is more efficient.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_GET
 * @stable ICU 2.4
 */
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)]; \
    if(U16_IS_SURROGATE(c)) { \
        if(U16_IS_SURROGATE_TRAIL(c)) { \
            /* second half of a pair: the lead is the unit before it */ \
            (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
        } else { \
            /* first half of a pair: the trail is the unit after it */ \
            (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 177 |  | 
/**
 * Random-access read of the code point at offset i; the offset itself
 * is left unchanged.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * i may address either half of a surrogate pair. A lead surrogate is
 * combined with the following unit only if i+1 is not the string length and
 * that unit is a trail surrogate; a trail surrogate is combined with the
 * preceding unit only if i>start and that unit is a lead surrogate.
 * In every other case c is set to the (unpaired) surrogate itself.
 *
 * The length can be negative for a NUL-terminated string.
 * For sequential access U16_NEXT_UNSAFE or U16_NEXT is more efficient.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 2.4
 */
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __u16Adj; \
        if(U16_IS_SURROGATE_TRAIL(c)) { \
            /* look behind, but never before start */ \
            if((i)>(start)) { \
                __u16Adj=(s)[(i)-1]; \
                if(U16_IS_LEAD(__u16Adj)) { \
                    (c)=U16_GET_SUPPLEMENTARY(__u16Adj, (c)); \
                } \
            } \
        } else if((i)+1!=(length)) { \
            /* look ahead, but never at or past length */ \
            __u16Adj=(s)[(i)+1]; \
            if(U16_IS_TRAIL(__u16Adj)) { \
                (c)=U16_GET_SUPPLEMENTARY((c), __u16Adj); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 216 |  | 
/**
 * Random-access read of the code point at offset i; the offset itself
 * is left unchanged.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * i may address either half of a surrogate pair. A lead surrogate is
 * combined with the following unit only if i+1 is not the string length and
 * that unit is a trail surrogate; a trail surrogate is combined with the
 * preceding unit only if i>start and that unit is a lead surrogate.
 * In every other case the surrogate is unpaired and c is set to U+FFFD.
 *
 * The length can be negative for a NUL-terminated string.
 * For sequential access U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD is more efficient.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 60
 */
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __u16Adj; \
        if(U16_IS_SURROGATE_TRAIL(c)) { \
            if((i)<=(start) || !U16_IS_LEAD(__u16Adj=(s)[(i)-1])) { \
                (c)=0xfffd; /* no lead surrogate in front of it */ \
            } else { \
                (c)=U16_GET_SUPPLEMENTARY(__u16Adj, (c)); \
            } \
        } else { \
            if((i)+1==(length) || !U16_IS_TRAIL(__u16Adj=(s)[(i)+1])) { \
                (c)=0xfffd; /* no trail surrogate behind it */ \
            } else { \
                (c)=U16_GET_SUPPLEMENTARY((c), __u16Adj); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 259 |  | 
| 260 | /* definitions with forward iteration --------------------------------------- */ | 
| 261 |  | 
/**
 * Reads the code point that starts at offset i and moves i past it.
 * (Post-incrementing forward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * If the unit at i is a lead surrogate, the next unit is consumed as its
 * trail surrogate without any check, so the result is undefined for a
 * single, unpaired lead surrogate.
 * If the unit at i is a trail surrogate, that unit itself is returned
 * as the code point.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_NEXT
 * @stable ICU 2.4
 */
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)]; \
    ++(i); \
    if(U16_IS_LEAD(c)) { \
        (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)]); \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
| 287 |  | 
/**
 * Reads the code point that starts at offset i and moves i past it.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * A lead surrogate is combined with the following unit only if that unit
 * lies before the string length and is a trail surrogate; both units are
 * then consumed. Otherwise exactly one unit is consumed, and for a trail
 * surrogate or an unpaired lead surrogate c is set to that surrogate.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)]; \
    ++(i); \
    if(U16_IS_LEAD(c) && (i)!=(length)) { \
        const uint16_t __u16Trail=(s)[(i)]; \
        if(U16_IS_TRAIL(__u16Trail)) { \
            (c)=U16_GET_SUPPLEMENTARY((c), __u16Trail); \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 319 |  | 
/**
 * Reads the code point that starts at offset i and moves i past it.
 * (Post-incrementing forward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * A lead surrogate is combined with the following unit only if that unit
 * lies before the string length and is a trail surrogate; both units are
 * then consumed. Otherwise exactly one unit is consumed, and for a trail
 * surrogate or an unpaired lead surrogate c is set to U+FFFD.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * @stable ICU 60
 */
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)]; \
    ++(i); \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __u16Trail; \
        if(U16_IS_SURROGATE_TRAIL(c) || (i)==(length) || \
                !U16_IS_TRAIL(__u16Trail=(s)[(i)])) { \
            (c)=0xfffd; /* unpaired surrogate */ \
        } else { \
            (c)=U16_GET_SUPPLEMENTARY((c), __u16Trail); \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 353 |  | 
/**
 * Writes a code point into a string buffer at offset i, using 1 or 2 code
 * units, and advances i past what was written (post-increment).
 * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
 * Otherwise, the result is undefined.
 *
 * Values up to U+ffff are stored as one unit; anything larger is stored as
 * a lead/trail surrogate pair.
 *
 * @param s UChar * string buffer (written to)
 * @param i string offset
 * @param c code point to append
 * @see U16_APPEND
 * @stable ICU 2.4
 */
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)>0xffff) { \
        (s)[(i)]=(uint16_t)(((c)>>10)+0xd7c0); \
        ++(i); \
        (s)[(i)]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        ++(i); \
    } else { \
        (s)[(i)]=(uint16_t)(c); \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
| 375 |  | 
/**
 * Writes a code point into a string buffer at offset i, using 1 or 2 code
 * units, and advances i past what was written (post-increment).
 * "Safe" macro, checks for a valid code point.
 *
 * Values up to U+ffff are stored as one unit without a capacity check
 * (the caller guarantees i<capacity). Larger values up to U+10ffff are
 * stored as a surrogate pair, but only if both units fit (i+1<capacity).
 * If the code point is above U+10ffff or the pair does not fit, nothing is
 * written, i is left unchanged and isError is set to true.
 *
 * @param s UChar * string buffer (written to)
 * @param i string offset, must be i<capacity
 * @param capacity size of the string buffer
 * @param c code point to append
 * @param isError output UBool set to true if an error occurs, otherwise not modified
 * @see U16_APPEND_UNSAFE
 * @stable ICU 2.4
 */
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)>0xffff) { \
        if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
            (s)[(i)]=(uint16_t)(((c)>>10)+0xd7c0); \
            ++(i); \
            (s)[(i)]=(uint16_t)(((c)&0x3ff)|0xdc00); \
            ++(i); \
        } else /* c>0x10ffff or not enough space */ { \
            (isError)=true; \
        } \
    } else { \
        (s)[(i)]=(uint16_t)(c); \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
| 403 |  | 
/**
 * Moves offset i forward over one code point: by two code units if the
 * unit at i is a lead surrogate, otherwise by one.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16; the unit after a lead
 * surrogate is skipped without being examined.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_FWD_1
 * @stable ICU 2.4
 */
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    ++(i); \
    if(U16_IS_LEAD((s)[(i)-1])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
| 419 |  | 
/**
 * Moves offset i forward over one code point.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * i always advances by one unit; it advances by a second unit only if the
 * first was a lead surrogate, the new offset is not the string length, and
 * the unit there is a trail surrogate.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @see U16_FWD_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    ++(i); \
    if(U16_IS_LEAD((s)[(i)-1])) { \
        if((i)!=(length) && U16_IS_TRAIL((s)[(i)])) { \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 438 |  | 
/**
 * Moves offset i forward over n code points by applying
 * U16_FWD_1_UNSAFE n times. Nothing happens if n is zero or negative.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16 and performs no
 * string boundary checks.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_FWD_N
 * @stable ICU 2.4
 */
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    for(int32_t __u16Count=(n); __u16Count>0; --__u16Count) { \
        U16_FWD_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
| 458 |  | 
/**
 * Moves offset i forward over up to n code points by applying U16_FWD_1
 * repeatedly. (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * Stepping stops early when i reaches the string length or, for a negative
 * length, when the unit at i is NUL. Nothing happens if n is zero or negative.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param n number of code points to skip
 * @see U16_FWD_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    for(int32_t __u16Count=(n); \
            __u16Count>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); \
            --__u16Count) { \
        U16_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END
| 481 |  | 
/**
 * Adjust a random-access offset to a code point boundary
 * at the start of a code point.
 * If the offset points to the trail surrogate of a surrogate pair,
 * then the offset is decremented.
 * Otherwise, it is not modified.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * Only the unit at i is examined: the offset is decremented for any trail
 * surrogate, without checking that a lead surrogate precedes it or that
 * i is past the start of the string.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_START
 * @stable ICU 2.4
 */
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
| 500 |  | 
/**
 * Snaps a random-access offset back to the start of the code point
 * that it points into.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * i is decremented only if the unit at i is a trail surrogate, i is
 * greater than start, and the unit before i is a lead surrogate.
 * In every other case i is left unchanged.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i
 * @see U16_SET_CP_START_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[(i)])) { \
        if((i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 520 |  | 
| 521 | /* definitions with backward iteration -------------------------------------- */ | 
| 522 |  | 
/**
 * Moves offset i back to the previous code point boundary and reads the
 * code point between the new and the old offset.
 * (Pre-decrementing backward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The input offset may be the same as the string length.
 * If the unit before i is a trail surrogate, the unit before that is
 * consumed as its lead surrogate without any check, so the result is
 * undefined for a single, unpaired trail surrogate.
 * If the unit before i is a lead surrogate, that unit itself is returned
 * as the code point.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_PREV
 * @stable ICU 2.4
 */
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
    (c)=(s)[(i)]; \
    if(U16_IS_TRAIL(c)) { \
        --(i); \
        (c)=U16_GET_SUPPLEMENTARY((s)[(i)], (c)); \
    } \
} UPRV_BLOCK_MACRO_END
| 549 |  | 
/**
 * Moves offset i back to the previous code point boundary and reads the
 * code point between the new and the old offset.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * A trail surrogate is combined with the unit before it only if that unit
 * lies at or after start and is a lead surrogate; both units are then
 * consumed. Otherwise exactly one unit is consumed, and for a lead
 * surrogate or an unpaired trail surrogate c is set to that surrogate.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 2.4
 */
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
    (c)=(s)[(i)]; \
    if(U16_IS_TRAIL(c) && (i)>(start)) { \
        const uint16_t __u16Lead=(s)[(i)-1]; \
        if(U16_IS_LEAD(__u16Lead)) { \
            (c)=U16_GET_SUPPLEMENTARY(__u16Lead, (c)); \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 580 |  | 
/**
 * Moves offset i back to the previous code point boundary and reads the
 * code point between the new and the old offset.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * A trail surrogate is combined with the unit before it only if that unit
 * lies at or after start and is a lead surrogate; both units are then
 * consumed. Otherwise exactly one unit is consumed, and for a lead
 * surrogate or an unpaired trail surrogate c is set to U+FFFD.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 60
 */
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
    (c)=(s)[(i)]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __u16Lead; \
        if(U16_IS_SURROGATE_LEAD(c) || (i)<=(start) || \
                !U16_IS_LEAD(__u16Lead=(s)[(i)-1])) { \
            (c)=0xfffd; /* unpaired surrogate */ \
        } else { \
            (c)=U16_GET_SUPPLEMENTARY(__u16Lead, (c)); \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 613 |  | 
/**
 * Moves offset i backward over one code point: by two code units if the
 * unit before i is a trail surrogate, otherwise by one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16; the unit before a trail
 * surrogate is skipped without being examined.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_BACK_1
 * @stable ICU 2.4
 */
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
    if(U16_IS_TRAIL((s)[(i)])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
| 630 |  | 
/**
 * Moves offset i backward over one code point.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * i always moves back by one unit; it moves back by a second unit only if
 * the first was a trail surrogate, the new offset is still greater than
 * start, and the unit before it is a lead surrogate.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @see U16_BACK_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
    if(U16_IS_TRAIL((s)[(i)])) { \
        if((i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 648 |  | 
/**
 * Moves offset i backward over n code points by applying
 * U16_BACK_1_UNSAFE n times. Nothing happens if n is zero or negative.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16 and performs no
 * string boundary checks.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_BACK_N
 * @stable ICU 2.4
 */
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    for(int32_t __u16Count=(n); __u16Count>0; --__u16Count) { \
        U16_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
| 669 |  | 
/**
 * Moves offset i backward over up to n code points by applying U16_BACK_1
 * repeatedly. (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * Stepping stops early once i is no longer greater than start.
 * Nothing happens if n is zero or negative.
 *
 * @param s const UChar * string
 * @param start start of string
 * @param i string offset, must be start<i
 * @param n number of code points to skip
 * @see U16_BACK_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    for(int32_t __u16Count=(n); __u16Count>0 && (i)>(start); --__u16Count) { \
        U16_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END
| 691 |  | 
/**
 * Adjust a random-access offset to a code point boundary after a code point.
 * If the offset is behind the lead surrogate of a surrogate pair,
 * then the offset is incremented.
 * Otherwise, it is not modified.
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * Only the unit before i is examined (so i-1 must be a readable offset):
 * the offset is incremented after any lead surrogate, without checking
 * that a trail surrogate follows or that i is before the end of the string.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_LIMIT
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)-1])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
| 710 |  | 
/**
 * Snaps a random-access offset forward to the boundary after the code point
 * that it points into.
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * i is incremented only if it lies strictly between start and the string
 * length (or the length is negative), the unit before i is a lead surrogate,
 * and the unit at i is a trail surrogate.
 * In every other case i is left unchanged.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, start<=i<=length
 * @param length int32_t string length
 * @see U16_SET_CP_LIMIT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if((i)>(start) && ((length)<0 || (i)<(length))) { \
        /* i is inside the string: check whether it splits a pair */ \
        if(U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[(i)])) { \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
| 733 |  | 
| 734 | #endif | 
| 735 |  |