1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * |
6 | * Copyright (C) 2002-2011 International Business Machines |
7 | * Corporation and others. All Rights Reserved. |
8 | * |
9 | ******************************************************************************* |
10 | * file name: uiter.h |
11 | * encoding: UTF-8 |
12 | * tab size: 8 (not used) |
13 | * indentation:4 |
14 | * |
15 | * created on: 2002jan18 |
16 | * created by: Markus W. Scherer |
17 | */ |
18 | |
19 | #ifndef __UITER_H__ |
20 | #define __UITER_H__ |
21 | |
22 | /** |
23 | * \file |
24 | * \brief C API: Unicode Character Iteration |
25 | * |
26 | * @see UCharIterator |
27 | */ |
28 | |
29 | #include "unicode/utypes.h" |
30 | |
31 | #if U_SHOW_CPLUSPLUS_API |
32 | U_NAMESPACE_BEGIN |
33 | |
34 | class CharacterIterator; /* forward declaration for uiter_setCharacterIterator() */ |
35 | class Replaceable; /* forward declaration for uiter_setReplaceable() */ |
36 | |
37 | U_NAMESPACE_END |
38 | #endif /* U_SHOW_CPLUSPLUS_API */ |
39 | |
40 | U_CDECL_BEGIN |
41 | |
42 | struct UCharIterator; /* forward declaration; the full definition follows the function types */ |
43 | typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */ |
44 | |
45 | /** |
46 | * Origin constants for UCharIterator.getIndex() and UCharIterator.move(); they select the 0, start, current, limit, or length index as the reference point. |
47 | * @see UCharIteratorMove |
48 | * @see UCharIterator |
49 | * @stable ICU 2.1 |
50 | */ |
51 | typedef enum UCharIteratorOrigin { |
52 | UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH /* implicit values 0..4 in this order */ |
53 | } UCharIteratorOrigin; |
54 | |
55 | /** Constants for UCharIterator. @stable ICU 2.6 */ |
56 | enum { |
57 | /** |
58 | * Constant value that may be returned by UCharIterator.move() (see UCharIteratorMove), |
59 | * indicating that the final UTF-16 index is not known, but that the move succeeded. |
60 | * This can occur when moving relative to limit or length, or |
61 | * when moving relative to the current index after a setState() |
62 | * when the current UTF-16 index is not known. |
63 | * |
64 | * It would be very inefficient to have to count from the beginning of the text |
65 | * just to get the current/limit/length index after moving relative to it. |
66 | * The actual index can be determined with getIndex(UITER_CURRENT), |
67 | * which will count the UChars if necessary. |
68 | * |
69 | * @stable ICU 2.6 |
70 | */ |
71 | UITER_UNKNOWN_INDEX=-2 |
72 | }; |
73 | |
74 | |
75 | /** |
76 | * Constant for UCharIterator.getState() indicating an error or |
77 | * an unknown state; its value is the uint32_t with all bits set. |
78 | * Returned by uiter_getState()/UCharIteratorGetState |
79 | * when an error occurs. |
80 | * Also, some UCharIterator implementations may not be able to return |
81 | * a valid state for each position. This will be clearly documented |
82 | * for each such iterator (none of the public ones here). |
83 | * |
84 | * @stable ICU 2.6 |
85 | */ |
86 | #define UITER_NO_STATE ((uint32_t)0xffffffff) |
87 | |
88 | /** |
89 | * Function type declaration for UCharIterator.getIndex(). |
90 | * |
91 | * Gets the index selected by origin: the current position, the start or limit |
92 | * of the iteration range, or the 0 or length index. |
93 | * |
94 | * This function may perform slowly for UITER_CURRENT after setState() was called, |
95 | * or for UITER_LENGTH, because an iterator implementation may have to count |
96 | * UChars if the underlying storage is not UTF-16. |
97 | * |
98 | * @param iter the UCharIterator structure ("this pointer") |
99 | * @param origin selects the 0, start, limit, length, or current index to return |
100 | * @return the requested index, or U_SENTINEL in an error condition |
101 | * |
102 | * @see UCharIteratorOrigin |
103 | * @see UCharIterator |
104 | * @stable ICU 2.1 |
105 | */ |
106 | typedef int32_t U_CALLCONV |
107 | UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); |
108 | |
109 | /** |
110 | * Function type declaration for UCharIterator.move(). |
111 | * |
112 | * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). |
113 | * |
114 | * Moves the current position relative to the index selected by origin |
115 | * (0, start, limit, length, or the current position itself). |
116 | * The movement is expressed in numbers of code units forward |
117 | * or backward by specifying a positive or negative delta. |
118 | * Out of bounds movement will be pinned to the start or limit. |
119 | * |
120 | * This function may perform slowly for moving relative to UITER_LENGTH |
121 | * because an iterator implementation may have to count the rest of the |
122 | * UChars if the native storage is not UTF-16. |
123 | * |
124 | * When moving relative to the limit or length, or |
125 | * relative to the current position after setState() was called, |
126 | * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient |
127 | * determination of the actual UTF-16 index. |
128 | * The actual index can be determined with getIndex(UITER_CURRENT), |
129 | * which will count the UChars if necessary. |
130 | * See UITER_UNKNOWN_INDEX for details. |
131 | * |
132 | * @param iter the UCharIterator structure ("this pointer") |
133 | * @param delta number of code units to move relative to origin; can be positive, zero, or negative |
134 | * @param origin move relative to the 0, start, limit, length, or current index |
135 | * @return the new index, or U_SENTINEL on an error condition, |
136 | * or UITER_UNKNOWN_INDEX when the index is not known. |
137 | * |
138 | * @see UCharIteratorOrigin |
139 | * @see UCharIterator |
140 | * @see UITER_UNKNOWN_INDEX |
141 | * @stable ICU 2.1 |
142 | */ |
143 | typedef int32_t U_CALLCONV |
144 | UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); |
145 | |
146 | /** |
147 | * Function type declaration for UCharIterator.hasNext(). |
148 | * |
149 | * Check if current() and next() can still |
150 | * return another code unit. |
151 | * |
152 | * @param iter the UCharIterator structure ("this pointer") |
153 | * @return boolean value for whether current() and next() can still return another code unit (rather than U_SENTINEL) |
154 | * |
155 | * @see UCharIterator |
156 | * @stable ICU 2.1 |
157 | */ |
158 | typedef UBool U_CALLCONV |
159 | UCharIteratorHasNext(UCharIterator *iter); |
160 | |
161 | /** |
162 | * Function type declaration for UCharIterator.hasPrevious(). |
163 | * |
164 | * Check if previous() can still return another code unit. |
165 | * |
166 | * @param iter the UCharIterator structure ("this pointer") |
167 | * @return boolean value for whether previous() can still return another code unit (rather than U_SENTINEL) |
168 | * |
169 | * @see UCharIterator |
170 | * @stable ICU 2.1 |
171 | */ |
172 | typedef UBool U_CALLCONV |
173 | UCharIteratorHasPrevious(UCharIterator *iter); |
174 | |
175 | /** |
176 | * Function type declaration for UCharIterator.current(). |
177 | * |
178 | * Return the code unit at the current position, |
179 | * or U_SENTINEL if there is none (index is at the limit). |
180 | * |
181 | * @param iter the UCharIterator structure ("this pointer") |
182 | * @return the current code unit, or U_SENTINEL if the index is at the limit |
183 | * |
184 | * @see UCharIterator |
185 | * @stable ICU 2.1 |
186 | */ |
187 | typedef UChar32 U_CALLCONV |
188 | UCharIteratorCurrent(UCharIterator *iter); |
189 | |
190 | /** |
191 | * Function type declaration for UCharIterator.next(). |
192 | * |
193 | * Return the code unit at the current index and increment |
194 | * the index (post-increment, like s[i++]), |
195 | * or return U_SENTINEL if there is none (index is at the limit). |
196 | * |
197 | * @param iter the UCharIterator structure ("this pointer") |
198 | * @return the current code unit (and post-increment the current index), or U_SENTINEL if the index is at the limit |
199 | * |
200 | * @see UCharIterator |
201 | * @stable ICU 2.1 |
202 | */ |
203 | typedef UChar32 U_CALLCONV |
204 | UCharIteratorNext(UCharIterator *iter); |
205 | |
206 | /** |
207 | * Function type declaration for UCharIterator.previous(). |
208 | * |
209 | * Decrement the index and return the code unit from there |
210 | * (pre-decrement, like s[--i]), |
211 | * or return U_SENTINEL if there is none (index is at the start). |
212 | * |
213 | * @param iter the UCharIterator structure ("this pointer") |
214 | * @return the previous code unit (after pre-decrementing the current index), or U_SENTINEL if the index is at the start |
215 | * |
216 | * @see UCharIterator |
217 | * @stable ICU 2.1 |
218 | */ |
219 | typedef UChar32 U_CALLCONV |
220 | UCharIteratorPrevious(UCharIterator *iter); |
221 | |
222 | /** |
223 | * Function type declaration for UCharIterator.reservedFn(). |
224 | * Reserved for future use; the UCharIterator.reservedFn pointer is currently NULL. |
225 | * |
226 | * @param iter the UCharIterator structure ("this pointer") |
227 | * @param something some integer argument |
228 | * @return some integer |
229 | * |
230 | * @see UCharIterator |
231 | * @stable ICU 2.1 |
232 | */ |
233 | typedef int32_t U_CALLCONV |
234 | UCharIteratorReserved(UCharIterator *iter, int32_t something); |
235 | |
236 | /** |
237 | * Function type declaration for UCharIterator.getState(). |
238 | * |
239 | * Get the "state" of the iterator in the form of a single 32-bit word. |
240 | * It is recommended that the state value be calculated to be as small as |
241 | * is feasible. For strings with limited lengths, fewer than 32 bits may |
242 | * be sufficient. |
243 | * |
244 | * This is used together with setState()/UCharIteratorSetState |
245 | * to save and restore the iterator position more efficiently than with |
246 | * getIndex()/move(). |
247 | * |
248 | * The iterator state is defined as a uint32_t value because it is designed |
249 | * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state |
250 | * of the character iterator. |
251 | * |
252 | * With some UCharIterator implementations (e.g., UTF-8), |
253 | * getting and setting the UTF-16 index with existing functions |
254 | * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but |
255 | * relatively slow because the iterator has to "walk" from a known index |
256 | * to the requested one. |
257 | * This takes more time the farther it needs to go. |
258 | * |
259 | * An opaque state value allows an iterator implementation to provide |
260 | * an internal index (UTF-8: the source byte array index) for |
261 | * fast, constant-time restoration. |
262 | * |
263 | * After calling setState(), a getIndex(UITER_CURRENT) may be slow because |
264 | * the UTF-16 index may not be restored as well, but the iterator can deliver |
265 | * the correct text contents and move relative to the current position |
266 | * without performance degradation. |
267 | * |
268 | * Some UCharIterator implementations may not be able to return |
269 | * a valid state for each position, in which case they return UITER_NO_STATE instead. |
270 | * This will be clearly documented for each such iterator (none of the public ones here). |
271 | * |
272 | * @param iter the UCharIterator structure ("this pointer") |
273 | * @return the state word, or UITER_NO_STATE if no valid state can be returned for the current position |
274 | * |
275 | * @see UCharIterator |
276 | * @see UCharIteratorSetState |
277 | * @see UITER_NO_STATE |
278 | * @stable ICU 2.6 |
279 | */ |
280 | typedef uint32_t U_CALLCONV |
281 | UCharIteratorGetState(const UCharIterator *iter); |
282 | |
283 | /** |
284 | * Function type declaration for UCharIterator.setState(). |
285 | * |
286 | * Restore the "state" of the iterator using a state word from a getState() call. |
287 | * The iterator object need not be the same one on which getState() was called, |
288 | * but it must be of the same type (set up using the same uiter_setXYZ function) |
289 | * and it must iterate over the same string |
290 | * (binary identical regardless of memory address). |
291 | * For more about the state word see UCharIteratorGetState. |
292 | * |
293 | * After calling setState(), a getIndex(UITER_CURRENT) may be slow because |
294 | * the UTF-16 index may not be restored as well, but the iterator can deliver |
295 | * the correct text contents and move relative to the current position |
296 | * without performance degradation. |
297 | * |
298 | * @param iter the UCharIterator structure ("this pointer") |
299 | * @param state the state word from a getState() call |
300 | * on a same-type, same-string iterator |
301 | * @param pErrorCode Must be a valid pointer to an error code value, |
302 | * which must not indicate a failure before the function call. |
303 | * |
304 | * @see UCharIterator |
305 | * @see UCharIteratorGetState |
306 | * @stable ICU 2.6 |
307 | */ |
308 | typedef void U_CALLCONV |
309 | UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); |
310 | |
311 | |
312 | /** |
313 | * C API for code unit iteration. |
314 | * This can be used as a C wrapper around |
315 | * CharacterIterator, Replaceable, or implemented using simple strings, etc. |
316 | * |
317 | * There are two roles for using UCharIterator: |
318 | * |
319 | * A "provider" sets the necessary function pointers and controls the "protected" |
320 | * fields of the UCharIterator structure. A "provider" passes a UCharIterator |
321 | * into C APIs that need a UCharIterator as an abstract, flexible string interface. |
322 | * |
323 | * Implementations of such C APIs are "callers" of UCharIterator functions; |
324 | * they only use the "public" function pointers and never access the "protected" |
325 | * fields directly. |
326 | * |
327 | * The current() and next() functions only check the current index against the |
328 | * limit, and previous() only checks the current index against the start, |
329 | * to see if the iterator already reached the end of the iteration range. |
330 | * |
331 | * The assumption - in all iterators - is that the index is moved via the API, |
332 | * which means it won't go out of bounds, or the index is modified by |
333 | * user code that knows enough about the iterator implementation to set valid |
334 | * index values. |
335 | * |
336 | * UCharIterator functions return code unit values 0..0xffff, |
337 | * or U_SENTINEL if the iteration bounds are reached. |
338 | * |
339 | * @stable ICU 2.1 |
340 | */ |
341 | struct UCharIterator { |
342 | /** |
343 | * (protected) Pointer to string or wrapped object or similar. |
344 | * Not used by caller. |
345 | * @stable ICU 2.1 |
346 | */ |
347 | const void *context; |
348 | |
349 | /** |
350 | * (protected) Length of string or similar. |
351 | * Not used by caller. |
352 | * @stable ICU 2.1 |
353 | */ |
354 | int32_t length; |
355 | |
356 | /** |
357 | * (protected) Start index or similar. |
358 | * Not used by caller. |
359 | * @stable ICU 2.1 |
360 | */ |
361 | int32_t start; |
362 | |
363 | /** |
364 | * (protected) Current index or similar. |
365 | * Not used by caller. |
366 | * @stable ICU 2.1 |
367 | */ |
368 | int32_t index; |
369 | |
370 | /** |
371 | * (protected) Limit index or similar. |
372 | * Not used by caller. |
373 | * @stable ICU 2.1 |
374 | */ |
375 | int32_t limit; |
376 | |
377 | /** |
378 | * (protected) Used by UTF-8 iterators and possibly others. |
379 | * @stable ICU 2.1 |
380 | */ |
381 | int32_t reservedField; |
382 | |
383 | /** |
384 | * (public) Returns the index selected by the origin argument: the current position, |
385 | * the start or limit index of the iteration range, or the 0 or length index. |
386 | * |
387 | * @see UCharIteratorGetIndex |
388 | * @stable ICU 2.1 |
389 | */ |
390 | UCharIteratorGetIndex *getIndex; |
391 | |
392 | /** |
393 | * (public) Moves the current position relative to the index selected by the |
394 | * origin argument (0, start, limit, length, or the current position itself). |
395 | * The movement is expressed in numbers of code units forward |
396 | * or backward by specifying a positive or negative delta. |
397 | * |
398 | * @see UCharIteratorMove |
399 | * @stable ICU 2.1 |
400 | */ |
401 | UCharIteratorMove *move; |
402 | |
403 | /** |
404 | * (public) Check if current() and next() can still |
405 | * return another code unit. |
406 | * |
407 | * @see UCharIteratorHasNext |
408 | * @stable ICU 2.1 |
409 | */ |
410 | UCharIteratorHasNext *hasNext; |
411 | |
412 | /** |
413 | * (public) Check if previous() can still return another code unit. |
414 | * |
415 | * @see UCharIteratorHasPrevious |
416 | * @stable ICU 2.1 |
417 | */ |
418 | UCharIteratorHasPrevious *hasPrevious; |
419 | |
420 | /** |
421 | * (public) Return the code unit at the current position, |
422 | * or U_SENTINEL if there is none (index is at the limit). |
423 | * |
424 | * @see UCharIteratorCurrent |
425 | * @stable ICU 2.1 |
426 | */ |
427 | UCharIteratorCurrent *current; |
428 | |
429 | /** |
430 | * (public) Return the code unit at the current index and increment |
431 | * the index (post-increment, like s[i++]), |
432 | * or return U_SENTINEL if there is none (index is at the limit). |
433 | * |
434 | * @see UCharIteratorNext |
435 | * @stable ICU 2.1 |
436 | */ |
437 | UCharIteratorNext *next; |
438 | |
439 | /** |
440 | * (public) Decrement the index and return the code unit from there |
441 | * (pre-decrement, like s[--i]), |
442 | * or return U_SENTINEL if there is none (index is at the start). |
443 | * |
444 | * @see UCharIteratorPrevious |
445 | * @stable ICU 2.1 |
446 | */ |
447 | UCharIteratorPrevious *previous; |
448 | |
449 | /** |
450 | * (public) Reserved for future use. Currently NULL. |
451 | * |
452 | * @see UCharIteratorReserved |
453 | * @stable ICU 2.1 |
454 | */ |
455 | UCharIteratorReserved *reservedFn; |
456 | |
457 | /** |
458 | * (public) Return the state of the iterator, to be restored later with setState(). |
459 | * This function pointer is NULL if the iterator does not implement it. |
460 | * |
461 | * @see UCharIteratorGetState |
462 | * @stable ICU 2.6 |
463 | */ |
464 | UCharIteratorGetState *getState; |
465 | |
466 | /** |
467 | * (public) Restore the iterator state from the state word from a call |
468 | * to getState(). |
469 | * This function pointer is NULL if the iterator does not implement it. |
470 | * |
471 | * @see UCharIteratorSetState |
472 | * @stable ICU 2.6 |
473 | */ |
474 | UCharIteratorSetState *setState; |
475 | }; |
476 | |
477 | /** |
478 | * Helper function for UCharIterator to get the code point |
479 | * at the current index. |
480 | * |
481 | * Return the code point that includes the code unit at the current position, |
482 | * or U_SENTINEL if there is none (index is at the limit). |
483 | * If the current code unit is a lead or trail surrogate, |
484 | * then the following or preceding surrogate is used to form |
485 | * the code point value. |
486 | * |
487 | * @param iter the UCharIterator structure ("this pointer") |
488 | * @return the current code point, or U_SENTINEL if the index is at the limit |
489 | * |
490 | * @see UCharIterator |
491 | * @see U16_GET |
492 | * @see UnicodeString::char32At() |
493 | * @stable ICU 2.1 |
494 | */ |
495 | U_CAPI UChar32 U_EXPORT2 |
496 | uiter_current32(UCharIterator *iter); |
497 | |
498 | /** |
499 | * Helper function for UCharIterator to get the next code point. |
500 | * |
501 | * Return the code point at the current index and increment |
502 | * the index (post-increment, like s[i++]), |
503 | * or return U_SENTINEL if there is none (index is at the limit). |
504 | * |
505 | * @param iter the UCharIterator structure ("this pointer") |
506 | * @return the current code point (and post-increment the current index), or U_SENTINEL if the index is at the limit |
507 | * |
508 | * @see UCharIterator |
509 | * @see U16_NEXT |
510 | * @stable ICU 2.1 |
511 | */ |
512 | U_CAPI UChar32 U_EXPORT2 |
513 | uiter_next32(UCharIterator *iter); |
514 | |
515 | /** |
516 | * Helper function for UCharIterator to get the previous code point. |
517 | * |
518 | * Decrement the index and return the code point from there |
519 | * (pre-decrement, like s[--i]), |
520 | * or return U_SENTINEL if there is none (index is at the start). |
521 | * |
522 | * @param iter the UCharIterator structure ("this pointer") |
523 | * @return the previous code point (after pre-decrementing the current index), or U_SENTINEL if the index is at the start |
524 | * |
525 | * @see UCharIterator |
526 | * @see U16_PREV |
527 | * @stable ICU 2.1 |
528 | */ |
529 | U_CAPI UChar32 U_EXPORT2 |
530 | uiter_previous32(UCharIterator *iter); |
531 | |
532 | /** |
533 | * Get the "state" of the iterator in the form of a single 32-bit word. |
534 | * This is a convenience function that calls iter->getState(iter) |
535 | * if iter->getState is not NULL; |
536 | * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. |
537 | * |
538 | * Some UCharIterator implementations may not be able to return |
539 | * a valid state for each position, in which case they return UITER_NO_STATE instead. |
540 | * This will be clearly documented for each such iterator (none of the public ones here). |
541 | * |
542 | * @param iter the UCharIterator structure ("this pointer") |
543 | * @return the state word, or UITER_NO_STATE if iter->getState is NULL or an error occurs |
544 | * |
545 | * @see UCharIterator |
546 | * @see UCharIteratorGetState |
547 | * @see UITER_NO_STATE |
548 | * @stable ICU 2.6 |
549 | */ |
550 | U_CAPI uint32_t U_EXPORT2 |
551 | uiter_getState(const UCharIterator *iter); |
552 | |
553 | /** |
554 | * Restore the "state" of the iterator using a state word from a getState() call. |
555 | * This is a convenience function that calls iter->setState(iter, state, pErrorCode) |
556 | * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set. |
557 | * |
558 | * @param iter the UCharIterator structure ("this pointer") |
559 | * @param state the state word from a getState() call |
560 | * on a same-type, same-string iterator |
561 | * @param pErrorCode Must be a valid pointer to an error code value, |
562 | * which must not indicate a failure before the function call; set to U_UNSUPPORTED_ERROR if iter->setState is NULL. |
563 | * |
564 | * @see UCharIterator |
565 | * @see UCharIteratorSetState |
566 | * @stable ICU 2.6 |
567 | */ |
568 | U_CAPI void U_EXPORT2 |
569 | uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); |
570 | |
571 | /** |
572 | * Set up a UCharIterator to iterate over a string. |
573 | * |
574 | * Sets the UCharIterator function pointers for iteration over the string s |
575 | * with iteration boundaries start=index=0 and length=limit=string length. |
576 | * The "provider" may set the start, index, and limit values at any time |
577 | * within the range 0..length. |
578 | * The length field will be ignored. |
579 | * |
580 | * The string pointer s is set into UCharIterator.context without copying |
581 | * or reallocating the string contents. |
582 | * |
583 | * getState() simply returns the current index. |
584 | * move() will always return the final index. |
585 | * |
586 | * @param iter UCharIterator structure to be set for iteration |
587 | * @param s String to iterate over (the pointer is stored in iter->context; the contents are not copied) |
588 | * @param length Length of s, or -1 if NUL-terminated |
589 | * |
590 | * @see UCharIterator |
591 | * @stable ICU 2.1 |
592 | */ |
593 | U_CAPI void U_EXPORT2 |
594 | uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); |
595 | |
596 | /** |
597 | * Set up a UCharIterator to iterate over a UTF-16BE string |
598 | * (byte vector with a big-endian pair of bytes per UChar). |
599 | * |
600 | * Everything works just like with a normal UChar iterator (uiter_setString), |
601 | * except that UChars are assembled from byte pairs, |
602 | * and that the length argument here is given in bytes (an even number). |
603 | * |
604 | * getState() simply returns the current index. |
605 | * move() will always return the final index. |
606 | * |
607 | * @param iter UCharIterator structure to be set for iteration |
608 | * @param s UTF-16BE string to iterate over |
609 | * @param length Length of s as an even number of bytes, or -1 if NUL-terminated |
610 | * (NUL means pair of 0 bytes at even index from s) |
611 | * |
612 | * @see UCharIterator |
613 | * @see uiter_setString |
614 | * @stable ICU 2.6 |
615 | */ |
616 | U_CAPI void U_EXPORT2 |
617 | uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); |
618 | |
619 | /** |
620 | * Set up a UCharIterator to iterate over a UTF-8 string. |
621 | * |
622 | * Sets the UCharIterator function pointers for iteration over the UTF-8 string s |
623 | * with UTF-8 iteration boundaries 0 and length. |
624 | * The implementation counts the UTF-16 index on the fly and |
625 | * lazily evaluates the UTF-16 length of the text. |
626 | * |
627 | * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. |
628 | * When the reservedField is not 0, then it contains a supplementary code point |
629 | * and the UTF-16 index is between the two corresponding surrogates. |
630 | * At that point, the UTF-8 index is behind that code point. |
631 | * |
632 | * The UTF-8 string pointer s is set into UCharIterator.context without copying |
633 | * or reallocating the string contents. |
634 | * |
635 | * getState() returns a state value consisting of |
636 | * - the current UTF-8 source byte index (bits 31..1) |
637 | * - a flag (bit 0) that indicates whether the UChar position is in the middle |
638 | * of a surrogate pair |
639 | * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) |
640 | * |
641 | * getState() cannot also encode the UTF-16 index in the state value. |
642 | * move(relative to limit or length), or |
643 | * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. |
644 | * |
645 | * @param iter UCharIterator structure to be set for iteration |
646 | * @param s UTF-8 string to iterate over (the pointer is stored in iter->context; the contents are not copied) |
647 | * @param length Length of s in bytes, or -1 if NUL-terminated |
648 | * |
649 | * @see UCharIterator |
650 | * @stable ICU 2.6 |
651 | */ |
652 | U_CAPI void U_EXPORT2 |
653 | uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); |
654 | |
655 | #if U_SHOW_CPLUSPLUS_API |
656 | |
657 | /** |
658 | * Set up a UCharIterator to wrap around a C++ CharacterIterator. |
659 | * |
660 | * Sets the UCharIterator function pointers for iteration using the |
661 | * CharacterIterator charIter. |
662 | * |
663 | * The CharacterIterator pointer charIter is set into UCharIterator.context |
664 | * without copying or cloning the CharacterIterator object. |
665 | * The other "protected" UCharIterator fields are set to 0 and will be ignored. |
666 | * The iteration index and boundaries are controlled by the CharacterIterator. |
667 | * |
668 | * getState() simply returns the current index. |
669 | * move() will always return the final index. |
670 | * |
671 | * @param iter UCharIterator structure to be set for iteration |
672 | * @param charIter CharacterIterator to wrap (the pointer is stored in iter->context; the object is not copied or cloned) |
673 | * |
674 | * @see UCharIterator |
675 | * @stable ICU 2.1 |
676 | */ |
677 | U_CAPI void U_EXPORT2 |
678 | uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter); |
679 | |
680 | /** |
681 | * Set up a UCharIterator to iterate over a C++ Replaceable. |
682 | * |
683 | * Sets the UCharIterator function pointers for iteration over the |
684 | * Replaceable rep with iteration boundaries start=index=0 and |
685 | * length=limit=rep->length(). |
686 | * The "provider" may set the start, index, and limit values at any time |
687 | * within the range 0..length=rep->length(). |
688 | * The length field will be ignored. |
689 | * |
690 | * The Replaceable pointer rep is set into UCharIterator.context without copying |
691 | * or cloning/reallocating the Replaceable object. |
692 | * |
693 | * getState() simply returns the current index. |
694 | * move() will always return the final index. |
695 | * |
696 | * @param iter UCharIterator structure to be set for iteration |
697 | * @param rep Replaceable to iterate over (the pointer is stored in iter->context; the object is not copied or cloned) |
698 | * |
699 | * @see UCharIterator |
700 | * @stable ICU 2.1 |
701 | */ |
702 | U_CAPI void U_EXPORT2 |
703 | uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep); |
704 | |
705 | #endif |
706 | |
707 | U_CDECL_END |
708 | |
709 | #endif |
710 | |