| 1 | // © 2018 and later: Unicode, Inc. and others. | 
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
| 3 |  | 
| 4 | // ucpmap.h | 
| 5 | // created: 2018sep03 Markus W. Scherer | 
| 6 |  | 
| 7 | #ifndef __UCPMAP_H__ | 
| 8 | #define __UCPMAP_H__ | 
| 9 |  | 
| 10 | #include "unicode/utypes.h" | 
| 11 |  | 
| 12 | U_CDECL_BEGIN | 
| 13 |  | 
| 14 | /** | 
| 15 |  * \file | 
| 16 |  * | 
| 17 |  * This file defines an abstract map from Unicode code points to integer values. | 
| 18 |  * | 
| 19 |  * @see UCPMap | 
| 20 |  * @see UCPTrie | 
| 21 |  * @see UMutableCPTrie | 
| 22 |  */ | 
| 23 |  | 
| 24 | /** | 
| 25 |  * Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values. | 
| 26 |  * This header only forward-declares the struct, so a UCPMap is used through pointers. | 
| 27 |  * @see UCPTrie | 
| 28 |  * @see UMutableCPTrie | 
| 29 |  * @stable ICU 63 | 
| 30 |  */ | 
| 31 | typedef struct UCPMap UCPMap; | 
| 32 |  | 
| 33 | /** | 
| 34 |  * Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates. | 
| 35 |  * Most users should use UCPMAP_RANGE_NORMAL. | 
| 36 |  * | 
| 37 |  * @see ucpmap_getRange | 
| 38 |  * @see ucptrie_getRange | 
| 39 |  * @see umutablecptrie_getRange | 
| 40 |  * @stable ICU 63 | 
| 41 |  */ | 
| 42 | enum UCPMapRangeOption { | 
| 43 |     /** | 
| 44 |      * ucpmap_getRange() enumerates all same-value ranges as stored in the map. | 
| 45 |      * Most users should use this option. | 
| 46 |      * @stable ICU 63 | 
| 47 |      */ | 
| 48 |     UCPMAP_RANGE_NORMAL, | 
| 49 |     /** | 
| 50 |      * ucpmap_getRange() enumerates all same-value ranges as stored in the map, | 
| 51 |      * except that lead surrogates (U+D800..U+DBFF) are treated as having the | 
| 52 |      * surrogateValue, which is passed to getRange() as a separate parameter. | 
| 53 |      * The surrogateValue is not transformed via filter(). | 
| 54 |      * See U_IS_LEAD(c). | 
| 55 |      * | 
| 56 |      * Most users should use UCPMAP_RANGE_NORMAL instead. | 
| 57 |      * | 
| 58 |      * This option is useful for maps that map surrogate code *units* to | 
| 59 |      * special values optimized for UTF-16 string processing | 
| 60 |      * or for special error behavior for unpaired surrogates, | 
| 61 |      * but those values are not to be associated with the lead surrogate code *points*. | 
| 62 |      * @stable ICU 63 | 
| 63 |      */ | 
| 64 |     UCPMAP_RANGE_FIXED_LEAD_SURROGATES, | 
| 65 |     /** | 
| 66 |      * ucpmap_getRange() enumerates all same-value ranges as stored in the map, | 
| 67 |      * except that all surrogates (U+D800..U+DFFF) are treated as having the | 
| 68 |      * surrogateValue, which is passed to getRange() as a separate parameter. | 
| 69 |      * The surrogateValue is not transformed via filter(). | 
| 70 |      * See U_IS_SURROGATE(c). | 
| 71 |      * | 
| 72 |      * Most users should use UCPMAP_RANGE_NORMAL instead. | 
| 73 |      * | 
| 74 |      * This option is useful for maps that map surrogate code *units* to | 
| 75 |      * special values optimized for UTF-16 string processing | 
| 76 |      * or for special error behavior for unpaired surrogates, | 
| 77 |      * but those values are not to be associated with the surrogate code *points*. | 
| 78 |      * @stable ICU 63 | 
| 79 |      */ | 
| 80 |     UCPMAP_RANGE_FIXED_ALL_SURROGATES | 
| 81 | }; | 
| 82 | #ifndef U_IN_DOXYGEN | 
| 83 | typedef enum UCPMapRangeOption UCPMapRangeOption; | 
| 84 | #endif | 
| 85 |  | 
| 86 | /** | 
| 87 |  * Returns the value for a code point as stored in the map, with range checking. | 
| 88 |  * Returns an implementation-defined error value if c is not in the range U+0000..U+10FFFF. | 
| 89 |  * | 
| 90 |  * @param map the map | 
| 91 |  * @param c the code point to look up | 
| 92 |  * @return the map value, | 
| 93 |  *         or an implementation-defined error value if the code point is not in the range U+0000..U+10FFFF | 
| 94 |  * @stable ICU 63 | 
| 95 |  */ | 
| 96 | U_CAPI uint32_t U_EXPORT2 | 
| 97 | ucpmap_get(const UCPMap *map, UChar32 c); | 
| 98 |  | 
| 99 | /** | 
| 100 |  * Callback function type: Modifies a map value. | 
| 101 |  * Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange(). | 
| 102 |  * The modified value will be returned by the getRange function. | 
| 103 |  * This is a function type, not a pointer type; ucpmap_getRange() takes a UCPMapValueFilter * argument. | 
| 104 |  * Can be used to ignore some of the value bits, | 
| 105 |  * make a filter for one of several values, | 
| 106 |  * return a value index computed from the map value, etc. | 
| 107 |  * | 
| 108 |  * @param context an opaque pointer, as passed into the getRange function | 
| 109 |  * @param value a value from the map | 
| 110 |  * @return the modified value, which the getRange function delivers instead of the stored map value | 
| 111 |  * @stable ICU 63 | 
| 112 |  */ | 
| 113 | typedef uint32_t U_CALLCONV | 
| 114 | UCPMapValueFilter(const void *context, uint32_t value); | 
| 115 |  | 
| 116 | /** | 
| 117 |  * Returns the last code point such that all code points from start to there have the same value. | 
| 118 |  * Can be used to efficiently iterate over all same-value ranges in a map. | 
| 119 |  * (This is normally faster than iterating over code points and get()ting each value, | 
| 120 |  * but much slower than a data structure that stores ranges directly.) | 
| 121 |  * | 
| 122 |  * If the UCPMapValueFilter function pointer is not NULL, then | 
| 123 |  * the value to be delivered is passed through that function, and the return value is the end | 
| 124 |  * of the range where all values are modified to the same actual value. | 
| 125 |  * The value is unchanged if that function pointer is NULL. | 
| 126 |  * | 
| 127 |  * Example: | 
| 128 |  * \code | 
| 129 |  * UChar32 start = 0, end; | 
| 130 |  * uint32_t value; | 
| 131 |  * while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0, | 
| 132 |  *                               NULL, NULL, &value)) >= 0) { | 
| 133 |  *     // Work with the range start..end and its value. | 
| 134 |  *     start = end + 1; | 
| 135 |  * }  // The loop ends once start is no longer a valid code point and ucpmap_getRange() returns -1. | 
| 136 |  * \endcode | 
| 137 |  * | 
| 138 |  * @param map the map | 
| 139 |  * @param start range start: the first code point of the range to be reported | 
| 140 |  * @param option defines whether surrogates are treated normally, | 
| 141 |  *               or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL | 
| 142 |  * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL | 
| 143 |  * @param filter a pointer to a function that may modify the map data value, | 
| 144 |  *     or NULL if the values from the map are to be used unmodified | 
| 145 |  * @param context an opaque pointer that is passed on to the filter function | 
| 146 |  * @param pValue if not NULL, receives the value that every code point start..end has; | 
| 147 |  *     may have been modified by filter(context, map value) | 
| 148 |  *     if that function pointer is not NULL | 
| 149 |  * @return the range end code point, or -1 if start is not a valid code point | 
| 150 |  * @stable ICU 63 | 
| 151 |  */ | 
| 152 | U_CAPI UChar32 U_EXPORT2 | 
| 153 | ucpmap_getRange(const UCPMap *map, UChar32 start, | 
| 154 |                 UCPMapRangeOption option, uint32_t surrogateValue, | 
| 155 |                 UCPMapValueFilter *filter, const void *context, uint32_t *pValue); | 
| 156 |  | 
| 157 | U_CDECL_END | 
| 158 |  | 
| 159 | #endif | 
| 160 |  |