| 1 | // © 2017 and later: Unicode, Inc. and others. | 
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
| 3 |  | 
| 4 | // stringoptions.h | 
| 5 | // created: 2017jun08 Markus W. Scherer | 
| 6 |  | 
| 7 | #ifndef __STRINGOPTIONS_H__ | 
| 8 | #define __STRINGOPTIONS_H__ | 
| 9 |  | 
| 10 | #include "unicode/utypes.h" | 
| 11 |  | 
| 12 | /** | 
| 13 |  * \file | 
| 14 |  * \brief C API: Bit constants for the option bit sets of various string and character processing functions. | 
| 15 |  */ | 
| 16 |  | 
| 17 | /** | 
| 18 |  * Option value for case folding: Use the default mappings defined in CaseFolding.txt. | 
| 19 |  * The value is 0 (no option bit set). | 
| 20 |  * @stable ICU 2.0 | 
| 21 |  */ | 
| 22 | #define U_FOLD_CASE_DEFAULT 0 | 
| 23 |  | 
| 24 | /** | 
| 25 |  * Option value for case folding: | 
| 26 |  * | 
| 27 |  * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I | 
| 28 |  * and dotless i appropriately for Turkic languages (tr, az). | 
| 29 |  * | 
| 30 |  * Before Unicode 3.2, CaseFolding.txt contained mappings marked with 'I' that | 
| 31 |  * were to be included for default mappings and | 
| 32 |  * excluded for the Turkic-specific mappings. | 
| 33 |  * | 
| 34 |  * Starting with Unicode 3.2, CaseFolding.txt instead contains mappings marked with 'T' that | 
| 35 |  * are to be excluded for default mappings and | 
| 36 |  * included for the Turkic-specific mappings. | 
| 37 |  * | 
| 38 |  * @stable ICU 2.0 | 
| 39 |  */ | 
| 40 | #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 | 
| 41 |  | 
| 42 | /** | 
| 43 |  * Titlecase the string as a whole rather than each word. | 
| 44 |  * (Titlecase only the character at index 0, possibly adjusted.) | 
| 45 |  * Option bits value for titlecasing APIs that take an options bit set. | 
| 46 |  * | 
| 47 |  * This is a titlecasing iterator option (as is U_TITLECASE_SENTENCES). It is an error to specify multiple | 
| 48 |  * titlecasing iterator options together, including both an options bit and an explicit BreakIterator. | 
| 49 |  * | 
| 50 |  * @see U_TITLECASE_ADJUST_TO_CASED | 
| 51 |  * @stable ICU 60 | 
| 52 |  */ | 
| 53 | #define U_TITLECASE_WHOLE_STRING 0x20 | 
| 54 |  | 
| 55 | /** | 
| 56 |  * Titlecase sentences rather than words. | 
| 57 |  * (Titlecase only the first character of each sentence, possibly adjusted.) | 
| 58 |  * Option bits value for titlecasing APIs that take an options bit set. | 
| 59 |  * | 
| 60 |  * This is a titlecasing iterator option (as is U_TITLECASE_WHOLE_STRING). It is an error to specify multiple | 
| 61 |  * titlecasing iterator options together, including both an options bit and an explicit BreakIterator. | 
| 62 |  * | 
| 63 |  * @see U_TITLECASE_ADJUST_TO_CASED | 
| 64 |  * @stable ICU 60 | 
| 65 |  */ | 
| 66 | #define U_TITLECASE_SENTENCES 0x40 | 
| 67 |  | 
| 68 | /** | 
| 69 |  * Do not lowercase non-initial parts of words when titlecasing. | 
| 70 |  * Option bit for titlecasing APIs that take an options bit set. | 
| 71 |  * | 
| 72 |  * By default, titlecasing will titlecase the character at each | 
| 73 |  * (possibly adjusted) BreakIterator index and | 
| 74 |  * lowercase all other characters up to the next iterator index. | 
| 75 |  * With this option, those other characters are left unmodified. | 
| 76 |  * | 
| 77 |  * @see U_TITLECASE_ADJUST_TO_CASED | 
| 78 |  * @see UnicodeString::toTitle | 
| 79 |  * @see CaseMap::toTitle | 
| 80 |  * @see ucasemap_setOptions | 
| 81 |  * @see ucasemap_toTitle | 
| 82 |  * @see ucasemap_utf8ToTitle | 
| 83 |  * @stable ICU 3.8 | 
| 84 |  */ | 
| 85 | #define U_TITLECASE_NO_LOWERCASE 0x100 | 
| 86 |  | 
| 87 | /** | 
| 88 |  * Do not adjust the titlecasing BreakIterator indexes; | 
| 89 |  * titlecase exactly the characters at breaks from the iterator. | 
| 90 |  * Option bit for titlecasing APIs that take an options bit set. | 
| 91 |  * | 
| 92 |  * By default, titlecasing will take each break iterator index, | 
| 93 |  * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED), | 
| 94 |  * and titlecase that one. | 
| 95 |  * | 
| 96 |  * Other characters are lowercased (unless U_TITLECASE_NO_LOWERCASE is also specified). | 
| 97 |  * | 
| 98 |  * It is an error to specify multiple titlecasing adjustment options together (such as this one with U_TITLECASE_ADJUST_TO_CASED). | 
| 99 |  * | 
| 100 |  * @see U_TITLECASE_ADJUST_TO_CASED | 
| 101 |  * @see U_TITLECASE_NO_LOWERCASE | 
| 102 |  * @see UnicodeString::toTitle | 
| 103 |  * @see CaseMap::toTitle | 
| 104 |  * @see ucasemap_setOptions | 
| 105 |  * @see ucasemap_toTitle | 
| 106 |  * @see ucasemap_utf8ToTitle | 
| 107 |  * @stable ICU 3.8 | 
| 108 |  */ | 
| 109 | #define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 | 
| 110 |  | 
| 111 | /** | 
| 112 |  * Adjust each titlecasing BreakIterator index to the next cased character. | 
| 113 |  * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).) | 
| 114 |  * Option bit for titlecasing APIs that take an options bit set. | 
| 115 |  * | 
| 116 |  * Before ICU 60, this was the default index adjustment in ICU. | 
| 117 |  * Since ICU 60, the default index adjustment is to the next character that is | 
| 118 |  * a letter, number, symbol, or private use code point. | 
| 119 |  * (Uncased modifier letters are skipped.) | 
| 120 |  * The difference in behavior is small for word titlecasing, | 
| 121 |  * but the new adjustment is much better for whole-string and sentence titlecasing: | 
| 122 |  * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»". | 
| 123 |  * | 
| 124 |  * It is an error to specify multiple titlecasing adjustment options together (such as this one with U_TITLECASE_NO_BREAK_ADJUSTMENT). | 
| 125 |  * | 
| 126 |  * @see U_TITLECASE_NO_BREAK_ADJUSTMENT | 
| 127 |  * @stable ICU 60 | 
| 128 |  */ | 
| 129 | #define U_TITLECASE_ADJUST_TO_CASED 0x400 | 
| 130 |  | 
| 131 | /** | 
| 132 |  * Option for string transformation functions: Do not reset the Edits object first. | 
| 133 |  * Used, for example, in some case-mapping and normalization functions. | 
| 134 |  * | 
| 135 |  * @see CaseMap | 
| 136 |  * @see Edits | 
| 137 |  * @see Normalizer2 | 
| 138 |  * @stable ICU 60 | 
| 139 |  */ | 
| 140 | #define U_EDITS_NO_RESET 0x2000 | 
| 141 |  | 
| 142 | /** | 
| 143 |  * Omit unchanged text when recording how source substrings | 
| 144 |  * relate to changed and unchanged result substrings. | 
| 145 |  * Used, for example, in some case-mapping and normalization functions. | 
| 146 |  * | 
| 147 |  * @see CaseMap | 
| 148 |  * @see Edits | 
| 149 |  * @see Normalizer2 | 
| 150 |  * @stable ICU 60 | 
| 151 |  */ | 
| 152 | #define U_OMIT_UNCHANGED_TEXT 0x4000 | 
| 153 |  | 
| 154 | /** | 
| 155 |  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc.: | 
| 156 |  * Compare strings in code point order instead of code unit order. | 
| 157 |  * @stable ICU 2.2 | 
| 158 |  */ | 
| 159 | #define U_COMPARE_CODE_POINT_ORDER  0x8000 | 
| 160 |  | 
| 161 | /** | 
| 162 |  * Option bit for unorm_compare: | 
| 163 |  * Perform a case-insensitive comparison. | 
| 164 |  * @stable ICU 2.2 | 
| 165 |  */ | 
| 166 | #define U_COMPARE_IGNORE_CASE       0x10000 | 
| 167 |  | 
| 168 | /** | 
| 169 |  * Option bit for unorm_compare: | 
| 170 |  * Assume that both input strings fulfill FCD conditions. | 
| 171 |  * @stable ICU 2.2 | 
| 172 |  */ | 
| 173 | #define UNORM_INPUT_IS_FCD          0x20000 | 
| 174 |  | 
| 175 | // Related definitions elsewhere. | 
| 176 | // Options that are not meaningful in the same functions | 
| 177 | // can share the same bits. | 
| 178 | // | 
| 179 | // Public: | 
| 180 | // unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20 | 
| 181 | // | 
| 182 | // Internal: (may change or be removed) | 
| 183 | // ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff | 
| 184 | // ucase.h #define _FOLD_CASE_OPTIONS_MASK 7 | 
| 185 | // ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0 | 
| 186 | // ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600 | 
| 187 | // ustr_imp.h #define _STRNCMP_STYLE 0x1000 | 
| 188 | // unormcmp.cpp #define _COMPARE_EQUIV 0x80000 | 
| 189 |  | 
| 190 | #endif  // __STRINGOPTIONS_H__ | 
| 191 |  |