1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | * Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved. |
5 | ********************************************************************** |
6 | * Date Name Description |
7 | * 07/18/01 aliu Creation. |
8 | ********************************************************************** |
9 | */ |
10 | #ifndef UNIMATCH_H |
11 | #define UNIMATCH_H |
12 | |
13 | #include "unicode/utypes.h" |
14 | |
15 | /** |
16 | * \file |
17 | * \brief C++ API: Unicode Matcher |
18 | */ |
19 | |
20 | #if U_SHOW_CPLUSPLUS_API |
21 | |
22 | U_NAMESPACE_BEGIN |
23 | |
// Forward declarations only: this header refers to these types solely
// through references, so their full definitions are not needed here.
class Replaceable;
class UnicodeString;
class UnicodeSet;
27 | |
/**
 * Result codes produced by <code>UnicodeMatcher::matches()</code>,
 * describing how completely the examined text matched.
 * @stable ICU 2.4
 */
enum UMatchDegree {
    /**
     * Returned by <code>matches()</code> when the text and the matcher
     * do not match: either the text contains a character that does not
     * match, or, for a non-incremental match, the text does not contain
     * all of the desired characters.
     * @stable ICU 2.4
     */
    U_MISMATCH = 0,

    /**
     * Returned by <code>matches()</code> for a partial match. Only
     * incremental match operations produce this value. Every character
     * of the text matches, but a complete match needs more characters.
     * For variable-length matchers it can also mean that every
     * character of the text matches and that further characters
     * supplied at limit might match as well.
     * @stable ICU 2.4
     */
    U_PARTIAL_MATCH = 1,

    /**
     * Returned by <code>matches()</code> for a complete match. In an
     * incremental variable-length match this value means the given
     * text matches and additional characters are known not to change
     * the extent of the match.
     * @stable ICU 2.4
     */
    U_MATCH = 2
};
65 | |
66 | /** |
67 | * <code>UnicodeMatcher</code> defines a protocol for objects that can |
68 | * match a range of characters in a Replaceable string. |
69 | * @stable ICU 2.4 |
70 | */ |
71 | class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ { |
72 | |
73 | public: |
74 | /** |
75 | * Destructor. |
76 | * @stable ICU 2.4 |
77 | */ |
78 | virtual ~UnicodeMatcher(); |
79 | |
80 | /** |
81 | * Return a UMatchDegree value indicating the degree of match for |
82 | * the given text at the given offset. Zero, one, or more |
83 | * characters may be matched. |
84 | * |
85 | * Matching in the forward direction is indicated by limit > |
86 | * offset. Characters from offset forwards to limit-1 will be |
87 | * considered for matching. |
88 | * |
89 | * Matching in the reverse direction is indicated by limit < |
90 | * offset. Characters from offset backwards to limit+1 will be |
91 | * considered for matching. |
92 | * |
93 | * If limit == offset then the only match possible is a zero |
94 | * character match (which subclasses may implement if desired). |
95 | * |
96 | * As a side effect, advance the offset parameter to the limit of |
97 | * the matched substring. In the forward direction, this will be |
98 | * the index of the last matched character plus one. In the |
99 | * reverse direction, this will be the index of the last matched |
100 | * character minus one. |
101 | * |
102 | * <p>Note: This method is not const because some classes may |
103 | * modify their state as the result of a match. |
104 | * |
105 | * @param text the text to be matched |
106 | * @param offset on input, the index into text at which to begin |
107 | * matching. On output, the limit of the matched text. The |
108 | * number of matched characters is the output value of offset |
109 | * minus the input value. Offset should always point to the |
110 | * HIGH SURROGATE (leading code unit) of a pair of surrogates, |
111 | * both on entry and upon return. |
112 | * @param limit the limit index of text to be matched. Greater |
113 | * than offset for a forward direction match, less than offset for |
114 | * a backward direction match. The last character to be |
115 | * considered for matching will be text.charAt(limit-1) in the |
116 | * forward direction or text.charAt(limit+1) in the backward |
117 | * direction. |
118 | * @param incremental if true, then assume further characters may |
119 | * be inserted at limit and check for partial matching. Otherwise |
120 | * assume the text as given is complete. |
121 | * @return a match degree value indicating a full match, a partial |
122 | * match, or a mismatch. If incremental is false then |
123 | * U_PARTIAL_MATCH should never be returned. |
124 | * @stable ICU 2.4 |
125 | */ |
126 | virtual UMatchDegree matches(const Replaceable& text, |
127 | int32_t& offset, |
128 | int32_t limit, |
129 | UBool incremental) = 0; |
130 | |
131 | /** |
132 | * Returns a string representation of this matcher. If the result of |
133 | * calling this function is passed to the appropriate parser, it |
134 | * will produce another matcher that is equal to this one. |
135 | * @param result the string to receive the pattern. Previous |
136 | * contents will be deleted. |
137 | * @param escapeUnprintable if true then convert unprintable |
138 | * character to their hex escape representations, \\uxxxx or |
139 | * \\Uxxxxxxxx. Unprintable characters are those other than |
140 | * U+000A, U+0020..U+007E. |
141 | * @stable ICU 2.4 |
142 | */ |
143 | virtual UnicodeString& toPattern(UnicodeString& result, |
144 | UBool escapeUnprintable = false) const = 0; |
145 | |
146 | /** |
147 | * Returns true if this matcher will match a character c, where c |
148 | * & 0xFF == v, at offset, in the forward direction (with limit > |
149 | * offset). This is used by <tt>RuleBasedTransliterator</tt> for |
150 | * indexing. |
151 | * @stable ICU 2.4 |
152 | */ |
153 | virtual UBool matchesIndexValue(uint8_t v) const = 0; |
154 | |
155 | /** |
156 | * Union the set of all characters that may be matched by this object |
157 | * into the given set. |
158 | * @param toUnionTo the set into which to union the source characters |
159 | * @stable ICU 2.4 |
160 | */ |
161 | virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0; |
162 | }; |
163 | |
164 | U_NAMESPACE_END |
165 | |
166 | #endif /* U_SHOW_CPLUSPLUS_API */ |
167 | |
168 | #endif |
169 | |