1 | // |
2 | // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) |
3 | // |
4 | // Distributed under the Boost Software License, Version 1.0. |
5 | // https://www.boost.org/LICENSE_1_0.txt |
6 | |
7 | #ifndef BOOST_LOCALE_BOUNDARY_SEGMENT_HPP_INCLUDED |
8 | #define BOOST_LOCALE_BOUNDARY_SEGMENT_HPP_INCLUDED |
9 | |
10 | #include <boost/locale/util/string.hpp> |
11 | #include <iosfwd> |
12 | #include <iterator> |
13 | #include <locale> |
14 | #include <string> |
15 | |
16 | #ifdef BOOST_MSVC |
17 | # pragma warning(push) |
18 | # pragma warning(disable : 4275 4251 4231 4660) |
19 | #endif |
20 | |
21 | namespace boost { namespace locale { namespace boundary { |
22 | /// \cond INTERNAL |
23 | namespace detail { |
/// Lexicographically compare the character ranges [l_begin, l_end) and [r_begin, r_end).
///
/// Characters are compared with std::char_traits of the left range's value type; every
/// character read from the right range is converted to that type first.
///
/// \return -1 if the left range orders before the right one, 1 if it orders after it and
///         0 if both ranges have the same length and equal characters.
template<typename LeftIterator, typename RightIterator>
int compare_text(LeftIterator l_begin, LeftIterator l_end, RightIterator r_begin, RightIterator r_end)
{
    using char_type = typename std::iterator_traits<LeftIterator>::value_type;
    using traits = std::char_traits<char_type>;

    while(l_begin != l_end && r_begin != r_end) {
        // Read and advance each side before comparing, left side first.
        const char_type lhs = *l_begin;
        ++l_begin;
        const char_type rhs = *r_begin;
        ++r_begin;

        if(!traits::eq(lhs, rhs))
            return traits::lt(lhs, rhs) ? -1 : 1;
    }

    // At least one range is exhausted here: the shorter range orders first.
    if(!(l_begin == l_end))
        return 1;
    return (r_begin == r_end) ? 0 : -1;
}
47 | |
/// Lexicographically compare two ranges that expose begin() and end().
///
/// Forwards to the iterator-based compare_text overload.
///
/// \return a negative value, zero or a positive value, as returned by that overload.
template<typename Left, typename Right>
int compare_text(const Left& l, const Right& r)
{
    const int order = compare_text(l.begin(), l.end(), r.begin(), r.end());
    return order;
}
53 | |
54 | template<typename Left, typename Char> |
55 | int compare_string(const Left& l, const Char* begin) |
56 | { |
57 | return compare_text(l.begin(), l.end(), begin, util::str_end(begin)); |
58 | } |
59 | |
60 | template<typename Right, typename Char> |
61 | int compare_string(const Char* begin, const Right& r) |
62 | { |
63 | return compare_text(begin, util::str_end(begin), r.begin(), r.end()); |
64 | } |
65 | |
66 | } // namespace detail |
67 | /// \endcond |
68 | |
69 | /// \addtogroup boundary |
70 | /// @{ |
71 | |
72 | /// \brief a segment object that represents a pair of two iterators that define the range where |
73 | /// this segment exits and a rule that defines it. |
74 | /// |
75 | /// This type of object is dereferenced by the iterators of segment_index. Using a rule() member function |
76 | /// you can get a specific rule this segment was selected with. For example, when you use |
77 | /// word boundary analysis, you can check if the specific word contains Kana letters by checking (rule() & \ref |
78 | /// word_kana)!=0 For a sentence analysis you can check if the sentence is selected because a sentence terminator is |
79 | /// found (\ref sentence_term) or there is a line break (\ref sentence_sep). |
80 | /// |
81 | /// This object can be automatically converted to std::basic_string with the same type of character. It is also |
82 | /// valid range that has begin() and end() member functions returning iterators on the location of the segment. |
83 | /// |
84 | /// \see |
85 | /// |
86 | /// - \ref segment_index |
87 | /// - \ref boundary_point |
88 | /// - \ref boundary_point_index |
89 | template<typename IteratorType> |
90 | class segment : public std::pair<IteratorType, IteratorType> { |
91 | public: |
92 | /// The type of the underlying character |
93 | typedef typename std::iterator_traits<IteratorType>::value_type char_type; |
94 | /// The type of the string it is converted to |
95 | typedef std::basic_string<char_type> string_type; |
96 | /// The value that iterators return - the character itself |
97 | typedef char_type value_type; |
98 | /// The iterator that allows to iterate the range |
99 | typedef IteratorType iterator; |
100 | /// The iterator that allows to iterate the range |
101 | typedef IteratorType const_iterator; |
102 | /// The type that represent a difference between two iterators |
103 | typedef typename std::iterator_traits<IteratorType>::difference_type difference_type; |
104 | |
105 | /// Default constructor |
106 | segment() : rule_(0) {} |
107 | /// Create a segment using two iterators and a rule that represents this point |
108 | segment(iterator b, iterator e, rule_type r) : std::pair<IteratorType, IteratorType>(b, e), rule_(r) {} |
109 | /// Set the start of the range |
110 | void begin(const iterator& v) { this->first = v; } |
111 | /// Set the end of the range |
112 | void end(const iterator& v) { this->second = v; } |
113 | |
114 | /// Get the start of the range |
115 | IteratorType begin() const { return this->first; } |
116 | /// Set the end of the range |
117 | IteratorType end() const { return this->second; } |
118 | |
119 | /// Convert the range to a string automatically |
120 | template<class T, class A> |
121 | operator std::basic_string<char_type, T, A>() const |
122 | { |
123 | return std::basic_string<char_type, T, A>(this->first, this->second); |
124 | } |
125 | |
126 | /// Create a string from the range explicitly |
127 | string_type str() const { return string_type(begin(), end()); } |
128 | |
129 | /// Get the length of the text chunk |
130 | size_t length() const { return std::distance(begin(), end()); } |
131 | |
132 | /// Check if the segment is empty |
133 | bool empty() const { return begin() == end(); } |
134 | |
135 | /// Get the rule that is used for selection of this segment. |
136 | rule_type rule() const { return rule_; } |
137 | /// Set a rule that is used for segment selection |
138 | void rule(rule_type r) { rule_ = r; } |
139 | |
140 | // make sure we override std::pair's operator== |
141 | |
142 | /// Compare two segments |
143 | bool operator==(const segment& other) const { return detail::compare_text(*this, other) == 0; } |
144 | /// Compare two segments |
145 | bool operator!=(const segment& other) const { return detail::compare_text(*this, other) != 0; } |
146 | |
147 | private: |
148 | rule_type rule_; |
149 | }; |
150 | |
151 | /// Compare two segments |
152 | template<typename IteratorL, typename IteratorR> |
153 | bool operator==(const segment<IteratorL>& l, const segment<IteratorR>& r) |
154 | { |
155 | return detail::compare_text(l, r) == 0; |
156 | } |
157 | /// Compare two segments |
158 | template<typename IteratorL, typename IteratorR> |
159 | bool operator!=(const segment<IteratorL>& l, const segment<IteratorR>& r) |
160 | { |
161 | return detail::compare_text(l, r) != 0; |
162 | } |
163 | |
164 | /// Compare two segments |
165 | template<typename IteratorL, typename IteratorR> |
166 | bool operator<(const segment<IteratorL>& l, const segment<IteratorR>& r) |
167 | { |
168 | return detail::compare_text(l, r) < 0; |
169 | } |
170 | /// Compare two segments |
171 | template<typename IteratorL, typename IteratorR> |
172 | bool operator<=(const segment<IteratorL>& l, const segment<IteratorR>& r) |
173 | { |
174 | return detail::compare_text(l, r) <= 0; |
175 | } |
176 | /// Compare two segments |
177 | template<typename IteratorL, typename IteratorR> |
178 | bool operator>(const segment<IteratorL>& l, const segment<IteratorR>& r) |
179 | { |
180 | return detail::compare_text(l, r) > 0; |
181 | } |
182 | /// Compare two segments |
183 | template<typename IteratorL, typename IteratorR> |
184 | bool operator>=(const segment<IteratorL>& l, const segment<IteratorR>& r) |
185 | { |
186 | return detail::compare_text(l, r) >= 0; |
187 | } |
188 | |
189 | /// Compare string and segment |
190 | template<typename CharType, typename Traits, typename Alloc, typename IteratorR> |
191 | bool operator==(const std::basic_string<CharType, Traits, Alloc>& l, const segment<IteratorR>& r) |
192 | { |
193 | return detail::compare_text(l, r) == 0; |
194 | } |
195 | /// Compare string and segment |
196 | template<typename CharType, typename Traits, typename Alloc, typename IteratorR> |
197 | bool operator!=(const std::basic_string<CharType, Traits, Alloc>& l, const segment<IteratorR>& r) |
198 | { |
199 | return detail::compare_text(l, r) != 0; |
200 | } |
201 | |
202 | /// Compare string and segment |
203 | template<typename CharType, typename Traits, typename Alloc, typename IteratorR> |
204 | bool operator<(const std::basic_string<CharType, Traits, Alloc>& l, const segment<IteratorR>& r) |
205 | { |
206 | return detail::compare_text(l, r) < 0; |
207 | } |
208 | /// Compare string and segment |
209 | template<typename CharType, typename Traits, typename Alloc, typename IteratorR> |
210 | bool operator<=(const std::basic_string<CharType, Traits, Alloc>& l, const segment<IteratorR>& r) |
211 | { |
212 | return detail::compare_text(l, r) <= 0; |
213 | } |
214 | /// Compare string and segment |
215 | template<typename CharType, typename Traits, typename Alloc, typename IteratorR> |
216 | bool operator>(const std::basic_string<CharType, Traits, Alloc>& l, const segment<IteratorR>& r) |
217 | { |
218 | return detail::compare_text(l, r) > 0; |
219 | } |
220 | /// Compare string and segment |
221 | template<typename CharType, typename Traits, typename Alloc, typename IteratorR> |
222 | bool operator>=(const std::basic_string<CharType, Traits, Alloc>& l, const segment<IteratorR>& r) |
223 | { |
224 | return detail::compare_text(l, r) >= 0; |
225 | } |
226 | |
227 | /// Compare string and segment |
228 | template<typename Iterator, typename CharType, typename Traits, typename Alloc> |
229 | bool operator==(const segment<Iterator>& l, const std::basic_string<CharType, Traits, Alloc>& r) |
230 | { |
231 | return detail::compare_text(l, r) == 0; |
232 | } |
233 | /// Compare string and segment |
234 | template<typename Iterator, typename CharType, typename Traits, typename Alloc> |
235 | bool operator!=(const segment<Iterator>& l, const std::basic_string<CharType, Traits, Alloc>& r) |
236 | { |
237 | return detail::compare_text(l, r) != 0; |
238 | } |
239 | |
240 | /// Compare string and segment |
241 | template<typename Iterator, typename CharType, typename Traits, typename Alloc> |
242 | bool operator<(const segment<Iterator>& l, const std::basic_string<CharType, Traits, Alloc>& r) |
243 | { |
244 | return detail::compare_text(l, r) < 0; |
245 | } |
246 | /// Compare string and segment |
247 | template<typename Iterator, typename CharType, typename Traits, typename Alloc> |
248 | bool operator<=(const segment<Iterator>& l, const std::basic_string<CharType, Traits, Alloc>& r) |
249 | { |
250 | return detail::compare_text(l, r) <= 0; |
251 | } |
252 | /// Compare string and segment |
253 | template<typename Iterator, typename CharType, typename Traits, typename Alloc> |
254 | bool operator>(const segment<Iterator>& l, const std::basic_string<CharType, Traits, Alloc>& r) |
255 | { |
256 | return detail::compare_text(l, r) > 0; |
257 | } |
258 | /// Compare string and segment |
259 | template<typename Iterator, typename CharType, typename Traits, typename Alloc> |
260 | bool operator>=(const segment<Iterator>& l, const std::basic_string<CharType, Traits, Alloc>& r) |
261 | { |
262 | return detail::compare_text(l, r) >= 0; |
263 | } |
264 | |
265 | /// Compare C string and segment |
266 | template<typename CharType, typename IteratorR> |
267 | bool operator==(const CharType* l, const segment<IteratorR>& r) |
268 | { |
269 | return detail::compare_string(l, r) == 0; |
270 | } |
271 | /// Compare C string and segment |
272 | template<typename CharType, typename IteratorR> |
273 | bool operator!=(const CharType* l, const segment<IteratorR>& r) |
274 | { |
275 | return detail::compare_string(l, r) != 0; |
276 | } |
277 | |
278 | /// Compare C string and segment |
279 | template<typename CharType, typename IteratorR> |
280 | bool operator<(const CharType* l, const segment<IteratorR>& r) |
281 | { |
282 | return detail::compare_string(l, r) < 0; |
283 | } |
284 | /// Compare C string and segment |
285 | template<typename CharType, typename IteratorR> |
286 | bool operator<=(const CharType* l, const segment<IteratorR>& r) |
287 | { |
288 | return detail::compare_string(l, r) <= 0; |
289 | } |
290 | /// Compare C string and segment |
291 | template<typename CharType, typename IteratorR> |
292 | bool operator>(const CharType* l, const segment<IteratorR>& r) |
293 | { |
294 | return detail::compare_string(l, r) > 0; |
295 | } |
296 | /// Compare C string and segment |
297 | template<typename CharType, typename IteratorR> |
298 | bool operator>=(const CharType* l, const segment<IteratorR>& r) |
299 | { |
300 | return detail::compare_string(l, r) >= 0; |
301 | } |
302 | |
303 | /// Compare C string and segment |
304 | template<typename Iterator, typename CharType> |
305 | bool operator==(const segment<Iterator>& l, const CharType* r) |
306 | { |
307 | return detail::compare_string(l, r) == 0; |
308 | } |
309 | /// Compare C string and segment |
310 | template<typename Iterator, typename CharType> |
311 | bool operator!=(const segment<Iterator>& l, const CharType* r) |
312 | { |
313 | return detail::compare_string(l, r) != 0; |
314 | } |
315 | |
316 | /// Compare C string and segment |
317 | template<typename Iterator, typename CharType> |
318 | bool operator<(const segment<Iterator>& l, const CharType* r) |
319 | { |
320 | return detail::compare_string(l, r) < 0; |
321 | } |
322 | /// Compare C string and segment |
323 | template<typename Iterator, typename CharType> |
324 | bool operator<=(const segment<Iterator>& l, const CharType* r) |
325 | { |
326 | return detail::compare_string(l, r) <= 0; |
327 | } |
328 | /// Compare C string and segment |
329 | template<typename Iterator, typename CharType> |
330 | bool operator>(const segment<Iterator>& l, const CharType* r) |
331 | { |
332 | return detail::compare_string(l, r) > 0; |
333 | } |
334 | /// Compare C string and segment |
335 | template<typename Iterator, typename CharType> |
336 | bool operator>=(const segment<Iterator>& l, const CharType* r) |
337 | { |
338 | return detail::compare_string(l, r) >= 0; |
339 | } |
340 | |
341 | typedef segment<std::string::const_iterator> ssegment; ///< convenience typedef |
342 | typedef segment<std::wstring::const_iterator> wssegment; ///< convenience typedef |
343 | #ifndef BOOST_LOCALE_NO_CXX20_STRING8 |
344 | typedef segment<std::u8string::const_iterator> u8ssegment; ///< convenience typedef |
345 | #endif |
346 | #ifdef BOOST_LOCALE_ENABLE_CHAR16_T |
347 | typedef segment<std::u16string::const_iterator> u16ssegment; ///< convenience typedef |
348 | #endif |
349 | #ifdef BOOST_LOCALE_ENABLE_CHAR32_T |
350 | typedef segment<std::u32string::const_iterator> u32ssegment; ///< convenience typedef |
351 | #endif |
352 | |
353 | typedef segment<const char*> csegment; ///< convenience typedef |
354 | typedef segment<const wchar_t*> wcsegment; ///< convenience typedef |
355 | #ifdef __cpp_char8_t |
356 | typedef segment<const char8_t*> u8csegment; ///< convenience typedef |
357 | #endif |
358 | #ifdef BOOST_LOCALE_ENABLE_CHAR16_T |
359 | typedef segment<const char16_t*> u16csegment; ///< convenience typedef |
360 | #endif |
361 | #ifdef BOOST_LOCALE_ENABLE_CHAR32_T |
362 | typedef segment<const char32_t*> u32csegment; ///< convenience typedef |
363 | #endif |
364 | |
365 | /// Write the segment to the stream character by character |
366 | template<typename CharType, typename TraitsType, typename Iterator> |
367 | std::basic_ostream<CharType, TraitsType>& operator<<(std::basic_ostream<CharType, TraitsType>& out, |
368 | const segment<Iterator>& seg) |
369 | { |
370 | for(const auto& p : seg) |
371 | out << p; |
372 | return out; |
373 | } |
374 | |
375 | /// @} |
376 | |
377 | }}} // namespace boost::locale::boundary |
378 | |
379 | #ifdef BOOST_MSVC |
380 | # pragma warning(pop) |
381 | #endif |
382 | |
383 | #endif |
384 | |