1//
2// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3//
4// Distributed under the Boost Software License, Version 1.0.
5// https://www.boost.org/LICENSE_1_0.txt
6
7#ifndef BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
8#define BOOST_LOCALE_BOUNDARY_INDEX_HPP_INCLUDED
9
10#include <boost/locale/boundary/boundary_point.hpp>
11#include <boost/locale/boundary/facets.hpp>
12#include <boost/locale/boundary/segment.hpp>
13#include <boost/locale/boundary/types.hpp>
14#include <boost/iterator/iterator_facade.hpp>
15#include <algorithm>
16#include <cstdint>
17#include <iterator>
18#include <locale>
19#include <memory>
20#include <stdexcept>
21#include <string>
22#include <type_traits>
23#include <vector>
24
25#ifdef BOOST_MSVC
26# pragma warning(push)
27# pragma warning(disable : 4275 4251 4231 4660)
28#endif
29
30namespace boost { namespace locale { namespace boundary {
31 ///
32 /// \defgroup boundary Boundary Analysis
33 ///
34 /// This module contains all operations required for %boundary analysis of text: character, word, line and sentence
35 /// boundaries
36 ///
37 /// @{
38 ///
39
40 /// \cond INTERNAL
41
42 namespace detail {
43 template<typename Char>
44 const boundary_indexing<Char>& get_boundary_indexing(const std::locale& l)
45 {
46 using facet_type = boundary_indexing<Char>;
47 if(!std::has_facet<facet_type>(l))
48 throw std::runtime_error("Locale was generated without segmentation support!");
49 return std::use_facet<facet_type>(l);
50 }
51
52 template<typename IteratorType,
53 typename CategoryType = typename std::iterator_traits<IteratorType>::iterator_category>
54 struct mapping_traits {
55 typedef typename std::iterator_traits<IteratorType>::value_type char_type;
56 static index_type map(boundary_type t, IteratorType b, IteratorType e, const std::locale& l)
57 {
58 std::basic_string<char_type> str(b, e);
59 return get_boundary_indexing<char_type>(l).map(t, str.c_str(), str.c_str() + str.size());
60 }
61 };
62
/// Compile-time test whether \a SomeIteratorType is one of the iterator types this header
/// treats as walking a contiguous array of \a CharType: a raw pointer, or an iterator of
/// std::basic_string<CharType> or std::vector<CharType> (const or non-const).
template<typename CharType, typename SomeIteratorType>
struct linear_iterator_traits {
private:
    using string_type = std::basic_string<CharType>;
    using vector_type = std::vector<CharType>;

    static constexpr bool is_raw_pointer =
      std::is_same<SomeIteratorType, CharType*>::value || std::is_same<SomeIteratorType, const CharType*>::value;
    static constexpr bool is_string_iterator =
      std::is_same<SomeIteratorType, typename string_type::iterator>::value
      || std::is_same<SomeIteratorType, typename string_type::const_iterator>::value;
    static constexpr bool is_vector_iterator =
      std::is_same<SomeIteratorType, typename vector_type::iterator>::value
      || std::is_same<SomeIteratorType, typename vector_type::const_iterator>::value;

public:
    /// True when the iterator type is one of the recognised contiguous iterators
    static constexpr bool is_linear = is_raw_pointer || is_string_iterator || is_vector_iterator;
};
72
73 template<typename IteratorType>
74 struct mapping_traits<IteratorType, std::random_access_iterator_tag> {
75 typedef typename std::iterator_traits<IteratorType>::value_type char_type;
76
77 static index_type map(boundary_type t, IteratorType b, IteratorType e, const std::locale& l)
78 {
79 index_type result;
80
81 // Optimize for most common cases
82 //
83 // C++11 requires that string is continuous in memory and all known
84 // string implementations do this because of c_str() support.
85
86 if(linear_iterator_traits<char_type, IteratorType>::is_linear && b != e) {
87 const char_type* begin = &*b;
88 const char_type* end = begin + (e - b);
89 index_type tmp = get_boundary_indexing<char_type>(l).map(t, begin, end);
90 result.swap(x&: tmp);
91 } else {
92 std::basic_string<char_type> str(b, e);
93 index_type tmp = get_boundary_indexing<char_type>(l).map(t, str.c_str(), str.c_str() + str.size());
94 result.swap(x&: tmp);
95 }
96 return result;
97 }
98 };
99
100 template<typename BaseIterator>
101 class mapping {
102 public:
103 typedef BaseIterator base_iterator;
104 typedef typename std::iterator_traits<base_iterator>::value_type char_type;
105
106 mapping(boundary_type type, base_iterator begin, base_iterator end, const std::locale& loc) :
107 index_(new index_type()), begin_(begin), end_(end)
108 {
109 index_type idx = detail::mapping_traits<base_iterator>::map(type, begin, end, loc);
110 index_->swap(x&: idx);
111 }
112
113 mapping() {}
114
115 const index_type& index() const { return *index_; }
116
117 base_iterator begin() const { return begin_; }
118
119 base_iterator end() const { return end_; }
120
121 private:
122 std::shared_ptr<index_type> index_;
123 base_iterator begin_, end_;
124 };
125
126 template<typename BaseIterator>
127 class segment_index_iterator : public boost::iterator_facade<segment_index_iterator<BaseIterator>,
128 segment<BaseIterator>,
129 boost::bidirectional_traversal_tag,
130 const segment<BaseIterator>&> {
131 public:
132 typedef BaseIterator base_iterator;
133 typedef mapping<base_iterator> mapping_type;
134 typedef segment<base_iterator> segment_type;
135
136 segment_index_iterator() : current_(0, 0), map_(nullptr), mask_(0), full_select_(false) {}
137
138 segment_index_iterator(base_iterator p, const mapping_type* map, rule_type mask, bool full_select) :
139 map_(map), mask_(mask), full_select_(full_select)
140 {
141 set(p);
142 }
143 segment_index_iterator(bool is_begin, const mapping_type* map, rule_type mask, bool full_select) :
144 map_(map), mask_(mask), full_select_(full_select)
145 {
146 if(is_begin)
147 set_begin();
148 else
149 set_end();
150 }
151
152 const segment_type& dereference() const { return value_; }
153
154 bool equal(const segment_index_iterator& other) const
155 {
156 return map_ == other.map_ && current_.second == other.current_.second;
157 }
158
159 void increment()
160 {
161 std::pair<size_t, size_t> next = current_;
162 if(full_select_) {
163 next.first = next.second;
164 while(next.second < size()) {
165 next.second++;
166 if(valid_offset(offset: next.second))
167 break;
168 }
169 if(next.second == size())
170 next.first = next.second - 1;
171 } else {
172 while(next.second < size()) {
173 next.first = next.second;
174 next.second++;
175 if(valid_offset(offset: next.second))
176 break;
177 }
178 }
179 update_current(pos: next);
180 }
181
182 void decrement()
183 {
184 std::pair<size_t, size_t> next = current_;
185 if(full_select_) {
186 while(next.second > 1) {
187 next.second--;
188 if(valid_offset(offset: next.second))
189 break;
190 }
191 next.first = next.second;
192 while(next.first > 0) {
193 next.first--;
194 if(valid_offset(offset: next.first))
195 break;
196 }
197 } else {
198 while(next.second > 1) {
199 next.second--;
200 if(valid_offset(offset: next.second))
201 break;
202 }
203 next.first = next.second - 1;
204 }
205 update_current(pos: next);
206 }
207
208 private:
209 void set_end()
210 {
211 current_.first = size() - 1;
212 current_.second = size();
213 value_ = segment_type(map_->end(), map_->end(), 0);
214 }
215 void set_begin()
216 {
217 current_.first = current_.second = 0;
218 value_ = segment_type(map_->begin(), map_->begin(), 0);
219 increment();
220 }
221
222 void set(base_iterator p)
223 {
224 const auto b = map_->index().begin(), e = map_->index().end();
225 auto boundary_point = std::upper_bound(b, e, break_info(std::distance(map_->begin(), p)));
226 while(boundary_point != e && (boundary_point->rule & mask_) == 0)
227 ++boundary_point;
228
229 current_.first = current_.second = boundary_point - b;
230
231 if(full_select_) {
232 while(current_.first > 0) {
233 current_.first--;
234 if(valid_offset(offset: current_.first))
235 break;
236 }
237 } else {
238 if(current_.first > 0)
239 current_.first--;
240 }
241 value_.first = map_->begin();
242 std::advance(value_.first, get_offset(ind: current_.first));
243 value_.second = value_.first;
244 std::advance(value_.second, get_offset(ind: current_.second) - get_offset(ind: current_.first));
245
246 update_rule();
247 }
248
249 void update_current(std::pair<size_t, size_t> pos)
250 {
251 std::ptrdiff_t first_diff = get_offset(ind: pos.first) - get_offset(ind: current_.first);
252 std::ptrdiff_t second_diff = get_offset(ind: pos.second) - get_offset(ind: current_.second);
253 std::advance(value_.first, first_diff);
254 std::advance(value_.second, second_diff);
255 current_ = pos;
256 update_rule();
257 }
258
259 void update_rule()
260 {
261 if(current_.second != size())
262 value_.rule(index()[current_.second].rule);
263 }
264 size_t get_offset(size_t ind) const
265 {
266 if(ind == size())
267 return index().back().offset;
268 return index()[ind].offset;
269 }
270
271 bool valid_offset(size_t offset) const
272 {
273 return offset == 0 || offset == size() // make sure we not acess index[size]
274 || (index()[offset].rule & mask_) != 0;
275 }
276
277 size_t size() const { return index().size(); }
278
279 const index_type& index() const { return map_->index(); }
280
281 segment_type value_;
282 std::pair<size_t, size_t> current_;
283 const mapping_type* map_;
284 rule_type mask_;
285 bool full_select_;
286 };
287
288 template<typename BaseIterator>
289 class boundary_point_index_iterator : public boost::iterator_facade<boundary_point_index_iterator<BaseIterator>,
290 boundary_point<BaseIterator>,
291 boost::bidirectional_traversal_tag,
292 const boundary_point<BaseIterator>&> {
293 public:
294 typedef BaseIterator base_iterator;
295 typedef mapping<base_iterator> mapping_type;
296 typedef boundary_point<base_iterator> boundary_point_type;
297
298 boundary_point_index_iterator() : current_(0), map_(nullptr), mask_(0) {}
299
300 boundary_point_index_iterator(bool is_begin, const mapping_type* map, rule_type mask) :
301 map_(map), mask_(mask)
302 {
303 if(is_begin)
304 set_begin();
305 else
306 set_end();
307 }
308 boundary_point_index_iterator(base_iterator p, const mapping_type* map, rule_type mask) :
309 map_(map), mask_(mask)
310 {
311 set(p);
312 }
313
314 const boundary_point_type& dereference() const { return value_; }
315
316 bool equal(const boundary_point_index_iterator& other) const
317 {
318 return map_ == other.map_ && current_ == other.current_;
319 }
320
321 void increment()
322 {
323 size_t next = current_;
324 while(next < size()) {
325 next++;
326 if(valid_offset(offset: next))
327 break;
328 }
329 update_current(pos: next);
330 }
331
332 void decrement()
333 {
334 size_t next = current_;
335 while(next > 0) {
336 next--;
337 if(valid_offset(offset: next))
338 break;
339 }
340 update_current(pos: next);
341 }
342
343 private:
344 void set_end()
345 {
346 current_ = size();
347 value_ = boundary_point_type(map_->end(), 0);
348 }
349 void set_begin()
350 {
351 current_ = 0;
352 value_ = boundary_point_type(map_->begin(), 0);
353 }
354
355 void set(base_iterator p)
356 {
357 size_t dist = std::distance(map_->begin(), p);
358
359 const auto b = index().begin(), e = index().end();
360 const auto ptr = std::lower_bound(b, e, break_info(dist));
361
362 if(ptr == e)
363 current_ = size() - 1;
364 else
365 current_ = ptr - b;
366
367 while(!valid_offset(offset: current_))
368 current_++;
369
370 std::ptrdiff_t diff = get_offset(ind: current_) - dist;
371 std::advance(p, diff);
372 value_.iterator(p);
373 update_rule();
374 }
375
376 void update_current(size_t pos)
377 {
378 std::ptrdiff_t diff = get_offset(ind: pos) - get_offset(ind: current_);
379 base_iterator i = value_.iterator();
380 std::advance(i, diff);
381 current_ = pos;
382 value_.iterator(i);
383 update_rule();
384 }
385
386 void update_rule()
387 {
388 if(current_ != size())
389 value_.rule(index()[current_].rule);
390 }
391 size_t get_offset(size_t ind) const
392 {
393 if(ind == size())
394 return index().back().offset;
395 return index()[ind].offset;
396 }
397
398 bool valid_offset(size_t offset) const
399 {
400 return offset == 0 || offset + 1 >= size() // last and first are always valid regardless of mark
401 || (index()[offset].rule & mask_) != 0;
402 }
403
404 size_t size() const { return index().size(); }
405
406 const index_type& index() const { return map_->index(); }
407
408 boundary_point_type value_;
409 size_t current_;
410 const mapping_type* map_;
411 rule_type mask_;
412 };
413
414 } // namespace detail
415
416 /// \endcond
417
// Forward declarations: segment_index and boundary_point_index name each other
// in their converting constructors and assignment operators.
template<typename BaseIterator> class segment_index;
template<typename BaseIterator> class boundary_point_index;
423
424 /// \brief This class holds an index of segments in the text range and allows to iterate over them
425 ///
426 /// This class is provides \ref begin() and \ref end() member functions that return bidirectional iterators
427 /// to the \ref segment objects.
428 ///
429 /// It provides two options on way of selecting segments:
430 ///
431 /// - \ref rule(rule_type mask) - a mask that allows to select only specific types of segments according to
432 /// various masks %as \ref word_any.
433 /// \n
434 /// The default is to select any types of boundaries.
435 /// \n
436 /// For example: using word %boundary analysis, when the provided mask is \ref word_kana then the iterators
437 /// would iterate only over the words containing Kana letters and \ref word_any would select all types of
438 /// words excluding ranges that consist of white space and punctuation marks. So iterating over the text
439 /// "to be or not to be?" with \ref word_any rule would return segments "to", "be", "or", "not", "to", "be",
440 /// instead of default "to", " ", "be", " ", "or", " ", "not", " ", "to", " ", "be", "?".
441 /// - \ref full_select(bool how) - a flag that defines the way a range is selected if the rule of the previous
442 /// %boundary point does not fit the selected rule.
443 /// \n
444 /// For example: We want to fetch all sentences from the following text: "Hello! How\nare you?".
445 /// \n
446 /// This text contains three %boundary points separating it to sentences by different rules:
447 /// - The exclamation mark "!" ends the sentence "Hello!"
448 /// - The line feed that splits the sentence "How\nare you?" into two parts.
449 /// - The question mark that ends the second sentence.
450 /// \n
451 /// If you would only change the \ref rule() to \ref sentence_term then the segment_index would
452 /// provide two sentences "Hello!" and "are you?" %as only them actually terminated with required
453 /// terminator "!" or "?". But changing \ref full_select() to true, the selected segment would include
454 /// all the text up to previous valid %boundary point and would return two expected sentences:
455 /// "Hello!" and "How\nare you?".
456 ///
457 /// This class allows to find a segment according to the given iterator in range using \ref find() member
458 /// function.
459 ///
460 /// \note
461 ///
462 /// - Changing any of the options - \ref rule() or \ref full_select() and of course re-indexing the text
463 /// invalidates existing iterators and they can't be used any more.
464 /// - segment_index can be created from boundary_point_index or other segment_index that was created with
465 /// same \ref boundary_type. This is very fast operation %as they shared same index
466 /// and it does not require its regeneration.
467 ///
468 /// \see
469 ///
470 /// - \ref boundary_point_index
471 /// - \ref segment
472 /// - \ref boundary_point
473
474 template<typename BaseIterator>
475 class segment_index {
476 public:
477 /// The type of the iterator used to iterate over the original text
478 typedef BaseIterator base_iterator;
479
480#ifdef BOOST_LOCALE_DOXYGEN
481 /// The bidirectional iterator that iterates over \ref value_type objects.
482 ///
483 /// - The iterators may be invalidated by use of any non-const member function
484 /// including but not limited to \ref rule(rule_type) and \ref full_select(bool).
485 /// - The returned value_type object is valid %as long %as iterator points to it.
486 /// So this following code is wrong %as t used after p was updated:
487 /// \code
488 /// segment_index<some_iterator>::iterator p=index.begin();
489 /// segment<some_iterator> &t = *p;
490 /// ++p;
491 /// std::cout << t.str() << std::endl;
492 /// \endcode
493 typedef unspecified_iterator_type iterator;
494 /// \copydoc iterator
495 typedef unspecified_iterator_type const_iterator;
496#else
497 typedef detail::segment_index_iterator<base_iterator> iterator;
498 typedef detail::segment_index_iterator<base_iterator> const_iterator;
499#endif
500 /// The type dereferenced by the \ref iterator and \ref const_iterator. It is
501 /// an object that represents selected segment.
502 typedef segment<base_iterator> value_type;
503
504 /// Default constructor.
505 ///
506 /// \note
507 ///
508 /// When this object is constructed by default it does not include a valid index, thus
509 /// calling \ref begin(), \ref end() or \ref find() member functions would lead to undefined
510 /// behavior
511 segment_index() : mask_(0xFFFFFFFFu), full_select_(false) {}
512 /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text
513 /// in range [begin,end) using a rule \a mask for locale \a loc.
514 segment_index(boundary_type type,
515 base_iterator begin,
516 base_iterator end,
517 rule_type mask,
518 const std::locale& loc = std::locale()) :
519 map_(type, begin, end, loc),
520 mask_(mask), full_select_(false)
521 {}
522 /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text
523 /// in range [begin,end) selecting all possible segments (full mask) for locale \a loc.
524 segment_index(boundary_type type,
525 base_iterator begin,
526 base_iterator end,
527 const std::locale& loc = std::locale()) :
528 map_(type, begin, end, loc),
529 mask_(0xFFFFFFFFu), full_select_(false)
530 {}
531
532 /// Create a segment_index from a \ref boundary_point_index. It copies all indexing information
533 /// and used default rule (all possible segments)
534 ///
535 /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text
536 /// range it is much better to create one from another rather then indexing the same
537 /// range twice.
538 ///
539 /// \note \ref rule() flags are not copied
540 segment_index(const boundary_point_index<base_iterator>&);
541
542 /// Copy an index from a \ref boundary_point_index. It copies all indexing information
543 /// and uses the default rule (all possible segments)
544 ///
545 /// This operation is very cheap, so if you use boundary_point_index and segment_index on same text
546 /// range it is much better to create one from another rather then indexing the same
547 /// range twice.
548 ///
549 /// \note \ref rule() flags are not copied
550 segment_index& operator=(const boundary_point_index<base_iterator>&);
551
552 /// Create a new index for %boundary analysis \ref boundary_type "type" of the text
553 /// in range [begin,end) for locale \a loc.
554 ///
555 /// \note \ref rule() and \ref full_select() remain unchanged.
556 void map(boundary_type type, base_iterator begin, base_iterator end, const std::locale& loc = std::locale())
557 {
558 map_ = mapping_type(type, begin, end, loc);
559 }
560
561 /// Get the \ref iterator on the beginning of the segments range.
562 ///
563 /// Preconditions: the segment_index should have a mapping
564 ///
565 /// \note
566 ///
567 /// The returned iterator is invalidated by access to any non-const member functions of this object
568 iterator begin() const
569 {
570 return iterator(true, &map_, mask_, full_select_);
571 }
572
573 /// Get the \ref iterator on the ending of the segments range.
574 ///
575 /// Preconditions: the segment_index should have a mapping
576 ///
577 /// The returned iterator is invalidated by access to any non-const member functions of this object
578 iterator end() const
579 {
580 return iterator(false, &map_, mask_, full_select_);
581 }
582
583 /// Find a first valid segment following a position \a p.
584 ///
585 /// If \a p is inside a valid segment this segment is selected:
586 ///
587 /// For example: For \ref word %boundary analysis with \ref word_any rule():
588 ///
589 /// - "to| be or ", would point to "be",
590 /// - "t|o be or ", would point to "to",
591 /// - "to be or| ", would point to end.
592 ///
593 ///
594 /// Preconditions: the segment_index should have a mapping and \a p should be valid iterator
595 /// to the text in the mapped range.
596 ///
597 /// The returned iterator is invalidated by access to any non-const member functions of this object
598 iterator find(base_iterator p) const
599 {
600 return iterator(p, &map_, mask_, full_select_);
601 }
602
603 /// Get the mask of rules that are used
604 rule_type rule() const
605 {
606 return mask_;
607 }
608 /// Set the mask of rules that are used
609 void rule(rule_type v)
610 {
611 mask_ = v;
612 }
613
614 /// Get the full_select property value - should segment include in the range
615 /// values that not belong to specific \ref rule() or not.
616 ///
617 /// The default value is false.
618 ///
619 /// For example for \ref sentence %boundary with rule \ref sentence_term the segments
620 /// of text "Hello! How\nare you?" are "Hello!\", "are you?" when full_select() is false
621 /// because "How\n" is selected %as sentence by a rule spits the text by line feed. If full_select()
622 /// is true the returned segments are "Hello! ", "How\nare you?" where "How\n" is joined with the
623 /// following part "are you?"
624 bool full_select() const
625 {
626 return full_select_;
627 }
628
629 /// Set the full_select property value - should segment include in the range
630 /// values that not belong to specific \ref rule() or not.
631 ///
632 /// The default value is false.
633 ///
634 /// For example for \ref sentence %boundary with rule \ref sentence_term the segments
635 /// of text "Hello! How\nare you?" are "Hello!\", "are you?" when full_select() is false
636 /// because "How\n" is selected %as sentence by a rule spits the text by line feed. If full_select()
637 /// is true the returned segments are "Hello! ", "How\nare you?" where "How\n" is joined with the
638 /// following part "are you?"
639 void full_select(bool v)
640 {
641 full_select_ = v;
642 }
643
644 private:
645 friend class boundary_point_index<base_iterator>;
646 typedef detail::mapping<base_iterator> mapping_type;
647 mapping_type map_;
648 rule_type mask_;
649 bool full_select_;
650 };
651
652 /// \brief This class holds an index of \ref boundary_point "boundary points" and allows iterating
653 /// over them.
654 ///
655 /// This class is provides \ref begin() and \ref end() member functions that return bidirectional iterators
656 /// to the \ref boundary_point objects.
657 ///
658 /// It provides an option that affects selecting %boundary points according to different rules:
659 /// using \ref rule(rule_type mask) member function. It allows to set a mask that select only specific
660 /// types of %boundary points like \ref sentence_term.
661 ///
662 /// For example for a sentence %boundary analysis of a text "Hello! How\nare you?" when the default
663 /// rule is used the %boundary points would be:
664 ///
665 /// - "|Hello! How\nare you?"
666 /// - "Hello! |How\nare you?"
667 /// - "Hello! How\n|are you?"
668 /// - "Hello! How\nare you?|"
669 ///
670 /// However if \ref rule() is set to \ref sentence_term then the selected %boundary points would be:
671 ///
672 /// - "|Hello! How\nare you?"
673 /// - "Hello! |How\nare you?"
674 /// - "Hello! How\nare you?|"
675 ///
676 /// Such that a %boundary point defined by a line feed character would be ignored.
677 ///
678 /// This class allows to find a boundary_point according to the given iterator in range using \ref find() member
679 /// function.
680 ///
681 /// \note
682 /// - Even an empty text range [x,x) considered to have a one %boundary point x.
683 /// - \a a and \a b points of the range [a,b) are always considered %boundary points
684 /// regardless the rules used.
685 /// - Changing any of the option \ref rule() or course re-indexing the text
686 /// invalidates existing iterators and they can't be used any more.
687 /// - boundary_point_index can be created from segment_index or other boundary_point_index that was created with
688 /// same \ref boundary_type. This is very fast operation %as they shared same index
689 /// and it does not require its regeneration.
690 ///
691 /// \see
692 ///
693 /// - \ref segment_index
694 /// - \ref boundary_point
695 /// - \ref segment
696 template<typename BaseIterator>
697 class boundary_point_index {
698 public:
699 /// The type of the iterator used to iterate over the original text
700 typedef BaseIterator base_iterator;
701
702#ifdef BOOST_LOCALE_DOXYGEN
703 /// The bidirectional iterator that iterates over \ref value_type objects.
704 ///
705 /// - The iterators may be invalidated by use of any non-const member function
706 /// including but not limited to \ref rule(rule_type) member function.
707 /// - The returned value_type object is valid %as long %as iterator points to it.
708 /// So this following code is wrong %as t used after p was updated:
709 /// \code
710 /// boundary_point_index<some_iterator>::iterator p=index.begin();
711 /// boundary_point<some_iterator> &t = *p;
712 /// ++p;
713 /// rule_type r = t->rule();
714 /// \endcode
715 ///
716 typedef unspecified_iterator_type iterator;
717 /// \copydoc iterator
718 typedef unspecified_iterator_type const_iterator;
719#else
720 typedef detail::boundary_point_index_iterator<base_iterator> iterator;
721 typedef detail::boundary_point_index_iterator<base_iterator> const_iterator;
722#endif
723 /// The type dereferenced by the \ref iterator and \ref const_iterator. It is
724 /// an object that represents the selected \ref boundary_point "boundary point".
725 typedef boundary_point<base_iterator> value_type;
726
727 /// Default constructor.
728 ///
729 /// \note
730 ///
731 /// When this object is constructed by default it does not include a valid index, thus
732 /// calling \ref begin(), \ref end() or \ref find() member functions would lead to undefined
733 /// behavior
734 boundary_point_index() : mask_(0xFFFFFFFFu) {}
735
736 /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text
737 /// in range [begin,end) using a rule \a mask for locale \a loc.
738 boundary_point_index(boundary_type type,
739 base_iterator begin,
740 base_iterator end,
741 rule_type mask,
742 const std::locale& loc = std::locale()) :
743 map_(type, begin, end, loc),
744 mask_(mask)
745 {}
746 /// Create a segment_index for %boundary analysis \ref boundary_type "type" of the text
747 /// in range [begin,end) selecting all possible %boundary points (full mask) for locale \a loc.
748 boundary_point_index(boundary_type type,
749 base_iterator begin,
750 base_iterator end,
751 const std::locale& loc = std::locale()) :
752 map_(type, begin, end, loc),
753 mask_(0xFFFFFFFFu)
754 {}
755
756 /// Create a boundary_point_index from a \ref segment_index. It copies all indexing information
757 /// and uses the default rule (all possible %boundary points)
758 ///
759 /// This operation is very cheap, so if you use boundary_point_index and segment_index on the same text
760 /// range it is much better to create one from another rather then indexing the same
761 /// range twice.
762 ///
763 /// \note \ref rule() flags are not copied
764 boundary_point_index(const segment_index<base_iterator>& other);
765 /// Copy a boundary_point_index from a \ref segment_index. It copies all indexing information
766 /// and keeps the current \ref rule() unchanged
767 ///
768 /// This operation is very cheap, so if you use boundary_point_index and segment_index on the same text
769 /// range it is much better to create one from another rather then indexing the same
770 /// range twice.
771 ///
772 /// \note \ref rule() flags are not copied
773 boundary_point_index& operator=(const segment_index<base_iterator>& other);
774
775 /// Create a new index for %boundary analysis \ref boundary_type "type" of the text
776 /// in range [begin,end) for locale \a loc.
777 ///
778 /// \note \ref rule() remains unchanged.
779 void map(boundary_type type, base_iterator begin, base_iterator end, const std::locale& loc = std::locale())
780 {
781 map_ = mapping_type(type, begin, end, loc);
782 }
783
784 /// Get the \ref iterator on the beginning of the %boundary points range.
785 ///
786 /// Preconditions: this boundary_point_index should have a mapping
787 ///
788 /// \note
789 ///
790 /// The returned iterator is invalidated by access to any non-const member functions of this object
791 iterator begin() const
792 {
793 return iterator(true, &map_, mask_);
794 }
795
796 /// Get the \ref iterator on the ending of the %boundary points range.
797 ///
798 /// Preconditions: this boundary_point_index should have a mapping
799 ///
800 /// \note
801 ///
802 /// The returned iterator is invalidated by access to any non-const member functions of this object
803 iterator end() const
804 {
805 return iterator(false, &map_, mask_);
806 }
807
808 /// Find a first valid %boundary point on a position \a p or following it.
809 ///
810 /// For example: For \ref word %boundary analysis of the text "to be or"
811 ///
812 /// - "|to be", would return %boundary point at "|to be",
813 /// - "t|o be", would point to "to| be"
814 ///
815 /// Preconditions: the boundary_point_index should have a mapping and \a p should be valid iterator
816 /// to the text in the mapped range.
817 ///
818 /// The returned iterator is invalidated by access to any non-const member functions of this object
819 iterator find(base_iterator p) const
820 {
821 return iterator(p, &map_, mask_);
822 }
823
824 /// Get the mask of rules that are used
825 rule_type rule() const
826 {
827 return mask_;
828 }
829 /// Set the mask of rules that are used
830 void rule(rule_type v)
831 {
832 mask_ = v;
833 }
834
835 private:
836 friend class segment_index<base_iterator>;
837 typedef detail::mapping<base_iterator> mapping_type;
838 mapping_type map_;
839 rule_type mask_;
840 };
841
842 /// \cond INTERNAL
843 template<typename BaseIterator>
844 segment_index<BaseIterator>::segment_index(const boundary_point_index<BaseIterator>& other) :
845 map_(other.map_), mask_(0xFFFFFFFFu), full_select_(false)
846 {}
847
848 template<typename BaseIterator>
849 boundary_point_index<BaseIterator>::boundary_point_index(const segment_index<BaseIterator>& other) :
850 map_(other.map_), mask_(0xFFFFFFFFu)
851 {}
852
853 template<typename BaseIterator>
854 segment_index<BaseIterator>& segment_index<BaseIterator>::operator=(const boundary_point_index<BaseIterator>& other)
855 {
856 map_ = other.map_;
857 return *this;
858 }
859
860 template<typename BaseIterator>
861 boundary_point_index<BaseIterator>&
862 boundary_point_index<BaseIterator>::operator=(const segment_index<BaseIterator>& other)
863 {
864 map_ = other.map_;
865 return *this;
866 }
867 /// \endcond
868
869 typedef segment_index<std::string::const_iterator> ssegment_index; ///< convenience typedef
870 typedef segment_index<std::wstring::const_iterator> wssegment_index; ///< convenience typedef
871#ifndef BOOST_LOCALE_NO_CXX20_STRING8
872 typedef segment_index<std::u8string::const_iterator> u8ssegment_index; ///< convenience typedef
873#endif
874#ifdef BOOST_LOCALE_ENABLE_CHAR16_T
875 typedef segment_index<std::u16string::const_iterator> u16ssegment_index; ///< convenience typedef
876#endif
877#ifdef BOOST_LOCALE_ENABLE_CHAR32_T
878 typedef segment_index<std::u32string::const_iterator> u32ssegment_index; ///< convenience typedef
879#endif
880
881 typedef segment_index<const char*> csegment_index; ///< convenience typedef
882 typedef segment_index<const wchar_t*> wcsegment_index; ///< convenience typedef
883#ifdef __cpp_char8_t
884 typedef segment_index<const char8_t*> u8csegment_index; ///< convenience typedef
885#endif
886#ifdef BOOST_LOCALE_ENABLE_CHAR16_T
887 typedef segment_index<const char16_t*> u16csegment_index; ///< convenience typedef
888#endif
889#ifdef BOOST_LOCALE_ENABLE_CHAR32_T
890 typedef segment_index<const char32_t*> u32csegment_index; ///< convenience typedef
891#endif
892
893 typedef boundary_point_index<std::string::const_iterator> sboundary_point_index; ///< convenience typedef
894 typedef boundary_point_index<std::wstring::const_iterator> wsboundary_point_index; ///< convenience typedef
895#ifndef BOOST_LOCALE_NO_CXX20_STRING8
896 typedef boundary_point_index<std::u8string::const_iterator> u8sboundary_point_index; ///< convenience typedef
897#endif
898#ifdef BOOST_LOCALE_ENABLE_CHAR16_T
899 typedef boundary_point_index<std::u16string::const_iterator> u16sboundary_point_index; ///< convenience typedef
900#endif
901#ifdef BOOST_LOCALE_ENABLE_CHAR32_T
902 typedef boundary_point_index<std::u32string::const_iterator> u32sboundary_point_index; ///< convenience typedef
903#endif
904
905 typedef boundary_point_index<const char*> cboundary_point_index; ///< convenience typedef
906 typedef boundary_point_index<const wchar_t*> wcboundary_point_index; ///< convenience typedef
907#ifdef __cpp_char8_t
908 typedef boundary_point_index<const char8_t*> u8cboundary_point_index; ///< convenience typedef
909#endif
910#ifdef BOOST_LOCALE_ENABLE_CHAR16_T
911 typedef boundary_point_index<const char16_t*> u16cboundary_point_index; ///< convenience typedef
912#endif
913#ifdef BOOST_LOCALE_ENABLE_CHAR32_T
914 typedef boundary_point_index<const char32_t*> u32cboundary_point_index; ///< convenience typedef
915#endif
916
917}}} // namespace boost::locale::boundary
918
919///
920/// \example boundary.cpp
921/// Example of using segment_index
922/// \example wboundary.cpp
923/// Example of using segment_index over wide strings
924///
925
926#ifdef BOOST_MSVC
927# pragma warning(pop)
928#endif
929
930#endif
931

source code of boost/libs/locale/include/boost/locale/boundary/index.hpp