1//
2// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
3//
4// Distributed under the Boost Software License, Version 1.0.
5// https://www.boost.org/LICENSE_1_0.txt
6
7#ifndef BOOST_LOCALE_BOUNDARY_TYPES_HPP_INCLUDED
8#define BOOST_LOCALE_BOUNDARY_TYPES_HPP_INCLUDED
9
10#include <boost/locale/config.hpp>
11#include <cstdint>
12
13#ifdef BOOST_MSVC
14# pragma warning(push)
15# pragma warning(disable : 4275 4251 4231 4660)
16#endif
17
namespace boost { namespace locale {

    /// \brief This namespace contains all operations required for boundary analysis of text
    namespace boundary {
        /// \defgroup boundary Boundary Analysis
        ///
        /// This module contains all operations required for boundary analysis of text: character, word, line and
        /// sentence boundaries
        ///
        /// @{

        /// This type describes the possible boundary analysis alternatives.
        enum boundary_type {
            character, ///< Analyse the text for character boundaries
            word,      ///< Analyse the text for word boundaries
            sentence,  ///< Analyse the text for sentence boundaries
            line       ///< Analyse the text for positions suitable for line breaks
        };

        /// \brief Flags used with boundary analysis -- the type of the word, line or sentence boundary found.
        ///
        /// It is a bit-mask that represents various combinations of rules used to select this specific boundary.
        // Qualified with std:: because <cstdint> is only required to declare the name inside namespace std.
        using rule_type = std::uint32_t;

        /// \anchor bl_boundary_word_rules
        /// \name Flags that describe a type of word selected
        /// @{
        constexpr rule_type word_none = 0x0000F;      ///< Not a word, like white space or punctuation mark
        constexpr rule_type word_number = 0x000F0;    ///< Word that appears to be a number
        constexpr rule_type word_letter = 0x00F00;    ///< Word that contains letters, excluding kana and ideographic characters
        constexpr rule_type word_kana = 0x0F000;      ///< Word that contains kana characters
        constexpr rule_type word_ideo = 0xF0000;      ///< Word that contains ideographic characters
        constexpr rule_type word_any = 0xFFFF0;       ///< Any word including numbers; 0 is a special flag, equivalent to 15
        constexpr rule_type word_letters = 0xFFF00;   ///< Any word, excluding numbers but including letters, kana and ideograms
        constexpr rule_type word_kana_ideo = 0xFF000; ///< Word that includes kana or ideographic characters
        constexpr rule_type word_mask = 0xFFFFF;      ///< Full word mask - select all possible variants
        /// @}

        /// \anchor bl_boundary_line_rules
        /// \name Flags that describe a type of line break
        /// @{
        constexpr rule_type line_soft = 0x0F; ///< Soft line break: optional but not required
        constexpr rule_type line_hard = 0xF0; ///< Hard line break: line break is required (as per CR/LF)
        constexpr rule_type line_any = 0xFF;  ///< Soft or Hard line break
        constexpr rule_type line_mask = 0xFF; ///< Select all types of line breaks

        /// @}

        /// \anchor bl_boundary_sentence_rules
        /// \name Flags that describe a type of sentence break
        ///
        /// @{

        /// \brief The sentence was terminated with a sentence terminator
        /// like ".", "!" possibly followed by a hard separator like CR, LF, PS
        constexpr rule_type sentence_term = 0x0F;
        /// \brief The sentence does not contain a terminator like ".", "!" but ended with a hard separator
        /// like CR, LF, PS or end of input.
        constexpr rule_type sentence_sep = 0xF0;
        constexpr rule_type sentence_any = 0xFF;  ///< Either the first or the second sentence break type
        constexpr rule_type sentence_mask = 0xFF; ///< Select all sentence breaking points

        ///@}

        /// \name Flags that describe a type of character break.
        ///
        /// At this point break iterator does not distinguish different
        /// kinds of characters so it is used for consistency.
        ///@{
        constexpr rule_type character_any = 0xF;  ///< Not in use, just for consistency
        constexpr rule_type character_mask = 0xF; ///< Select all character breaking points

        ///@}

        /// \brief Get the mask that covers all rule variants for a specific boundary type
        ///
        /// \param t the kind of boundary analysis the mask is requested for
        /// \return character_mask, word_mask, sentence_mask or line_mask, matching \a t;
        ///         0 if \a t matches none of the enumerators
        inline rule_type boundary_rule(boundary_type t)
        {
            switch(t) {
                case character: return character_mask;
                case word: return word_mask;
                case sentence: return sentence_mask;
                case line: return line_mask;
            }
            // Only reached when t is not one of the enumerators above
            return 0;
        }

        ///@}
    } // namespace boundary
}} // namespace boost::locale
105
106#ifdef BOOST_MSVC
107# pragma warning(pop)
108#endif
109
110#endif
111

source code of boost/libs/locale/include/boost/locale/boundary/types.hpp