1 | // |
2 | // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) |
3 | // |
4 | // Distributed under the Boost Software License, Version 1.0. |
5 | // https://www.boost.org/LICENSE_1_0.txt |
6 | |
7 | #ifndef BOOST_LOCALE_BOUNDARY_TYPES_HPP_INCLUDED |
8 | #define BOOST_LOCALE_BOUNDARY_TYPES_HPP_INCLUDED |
9 | |
10 | #include <boost/locale/config.hpp> |
11 | #include <cstdint> |
12 | |
13 | #ifdef BOOST_MSVC |
14 | # pragma warning(push) |
15 | # pragma warning(disable : 4275 4251 4231 4660) |
16 | #endif |
17 | |
18 | namespace boost { namespace locale { |
19 | |
20 | /// \brief This namespace contains all operations required for boundary analysis of text |
21 | namespace boundary { |
22 | /// \defgroup boundary Boundary Analysis |
23 | /// |
/// This module contains all operations required for boundary analysis of text: character, word, line and
/// sentence boundaries
26 | /// |
27 | /// @{ |
28 | |
/// This type enumerates the available kinds of boundary analysis.
enum boundary_type {
    character = 0, ///< Analyse the text for character boundaries
    word = 1,      ///< Analyse the text for word boundaries
    sentence = 2,  ///< Analyse the text for sentence boundaries
    line = 3       ///< Analyse the text for positions suitable for line breaks
};
36 | |
/// \brief Flags used with boundary analysis -- the type of the character, word, line or sentence boundary
/// found.
///
/// It is a bit-mask that represents various combinations of rules used to select this specific boundary.
// Qualified with std:: because <cstdint> only guarantees the name inside namespace std.
using rule_type = std::uint32_t;
41 | |
42 | /// \anchor bl_boundary_word_rules |
43 | /// \name Flags that describe a type of word selected |
44 | /// @{ |
45 | constexpr rule_type word_none = 0x0000F, ///< Not a word, like white space or punctuation mark |
46 | word_number = 0x000F0, ///< Word that appear to be a number |
47 | word_letter = 0x00F00, ///< Word that contains letters, excluding kana and ideographic characters |
48 | word_kana = 0x0F000, ///< Word that contains kana characters |
49 | word_ideo = 0xF0000, ///< Word that contains ideographic characters |
50 | word_any = 0xFFFF0, ///< Any word including numbers, 0 is special flag, equivalent to 15 |
51 | word_letters = 0xFFF00, ///< Any word, excluding numbers but including letters, kana and ideograms. |
52 | word_kana_ideo = 0xFF000, ///< Word that includes kana or ideographic characters |
53 | word_mask = 0xFFFFF; ///< Full word mask - select all possible variants |
54 | /// @} |
55 | |
56 | /// \anchor bl_boundary_line_rules |
57 | /// \name Flags that describe a type of line break |
58 | /// @{ |
59 | constexpr rule_type line_soft = 0x0F, ///< Soft line break: optional but not required |
60 | line_hard = 0xF0, ///< Hard line break: like break is required (as per CR/LF) |
61 | line_any = 0xFF, ///< Soft or Hard line break |
62 | line_mask = 0xFF; ///< Select all types of line breaks |
63 | |
64 | /// @} |
65 | |
66 | /// \anchor bl_boundary_sentence_rules |
67 | /// \name Flags that describe a type of sentence break |
68 | /// |
69 | /// @{ |
70 | constexpr rule_type sentence_term = 0x0F, ///< \brief The sentence was terminated with a sentence terminator |
71 | /// like ".", "!" possible followed by hard separator like CR, LF, PS |
72 | sentence_sep = |
73 | 0xF0, ///< \brief The sentence does not contain terminator like ".", "!" but ended with hard separator |
74 | /// like CR, LF, PS or end of input. |
75 | sentence_any = 0xFF, ///< Either first or second sentence break type;. |
76 | sentence_mask = 0xFF; ///< Select all sentence breaking points |
77 | |
78 | ///@} |
79 | |
80 | /// \name Flags that describe a type of character break. |
81 | /// |
82 | /// At this point break iterator does not distinguish different |
83 | /// kinds of characters so it is used for consistency. |
84 | ///@{ |
85 | constexpr rule_type character_any = 0xF, ///< Not in use, just for consistency |
86 | character_mask = 0xF; ///< Select all character breaking points |
87 | |
88 | ///@} |
89 | |
90 | /// This function returns the mask that covers all variants for specific boundary type |
91 | inline rule_type boundary_rule(boundary_type t) |
92 | { |
93 | switch(t) { |
94 | case character: return character_mask; |
95 | case word: return word_mask; |
96 | case sentence: return sentence_mask; |
97 | case line: return line_mask; |
98 | } |
99 | return 0; |
100 | } |
101 | |
102 | ///@} |
103 | } // namespace boundary |
104 | }} // namespace boost::locale |
105 | |
106 | #ifdef BOOST_MSVC |
107 | # pragma warning(pop) |
108 | #endif |
109 | |
110 | #endif |
111 | |