1 | // |
2 | // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) |
3 | // |
4 | // Distributed under the Boost Software License, Version 1.0. |
5 | // https://www.boost.org/LICENSE_1_0.txt |
6 | |
7 | #ifndef BOOST_LOCALE_COLLATOR_HPP_INCLUDED |
8 | #define BOOST_LOCALE_COLLATOR_HPP_INCLUDED |
9 | |
10 | #include <boost/locale/config.hpp> |
11 | #include <boost/locale/detail/facet_id.hpp> |
12 | #include <locale> |
13 | |
14 | #ifdef BOOST_MSVC |
15 | # pragma warning(push) |
16 | # pragma warning(disable : 4275 4251 4231 4660) |
17 | #endif |
18 | |
19 | namespace boost { namespace locale { |
20 | |
21 | /// \defgroup collation Collation |
22 | /// |
/// This module introduces collation-related classes.
24 | /// @{ |
25 | |
/// Unicode collation level types.
///
/// Each level takes more properties of the text into account than the previous one.
enum class collate_level {
    /// 1st collation level: base letters
    primary = 0,
    /// 2nd collation level: letters and accents
    secondary = 1,
    /// 3rd collation level: letters, accents and case
    tertiary = 2,
    /// 4th collation level: letters, accents, case and punctuation
    quaternary = 3,
    /// identical collation level: include code-point comparison
    identical = 4
};
34 | |
35 | class BOOST_DEPRECATED("Use collate_level" ) collator_base { |
36 | public: |
37 | using level_type = collate_level; |
38 | static constexpr auto primary = collate_level::primary; |
39 | static constexpr auto secondary = collate_level::secondary; |
40 | static constexpr auto tertiary = collate_level::tertiary; |
41 | static constexpr auto quaternary = collate_level::quaternary; |
42 | static constexpr auto identical = collate_level::identical; |
43 | }; |
44 | |
45 | /// \brief Collation facet. |
46 | /// |
47 | /// It reimplements standard C++ std::collate with support for collation levels |
48 | template<typename CharType> |
49 | class BOOST_SYMBOL_VISIBLE collator : public std::locale::facet, public detail::facet_id<collator<CharType>> { |
50 | public: |
51 | /// Type of the underlying character |
52 | typedef CharType char_type; |
53 | /// Type of string used with this facet |
54 | typedef std::basic_string<CharType> string_type; |
55 | |
56 | /// Compare two strings in range [b1,e1), [b2,e2) according to collation level \a level. Calls do_compare |
57 | /// |
58 | /// Returns -1 if the first of the two strings sorts before the seconds, returns 1 if sorts after and 0 if |
59 | /// they considered equal. |
60 | int compare(collate_level level, |
61 | const char_type* b1, |
62 | const char_type* e1, |
63 | const char_type* b2, |
64 | const char_type* e2) const |
65 | { |
66 | return do_compare(level, b1, e1, b2, e2); |
67 | } |
68 | |
69 | /// Default compare function as-in std::collate that does not take collation level into account. |
70 | /// Uses identical level |
71 | int compare(const char_type* b1, const char_type* e1, const char_type* b2, const char_type* e2) const |
72 | { |
73 | return compare(collate_level::identical, b1, e1, b2, e2); |
74 | } |
75 | |
76 | /// Create a binary string that can be compared to other in order to get collation order. The string is created |
77 | /// for text in range [b,e). It is useful for collation of multiple strings for text. |
78 | /// |
79 | /// The transformation follows these rules: |
80 | /// \code |
81 | /// compare(level,b1,e1,b2,e2) == sign( transform(level,b1,e1).compare(transform(level,b2,e2)) ); |
82 | /// \endcode |
83 | /// |
84 | /// Calls do_transform |
85 | string_type transform(collate_level level, const char_type* b, const char_type* e) const |
86 | { |
87 | return do_transform(level, b, e); |
88 | } |
89 | |
90 | /// Default transform function as-in std::collate that does not take collation level into account. |
91 | /// Uses identical level |
92 | string_type transform(const char_type* b, const char_type* e) const |
93 | { |
94 | return transform(collate_level::identical, b, e); |
95 | } |
96 | |
97 | /// Calculate a hash of a text in range [b,e). The value can be used for collation sensitive string comparison. |
98 | /// |
99 | /// If compare(level,b1,e1,b2,e2) == 0 then hash(level,b1,e1) == hash(level,b2,e2) |
100 | /// |
101 | /// Calls do_hash |
102 | long hash(collate_level level, const char_type* b, const char_type* e) const { return do_hash(level, b, e); } |
103 | |
104 | /// Default hash function as-in std::collate that does not take collation level into account. |
105 | /// Uses identical level |
106 | long hash(const char_type* b, const char_type* e) const { return hash(collate_level::identical, b, e); } |
107 | |
108 | /// Compare two strings \a l and \a r using collation level \a level |
109 | /// |
110 | /// Returns -1 if the first of the two strings sorts before the seconds, returns 1 if sorts after and 0 if |
111 | /// they considered equal. |
112 | int compare(collate_level level, const string_type& l, const string_type& r) const |
113 | { |
114 | return do_compare(level, b1: l.data(), e1: l.data() + l.size(), b2: r.data(), e2: r.data() + r.size()); |
115 | } |
116 | |
117 | /// Calculate a hash that can be used for collation sensitive string comparison of a string \a s |
118 | /// |
119 | /// If compare(level,s1,s2) == 0 then hash(level,s1) == hash(level,s2) |
120 | long hash(collate_level level, const string_type& s) const |
121 | { |
122 | return do_hash(level, b: s.data(), e: s.data() + s.size()); |
123 | } |
124 | |
125 | /// Create a binary string from string \a s, that can be compared to other, useful for collation of multiple |
126 | /// strings. |
127 | /// |
128 | /// The transformation follows this rule: |
129 | /// \code |
130 | /// compare(level,s1,s2) == sign( transform(level,s1).compare(transform(level,s2)) ); |
131 | /// \endcode |
132 | string_type transform(collate_level level, const string_type& s) const |
133 | { |
134 | return do_transform(level, b: s.data(), e: s.data() + s.size()); |
135 | } |
136 | |
137 | protected: |
138 | /// constructor of the collator object |
139 | collator(size_t refs = 0) : std::locale::facet(refs) {} |
140 | |
141 | /// Actual function that performs comparison between the strings. For details see compare member function. Can |
142 | /// be overridden. |
143 | virtual int do_compare(collate_level level, |
144 | const char_type* b1, |
145 | const char_type* e1, |
146 | const char_type* b2, |
147 | const char_type* e2) const = 0; |
148 | |
149 | /// Actual function that performs transformation. For details see transform member function. Can be overridden. |
150 | virtual string_type do_transform(collate_level level, const char_type* b, const char_type* e) const = 0; |
151 | /// Actual function that calculates hash. For details see hash member function. Can be overridden. |
152 | virtual long do_hash(collate_level level, const char_type* b, const char_type* e) const = 0; |
153 | }; |
154 | |
155 | /// \brief This class can be used in STL algorithms and containers for comparison of strings |
156 | /// with a level other than identical |
157 | /// |
158 | /// For example: |
159 | /// |
160 | /// \code |
161 | /// std::map<std::string,std::string,comparator<char,collate_level::secondary> > data; |
162 | /// \endcode |
163 | /// |
164 | /// Would create a map the keys of which are sorted using secondary collation level |
165 | template<typename CharType, collate_level default_level = collate_level::identical> |
166 | struct comparator { |
167 | public: |
168 | /// Create a comparator class for locale \a l and with collation level \a level |
169 | /// |
170 | /// \throws std::bad_cast: \a l does not have \ref collator facet installed |
171 | comparator(const std::locale& l = std::locale(), collate_level level = default_level) : |
172 | locale_(l), collator_(std::use_facet<collator<CharType>>(locale_)), level_(level) |
173 | {} |
174 | |
175 | /// Compare two strings -- equivalent to return left < right according to collation rules |
176 | bool operator()(const std::basic_string<CharType>& left, const std::basic_string<CharType>& right) const |
177 | { |
178 | return collator_.compare(level_, left, right) < 0; |
179 | } |
180 | |
181 | private: |
182 | std::locale locale_; |
183 | const collator<CharType>& collator_; |
184 | collate_level level_; |
185 | }; |
186 | |
187 | ///@} |
188 | }} // namespace boost::locale |
189 | |
190 | #ifdef BOOST_MSVC |
191 | # pragma warning(pop) |
192 | #endif |
193 | |
194 | /// |
195 | /// \example collate.cpp |
196 | /// Example of using collation functions |
197 | /// |
198 | |
199 | #endif |
200 | |