1 | // |
2 | // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) |
3 | // |
4 | // Distributed under the Boost Software License, Version 1.0. |
5 | // https://www.boost.org/LICENSE_1_0.txt |
6 | |
7 | #ifndef BOOST_LOCALE_CONVERTER_HPP_INCLUDED |
8 | #define BOOST_LOCALE_CONVERTER_HPP_INCLUDED |
9 | |
10 | #include <boost/locale/detail/facet_id.hpp> |
11 | #include <boost/locale/detail/is_supported_char.hpp> |
12 | #include <boost/locale/util/string.hpp> |
13 | #include <locale> |
14 | |
15 | #ifdef BOOST_MSVC |
16 | # pragma warning(push) |
17 | # pragma warning(disable : 4275 4251 4231 4660) |
18 | #endif |
19 | |
20 | namespace boost { namespace locale { |
21 | |
22 | /// \defgroup convert Text Conversions |
23 | /// |
/// This module provides various functions for string manipulation like Unicode normalization, case conversion, etc.
25 | /// @{ |
26 | |
/// \brief Holder of the operation selectors used for text manipulation.
///
/// It is the non-template base of the converter facet.
class converter_base {
public:
    /// Selector telling the facet which operation to perform on the text
    enum conversion_type {
        normalization = 0, ///< Apply Unicode normalization on the text
        upper_case = 1,    ///< Convert text to upper case
        lower_case = 2,    ///< Convert text to lower case
        case_folding = 3,  ///< Fold case in the text
        title_case = 4     ///< Convert text to title case
    };
};
39 | |
40 | /// \brief The facet that implements text manipulation |
41 | /// |
42 | /// It is used to perform text conversion operations defined by \ref converter_base::conversion_type. |
43 | /// It is implemented for supported character types, at least \c char, \c wchar_t |
44 | template<typename Char> |
45 | class BOOST_SYMBOL_VISIBLE converter : public converter_base, |
46 | public std::locale::facet, |
47 | public detail::facet_id<converter<Char>> { |
48 | BOOST_LOCALE_ASSERT_IS_SUPPORTED(Char); |
49 | |
50 | public: |
51 | /// Standard constructor |
52 | converter(size_t refs = 0) : std::locale::facet(refs) {} |
53 | /// Convert text in range [\a begin, \a end) according to conversion method \a how. Parameter |
54 | /// \a flags is used for specification of normalization method like nfd, nfc etc. |
55 | virtual std::basic_string<Char> |
56 | convert(conversion_type how, const Char* begin, const Char* end, int flags = 0) const = 0; |
57 | }; |
58 | |
/// The type that defines the Unicode
/// <a href="http://unicode.org/reports/tr15/#Norm_Forms">normalization form</a> to apply
enum norm_type {
    norm_nfd = 0,            ///< Canonical decomposition
    norm_nfc = 1,            ///< Canonical decomposition followed by canonical composition
    norm_nfkd = 2,           ///< Compatibility decomposition
    norm_nfkc = 3,           ///< Compatibility decomposition followed by canonical composition.
    norm_default = norm_nfc  ///< Default normalization - canonical decomposition followed by canonical composition
};
67 | |
68 | /// Normalize Unicode string in range [begin,end) according to \ref norm_type "normalization form" \a n |
69 | /// |
70 | /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take |
71 | /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside |
72 | /// of a Unicode character set. |
73 | /// |
74 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
75 | template<typename CharType> |
76 | std::basic_string<CharType> normalize(const CharType* begin, |
77 | const CharType* end, |
78 | norm_type n = norm_default, |
79 | const std::locale& loc = std::locale()) |
80 | { |
81 | return std::use_facet<converter<CharType>>(loc).convert(converter_base::normalization, begin, end, n); |
82 | } |
83 | |
84 | /// Normalize Unicode string \a str according to \ref norm_type "normalization form" \a n |
85 | /// |
86 | /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take |
87 | /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside |
88 | /// of a Unicode character set. |
89 | /// |
90 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
91 | template<typename CharType> |
92 | std::basic_string<CharType> normalize(const std::basic_string<CharType>& str, |
93 | norm_type n = norm_default, |
94 | const std::locale& loc = std::locale()) |
95 | { |
96 | return normalize(str.data(), str.data() + str.size(), n, loc); |
97 | } |
98 | |
99 | /// Normalize NULL terminated Unicode string \a str according to \ref norm_type "normalization form" \a n |
100 | /// |
101 | /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take |
102 | /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside |
103 | /// of a Unicode character set. |
104 | /// |
105 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
106 | template<typename CharType> |
107 | std::basic_string<CharType> |
108 | normalize(const CharType* str, norm_type n = norm_default, const std::locale& loc = std::locale()) |
109 | { |
110 | return normalize(str, util::str_end(str), n, loc); |
111 | } |
112 | |
113 | /////////////////////////////////////////////////// |
114 | |
115 | /// Convert a string in range [begin,end) to upper case according to locale \a loc |
116 | /// |
117 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
118 | template<typename CharType> |
119 | std::basic_string<CharType> |
120 | to_upper(const CharType* begin, const CharType* end, const std::locale& loc = std::locale()) |
121 | { |
122 | return std::use_facet<converter<CharType>>(loc).convert(converter_base::upper_case, begin, end); |
123 | } |
124 | |
/// Produce the upper case form of the string \a str according to locale \a loc
///
/// \throws std::bad_cast: \a loc does not have \ref converter facet installed
template<typename CharType>
std::basic_string<CharType> to_upper(const std::basic_string<CharType>& str, const std::locale& loc = std::locale())
{
    // Delegate to the [begin, end) overload over the string's character buffer
    const CharType* const first = str.data();
    return to_upper(first, first + str.size(), loc);
}
133 | |
134 | /// Convert a NULL terminated string \a str to upper case according to locale \a loc |
135 | /// |
136 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
137 | template<typename CharType> |
138 | std::basic_string<CharType> to_upper(const CharType* str, const std::locale& loc = std::locale()) |
139 | { |
140 | return to_upper(str, util::str_end(str), loc); |
141 | } |
142 | |
143 | /////////////////////////////////////////////////// |
144 | |
145 | /// Convert a string in range [begin,end) to lower case according to locale \a loc |
146 | /// |
147 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
148 | template<typename CharType> |
149 | std::basic_string<CharType> |
150 | to_lower(const CharType* begin, const CharType* end, const std::locale& loc = std::locale()) |
151 | { |
152 | return std::use_facet<converter<CharType>>(loc).convert(converter_base::lower_case, begin, end); |
153 | } |
154 | |
/// Produce the lower case form of the string \a str according to locale \a loc
///
/// \throws std::bad_cast: \a loc does not have \ref converter facet installed
template<typename CharType>
std::basic_string<CharType> to_lower(const std::basic_string<CharType>& str, const std::locale& loc = std::locale())
{
    // Delegate to the [begin, end) overload over the string's character buffer
    const CharType* const first = str.data();
    return to_lower(first, first + str.size(), loc);
}
163 | |
164 | /// Convert a NULL terminated string \a str to lower case according to locale \a loc |
165 | /// |
166 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
167 | template<typename CharType> |
168 | std::basic_string<CharType> to_lower(const CharType* str, const std::locale& loc = std::locale()) |
169 | { |
170 | return to_lower(str, util::str_end(str), loc); |
171 | } |
172 | |
173 | /////////////////////////////////////////////////// |
174 | |
175 | /// Convert a string in range [begin,end) to title case according to locale \a loc |
176 | /// |
177 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
178 | template<typename CharType> |
179 | std::basic_string<CharType> |
180 | to_title(const CharType* begin, const CharType* end, const std::locale& loc = std::locale()) |
181 | { |
182 | return std::use_facet<converter<CharType>>(loc).convert(converter_base::title_case, begin, end); |
183 | } |
184 | |
/// Produce the title case form of the string \a str according to locale \a loc
///
/// \throws std::bad_cast: \a loc does not have \ref converter facet installed
template<typename CharType>
std::basic_string<CharType> to_title(const std::basic_string<CharType>& str, const std::locale& loc = std::locale())
{
    // Delegate to the [begin, end) overload over the string's character buffer
    const CharType* const first = str.data();
    return to_title(first, first + str.size(), loc);
}
193 | |
194 | /// Convert a NULL terminated string \a str to title case according to locale \a loc |
195 | /// |
196 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
197 | template<typename CharType> |
198 | std::basic_string<CharType> to_title(const CharType* str, const std::locale& loc = std::locale()) |
199 | { |
200 | return to_title(str, util::str_end(str), loc); |
201 | } |
202 | |
203 | /////////////////////////////////////////////////// |
204 | |
205 | /// Fold case of a string in range [begin,end) according to locale \a loc |
206 | /// |
207 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
208 | template<typename CharType> |
209 | std::basic_string<CharType> |
210 | fold_case(const CharType* begin, const CharType* end, const std::locale& loc = std::locale()) |
211 | { |
212 | return std::use_facet<converter<CharType>>(loc).convert(converter_base::case_folding, begin, end); |
213 | } |
214 | |
/// Produce the case folded form of the string \a str according to locale \a loc
///
/// \throws std::bad_cast: \a loc does not have \ref converter facet installed
template<typename CharType>
std::basic_string<CharType> fold_case(const std::basic_string<CharType>& str,
                                      const std::locale& loc = std::locale())
{
    // Delegate to the [begin, end) overload over the string's character buffer
    const CharType* const first = str.data();
    return fold_case(first, first + str.size(), loc);
}
224 | |
225 | /// Fold case of a NULL terminated string \a str according to locale \a loc |
226 | /// |
227 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
228 | template<typename CharType> |
229 | std::basic_string<CharType> fold_case(const CharType* str, const std::locale& loc = std::locale()) |
230 | { |
231 | return fold_case(str, util::str_end(str), loc); |
232 | } |
233 | |
234 | ///@} |
235 | }} // namespace boost::locale |
236 | |
237 | #ifdef BOOST_MSVC |
238 | # pragma warning(pop) |
239 | #endif |
240 | |
241 | /// \example conversions.cpp |
242 | /// |
243 | /// Example of using various text conversion functions. |
244 | /// |
245 | /// \example wconversions.cpp |
246 | /// |
247 | /// Example of using various text conversion functions with wide strings. |
248 | |
249 | #endif |
250 | |