| 1 | // |
| 2 | // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh) |
| 3 | // |
| 4 | // Distributed under the Boost Software License, Version 1.0. |
| 5 | // https://www.boost.org/LICENSE_1_0.txt |
| 6 | |
| 7 | #ifndef BOOST_LOCALE_CONVERTER_HPP_INCLUDED |
| 8 | #define BOOST_LOCALE_CONVERTER_HPP_INCLUDED |
| 9 | |
| 10 | #include <boost/locale/detail/facet_id.hpp> |
| 11 | #include <boost/locale/detail/is_supported_char.hpp> |
| 12 | #include <boost/locale/util/string.hpp> |
| 13 | #include <locale> |
| 14 | |
| 15 | #ifdef BOOST_MSVC |
| 16 | # pragma warning(push) |
| 17 | # pragma warning(disable : 4275 4251 4231 4660) |
| 18 | #endif |
| 19 | |
| 20 | namespace boost { namespace locale { |
| 21 | |
| 22 | /// \defgroup convert Text Conversions |
| 23 | /// |
| 24 | /// This module provides various functions for string manipulation, such as Unicode normalization and case conversion. |
| 25 | /// @{ |
| 26 | |
/// \brief Holder of the basic flags used for text manipulation; serves as a base of the converter facet.
class converter_base {
public:
    /// Selects which operation the facet has to perform
    enum conversion_type {
        normalization = 0, ///< Apply Unicode normalization on the text
        upper_case = 1,    ///< Convert text to upper case
        lower_case = 2,    ///< Convert text to lower case
        case_folding = 3,  ///< Fold case in the text
        title_case = 4     ///< Convert text to title case
    };
};
| 39 | |
| 40 | /// \brief The facet that implements text manipulation |
| 41 | /// |
| 42 | /// It is used to perform text conversion operations defined by \ref converter_base::conversion_type. |
| 43 | /// It is implemented for supported character types, at least \c char, \c wchar_t |
| 44 | template<typename Char> |
| 45 | class BOOST_SYMBOL_VISIBLE converter : public converter_base, |
| 46 | public std::locale::facet, |
| 47 | public detail::facet_id<converter<Char>> { |
| 48 | BOOST_LOCALE_ASSERT_IS_SUPPORTED(Char); |
| 49 | |
| 50 | public: |
| 51 | /// Standard constructor |
| 52 | converter(size_t refs = 0) : std::locale::facet(refs) {} |
| 53 | /// Convert text in range [\a begin, \a end) according to conversion method \a how. Parameter |
| 54 | /// \a flags is used for specification of normalization method like nfd, nfc etc. |
| 55 | virtual std::basic_string<Char> |
| 56 | convert(conversion_type how, const Char* begin, const Char* end, int flags = 0) const = 0; |
| 57 | }; |
| 58 | |
/// The type that defines the <a href="http://unicode.org/reports/tr15/#Norm_Forms">normalization form</a>
enum norm_type {
    norm_nfd = 0,  ///< Canonical decomposition
    norm_nfc = 1,  ///< Canonical decomposition followed by canonical composition
    norm_nfkd = 2, ///< Compatibility decomposition
    norm_nfkc = 3, ///< Compatibility decomposition followed by canonical composition.
    /// Default normalization - canonical decomposition followed by canonical composition
    norm_default = norm_nfc,
};
| 67 | |
| 68 | /// Normalize Unicode string in range [begin,end) according to \ref norm_type "normalization form" \a n |
| 69 | /// |
| 70 | /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take |
| 71 | /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside |
| 72 | /// of a Unicode character set. |
| 73 | /// |
| 74 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
| 75 | template<typename CharType> |
| 76 | std::basic_string<CharType> normalize(const CharType* begin, |
| 77 | const CharType* end, |
| 78 | norm_type n = norm_default, |
| 79 | const std::locale& loc = std::locale()) |
| 80 | { |
| 81 | return std::use_facet<converter<CharType>>(loc).convert(converter_base::normalization, begin, end, n); |
| 82 | } |
| 83 | |
| 84 | /// Normalize Unicode string \a str according to \ref norm_type "normalization form" \a n |
| 85 | /// |
| 86 | /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take |
| 87 | /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside |
| 88 | /// of a Unicode character set. |
| 89 | /// |
| 90 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
| 91 | template<typename CharType> |
| 92 | std::basic_string<CharType> normalize(const std::basic_string<CharType>& str, |
| 93 | norm_type n = norm_default, |
| 94 | const std::locale& loc = std::locale()) |
| 95 | { |
| 96 | return normalize(str.data(), str.data() + str.size(), n, loc); |
| 97 | } |
| 98 | |
| 99 | /// Normalize NULL terminated Unicode string \a str according to \ref norm_type "normalization form" \a n |
| 100 | /// |
| 101 | /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take |
| 102 | /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside |
| 103 | /// of a Unicode character set. |
| 104 | /// |
| 105 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
| 106 | template<typename CharType> |
| 107 | std::basic_string<CharType> |
| 108 | normalize(const CharType* str, norm_type n = norm_default, const std::locale& loc = std::locale()) |
| 109 | { |
| 110 | return normalize(str, util::str_end(str), n, loc); |
| 111 | } |
| 112 | |
| 113 | /////////////////////////////////////////////////// |
| 114 | |
| 115 | /// Convert a string in range [begin,end) to upper case according to locale \a loc |
| 116 | /// |
| 117 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
| 118 | template<typename CharType> |
| 119 | std::basic_string<CharType> |
| 120 | to_upper(const CharType* begin, const CharType* end, const std::locale& loc = std::locale()) |
| 121 | { |
| 122 | return std::use_facet<converter<CharType>>(loc).convert(converter_base::upper_case, begin, end); |
| 123 | } |
| 124 | |
/// Produce the upper case form of the string \a str, following the rules of locale \a loc
///
/// \throws std::bad_cast: \a loc does not have \ref converter facet installed
template<typename CharType>
std::basic_string<CharType> to_upper(const std::basic_string<CharType>& str, const std::locale& loc = std::locale())
{
    // Delegate to the overload working on a pointer range
    const CharType* const first = str.data();
    const CharType* const last = first + str.size();
    return to_upper(first, last, loc);
}
| 133 | |
| 134 | /// Convert a NULL terminated string \a str to upper case according to locale \a loc |
| 135 | /// |
| 136 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
| 137 | template<typename CharType> |
| 138 | std::basic_string<CharType> to_upper(const CharType* str, const std::locale& loc = std::locale()) |
| 139 | { |
| 140 | return to_upper(str, util::str_end(str), loc); |
| 141 | } |
| 142 | |
| 143 | /////////////////////////////////////////////////// |
| 144 | |
| 145 | /// Convert a string in range [begin,end) to lower case according to locale \a loc |
| 146 | /// |
| 147 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
| 148 | template<typename CharType> |
| 149 | std::basic_string<CharType> |
| 150 | to_lower(const CharType* begin, const CharType* end, const std::locale& loc = std::locale()) |
| 151 | { |
| 152 | return std::use_facet<converter<CharType>>(loc).convert(converter_base::lower_case, begin, end); |
| 153 | } |
| 154 | |
/// Produce the lower case form of the string \a str, following the rules of locale \a loc
///
/// \throws std::bad_cast: \a loc does not have \ref converter facet installed
template<typename CharType>
std::basic_string<CharType> to_lower(const std::basic_string<CharType>& str, const std::locale& loc = std::locale())
{
    // Delegate to the overload working on a pointer range
    const CharType* const first = str.data();
    const CharType* const last = first + str.size();
    return to_lower(first, last, loc);
}
| 163 | |
| 164 | /// Convert a NULL terminated string \a str to lower case according to locale \a loc |
| 165 | /// |
| 166 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
| 167 | template<typename CharType> |
| 168 | std::basic_string<CharType> to_lower(const CharType* str, const std::locale& loc = std::locale()) |
| 169 | { |
| 170 | return to_lower(str, util::str_end(str), loc); |
| 171 | } |
| 172 | |
| 173 | /////////////////////////////////////////////////// |
| 174 | |
| 175 | /// Convert a string in range [begin,end) to title case according to locale \a loc |
| 176 | /// |
| 177 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
| 178 | template<typename CharType> |
| 179 | std::basic_string<CharType> |
| 180 | to_title(const CharType* begin, const CharType* end, const std::locale& loc = std::locale()) |
| 181 | { |
| 182 | return std::use_facet<converter<CharType>>(loc).convert(converter_base::title_case, begin, end); |
| 183 | } |
| 184 | |
/// Produce the title case form of the string \a str, following the rules of locale \a loc
///
/// \throws std::bad_cast: \a loc does not have \ref converter facet installed
template<typename CharType>
std::basic_string<CharType> to_title(const std::basic_string<CharType>& str, const std::locale& loc = std::locale())
{
    // Delegate to the overload working on a pointer range
    const CharType* const first = str.data();
    const CharType* const last = first + str.size();
    return to_title(first, last, loc);
}
| 193 | |
| 194 | /// Convert a NULL terminated string \a str to title case according to locale \a loc |
| 195 | /// |
| 196 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
| 197 | template<typename CharType> |
| 198 | std::basic_string<CharType> to_title(const CharType* str, const std::locale& loc = std::locale()) |
| 199 | { |
| 200 | return to_title(str, util::str_end(str), loc); |
| 201 | } |
| 202 | |
| 203 | /////////////////////////////////////////////////// |
| 204 | |
| 205 | /// Fold case of a string in range [begin,end) according to locale \a loc |
| 206 | /// |
| 207 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
| 208 | template<typename CharType> |
| 209 | std::basic_string<CharType> |
| 210 | fold_case(const CharType* begin, const CharType* end, const std::locale& loc = std::locale()) |
| 211 | { |
| 212 | return std::use_facet<converter<CharType>>(loc).convert(converter_base::case_folding, begin, end); |
| 213 | } |
| 214 | |
/// Produce the case folded form of the string \a str, following the rules of locale \a loc
///
/// \throws std::bad_cast: \a loc does not have \ref converter facet installed
template<typename CharType>
std::basic_string<CharType> fold_case(const std::basic_string<CharType>& str,
                                      const std::locale& loc = std::locale())
{
    // Delegate to the overload working on a pointer range
    const CharType* const first = str.data();
    const CharType* const last = first + str.size();
    return fold_case(first, last, loc);
}
| 224 | |
| 225 | /// Fold case of a NULL terminated string \a str according to locale \a loc |
| 226 | /// |
| 227 | /// \throws std::bad_cast: \a loc does not have \ref converter facet installed |
| 228 | template<typename CharType> |
| 229 | std::basic_string<CharType> fold_case(const CharType* str, const std::locale& loc = std::locale()) |
| 230 | { |
| 231 | return fold_case(str, util::str_end(str), loc); |
| 232 | } |
| 233 | |
| 234 | ///@} |
| 235 | }} // namespace boost::locale |
| 236 | |
| 237 | #ifdef BOOST_MSVC |
| 238 | # pragma warning(pop) |
| 239 | #endif |
| 240 | |
| 241 | /// \example conversions.cpp |
| 242 | /// |
| 243 | /// Example of using various text conversion functions. |
| 244 | /// |
| 245 | /// \example wconversions.cpp |
| 246 | /// |
| 247 | /// Example of using various text conversion functions with wide strings. |
| 248 | |
| 249 | #endif |
| 250 | |