| 1 | #ifndef _DATE_TIME_DATE_PARSING_HPP___ |
| 2 | #define _DATE_TIME_DATE_PARSING_HPP___ |
| 3 | |
| 4 | /* Copyright (c) 2002,2003,2005 CrystalClear Software, Inc. |
| 5 | * Use, modification and distribution is subject to the |
| 6 | * Boost Software License, Version 1.0. (See accompanying |
| 7 | * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt) |
| 8 | * Author: Jeff Garland, Bart Garst |
| 9 | * $Date$ |
| 10 | */ |
| 11 | |
#include <map>
#include <string>
#include <sstream>
#include <iterator>
#include <algorithm>
#include <stdexcept>
#include <boost/tokenizer.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/date_time/compiler_config.hpp>
#include <boost/date_time/parse_format_base.hpp>
#include <boost/date_time/period.hpp>
| 22 | |
| 23 | #if defined(BOOST_DATE_TIME_NO_LOCALE) |
| 24 | #include <cctype> // ::tolower(int) |
| 25 | #else |
| 26 | #include <locale> // std::tolower(char, locale) |
| 27 | #endif |
| 28 | |
| 29 | namespace boost { |
| 30 | namespace date_time { |
| 31 | |
//! A function to replace the std::transform( , , ,tolower) construct
/*! Returns a copy of \c inp with every character converted to lowercase.
 *  The conversion uses the classic "C" locale (std::locale::classic()),
 *  not the global or system locale, so the result does not depend on the
 *  environment the program runs in.
 *  When BOOST_DATE_TIME_NO_LOCALE is defined the C style tolower() is
 *  used instead.
 */
inline
std::string
convert_to_lower(std::string inp)
{
#if defined(BOOST_DATE_TIME_NO_LOCALE)
  for (std::string::size_type i = 0, n = inp.length(); i < n; ++i) {
    // The C tolower() is only defined for values representable as
    // unsigned char (or EOF); a plain char may be negative, so convert
    // through unsigned char first.
    inp[i] = static_cast<char>(
        std::tolower(static_cast<unsigned char>(inp[i])));
  }
#else
  // tolower and others were brought in to std for borland >= v564
  // in compiler_config.hpp
  const std::locale loc(std::locale::classic());
  for (std::string::size_type i = 0, n = inp.length(); i < n; ++i) {
    inp[i] = std::tolower(inp[i], loc);
  }
#endif
  return inp;
}
| 58 | |
| 59 | //! Helper function for parse_date. |
| 60 | template<class month_type> |
| 61 | inline unsigned short |
| 62 | month_str_to_ushort(std::string const& s) { |
| 63 | if((s.at(n: 0) >= '0') && (s.at(n: 0) <= '9')) { |
| 64 | return boost::lexical_cast<unsigned short>(arg: s); |
| 65 | } |
| 66 | else { |
| 67 | std::string str = convert_to_lower(inp: s); |
| 68 | //c++98 support |
| 69 | #if defined(BOOST_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX) |
| 70 | static std::map<std::string, unsigned short> month_map; |
| 71 | typedef std::map<std::string, unsigned short>::value_type vtype; |
| 72 | if( month_map.empty() ) { |
| 73 | month_map.insert( vtype("jan" , static_cast<unsigned short>(1)) ); |
| 74 | month_map.insert( vtype("january" , static_cast<unsigned short>(1)) ); |
| 75 | month_map.insert( vtype("feb" , static_cast<unsigned short>(2)) ); |
| 76 | month_map.insert( vtype("february" , static_cast<unsigned short>(2)) ); |
| 77 | month_map.insert( vtype("mar" , static_cast<unsigned short>(3)) ); |
| 78 | month_map.insert( vtype("march" , static_cast<unsigned short>(3)) ); |
| 79 | month_map.insert( vtype("apr" , static_cast<unsigned short>(4)) ); |
| 80 | month_map.insert( vtype("april" , static_cast<unsigned short>(4)) ); |
| 81 | month_map.insert( vtype("may" , static_cast<unsigned short>(5)) ); |
| 82 | month_map.insert( vtype("jun" , static_cast<unsigned short>(6)) ); |
| 83 | month_map.insert( vtype("june" , static_cast<unsigned short>(6)) ); |
| 84 | month_map.insert( vtype("jul" , static_cast<unsigned short>(7)) ); |
| 85 | month_map.insert( vtype("july" , static_cast<unsigned short>(7)) ); |
| 86 | month_map.insert( vtype("aug" , static_cast<unsigned short>(8)) ); |
| 87 | month_map.insert( vtype("august" , static_cast<unsigned short>(8)) ); |
| 88 | month_map.insert( vtype("sep" , static_cast<unsigned short>(9)) ); |
| 89 | month_map.insert( vtype("september" , static_cast<unsigned short>(9)) ); |
| 90 | month_map.insert( vtype("oct" , static_cast<unsigned short>(10)) ); |
| 91 | month_map.insert( vtype("october" , static_cast<unsigned short>(10)) ); |
| 92 | month_map.insert( vtype("nov" , static_cast<unsigned short>(11)) ); |
| 93 | month_map.insert( vtype("november" , static_cast<unsigned short>(11)) ); |
| 94 | month_map.insert( vtype("dec" , static_cast<unsigned short>(12)) ); |
| 95 | month_map.insert( vtype("december" , static_cast<unsigned short>(12)) ); |
| 96 | } |
| 97 | #else //c+11 and beyond |
| 98 | static std::map<std::string, unsigned short> month_map = |
| 99 | { { "jan" , static_cast<unsigned short>(1) }, { "january" , static_cast<unsigned short>(1) }, |
| 100 | { "feb" , static_cast<unsigned short>(2) }, { "february" , static_cast<unsigned short>(2) }, |
| 101 | { "mar" , static_cast<unsigned short>(3) }, { "march" , static_cast<unsigned short>(3) }, |
| 102 | { "apr" , static_cast<unsigned short>(4) }, { "april" , static_cast<unsigned short>(4) }, |
| 103 | { "may" , static_cast<unsigned short>(5) }, |
| 104 | { "jun" , static_cast<unsigned short>(6) }, { "june" , static_cast<unsigned short>(6) }, |
| 105 | { "jul" , static_cast<unsigned short>(7) }, { "july" , static_cast<unsigned short>(7) }, |
| 106 | { "aug" , static_cast<unsigned short>(8) }, { "august" , static_cast<unsigned short>(8) }, |
| 107 | { "sep" , static_cast<unsigned short>(9) }, { "september" , static_cast<unsigned short>(9) }, |
| 108 | { "oct" , static_cast<unsigned short>(10) }, { "october" , static_cast<unsigned short>(10)}, |
| 109 | { "nov" , static_cast<unsigned short>(11) }, { "november" , static_cast<unsigned short>(11)}, |
| 110 | { "dec" , static_cast<unsigned short>(12) }, { "december" , static_cast<unsigned short>(12)} |
| 111 | }; |
| 112 | #endif |
| 113 | std::map<std::string, unsigned short>::const_iterator mitr = month_map.find( x: str ); |
| 114 | if ( mitr != month_map.end() ) { |
| 115 | return mitr->second; |
| 116 | } |
| 117 | } |
| 118 | return 13; // intentionally out of range - name not found |
| 119 | } |
| 120 | |
| 121 | |
| 122 | //! Generic function to parse a delimited date (eg: 2002-02-10) |
| 123 | /*! Accepted formats are: "2003-02-10" or " 2003-Feb-10" or |
| 124 | * "2003-Feburary-10" |
| 125 | * The order in which the Month, Day, & Year appear in the argument |
| 126 | * string can be accomodated by passing in the appropriate ymd_order_spec |
| 127 | */ |
| 128 | template<class date_type> |
| 129 | date_type |
| 130 | parse_date(const std::string& s, int order_spec = ymd_order_iso) { |
| 131 | std::string spec_str; |
| 132 | if(order_spec == ymd_order_iso) { |
| 133 | spec_str = "ymd" ; |
| 134 | } |
| 135 | else if(order_spec == ymd_order_dmy) { |
| 136 | spec_str = "dmy" ; |
| 137 | } |
| 138 | else { // (order_spec == ymd_order_us) |
| 139 | spec_str = "mdy" ; |
| 140 | } |
| 141 | |
| 142 | typedef typename date_type::month_type month_type; |
| 143 | unsigned pos = 0; |
| 144 | unsigned short year(0), month(0), day(0); |
| 145 | typedef typename std::basic_string<char>::traits_type traits_type; |
| 146 | typedef boost::char_separator<char, traits_type> char_separator_type; |
| 147 | typedef boost::tokenizer<char_separator_type, |
| 148 | std::basic_string<char>::const_iterator, |
| 149 | std::basic_string<char> > tokenizer; |
| 150 | typedef boost::tokenizer<char_separator_type, |
| 151 | std::basic_string<char>::const_iterator, |
| 152 | std::basic_string<char> >::iterator tokenizer_iterator; |
| 153 | // may need more delimiters, these work for the regression tests |
| 154 | const char sep_char[] = {',','-','.',' ','/','\0'}; |
| 155 | char_separator_type sep(sep_char); |
| 156 | tokenizer tok(s,sep); |
| 157 | for(tokenizer_iterator beg=tok.begin(); |
| 158 | beg!=tok.end() && pos < spec_str.size(); |
| 159 | ++beg, ++pos) { |
| 160 | switch(spec_str.at(n: pos)) { |
| 161 | case 'y': |
| 162 | { |
| 163 | year = boost::lexical_cast<unsigned short>(arg: *beg); |
| 164 | break; |
| 165 | } |
| 166 | case 'm': |
| 167 | { |
| 168 | month = month_str_to_ushort<month_type>(*beg); |
| 169 | break; |
| 170 | } |
| 171 | case 'd': |
| 172 | { |
| 173 | day = boost::lexical_cast<unsigned short>(arg: *beg); |
| 174 | break; |
| 175 | } |
| 176 | default: break; |
| 177 | } //switch |
| 178 | } |
| 179 | return date_type(year, month, day); |
| 180 | } |
| 181 | |
| 182 | //! Generic function to parse undelimited date (eg: 20020201) |
| 183 | template<class date_type> |
| 184 | date_type |
| 185 | parse_undelimited_date(const std::string& s) { |
| 186 | int offsets[] = {4,2,2}; |
| 187 | int pos = 0; |
| 188 | //typename date_type::ymd_type ymd((year_type::min)(),1,1); |
| 189 | unsigned short y = 0, m = 0, d = 0; |
| 190 | |
| 191 | /* The two bool arguments state that parsing will not wrap |
| 192 | * (only the first 8 characters will be parsed) and partial |
| 193 | * strings will not be parsed. |
| 194 | * Ex: |
| 195 | * "2005121" will parse 2005 & 12, but not the "1" */ |
| 196 | boost::offset_separator osf(offsets, offsets+3, false, false); |
| 197 | |
| 198 | typedef typename boost::tokenizer<boost::offset_separator, |
| 199 | std::basic_string<char>::const_iterator, |
| 200 | std::basic_string<char> > tokenizer_type; |
| 201 | tokenizer_type tok(s, osf); |
| 202 | for(typename tokenizer_type::iterator ti=tok.begin(); ti!=tok.end();++ti) { |
| 203 | unsigned short i = boost::lexical_cast<unsigned short>(arg: *ti); |
| 204 | switch(pos) { |
| 205 | case 0: y = i; break; |
| 206 | case 1: m = i; break; |
| 207 | case 2: d = i; break; |
| 208 | default: break; |
| 209 | } |
| 210 | pos++; |
| 211 | } |
| 212 | return date_type(y,m,d); |
| 213 | } |
| 214 | |
| 215 | //! Helper function for 'date gregorian::from_stream()' |
| 216 | /*! Creates a string from the iterators that reference the |
| 217 | * begining & end of a char[] or string. All elements are |
| 218 | * used in output string */ |
| 219 | template<class date_type, class iterator_type> |
| 220 | inline |
| 221 | date_type |
| 222 | from_stream_type(iterator_type& beg, |
| 223 | iterator_type const& end, |
| 224 | char) |
| 225 | { |
| 226 | std::ostringstream ss; |
| 227 | while(beg != end) { |
| 228 | ss << *beg++; |
| 229 | } |
| 230 | return parse_date<date_type>(ss.str()); |
| 231 | } |
| 232 | |
| 233 | //! Helper function for 'date gregorian::from_stream()' |
| 234 | /*! Returns the first string found in the stream referenced by the |
| 235 | * begining & end iterators */ |
| 236 | template<class date_type, class iterator_type> |
| 237 | inline |
| 238 | date_type |
| 239 | from_stream_type(iterator_type& beg, |
| 240 | iterator_type const& /* end */, |
| 241 | std::string const&) |
| 242 | { |
| 243 | return parse_date<date_type>(*beg); |
| 244 | } |
| 245 | |
| 246 | /* I believe the wchar stuff would be best elsewhere, perhaps in |
| 247 | * parse_date<>()? In the mean time this gets us started... */ |
| 248 | //! Helper function for 'date gregorian::from_stream()' |
| 249 | /*! Creates a string from the iterators that reference the |
| 250 | * begining & end of a wstring. All elements are |
| 251 | * used in output string */ |
| 252 | template<class date_type, class iterator_type> |
| 253 | inline |
| 254 | date_type from_stream_type(iterator_type& beg, |
| 255 | iterator_type const& end, |
| 256 | wchar_t) |
| 257 | { |
| 258 | std::ostringstream ss; |
| 259 | #if !defined(BOOST_DATE_TIME_NO_LOCALE) |
| 260 | std::locale loc; |
| 261 | std::ctype<wchar_t> const& fac = std::use_facet<std::ctype<wchar_t> >(loc: loc); |
| 262 | while(beg != end) { |
| 263 | ss << fac.narrow(*beg++, 'X'); // 'X' will cause exception to be thrown |
| 264 | } |
| 265 | #else |
| 266 | while(beg != end) { |
| 267 | char c = 'X'; // 'X' will cause exception to be thrown |
| 268 | const wchar_t wc = *beg++; |
| 269 | if (wc >= 0 && wc <= 127) |
| 270 | c = static_cast< char >(wc); |
| 271 | ss << c; |
| 272 | } |
| 273 | #endif |
| 274 | return parse_date<date_type>(ss.str()); |
| 275 | } |
| 276 | #ifndef BOOST_NO_STD_WSTRING |
| 277 | //! Helper function for 'date gregorian::from_stream()' |
| 278 | /*! Creates a string from the first wstring found in the stream |
| 279 | * referenced by the begining & end iterators */ |
| 280 | template<class date_type, class iterator_type> |
| 281 | inline |
| 282 | date_type |
| 283 | from_stream_type(iterator_type& beg, |
| 284 | iterator_type const& /* end */, |
| 285 | std::wstring const&) { |
| 286 | std::wstring ws = *beg; |
| 287 | std::ostringstream ss; |
| 288 | std::wstring::iterator wsb = ws.begin(), wse = ws.end(); |
| 289 | #if !defined(BOOST_DATE_TIME_NO_LOCALE) |
| 290 | std::locale loc; |
| 291 | std::ctype<wchar_t> const& fac = std::use_facet<std::ctype<wchar_t> >(loc: loc); |
| 292 | while(wsb != wse) { |
| 293 | ss << fac.narrow(c: *wsb++, dfault: 'X'); // 'X' will cause exception to be thrown |
| 294 | } |
| 295 | #else |
| 296 | while(wsb != wse) { |
| 297 | char c = 'X'; // 'X' will cause exception to be thrown |
| 298 | const wchar_t wc = *wsb++; |
| 299 | if (wc >= 0 && wc <= 127) |
| 300 | c = static_cast< char >(wc); |
| 301 | ss << c; |
| 302 | } |
| 303 | #endif |
| 304 | return parse_date<date_type>(ss.str()); |
| 305 | } |
| 306 | #endif // BOOST_NO_STD_WSTRING |
| 307 | #if (defined(BOOST_MSVC) && (_MSC_VER < 1300)) |
// This function cannot be compiled with MSVC 6.0 due to internal compiler shortcomings
| 309 | #else |
| 310 | //! function called by wrapper functions: date_period_from_(w)string() |
| 311 | template<class date_type, class charT> |
| 312 | period<date_type, typename date_type::duration_type> |
| 313 | from_simple_string_type(const std::basic_string<charT>& s){ |
| 314 | typedef typename std::basic_string<charT>::traits_type traits_type; |
| 315 | typedef typename boost::char_separator<charT, traits_type> char_separator; |
| 316 | typedef typename boost::tokenizer<char_separator, |
| 317 | typename std::basic_string<charT>::const_iterator, |
| 318 | std::basic_string<charT> > tokenizer; |
| 319 | const charT sep_list[4] = {'[','/',']','\0'}; |
| 320 | char_separator sep(sep_list); |
| 321 | tokenizer tokens(s, sep); |
| 322 | typename tokenizer::iterator tok_it = tokens.begin(); |
| 323 | std::basic_string<charT> date_string = *tok_it; |
| 324 | // get 2 string iterators and generate a date from them |
| 325 | typename std::basic_string<charT>::iterator date_string_start = date_string.begin(), |
| 326 | date_string_end = date_string.end(); |
| 327 | typedef typename std::iterator_traits<typename std::basic_string<charT>::iterator>::value_type value_type; |
| 328 | date_type d1 = from_stream_type<date_type>(date_string_start, date_string_end, value_type()); |
| 329 | date_string = *(++tok_it); // next token |
| 330 | date_string_start = date_string.begin(), date_string_end = date_string.end(); |
| 331 | date_type d2 = from_stream_type<date_type>(date_string_start, date_string_end, value_type()); |
| 332 | return period<date_type, typename date_type::duration_type>(d1, d2); |
| 333 | } |
| 334 | #endif |
| 335 | |
| 336 | } } //namespace date_time |
| 337 | |
| 338 | |
| 339 | |
| 340 | |
| 341 | #endif |
| 342 | |
| 343 | |