1
2#ifndef DATE_TIME_FORMAT_DATE_PARSER_HPP__
3#define DATE_TIME_FORMAT_DATE_PARSER_HPP__
4
5/* Copyright (c) 2004-2005 CrystalClear Software, Inc.
6 * Use, modification and distribution is subject to the
7 * Boost Software License, Version 1.0. (See accompanying
8 * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
9 * Author: Jeff Garland, Bart Garst
10 * $Date$
11 */
12
13
14#include "boost/lexical_cast.hpp"
15#include "boost/date_time/string_parse_tree.hpp"
16#include "boost/date_time/strings_from_facet.hpp"
17#include "boost/date_time/special_values_parser.hpp"
18#include <string>
19#include <vector>
20#include <sstream>
21#include <iterator>
22#ifndef BOOST_NO_STDC_NAMESPACE
23# include <cctype>
24#else
25# include <ctype.h>
26#endif
27
28#ifdef BOOST_NO_STDC_NAMESPACE
29namespace std {
30 using ::isspace;
31 using ::isdigit;
32}
33#endif
34namespace boost { namespace date_time {
35
36//! Helper function for parsing fixed length strings into integers
37/*! Will consume 'length' number of characters from stream. Consumed
38 * character are transfered to parse_match_result struct.
39 * Returns '-1' if no number can be parsed or incorrect number of
40 * digits in stream. */
41template<typename int_type, typename charT>
42inline
43int_type
44fixed_string_to_int(std::istreambuf_iterator<charT>& itr,
45 std::istreambuf_iterator<charT>& stream_end,
46 parse_match_result<charT>& mr,
47 unsigned int length,
48 const charT& fill_char)
49{
50 //typedef std::basic_string<charT> string_type;
51 unsigned int j = 0;
52 //string_type s;
53 while (j < length && itr != stream_end &&
54 (std::isdigit(*itr) || *itr == fill_char)) {
55 if(*itr == fill_char) {
56 /* Since a fill_char can be anything, we convert it to a zero.
57 * lexical_cast will behave predictably when zero is used as fill. */
58 mr.cache += ('0');
59 }
60 else {
61 mr.cache += (*itr);
62 }
63 itr++;
64 j++;
65 }
66 int_type i = static_cast<int_type>(-1);
67 // mr.cache will hold leading zeros. size() tells us when input is too short.
68 if(mr.cache.size() < length) {
69 return i;
70 }
71 try {
72 i = boost::lexical_cast<int_type>(mr.cache);
73 }catch(bad_lexical_cast&){
74 // we want to return -1 if the cast fails so nothing to do here
75 }
76 return i;
77}
78
79//! Helper function for parsing fixed length strings into integers
80/*! Will consume 'length' number of characters from stream. Consumed
81 * character are transfered to parse_match_result struct.
82 * Returns '-1' if no number can be parsed or incorrect number of
83 * digits in stream. */
84template<typename int_type, typename charT>
85inline
86int_type
87fixed_string_to_int(std::istreambuf_iterator<charT>& itr,
88 std::istreambuf_iterator<charT>& stream_end,
89 parse_match_result<charT>& mr,
90 unsigned int length)
91{
92 return fixed_string_to_int<int_type, charT>(itr, stream_end, mr, length, '0');
93}
94
95//! Helper function for parsing varied length strings into integers
96/*! Will consume 'max_length' characters from stream only if those
97 * characters are digits. Returns '-1' if no number can be parsed.
98 * Will not parse a number preceeded by a '+' or '-'. */
99template<typename int_type, typename charT>
100inline
101int_type
102var_string_to_int(std::istreambuf_iterator<charT>& itr,
103 const std::istreambuf_iterator<charT>& stream_end,
104 unsigned int max_length)
105{
106 typedef std::basic_string<charT> string_type;
107 unsigned int j = 0;
108 string_type s;
109 while (itr != stream_end && (j < max_length) && std::isdigit(*itr)) {
110 s += (*itr);
111 ++itr;
112 ++j;
113 }
114 int_type i = static_cast<int_type>(-1);
115 if(!s.empty()) {
116 i = boost::lexical_cast<int_type>(s);
117 }
118 return i;
119}
120
121
122//! Class with generic date parsing using a format string
123/*! The following is the set of recognized format specifiers
124 - %a - Short weekday name
125 - %A - Long weekday name
126 - %b - Abbreviated month name
127 - %B - Full month name
128 - %d - Day of the month as decimal 01 to 31
129 - %j - Day of year as decimal from 001 to 366
130 - %m - Month name as a decimal 01 to 12
131 - %U - Week number 00 to 53 with first Sunday as the first day of week 1?
132 - %w - Weekday as decimal number 0 to 6 where Sunday == 0
133 - %W - Week number 00 to 53 where Monday is first day of week 1
134 - %x - facet default date representation
135 - %y - Year without the century - eg: 04 for 2004
136 - %Y - Year with century
137
138 The weekday specifiers (%a and %A) do not add to the date construction,
139 but they provide a way to skip over the weekday names for formats that
140 provide them.
141
142 todo -- Another interesting feature that this approach could provide is
143 an option to fill in any missing fields with the current values
144 from the clock. So if you have %m-%d the parser would detect
145 the missing year value and fill it in using the clock.
146
147 todo -- What to do with the %x. %x in the classic facet is just bad...
148
149 */
150template<class date_type, typename charT>
151class format_date_parser
152{
153 public:
154 typedef std::basic_string<charT> string_type;
155 typedef std::basic_istringstream<charT> stringstream_type;
156 typedef std::istreambuf_iterator<charT> stream_itr_type;
157 typedef typename string_type::const_iterator const_itr;
158 typedef typename date_type::year_type year_type;
159 typedef typename date_type::month_type month_type;
160 typedef typename date_type::day_type day_type;
161 typedef typename date_type::duration_type duration_type;
162 typedef typename date_type::day_of_week_type day_of_week_type;
163 typedef typename date_type::day_of_year_type day_of_year_type;
164 typedef string_parse_tree<charT> parse_tree_type;
165 typedef typename parse_tree_type::parse_match_result_type match_results;
166 typedef std::vector<std::basic_string<charT> > input_collection_type;
167
168 // TODO sv_parser uses its default constructor - write the others
169
170 format_date_parser(const string_type& format_str,
171 const input_collection_type& month_short_names,
172 const input_collection_type& month_long_names,
173 const input_collection_type& weekday_short_names,
174 const input_collection_type& weekday_long_names) :
175 m_format(format_str),
176 m_month_short_names(month_short_names, 1),
177 m_month_long_names(month_long_names, 1),
178 m_weekday_short_names(weekday_short_names),
179 m_weekday_long_names(weekday_long_names)
180 {}
181
182 format_date_parser(const string_type& format_str,
183 const std::locale& locale) :
184 m_format(format_str),
185 m_month_short_names(gather_month_strings<charT>(locale), 1),
186 m_month_long_names(gather_month_strings<charT>(locale, false), 1),
187 m_weekday_short_names(gather_weekday_strings<charT>(locale)),
188 m_weekday_long_names(gather_weekday_strings<charT>(locale, false))
189 {}
190
191 format_date_parser(const format_date_parser<date_type,charT>& fdp)
192 {
193 this->m_format = fdp.m_format;
194 this->m_month_short_names = fdp.m_month_short_names;
195 this->m_month_long_names = fdp.m_month_long_names;
196 this->m_weekday_short_names = fdp.m_weekday_short_names;
197 this->m_weekday_long_names = fdp.m_weekday_long_names;
198 }
199
200 string_type format() const
201 {
202 return m_format;
203 }
204
205 void format(string_type format_str)
206 {
207 m_format = format_str;
208 }
209
210 void short_month_names(const input_collection_type& month_names)
211 {
212 m_month_short_names = parse_tree_type(month_names, 1);
213 }
214 void long_month_names(const input_collection_type& month_names)
215 {
216 m_month_long_names = parse_tree_type(month_names, 1);
217 }
218 void short_weekday_names(const input_collection_type& weekday_names)
219 {
220 m_weekday_short_names = parse_tree_type(weekday_names);
221 }
222 void long_weekday_names(const input_collection_type& weekday_names)
223 {
224 m_weekday_long_names = parse_tree_type(weekday_names);
225 }
226
227 date_type
228 parse_date(const string_type& value,
229 const string_type& format_str,
230 const special_values_parser<date_type,charT>& sv_parser) const
231 {
232 stringstream_type ss(value);
233 stream_itr_type sitr(ss);
234 stream_itr_type stream_end;
235 return parse_date(sitr, stream_end, format_str, sv_parser);
236 }
237
238 date_type
239 parse_date(std::istreambuf_iterator<charT>& sitr,
240 std::istreambuf_iterator<charT>& stream_end,
241 const special_values_parser<date_type,charT>& sv_parser) const
242 {
243 return parse_date(sitr, stream_end, m_format, sv_parser);
244 }
245
246 /*! Of all the objects that the format_date_parser can parse, only a
247 * date can be a special value. Therefore, only parse_date checks
248 * for special_values. */
249 date_type
250 parse_date(std::istreambuf_iterator<charT>& sitr,
251 std::istreambuf_iterator<charT>& stream_end,
252 string_type format_str,
253 const special_values_parser<date_type,charT>& sv_parser) const
254 {
255 bool use_current_char = false;
256
257 // skip leading whitespace
258 while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
259
260 short year(0), month(0), day(0), day_of_year(0);// wkday(0);
261 /* Initialized the following to their minimum values. These intermediate
262 * objects are used so we get specific exceptions when part of the input
263 * is unparsable.
264 * Ex: "205-Jan-15" will throw a bad_year, "2005-Jsn-15"- bad_month, etc.*/
265 year_type t_year(1400);
266 month_type t_month(1);
267 day_type t_day(1);
268 day_of_week_type wkday(0);
269
270
271 const_itr itr(format_str.begin());
272 while (itr != format_str.end() && (sitr != stream_end)) {
273 if (*itr == '%') {
274 if ( ++itr == format_str.end())
275 break;
276 if (*itr != '%') {
277 switch(*itr) {
278 case 'a':
279 {
280 //this value is just throw away. It could be used for
281 //error checking potentially, but it isn't helpful in
282 //actually constructing the date - we just need to get it
283 //out of the stream
284 match_results mr = m_weekday_short_names.match(sitr, stream_end);
285 if(mr.current_match == match_results::PARSE_ERROR) {
286 // check special_values
287 if(sv_parser.match(sitr, stream_end, mr)) {
288 return date_type(static_cast<special_values>(mr.current_match));
289 }
290 }
291 wkday = mr.current_match;
292 if (mr.has_remaining()) {
293 use_current_char = true;
294 }
295 break;
296 }
297 case 'A':
298 {
299 //this value is just throw away. It could be used for
300 //error checking potentially, but it isn't helpful in
301 //actually constructing the date - we just need to get it
302 //out of the stream
303 match_results mr = m_weekday_long_names.match(sitr, stream_end);
304 if(mr.current_match == match_results::PARSE_ERROR) {
305 // check special_values
306 if(sv_parser.match(sitr, stream_end, mr)) {
307 return date_type(static_cast<special_values>(mr.current_match));
308 }
309 }
310 wkday = mr.current_match;
311 if (mr.has_remaining()) {
312 use_current_char = true;
313 }
314 break;
315 }
316 case 'b':
317 {
318 match_results mr = m_month_short_names.match(sitr, stream_end);
319 if(mr.current_match == match_results::PARSE_ERROR) {
320 // check special_values
321 if(sv_parser.match(sitr, stream_end, mr)) {
322 return date_type(static_cast<special_values>(mr.current_match));
323 }
324 }
325 t_month = month_type(mr.current_match);
326 if (mr.has_remaining()) {
327 use_current_char = true;
328 }
329 break;
330 }
331 case 'B':
332 {
333 match_results mr = m_month_long_names.match(sitr, stream_end);
334 if(mr.current_match == match_results::PARSE_ERROR) {
335 // check special_values
336 if(sv_parser.match(sitr, stream_end, mr)) {
337 return date_type(static_cast<special_values>(mr.current_match));
338 }
339 }
340 t_month = month_type(mr.current_match);
341 if (mr.has_remaining()) {
342 use_current_char = true;
343 }
344 break;
345 }
346 case 'd':
347 {
348 match_results mr;
349 day = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
350 if(day == -1) {
351 if(sv_parser.match(sitr, stream_end, mr)) {
352 return date_type(static_cast<special_values>(mr.current_match));
353 }
354 }
355 t_day = day_type(day);
356 break;
357 }
358 case 'e':
359 {
360 match_results mr;
361 day = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2, ' ');
362 if(day == -1) {
363 if(sv_parser.match(sitr, stream_end, mr)) {
364 return date_type(static_cast<special_values>(mr.current_match));
365 }
366 }
367 t_day = day_type(day);
368 break;
369 }
370 case 'j':
371 {
372 match_results mr;
373 day_of_year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 3);
374 if(day_of_year == -1) {
375 if(sv_parser.match(sitr, stream_end, mr)) {
376 return date_type(static_cast<special_values>(mr.current_match));
377 }
378 }
379 // these next two lines are so we get an exception with bad input
380 day_of_year_type t_day_of_year(1);
381 t_day_of_year = day_of_year_type(day_of_year);
382 break;
383 }
384 case 'm':
385 {
386 match_results mr;
387 month = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
388 if(month == -1) {
389 if(sv_parser.match(sitr, stream_end, mr)) {
390 return date_type(static_cast<special_values>(mr.current_match));
391 }
392 }
393 t_month = month_type(month);
394 break;
395 }
396 case 'Y':
397 {
398 match_results mr;
399 year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 4);
400 if(year == -1) {
401 if(sv_parser.match(sitr, stream_end, mr)) {
402 return date_type(static_cast<special_values>(mr.current_match));
403 }
404 }
405 t_year = year_type(year);
406 break;
407 }
408 case 'y':
409 {
410 match_results mr;
411 year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
412 if(year == -1) {
413 if(sv_parser.match(sitr, stream_end, mr)) {
414 return date_type(static_cast<special_values>(mr.current_match));
415 }
416 }
417 year += 2000; //make 2 digit years in this century
418 t_year = year_type(year);
419 break;
420 }
421 default:
422 {} //ignore those we don't understand
423
424 }//switch
425
426 }
427 else { // itr == '%', second consecutive
428 sitr++;
429 }
430
431 itr++; //advance past format specifier
432 }
433 else { //skip past chars in format and in buffer
434 itr++;
435 if (use_current_char) {
436 use_current_char = false;
437 }
438 else {
439 sitr++;
440 }
441 }
442 }
443
444 if (day_of_year > 0) {
445 date_type d(static_cast<unsigned short>(year-1),12,31); //end of prior year
446 return d + duration_type(day_of_year);
447 }
448
449 return date_type(t_year, t_month, t_day); // exceptions were thrown earlier
450 // if input was no good
451 }
452
453 //! Throws bad_month if unable to parse
454 month_type
455 parse_month(std::istreambuf_iterator<charT>& sitr,
456 std::istreambuf_iterator<charT>& stream_end,
457 string_type format_str) const
458 {
459 match_results mr;
460 return parse_month(sitr, stream_end, format_str, mr);
461 }
462
463 //! Throws bad_month if unable to parse
464 month_type
465 parse_month(std::istreambuf_iterator<charT>& sitr,
466 std::istreambuf_iterator<charT>& stream_end,
467 string_type format_str,
468 match_results& mr) const
469 {
470 bool use_current_char = false;
471
472 // skip leading whitespace
473 while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
474
475 short month(0);
476
477 const_itr itr(format_str.begin());
478 while (itr != format_str.end() && (sitr != stream_end)) {
479 if (*itr == '%') {
480 if ( ++itr == format_str.end())
481 break;
482 if (*itr != '%') {
483 switch(*itr) {
484 case 'b':
485 {
486 mr = m_month_short_names.match(sitr, stream_end);
487 month = mr.current_match;
488 if (mr.has_remaining()) {
489 use_current_char = true;
490 }
491 break;
492 }
493 case 'B':
494 {
495 mr = m_month_long_names.match(sitr, stream_end);
496 month = mr.current_match;
497 if (mr.has_remaining()) {
498 use_current_char = true;
499 }
500 break;
501 }
502 case 'm':
503 {
504 month = var_string_to_int<short, charT>(sitr, stream_end, 2);
505 // var_string_to_int returns -1 if parse failed. That will
506 // cause a bad_month exception to be thrown so we do nothing here
507 break;
508 }
509 default:
510 {} //ignore those we don't understand
511
512 }//switch
513
514 }
515 else { // itr == '%', second consecutive
516 sitr++;
517 }
518
519 itr++; //advance past format specifier
520 }
521 else { //skip past chars in format and in buffer
522 itr++;
523 if (use_current_char) {
524 use_current_char = false;
525 }
526 else {
527 sitr++;
528 }
529 }
530 }
531
532 return month_type(month); // throws bad_month exception when values are zero
533 }
534
535 //! Expects 1 or 2 digits 1-31. Throws bad_day_of_month if unable to parse
536 day_type
537 parse_var_day_of_month(std::istreambuf_iterator<charT>& sitr,
538 std::istreambuf_iterator<charT>& stream_end) const
539 {
540 // skip leading whitespace
541 while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
542
543 return day_type(var_string_to_int<short, charT>(sitr, stream_end, 2));
544 }
545 //! Expects 2 digits 01-31. Throws bad_day_of_month if unable to parse
546 day_type
547 parse_day_of_month(std::istreambuf_iterator<charT>& sitr,
548 std::istreambuf_iterator<charT>& stream_end) const
549 {
550 // skip leading whitespace
551 while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
552
553 //return day_type(var_string_to_int<short, charT>(sitr, stream_end, 2));
554 match_results mr;
555 return day_type(fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2));
556 }
557
558 day_of_week_type
559 parse_weekday(std::istreambuf_iterator<charT>& sitr,
560 std::istreambuf_iterator<charT>& stream_end,
561 string_type format_str) const
562 {
563 match_results mr;
564 return parse_weekday(sitr, stream_end, format_str, mr);
565 }
566 day_of_week_type
567 parse_weekday(std::istreambuf_iterator<charT>& sitr,
568 std::istreambuf_iterator<charT>& stream_end,
569 string_type format_str,
570 match_results& mr) const
571 {
572 bool use_current_char = false;
573
574 // skip leading whitespace
575 while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
576
577 short wkday(0);
578
579 const_itr itr(format_str.begin());
580 while (itr != format_str.end() && (sitr != stream_end)) {
581 if (*itr == '%') {
582 if ( ++itr == format_str.end())
583 break;
584 if (*itr != '%') {
585 switch(*itr) {
586 case 'a':
587 {
588 //this value is just throw away. It could be used for
589 //error checking potentially, but it isn't helpful in
590 //actually constructing the date - we just need to get it
591 //out of the stream
592 mr = m_weekday_short_names.match(sitr, stream_end);
593 wkday = mr.current_match;
594 if (mr.has_remaining()) {
595 use_current_char = true;
596 }
597 break;
598 }
599 case 'A':
600 {
601 //this value is just throw away. It could be used for
602 //error checking potentially, but it isn't helpful in
603 //actually constructing the date - we just need to get it
604 //out of the stream
605 mr = m_weekday_long_names.match(sitr, stream_end);
606 wkday = mr.current_match;
607 if (mr.has_remaining()) {
608 use_current_char = true;
609 }
610 break;
611 }
612 case 'w':
613 {
614 // weekday as number 0-6, Sunday == 0
615 wkday = var_string_to_int<short, charT>(sitr, stream_end, 2);
616 break;
617 }
618 default:
619 {} //ignore those we don't understand
620
621 }//switch
622
623 }
624 else { // itr == '%', second consecutive
625 sitr++;
626 }
627
628 itr++; //advance past format specifier
629 }
630 else { //skip past chars in format and in buffer
631 itr++;
632 if (use_current_char) {
633 use_current_char = false;
634 }
635 else {
636 sitr++;
637 }
638 }
639 }
640
641 return day_of_week_type(wkday); // throws bad_day_of_month exception
642 // when values are zero
643 }
644
645 //! throws bad_year if unable to parse
646 year_type
647 parse_year(std::istreambuf_iterator<charT>& sitr,
648 std::istreambuf_iterator<charT>& stream_end,
649 string_type format_str) const
650 {
651 match_results mr;
652 return parse_year(sitr, stream_end, format_str, mr);
653 }
654
655 //! throws bad_year if unable to parse
656 year_type
657 parse_year(std::istreambuf_iterator<charT>& sitr,
658 std::istreambuf_iterator<charT>& stream_end,
659 string_type format_str,
660 match_results& mr) const
661 {
662 // skip leading whitespace
663 while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
664
665 unsigned short year(0);
666
667 const_itr itr(format_str.begin());
668 while (itr != format_str.end() && (sitr != stream_end)) {
669 if (*itr == '%') {
670 if ( ++itr == format_str.end())
671 break;
672 if (*itr != '%') {
673 //match_results mr;
674 switch(*itr) {
675 case 'Y':
676 {
677 // year from 4 digit string
678 year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 4);
679 break;
680 }
681 case 'y':
682 {
683 // year from 2 digit string (no century)
684 year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
685 year += 2000; //make 2 digit years in this century
686 break;
687 }
688 default:
689 {} //ignore those we don't understand
690
691 }//switch
692
693 }
694 else { // itr == '%', second consecutive
695 sitr++;
696 }
697
698 itr++; //advance past format specifier
699 }
700 else { //skip past chars in format and in buffer
701 itr++;
702 sitr++;
703 }
704 }
705
706 return year_type(year); // throws bad_year exception when values are zero
707 }
708
709
710 private:
711 string_type m_format;
712 parse_tree_type m_month_short_names;
713 parse_tree_type m_month_long_names;
714 parse_tree_type m_weekday_short_names;
715 parse_tree_type m_weekday_long_names;
716
717};
718
719} } //namespace
720
721#endif
722
723
724
725

// source code of include/boost/date_time/format_date_parser.hpp