date_parsing.hpp source code [include/boost/date_time/date_parsing.hpp]

1	#ifndef _DATE_TIME_DATE_PARSING_HPP___
2	#define _DATE_TIME_DATE_PARSING_HPP___
3
/* Copyright (c) 2002,2003,2005 CrystalClear Software, Inc.
 * Use, modification and distribution is subject to the
 * Boost Software License, Version 1.0. (See accompanying
 * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
 * Author: Jeff Garland, Bart Garst
 * $Date$
 */
11
12	#include <map>
13	#include <string>
14	#include <sstream>
15	#include <iterator>
16	#include <algorithm>
17	#include <boost/tokenizer.hpp>
18	#include <boost/lexical_cast.hpp>
19	#include <boost/date_time/compiler_config.hpp>
20	#include <boost/date_time/parse_format_base.hpp>
21	#include <boost/date_time/period.hpp>
22
23	#if defined(BOOST_DATE_TIME_NO_LOCALE)
24	#include <cctype> // ::tolower(int)
25	#else
26	#include <locale> // std::tolower(char, locale)
27	#endif
28
29	namespace boost {
30	namespace date_time {
31
//! A function to replace the std::transform( , , ,tolower) construct
/*! Returns a copy of \c inp in which every character has been converted
 *  to lowercase.
 *
 *  When locales are available the conversion is done with the classic
 *  "C" locale (std::locale::classic()), not the global locale.
 *  When BOOST_DATE_TIME_NO_LOCALE is defined, the C style tolower(int)
 *  is used instead.
 *
 *  \param inp  string to convert (taken by value, modified and returned)
 *  \return     the lower-cased string
 */
inline
std::string
convert_to_lower(std::string inp)
{
#if !defined(BOOST_DATE_TIME_NO_LOCALE)
  const std::locale loc(std::locale::classic());
#endif
  const std::string::size_type n = inp.length();
  for (std::string::size_type i = 0; i < n; ++i) {
    inp[i] =
#if defined(BOOST_DATE_TIME_NO_LOCALE)
      // tolower(int) is only defined for values representable as
      // unsigned char (or EOF); convert first so that negative char
      // values do not invoke undefined behaviour.
      static_cast<char>(std::tolower(static_cast<unsigned char>(inp[i])));
#else
      // tolower and others were brought in to std for borland >= v564
      // in compiler_config.hpp
      std::tolower(inp[i], loc);
#endif
  }
  return inp;
}
58
59	//! Helper function for parse_date.
60	template<class month_type>
61	inline unsigned short
62	month_str_to_ushort(std::string const& s) {
63	if((s.at(n: `0`) >= `'0'`) && (s.at(n: `0`) <= `'9'`)) {
64	return boost::lexical_cast<unsigned short>(arg: s);
65	}
66	else {
67	std::string str = convert_to_lower(inp: s);
68	//c++98 support
69	#if defined(BOOST_NO_CXX11_UNIFIED_INITIALIZATION_SYNTAX)
70	static std::map<std::string, unsigned short> month_map;
71	typedef std::map<std::string, unsigned short>::value_type vtype;
72	if( month_map.empty() ) {
73	month_map.insert( vtype("jan", static_cast<unsigned short>(`1`)) );
74	month_map.insert( vtype("january", static_cast<unsigned short>(`1`)) );
75	month_map.insert( vtype("feb", static_cast<unsigned short>(`2`)) );
76	month_map.insert( vtype("february", static_cast<unsigned short>(`2`)) );
77	month_map.insert( vtype("mar", static_cast<unsigned short>(`3`)) );
78	month_map.insert( vtype("march", static_cast<unsigned short>(`3`)) );
79	month_map.insert( vtype("apr", static_cast<unsigned short>(`4`)) );
80	month_map.insert( vtype("april", static_cast<unsigned short>(`4`)) );
81	month_map.insert( vtype("may", static_cast<unsigned short>(`5`)) );
82	month_map.insert( vtype("jun", static_cast<unsigned short>(`6`)) );
83	month_map.insert( vtype("june", static_cast<unsigned short>(`6`)) );
84	month_map.insert( vtype("jul", static_cast<unsigned short>(`7`)) );
85	month_map.insert( vtype("july", static_cast<unsigned short>(`7`)) );
86	month_map.insert( vtype("aug", static_cast<unsigned short>(`8`)) );
87	month_map.insert( vtype("august", static_cast<unsigned short>(`8`)) );
88	month_map.insert( vtype("sep", static_cast<unsigned short>(`9`)) );
89	month_map.insert( vtype("september", static_cast<unsigned short>(`9`)) );
90	month_map.insert( vtype("oct", static_cast<unsigned short>(`10`)) );
91	month_map.insert( vtype("october", static_cast<unsigned short>(`10`)) );
92	month_map.insert( vtype("nov", static_cast<unsigned short>(`11`)) );
93	month_map.insert( vtype("november", static_cast<unsigned short>(`11`)) );
94	month_map.insert( vtype("dec", static_cast<unsigned short>(`12`)) );
95	month_map.insert( vtype("december", static_cast<unsigned short>(`12`)) );
96	}
97	#else //c+11 and beyond
98	static std::map<std::string, unsigned short> month_map =
99	{ { "jan", static_cast<unsigned short>(`1`) }, { "january", static_cast<unsigned short>(`1`) },
100	{ "feb", static_cast<unsigned short>(`2`) }, { "february", static_cast<unsigned short>(`2`) },
101	{ "mar", static_cast<unsigned short>(`3`) }, { "march", static_cast<unsigned short>(`3`) },
102	{ "apr", static_cast<unsigned short>(`4`) }, { "april", static_cast<unsigned short>(`4`) },
103	{ "may", static_cast<unsigned short>(`5`) },
104	{ "jun", static_cast<unsigned short>(`6`) }, { "june", static_cast<unsigned short>(`6`) },
105	{ "jul", static_cast<unsigned short>(`7`) }, { "july", static_cast<unsigned short>(`7`) },
106	{ "aug", static_cast<unsigned short>(`8`) }, { "august", static_cast<unsigned short>(`8`) },
107	{ "sep", static_cast<unsigned short>(`9`) }, { "september", static_cast<unsigned short>(`9`) },
108	{ "oct", static_cast<unsigned short>(`10`) }, { "october", static_cast<unsigned short>(`10`)},
109	{ "nov", static_cast<unsigned short>(`11`) }, { "november", static_cast<unsigned short>(`11`)},
110	{ "dec", static_cast<unsigned short>(`12`) }, { "december", static_cast<unsigned short>(`12`)}
111	};
112	#endif
113	std::map<std::string, unsigned short>::const_iterator mitr = month_map.find( x: str );
114	if ( mitr != month_map.end() ) {
115	return mitr ->second;
116	}
117	}
118	return `13`; // intentionally out of range - name not found
119	}
120
121
122	//! Generic function to parse a delimited date (eg: 2002-02-10)
123	/! Accepted formats are: "2003-02-10" or " 2003-Feb-10" or*
124	* "2003-Feburary-10"
125	* The order in which the Month, Day, & Year appear in the argument
126	* string can be accomodated by passing in the appropriate ymd_order_spec
127	*/
128	template<class date_type>
129	date_type
130	parse_date(const std::string& s, int order_spec = ymd_order_iso) {
131	std::string spec_str;
132	if(order_spec == ymd_order_iso) {
133	spec_str = "ymd";
134	}
135	else if(order_spec == ymd_order_dmy) {
136	spec_str = "dmy";
137	}
138	else { // (order_spec == ymd_order_us)
139	spec_str = "mdy";
140	}
141
142	typedef typename date_type::month_type month_type;
143	unsigned pos = `0`;
144	unsigned short year(`0`), month(`0`), day(`0`);
145	typedef typename std::basic_string<char>::traits_type traits_type;
146	typedef boost::char_separator<char, traits_type> char_separator_type;
147	typedef boost::tokenizer<char_separator_type,
148	std::basic_string<char>::const_iterator,
149	std::basic_string<char> > tokenizer;
150	typedef boost::tokenizer<char_separator_type,
151	std::basic_string<char>::const_iterator,
152	std::basic_string<char> >::iterator tokenizer_iterator;
153	// may need more delimiters, these work for the regression tests
154	const char sep_char[] = {`','`,`'-'`,`'.'`,`' '`,`'/'`,`'\0'`};
155	char_separator_type sep(sep_char);
156	tokenizer tok(s,sep);
157	for(tokenizer_iterator beg=tok.begin();
158	beg !=tok.end() && pos < spec_str.size();
159	++beg, ++pos) {
160	switch(spec_str.at(n: pos)) {
161	case `'y'`:
162	{
163	year = boost::lexical_cast<unsigned short>(arg: *beg);
164	break;
165	}
166	case `'m'`:
167	{
168	month = month_str_to_ushort<month_type>(*beg);
169	break;
170	}
171	case `'d'`:
172	{
173	day = boost::lexical_cast<unsigned short>(arg: *beg);
174	break;
175	}
176	default: break;
177	} //switch
178	}
179	return date_type(year, month, day);
180	}
181
182	//! Generic function to parse undelimited date (eg: 20020201)
183	template<class date_type>
184	date_type
185	parse_undelimited_date(const std::string& s) {
186	int offsets[] = {`4`,`2`,`2`};
187	int pos = `0`;
188	//typename date_type::ymd_type ymd((year_type::min)(),1,1);
189	unsigned short y = `0`, m = `0`, d = `0`;
190
191	/ The two bool arguments state that parsing will not wrap*
192	* (only the first 8 characters will be parsed) and partial
193	* strings will not be parsed.
194	* Ex:
195	* "2005121" will parse 2005 & 12, but not the "1" */
196	boost::offset_separator osf(offsets, offsets+`3`, false, false);
197
198	typedef typename boost::tokenizer<boost::offset_separator,
199	std::basic_string<char>::const_iterator,
200	std::basic_string<char> > tokenizer_type;
201	tokenizer_type tok(s, osf);
202	for(typename tokenizer_type::iterator ti=tok.begin(); ti !=tok.end();++ti) {
203	unsigned short i = boost::lexical_cast<unsigned short>(arg: *ti);
204	switch(pos) {
205	case `0`: y = i; break;
206	case `1`: m = i; break;
207	case `2`: d = i; break;
208	default: break;
209	}
210	pos++;
211	}
212	return date_type(y,m,d);
213	}
214
215	//! Helper function for 'date gregorian::from_stream()'
216	/! Creates a string from the iterators that reference the*
217	* begining & end of a char[] or string. All elements are
218	* used in output string */
219	template<class date_type, class iterator_type>
220	inline
221	date_type
222	from_stream_type(iterator_type& beg,
223	iterator_type const& end,
224	char)
225	{
226	std::ostringstream ss;
227	while(beg != end) {
228	ss << *beg++;
229	}
230	return parse_date<date_type>(ss.str());
231	}
232
233	//! Helper function for 'date gregorian::from_stream()'
234	/! Returns the first string found in the stream referenced by the*
235	* begining & end iterators */
236	template<class date_type, class iterator_type>
237	inline
238	date_type
239	from_stream_type(iterator_type& beg,
240	iterator_type const& / end /,
241	std::string const&)
242	{
243	return parse_date<date_type>(*beg);
244	}
245
246	/ I believe the wchar stuff would be best elsewhere, perhaps in*
247	* parse_date<>()? In the mean time this gets us started... */
248	//! Helper function for 'date gregorian::from_stream()'
249	/! Creates a string from the iterators that reference the*
250	* begining & end of a wstring. All elements are
251	* used in output string */
252	template<class date_type, class iterator_type>
253	inline
254	date_type from_stream_type(iterator_type& beg,
255	iterator_type const& end,
256	wchar_t)
257	{
258	std::ostringstream ss;
259	#if !defined(BOOST_DATE_TIME_NO_LOCALE)
260	std::locale loc;
261	std::ctype<wchar_t> const& fac = std::use_facet<std::ctype<wchar_t> >(loc: loc);
262	while(beg != end) {
263	ss << fac.narrow(beg++, `'X'`); // 'X' will cause exception to be thrown*
264	}
265	#else
266	while(beg != end) {
267	char c = `'X'`; // 'X' will cause exception to be thrown
268	const wchar_t wc = *beg++;
269	if (wc >= `0` && wc <= `127`)
270	c = static_cast< char >(wc);
271	ss << c;
272	}
273	#endif
274	return parse_date<date_type>(ss.str());
275	}
276	#ifndef BOOST_NO_STD_WSTRING
277	//! Helper function for 'date gregorian::from_stream()'
278	/! Creates a string from the first wstring found in the stream*
279	* referenced by the begining & end iterators */
280	template<class date_type, class iterator_type>
281	inline
282	date_type
283	from_stream_type(iterator_type& beg,
284	iterator_type const& / end /,
285	std::wstring const&) {
286	std::wstring ws = *beg;
287	std::ostringstream ss;
288	std::wstring::iterator wsb = ws.begin(), wse = ws.end();
289	#if !defined(BOOST_DATE_TIME_NO_LOCALE)
290	std::locale loc;
291	std::ctype<wchar_t> const& fac = std::use_facet<std::ctype<wchar_t> >(loc: loc);
292	while(wsb != wse) {
293	ss << fac.narrow(c: wsb ++, dfault: `'X'`); // 'X' will cause exception to be thrown*
294	}
295	#else
296	while(wsb != wse) {
297	char c = `'X'`; // 'X' will cause exception to be thrown
298	const wchar_t wc = *wsb++;
299	if (wc >= `0` && wc <= `127`)
300	c = static_cast< char >(wc);
301	ss << c;
302	}
303	#endif
304	return parse_date<date_type>(ss.str());
305	}
306	#endif // BOOST_NO_STD_WSTRING
307	#if (defined(BOOST_MSVC) && (_MSC_VER < 1300))
308	// This function cannot be compiled with MSVC 6.0 due to internal compiler shorcomings
309	#else
310	//! function called by wrapper functions: date_period_from_(w)string()
311	template<class date_type, class charT>
312	period<date_type, typename date_type::duration_type>
313	from_simple_string_type(const std::basic_string<charT>& s){
314	typedef typename std::basic_string<charT>::traits_type traits_type;
315	typedef typename boost::char_separator<charT, traits_type> char_separator;
316	typedef typename boost::tokenizer<char_separator,
317	typename std::basic_string<charT>::const_iterator,
318	std::basic_string<charT> > tokenizer;
319	const charT sep_list[`4`] = {`'['`,`'/'`,`']'`,`'\0'`};
320	char_separator sep(sep_list);
321	tokenizer tokens(s, sep);
322	typename tokenizer::iterator tok_it = tokens.begin();
323	std::basic_string<charT> date_string = *tok_it;
324	// get 2 string iterators and generate a date from them
325	typename std::basic_string<charT>::iterator date_string_start = date_string.begin(),
326	date_string_end = date_string.end();
327	typedef typename std::iterator_traits<typename std::basic_string<charT>::iterator>::value_type value_type;
328	date_type d1 = from_stream_type<date_type>(date_string_start, date_string_end, value_type());
329	date_string = (++tok_it); // next token*
330	date_string_start = date_string.begin(), date_string_end = date_string.end();
331	date_type d2 = from_stream_type<date_type>(date_string_start, date_string_end, value_type());
332	return period<date_type, typename date_type::duration_type>(d1, d2);
333	}
334	#endif
335
336	} } //namespace date_time
337
338
339
340
341	#endif
342
343

source code of include/boost/date_time/date_parsing.hpp