1 | // ---------------------------------------------------------------------------- |
2 | // parsing.hpp : implementation of the parsing member functions |
3 | // ( parse, parse_printf_directive) |
4 | // ---------------------------------------------------------------------------- |
5 | |
6 | // Copyright Samuel Krempp 2003. Use, modification, and distribution are |
7 | // subject to the Boost Software License, Version 1.0. (See accompanying |
8 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
9 | |
10 | // see http://www.boost.org/libs/format for library home page |
11 | |
12 | // ---------------------------------------------------------------------------- |
13 | |
14 | #ifndef BOOST_FORMAT_PARSING_HPP |
15 | #define BOOST_FORMAT_PARSING_HPP |
16 | |
17 | |
#include <limits>

#include <boost/format/format_class.hpp>
#include <boost/format/exceptions.hpp>
#include <boost/throw_exception.hpp>
#include <boost/assert.hpp>
22 | |
23 | |
24 | namespace boost { |
25 | namespace io { |
26 | namespace detail { |
27 | |
// const_or_not(obj) hands an object back in the form needed to call its
// narrow() / widen() members.
#if defined(BOOST_NO_STD_LOCALE)
// Streams are used for narrow / widen in this configuration, and those
// members are not const : the constness is cast away.
template<class T>
T& const_or_not(const T& obj) {
    return const_cast<T&>(obj);
}
#else
// Otherwise the object is passed through unchanged, still const.
template<class T>
const T& const_or_not(const T& obj) {
    return obj;
}
#endif
40 | |
// Narrows character c to a plain char through fac.narrow(); deflt is
// forwarded as narrow()'s second argument.
template<class Ch, class Facet> inline
char wrap_narrow(const Facet& fac, Ch c, char deflt) {
    const char narrowed = const_or_not(fac).narrow(c, deflt);
    return narrowed;
}
45 | |
// Tells whether c is a digit.
// The facet's classification is used, unless BOOST_NO_LOCALE_ISDIGIT is
// defined, in which case the unqualified isdigit() is called and fac is unused.
template<class Ch, class Facet> inline
bool wrap_isdigit(const Facet& fac, Ch c) {
#if defined( BOOST_NO_LOCALE_ISDIGIT )
    (void) fac;     // remove "unused parameter" warning
    using namespace std;
    return isdigit(c) != 0;
#else
    const bool is_digit = fac.is(std::ctype<Ch>::digit, c);
    return is_digit;
#endif
}
56 | |
// Scans [beg, end) for the first character that is not a digit.
// Returns : iterator to that character, or end if every character is a digit.
template<class Iter, class Facet>
Iter wrap_scan_notdigit(const Facet & fac, Iter beg, Iter end) {
    using namespace std;
    while( beg != end && wrap_isdigit(fac, *beg) )
        ++beg;
    return beg;
}
63 | |
64 | |
// Reads the leading run of digits of [start, last) as a decimal number.
// Input   : [start, last) iterators range and
//           a Facet, used to classify characters as digits and to narrow them
// Effects : res receives the value of the digit run (0 if the range does not
//           begin with a digit). A value too large for Res is clamped to the
//           largest value of Res instead of overflowing.
// Returns : iterator to the first character that is not part of the digit run
template<class Res, class Iter, class Facet>
Iter str2int (const Iter & start, const Iter & last, Res & res,
              const Facet& fac)
{
    using namespace std;
    // parenthesised so that a function-like max() macro cannot interfere
    const Res res_max = (std::numeric_limits<Res>::max)();
    Iter it = start;
    res = 0;
    for( ; it != last && wrap_isdigit(fac, *it); ++it ) {
        char cur_ch = wrap_narrow(fac, *it, 0); // cant fail.
        // digits are consecutive : 22.2.1.1.2.13 of the C++ standard
        const Res digit = static_cast<Res>(cur_ch - '0');
        if( res > (res_max - digit) / 10 )
            res = res_max;  // would overflow : saturate, but keep consuming digits
        else
            res = static_cast<Res>(res * 10 + digit);
    }
    return it;
}
83 | |
// skip one of printf's "asterisk-fields" ( '*' or '*N$' ) in the format-string
// Input   : [start, last) iterators range; the first character is skipped
//           unconditionally (callers invoke this with start on the '*')
//           a Facet to recognise digits and to widen '$'
// Returns : iterator just past the first character, the digits that follow
//           it, and one '$' if present
template<class Iter, class Facet>
Iter skip_asterisk(Iter start, Iter last, const Facet& fac)
{
    using namespace std;
    ++start;                                          // the asterisk itself
    Iter cur = wrap_scan_notdigit(fac, start, last);  // optional digits
    if( cur == last )
        return cur;
    if( *cur == const_or_not(fac).widen( '$') )       // optional '$'
        ++cur;
    return cur;
}
99 | |
100 | |
101 | // auxiliary func called by parse_printf_directive |
102 | // for centralising error handling |
103 | // it either throws if user sets the corresponding flag, or does nothing. |
104 | inline void maybe_throw_exception(unsigned char exceptions, |
105 | std::size_t pos, std::size_t size) |
106 | { |
107 | if(exceptions & io::bad_format_string_bit) |
108 | boost::throw_exception(e: io::bad_format_string(pos, size) ); |
109 | } |
110 | |
111 | |
112 | // Input: the position of a printf-directive in the format-string |
113 | // a basic_ios& merely to use its widen/narrow member function |
114 | // a bitset'exceptions' telling whether to throw exceptions on errors. |
115 | // Returns: |
116 | // true if parse succeeded (ignore some errors if exceptions disabled) |
117 | // false if it failed so bad that the directive should be printed verbatim |
118 | // Effects: |
119 | // start is incremented so that *start is the first char after |
120 | // this directive |
121 | // *fpar is set with the parameters read in the directive |
122 | template<class Ch, class Tr, class Alloc, class Iter, class Facet> |
123 | bool parse_printf_directive(Iter & start, const Iter& last, |
124 | detail::format_item<Ch, Tr, Alloc> * fpar, |
125 | const Facet& fac, |
126 | std::size_t offset, unsigned char exceptions) |
127 | { |
128 | typedef typename basic_format<Ch, Tr, Alloc>::format_item_t format_item_t; |
129 | |
130 | fpar->argN_ = format_item_t::argN_no_posit; // if no positional-directive |
131 | bool precision_set = false; |
132 | bool in_brackets=false; |
133 | Iter start0 = start; |
134 | std::size_t fstring_size = last-start0+offset; |
135 | |
136 | if(start>= last) { // empty directive : this is a trailing % |
137 | maybe_throw_exception(exceptions, start-start0 + offset, fstring_size); |
138 | return false; |
139 | } |
140 | |
141 | if(*start== const_or_not(fac).widen( '|')) { |
142 | in_brackets=true; |
143 | if( ++start >= last ) { |
144 | maybe_throw_exception(exceptions, start-start0 + offset, fstring_size); |
145 | return false; |
146 | } |
147 | } |
148 | |
149 | // the flag '0' would be picked as a digit for argument order, but here it's a flag : |
150 | if(*start== const_or_not(fac).widen( '0')) |
151 | goto parse_flags; |
152 | |
153 | // handle argument order (%2$d) or possibly width specification: %2d |
154 | if(wrap_isdigit(fac, *start)) { |
155 | int n; |
156 | start = str2int(start, last, n, fac); |
157 | if( start >= last ) { |
158 | maybe_throw_exception(exceptions, start-start0+offset, fstring_size); |
159 | return false; |
160 | } |
161 | |
162 | // %N% case : this is already the end of the directive |
163 | if( *start == const_or_not(fac).widen( '%') ) { |
164 | fpar->argN_ = n-1; |
165 | ++start; |
166 | if( in_brackets) |
167 | maybe_throw_exception(exceptions, start-start0+offset, fstring_size); |
168 | // but don't return. maybe "%" was used in lieu of '$', so we go on. |
169 | else |
170 | return true; |
171 | } |
172 | |
173 | if ( *start== const_or_not(fac).widen( '$') ) { |
174 | fpar->argN_ = n-1; |
175 | ++start; |
176 | } |
177 | else { |
178 | // non-positionnal directive |
179 | fpar->fmtstate_.width_ = n; |
180 | fpar->argN_ = format_item_t::argN_no_posit; |
181 | goto parse_precision; |
182 | } |
183 | } |
184 | |
185 | parse_flags: |
186 | // handle flags |
187 | while ( start != last) { // as long as char is one of + - = _ # 0 l h or ' ' |
188 | // misc switches |
189 | switch ( wrap_narrow(fac, *start, 0)) { |
190 | case '\'' : break; // no effect yet. (painful to implement) |
191 | case 'l': |
192 | case 'h': // short/long modifier : for printf-comaptibility (no action needed) |
193 | break; |
194 | case '-': |
195 | fpar->fmtstate_.flags_ |= std::ios_base::left; |
196 | break; |
197 | case '=': |
198 | fpar->pad_scheme_ |= format_item_t::centered; |
199 | break; |
200 | case '_': |
201 | fpar->fmtstate_.flags_ |= std::ios_base::internal; |
202 | break; |
203 | case ' ': |
204 | fpar->pad_scheme_ |= format_item_t::spacepad; |
205 | break; |
206 | case '+': |
207 | fpar->fmtstate_.flags_ |= std::ios_base::showpos; |
208 | break; |
209 | case '0': |
210 | fpar->pad_scheme_ |= format_item_t::zeropad; |
211 | // need to know alignment before really setting flags, |
212 | // so just add 'zeropad' flag for now, it will be processed later. |
213 | break; |
214 | case '#': |
215 | fpar->fmtstate_.flags_ |= std::ios_base::showpoint | std::ios_base::showbase; |
216 | break; |
217 | default: |
218 | goto parse_width; |
219 | } |
220 | ++start; |
221 | } // loop on flag. |
222 | |
223 | if( start>=last) { |
224 | maybe_throw_exception(exceptions, start-start0+offset, fstring_size); |
225 | return true; |
226 | } |
227 | parse_width: |
228 | // handle width spec |
229 | // first skip 'asterisk fields' : *, or *N$ |
230 | if(*start == const_or_not(fac).widen( '*') ) |
231 | start = skip_asterisk(start, last, fac); |
232 | if(start!=last && wrap_isdigit(fac, *start)) |
233 | start = str2int(start, last, fpar->fmtstate_.width_, fac); |
234 | |
235 | parse_precision: |
236 | if( start>= last) { |
237 | maybe_throw_exception(exceptions, start-start0+offset, fstring_size); |
238 | return true; |
239 | } |
240 | // handle precision spec |
241 | if (*start== const_or_not(fac).widen( '.')) { |
242 | ++start; |
243 | if(start != last && *start == const_or_not(fac).widen( '*') ) |
244 | start = skip_asterisk(start, last, fac); |
245 | if(start != last && wrap_isdigit(fac, *start)) { |
246 | start = str2int(start, last, fpar->fmtstate_.precision_, fac); |
247 | precision_set = true; |
248 | } |
249 | else |
250 | fpar->fmtstate_.precision_ =0; |
251 | } |
252 | |
253 | // handle formatting-type flags : |
254 | while( start != last && ( *start== const_or_not(fac).widen( 'l') |
255 | || *start== const_or_not(fac).widen( 'L') |
256 | || *start== const_or_not(fac).widen( 'h')) ) |
257 | ++start; |
258 | if( start>=last) { |
259 | maybe_throw_exception(exceptions, start-start0+offset, fstring_size); |
260 | return true; |
261 | } |
262 | |
263 | if( in_brackets && *start== const_or_not(fac).widen( '|') ) { |
264 | ++start; |
265 | return true; |
266 | } |
267 | switch ( wrap_narrow(fac, *start, 0) ) { |
268 | case 'X': |
269 | fpar->fmtstate_.flags_ |= std::ios_base::uppercase; |
270 | case 'p': // pointer => set hex. |
271 | case 'x': |
272 | fpar->fmtstate_.flags_ &= ~std::ios_base::basefield; |
273 | fpar->fmtstate_.flags_ |= std::ios_base::hex; |
274 | break; |
275 | |
276 | case 'o': |
277 | fpar->fmtstate_.flags_ &= ~std::ios_base::basefield; |
278 | fpar->fmtstate_.flags_ |= std::ios_base::oct; |
279 | break; |
280 | |
281 | case 'E': |
282 | fpar->fmtstate_.flags_ |= std::ios_base::uppercase; |
283 | case 'e': |
284 | fpar->fmtstate_.flags_ &= ~std::ios_base::floatfield; |
285 | fpar->fmtstate_.flags_ |= std::ios_base::scientific; |
286 | |
287 | fpar->fmtstate_.flags_ &= ~std::ios_base::basefield; |
288 | fpar->fmtstate_.flags_ |= std::ios_base::dec; |
289 | break; |
290 | |
291 | case 'f': |
292 | fpar->fmtstate_.flags_ &= ~std::ios_base::floatfield; |
293 | fpar->fmtstate_.flags_ |= std::ios_base::fixed; |
294 | case 'u': |
295 | case 'd': |
296 | case 'i': |
297 | fpar->fmtstate_.flags_ &= ~std::ios_base::basefield; |
298 | fpar->fmtstate_.flags_ |= std::ios_base::dec; |
299 | break; |
300 | |
301 | case 'T': |
302 | ++start; |
303 | if( start >= last) |
304 | maybe_throw_exception(exceptions, start-start0+offset, fstring_size); |
305 | else |
306 | fpar->fmtstate_.fill_ = *start; |
307 | fpar->pad_scheme_ |= format_item_t::tabulation; |
308 | fpar->argN_ = format_item_t::argN_tabulation; |
309 | break; |
310 | case 't': |
311 | fpar->fmtstate_.fill_ = const_or_not(fac).widen( ' '); |
312 | fpar->pad_scheme_ |= format_item_t::tabulation; |
313 | fpar->argN_ = format_item_t::argN_tabulation; |
314 | break; |
315 | |
316 | case 'G': |
317 | fpar->fmtstate_.flags_ |= std::ios_base::uppercase; |
318 | break; |
319 | case 'g': // 'g' conversion is default for floats. |
320 | fpar->fmtstate_.flags_ &= ~std::ios_base::basefield; |
321 | fpar->fmtstate_.flags_ |= std::ios_base::dec; |
322 | |
323 | // CLEAR all floatield flags, so stream will CHOOSE |
324 | fpar->fmtstate_.flags_ &= ~std::ios_base::floatfield; |
325 | break; |
326 | |
327 | case 'C': |
328 | case 'c': |
329 | fpar->truncate_ = 1; |
330 | break; |
331 | case 'S': |
332 | case 's': |
333 | if(precision_set) // handle truncation manually, with own parameter. |
334 | fpar->truncate_ = fpar->fmtstate_.precision_; |
335 | fpar->fmtstate_.precision_ = 6; // default stream precision. |
336 | break; |
337 | case 'n' : |
338 | fpar->argN_ = format_item_t::argN_ignored; |
339 | break; |
340 | default: |
341 | maybe_throw_exception(exceptions, start-start0+offset, fstring_size); |
342 | } |
343 | ++start; |
344 | |
345 | if( in_brackets ) { |
346 | if( start != last && *start== const_or_not(fac).widen( '|') ) { |
347 | ++start; |
348 | return true; |
349 | } |
350 | else maybe_throw_exception(exceptions, start-start0+offset, fstring_size); |
351 | } |
352 | return true; |
353 | } |
354 | // -end parse_printf_directive() |
355 | |
356 | template<class String, class Facet> |
357 | int upper_bound_from_fstring(const String& buf, |
358 | const typename String::value_type arg_mark, |
359 | const Facet& fac, |
360 | unsigned char exceptions) |
361 | { |
362 | // quick-parsing of the format-string to count arguments mark (arg_mark, '%') |
363 | // returns : upper bound on the number of format items in the format strings |
364 | using namespace boost::io; |
365 | typename String::size_type i1=0; |
366 | int num_items=0; |
367 | while( (i1=buf.find(arg_mark,i1)) != String::npos ) { |
368 | if( i1+1 >= buf.size() ) { |
369 | if(exceptions & bad_format_string_bit) |
370 | boost::throw_exception(e: bad_format_string(i1, buf.size() )); // must not end in ".. %" |
371 | else { |
372 | ++num_items; |
373 | break; |
374 | } |
375 | } |
376 | if(buf[i1+1] == buf[i1] ) {// escaped "%%" |
377 | i1+=2; continue; |
378 | } |
379 | |
380 | ++i1; |
381 | // in case of %N% directives, dont count it double (wastes allocations..) : |
382 | i1 = detail::wrap_scan_notdigit(fac, buf.begin()+i1, buf.end()) - buf.begin(); |
383 | if( i1 < buf.size() && buf[i1] == arg_mark ) |
384 | ++i1; |
385 | ++num_items; |
386 | } |
387 | return num_items; |
388 | } |
// Appends the characters of src at positions [beg, end) to dst.
template<class String> inline
void append_string(String& dst, const String& src,
                   const typename String::size_type beg,
                   const typename String::size_type end) {
    typename String::const_iterator first = src.begin() + beg;
    typename String::const_iterator stop  = src.begin() + end;
    dst.append(first, stop);
}
395 | |
396 | } // detail namespace |
397 | } // io namespace |
398 | |
399 | |
400 | |
401 | // ----------------------------------------------- |
402 | // format :: parse(..) |
403 | |
404 | template<class Ch, class Tr, class Alloc> |
405 | basic_format<Ch, Tr, Alloc>& basic_format<Ch, Tr, Alloc>:: |
406 | parse (const string_type& buf) { |
407 | // parse the format-string |
408 | using namespace std; |
409 | #if !defined(BOOST_NO_STD_LOCALE) |
410 | const std::ctype<Ch> & fac = BOOST_USE_FACET( std::ctype<Ch>, getloc()); |
411 | #else |
412 | io::basic_oaltstringstream<Ch, Tr, Alloc> fac; |
413 | //has widen and narrow even on compilers without locale |
414 | #endif |
415 | |
416 | const Ch arg_mark = io::detail::const_or_not(fac).widen( '%'); |
417 | bool ordered_args=true; |
418 | int max_argN=-1; |
419 | |
420 | // A: find upper_bound on num_items and allocates arrays |
421 | int num_items = io::detail::upper_bound_from_fstring(buf, arg_mark, fac, exceptions()); |
422 | make_or_reuse_data(nbitems: num_items); |
423 | |
424 | // B: Now the real parsing of the format string : |
425 | num_items=0; |
426 | typename string_type::size_type i0=0, i1=0; |
427 | typename string_type::const_iterator it; |
428 | bool special_things=false; |
429 | int cur_item=0; |
430 | while( (i1=buf.find(arg_mark,i1)) != string_type::npos ) { |
431 | string_type & piece = (cur_item==0) ? prefix_ : items_[cur_item-1].appendix_; |
432 | if( buf[i1+1] == buf[i1] ) { // escaped mark, '%%' |
433 | io::detail::append_string(piece, buf, i0, i1+1); |
434 | i1+=2; i0=i1; |
435 | continue; |
436 | } |
437 | BOOST_ASSERT( static_cast<unsigned int>(cur_item) < items_.size() || cur_item==0); |
438 | |
439 | if(i1!=i0) { |
440 | io::detail::append_string(piece, buf, i0, i1); |
441 | i0=i1; |
442 | } |
443 | ++i1; |
444 | it = buf.begin()+i1; |
445 | bool parse_ok = io::detail::parse_printf_directive( |
446 | it, buf.end(), &items_[cur_item], fac, i1, exceptions()); |
447 | i1 = it - buf.begin(); |
448 | if( ! parse_ok ) // the directive will be printed verbatim |
449 | continue; |
450 | i0=i1; |
451 | items_[cur_item].compute_states(); // process complex options, like zeropad, into params |
452 | |
453 | int argN=items_[cur_item].argN_; |
454 | if(argN == format_item_t::argN_ignored) |
455 | continue; |
456 | if(argN ==format_item_t::argN_no_posit) |
457 | ordered_args=false; |
458 | else if(argN == format_item_t::argN_tabulation) special_things=true; |
459 | else if(argN > max_argN) max_argN = argN; |
460 | ++num_items; |
461 | ++cur_item; |
462 | } // loop on %'s |
463 | BOOST_ASSERT(cur_item == num_items); |
464 | |
465 | // store the final piece of string |
466 | { |
467 | string_type & piece = (cur_item==0) ? prefix_ : items_[cur_item-1].appendix_; |
468 | io::detail::append_string(piece, buf, i0, buf.size()); |
469 | } |
470 | |
471 | if( !ordered_args) { |
472 | if(max_argN >= 0 ) { // dont mix positional with non-positionnal directives |
473 | if(exceptions() & io::bad_format_string_bit) |
474 | boost::throw_exception( |
475 | e: io::bad_format_string(static_cast<std::size_t>(max_argN), 0)); |
476 | // else do nothing. => positionnal arguments are processed as non-positionnal |
477 | } |
478 | // set things like it would have been with positional directives : |
479 | int non_ordered_items = 0; |
480 | for(int i=0; i< num_items; ++i) |
481 | if(items_[i].argN_ == format_item_t::argN_no_posit) { |
482 | items_[i].argN_ = non_ordered_items; |
483 | ++non_ordered_items; |
484 | } |
485 | max_argN = non_ordered_items-1; |
486 | } |
487 | |
488 | // C: set some member data : |
489 | items_.resize(num_items, format_item_t(io::detail::const_or_not(fac).widen( ' ')) ); |
490 | |
491 | if(special_things) style_ |= special_needs; |
492 | num_args_ = max_argN + 1; |
493 | if(ordered_args) style_ |= ordered; |
494 | else style_ &= ~ordered; |
495 | return *this; |
496 | } |
497 | |
498 | } // namespace boost |
499 | |
500 | |
501 | #endif // BOOST_FORMAT_PARSING_HPP |
502 | |