1 | // ---------------------------------------------------------------------------- |
2 | // parsing.hpp : implementation of the parsing member functions |
3 | // ( parse, parse_printf_directive) |
4 | // ---------------------------------------------------------------------------- |
5 | |
6 | // Copyright Samuel Krempp 2003. Use, modification, and distribution are |
7 | // subject to the Boost Software License, Version 1.0. (See accompanying |
8 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
9 | |
10 | // see http://www.boost.org/libs/format for library home page |
11 | |
12 | // ---------------------------------------------------------------------------- |
13 | |
14 | #ifndef BOOST_FORMAT_PARSING_HPP |
15 | #define BOOST_FORMAT_PARSING_HPP |
16 | |
17 | |
#include <limits>

#include <boost/format/format_class.hpp>
#include <boost/format/exceptions.hpp>
#include <boost/throw_exception.hpp>
#include <boost/assert.hpp>
#include <boost/config.hpp>
#include <boost/core/ignore_unused.hpp>
24 | |
25 | namespace boost { |
26 | namespace io { |
27 | namespace detail { |
28 | |
// const_or_not(x) : hands back x with the constness required to call
// widen() / narrow() on it.
#if !defined(BOOST_NO_STD_LOCALE)
// Locale support is available : x is passed through unchanged, as a
// reference to const.
template<class T>
const T& const_or_not(const T& x)
{
    return x;
}
#else
// No std::locale : streams will be used for narrow / widen, but these
// member functions are not const, so the constness is cast away here.
template<class T>
T& const_or_not(const T& x)
{
    return const_cast<T&>(x);
}
#endif
41 | |
// Narrows the character c through fac; deflt is forwarded to narrow()
// as its default-character argument.
// The facet is first passed through const_or_not() so that narrow() is
// callable on it.
template<class Ch, class Facet> inline
char wrap_narrow(const Facet& fac, Ch c, char deflt)
{
    const char narrowed = const_or_not(fac).narrow(c, deflt);
    return narrowed;
}
46 | |
// Tells whether c is a digit.
// Asks the facet when locale-aware classification is available
// (BOOST_NO_LOCALE_ISDIGIT not defined), otherwise calls isdigit().
template<class Ch, class Facet> inline
bool wrap_isdigit(const Facet& fac, Ch c)
{
#if defined( BOOST_NO_LOCALE_ISDIGIT )
    // the facet is not consulted in this configuration
    ignore_unused(fac);
    using namespace std;
    return isdigit(c) != 0;
#else
    const bool is_digit = fac.is(std::ctype<Ch>::digit, c);
    return is_digit;
#endif
}
57 | |
// Skips the leading run of digits in [beg, end).
// Returns : an iterator to the first character that is not a digit
//           (according to wrap_isdigit), or end if there is none.
template<class Iter, class Facet>
Iter wrap_scan_notdigit(const Facet & fac, Iter beg, Iter end)
{
    while (beg != end) {
        if (!wrap_isdigit(fac, *beg))
            break;
        ++beg;
    }
    return beg;
}
64 | |
65 | |
// Input   : [start, last) iterator range, and
//           a Facet used to classify and narrow the characters
// Effects : reads the leading run of digits and converts it into an
//           integral value stored in res (res is 0 if there is no digit).
//           If the value does not fit in Res, res saturates at
//           std::numeric_limits<Res>::max() instead of overflowing
//           (signed overflow would be undefined behaviour); the remaining
//           digits are still consumed.
// Returns : an iterator to the first character after the digits read
//           (start itself when the range does not begin with a digit)
template<class Res, class Iter, class Facet>
Iter str2int (const Iter & start, const Iter & last, Res & res,
              const Facet& fac)
{
    using namespace std;
    // parenthesised to stay clear of any function-like 'max' macro
    const Res max_value = (std::numeric_limits<Res>::max)();
    const Res max_div10 = max_value / 10;
    const int max_mod10 = static_cast<int>(max_value % 10);

    Iter it;
    res=0;
    for(it=start; it != last && wrap_isdigit(fac, *it); ++it ) {
        char cur_ch = wrap_narrow(fac, *it, 0); // cant fail.
        const int digit = cur_ch - '0'; // 22.2.1.1.2.13 of the C++ standard

        // would res*10 + digit exceed max_value ? (tested without overflowing)
        if( res > max_div10 || (res == max_div10 && digit > max_mod10) )
            res = max_value;   // saturate, and stay saturated
        else
            res = static_cast<Res>(res * 10 + digit);
    }
    return it;
}
84 | |
85 | // auxiliary func called by parse_printf_directive |
86 | // for centralising error handling |
87 | // it either throws if user sets the corresponding flag, or does nothing. |
88 | inline void maybe_throw_exception(unsigned char exceptions, |
89 | std::size_t pos, std::size_t size) |
90 | { |
91 | if(exceptions & io::bad_format_string_bit) |
92 | boost::throw_exception(e: io::bad_format_string(pos, size) ); |
93 | } |
94 | |
95 | |
// Parses one printf-style directive of a format string.
//
// Input: the position of a printf-directive in the format-string
//    a basic_ios& merely to use its widen/narrow member function
//    a bitset'exceptions' telling whether to throw exceptions on errors.
//    'offset' is added to every error position handed to
//    maybe_throw_exception, and to the remaining length to form the
//    string size reported with it.
// Returns:
//  true if parse succeeded (ignore some errors if exceptions disabled)
//  false if it failed so bad that the directive should be printed verbatim
// Effects:
//  start is incremented so that *start is the first char after
//     this directive
//  *fpar is set with the parameters read in the directive
//
// The function is organised as a sequence of labelled stages that are
// entered in order or jumped to with goto :
//    [ '|' ] [ N% | N$ | width ]  parse_flags  parse_width
//    parse_precision  (type modifiers)  parse_conversion_specification
template<class Ch, class Tr, class Alloc, class Iter, class Facet>
bool parse_printf_directive(Iter & start, const Iter& last,
                            detail::format_item<Ch, Tr, Alloc> * fpar,
                            const Facet& fac,
                            std::size_t offset, unsigned char exceptions)
{
    typedef typename basic_format<Ch, Tr, Alloc>::format_item_t format_item_t;

    fpar->argN_ = format_item_t::argN_no_posit; // if no positional-directive
    bool precision_set = false;   // becomes true only when an explicit numeric precision is read
    bool in_brackets=false;       // becomes true when the directive opens with '|'
    Iter start0 = start;          // kept to compute error positions relative to the directive start
    std::size_t fstring_size = last-start0+offset;
    char mssiz = 0;               // progress marker for the Microsoft I32 / I64 size modifiers

    if(start>= last) { // empty directive : this is a trailing %
        maybe_throw_exception(exceptions, start-start0 + offset, fstring_size);
        return false;
    }

    // optional opening bracket of the %|...| syntax
    if(*start== const_or_not(fac).widen( '|')) {
        in_brackets=true;
        if( ++start >= last ) {
            // nothing after the opening '|'
            maybe_throw_exception(exceptions, start-start0 + offset, fstring_size);
            return false;
        }
    }

    // the flag '0' would be picked as a digit for argument order, but here it's a flag :
    if(*start== const_or_not(fac).widen( '0'))
        goto parse_flags;

    // handle argument order (%2$d) or possibly width specification: %2d
    if(wrap_isdigit(fac, *start)) {
        int n;
        start = str2int(start, last, n, fac);
        if( start >= last ) {
            // the string ends right after the number
            maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
            return false;
        }

        // %N% case : this is already the end of the directive
        if( *start == const_or_not(fac).widen( '%') ) {
            fpar->argN_ = n-1;   // N is 1-based in the string, argN_ is 0-based
            ++start;
            if( in_brackets)
                // %|N% : reported as an error, but still accepted when not throwing
                maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
            return true;
        }

        if ( *start== const_or_not(fac).widen( '$') ) {
            // N$ : positional argument, flags / width / ... may follow
            fpar->argN_ = n-1;
            ++start;
        }
        else {
            // non-positional directive
            // the number just read was the width, so flags and width are skipped
            fpar->fmtstate_.width_ = n;
            fpar->argN_ = format_item_t::argN_no_posit;
            goto parse_precision;
        }
    }

  parse_flags:
    // handle flags
    while (start != last) { // as long as char is one of + - = _ # 0 or ' '
        switch ( wrap_narrow(fac, *start, 0)) {
            case '\'':
                break; // no effect yet. (painful to implement)
            case '-':
                fpar->fmtstate_.flags_ |= std::ios_base::left;
                break;
            case '=':
                fpar->pad_scheme_ |= format_item_t::centered;
                break;
            case '_':
                fpar->fmtstate_.flags_ |= std::ios_base::internal;
                break;
            case ' ':
                fpar->pad_scheme_ |= format_item_t::spacepad;
                break;
            case '+':
                fpar->fmtstate_.flags_ |= std::ios_base::showpos;
                break;
            case '0':
                fpar->pad_scheme_ |= format_item_t::zeropad;
                // need to know alignment before really setting flags,
                // so just add 'zeropad' flag for now, it will be processed later.
                break;
            case '#':
                fpar->fmtstate_.flags_ |= std::ios_base::showpoint | std::ios_base::showbase;
                break;
            default:
                // first character that is not a flag : here start != last
                goto parse_width;
        }
        ++start;
    } // loop on flag.

    // only reached when the loop above ran out of characters
    if( start>=last) {
        maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
        return true;
    }

    // first skip 'asterisk fields' : * or num (length)
  parse_width:
    // a '*' width is skipped without being recorded; a numeric width is stored
    if(*start == const_or_not(fac).widen( '*') )
        ++start;
    else if(start!=last && wrap_isdigit(fac, *start))
        start = str2int(start, last, fpar->fmtstate_.width_, fac);

  parse_precision:
    if( start>= last) {
        maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
        return true;
    }
    // handle precision spec
    if (*start== const_or_not(fac).widen( '.')) {
        ++start;
        if(start != last && *start == const_or_not(fac).widen( '*') )
            ++start;   // ".*" : skipped, precision_ is left untouched
        else if(start != last && wrap_isdigit(fac, *start)) {
            start = str2int(start, last, fpar->fmtstate_.precision_, fac);
            precision_set = true;
        }
        else
            fpar->fmtstate_.precision_ =0;   // a '.' followed by neither '*' nor a digit
    }

    // argument type modifiers
    while (start != last) {
        switch (wrap_narrow(fac, *start, 0)) {
            case 'h':
            case 'l':
            case 'j':
            case 'z':
            case 'L':
                // boost::format ignores argument type modifiers as it relies on
                // the type of the argument fed into it by operator %
                break;

            // Note that the ptrdiff_t argument type 't' from C++11 is not honored
            // because it was already in use as the tabulation specifier in boost::format
            // case 't':

            // Microsoft extensions:
            // https://msdn.microsoft.com/en-us/library/tcxf1dw6.aspx
            //
            // mssiz records how much of "I32" / "I64" has been seen so far :
            //   'I' -> '3' -> (2) -> 0     and     'I' -> '6' -> (4) -> 0
            // a digit arriving in any other state is reported as an error.

            case 'w':
                break;
            case 'I':
                mssiz = 'I';
                break;
            case '3':
                if (mssiz != 'I') {
                    maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
                    return true;
                }
                mssiz = '3';
                break;
            case '2':
                if (mssiz != '3') {
                    maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
                    return true;
                }
                mssiz = 0x00;   // "I32" complete
                break;
            case '6':
                if (mssiz != 'I') {
                    maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
                    return true;
                }
                mssiz = '6';
                break;
            case '4':
                if (mssiz != '6') {
                    maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
                    return true;
                }
                mssiz = 0x00;   // "I64" complete
                break;
            default:
                // a lone 'I' (not followed by 32 / 64) is accepted : clear the marker
                if (mssiz && mssiz == 'I') {
                    mssiz = 0;
                }
                goto parse_conversion_specification;
        }
        ++start;
    } // loop on argument type modifiers to pick up 'hh', 'll', and the more complex microsoft ones

  parse_conversion_specification:
    // error if the string is exhausted, or if mssiz is still set
    // (e.g. "I3" / "I6" left unfinished)
    if (start >= last || mssiz) {
        maybe_throw_exception(exceptions, start - start0 + offset, fstring_size);
        return true;
    }

    // %|...| : the closing bracket may come directly, without a conversion character
    if( in_brackets && *start== const_or_not(fac).widen( '|') ) {
        ++start;
        return true;
    }

    // The default flags are "dec" and "skipws"
    // so if changing the base, need to unset basefield first

    switch (wrap_narrow(fac, *start, 0))
    {
        // Boolean
        case 'b':
            fpar->fmtstate_.flags_ |= std::ios_base::boolalpha;
            break;

        // Decimal
        case 'u':
        case 'd':
        case 'i':
            // Defaults are sufficient
            break;

        // Hex
        case 'X':
            fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
            BOOST_FALLTHROUGH;
        case 'x':
        case 'p': // pointer => set hex.
            fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
            fpar->fmtstate_.flags_ |= std::ios_base::hex;
            break;

        // Octal
        case 'o':
            fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
            fpar->fmtstate_.flags_ |= std::ios_base::oct;
            break;

        // Floating
        case 'A':
            fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
            BOOST_FALLTHROUGH;
        case 'a':
            // both fixed and scientific are set for %a / %A
            fpar->fmtstate_.flags_ &= ~std::ios_base::basefield;
            fpar->fmtstate_.flags_ |= std::ios_base::fixed;
            fpar->fmtstate_.flags_ |= std::ios_base::scientific;
            break;
        case 'E':
            fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
            BOOST_FALLTHROUGH;
        case 'e':
            fpar->fmtstate_.flags_ |= std::ios_base::scientific;
            break;
        case 'F':
            fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
            BOOST_FALLTHROUGH;
        case 'f':
            fpar->fmtstate_.flags_ |= std::ios_base::fixed;
            break;
        case 'G':
            fpar->fmtstate_.flags_ |= std::ios_base::uppercase;
            BOOST_FALLTHROUGH;
        case 'g':
            // default flags are correct here
            break;

        // Tabulation (a boost::format extension)
        case 'T':
            // %T is followed by one more character, which is stored as the fill character
            ++start;
            if( start >= last) {
                maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
                return false;
            } else {
                fpar->fmtstate_.fill_ = *start;
            }
            fpar->pad_scheme_ |= format_item_t::tabulation;
            fpar->argN_ = format_item_t::argN_tabulation;
            break;
        case 't':
            // same as %T, with a space as the fill character
            fpar->fmtstate_.fill_ = const_or_not(fac).widen( ' ');
            fpar->pad_scheme_ |= format_item_t::tabulation;
            fpar->argN_ = format_item_t::argN_tabulation;
            break;

        // Character
        case 'C':
        case 'c':
            fpar->truncate_ = 1;
            break;

        // String
        case 'S':
        case 's':
            if(precision_set) // handle truncation manually, with own parameter.
                fpar->truncate_ = fpar->fmtstate_.precision_;
            fpar->fmtstate_.precision_ = 6; // default stream precision.
            break;

        // %n is insecure and ignored by boost::format
        case 'n' :
            fpar->argN_ = format_item_t::argN_ignored;
            break;

        default:
            // unknown conversion character : when not throwing, it is still
            // consumed by the ++start below
            maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
    }
    ++start;   // step over the conversion character (or the fill character of %T)

    if( in_brackets ) {
        if( start != last && *start== const_or_not(fac).widen( '|') ) {
            ++start;
            return true;
        }
        // missing closing '|' : an error, tolerated when exceptions are disabled
        else maybe_throw_exception(exceptions, start-start0+offset, fstring_size);
    }
    return true;
}
// -end parse_printf_directive()
418 | |
419 | template<class String, class Facet> |
420 | int upper_bound_from_fstring(const String& buf, |
421 | const typename String::value_type arg_mark, |
422 | const Facet& fac, |
423 | unsigned char exceptions) |
424 | { |
425 | // quick-parsing of the format-string to count arguments mark (arg_mark, '%') |
426 | // returns : upper bound on the number of format items in the format strings |
427 | using namespace boost::io; |
428 | typename String::size_type i1=0; |
429 | int num_items=0; |
430 | while( (i1=buf.find(arg_mark,i1)) != String::npos ) { |
431 | if( i1+1 >= buf.size() ) { |
432 | if(exceptions & bad_format_string_bit) |
433 | boost::throw_exception(e: bad_format_string(i1, buf.size() )); // must not end in ".. %" |
434 | else { |
435 | ++num_items; |
436 | break; |
437 | } |
438 | } |
439 | if(buf[i1+1] == buf[i1] ) {// escaped "%%" |
440 | i1+=2; continue; |
441 | } |
442 | |
443 | ++i1; |
444 | // in case of %N% directives, dont count it double (wastes allocations..) : |
445 | i1 = detail::wrap_scan_notdigit(fac, buf.begin()+i1, buf.end()) - buf.begin(); |
446 | if( i1 < buf.size() && buf[i1] == arg_mark ) |
447 | ++i1; |
448 | ++num_items; |
449 | } |
450 | return num_items; |
451 | } |
// Appends the characters of src located at positions [beg, end) to dst.
template<class String> inline
void append_string(String& dst, const String& src,
                   const typename String::size_type beg,
                   const typename String::size_type end)
{
    const typename String::const_iterator first = src.begin()+beg;
    const typename String::const_iterator past_last = src.begin()+end;
    dst.append(first, past_last);
}
458 | |
459 | } // detail namespace |
460 | } // io namespace |
461 | |
462 | |
463 | |
464 | // ----------------------------------------------- |
465 | // format :: parse(..) |
466 | |
467 | template<class Ch, class Tr, class Alloc> |
468 | basic_format<Ch, Tr, Alloc>& basic_format<Ch, Tr, Alloc>:: |
469 | parse (const string_type& buf) { |
470 | // parse the format-string |
471 | using namespace std; |
472 | #if !defined(BOOST_NO_STD_LOCALE) |
473 | const std::ctype<Ch> & fac = BOOST_USE_FACET( std::ctype<Ch>, getloc()); |
474 | #else |
475 | io::basic_oaltstringstream<Ch, Tr, Alloc> fac; |
476 | //has widen and narrow even on compilers without locale |
477 | #endif |
478 | |
479 | const Ch arg_mark = io::detail::const_or_not(fac).widen( '%'); |
480 | bool ordered_args=true; |
481 | int max_argN=-1; |
482 | |
483 | // A: find upper_bound on num_items and allocates arrays |
484 | int num_items = io::detail::upper_bound_from_fstring(buf, arg_mark, fac, exceptions()); |
485 | make_or_reuse_data(nbitems: num_items); |
486 | |
487 | // B: Now the real parsing of the format string : |
488 | num_items=0; |
489 | typename string_type::size_type i0=0, i1=0; |
490 | typename string_type::const_iterator it; |
491 | bool special_things=false; |
492 | int cur_item=0; |
493 | while( (i1=buf.find(arg_mark,i1)) != string_type::npos ) { |
494 | string_type & piece = (cur_item==0) ? prefix_ : items_[cur_item-1].appendix_; |
495 | if( buf[i1+1] == buf[i1] ) { // escaped mark, '%%' |
496 | io::detail::append_string(piece, buf, i0, i1+1); |
497 | i1+=2; i0=i1; |
498 | continue; |
499 | } |
500 | BOOST_ASSERT( static_cast<unsigned int>(cur_item) < items_.size() || cur_item==0); |
501 | |
502 | if(i1!=i0) { |
503 | io::detail::append_string(piece, buf, i0, i1); |
504 | i0=i1; |
505 | } |
506 | ++i1; |
507 | it = buf.begin()+i1; |
508 | bool parse_ok = io::detail::parse_printf_directive( |
509 | it, buf.end(), &items_[cur_item], fac, i1, exceptions()); |
510 | i1 = it - buf.begin(); |
511 | if( ! parse_ok ) // the directive will be printed verbatim |
512 | continue; |
513 | i0=i1; |
514 | items_[cur_item].compute_states(); // process complex options, like zeropad, into params |
515 | |
516 | int argN=items_[cur_item].argN_; |
517 | if(argN == format_item_t::argN_ignored) |
518 | continue; |
519 | if(argN ==format_item_t::argN_no_posit) |
520 | ordered_args=false; |
521 | else if(argN == format_item_t::argN_tabulation) special_things=true; |
522 | else if(argN > max_argN) max_argN = argN; |
523 | ++num_items; |
524 | ++cur_item; |
525 | } // loop on %'s |
526 | BOOST_ASSERT(cur_item == num_items); |
527 | |
528 | // store the final piece of string |
529 | { |
530 | string_type & piece = (cur_item==0) ? prefix_ : items_[cur_item-1].appendix_; |
531 | io::detail::append_string(piece, buf, i0, buf.size()); |
532 | } |
533 | |
534 | if( !ordered_args) { |
535 | if(max_argN >= 0 ) { // dont mix positional with non-positionnal directives |
536 | if(exceptions() & io::bad_format_string_bit) |
537 | boost::throw_exception( |
538 | e: io::bad_format_string(static_cast<std::size_t>(max_argN), 0)); |
539 | // else do nothing. => positionnal arguments are processed as non-positionnal |
540 | } |
541 | // set things like it would have been with positional directives : |
542 | int non_ordered_items = 0; |
543 | for(int i=0; i< num_items; ++i) |
544 | if(items_[i].argN_ == format_item_t::argN_no_posit) { |
545 | items_[i].argN_ = non_ordered_items; |
546 | ++non_ordered_items; |
547 | } |
548 | max_argN = non_ordered_items-1; |
549 | } |
550 | |
551 | // C: set some member data : |
552 | items_.resize(num_items, format_item_t(io::detail::const_or_not(fac).widen( ' ')) ); |
553 | |
554 | if(special_things) style_ |= special_needs; |
555 | num_args_ = max_argN + 1; |
556 | if(ordered_args) style_ |= ordered; |
557 | else style_ &= ~ordered; |
558 | return *this; |
559 | } |
560 | |
561 | } // namespace boost |
562 | |
563 | |
564 | #endif // BOOST_FORMAT_PARSING_HPP |
565 | |