| 1 | // ---------------------------------------------------------------------------- |
| 2 | // Copyright (C) 2002-2006 Marcin Kalicinski |
| 3 | // |
| 4 | // Distributed under the Boost Software License, Version 1.0. |
| 5 | // (See accompanying file LICENSE_1_0.txt or copy at |
| 6 | // http://www.boost.org/LICENSE_1_0.txt) |
| 7 | // |
| 8 | // For more information, see www.boost.org |
| 9 | // ---------------------------------------------------------------------------- |
| 10 | #ifndef BOOST_PROPERTY_TREE_DETAIL_INFO_PARSER_READ_HPP_INCLUDED |
| 11 | #define BOOST_PROPERTY_TREE_DETAIL_INFO_PARSER_READ_HPP_INCLUDED |
| 12 | |
| 13 | #include "boost/property_tree/ptree.hpp" |
| 14 | #include "boost/property_tree/detail/info_parser_error.hpp" |
| 15 | #include "boost/property_tree/detail/info_parser_utils.hpp" |
| 16 | #include <iterator> |
| 17 | #include <string> |
| 18 | #include <stack> |
| 19 | #include <fstream> |
| 20 | #include <cctype> |
| 21 | |
| 22 | namespace boost { namespace property_tree { namespace info_parser |
| 23 | { |
| 24 | |
| 25 | // Expand known escape sequences |
| 26 | template<class It> |
| 27 | std::basic_string<typename std::iterator_traits<It>::value_type> |
| 28 | expand_escapes(It b, It e) |
| 29 | { |
| 30 | typedef typename std::iterator_traits<It>::value_type Ch; |
| 31 | std::basic_string<Ch> result; |
| 32 | while (b != e) |
| 33 | { |
| 34 | if (*b == Ch('\\')) |
| 35 | { |
| 36 | ++b; |
| 37 | if (b == e) |
| 38 | { |
| 39 | BOOST_PROPERTY_TREE_THROW(info_parser_error( |
| 40 | "character expected after backslash" , "" , 0)); |
| 41 | } |
| 42 | else if (*b == Ch('0')) result += Ch('\0'); |
| 43 | else if (*b == Ch('a')) result += Ch('\a'); |
| 44 | else if (*b == Ch('b')) result += Ch('\b'); |
| 45 | else if (*b == Ch('f')) result += Ch('\f'); |
| 46 | else if (*b == Ch('n')) result += Ch('\n'); |
| 47 | else if (*b == Ch('r')) result += Ch('\r'); |
| 48 | else if (*b == Ch('t')) result += Ch('\t'); |
| 49 | else if (*b == Ch('v')) result += Ch('\v'); |
| 50 | else if (*b == Ch('"')) result += Ch('"'); |
| 51 | else if (*b == Ch('\'')) result += Ch('\''); |
| 52 | else if (*b == Ch('\\')) result += Ch('\\'); |
| 53 | else |
| 54 | BOOST_PROPERTY_TREE_THROW(info_parser_error( |
| 55 | "unknown escape sequence" , "" , 0)); |
| 56 | } |
| 57 | else |
| 58 | result += *b; |
| 59 | ++b; |
| 60 | } |
| 61 | return result; |
| 62 | } |
| 63 | |
// Whitespace test restricted to the ASCII range.
// Any character whose value, converted to unsigned, exceeds 127 is reported
// as non-space without consulting std::isspace; all other characters are
// classified by std::isspace.
template <class Ch>
bool is_ascii_space(Ch c)
{
    const unsigned code = c;
    return code <= 127 && std::isspace(c) != 0;
}
| 74 | |
// Move text forward to the first character that is_ascii_space() rejects.
// The pointer is updated in place through the reference parameter.
template<class Ch>
void skip_whitespace(const Ch *&text)
{
    for (; is_ascii_space(*text); ++text)
    {
        // Nothing to do: the loop header performs the scan.
    }
}
| 83 | |
// Read one word starting at text and leave text just past it.
// Leading whitespace is skipped; the word then runs up to (not including)
// the next whitespace character, ';' or terminating '\0'. Escape sequences
// inside the word are expanded by expand_escapes before it is returned.
template<class Ch>
std::basic_string<Ch> read_word(const Ch *&text)
{
    skip_whitespace(text);
    const Ch *const first = text;
    for (;;)
    {
        const Ch current = *text;
        if (current == Ch('\0') || current == Ch(';') || is_ascii_space(current))
            break;
        ++text;
    }
    return expand_escapes(first, text);
}
| 95 | |
// Read the rest of the current line, with surrounding whitespace removed.
// Leading whitespace is skipped, then the scan runs up to the terminating
// '\0' or a ';'. Trailing whitespace is trimmed by backing the pointer up,
// so on return text points just past the last non-space character read.
// Escape sequences in the extracted text are expanded by expand_escapes.
template<class Ch>
std::basic_string<Ch> read_line(const Ch *&text)
{
    skip_whitespace(text);
    const Ch *const first = text;

    // Run forward to the end of the line or the start of a comment.
    while (!(*text == Ch('\0') || *text == Ch(';')))
        ++text;

    // Back up over any whitespace that precedes that point.
    for (; first < text && is_ascii_space(text[-1]); --text)
    {
    }

    return expand_escapes(first, text);
}
| 109 | |
| 110 | // Extract string (inside ""), and advance pointer accordingly |
| 111 | // Set need_more_lines to true if \ continuator found |
| 112 | template<class Ch> |
| 113 | std::basic_string<Ch> read_string(const Ch *&text, bool *need_more_lines) |
| 114 | { |
| 115 | skip_whitespace(text); |
| 116 | if (*text == Ch('\"')) |
| 117 | { |
| 118 | |
| 119 | // Skip " |
| 120 | ++text; |
| 121 | |
| 122 | // Find end of string, but skip escaped " |
| 123 | bool escaped = false; |
| 124 | const Ch *start = text; |
| 125 | while ((escaped || *text != Ch('\"')) && *text != Ch('\0')) |
| 126 | { |
| 127 | escaped = (!escaped && *text == Ch('\\')); |
| 128 | ++text; |
| 129 | } |
| 130 | |
| 131 | // If end of string found |
| 132 | if (*text == Ch('\"')) |
| 133 | { |
| 134 | std::basic_string<Ch> result = expand_escapes(start, text++); |
| 135 | skip_whitespace(text); |
| 136 | if (*text == Ch('\\')) |
| 137 | { |
| 138 | if (!need_more_lines) |
| 139 | BOOST_PROPERTY_TREE_THROW(info_parser_error( |
| 140 | "unexpected \\" , "" , 0)); |
| 141 | ++text; |
| 142 | skip_whitespace(text); |
| 143 | if (*text == Ch('\0') || *text == Ch(';')) |
| 144 | *need_more_lines = true; |
| 145 | else |
| 146 | BOOST_PROPERTY_TREE_THROW(info_parser_error( |
| 147 | "expected end of line after \\" , "" , 0)); |
| 148 | } |
| 149 | else |
| 150 | if (need_more_lines) |
| 151 | *need_more_lines = false; |
| 152 | return result; |
| 153 | } |
| 154 | else |
| 155 | BOOST_PROPERTY_TREE_THROW(info_parser_error( |
| 156 | "unexpected end of line" , "" , 0)); |
| 157 | |
| 158 | } |
| 159 | else |
| 160 | BOOST_PROPERTY_TREE_THROW(info_parser_error("expected \"" , "" , 0)); |
| 161 | } |
| 162 | |
// Read a key starting at text and advance text past it.
// After skipping whitespace, a key that begins with '"' is read as a quoted
// string (with line continuation disallowed, since NULL is passed for the
// continuation flag); anything else is read as a plain word.
template<class Ch>
std::basic_string<Ch> read_key(const Ch *&text)
{
    skip_whitespace(text);
    return (*text == Ch('\"')) ? read_string(text, NULL)
                               : read_word(text);
}
| 173 | |
// Read a data value starting at text and advance text past it.
//
// After skipping whitespace, a value that begins with '"' is read as a
// quoted string via read_string, which also decides the continuation flag.
// Anything else is read as a plain word; a plain word can never be
// continued on the next line, so the flag is cleared.
//
// need_more_lines may be NULL, matching read_string's convention that NULL
// means "continuation not allowed". The flag is only written when it is
// non-NULL, so the unquoted branch no longer dereferences a null pointer.
template<class Ch>
std::basic_string<Ch> read_data(const Ch *&text, bool *need_more_lines)
{
    skip_whitespace(text);
    if (*text == Ch('\"'))
        return read_string(text, need_more_lines);

    if (need_more_lines)
        *need_more_lines = false;
    return read_word(text);
}
| 187 | |
| 188 | // Build ptree from info stream |
| 189 | template<class Ptree, class Ch> |
| 190 | void read_info_internal(std::basic_istream<Ch> &stream, |
| 191 | Ptree &pt, |
| 192 | const std::string &filename, |
| 193 | int include_depth) |
| 194 | { |
| 195 | typedef std::basic_string<Ch> str_t; |
| 196 | // Possible parser states |
| 197 | enum state_t { |
| 198 | s_key, // Parser expects key |
| 199 | s_data, // Parser expects data |
| 200 | s_data_cont // Parser expects data continuation |
| 201 | }; |
| 202 | |
| 203 | unsigned long line_no = 0; |
| 204 | state_t state = s_key; // Parser state |
| 205 | Ptree *last = NULL; // Pointer to last created ptree |
| 206 | // Define line here to minimize reallocations |
| 207 | str_t line; |
| 208 | |
| 209 | // Initialize ptree stack (used to handle nesting) |
| 210 | std::stack<Ptree *> stack; |
| 211 | stack.push(&pt); // Push root ptree on stack initially |
| 212 | |
| 213 | try { |
| 214 | // While there are characters in the stream |
| 215 | while (stream.good()) { |
| 216 | // Read one line from stream |
| 217 | ++line_no; |
| 218 | std::getline(stream, line); |
| 219 | if (!stream.good() && !stream.eof()) |
| 220 | BOOST_PROPERTY_TREE_THROW(info_parser_error( |
| 221 | "read error" , filename, line_no)); |
| 222 | const Ch *text = line.c_str(); |
| 223 | |
| 224 | // If directive found |
| 225 | skip_whitespace(text); |
| 226 | if (*text == Ch('#')) { |
| 227 | // Determine directive type |
| 228 | ++text; // skip # |
| 229 | std::basic_string<Ch> directive = read_word(text); |
| 230 | if (directive == convert_chtype<Ch, char>("include" )) { |
| 231 | // #include |
| 232 | if (include_depth > 100) { |
| 233 | BOOST_PROPERTY_TREE_THROW(info_parser_error( |
| 234 | "include depth too large, " |
| 235 | "probably recursive include" , |
| 236 | filename, line_no)); |
| 237 | } |
| 238 | str_t s = read_string(text, NULL); |
| 239 | std::string inc_name = |
| 240 | convert_chtype<char, Ch>(s.c_str()); |
| 241 | std::basic_ifstream<Ch> inc_stream(inc_name.c_str()); |
| 242 | if (!inc_stream.good()) |
| 243 | BOOST_PROPERTY_TREE_THROW(info_parser_error( |
| 244 | "cannot open include file " + inc_name, |
| 245 | filename, line_no)); |
| 246 | read_info_internal(inc_stream, *stack.top(), |
| 247 | inc_name, include_depth + 1); |
| 248 | } else { // Unknown directive |
| 249 | BOOST_PROPERTY_TREE_THROW(info_parser_error( |
| 250 | "unknown directive" , filename, line_no)); |
| 251 | } |
| 252 | |
| 253 | // Directive must be followed by end of line |
| 254 | skip_whitespace(text); |
| 255 | if (*text != Ch('\0')) { |
| 256 | BOOST_PROPERTY_TREE_THROW(info_parser_error( |
| 257 | "expected end of line" , filename, line_no)); |
| 258 | } |
| 259 | |
| 260 | // Go to next line |
| 261 | continue; |
| 262 | } |
| 263 | |
| 264 | // While there are characters left in line |
| 265 | while (1) { |
| 266 | |
| 267 | // Stop parsing on end of line or comment |
| 268 | skip_whitespace(text); |
| 269 | if (*text == Ch('\0') || *text == Ch(';')) { |
| 270 | if (state == s_data) // If there was no data set state to s_key |
| 271 | state = s_key; |
| 272 | break; |
| 273 | } |
| 274 | |
| 275 | // Process according to current parser state |
| 276 | switch (state) |
| 277 | { |
| 278 | |
| 279 | // Parser expects key |
| 280 | case s_key: |
| 281 | { |
| 282 | |
| 283 | if (*text == Ch('{')) // Brace opening found |
| 284 | { |
| 285 | if (!last) |
| 286 | BOOST_PROPERTY_TREE_THROW(info_parser_error("unexpected {" , "" , 0)); |
| 287 | stack.push(last); |
| 288 | last = NULL; |
| 289 | ++text; |
| 290 | } |
| 291 | else if (*text == Ch('}')) // Brace closing found |
| 292 | { |
| 293 | if (stack.size() <= 1) |
| 294 | BOOST_PROPERTY_TREE_THROW(info_parser_error("unmatched }" , "" , 0)); |
| 295 | stack.pop(); |
| 296 | last = NULL; |
| 297 | ++text; |
| 298 | } |
| 299 | else // Key text found |
| 300 | { |
| 301 | std::basic_string<Ch> key = read_key(text); |
| 302 | last = &stack.top()->push_back( |
| 303 | std::make_pair(key, Ptree()))->second; |
| 304 | state = s_data; |
| 305 | } |
| 306 | |
| 307 | }; break; |
| 308 | |
| 309 | // Parser expects data |
| 310 | case s_data: |
| 311 | { |
| 312 | |
| 313 | // Last ptree must be defined because we are going to add data to it |
| 314 | BOOST_ASSERT(last); |
| 315 | |
| 316 | if (*text == Ch('{')) // Brace opening found |
| 317 | { |
| 318 | stack.push(last); |
| 319 | last = NULL; |
| 320 | ++text; |
| 321 | state = s_key; |
| 322 | } |
| 323 | else if (*text == Ch('}')) // Brace closing found |
| 324 | { |
| 325 | if (stack.size() <= 1) |
| 326 | BOOST_PROPERTY_TREE_THROW(info_parser_error("unmatched }" , "" , 0)); |
| 327 | stack.pop(); |
| 328 | last = NULL; |
| 329 | ++text; |
| 330 | state = s_key; |
| 331 | } |
| 332 | else // Data text found |
| 333 | { |
| 334 | bool need_more_lines; |
| 335 | std::basic_string<Ch> data = read_data(text, &need_more_lines); |
| 336 | last->data() = data; |
| 337 | state = need_more_lines ? s_data_cont : s_key; |
| 338 | } |
| 339 | |
| 340 | |
| 341 | }; break; |
| 342 | |
| 343 | // Parser expects continuation of data after \ on previous line |
| 344 | case s_data_cont: |
| 345 | { |
| 346 | |
| 347 | // Last ptree must be defined because we are going to update its data |
| 348 | BOOST_ASSERT(last); |
| 349 | |
| 350 | if (*text == Ch('\"')) // Continuation must start with " |
| 351 | { |
| 352 | bool need_more_lines; |
| 353 | std::basic_string<Ch> data = read_string(text, &need_more_lines); |
| 354 | last->put_value(last->template get_value<std::basic_string<Ch> >() + data); |
| 355 | state = need_more_lines ? s_data_cont : s_key; |
| 356 | } |
| 357 | else |
| 358 | BOOST_PROPERTY_TREE_THROW(info_parser_error("expected \" after \\ in previous line" , "" , 0)); |
| 359 | |
| 360 | }; break; |
| 361 | |
| 362 | // Should never happen |
| 363 | default: |
| 364 | BOOST_ASSERT(0); |
| 365 | |
| 366 | } |
| 367 | } |
| 368 | } |
| 369 | |
| 370 | // Check if stack has initial size, otherwise some {'s have not been closed |
| 371 | if (stack.size() != 1) |
| 372 | BOOST_PROPERTY_TREE_THROW(info_parser_error("unmatched {" , "" , 0)); |
| 373 | |
| 374 | } |
| 375 | catch (info_parser_error &e) |
| 376 | { |
| 377 | // If line undefined rethrow error with correct filename and line |
| 378 | if (e.line() == 0) |
| 379 | { |
| 380 | BOOST_PROPERTY_TREE_THROW(info_parser_error(e.message(), filename, line_no)); |
| 381 | } |
| 382 | else |
| 383 | BOOST_PROPERTY_TREE_THROW(e); |
| 384 | |
| 385 | } |
| 386 | |
| 387 | } |
| 388 | |
| 389 | } } } |
| 390 | |
| 391 | #endif |
| 392 | |