| 1 | /*============================================================================= |
| 2 | Copyright (c) 2001-2010 Joel de Guzman |
| 3 | |
| 4 | Distributed under the Boost Software License, Version 1.0. (See accompanying |
| 5 | file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
| 6 | =============================================================================*/ |
| 7 | /////////////////////////////////////////////////////////////////////////////// |
| 8 | // |
| 9 | // A mini XML-like parser |
| 10 | // |
| 11 | // [ JDG March 25, 2007 ] spirit2 |
| 12 | // |
| 13 | /////////////////////////////////////////////////////////////////////////////// |
| 14 | |
#include <boost/spirit/include/qi.hpp>
#include <boost/phoenix/core.hpp>
#include <boost/phoenix/operator.hpp>
#include <boost/phoenix/fusion.hpp>
#include <boost/phoenix/stl.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/variant/recursive_variant.hpp>
#include <boost/foreach.hpp>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>
| 28 | |
| 29 | namespace client |
| 30 | { |
| 31 | namespace fusion = boost::fusion; |
| 32 | namespace phoenix = boost::phoenix; |
| 33 | namespace qi = boost::spirit::qi; |
| 34 | namespace ascii = boost::spirit::ascii; |
| 35 | |
| 36 | /////////////////////////////////////////////////////////////////////////// |
| 37 | // Our mini XML tree representation |
| 38 | /////////////////////////////////////////////////////////////////////////// |
| 39 | struct mini_xml; |
| 40 | |
| 41 | typedef |
| 42 | boost::variant< |
| 43 | boost::recursive_wrapper<mini_xml> |
| 44 | , std::string |
| 45 | > |
| 46 | mini_xml_node; |
| 47 | |
| 48 | struct mini_xml |
| 49 | { |
| 50 | std::string name; // tag name |
| 51 | std::vector<mini_xml_node> children; // children |
| 52 | }; |
| 53 | } |
| 54 | |
| 55 | // We need to tell fusion about our mini_xml struct |
| 56 | // to make it a first-class fusion citizen |
| 57 | BOOST_FUSION_ADAPT_STRUCT( |
| 58 | client::mini_xml, |
| 59 | (std::string, name) |
| 60 | (std::vector<client::mini_xml_node>, children) |
| 61 | ) |
| 62 | |
| 63 | namespace client |
| 64 | { |
| 65 | /////////////////////////////////////////////////////////////////////////// |
| 66 | // Print out the mini xml tree |
| 67 | /////////////////////////////////////////////////////////////////////////// |
// Number of columns added per nesting level when printing the tree.
int const tabsize = 4;

// Writes `indent` space characters to std::cout.
// Writes nothing when `indent` is zero or negative.
void tab(int indent)
{
    int remaining = indent;
    while (remaining > 0)
    {
        std::cout << ' ';
        --remaining;
    }
}
| 75 | |
| 76 | struct mini_xml_printer |
| 77 | { |
| 78 | mini_xml_printer(int indent = 0) |
| 79 | : indent(indent) |
| 80 | { |
| 81 | } |
| 82 | |
| 83 | void operator()(mini_xml const& xml) const; |
| 84 | |
| 85 | int indent; |
| 86 | }; |
| 87 | |
| 88 | struct mini_xml_node_printer : boost::static_visitor<> |
| 89 | { |
| 90 | mini_xml_node_printer(int indent = 0) |
| 91 | : indent(indent) |
| 92 | { |
| 93 | } |
| 94 | |
| 95 | void operator()(mini_xml const& xml) const |
| 96 | { |
| 97 | mini_xml_printer(indent+tabsize)(xml); |
| 98 | } |
| 99 | |
| 100 | void operator()(std::string const& text) const |
| 101 | { |
| 102 | tab(indent: indent+tabsize); |
| 103 | std::cout << "text: \"" << text << '"' << std::endl; |
| 104 | } |
| 105 | |
| 106 | int indent; |
| 107 | }; |
| 108 | |
| 109 | void mini_xml_printer::operator()(mini_xml const& xml) const |
| 110 | { |
| 111 | tab(indent); |
| 112 | std::cout << "tag: " << xml.name << std::endl; |
| 113 | tab(indent); |
| 114 | std::cout << '{' << std::endl; |
| 115 | |
| 116 | BOOST_FOREACH(mini_xml_node const& node, xml.children) |
| 117 | { |
| 118 | boost::apply_visitor(visitor: mini_xml_node_printer(indent), visitable: node); |
| 119 | } |
| 120 | |
| 121 | tab(indent); |
| 122 | std::cout << '}' << std::endl; |
| 123 | } |
| 124 | |
| 125 | /////////////////////////////////////////////////////////////////////////// |
| 126 | // Our mini XML grammar definition |
| 127 | /////////////////////////////////////////////////////////////////////////// |
| 128 | //[tutorial_xml2_grammar |
| 129 | template <typename Iterator> |
| 130 | struct mini_xml_grammar |
| 131 | : qi::grammar<Iterator, mini_xml(), qi::locals<std::string>, ascii::space_type> |
| 132 | { |
| 133 | mini_xml_grammar() |
| 134 | : mini_xml_grammar::base_type(xml) |
| 135 | { |
| 136 | using qi::lit; |
| 137 | using qi::lexeme; |
| 138 | using ascii::char_; |
| 139 | using ascii::string; |
| 140 | using namespace qi::labels; |
| 141 | |
| 142 | text %= lexeme[+(char_ - '<')]; |
| 143 | node %= xml | text; |
| 144 | |
| 145 | start_tag %= |
| 146 | '<' |
| 147 | >> !lit('/') |
| 148 | >> lexeme[+(char_ - '>')] |
| 149 | >> '>' |
| 150 | ; |
| 151 | |
| 152 | end_tag = |
| 153 | "</" |
| 154 | >> lit(_r1) |
| 155 | >> '>' |
| 156 | ; |
| 157 | |
| 158 | xml %= |
| 159 | start_tag[_a = _1] |
| 160 | >> *node |
| 161 | >> end_tag(_a) |
| 162 | ; |
| 163 | } |
| 164 | |
| 165 | qi::rule<Iterator, mini_xml(), qi::locals<std::string>, ascii::space_type> xml; |
| 166 | qi::rule<Iterator, mini_xml_node(), ascii::space_type> node; |
| 167 | qi::rule<Iterator, std::string(), ascii::space_type> text; |
| 168 | qi::rule<Iterator, std::string(), ascii::space_type> start_tag; |
| 169 | qi::rule<Iterator, void(std::string), ascii::space_type> end_tag; |
| 170 | }; |
| 171 | //] |
| 172 | } |
| 173 | |
| 174 | /////////////////////////////////////////////////////////////////////////////// |
| 175 | // Main program |
| 176 | /////////////////////////////////////////////////////////////////////////////// |
| 177 | int main(int argc, char **argv) |
| 178 | { |
| 179 | char const* filename; |
| 180 | if (argc > 1) |
| 181 | { |
| 182 | filename = argv[1]; |
| 183 | } |
| 184 | else |
| 185 | { |
| 186 | std::cerr << "Error: No input file provided." << std::endl; |
| 187 | return 1; |
| 188 | } |
| 189 | |
| 190 | std::ifstream in(filename, std::ios_base::in); |
| 191 | |
| 192 | if (!in) |
| 193 | { |
| 194 | std::cerr << "Error: Could not open input file: " |
| 195 | << filename << std::endl; |
| 196 | return 1; |
| 197 | } |
| 198 | |
| 199 | std::string storage; // We will read the contents here. |
| 200 | in.unsetf(mask: std::ios::skipws); // No white space skipping! |
| 201 | std::copy( |
| 202 | first: std::istream_iterator<char>(in), |
| 203 | last: std::istream_iterator<char>(), |
| 204 | result: std::back_inserter(x&: storage)); |
| 205 | |
| 206 | typedef client::mini_xml_grammar<std::string::const_iterator> mini_xml_grammar; |
| 207 | mini_xml_grammar xml; // Our grammar |
| 208 | client::mini_xml ast; // Our tree |
| 209 | |
| 210 | using boost::spirit::ascii::space; |
| 211 | std::string::const_iterator iter = storage.begin(); |
| 212 | std::string::const_iterator end = storage.end(); |
| 213 | bool r = phrase_parse(first&: iter, last: end, expr: xml, skipper: space, attr&: ast); |
| 214 | |
| 215 | if (r && iter == end) |
| 216 | { |
| 217 | std::cout << "-------------------------\n" ; |
| 218 | std::cout << "Parsing succeeded\n" ; |
| 219 | std::cout << "-------------------------\n" ; |
| 220 | client::mini_xml_printer printer; |
| 221 | printer(ast); |
| 222 | return 0; |
| 223 | } |
| 224 | else |
| 225 | { |
| 226 | std::string::const_iterator some = iter + std::min(a: 30, b: int(end - iter)); |
| 227 | std::string context(iter, (some>end)?end:some); |
| 228 | std::cout << "-------------------------\n" ; |
| 229 | std::cout << "Parsing failed\n" ; |
| 230 | std::cout << "stopped at: \"" << context << "...\"\n" ; |
| 231 | std::cout << "-------------------------\n" ; |
| 232 | return 1; |
| 233 | } |
| 234 | } |
| 235 | |
| 236 | |
| 237 | |