1// Copyright 2004-9 Trustees of Indiana University
2
3// Distributed under the Boost Software License, Version 1.0.
4// (See accompanying file LICENSE_1_0.txt or copy at
5// http://www.boost.org/LICENSE_1_0.txt)
6
7//
8// read_graphviz_new.cpp -
9// Initialize a model of the BGL's MutableGraph concept and an associated
10// collection of property maps using a graph expressed in the GraphViz
11// DOT Language.
12//
13// Based on the grammar found at:
14// https://web.archive.org/web/20041213234742/http://www.graphviz.org/cvs/doc/info/lang.html
15//
16// Jeremiah rewrite used grammar found at:
17// http://www.graphviz.org/doc/info/lang.html
18// and page 34 or http://www.graphviz.org/pdf/dotguide.pdf
19//
20// See documentation for this code at:
21// http://www.boost.org/libs/graph/doc/read_graphviz.html
22//
23
24// Author: Jeremiah Willcock
25// Ronald Garcia
26//
27
28#define BOOST_GRAPH_SOURCE
29#include <boost/assert.hpp>
30#include <boost/ref.hpp>
31#include <boost/function/function2.hpp>
32#include <boost/property_map/dynamic_property_map.hpp>
33#include <boost/graph/graph_traits.hpp>
34#include <boost/detail/workaround.hpp>
35#include <boost/algorithm/string/case_conv.hpp>
36#include <cstdlib>
37#include <algorithm>
38#include <exception> // for std::exception
39#include <iostream>
40#include <map>
41#include <set>
42#include <string>
43#include <vector>
44#include <utility>
45#include <boost/throw_exception.hpp>
46#include <boost/regex.hpp>
47#include <boost/function.hpp>
48#include <boost/graph/dll_import_export.hpp>
49#include <boost/graph/graphviz.hpp>
50
51namespace boost
52{
53
54namespace read_graphviz_detail
55{
56 struct token
57 {
58 enum token_type
59 {
60 kw_strict,
61 kw_graph,
62 kw_digraph,
63 kw_node,
64 kw_edge,
65 kw_subgraph,
66 left_brace,
67 right_brace,
68 semicolon,
69 equal,
70 left_bracket,
71 right_bracket,
72 comma,
73 colon,
74 dash_greater,
75 dash_dash,
76 plus,
77 left_paren,
78 right_paren,
79 at,
80 identifier,
81 quoted_string, // Only used internally in tokenizer
82 eof,
83 invalid
84 };
85 token_type type;
86 std::string normalized_value; // May have double-quotes removed and/or
87 // some escapes replaced
88 token(token_type type, const std::string& normalized_value)
89 : type(type), normalized_value(normalized_value)
90 {
91 }
92 token() : type(invalid), normalized_value("") {}
93 friend std::ostream& operator<<(std::ostream& o, const token& t)
94 {
95 switch (t.type)
96 {
97 case token::kw_strict:
98 o << "<strict>";
99 break;
100 case token::kw_graph:
101 o << "<graph>";
102 break;
103 case token::kw_digraph:
104 o << "<digraph>";
105 break;
106 case token::kw_node:
107 o << "<node>";
108 break;
109 case token::kw_edge:
110 o << "<edge>";
111 break;
112 case token::kw_subgraph:
113 o << "<subgraph>";
114 break;
115 case token::left_brace:
116 o << "<left_brace>";
117 break;
118 case token::right_brace:
119 o << "<right_brace>";
120 break;
121 case token::semicolon:
122 o << "<semicolon>";
123 break;
124 case token::equal:
125 o << "<equal>";
126 break;
127 case token::left_bracket:
128 o << "<left_bracket>";
129 break;
130 case token::right_bracket:
131 o << "<right_bracket>";
132 break;
133 case token::comma:
134 o << "<comma>";
135 break;
136 case token::colon:
137 o << "<colon>";
138 break;
139 case token::dash_greater:
140 o << "<dash-greater>";
141 break;
142 case token::dash_dash:
143 o << "<dash-dash>";
144 break;
145 case token::plus:
146 o << "<plus>";
147 break;
148 case token::left_paren:
149 o << "<left_paren>";
150 break;
151 case token::right_paren:
152 o << "<right_paren>";
153 break;
154 case token::at:
155 o << "<at>";
156 break;
157 case token::identifier:
158 o << "<identifier>";
159 break;
160 case token::quoted_string:
161 o << "<quoted_string>";
162 break;
163 case token::eof:
164 o << "<eof>";
165 break;
166 default:
167 o << "<invalid type>";
168 break;
169 }
170 o << " '" << t.normalized_value << "'";
171 return o;
172 }
173 };
174
175 bad_graphviz_syntax lex_error(const std::string& errmsg, char bad_char)
176 {
177 if (bad_char == '\0')
178 {
179 return bad_graphviz_syntax(errmsg + " (at end of input)");
180 }
181 else
182 {
183 return bad_graphviz_syntax(
184 errmsg + " (char is '" + bad_char + "')");
185 }
186 }
187
188 bad_graphviz_syntax parse_error(
189 const std::string& errmsg, const token& bad_token)
190 {
191 return bad_graphviz_syntax(errmsg + " (token is \""
192 + boost::lexical_cast< std::string >(arg: bad_token) + "\")");
193 }
194
195 struct tokenizer
196 {
197 std::string::const_iterator begin, end;
198 std::vector< token > lookahead;
199 // Precomputed regexes
200 boost::regex stuff_to_skip;
201 boost::regex basic_id_token;
202 boost::regex punctuation_token;
203 boost::regex number_token;
204 boost::regex quoted_string_token;
205 boost::regex xml_tag_token;
206 boost::regex cdata;
207
208 tokenizer(const std::string& str) : begin(str.begin()), end(str.end())
209 {
210 std::string end_of_token = "(?=(?:\\W))";
211 std::string whitespace = "(?:\\s+)";
212 std::string slash_slash_comment = "(?://.*?$)";
213 std::string slash_star_comment = "(?:/\\*.*?\\*/)";
214 std::string hash_comment = "(?:^#.*?$)";
215 std::string backslash_newline = "(?:[\\\\][\\n])";
216 stuff_to_skip = "\\A(?:" + whitespace + "|" + slash_slash_comment
217 + "|" + slash_star_comment + "|" + hash_comment + "|"
218 + backslash_newline + ")*";
219 basic_id_token = "\\A([[:alpha:]_](?:\\w*))";
220 punctuation_token = "\\A([][{};=,:+()@]|[-][>-])";
221 number_token = "\\A([-]?(?:(?:\\.\\d+)|(?:\\d+(?:\\.\\d*)?)))";
222 quoted_string_token = "\\A(\"(?:[^\"\\\\]|(?:[\\\\].))*\")";
223 xml_tag_token
224 = "\\A<(/?)(?:[^!?'\"]|(?:'[^']*?')|(?:\"[^\"]*?\"))*?(/?)>";
225 cdata = "\\A\\Q<![CDATA[\\E.*?\\Q]]>\\E";
226 }
227
228 void skip()
229 {
230 boost::match_results< std::string::const_iterator > results;
231#ifndef NDEBUG
232 bool found =
233#endif
234 boost::regex_search(first: begin, last: end, m&: results, e: stuff_to_skip);
235#ifndef NDEBUG
236 BOOST_ASSERT(found);
237#endif
238 boost::sub_match< std::string::const_iterator > sm1
239 = results.suffix();
240 BOOST_ASSERT(sm1.second == end);
241 begin = sm1.first;
242 }
243
244 token get_token_raw()
245 {
246 if (!lookahead.empty())
247 {
248 token t = lookahead.front();
249 lookahead.erase(position: lookahead.begin());
250 return t;
251 }
252 skip();
253 if (begin == end)
254 return token(token::eof, "");
255 // Look for keywords first
256 bool found;
257 boost::match_results< std::string::const_iterator > results;
258 found = boost::regex_search(first: begin, last: end, m&: results, e: basic_id_token);
259 if (found)
260 {
261 std::string str = results[1].str();
262 std::string str_lower = boost::algorithm::to_lower_copy(Input: str);
263 begin = results.suffix().first;
264 if (str_lower == "strict")
265 {
266 return token(token::kw_strict, str);
267 }
268 else if (str_lower == "graph")
269 {
270 return token(token::kw_graph, str);
271 }
272 else if (str_lower == "digraph")
273 {
274 return token(token::kw_digraph, str);
275 }
276 else if (str_lower == "node")
277 {
278 return token(token::kw_node, str);
279 }
280 else if (str_lower == "edge")
281 {
282 return token(token::kw_edge, str);
283 }
284 else if (str_lower == "subgraph")
285 {
286 return token(token::kw_subgraph, str);
287 }
288 else
289 {
290 return token(token::identifier, str);
291 }
292 }
293 found = boost::regex_search(first: begin, last: end, m&: results, e: punctuation_token);
294 if (found)
295 {
296 std::string str = results[1].str();
297 begin = results.suffix().first;
298 switch (str[0])
299 {
300 case '[':
301 return token(token::left_bracket, str);
302 case ']':
303 return token(token::right_bracket, str);
304 case '{':
305 return token(token::left_brace, str);
306 case '}':
307 return token(token::right_brace, str);
308 case ';':
309 return token(token::semicolon, str);
310 case '=':
311 return token(token::equal, str);
312 case ',':
313 return token(token::comma, str);
314 case ':':
315 return token(token::colon, str);
316 case '+':
317 return token(token::plus, str);
318 case '(':
319 return token(token::left_paren, str);
320 case ')':
321 return token(token::right_paren, str);
322 case '@':
323 return token(token::at, str);
324 case '-':
325 {
326 switch (str[1])
327 {
328 case '-':
329 return token(token::dash_dash, str);
330 case '>':
331 return token(token::dash_greater, str);
332 default:
333 BOOST_ASSERT(!"Definition of punctuation_token does "
334 "not match switch statement");
335 }
336 // Prevent static analyzers complaining about fallthrough:
337 break;
338 }
339 default:
340 BOOST_ASSERT(!"Definition of punctuation_token does not "
341 "match switch statement");
342 }
343 }
344 found = boost::regex_search(first: begin, last: end, m&: results, e: number_token);
345 if (found)
346 {
347 std::string str = results[1].str();
348 begin = results.suffix().first;
349 return token(token::identifier, str);
350 }
351 found
352 = boost::regex_search(first: begin, last: end, m&: results, e: quoted_string_token);
353 if (found)
354 {
355 std::string str = results[1].str();
356 begin = results.suffix().first;
357 // Remove the beginning and ending quotes
358 BOOST_ASSERT(str.size() >= 2);
359 str.erase(position: str.begin());
360 str.erase(position: str.end() - 1);
361 // Unescape quotes in the middle, but nothing else (see format
362 // spec)
363 for (size_t i = 0; i + 1 < str.size() /* May change */; ++i)
364 {
365 if (str[i] == '\\' && str[i + 1] == '"')
366 {
367 str.erase(position: str.begin() + i);
368 // Don't need to adjust i
369 }
370 else if (str[i] == '\\' && str[i + 1] == '\n')
371 {
372 str.erase(position: str.begin() + i);
373 str.erase(position: str.begin() + i);
374 --i; // Invert ++ that will be applied
375 }
376 }
377 return token(token::quoted_string, str);
378 }
379 if (*begin == '<')
380 {
381 std::string::const_iterator saved_begin = begin;
382 int counter = 0;
383 do
384 {
385 if (begin == end)
386 throw_lex_error(errmsg: "Unclosed HTML string");
387 if (*begin != '<')
388 {
389 ++begin;
390 continue;
391 }
392 found = boost::regex_search(
393 first: begin, last: end, m&: results, e: xml_tag_token);
394 if (found)
395 {
396 begin = results.suffix().first;
397 if (results[1].str() == "/")
398 { // Close tag
399 --counter;
400 }
401 else if (results[2].str() == "/")
402 { // Empty tag
403 }
404 else
405 { // Open tag
406 ++counter;
407 }
408 continue;
409 }
410 found = boost::regex_search(first: begin, last: end, m&: results, e: cdata);
411 if (found)
412 {
413 begin = results.suffix().first;
414 continue;
415 }
416 throw_lex_error(errmsg: "Invalid contents in HTML string");
417 } while (counter > 0);
418 return token(
419 token::identifier, std::string(saved_begin, begin));
420 }
421 else
422 {
423 throw_lex_error(errmsg: "Invalid character");
424 return token();
425 }
426 }
427
428 token peek_token_raw()
429 {
430 if (lookahead.empty())
431 {
432 token t = get_token_raw();
433 lookahead.push_back(x: t);
434 }
435 return lookahead.front();
436 }
437
438 token get_token()
439 { // Handle string concatenation
440 token t = get_token_raw();
441 if (t.type != token::quoted_string)
442 return t;
443 std::string str = t.normalized_value;
444 while (peek_token_raw().type == token::plus)
445 {
446 get_token_raw();
447 token t2 = get_token_raw();
448 if (t2.type != token::quoted_string)
449 {
450 throw_lex_error(
451 errmsg: "Must have quoted string after string concatenation");
452 }
453 str += t2.normalized_value;
454 }
455 return token(
456 token::identifier, str); // Note that quoted_string does not get
457 // passed to the parser
458 }
459
460 void throw_lex_error(const std::string& errmsg)
461 {
462 boost::throw_exception(
463 e: lex_error(errmsg, bad_char: (begin == end ? '\0' : *begin)));
464 }
465 };
466
467 struct edge_endpoint
468 {
469 bool is_subgraph;
470 node_and_port node_ep;
471 subgraph_name subgraph_ep;
472
473 static edge_endpoint node(const node_and_port& ep)
474 {
475 edge_endpoint r;
476 r.is_subgraph = false;
477 r.node_ep = ep;
478 return r;
479 }
480
481 static edge_endpoint subgraph(const subgraph_name& ep)
482 {
483 edge_endpoint r;
484 r.is_subgraph = true;
485 r.subgraph_ep = ep;
486 return r;
487 }
488 };
489
490 struct node_or_subgraph_ref
491 {
492 bool is_subgraph;
493 std::string
494 name; // Name for subgraphs or nodes, "___root___" for root graph
495 };
496
497 static node_or_subgraph_ref noderef(const node_name& n)
498 {
499 node_or_subgraph_ref r;
500 r.is_subgraph = false;
501 r.name = n;
502 return r;
503 }
504
505 static node_or_subgraph_ref subgraphref(const subgraph_name& n)
506 {
507 node_or_subgraph_ref r;
508 r.is_subgraph = true;
509 r.name = n;
510 return r;
511 }
512
513 typedef std::vector< node_or_subgraph_ref > subgraph_member_list;
514
515 struct subgraph_info
516 {
517 properties def_node_props;
518 properties def_edge_props;
519 subgraph_member_list members;
520 };
521
522 struct parser
523 {
524 tokenizer the_tokenizer;
525 std::vector< token > lookahead;
526 parser_result& r;
527 std::map< subgraph_name, subgraph_info > subgraphs;
528 std::string current_subgraph_name;
529 int sgcounter; // Counter for anonymous subgraphs
530 std::set< std::pair< node_name, node_name > >
531 existing_edges; // Used for checking in strict graphs
532
533 subgraph_info& current() { return subgraphs[current_subgraph_name]; }
534 properties& current_graph_props()
535 {
536 return r.graph_props[current_subgraph_name];
537 }
538 subgraph_member_list& current_members() { return current().members; }
539
540 parser(const std::string& gr, parser_result& result)
541 : the_tokenizer(gr), lookahead(), r(result), sgcounter(0)
542 {
543 current_subgraph_name = "___root___";
544 current() = subgraph_info(); // Initialize root graph
545 current_graph_props().clear();
546 current_members().clear();
547 }
548
549 token get()
550 {
551 if (lookahead.empty())
552 {
553 token t = the_tokenizer.get_token();
554 return t;
555 }
556 else
557 {
558 token t = lookahead.front();
559 lookahead.erase(position: lookahead.begin());
560 return t;
561 }
562 }
563
564 token peek()
565 {
566 if (lookahead.empty())
567 {
568 lookahead.push_back(x: the_tokenizer.get_token());
569 }
570 return lookahead.front();
571 }
572
573 void error(const std::string& str)
574 {
575 boost::throw_exception(e: parse_error(errmsg: str, bad_token: peek()));
576 }
577
578 void parse_graph(bool want_directed)
579 {
580 bool is_strict = false;
581 bool is_directed = false;
582 std::string name;
583 if (peek().type == token::kw_strict)
584 {
585 get();
586 is_strict = true;
587 }
588 switch (peek().type)
589 {
590 case token::kw_graph:
591 is_directed = false;
592 break;
593 case token::kw_digraph:
594 is_directed = true;
595 break;
596 default:
597 error(str: "Wanted \"graph\" or \"digraph\"");
598 }
599 r.graph_is_directed = is_directed; // Used to check edges
600 r.graph_is_strict = is_strict;
601 if (want_directed != r.graph_is_directed)
602 {
603 if (want_directed)
604 {
605 boost::throw_exception(e: boost::undirected_graph_error());
606 }
607 else
608 {
609 boost::throw_exception(e: boost::directed_graph_error());
610 }
611 }
612 get();
613 switch (peek().type)
614 {
615 case token::identifier:
616 name = peek().normalized_value;
617 get();
618 break;
619 case token::left_brace:
620 break;
621 default:
622 error(str: "Wanted a graph name or left brace");
623 }
624 if (peek().type == token::left_brace)
625 get();
626 else
627 error(str: "Wanted a left brace to start the graph");
628 parse_stmt_list();
629 if (peek().type == token::right_brace)
630 get();
631 else
632 error(str: "Wanted a right brace to end the graph");
633 if (peek().type == token::eof)
634 {
635 }
636 else
637 error(str: "Wanted end of file");
638 }
639
640 void parse_stmt_list()
641 {
642 while (true)
643 {
644 if (peek().type == token::right_brace)
645 return;
646 parse_stmt();
647 if (peek().type == token::semicolon)
648 get();
649 }
650 }
651
652 void parse_stmt()
653 {
654 switch (peek().type)
655 {
656 case token::kw_node:
657 case token::kw_edge:
658 case token::kw_graph:
659 parse_attr_stmt();
660 break;
661 case token::kw_subgraph:
662 case token::left_brace:
663 case token::identifier:
664 {
665 token id = get();
666 if (id.type == token::identifier && peek().type == token::equal)
667 { // Graph property
668 get();
669 if (peek().type != token::identifier)
670 error(str: "Wanted identifier as right side of =");
671 token id2 = get();
672 current_graph_props()[id.normalized_value]
673 = id2.normalized_value;
674 }
675 else
676 {
677 edge_endpoint ep = parse_endpoint_rest(first_token: id);
678 if (peek().type == token::dash_dash
679 || peek().type == token::dash_greater)
680 { // Edge
681 parse_edge_stmt(lhs: ep);
682 }
683 else
684 {
685 if (!ep.is_subgraph)
686 { // Only nodes can have attribute lists
687 // This node already exists because of its first
688 // mention (properties set to defaults by
689 // parse_node_and_port, called by
690 // parse_endpoint_rest)
691 properties this_node_props;
692 if (peek().type == token::left_bracket)
693 {
694 parse_attr_list(props&: this_node_props);
695 }
696 for (properties::const_iterator i
697 = this_node_props.begin();
698 i != this_node_props.end(); ++i)
699 {
700 // Override old properties with same names
701 r.nodes[ep.node_ep.name][i->first] = i->second;
702 }
703 current_members().push_back(
704 x: noderef(n: ep.node_ep.name));
705 }
706 else
707 {
708 current_members().push_back(
709 x: subgraphref(n: ep.subgraph_ep));
710 }
711 }
712 }
713 break;
714 }
715 default:
716 error(str: "Invalid start token for statement");
717 }
718 }
719
720 void parse_attr_stmt()
721 {
722 switch (get().type)
723 {
724 case token::kw_graph:
725 parse_attr_list(props&: current_graph_props());
726 break;
727 case token::kw_node:
728 parse_attr_list(props&: current().def_node_props);
729 break;
730 case token::kw_edge:
731 parse_attr_list(props&: current().def_edge_props);
732 break;
733 default:
734 BOOST_ASSERT(!"Bad attr_stmt case");
735 }
736 }
737
738 edge_endpoint parse_endpoint()
739 {
740 switch (peek().type)
741 {
742 case token::kw_subgraph:
743 case token::left_brace:
744 case token::identifier:
745 {
746 token first = get();
747 return parse_endpoint_rest(first_token: first);
748 }
749 default:
750 {
751 error(str: "Wanted \"subgraph\", \"{\", or identifier to start node "
752 "or subgraph");
753 return edge_endpoint();
754 }
755 }
756 }
757
758 edge_endpoint parse_endpoint_rest(const token& first_token)
759 {
760 switch (first_token.type)
761 {
762 case token::kw_subgraph:
763 case token::left_brace:
764 return edge_endpoint::subgraph(ep: parse_subgraph(first_token));
765 default:
766 return edge_endpoint::node(ep: parse_node_and_port(name: first_token));
767 }
768 }
769
770 subgraph_name parse_subgraph(const token& first_token)
771 {
772 std::string name;
773 bool is_anonymous = true;
774 if (first_token.type == token::kw_subgraph)
775 {
776 if (peek().type == token::identifier)
777 {
778 name = get().normalized_value;
779 is_anonymous = false;
780 }
781 }
782 if (is_anonymous)
783 {
784 name = "___subgraph_"
785 + boost::lexical_cast< std::string >(arg: ++sgcounter);
786 }
787 if (subgraphs.find(x: name) == subgraphs.end())
788 {
789 subgraphs[name]
790 = current(); // Initialize properties and defaults
791 subgraphs[name].members.clear(); // Except member list
792 }
793 if (first_token.type == token::kw_subgraph
794 && peek().type != token::left_brace)
795 {
796 if (is_anonymous)
797 error(str: "Subgraph reference needs a name");
798 return name;
799 }
800 subgraph_name old_sg = current_subgraph_name;
801 current_subgraph_name = name;
802 if (peek().type == token::left_brace)
803 get();
804 else
805 error(str: "Wanted left brace to start subgraph");
806 parse_stmt_list();
807 if (peek().type == token::right_brace)
808 get();
809 else
810 error(str: "Wanted right brace to end subgraph");
811 current_subgraph_name = old_sg;
812 return name;
813 }
814
815 node_and_port parse_node_and_port(const token& name)
816 {
817 // A node ID is a node name, followed optionally by a port angle and
818 // a port location (in either order); a port location is either :id,
819 // :id:id, or :(id,id); the last two forms are treated as equivalent
820 // although I am not sure about that.
821 node_and_port id;
822 id.name = name.normalized_value;
823 parse_more:
824 switch (peek().type)
825 {
826 case token::at:
827 {
828 get();
829 if (peek().type != token::identifier)
830 error(str: "Wanted identifier as port angle");
831 if (!id.angle.empty())
832 error(str: "Duplicate port angle");
833 id.angle = get().normalized_value;
834 goto parse_more;
835 }
836 case token::colon:
837 {
838 get();
839 if (!id.location.empty())
840 error(str: "Duplicate port location");
841 switch (peek().type)
842 {
843 case token::identifier:
844 {
845 id.location.push_back(x: get().normalized_value);
846 switch (peek().type)
847 {
848 case token::colon:
849 {
850 get();
851 if (peek().type != token::identifier)
852 error(str: "Wanted identifier as port location");
853 id.location.push_back(x: get().normalized_value);
854 goto parse_more;
855 }
856 default:
857 goto parse_more;
858 }
859 }
860 case token::left_paren:
861 {
862 get();
863 if (peek().type != token::identifier)
864 error(str: "Wanted identifier as first element of port "
865 "location");
866 id.location.push_back(x: get().normalized_value);
867 if (peek().type != token::comma)
868 error(str: "Wanted comma between parts of port location");
869 get();
870 if (peek().type != token::identifier)
871 error(str: "Wanted identifier as second element of port "
872 "location");
873 id.location.push_back(x: get().normalized_value);
874 if (peek().type != token::right_paren)
875 error(
876 str: "Wanted right parenthesis to close port location");
877 get();
878 goto parse_more;
879 }
880 default:
881 error(str: "Wanted identifier or left parenthesis as start of "
882 "port location");
883 }
884 }
885 default:
886 break;
887 }
888 if (r.nodes.find(x: id.name) == r.nodes.end())
889 { // First mention
890 r.nodes[id.name] = current().def_node_props;
891 }
892 return id;
893 }
894
895 void parse_edge_stmt(const edge_endpoint& lhs)
896 {
897 std::vector< edge_endpoint > nodes_in_chain(1, lhs);
898 while (true)
899 {
900 bool leave_loop = true;
901 switch (peek().type)
902 {
903 case token::dash_dash:
904 {
905 if (r.graph_is_directed)
906 error(str: "Using -- in directed graph");
907 get();
908 nodes_in_chain.push_back(x: parse_endpoint());
909 leave_loop = false;
910 break;
911 }
912 case token::dash_greater:
913 {
914 if (!r.graph_is_directed)
915 error(str: "Using -> in undirected graph");
916 get();
917 nodes_in_chain.push_back(x: parse_endpoint());
918 leave_loop = false;
919 break;
920 }
921 default:
922 leave_loop = true;
923 break;
924 }
925 if (leave_loop)
926 break;
927 }
928 properties this_edge_props = current().def_edge_props;
929 if (peek().type == token::left_bracket)
930 parse_attr_list(props&: this_edge_props);
931 BOOST_ASSERT(nodes_in_chain.size()
932 >= 2); // Should be in node parser otherwise
933 for (size_t i = 0; i + 1 < nodes_in_chain.size(); ++i)
934 {
935 do_orig_edge(
936 src: nodes_in_chain[i], tgt: nodes_in_chain[i + 1], props: this_edge_props);
937 }
938 }
939
940 // Do an edge from the file, the edge may need to be expanded if it
941 // connects to a subgraph
942 void do_orig_edge(const edge_endpoint& src, const edge_endpoint& tgt,
943 const properties& props)
944 {
945 std::set< node_and_port > sources = get_recursive_members(orig_ep: src);
946 std::set< node_and_port > targets = get_recursive_members(orig_ep: tgt);
947 for (std::set< node_and_port >::const_iterator i = sources.begin();
948 i != sources.end(); ++i)
949 {
950 for (std::set< node_and_port >::const_iterator j
951 = targets.begin();
952 j != targets.end(); ++j)
953 {
954 do_edge(src: *i, tgt: *j, props);
955 }
956 }
957 }
958
959 // Get nodes in an edge_endpoint, recursively
960 std::set< node_and_port > get_recursive_members(
961 const edge_endpoint& orig_ep)
962 {
963 std::set< node_and_port > result;
964 std::vector< edge_endpoint > worklist(1, orig_ep);
965 std::set< subgraph_name > done;
966 while (!worklist.empty())
967 {
968 edge_endpoint ep = worklist.back();
969 worklist.pop_back();
970 if (ep.is_subgraph)
971 {
972 if (done.find(x: ep.subgraph_ep) == done.end())
973 {
974 done.insert(x: ep.subgraph_ep);
975 std::map< subgraph_name, subgraph_info >::const_iterator
976 info_i
977 = subgraphs.find(x: ep.subgraph_ep);
978 if (info_i != subgraphs.end())
979 {
980 const subgraph_member_list& members
981 = info_i->second.members;
982 for (subgraph_member_list::const_iterator i
983 = members.begin();
984 i != members.end(); ++i)
985 {
986 node_or_subgraph_ref ref = *i;
987 if (ref.is_subgraph)
988 {
989 worklist.push_back(
990 x: edge_endpoint::subgraph(ep: ref.name));
991 }
992 else
993 {
994 node_and_port np;
995 np.name = ref.name;
996 worklist.push_back(x: edge_endpoint::node(ep: np));
997 }
998 }
999 }
1000 }
1001 }
1002 else
1003 {
1004 result.insert(x: ep.node_ep);
1005 }
1006 }
1007 return result;
1008 }
1009
1010 // Do a fixed-up edge, with only nodes as endpoints
1011 void do_edge(const node_and_port& src, const node_and_port& tgt,
1012 const properties& props)
1013 {
1014 if (r.graph_is_strict)
1015 {
1016 if (src.name == tgt.name)
1017 return;
1018 std::pair< node_name, node_name > tag(src.name, tgt.name);
1019 if (existing_edges.find(x: tag) != existing_edges.end())
1020 {
1021 return; // Parallel edge
1022 }
1023 existing_edges.insert(x: tag);
1024 }
1025 edge_info e;
1026 e.source = src;
1027 e.target = tgt;
1028 e.props = props;
1029 r.edges.push_back(x: e);
1030 }
1031
1032 void parse_attr_list(properties& props)
1033 {
1034 while (true)
1035 {
1036 if (peek().type == token::left_bracket)
1037 get();
1038 else
1039 error(str: "Wanted left bracket to start attribute list");
1040 while (true)
1041 {
1042 switch (peek().type)
1043 {
1044 case token::right_bracket:
1045 break;
1046 case token::identifier:
1047 {
1048 std::string lhs = get().normalized_value;
1049 std::string rhs = "true";
1050 if (peek().type == token::equal)
1051 {
1052 get();
1053 if (peek().type != token::identifier)
1054 error(
1055 str: "Wanted identifier as value of attribute");
1056 rhs = get().normalized_value;
1057 }
1058 props[lhs] = rhs;
1059 break;
1060 }
1061 default:
1062 error(str: "Wanted identifier as name of attribute");
1063 }
1064 if (peek().type == token::comma
1065 || peek().type == token::semicolon)
1066 get();
1067 else if (peek().type == token::right_bracket)
1068 break;
1069 }
1070 if (peek().type == token::right_bracket)
1071 get();
1072 else
1073 error(str: "Wanted right bracket to end attribute list");
1074 if (peek().type != token::left_bracket)
1075 break;
1076 }
1077 }
1078 };
1079
1080 void parse_graphviz_from_string(
1081 const std::string& str, parser_result& result, bool want_directed)
1082 {
1083 parser p(str, result);
1084 p.parse_graph(want_directed);
1085 }
1086
1087 // Some debugging stuff
1088 std::ostream& operator<<(std::ostream& o, const node_and_port& n)
1089 {
1090 o << n.name;
1091 for (size_t i = 0; i < n.location.size(); ++i)
1092 {
1093 o << ":" << n.location[i];
1094 }
1095 if (!n.angle.empty())
1096 o << "@" << n.angle;
1097 return o;
1098 }
1099
1100 // Can't be operator<< because properties is just an std::map
1101 std::string props_to_string(const properties& props)
1102 {
1103 std::string result = "[";
1104 for (properties::const_iterator i = props.begin(); i != props.end();
1105 ++i)
1106 {
1107 if (i != props.begin())
1108 result += ", ";
1109 result += i->first + "=" + i->second;
1110 }
1111 result += "]";
1112 return result;
1113 }
1114
1115 void translate_results_to_graph(
1116 const parser_result& r, ::boost::detail::graph::mutate_graph* mg)
1117 {
1118 typedef boost::detail::graph::edge_t edge;
1119 for (std::map< node_name, properties >::const_iterator i
1120 = r.nodes.begin();
1121 i != r.nodes.end(); ++i)
1122 {
1123 // std::cerr << i->first << " " << props_to_string(i->second) <<
1124 // std::endl;
1125 mg->do_add_vertex(node: i->first);
1126 for (properties::const_iterator j = i->second.begin();
1127 j != i->second.end(); ++j)
1128 {
1129 mg->set_node_property(key: j->first, node: i->first, value: j->second);
1130 }
1131 }
1132 for (std::vector< edge_info >::const_iterator i = r.edges.begin();
1133 i != r.edges.end(); ++i)
1134 {
1135 const edge_info& ei = *i;
1136 // std::cerr << ei.source << " -> " << ei.target << " " <<
1137 // props_to_string(ei.props) << std::endl;
1138 edge e = edge::new_edge();
1139 mg->do_add_edge(edge: e, source: ei.source.name, target: ei.target.name);
1140 for (properties::const_iterator j = ei.props.begin();
1141 j != ei.props.end(); ++j)
1142 {
1143 mg->set_edge_property(key: j->first, edge: e, value: j->second);
1144 }
1145 }
1146 std::map< subgraph_name, properties >::const_iterator root_graph_props_i
1147 = r.graph_props.find(x: "___root___");
1148 BOOST_ASSERT(
1149 root_graph_props_i != r.graph_props.end()); // Should not happen
1150 const properties& root_graph_props = root_graph_props_i->second;
1151 // std::cerr << "ending graph " << props_to_string(root_graph_props) <<
1152 // std::endl;
1153 for (properties::const_iterator i = root_graph_props.begin();
1154 i != root_graph_props.end(); ++i)
1155 {
1156 mg->set_graph_property(key: i->first, value: i->second);
1157 }
1158 mg->finish_building_graph();
1159 }
1160
1161} // end namespace read_graphviz_detail
1162
1163namespace detail
1164{
1165 namespace graph
1166 {
1167
1168 BOOST_GRAPH_DECL bool read_graphviz_new(
1169 const std::string& str, boost::detail::graph::mutate_graph* mg)
1170 {
1171 read_graphviz_detail::parser_result parsed_file;
1172 read_graphviz_detail::parse_graphviz_from_string(
1173 str, result&: parsed_file, want_directed: mg->is_directed());
1174 read_graphviz_detail::translate_results_to_graph(r: parsed_file, mg);
1175 return true;
1176 }
1177
1178 } // end namespace graph
1179} // end namespace detail
1180
1181} // end namespace boost
1182
1183// GraphViz format notes (tested using "dot version 1.13 (v16) (Mon August 23,
1184// 2004)", grammar from references in read_graphviz_new.hpp):
1185
1186// Subgraphs don't nest (all a0 subgraphs are the same thing), but a node or
1187// subgraph can have multiple parents (sources online say that the layout
1188// algorithms can't handle non-tree structures of clusters, but it seems to
1189// read them the same from the file). The containment relation is required to
1190// be a DAG, though; it appears that a node or subgraph can't be moved into an
1191// ancestor of a subgraph where it already was (we don't enforce that but do a
1192// DFS when finding members to prevent cycles). Nodes get their properties by
1193// when they are first mentioned, and can only have them overridden after that
1194// by explicit properties on that particular node. Closing and reopening the
1195// same subgraph name adds to its members, and graph properties and node/edge
1196// defaults are preserved in that subgraph. The members of a subgraph used in
1197// an edge are gathered when the edge is read, even if new members are added to
1198// the subgraph later. Ports are allowed in a lot more places in the grammar
1199// than Dot uses. For example, declaring a node seems to ignore ports, and I
1200// don't think it's possible to set properties on a particular port. Adding an
1201// edge between two ports on the same node seems to make Dot unhappy (crashed
1202// for me).
1203
1204// Test graph for GraphViz behavior on strange combinations of subgraphs and
1205// such. I don't have anywhere else to put this file.
1206
1207#if 0
1208dIGRaph foo {
1209 node [color=blue]
1210 subgraph a -> b
1211 subgraph a {c}
1212 subgraph a -> d
1213 subgraph a {node [color=red]; e}
1214 subgraph a -> f
1215 subgraph a {g} -> h
1216 subgraph a {node [color=green]; i} -> j
1217 subgraph a {node [color=yellow]}
1218
1219 subgraph a0 {node [color=black]; subgraph a1 {node [color=white]}}
1220 node [color=pink] zz
1221 subgraph a0 {x1}
1222 subgraph a0 {subgraph a1 {x2}}
1223
1224 subgraph a0 -> x3
1225 subgraph a0 {subgraph a1 -> x3}
1226 x3
1227 subgraph a0 {subgraph a0 {node [color=xxx]; x2} x7}
1228 x2 [color=yyy]
1229 subgraph cluster_ax {foo; subgraph a0}
1230 subgraph a0 {foo2}
1231 subgraph cluster_ax {foo3}
1232 // subgraph a0 -> subgraph a0
1233
1234 bgcolor=yellow
1235 subgraph cluster_a2 {y1}
1236 // y1:n -> y1:(s,3)@se
1237 y1@se [color=green]
1238 y1@n [color=red]
1239}
1240#endif
1241

source code of boost/libs/graph/src/read_graphviz_new.cpp