1 | //===--- Disambiguate.h - Find the best tree in the forest -------*- C++-*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // A GLR parse forest represents every possible parse tree for the source code. |
10 | // |
11 | // Before we can do useful analysis/editing of the code, we need to pick a |
12 | // single tree which we think is accurate. We use three main types of clues: |
13 | // |
14 | // A) Semantic language rules may restrict which parses are allowed. |
15 | // For example, `string string string X` is *grammatical* C++, but only a |
16 | // single type-name is allowed in a decl-specifier-sequence. |
17 | // Where possible, these interpretations are forbidden by guards. |
18 | // Sometimes this isn't possible, or we want our parser to be lenient. |
19 | // |
20 | // B) Some constructs are rarer, while others are common. |
21 | // For example `a<b>::c` is often a template specialization, and rarely a |
22 | // double comparison between a, b, and c. |
23 | // |
24 | // C) Identifier text hints whether they name types/values/templates etc. |
25 | // "std" is usually a namespace, a project index may also guide us. |
26 | // Hints may be within the document: if one occurrence of 'foo' is a variable |
27 | // then the others probably are too. |
28 | // (Text need not match: similar CaseStyle can be a weak hint, too). |
29 | // |
30 | //----------------------------------------------------------------------------// |
31 | // |
32 | // Mechanically, we replace each ambiguous node with its best alternative. |
33 | // |
34 | // "Best" is determined by assigning bonuses/penalties to nodes, to express |
35 | // the clues of type A and B above. A forest node representing an unlikely |
36 | // parse would apply a penalty to every subtree is is present in. |
37 | // Disambiguation proceeds bottom-up, so that the score of each alternative |
38 | // is known when a decision is made. |
39 | // |
40 | // Identifier-based hints within the document mean some nodes should be |
41 | // *correlated*. Rather than resolve these simultaneously, we make the most |
42 | // certain decisions first and use these results to update bonuses elsewhere. |
43 | // |
44 | //===----------------------------------------------------------------------===// |
45 | |
46 | #include "clang-pseudo/Forest.h" |
47 | |
48 | namespace clang::pseudo { |
49 | |
50 | struct DisambiguateParams {}; |
51 | |
52 | // Maps ambiguous nodes onto the index of their preferred alternative. |
53 | using Disambiguation = llvm::DenseMap<const ForestNode *, unsigned>; |
54 | |
55 | // Resolve each ambiguous node in the forest. |
56 | // Maps each ambiguous node to the index of the chosen alternative. |
57 | // FIXME: current implementation is a placeholder and chooses arbitrarily. |
58 | Disambiguation disambiguate(const ForestNode *Root, |
59 | const DisambiguateParams &Params); |
60 | |
61 | // Remove all ambiguities from the forest, resolving them according to Disambig. |
62 | void removeAmbiguities(ForestNode *&Root, const Disambiguation &Disambig); |
63 | |
64 | } // namespace clang::pseudo |
65 | |