1//===--- Disambiguate.h - Find the best tree in the forest -------*- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// A GLR parse forest represents every possible parse tree for the source code.
10//
11// Before we can do useful analysis/editing of the code, we need to pick a
12// single tree which we think is accurate. We use three main types of clues:
13//
14// A) Semantic language rules may restrict which parses are allowed.
15// For example, `string string string X` is *grammatical* C++, but only a
16// single type-name is allowed in a decl-specifier-sequence.
17// Where possible, these interpretations are forbidden by guards.
18// Sometimes this isn't possible, or we want our parser to be lenient.
19//
20// B) Some constructs are rarer, while others are common.
21// For example `a<b>::c` is often a template specialization, and rarely a
22// double comparison between a, b, and c.
23//
24// C) Identifier text hints whether they name types/values/templates etc.
25// "std" is usually a namespace, a project index may also guide us.
26// Hints may be within the document: if one occurrence of 'foo' is a variable
27// then the others probably are too.
28// (Text need not match: similar CaseStyle can be a weak hint, too).
29//
30//----------------------------------------------------------------------------//
31//
32// Mechanically, we replace each ambiguous node with its best alternative.
33//
34// "Best" is determined by assigning bonuses/penalties to nodes, to express
35// the clues of type A and B above. A forest node representing an unlikely
36// parse would apply a penalty to every subtree is is present in.
37// Disambiguation proceeds bottom-up, so that the score of each alternative
38// is known when a decision is made.
39//
40// Identifier-based hints within the document mean some nodes should be
41// *correlated*. Rather than resolve these simultaneously, we make the most
42// certain decisions first and use these results to update bonuses elsewhere.
43//
44//===----------------------------------------------------------------------===//
45
46#include "clang-pseudo/Forest.h"
47
48namespace clang::pseudo {
49
50struct DisambiguateParams {};
51
52// Maps ambiguous nodes onto the index of their preferred alternative.
53using Disambiguation = llvm::DenseMap<const ForestNode *, unsigned>;
54
55// Resolve each ambiguous node in the forest.
56// Maps each ambiguous node to the index of the chosen alternative.
57// FIXME: current implementation is a placeholder and chooses arbitrarily.
58Disambiguation disambiguate(const ForestNode *Root,
59 const DisambiguateParams &Params);
60
61// Remove all ambiguities from the forest, resolving them according to Disambig.
62void removeAmbiguities(ForestNode *&Root, const Disambiguation &Disambig);
63
64} // namespace clang::pseudo
65

source code of clang-tools-extra/pseudo/include/clang-pseudo/Disambiguate.h