| 1 | //===--- Quality.h - Ranking alternatives for ambiguous queries --*- C++-*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// |
| 9 | /// Some operations such as code completion produce a set of candidates. |
| 10 | /// Usually the user can choose between them, but we should put the best options |
| 11 | /// at the top (they're easier to select, and more likely to be seen). |
| 12 | /// |
| 13 | /// This file defines building blocks for ranking candidates. |
| 14 | /// It's used by the features directly and also in the implementation of |
| 15 | /// indexes, as indexes also need to heuristically limit their results. |
| 16 | /// |
| 17 | /// The facilities here are: |
| 18 | /// - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString |
| 19 | /// These are structured in a way that they can be debugged, and are fairly |
| 20 | /// consistent regardless of the source. |
| 21 | /// - compute scores from scoring signals. These are suitable for sorting. |
| 22 | /// - sorting utilities like the TopN container. |
| 23 | /// These could be split up further to isolate dependencies if we care. |
| 24 | /// |
| 25 | //===----------------------------------------------------------------------===// |
| 26 | |
| 27 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H |
| 28 | #define |
| 29 | |
| 30 | #include "FileDistance.h" |
| 31 | #include "clang/Sema/CodeCompleteConsumer.h" |
| 32 | #include "llvm/ADT/StringRef.h" |
| 33 | #include "llvm/ADT/StringSet.h" |
| 34 | #include <algorithm> |
| 35 | #include <functional> |
| 36 | #include <optional> |
| 37 | #include <vector> |
| 38 | |
| 39 | namespace llvm { |
| 40 | class raw_ostream; |
| 41 | } // namespace llvm |
| 42 | |
| 43 | namespace clang { |
| 44 | class CodeCompletionResult; |
| 45 | |
| 46 | namespace clangd { |
| 47 | struct ASTSignals; |
| 48 | struct Symbol; |
| 49 | class URIDistance; |
| 50 | |
| 51 | // Signals structs are designed to be aggregated from 0 or more sources. |
| 52 | // A default instance has neutral signals, and sources are merged into it. |
| 53 | // They can be dumped for debugging, and evaluate()d into a score. |
| 54 | |
| 55 | /// Attributes of a symbol that affect how much we like it. |
| 56 | struct SymbolQualitySignals { |
| 57 | bool Deprecated = false; |
| 58 | bool ReservedName = false; // __foo, _Foo are usually implementation details. |
| 59 | // FIXME: make these findable once user types _. |
| 60 | bool ImplementationDetail = false; |
| 61 | unsigned References = 0; |
| 62 | |
| 63 | enum SymbolCategory { |
| 64 | Unknown = 0, |
| 65 | Variable, |
| 66 | Macro, |
| 67 | Type, |
| 68 | Function, |
| 69 | Constructor, |
| 70 | Destructor, |
| 71 | Namespace, |
| 72 | Keyword, |
| 73 | Operator, |
| 74 | } Category = Unknown; |
| 75 | |
| 76 | void merge(const CodeCompletionResult &SemaCCResult); |
| 77 | void merge(const Symbol &IndexResult); |
| 78 | |
| 79 | // Condense these signals down to a single number, higher is better. |
| 80 | float evaluateHeuristics() const; |
| 81 | }; |
| 82 | llvm::raw_ostream &operator<<(llvm::raw_ostream &, |
| 83 | const SymbolQualitySignals &); |
| 84 | |
| 85 | /// Attributes of a symbol-query pair that affect how much we like it. |
| 86 | struct SymbolRelevanceSignals { |
| 87 | /// The name of the symbol (for ContextWords). Must be explicitly assigned. |
| 88 | llvm::StringRef Name; |
| 89 | /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned. |
| 90 | float NameMatch = 1; |
| 91 | /// Lowercase words relevant to the context (e.g. near the completion point). |
| 92 | llvm::StringSet<>* ContextWords = nullptr; |
| 93 | bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private). |
| 94 | /// Whether fixits needs to be applied for that completion or not. |
| 95 | bool NeedsFixIts = false; |
| 96 | bool InBaseClass = false; // A member from base class of the accessed class. |
| 97 | |
| 98 | URIDistance *FileProximityMatch = nullptr; |
| 99 | /// These are used to calculate proximity between the index symbol and the |
| 100 | /// query. |
| 101 | llvm::StringRef SymbolURI; |
| 102 | /// FIXME: unify with index proximity score - signals should be |
| 103 | /// source-independent. |
| 104 | /// Proximity between best declaration and the query. [0-1], 1 is closest. |
| 105 | float SemaFileProximityScore = 0; |
| 106 | |
| 107 | // Scope proximity is only considered (both index and sema) when this is set. |
| 108 | ScopeDistance *ScopeProximityMatch = nullptr; |
| 109 | std::optional<llvm::StringRef> SymbolScope; |
| 110 | // A symbol from sema should be accessible from the current scope. |
| 111 | bool SemaSaysInScope = false; |
| 112 | |
| 113 | // An approximate measure of where we expect the symbol to be used. |
| 114 | enum AccessibleScope { |
| 115 | FunctionScope, |
| 116 | ClassScope, |
| 117 | FileScope, |
| 118 | GlobalScope, |
| 119 | } Scope = GlobalScope; |
| 120 | |
| 121 | enum QueryType { |
| 122 | CodeComplete, |
| 123 | Generic, |
| 124 | } Query = Generic; |
| 125 | |
| 126 | CodeCompletionContext::Kind Context = CodeCompletionContext::CCC_Other; |
| 127 | |
| 128 | // Whether symbol is an instance member of a class. |
| 129 | bool IsInstanceMember = false; |
| 130 | |
| 131 | // Whether clang provided a preferred type in the completion context. |
| 132 | bool HadContextType = false; |
| 133 | // Whether a source completion item or a symbol had a type information. |
| 134 | bool HadSymbolType = false; |
| 135 | // Whether the item matches the type expected in the completion context. |
| 136 | bool TypeMatchesPreferred = false; |
| 137 | |
| 138 | /// Length of the unqualified partial name of Symbol typed in |
| 139 | /// CompletionPrefix. |
| 140 | unsigned FilterLength = 0; |
| 141 | |
| 142 | const ASTSignals *MainFileSignals = nullptr; |
| 143 | /// Number of references to the candidate in the main file. |
| 144 | unsigned MainFileRefs = 0; |
| 145 | /// Number of unique symbols in the main file which belongs to candidate's |
| 146 | /// namespace. This indicates how relevant the namespace is in the current |
| 147 | /// file. |
| 148 | unsigned ScopeRefsInFile = 0; |
| 149 | |
| 150 | /// Set of derived signals computed by calculateDerivedSignals(). Must not be |
| 151 | /// set explicitly. |
| 152 | struct DerivedSignals { |
| 153 | /// Whether Name contains some word from context. |
| 154 | bool NameMatchesContext = false; |
| 155 | /// Min distance between SymbolURI and all the headers included by the TU. |
| 156 | unsigned FileProximityDistance = FileDistance::Unreachable; |
| 157 | /// Min distance between SymbolScope and all the available scopes. |
| 158 | unsigned ScopeProximityDistance = FileDistance::Unreachable; |
| 159 | }; |
| 160 | |
| 161 | DerivedSignals calculateDerivedSignals() const; |
| 162 | |
| 163 | void merge(const CodeCompletionResult &SemaResult); |
| 164 | void merge(const Symbol &IndexResult); |
| 165 | void computeASTSignals(const CodeCompletionResult &SemaResult); |
| 166 | |
| 167 | // Condense these signals down to a single number, higher is better. |
| 168 | float evaluateHeuristics() const; |
| 169 | }; |
| 170 | llvm::raw_ostream &operator<<(llvm::raw_ostream &, |
| 171 | const SymbolRelevanceSignals &); |
| 172 | |
| 173 | /// Combine symbol quality and relevance into a single score. |
| 174 | float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance); |
| 175 | |
| 176 | /// Same semantics as CodeComplete::Score. Quality score and Relevance score |
| 177 | /// have been removed since DecisionForest cannot assign individual scores to |
| 178 | /// Quality and Relevance signals. |
| 179 | struct DecisionForestScores { |
| 180 | float Total = 0.f; |
| 181 | float ExcludingName = 0.f; |
| 182 | }; |
| 183 | |
| 184 | DecisionForestScores |
| 185 | evaluateDecisionForest(const SymbolQualitySignals &Quality, |
| 186 | const SymbolRelevanceSignals &Relevance, float Base); |
| 187 | |
| 188 | /// TopN<T> is a lossy container that preserves only the "best" N elements. |
| 189 | template <typename T, typename Compare = std::greater<T>> class TopN { |
| 190 | public: |
| 191 | using value_type = T; |
| 192 | TopN(size_t N, Compare Greater = Compare()) |
| 193 | : N(N), Greater(std::move(Greater)) {} |
| 194 | |
| 195 | // Adds a candidate to the set. |
| 196 | // Returns true if a candidate was dropped to get back under N. |
| 197 | bool push(value_type &&V) { |
| 198 | bool Dropped = false; |
| 199 | if (Heap.size() >= N) { |
| 200 | Dropped = true; |
| 201 | if (N > 0 && Greater(V, Heap.front())) { |
| 202 | std::pop_heap(Heap.begin(), Heap.end(), Greater); |
| 203 | Heap.back() = std::move(V); |
| 204 | std::push_heap(Heap.begin(), Heap.end(), Greater); |
| 205 | } |
| 206 | } else { |
| 207 | Heap.push_back(std::move(V)); |
| 208 | std::push_heap(Heap.begin(), Heap.end(), Greater); |
| 209 | } |
| 210 | assert(Heap.size() <= N); |
| 211 | assert(std::is_heap(Heap.begin(), Heap.end(), Greater)); |
| 212 | return Dropped; |
| 213 | } |
| 214 | |
| 215 | // Returns candidates from best to worst. |
| 216 | std::vector<value_type> items() && { |
| 217 | std::sort_heap(Heap.begin(), Heap.end(), Greater); |
| 218 | assert(Heap.size() <= N); |
| 219 | return std::move(Heap); |
| 220 | } |
| 221 | |
| 222 | private: |
| 223 | const size_t N; |
| 224 | std::vector<value_type> Heap; // Min-heap, comparator is Greater. |
| 225 | Compare Greater; |
| 226 | }; |
| 227 | |
| 228 | /// Returns a string that sorts in the same order as (-Score, Tiebreak), for |
| 229 | /// LSP. (The highest score compares smallest so it sorts at the top). |
| 230 | std::string sortText(float Score, llvm::StringRef Tiebreak = "" ); |
| 231 | |
| 232 | struct SignatureQualitySignals { |
| 233 | uint32_t NumberOfParameters = 0; |
| 234 | uint32_t NumberOfOptionalParameters = 0; |
| 235 | CodeCompleteConsumer::OverloadCandidate::CandidateKind Kind = |
| 236 | CodeCompleteConsumer::OverloadCandidate::CandidateKind::CK_Function; |
| 237 | }; |
| 238 | llvm::raw_ostream &operator<<(llvm::raw_ostream &, |
| 239 | const SignatureQualitySignals &); |
| 240 | |
| 241 | } // namespace clangd |
| 242 | } // namespace clang |
| 243 | |
| 244 | #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H |
| 245 | |