1 | //===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
///
/// \file
/// This file contains the definition of the SarifDocumentWriter class, and
/// the code that serializes its associated builders such as:
///  - \ref SarifArtifact
///  - \ref SarifArtifactLocation
///  - \ref SarifRule
///  - \ref SarifResult
//===----------------------------------------------------------------------===//
17 | #include "clang/Basic/Sarif.h" |
18 | #include "clang/Basic/SourceLocation.h" |
19 | #include "clang/Basic/SourceManager.h" |
20 | #include "llvm/ADT/ArrayRef.h" |
21 | #include "llvm/ADT/STLExtras.h" |
22 | #include "llvm/ADT/StringExtras.h" |
23 | #include "llvm/ADT/StringRef.h" |
24 | #include "llvm/Support/ConvertUTF.h" |
25 | #include "llvm/Support/JSON.h" |
26 | #include "llvm/Support/Path.h" |
27 | |
28 | #include <optional> |
29 | #include <string> |
30 | #include <utility> |
31 | |
32 | using namespace clang; |
33 | using namespace llvm; |
34 | |
35 | using clang::detail::SarifArtifact; |
36 | using clang::detail::SarifArtifactLocation; |
37 | |
38 | static StringRef getFileName(FileEntryRef FE) { |
39 | StringRef Filename = FE.getFileEntry().tryGetRealPathName(); |
40 | if (Filename.empty()) |
41 | Filename = FE.getName(); |
42 | return Filename; |
43 | } |
44 | /// \name URI |
45 | /// @{ |
46 | |
47 | /// \internal |
48 | /// \brief |
49 | /// Return the RFC3986 encoding of the input character. |
50 | /// |
51 | /// \param C Character to encode to RFC3986. |
52 | /// |
53 | /// \return The RFC3986 representation of \c C. |
54 | static std::string percentEncodeURICharacter(char C) { |
55 | // RFC 3986 claims alpha, numeric, and this handful of |
56 | // characters are not reserved for the path component and |
57 | // should be written out directly. Otherwise, percent |
58 | // encode the character and write that out instead of the |
59 | // reserved character. |
60 | if (llvm::isAlnum(C) || StringRef("-._~:@!$&'()*+,;=" ).contains(C)) |
61 | return std::string(&C, 1); |
62 | return "%" + llvm::toHex(Input: StringRef(&C, 1)); |
63 | } |
64 | |
65 | /// \internal |
66 | /// \brief Return a URI representing the given file name. |
67 | /// |
68 | /// \param Filename The filename to be represented as URI. |
69 | /// |
70 | /// \return RFC3986 URI representing the input file name. |
71 | static std::string fileNameToURI(StringRef Filename) { |
72 | SmallString<32> Ret = StringRef("file://" ); |
73 | |
74 | // Get the root name to see if it has a URI authority. |
75 | StringRef Root = sys::path::root_name(path: Filename); |
76 | if (Root.starts_with(Prefix: "//" )) { |
77 | // There is an authority, so add it to the URI. |
78 | Ret += Root.drop_front(N: 2).str(); |
79 | } else if (!Root.empty()) { |
80 | // There is no authority, so end the component and add the root to the URI. |
81 | Ret += Twine("/" + Root).str(); |
82 | } |
83 | |
84 | auto Iter = sys::path::begin(path: Filename), End = sys::path::end(path: Filename); |
85 | assert(Iter != End && "Expected there to be a non-root path component." ); |
86 | // Add the rest of the path components, encoding any reserved characters; |
87 | // we skip past the first path component, as it was handled it above. |
88 | for (StringRef Component : llvm::make_range(x: ++Iter, y: End)) { |
89 | // For reasons unknown to me, we may get a backslash with Windows native |
90 | // paths for the initial backslash following the drive component, which |
91 | // we need to ignore as a URI path part. |
92 | if (Component == "\\" ) |
93 | continue; |
94 | |
95 | // Add the separator between the previous path part and the one being |
96 | // currently processed. |
97 | Ret += "/" ; |
98 | |
99 | // URI encode the part. |
100 | for (char C : Component) { |
101 | Ret += percentEncodeURICharacter(C); |
102 | } |
103 | } |
104 | |
105 | return std::string(Ret); |
106 | } |
107 | /// @} |
108 | |
109 | /// \brief Calculate the column position expressed in the number of UTF-8 code |
110 | /// points from column start to the source location |
111 | /// |
112 | /// \param Loc The source location whose column needs to be calculated. |
113 | /// \param TokenLen Optional hint for when the token is multiple bytes long. |
114 | /// |
115 | /// \return The column number as a UTF-8 aware byte offset from column start to |
116 | /// the effective source location. |
117 | static unsigned int adjustColumnPos(FullSourceLoc Loc, |
118 | unsigned int TokenLen = 0) { |
119 | assert(!Loc.isInvalid() && "invalid Loc when adjusting column position" ); |
120 | |
121 | std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedExpansionLoc(); |
122 | std::optional<MemoryBufferRef> Buf = |
123 | Loc.getManager().getBufferOrNone(FID: LocInfo.first); |
124 | assert(Buf && "got an invalid buffer for the location's file" ); |
125 | assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) && |
126 | "token extends past end of buffer?" ); |
127 | |
128 | // Adjust the offset to be the start of the line, since we'll be counting |
129 | // Unicode characters from there until our column offset. |
130 | unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1); |
131 | unsigned int Ret = 1; |
132 | while (Off < (LocInfo.second + TokenLen)) { |
133 | Off += getNumBytesForUTF8(firstByte: Buf->getBuffer()[Off]); |
134 | Ret++; |
135 | } |
136 | |
137 | return Ret; |
138 | } |
139 | |
140 | /// \name SARIF Utilities |
141 | /// @{ |
142 | |
143 | /// \internal |
144 | json::Object createMessage(StringRef Text) { |
145 | return json::Object{{.K: "text" , .V: Text.str()}}; |
146 | } |
147 | |
148 | /// \internal |
149 | /// \pre CharSourceRange must be a token range |
150 | static json::Object createTextRegion(const SourceManager &SM, |
151 | const CharSourceRange &R) { |
152 | FullSourceLoc BeginCharLoc{R.getBegin(), SM}; |
153 | FullSourceLoc EndCharLoc{R.getEnd(), SM}; |
154 | json::Object Region{{.K: "startLine" , .V: BeginCharLoc.getExpansionLineNumber()}, |
155 | {.K: "startColumn" , .V: adjustColumnPos(Loc: BeginCharLoc)}}; |
156 | |
157 | if (BeginCharLoc == EndCharLoc) { |
158 | Region["endColumn" ] = adjustColumnPos(Loc: BeginCharLoc); |
159 | } else { |
160 | Region["endLine" ] = EndCharLoc.getExpansionLineNumber(); |
161 | Region["endColumn" ] = adjustColumnPos(Loc: EndCharLoc); |
162 | } |
163 | return Region; |
164 | } |
165 | |
166 | static json::Object createLocation(json::Object &&PhysicalLocation, |
167 | StringRef Message = "" ) { |
168 | json::Object Ret{{.K: "physicalLocation" , .V: std::move(PhysicalLocation)}}; |
169 | if (!Message.empty()) |
170 | Ret.insert(E: {.K: "message" , .V: createMessage(Text: Message)}); |
171 | return Ret; |
172 | } |
173 | |
174 | static StringRef importanceToStr(ThreadFlowImportance I) { |
175 | switch (I) { |
176 | case ThreadFlowImportance::Important: |
177 | return "important" ; |
178 | case ThreadFlowImportance::Essential: |
179 | return "essential" ; |
180 | case ThreadFlowImportance::Unimportant: |
181 | return "unimportant" ; |
182 | } |
183 | llvm_unreachable("Fully covered switch is not so fully covered" ); |
184 | } |
185 | |
186 | static StringRef resultLevelToStr(SarifResultLevel R) { |
187 | switch (R) { |
188 | case SarifResultLevel::None: |
189 | return "none" ; |
190 | case SarifResultLevel::Note: |
191 | return "note" ; |
192 | case SarifResultLevel::Warning: |
193 | return "warning" ; |
194 | case SarifResultLevel::Error: |
195 | return "error" ; |
196 | } |
197 | llvm_unreachable("Potentially un-handled SarifResultLevel. " |
198 | "Is the switch not fully covered?" ); |
199 | } |
200 | |
201 | static json::Object |
202 | createThreadFlowLocation(json::Object &&Location, |
203 | const ThreadFlowImportance &Importance) { |
204 | return json::Object{{.K: "location" , .V: std::move(Location)}, |
205 | {.K: "importance" , .V: importanceToStr(I: Importance)}}; |
206 | } |
207 | /// @} |
208 | |
209 | json::Object |
210 | SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) { |
211 | assert(R.isValid() && |
212 | "Cannot create a physicalLocation from invalid SourceRange!" ); |
213 | assert(R.isCharRange() && |
214 | "Cannot create a physicalLocation from a token range!" ); |
215 | FullSourceLoc Start{R.getBegin(), SourceMgr}; |
216 | OptionalFileEntryRef FE = Start.getExpansionLoc().getFileEntryRef(); |
217 | assert(FE && "Diagnostic does not exist within a valid file!" ); |
218 | |
219 | const std::string &FileURI = fileNameToURI(Filename: getFileName(FE: *FE)); |
220 | auto I = CurrentArtifacts.find(Key: FileURI); |
221 | |
222 | if (I == CurrentArtifacts.end()) { |
223 | uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size()); |
224 | const SarifArtifactLocation &Location = |
225 | SarifArtifactLocation::create(URI: FileURI).setIndex(Idx); |
226 | const SarifArtifact &Artifact = SarifArtifact::create(Loc: Location) |
227 | .setRoles({"resultFile" }) |
228 | .setLength(FE->getSize()) |
229 | .setMimeType("text/plain" ); |
230 | auto StatusIter = CurrentArtifacts.insert(KV: {FileURI, Artifact}); |
231 | // If inserted, ensure the original iterator points to the newly inserted |
232 | // element, so it can be used downstream. |
233 | if (StatusIter.second) |
234 | I = StatusIter.first; |
235 | } |
236 | assert(I != CurrentArtifacts.end() && "Failed to insert new artifact" ); |
237 | const SarifArtifactLocation &Location = I->second.Location; |
238 | json::Object ArtifactLocationObject{{.K: "uri" , .V: Location.URI}}; |
239 | if (Location.Index.has_value()) |
240 | ArtifactLocationObject["index" ] = *Location.Index; |
241 | return json::Object{{{.K: "artifactLocation" , .V: std::move(ArtifactLocationObject)}, |
242 | {.K: "region" , .V: createTextRegion(SM: SourceMgr, R)}}}; |
243 | } |
244 | |
245 | json::Object &SarifDocumentWriter::getCurrentTool() { |
246 | assert(!Closed && "SARIF Document is closed. " |
247 | "Need to call createRun() before using getcurrentTool!" ); |
248 | |
249 | // Since Closed = false here, expect there to be at least 1 Run, anything |
250 | // else is an invalid state. |
251 | assert(!Runs.empty() && "There are no runs associated with the document!" ); |
252 | |
253 | return *Runs.back().getAsObject()->get(K: "tool" )->getAsObject(); |
254 | } |
255 | |
256 | void SarifDocumentWriter::reset() { |
257 | CurrentRules.clear(); |
258 | CurrentArtifacts.clear(); |
259 | } |
260 | |
261 | void SarifDocumentWriter::endRun() { |
262 | // Exit early if trying to close a closed Document. |
263 | if (Closed) { |
264 | reset(); |
265 | return; |
266 | } |
267 | |
268 | // Since Closed = false here, expect there to be at least 1 Run, anything |
269 | // else is an invalid state. |
270 | assert(!Runs.empty() && "There are no runs associated with the document!" ); |
271 | |
272 | // Flush all the rules. |
273 | json::Object &Tool = getCurrentTool(); |
274 | json::Array Rules; |
275 | for (const SarifRule &R : CurrentRules) { |
276 | json::Object Config{ |
277 | {.K: "enabled" , .V: R.DefaultConfiguration.Enabled}, |
278 | {.K: "level" , .V: resultLevelToStr(R: R.DefaultConfiguration.Level)}, |
279 | {.K: "rank" , .V: R.DefaultConfiguration.Rank}}; |
280 | json::Object Rule{ |
281 | {.K: "name" , .V: R.Name}, |
282 | {.K: "id" , .V: R.Id}, |
283 | {.K: "fullDescription" , .V: json::Object{{.K: "text" , .V: R.Description}}}, |
284 | {.K: "defaultConfiguration" , .V: std::move(Config)}}; |
285 | if (!R.HelpURI.empty()) |
286 | Rule["helpUri" ] = R.HelpURI; |
287 | Rules.emplace_back(A: std::move(Rule)); |
288 | } |
289 | json::Object &Driver = *Tool.getObject(K: "driver" ); |
290 | Driver["rules" ] = std::move(Rules); |
291 | |
292 | // Flush all the artifacts. |
293 | json::Object &Run = getCurrentRun(); |
294 | json::Array *Artifacts = Run.getArray(K: "artifacts" ); |
295 | SmallVector<std::pair<StringRef, SarifArtifact>, 0> Vec; |
296 | for (const auto &[K, V] : CurrentArtifacts) |
297 | Vec.emplace_back(Args: K, Args: V); |
298 | llvm::sort(C&: Vec, Comp: llvm::less_first()); |
299 | for (const auto &[_, A] : Vec) { |
300 | json::Object Loc{{.K: "uri" , .V: A.Location.URI}}; |
301 | if (A.Location.Index.has_value()) { |
302 | Loc["index" ] = static_cast<int64_t>(*A.Location.Index); |
303 | } |
304 | json::Object Artifact; |
305 | Artifact["location" ] = std::move(Loc); |
306 | if (A.Length.has_value()) |
307 | Artifact["length" ] = static_cast<int64_t>(*A.Length); |
308 | if (!A.Roles.empty()) |
309 | Artifact["roles" ] = json::Array(A.Roles); |
310 | if (!A.MimeType.empty()) |
311 | Artifact["mimeType" ] = A.MimeType; |
312 | if (A.Offset.has_value()) |
313 | Artifact["offset" ] = *A.Offset; |
314 | Artifacts->push_back(E: json::Value(std::move(Artifact))); |
315 | } |
316 | |
317 | // Clear, reset temporaries before next run. |
318 | reset(); |
319 | |
320 | // Mark the document as closed. |
321 | Closed = true; |
322 | } |
323 | |
324 | json::Array |
325 | SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) { |
326 | json::Object Ret{{.K: "locations" , .V: json::Array{}}}; |
327 | json::Array Locs; |
328 | for (const auto &ThreadFlow : ThreadFlows) { |
329 | json::Object PLoc = createPhysicalLocation(R: ThreadFlow.Range); |
330 | json::Object Loc = createLocation(PhysicalLocation: std::move(PLoc), Message: ThreadFlow.Message); |
331 | Locs.emplace_back( |
332 | A: createThreadFlowLocation(Location: std::move(Loc), Importance: ThreadFlow.Importance)); |
333 | } |
334 | Ret["locations" ] = std::move(Locs); |
335 | return json::Array{std::move(Ret)}; |
336 | } |
337 | |
338 | json::Object |
339 | SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) { |
340 | return json::Object{{.K: "threadFlows" , .V: createThreadFlows(ThreadFlows)}}; |
341 | } |
342 | |
343 | void SarifDocumentWriter::createRun(StringRef ShortToolName, |
344 | StringRef LongToolName, |
345 | StringRef ToolVersion) { |
346 | // Clear resources associated with a previous run. |
347 | endRun(); |
348 | |
349 | // Signify a new run has begun. |
350 | Closed = false; |
351 | |
352 | json::Object Tool{ |
353 | {.K: "driver" , |
354 | .V: json::Object{{.K: "name" , .V: ShortToolName}, |
355 | {.K: "fullName" , .V: LongToolName}, |
356 | {.K: "language" , .V: "en-US" }, |
357 | {.K: "version" , .V: ToolVersion}, |
358 | {.K: "informationUri" , |
359 | .V: "https://clang.llvm.org/docs/UsersManual.html" }}}}; |
360 | json::Object TheRun{{.K: "tool" , .V: std::move(Tool)}, |
361 | {.K: "results" , .V: {}}, |
362 | {.K: "artifacts" , .V: {}}, |
363 | {.K: "columnKind" , .V: "unicodeCodePoints" }}; |
364 | Runs.emplace_back(A: std::move(TheRun)); |
365 | } |
366 | |
367 | json::Object &SarifDocumentWriter::getCurrentRun() { |
368 | assert(!Closed && |
369 | "SARIF Document is closed. " |
370 | "Can only getCurrentRun() if document is opened via createRun(), " |
371 | "create a run first" ); |
372 | |
373 | // Since Closed = false here, expect there to be at least 1 Run, anything |
374 | // else is an invalid state. |
375 | assert(!Runs.empty() && "There are no runs associated with the document!" ); |
376 | return *Runs.back().getAsObject(); |
377 | } |
378 | |
379 | size_t SarifDocumentWriter::createRule(const SarifRule &Rule) { |
380 | size_t Ret = CurrentRules.size(); |
381 | CurrentRules.emplace_back(Args: Rule); |
382 | return Ret; |
383 | } |
384 | |
385 | void SarifDocumentWriter::appendResult(const SarifResult &Result) { |
386 | size_t RuleIdx = Result.RuleIdx; |
387 | assert(RuleIdx < CurrentRules.size() && |
388 | "Trying to reference a rule that doesn't exist" ); |
389 | const SarifRule &Rule = CurrentRules[RuleIdx]; |
390 | assert(Rule.DefaultConfiguration.Enabled && |
391 | "Cannot add a result referencing a disabled Rule" ); |
392 | json::Object Ret{{.K: "message" , .V: createMessage(Text: Result.DiagnosticMessage)}, |
393 | {.K: "ruleIndex" , .V: static_cast<int64_t>(RuleIdx)}, |
394 | {.K: "ruleId" , .V: Rule.Id}}; |
395 | if (!Result.Locations.empty()) { |
396 | json::Array Locs; |
397 | for (auto &Range : Result.Locations) { |
398 | Locs.emplace_back(A: createLocation(PhysicalLocation: createPhysicalLocation(R: Range))); |
399 | } |
400 | Ret["locations" ] = std::move(Locs); |
401 | } |
402 | if (!Result.ThreadFlows.empty()) |
403 | Ret["codeFlows" ] = json::Array{createCodeFlow(ThreadFlows: Result.ThreadFlows)}; |
404 | |
405 | Ret["level" ] = resultLevelToStr( |
406 | R: Result.LevelOverride.value_or(u: Rule.DefaultConfiguration.Level)); |
407 | |
408 | json::Object &Run = getCurrentRun(); |
409 | json::Array *Results = Run.getArray(K: "results" ); |
410 | Results->emplace_back(A: std::move(Ret)); |
411 | } |
412 | |
413 | json::Object SarifDocumentWriter::createDocument() { |
414 | // Flush all temporaries to their destinations if needed. |
415 | endRun(); |
416 | |
417 | json::Object Doc{ |
418 | {.K: "$schema" , .V: SchemaURI}, |
419 | {.K: "version" , .V: SchemaVersion}, |
420 | }; |
421 | if (!Runs.empty()) |
422 | Doc["runs" ] = json::Array(Runs); |
423 | return Doc; |
424 | } |
425 | |