1//===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
/// This file contains the definition of the SarifDocumentWriter class, and
/// associated builders such as:
12/// - \ref SarifArtifact
13/// - \ref SarifArtifactLocation
14/// - \ref SarifRule
15/// - \ref SarifResult
16//===----------------------------------------------------------------------===//
17#include "clang/Basic/Sarif.h"
18#include "clang/Basic/SourceLocation.h"
19#include "clang/Basic/SourceManager.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/StringExtras.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Support/ConvertUTF.h"
24#include "llvm/Support/JSON.h"
25#include "llvm/Support/Path.h"
26
27#include <optional>
28#include <string>
29#include <utility>
30
31using namespace clang;
32using namespace llvm;
33
34using clang::detail::SarifArtifact;
35using clang::detail::SarifArtifactLocation;
36
37static StringRef getFileName(FileEntryRef FE) {
38 StringRef Filename = FE.getFileEntry().tryGetRealPathName();
39 if (Filename.empty())
40 Filename = FE.getName();
41 return Filename;
42}
43/// \name URI
44/// @{
45
46/// \internal
47/// \brief
48/// Return the RFC3986 encoding of the input character.
49///
50/// \param C Character to encode to RFC3986.
51///
52/// \return The RFC3986 representation of \c C.
53static std::string percentEncodeURICharacter(char C) {
54 // RFC 3986 claims alpha, numeric, and this handful of
55 // characters are not reserved for the path component and
56 // should be written out directly. Otherwise, percent
57 // encode the character and write that out instead of the
58 // reserved character.
59 if (llvm::isAlnum(C) || StringRef("-._~:@!$&'()*+,;=").contains(C))
60 return std::string(&C, 1);
61 return "%" + llvm::toHex(Input: StringRef(&C, 1));
62}
63
64/// \internal
65/// \brief Return a URI representing the given file name.
66///
67/// \param Filename The filename to be represented as URI.
68///
69/// \return RFC3986 URI representing the input file name.
70static std::string fileNameToURI(StringRef Filename) {
71 SmallString<32> Ret = StringRef("file://");
72
73 // Get the root name to see if it has a URI authority.
74 StringRef Root = sys::path::root_name(path: Filename);
75 if (Root.starts_with(Prefix: "//")) {
76 // There is an authority, so add it to the URI.
77 Ret += Root.drop_front(N: 2).str();
78 } else if (!Root.empty()) {
79 // There is no authority, so end the component and add the root to the URI.
80 Ret += Twine("/" + Root).str();
81 }
82
83 auto Iter = sys::path::begin(path: Filename), End = sys::path::end(path: Filename);
84 assert(Iter != End && "Expected there to be a non-root path component.");
85 // Add the rest of the path components, encoding any reserved characters;
86 // we skip past the first path component, as it was handled it above.
87 for (StringRef Component : llvm::make_range(x: ++Iter, y: End)) {
88 // For reasons unknown to me, we may get a backslash with Windows native
89 // paths for the initial backslash following the drive component, which
90 // we need to ignore as a URI path part.
91 if (Component == "\\")
92 continue;
93
94 // Add the separator between the previous path part and the one being
95 // currently processed.
96 Ret += "/";
97
98 // URI encode the part.
99 for (char C : Component) {
100 Ret += percentEncodeURICharacter(C);
101 }
102 }
103
104 return std::string(Ret);
105}
106/// @}
107
108/// \brief Calculate the column position expressed in the number of UTF-8 code
109/// points from column start to the source location
110///
111/// \param Loc The source location whose column needs to be calculated.
112/// \param TokenLen Optional hint for when the token is multiple bytes long.
113///
114/// \return The column number as a UTF-8 aware byte offset from column start to
115/// the effective source location.
116static unsigned int adjustColumnPos(FullSourceLoc Loc,
117 unsigned int TokenLen = 0) {
118 assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
119
120 std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedExpansionLoc();
121 std::optional<MemoryBufferRef> Buf =
122 Loc.getManager().getBufferOrNone(FID: LocInfo.first);
123 assert(Buf && "got an invalid buffer for the location's file");
124 assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&
125 "token extends past end of buffer?");
126
127 // Adjust the offset to be the start of the line, since we'll be counting
128 // Unicode characters from there until our column offset.
129 unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1);
130 unsigned int Ret = 1;
131 while (Off < (LocInfo.second + TokenLen)) {
132 Off += getNumBytesForUTF8(firstByte: Buf->getBuffer()[Off]);
133 Ret++;
134 }
135
136 return Ret;
137}
138
139/// \name SARIF Utilities
140/// @{
141
142/// \internal
143static json::Object createMessage(StringRef Text) {
144 return json::Object{{.K: "text", .V: Text.str()}};
145}
146
147/// \internal
148/// \pre CharSourceRange must be a token range
149static json::Object createTextRegion(const SourceManager &SM,
150 const CharSourceRange &R) {
151 FullSourceLoc BeginCharLoc{R.getBegin(), SM};
152 FullSourceLoc EndCharLoc{R.getEnd(), SM};
153 json::Object Region{{.K: "startLine", .V: BeginCharLoc.getExpansionLineNumber()},
154 {.K: "startColumn", .V: adjustColumnPos(Loc: BeginCharLoc)}};
155
156 if (BeginCharLoc == EndCharLoc) {
157 Region["endColumn"] = adjustColumnPos(Loc: BeginCharLoc);
158 } else {
159 Region["endLine"] = EndCharLoc.getExpansionLineNumber();
160 Region["endColumn"] = adjustColumnPos(Loc: EndCharLoc);
161 }
162 return Region;
163}
164
165static json::Object createLocation(json::Object &&PhysicalLocation,
166 StringRef Message = "") {
167 json::Object Ret{{.K: "physicalLocation", .V: std::move(PhysicalLocation)}};
168 if (!Message.empty())
169 Ret.insert(E: {.K: "message", .V: createMessage(Text: Message)});
170 return Ret;
171}
172
173static StringRef importanceToStr(ThreadFlowImportance I) {
174 switch (I) {
175 case ThreadFlowImportance::Important:
176 return "important";
177 case ThreadFlowImportance::Essential:
178 return "essential";
179 case ThreadFlowImportance::Unimportant:
180 return "unimportant";
181 }
182 llvm_unreachable("Fully covered switch is not so fully covered");
183}
184
185static StringRef resultLevelToStr(SarifResultLevel R) {
186 switch (R) {
187 case SarifResultLevel::None:
188 return "none";
189 case SarifResultLevel::Note:
190 return "note";
191 case SarifResultLevel::Warning:
192 return "warning";
193 case SarifResultLevel::Error:
194 return "error";
195 }
196 llvm_unreachable("Potentially un-handled SarifResultLevel. "
197 "Is the switch not fully covered?");
198}
199
200static json::Object
201createThreadFlowLocation(json::Object &&Location,
202 const ThreadFlowImportance &Importance) {
203 return json::Object{{.K: "location", .V: std::move(Location)},
204 {.K: "importance", .V: importanceToStr(I: Importance)}};
205}
206/// @}
207
208json::Object
209SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) {
210 assert(R.isValid() &&
211 "Cannot create a physicalLocation from invalid SourceRange!");
212 assert(R.isCharRange() &&
213 "Cannot create a physicalLocation from a token range!");
214 FullSourceLoc Start{R.getBegin(), SourceMgr};
215 OptionalFileEntryRef FE = Start.getExpansionLoc().getFileEntryRef();
216 assert(FE && "Diagnostic does not exist within a valid file!");
217
218 const std::string &FileURI = fileNameToURI(Filename: getFileName(FE: *FE));
219 auto I = CurrentArtifacts.find(Key: FileURI);
220
221 if (I == CurrentArtifacts.end()) {
222 uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size());
223 const SarifArtifactLocation &Location =
224 SarifArtifactLocation::create(URI: FileURI).setIndex(Idx);
225 const SarifArtifact &Artifact = SarifArtifact::create(Loc: Location)
226 .setRoles({"resultFile"})
227 .setLength(FE->getSize())
228 .setMimeType("text/plain");
229 auto StatusIter = CurrentArtifacts.insert(KV: {FileURI, Artifact});
230 // If inserted, ensure the original iterator points to the newly inserted
231 // element, so it can be used downstream.
232 if (StatusIter.second)
233 I = StatusIter.first;
234 }
235 assert(I != CurrentArtifacts.end() && "Failed to insert new artifact");
236 const SarifArtifactLocation &Location = I->second.Location;
237 json::Object ArtifactLocationObject{{.K: "uri", .V: Location.URI}};
238 if (Location.Index.has_value())
239 ArtifactLocationObject["index"] = *Location.Index;
240 return json::Object{{{.K: "artifactLocation", .V: std::move(ArtifactLocationObject)},
241 {.K: "region", .V: createTextRegion(SM: SourceMgr, R)}}};
242}
243
244json::Object &SarifDocumentWriter::getCurrentTool() {
245 assert(!Closed && "SARIF Document is closed. "
246 "Need to call createRun() before using getcurrentTool!");
247
248 // Since Closed = false here, expect there to be at least 1 Run, anything
249 // else is an invalid state.
250 assert(!Runs.empty() && "There are no runs associated with the document!");
251
252 return *Runs.back().getAsObject()->get(K: "tool")->getAsObject();
253}
254
255void SarifDocumentWriter::reset() {
256 CurrentRules.clear();
257 CurrentArtifacts.clear();
258}
259
260void SarifDocumentWriter::endRun() {
261 // Exit early if trying to close a closed Document.
262 if (Closed) {
263 reset();
264 return;
265 }
266
267 // Since Closed = false here, expect there to be at least 1 Run, anything
268 // else is an invalid state.
269 assert(!Runs.empty() && "There are no runs associated with the document!");
270
271 // Flush all the rules.
272 json::Object &Tool = getCurrentTool();
273 json::Array Rules;
274 for (const SarifRule &R : CurrentRules) {
275 json::Object Config{
276 {.K: "enabled", .V: R.DefaultConfiguration.Enabled},
277 {.K: "level", .V: resultLevelToStr(R: R.DefaultConfiguration.Level)},
278 {.K: "rank", .V: R.DefaultConfiguration.Rank}};
279 json::Object Rule{
280 {.K: "name", .V: R.Name},
281 {.K: "id", .V: R.Id},
282 {.K: "fullDescription", .V: json::Object{{.K: "text", .V: R.Description}}},
283 {.K: "defaultConfiguration", .V: std::move(Config)}};
284 if (!R.HelpURI.empty())
285 Rule["helpUri"] = R.HelpURI;
286 Rules.emplace_back(A: std::move(Rule));
287 }
288 json::Object &Driver = *Tool.getObject(K: "driver");
289 Driver["rules"] = std::move(Rules);
290
291 // Flush all the artifacts.
292 json::Object &Run = getCurrentRun();
293 json::Array *Artifacts = Run.getArray(K: "artifacts");
294 SmallVector<std::pair<StringRef, SarifArtifact>, 0> Vec;
295 for (const auto &[K, V] : CurrentArtifacts)
296 Vec.emplace_back(Args: K, Args: V);
297 llvm::sort(C&: Vec, Comp: llvm::less_first());
298 for (const auto &[_, A] : Vec) {
299 json::Object Loc{{.K: "uri", .V: A.Location.URI}};
300 if (A.Location.Index.has_value()) {
301 Loc["index"] = static_cast<int64_t>(*A.Location.Index);
302 }
303 json::Object Artifact;
304 Artifact["location"] = std::move(Loc);
305 if (A.Length.has_value())
306 Artifact["length"] = static_cast<int64_t>(*A.Length);
307 if (!A.Roles.empty())
308 Artifact["roles"] = json::Array(A.Roles);
309 if (!A.MimeType.empty())
310 Artifact["mimeType"] = A.MimeType;
311 if (A.Offset.has_value())
312 Artifact["offset"] = *A.Offset;
313 Artifacts->push_back(E: json::Value(std::move(Artifact)));
314 }
315
316 // Clear, reset temporaries before next run.
317 reset();
318
319 // Mark the document as closed.
320 Closed = true;
321}
322
323json::Array
324SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) {
325 json::Object Ret{{.K: "locations", .V: json::Array{}}};
326 json::Array Locs;
327 for (const auto &ThreadFlow : ThreadFlows) {
328 json::Object PLoc = createPhysicalLocation(R: ThreadFlow.Range);
329 json::Object Loc = createLocation(PhysicalLocation: std::move(PLoc), Message: ThreadFlow.Message);
330 Locs.emplace_back(
331 A: createThreadFlowLocation(Location: std::move(Loc), Importance: ThreadFlow.Importance));
332 }
333 Ret["locations"] = std::move(Locs);
334 return json::Array{std::move(Ret)};
335}
336
337json::Object
338SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) {
339 return json::Object{{.K: "threadFlows", .V: createThreadFlows(ThreadFlows)}};
340}
341
342void SarifDocumentWriter::createRun(StringRef ShortToolName,
343 StringRef LongToolName,
344 StringRef ToolVersion) {
345 // Clear resources associated with a previous run.
346 endRun();
347
348 // Signify a new run has begun.
349 Closed = false;
350
351 json::Object Tool{
352 {.K: "driver",
353 .V: json::Object{{.K: "name", .V: ShortToolName},
354 {.K: "fullName", .V: LongToolName},
355 {.K: "language", .V: "en-US"},
356 {.K: "version", .V: ToolVersion},
357 {.K: "informationUri",
358 .V: "https://clang.llvm.org/docs/UsersManual.html"}}}};
359 json::Object TheRun{{.K: "tool", .V: std::move(Tool)},
360 {.K: "results", .V: {}},
361 {.K: "artifacts", .V: {}},
362 {.K: "columnKind", .V: "unicodeCodePoints"}};
363 Runs.emplace_back(A: std::move(TheRun));
364}
365
366json::Object &SarifDocumentWriter::getCurrentRun() {
367 assert(!Closed &&
368 "SARIF Document is closed. "
369 "Can only getCurrentRun() if document is opened via createRun(), "
370 "create a run first");
371
372 // Since Closed = false here, expect there to be at least 1 Run, anything
373 // else is an invalid state.
374 assert(!Runs.empty() && "There are no runs associated with the document!");
375 return *Runs.back().getAsObject();
376}
377
378size_t SarifDocumentWriter::createRule(const SarifRule &Rule) {
379 size_t Ret = CurrentRules.size();
380 CurrentRules.emplace_back(Args: Rule);
381 return Ret;
382}
383
384void SarifDocumentWriter::appendResult(const SarifResult &Result) {
385 size_t RuleIdx = Result.RuleIdx;
386 assert(RuleIdx < CurrentRules.size() &&
387 "Trying to reference a rule that doesn't exist");
388 const SarifRule &Rule = CurrentRules[RuleIdx];
389 assert(Rule.DefaultConfiguration.Enabled &&
390 "Cannot add a result referencing a disabled Rule");
391 json::Object Ret{{.K: "message", .V: createMessage(Text: Result.DiagnosticMessage)},
392 {.K: "ruleIndex", .V: static_cast<int64_t>(RuleIdx)},
393 {.K: "ruleId", .V: Rule.Id}};
394 if (!Result.Locations.empty()) {
395 json::Array Locs;
396 for (auto &Range : Result.Locations) {
397 Locs.emplace_back(A: createLocation(PhysicalLocation: createPhysicalLocation(R: Range)));
398 }
399 Ret["locations"] = std::move(Locs);
400 }
401 if (!Result.ThreadFlows.empty())
402 Ret["codeFlows"] = json::Array{createCodeFlow(ThreadFlows: Result.ThreadFlows)};
403
404 Ret["level"] = resultLevelToStr(
405 R: Result.LevelOverride.value_or(u: Rule.DefaultConfiguration.Level));
406
407 json::Object &Run = getCurrentRun();
408 json::Array *Results = Run.getArray(K: "results");
409 Results->emplace_back(A: std::move(Ret));
410}
411
412json::Object SarifDocumentWriter::createDocument() {
413 // Flush all temporaries to their destinations if needed.
414 endRun();
415
416 json::Object Doc{
417 {.K: "$schema", .V: SchemaURI},
418 {.K: "version", .V: SchemaVersion},
419 };
420 if (!Runs.empty())
421 Doc["runs"] = json::Array(Runs);
422 return Doc;
423}
424

Provided by KDAB

Privacy Policy
Learn to use CMake with our Intro Training
Find out more

source code of clang/lib/Basic/Sarif.cpp