1//== clang/Basic/Sarif.h - SARIF Diagnostics Object Model -------*- C++ -*--==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// Defines clang::SarifDocumentWriter, clang::SarifRule, clang::SarifResult.
10///
11/// The document built can be accessed as a JSON Object.
12/// Several value semantic types are also introduced which represent properties
13/// of the SARIF standard, such as 'artifact', 'result', 'rule'.
14///
/// A SARIF (Static Analysis Results Interchange Format) document is a JSON
/// document that describes in detail the results of running static analysis
/// tools on a project. Each (non-trivial) document consists of at least one
/// "run", each of which is composed of details such as:
19/// * Tool: The tool that was run
20/// * Rules: The rules applied during the tool run, represented by
21/// \c reportingDescriptor objects in SARIF
22/// * Results: The matches for the rules applied against the project(s) being
23/// evaluated, represented by \c result objects in SARIF
24///
25/// Reference:
26/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html">The SARIF standard</a>
27/// 2. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317836">SARIF<pre>reportingDescriptor</pre></a>
28/// 3. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317638">SARIF<pre>result</pre></a>
29//===----------------------------------------------------------------------===//
30
31#ifndef LLVM_CLANG_BASIC_SARIF_H
32#define LLVM_CLANG_BASIC_SARIF_H
33
34#include "clang/Basic/SourceLocation.h"
35#include "clang/Basic/Version.h"
36#include "llvm/ADT/ArrayRef.h"
37#include "llvm/ADT/SmallVector.h"
38#include "llvm/ADT/StringMap.h"
39#include "llvm/ADT/StringRef.h"
40#include "llvm/Support/JSON.h"
41#include <cassert>
42#include <cstddef>
43#include <cstdint>
44#include <initializer_list>
45#include <optional>
46#include <string>
47
48namespace clang {
49
50class SarifDocumentWriter;
51class SourceManager;
52
53namespace detail {
54
55/// \internal
56/// An artifact location is SARIF's way of describing the complete location
57/// of an artifact encountered during analysis. The \c artifactLocation object
58/// typically consists of a URI, and/or an index to reference the artifact it
59/// locates.
60///
61/// This builder makes an additional assumption: that every artifact encountered
62/// by \c clang will be a physical, top-level artifact. Which is why the static
63/// creation method \ref SarifArtifactLocation::create takes a mandatory URI
64/// parameter. The official standard states that either a \c URI or \c Index
65/// must be available in the object, \c clang picks the \c URI as a reasonable
66/// default, because it intends to deal in physical artifacts for now.
67///
68/// Reference:
69/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317427">artifactLocation object</a>
70/// 2. \ref SarifArtifact
71class SarifArtifactLocation {
72private:
73 friend class clang::SarifDocumentWriter;
74
75 std::optional<uint32_t> Index;
76 std::string URI;
77
78 SarifArtifactLocation() = delete;
79 explicit SarifArtifactLocation(const std::string &URI) : URI(URI) {}
80
81public:
82 static SarifArtifactLocation create(llvm::StringRef URI) {
83 return SarifArtifactLocation{URI.str()};
84 }
85
86 SarifArtifactLocation setIndex(uint32_t Idx) {
87 Index = Idx;
88 return *this;
89 }
90};
91
92/// \internal
93/// An artifact in SARIF is any object (a sequence of bytes) addressable by
94/// a URI (RFC 3986). The most common type of artifact for clang's use-case
95/// would be source files. SARIF's artifact object is described in detail in
96/// section 3.24.
///
98/// Since every clang artifact MUST have a location (there being no nested
99/// artifacts), the creation method \ref SarifArtifact::create requires a
100/// \ref SarifArtifactLocation object.
101///
102/// Reference:
103/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317611">artifact object</a>
104class SarifArtifact {
105private:
106 friend class clang::SarifDocumentWriter;
107
108 std::optional<uint32_t> Offset;
109 std::optional<size_t> Length;
110 std::string MimeType;
111 SarifArtifactLocation Location;
112 llvm::SmallVector<std::string, 4> Roles;
113
114 SarifArtifact() = delete;
115
116 explicit SarifArtifact(const SarifArtifactLocation &Loc) : Location(Loc) {}
117
118public:
119 static SarifArtifact create(const SarifArtifactLocation &Loc) {
120 return SarifArtifact{Loc};
121 }
122
123 SarifArtifact setOffset(uint32_t ArtifactOffset) {
124 Offset = ArtifactOffset;
125 return *this;
126 }
127
128 SarifArtifact setLength(size_t NumBytes) {
129 Length = NumBytes;
130 return *this;
131 }
132
133 SarifArtifact setRoles(std::initializer_list<llvm::StringRef> ArtifactRoles) {
134 Roles.assign(in_start: ArtifactRoles.begin(), in_end: ArtifactRoles.end());
135 return *this;
136 }
137
138 SarifArtifact setMimeType(llvm::StringRef ArtifactMimeType) {
139 MimeType = ArtifactMimeType.str();
140 return *this;
141 }
142};
143
144} // namespace detail
145
/// Importance attached to a \ref ThreadFlow through
/// \ref ThreadFlow::setImportance.
///
/// NOTE(review): the enumerator names presumably mirror the values of the
/// SARIF \c importance property of a thread flow location -- confirm against
/// the standard before relying on it.
enum class ThreadFlowImportance {
  Important,
  Essential,
  Unimportant
};
147
148/// The level of severity associated with a \ref SarifResult.
149///
150/// Of all the levels, \c None is the only one that is not associated with
151/// a failure.
152///
153/// A typical mapping for clang's DiagnosticKind to SarifResultLevel would look
154/// like:
155/// * \c None: \ref clang::DiagnosticsEngine::Level::Remark, \ref clang::DiagnosticsEngine::Level::Ignored
156/// * \c Note: \ref clang::DiagnosticsEngine::Level::Note
157/// * \c Warning: \ref clang::DiagnosticsEngine::Level::Warning
158/// * \c Error could be generated from one of:
159/// - \ref clang::DiagnosticsEngine::Level::Warning with \c -Werror
160/// - \ref clang::DiagnosticsEngine::Level::Error
161/// - \ref clang::DiagnosticsEngine::Level::Fatal when \ref clang::DiagnosticsEngine::ErrorsAsFatal is set.
162///
163/// Reference:
164/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317648">level property</a>
enum class SarifResultLevel {
  None,    ///< The only level that is not associated with a failure.
  Note,    ///< Typically produced from a diagnostic of level Note.
  Warning, ///< Typically produced from a diagnostic of level Warning.
  Error    ///< Typically produced from errors, or warnings under \c -Werror.
};
166
167/// A thread flow is a sequence of code locations that specify a possible path
168/// through a single thread of execution.
169/// A thread flow in SARIF is related to a code flow which describes
170/// the progress of one or more programs through one or more thread flows.
171///
172/// Reference:
173/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317744">threadFlow object</a>
174/// 2. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317740">codeFlow object</a>
175class ThreadFlow {
176 friend class SarifDocumentWriter;
177
178 CharSourceRange Range;
179 ThreadFlowImportance Importance;
180 std::string Message;
181
182 ThreadFlow() = default;
183
184public:
185 static ThreadFlow create() { return {}; }
186
187 ThreadFlow setRange(const CharSourceRange &ItemRange) {
188 assert(ItemRange.isCharRange() &&
189 "ThreadFlows require a character granular source range!");
190 Range = ItemRange;
191 return *this;
192 }
193
194 ThreadFlow setImportance(const ThreadFlowImportance &ItemImportance) {
195 Importance = ItemImportance;
196 return *this;
197 }
198
199 ThreadFlow setMessage(llvm::StringRef ItemMessage) {
200 Message = ItemMessage.str();
201 return *this;
202 }
203};
204
205/// A SARIF Reporting Configuration (\c reportingConfiguration) object contains
206/// properties for a \ref SarifRule that can be configured at runtime before
207/// analysis begins.
208///
209/// Reference:
210/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317852">reportingConfiguration object</a>
211class SarifReportingConfiguration {
212 friend class clang::SarifDocumentWriter;
213
214 bool Enabled = true;
215 SarifResultLevel Level = SarifResultLevel::Warning;
216 float Rank = -1.0f;
217
218 SarifReportingConfiguration() = default;
219
220public:
221 static SarifReportingConfiguration create() { return {}; };
222
223 SarifReportingConfiguration disable() {
224 Enabled = false;
225 return *this;
226 }
227
228 SarifReportingConfiguration enable() {
229 Enabled = true;
230 return *this;
231 }
232
233 SarifReportingConfiguration setLevel(SarifResultLevel TheLevel) {
234 Level = TheLevel;
235 return *this;
236 }
237
238 SarifReportingConfiguration setRank(float TheRank) {
239 assert(TheRank >= 0.0f && "Rule rank cannot be smaller than 0.0");
240 assert(TheRank <= 100.0f && "Rule rank cannot be larger than 100.0");
241 Rank = TheRank;
242 return *this;
243 }
244};
245
246/// A SARIF rule (\c reportingDescriptor object) contains information that
247/// describes a reporting item generated by a tool. A reporting item is
248/// either a result of analysis or notification of a condition encountered by
249/// the tool. Rules are arbitrary but are identifiable by a hierarchical
250/// rule-id.
251///
252/// This builder provides an interface to create SARIF \c reportingDescriptor
253/// objects via the \ref SarifRule::create static method.
254///
255/// Reference:
256/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317836">reportingDescriptor object</a>
257class SarifRule {
258 friend class clang::SarifDocumentWriter;
259
260 std::string Name;
261 std::string Id;
262 std::string Description;
263 std::string HelpURI;
264 SarifReportingConfiguration DefaultConfiguration;
265
266 SarifRule() : DefaultConfiguration(SarifReportingConfiguration::create()) {}
267
268public:
269 static SarifRule create() { return {}; }
270
271 SarifRule setName(llvm::StringRef RuleName) {
272 Name = RuleName.str();
273 return *this;
274 }
275
276 SarifRule setRuleId(llvm::StringRef RuleId) {
277 Id = RuleId.str();
278 return *this;
279 }
280
281 SarifRule setDescription(llvm::StringRef RuleDesc) {
282 Description = RuleDesc.str();
283 return *this;
284 }
285
286 SarifRule setHelpURI(llvm::StringRef RuleHelpURI) {
287 HelpURI = RuleHelpURI.str();
288 return *this;
289 }
290
291 SarifRule
292 setDefaultConfiguration(const SarifReportingConfiguration &Configuration) {
293 DefaultConfiguration = Configuration;
294 return *this;
295 }
296};
297
298/// A SARIF result (also called a "reporting item") is a unit of output
299/// produced when one of the tool's \c reportingDescriptor encounters a match
300/// on the file being analysed by the tool.
301///
302/// This builder provides a \ref SarifResult::create static method that can be
303/// used to create an empty shell onto which attributes can be added using the
304/// \c setX(...) methods.
305///
306/// For example:
307/// \code{.cpp}
308/// SarifResult result = SarifResult::create(...)
309/// .setRuleId(...)
310/// .setDiagnosticMessage(...);
311/// \endcode
312///
313/// Reference:
314/// 1. <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317638">SARIF<pre>result</pre></a>
315class SarifResult {
316 friend class clang::SarifDocumentWriter;
317
318 // NOTE:
319 // This type cannot fit all possible indexes representable by JSON, but is
320 // chosen because it is the largest unsigned type that can be safely
321 // converted to an \c int64_t.
322 uint32_t RuleIdx;
323 std::string RuleId;
324 std::string DiagnosticMessage;
325 llvm::SmallVector<CharSourceRange, 8> Locations;
326 llvm::SmallVector<ThreadFlow, 8> ThreadFlows;
327 std::optional<SarifResultLevel> LevelOverride;
328
329 SarifResult() = delete;
330 explicit SarifResult(uint32_t RuleIdx) : RuleIdx(RuleIdx) {}
331
332public:
333 static SarifResult create(uint32_t RuleIdx) { return SarifResult{RuleIdx}; }
334
335 SarifResult setIndex(uint32_t Idx) {
336 RuleIdx = Idx;
337 return *this;
338 }
339
340 SarifResult setRuleId(llvm::StringRef Id) {
341 RuleId = Id.str();
342 return *this;
343 }
344
345 SarifResult setDiagnosticMessage(llvm::StringRef Message) {
346 DiagnosticMessage = Message.str();
347 return *this;
348 }
349
350 SarifResult setLocations(llvm::ArrayRef<CharSourceRange> DiagLocs) {
351#ifndef NDEBUG
352 for (const auto &Loc : DiagLocs) {
353 assert(Loc.isCharRange() &&
354 "SARIF Results require character granular source ranges!");
355 }
356#endif
357 Locations.assign(in_start: DiagLocs.begin(), in_end: DiagLocs.end());
358 return *this;
359 }
360 SarifResult setThreadFlows(llvm::ArrayRef<ThreadFlow> ThreadFlowResults) {
361 ThreadFlows.assign(in_start: ThreadFlowResults.begin(), in_end: ThreadFlowResults.end());
362 return *this;
363 }
364
365 SarifResult setDiagnosticLevel(const SarifResultLevel &TheLevel) {
366 LevelOverride = TheLevel;
367 return *this;
368 }
369};
370
371/// This class handles creating a valid SARIF document given various input
372/// attributes. However, it requires an ordering among certain method calls:
373///
374/// 1. Because every SARIF document must contain at least 1 \c run, callers
375/// must ensure that \ref SarifDocumentWriter::createRun is called before
376/// any other methods.
377/// 2. If SarifDocumentWriter::endRun is called, callers MUST call
378/// SarifDocumentWriter::createRun, before invoking any of the result
379/// aggregation methods such as SarifDocumentWriter::appendResult etc.
380class SarifDocumentWriter {
381private:
382 const llvm::StringRef SchemaURI{
383 "https://docs.oasis-open.org/sarif/sarif/v2.1.0/cos02/schemas/"
384 "sarif-schema-2.1.0.json"};
385 const llvm::StringRef SchemaVersion{"2.1.0"};
386
387 /// \internal
388 /// Return a pointer to the current tool. Asserts that a run exists.
389 llvm::json::Object &getCurrentTool();
390
391 /// \internal
392 /// Checks if there is a run associated with this document.
393 ///
394 /// \return true on success
395 bool hasRun() const;
396
397 /// \internal
398 /// Reset portions of the internal state so that the document is ready to
399 /// receive data for a new run.
400 void reset();
401
402 /// \internal
403 /// Return a mutable reference to the current run, after asserting it exists.
404 ///
405 /// \note It is undefined behavior to call this if a run does not exist in
406 /// the SARIF document.
407 llvm::json::Object &getCurrentRun();
408
409 /// Create a code flow object for the given threadflows.
410 /// See \ref ThreadFlow.
411 ///
412 /// \note It is undefined behavior to call this if a run does not exist in
413 /// the SARIF document.
414 llvm::json::Object
415 createCodeFlow(const llvm::ArrayRef<ThreadFlow> ThreadFlows);
416
417 /// Add the given threadflows to the ones this SARIF document knows about.
418 llvm::json::Array
419 createThreadFlows(const llvm::ArrayRef<ThreadFlow> ThreadFlows);
420
421 /// Add the given \ref CharSourceRange to the SARIF document as a physical
422 /// location, with its corresponding artifact.
423 llvm::json::Object createPhysicalLocation(const CharSourceRange &R);
424
425public:
426 SarifDocumentWriter() = delete;
427
428 /// Create a new empty SARIF document with the given source manager.
429 SarifDocumentWriter(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
430
431 /// Release resources held by this SARIF document.
432 ~SarifDocumentWriter() = default;
433
434 /// Create a new run with which any upcoming analysis will be associated.
435 /// Each run requires specifying the tool that is generating reporting items.
436 void createRun(const llvm::StringRef ShortToolName,
437 const llvm::StringRef LongToolName,
438 const llvm::StringRef ToolVersion = CLANG_VERSION_STRING);
439
440 /// If there is a current run, end it.
441 ///
442 /// This method collects various book-keeping required to clear and close
443 /// resources associated with the current run, but may also allocate some
444 /// for the next run.
445 ///
446 /// Calling \ref endRun before associating a run through \ref createRun leads
447 /// to undefined behaviour.
448 void endRun();
449
450 /// Associate the given rule with the current run.
451 ///
452 /// Returns an integer rule index for the created rule that is unique within
453 /// the current run, which can then be used to create a \ref SarifResult
454 /// to add to the current run. Note that a rule must exist before being
455 /// referenced by a result.
456 ///
457 /// \pre
458 /// There must be a run associated with the document, failing to do so will
459 /// cause undefined behaviour.
460 size_t createRule(const SarifRule &Rule);
461
462 /// Append a new result to the currently in-flight run.
463 ///
464 /// \pre
465 /// There must be a run associated with the document, failing to do so will
466 /// cause undefined behaviour.
467 /// \pre
468 /// \c RuleIdx used to create the result must correspond to a rule known by
469 /// the SARIF document. It must be the value returned by a previous call
470 /// to \ref createRule.
471 void appendResult(const SarifResult &SarifResult);
472
473 /// Return the SARIF document in its current state.
474 /// Calling this will trigger a copy of the internal state including all
475 /// reported diagnostics, resulting in an expensive call.
476 llvm::json::Object createDocument();
477
478private:
479 /// Source Manager to use for the current SARIF document.
480 const SourceManager &SourceMgr;
481
482 /// Flag to track the state of this document:
483 /// A closed document is one on which a new runs must be created.
484 /// This could be a document that is freshly created, or has recently
485 /// finished writing to a previous run.
486 bool Closed = true;
487
488 /// A sequence of SARIF runs.
489 /// Each run object describes a single run of an analysis tool and contains
490 /// the output of that run.
491 ///
492 /// Reference: <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317484">run object</a>
493 llvm::json::Array Runs;
494
495 /// The list of rules associated with the most recent active run. These are
496 /// defined using the diagnostics passed to the SarifDocument. Each rule
497 /// need not be unique through the result set. E.g. there may be several
498 /// 'syntax' errors throughout code under analysis, each of which has its
499 /// own specific diagnostic message (and consequently, RuleId). Rules are
500 /// also known as "reportingDescriptor" objects in SARIF.
501 ///
502 /// Reference: <a href="https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317556">rules property</a>
503 llvm::SmallVector<SarifRule, 32> CurrentRules;
504
505 /// The list of artifacts that have been encountered on the most recent active
506 /// run. An artifact is defined in SARIF as a sequence of bytes addressable
507 /// by a URI. A common example for clang's case would be files named by
508 /// filesystem paths.
509 llvm::StringMap<detail::SarifArtifact> CurrentArtifacts;
510};
511} // namespace clang
512
513#endif // LLVM_CLANG_BASIC_SARIF_H
514

source code of clang/include/clang/Basic/Sarif.h