aaron.ballman created this revision. aaron.ballman added reviewers: dcoughlin, zaks.anna. Herald added subscribers: dkrupp, donat.nagy, Szelethus, a.sidorin, mgorny. Herald added a reviewer: george.karpenkov.
SARIF (https://github.com/oasis-tcs/sarif-spec) is a new draft standard interchange format for static analysis results that allows result viewers to be decoupled from the tool producing the analysis results. This patch allows users to specify SARIF as the output from the clang static analyzer so that the results can be read in by other tools. There are several such tools for consuming SARIF, such as extensions to Visual Studio and VSCode, as well as static analyzers like CodeSonar. SARIF is JSON-based and the latest provisional specification can be found at: https://github.com/oasis-tcs/sarif-spec/blob/master/Documents/ProvisionalDrafts/sarif-v2.0-csd02-provisional.docx. GrammaTech sponsored the work to produce this patch and we will make any necessary changes if the draft standard changes before publication. https://reviews.llvm.org/D53814 Files: Analysis/diagnostics/sarif-check.py Analysis/diagnostics/sarif-diagnostics-taint-test.c StaticAnalyzer/Core/CMakeLists.txt StaticAnalyzer/Core/SarifDiagnostics.cpp clang/StaticAnalyzer/Core/Analyses.def clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h
Index: Analysis/diagnostics/sarif-diagnostics-taint-test.c =================================================================== --- /dev/null +++ Analysis/diagnostics/sarif-diagnostics-taint-test.c @@ -0,0 +1,32 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.security.taint,debug.TaintTest %s -verify -analyzer-output=sarif -o %t.sarif | %python %S/sarif-check.py %s %t.sarif +#include "../Inputs/system-header-simulator.h" + +int atoi(const char *nptr); + +void f(void) { + char s[80]; + scanf("%s", s); + int d = atoi(s); // expected-warning {{tainted}} +} + +int main(void) { + f(); + return 0; +} + +// Test the basics for sanity. +// CHECK: sarifLog['version'].startswith("2.0.0") +// CHECK: sarifLog['runs'][0]['tool']['fullName'] == "clang static analyzer" +// CHECK: sarifLog['runs'][0]['tool']['name'] == "clang" +// CHECK: sarifLog['runs'][0]['tool']['language'] == "en-US" + +// Test the specifics of this taint test. +// CHECK: len(result(0)['codeFlows'][0]['threadFlows'][0]['locations']) == 2 +// CHECK: flow(0)['locations'][0]['step'] == 1 +// CHECK: flow(0)['locations'][0]['importance'] == "essential" +// CHECK: flow(0)['locations'][0]['location']['message']['text'] == "Calling 'f'" +// CHECK: flow(0)['locations'][0]['location']['physicalLocation']['region']['startLine'] == 13 +// CHECK: flow(0)['locations'][1]['step'] == 2 +// CHECK: flow(0)['locations'][1]['importance'] == "essential" +// CHECK: flow(0)['locations'][1]['location']['message']['text'] == "tainted" +// CHECK: flow(0)['locations'][1]['location']['physicalLocation']['region']['startLine'] == 9 Index: Analysis/diagnostics/sarif-check.py =================================================================== --- /dev/null +++ Analysis/diagnostics/sarif-check.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python + +import sys +import subprocess +import traceback +import json + +testfile = sys.argv[1] +with open(sys.argv[2]) as datafh: + data = json.load(datafh) + +prefix = "CHECK: " + +def sarifLogFirstResult(idx): + return data['runs'][0]['results'][idx] + +def threadFlow(idx): + return data['runs'][0]['results'][0]['codeFlows'][0]['threadFlows'][idx] + +fails = 0 +passes = 0 +with open(testfile) as testfh: + lineno = 0 + for line in iter(testfh.readline, ""): + lineno += 1 + line = line.rstrip("\r\n") + try: + prefix_pos = line.index(prefix) + except ValueError: + continue + check_expr = line[prefix_pos + len(prefix):] + + try: + exception = None + result = eval(check_expr, {"sarifLog":data, + "result":sarifLogFirstResult, + "flow":threadFlow}) + except Exception: + result = False + exception = traceback.format_exc().splitlines()[-1] + + if exception is not None: + sys.stderr.write( + "{file}:{line:d}: check threw exception: {expr}\n" + "{file}:{line:d}: exception was: {exception}\n".format( + file=testfile, line=lineno, + expr=check_expr, exception=exception)) + fails += 1 + elif not result: + sys.stderr.write( + "{file}:{line:d}: check returned False: {expr}\n".format( + file=testfile, line=lineno, expr=check_expr)) + fails += 1 + else: + passes += 1 + +if fails != 0: + sys.exit("{} checks failed".format(fails)) +else: + sys.stdout.write("{} checks passed\n".format(passes)) + Index: StaticAnalyzer/Core/SarifDiagnostics.cpp =================================================================== --- /dev/null +++ StaticAnalyzer/Core/SarifDiagnostics.cpp @@ -0,0 +1,266 @@ +//===--- SarifDiagnostics.cpp - Sarif Diagnostics for Paths -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the SarifDiagnostics object. +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/Version.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h" +#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h" +#include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/Path.h" + +using namespace llvm; +using namespace clang; +using namespace ento; + +namespace { +class SarifDiagnostics : public PathDiagnosticConsumer { + std::string OutputFile; + +public: + SarifDiagnostics(AnalyzerOptions &, const std::string &Output) + : OutputFile(Output) {} + ~SarifDiagnostics() override = default; + + void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags, + FilesMade *FM) override; + + StringRef getName() const override { return "SarifDiagnostics"; } + PathGenerationScheme getGenerationScheme() const override { return Sarif; } + bool supportsLogicalOpControlFlow() const override { return true; } + bool supportsCrossFileDiagnostics() const override { return true; } +}; +} // end anonymous namespace + +void ento::createSarifDiagnosticConsumer(AnalyzerOptions &AnalyzerOpts, + PathDiagnosticConsumers &C, + const std::string &Output, + const Preprocessor &) { + C.push_back(new SarifDiagnostics(AnalyzerOpts, Output)); +} + +static StringRef getFileName(const FileEntry &FE) { + StringRef Filename = FE.tryGetRealPathName(); + if (Filename.empty()) + Filename = FE.getName(); + return Filename; +} + +static std::string fileNameToURI(StringRef Filename) { + llvm::SmallString<32> Ret = "file://"; + + // Get the root name to see if it has a URI authority. + StringRef Root = sys::path::root_name(Filename); + if (Root.startswith("//")) { + // There is an authority, so add it to the URI. + Ret += Root.drop_front(2).str(); + } else { + // There is no authority, so end the component and add the root to the URI. + Ret += Twine("/" + Root).str(); + } + + // Add the rest of the path components, encoding any reserved characters. + std::for_each(std::next(sys::path::begin(Filename)), sys::path::end(Filename), + [&Ret](StringRef Component) { + // For reasons unknown to me, we may get a backslash with + // Windows native paths for the initial backslash following + // the drive component, which we need to ignore as a URI path + // part. + if (Component == "\\") + return; + + // Add the separator between the previous path part and the + // one being currently processed. + Ret += "/"; + + // URI encode the part. + for (char C : Component) { + // RFC 3986 claims alpha, numeric, and this handful of + // characters are not reserved for the path component and + // should be written out directly. Otherwise, percent + // encode the character and write that out instead of the + // reserved character. + if (llvm::isAlnum(C) || + StringRef::npos != + StringRef("-._~:@!$&'()*+,;=").find(C)) { + Ret += C; + } else { + Ret += "%" + llvm::toHex(StringRef(&C, 1)); + } + } + }); + + return Ret.str().str(); +} + +static json::Object createFileLocation(const FileEntry &FE) { + return json::Object{{"uri", fileNameToURI(getFileName(FE))}}; +} + +static json::Object createFile(const FileEntry &FE) { + return json::Object{{"fileLocation", createFileLocation(FE)}, + {"roles", json::Array{"resultFile"}}, + {"length", FE.getSize()}, + {"mimeType", "text/plain"}}; +} + +static json::Object createFileLocation(const FileEntry &FE, + json::Object &Files) { + std::string FileURI = fileNameToURI(getFileName(FE)); + if (!Files.get(FileURI)) + Files[FileURI] = createFile(FE); + + return json::Object{{"uri", FileURI}}; +} + +static json::Object createTextRegion(SourceRange R, const SourceManager &SM) { + return json::Object{ + {"startLine", SM.getExpansionLineNumber(R.getBegin())}, + {"endLine", SM.getExpansionLineNumber(R.getEnd())}, + {"startColumn", SM.getExpansionColumnNumber(R.getBegin())}, + {"endColumn", SM.getExpansionColumnNumber(R.getEnd())}}; +} + +static json::Object createPhysicalLocation(SourceRange R, const FileEntry &FE, + const SourceManager &SMgr, + json::Object &Files) { + return json::Object{{{"fileLocation", createFileLocation(FE, Files)}, + {"region", createTextRegion(R, SMgr)}}}; +} + +enum class Importance { Important, Essential, Unimportant }; + +static StringRef importanceToStr(Importance I) { + switch (I) { + case Importance::Important: + return "important"; + case Importance::Essential: + return "essential"; + case Importance::Unimportant: + return "unimportant"; + } + llvm_unreachable("Fully covered switch is not so fully covered"); +} + +static json::Object createThreadFlowLocation(int Step, json::Object &&Location, + Importance I) { + return json::Object{{"step", Step}, + {"location", std::move(Location)}, + {"importance", importanceToStr(I)}}; +} + +static json::Object createMessage(StringRef Text) { + return json::Object{{"text", Text.str()}}; +} + +static json::Object createLocation(json::Object &&PhysicalLocation, + StringRef Message = "") { + json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}}; + if (!Message.empty()) + Ret.insert({"message", createMessage(Message)}); + return Ret; +} + +static Importance calculateImportance(const PathDiagnosticPiece &Piece) { + StringRef PieceStr = Piece.getString(); + + switch (Piece.getKind()) { + case PathDiagnosticPiece::Kind::Call: + case PathDiagnosticPiece::Kind::Macro: + case PathDiagnosticPiece::Kind::Note: + // FIXME: What should be reported here? + break; + case PathDiagnosticPiece::Kind::Event: + return Piece.getTagStr() == "ConditionBRVisitor" ? Importance::Important + : Importance::Essential; + case PathDiagnosticPiece::Kind::ControlFlow: + return Importance::Unimportant; + } + return Importance::Unimportant; +} + +static json::Object createThreadFlow(const PathPieces &Pieces, + json::Object &Files) { + const SourceManager &SMgr = Pieces.front()->getLocation().getManager(); + int Step = 1; + json::Array Locations; + for (const auto &Piece : Pieces) { + const PathDiagnosticLocation &P = Piece->getLocation(); + Locations.push_back(createThreadFlowLocation( + Step++, + createLocation(createPhysicalLocation(P.asRange(), + *P.asLocation().getFileEntry(), + SMgr, Files), + Piece->getString()), + calculateImportance(*Piece))); + } + return json::Object{{"locations", std::move(Locations)}}; +} + +static json::Object createCodeFlow(const PathPieces &Pieces, + json::Object &Files) { + return json::Object{ + {"threadFlows", json::Array{createThreadFlow(Pieces, Files)}}}; +} + +static json::Object createTool() { + return json::Object{{"name", "clang"}, + {"fullName", "clang static analyzer"}, + {"language", "en-US"}, + {"version", getClangFullVersion()}}; +} + +static json::Object createResult(const PathDiagnostic &Diag, + json::Object &Files) { + const PathPieces &Path = Diag.path.flatten(false); + const SourceManager &SMgr = Path.front()->getLocation().getManager(); + + return json::Object{ + {"message", createMessage(Diag.getVerboseDescription())}, + {"codeFlows", json::Array{createCodeFlow(Path, Files)}}, + {"locations", + json::Array{createLocation(createPhysicalLocation( + Diag.getLocation().asRange(), + *Diag.getLocation().asLocation().getFileEntry(), SMgr, Files))}}, + {"ruleId", Diag.getCheckName()}}; +} + +static json::Object createRun(std::vector<const PathDiagnostic *> &Diags) { + json::Array Results; + json::Object Files; + + llvm::for_each(Diags, [&](const PathDiagnostic *D) { + Results.push_back(createResult(*D, Files)); + }); + + return json::Object{{"tool", createTool()}, + {"results", std::move(Results)}, + {"files", std::move(Files)}}; +} + +void SarifDiagnostics::FlushDiagnosticsImpl( + std::vector<const PathDiagnostic *> &Diags, FilesMade *) { + // FIXME: if the file already exists, do we overwrite it with a single run, + // or do we append a run into the file if it's a valid SARIF log? + std::error_code EC; + llvm::raw_fd_ostream OS(OutputFile, EC, llvm::sys::fs::F_Text); + if (EC) { + llvm::errs() << "warning: could not create file: " << EC.message() << '\n'; + return; + } + json::Object Sarif{{"$schema", "http://json.schemastore.org/sarif-2.0.0"}, + {"version", "2.0.0-beta.2018-09-26"}, + {"runs", json::Array{createRun(Diags)}}}; + OS << llvm::formatv("{0:2}", json::Value(std::move(Sarif))); +} Index: StaticAnalyzer/Core/CMakeLists.txt =================================================================== --- StaticAnalyzer/Core/CMakeLists.txt +++ StaticAnalyzer/Core/CMakeLists.txt @@ -40,6 +40,7 @@ MemRegion.cpp PathDiagnostic.cpp PlistDiagnostics.cpp + SarifDiagnostics.cpp ProgramState.cpp RangeConstraintManager.cpp RangedConstraintManager.cpp Index: clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h =================================================================== --- clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h +++ clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h @@ -123,6 +123,9 @@ /// Used for plist output, used for "arrows" generation. Extensive, + + /// Used for SARIF output. + Sarif, }; virtual PathGenerationScheme getGenerationScheme() const { return Minimal; } Index: clang/StaticAnalyzer/Core/Analyses.def =================================================================== --- clang/StaticAnalyzer/Core/Analyses.def +++ clang/StaticAnalyzer/Core/Analyses.def @@ -33,6 +33,7 @@ ANALYSIS_DIAGNOSTICS(PLIST, "plist", "Output analysis results using Plists", createPlistDiagnosticConsumer) ANALYSIS_DIAGNOSTICS(PLIST_MULTI_FILE, "plist-multi-file", "Output analysis results using Plists (allowing for multi-file bugs)", createPlistMultiFileDiagnosticConsumer) ANALYSIS_DIAGNOSTICS(PLIST_HTML, "plist-html", "Output analysis results using HTML wrapped with Plists", createPlistHTMLDiagnosticConsumer) +ANALYSIS_DIAGNOSTICS(SARIF, "sarif", "Output analysis results in a SARIF file", createSarifDiagnosticConsumer) ANALYSIS_DIAGNOSTICS(TEXT, "text", "Text output of analysis results", createTextPathDiagnosticConsumer) #ifndef ANALYSIS_PURGE
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits