This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG73093599287c: [analyzer] Fix scan-build report 
deduplication. (authored by dergachev.a).
Herald added a project: clang.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105167/new/

https://reviews.llvm.org/D105167

Files:
  clang/include/clang/Analysis/PathDiagnostic.h
  clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
  clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
  clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
  clang/test/Analysis/analyzer-config.c
  clang/test/Analysis/scan-build/Inputs/deduplication/1.c
  clang/test/Analysis/scan-build/Inputs/deduplication/2.c
  clang/test/Analysis/scan-build/Inputs/deduplication/header.h
  clang/test/Analysis/scan-build/deduplication.test
  clang/test/Analysis/scan-build/rebuild_index/rebuild_index.test
  clang/test/Analysis/scan-build/rebuild_index/report-3.html
  clang/test/Analysis/scan-build/rebuild_index/subdirectory/report-3.html
  clang/test/Analysis/scan-build/rebuild_index/subdirectory/report-4.html
  clang/tools/scan-build/bin/scan-build

Index: clang/tools/scan-build/bin/scan-build
===================================================================
--- clang/tools/scan-build/bin/scan-build
+++ clang/tools/scan-build/bin/scan-build
@@ -14,7 +14,6 @@
 use strict;
 use warnings;
 use FindBin qw($RealBin);
-use Digest::MD5;
 use File::Basename;
 use File::Find;
 use File::Copy qw(copy);
@@ -268,27 +267,6 @@
   $ENV{'CCC_ANALYZER_HTML'} = $Dir;
 }
 
-##----------------------------------------------------------------------------##
-# ComputeDigest - Compute a digest of the specified file.
-##----------------------------------------------------------------------------##
-
-sub ComputeDigest {
-  my $FName = shift;
-  DieDiag("Cannot read $FName to compute Digest.\n") if (! -r $FName);
-
-  # Use Digest::MD5.  We don't have to be cryptographically secure.  We're
-  # just looking for duplicate files that come from a non-malicious source.
-  # We use Digest::MD5 because it is a standard Perl module that should
-  # come bundled on most systems.
-  open(FILE, $FName) or DieDiag("Cannot open $FName when computing Digest.\n");
-  binmode FILE;
-  my $Result = Digest::MD5->new->addfile(*FILE)->hexdigest;
-  close(FILE);
-
-  # Return the digest.
-  return $Result;
-}
-
 ##----------------------------------------------------------------------------##
 #  UpdatePrefix - Compute the common prefix of files.
 ##----------------------------------------------------------------------------##
@@ -374,8 +352,6 @@
 # Sometimes a source file is scanned more than once, and thus produces
 # multiple error reports.  We use a cache to solve this problem.
 
-my %AlreadyScanned;
-
 sub ScanFile {
 
   my $Index = shift;
@@ -383,19 +359,6 @@
   my $FName = shift;
   my $Stats = shift;
 
-  # Compute a digest for the report file.  Determine if we have already
-  # scanned a file that looks just like it.
-
-  my $digest = ComputeDigest("$Dir/$FName");
-
-  if (defined $AlreadyScanned{$digest}) {
-    # Redundant file.  Remove it.
-    unlink("$Dir/$FName");
-    return;
-  }
-
-  $AlreadyScanned{$digest} = 1;
-
   # At this point the report file is not world readable.  Make it happen.
   chmod(0644, "$Dir/$FName");
 
Index: clang/test/Analysis/scan-build/rebuild_index/subdirectory/report-4.html
===================================================================
--- clang/test/Analysis/scan-build/rebuild_index/subdirectory/report-4.html
+++ /dev/null
@@ -1,8 +0,0 @@
-<!-- BUGTYPE bug4 -->
-<!-- BUGFILE file4 -->
-<!-- BUGPATHLENGTH 4 -->
-<!-- BUGLINE 4 -->
-<!-- BUGCATEGORY cat4 -->
-<!-- BUGDESC desc4 -->
-<!-- FUNCTIONNAME func4 -->
-<!-- BUGMETAEND -->
Index: clang/test/Analysis/scan-build/rebuild_index/subdirectory/report-3.html
===================================================================
--- /dev/null
+++ clang/test/Analysis/scan-build/rebuild_index/subdirectory/report-3.html
@@ -0,0 +1,8 @@
+<!-- BUGTYPE bug3 -->
+<!-- BUGFILE file3 -->
+<!-- BUGPATHLENGTH 3 -->
+<!-- BUGLINE 3 -->
+<!-- BUGCATEGORY cat3 -->
+<!-- BUGDESC desc3 -->
+<!-- FUNCTIONNAME func3 -->
+<!-- BUGMETAEND -->
Index: clang/test/Analysis/scan-build/rebuild_index/report-3.html
===================================================================
--- clang/test/Analysis/scan-build/rebuild_index/report-3.html
+++ /dev/null
@@ -1,8 +0,0 @@
-<!-- BUGTYPE bug1 -->
-<!-- BUGFILE file1 -->
-<!-- BUGPATHLENGTH 1 -->
-<!-- BUGLINE 1 -->
-<!-- BUGCATEGORY cat1 -->
-<!-- BUGDESC desc1 -->
-<!-- FUNCTIONNAME func1 -->
-<!-- BUGMETAEND -->
Index: clang/test/Analysis/scan-build/rebuild_index/rebuild_index.test
===================================================================
--- clang/test/Analysis/scan-build/rebuild_index/rebuild_index.test
+++ clang/test/Analysis/scan-build/rebuild_index/rebuild_index.test
@@ -4,9 +4,8 @@
 RUN: rm -rf %t.output_dir && mkdir %t.output_dir
 RUN: cp %S/report-1.html %t.output_dir
 RUN: cp %S/report-2.html %t.output_dir
-RUN: cp %S/report-3.html %t.output_dir
 RUN: mkdir %t.output_dir/subdirectory
-RUN: cp %S/subdirectory/report-4.html %t.output_dir/subdirectory
+RUN: cp %S/subdirectory/report-3.html %t.output_dir/subdirectory
 
 RUN: %scan-build --generate-index-only %t.output_dir
 
@@ -15,16 +14,13 @@
 CHECK-FILES:      index.html
 CHECK-FILES-NEXT: report-1.html
 CHECK-FILES-NEXT: report-2.html
-
-// report-3.html is a duplicate of report-1.html so it's not present.
-CHECK-FILES-NOT:  report-3.html
 CHECK-FILES-NEXT: scanview.css
 CHECK-FILES-NEXT: sorttable.js
 CHECK-FILES-NEXT: subdirectory
 
 RUN: ls %t.output_dir/subdirectory | FileCheck -check-prefix CHECK-SUB %s
 
-CHECK-SUB: report-4.html
+CHECK-SUB: report-3.html
 
 RUN: cat %t.output_dir/index.html | FileCheck -check-prefix CHECK-INDEX %s
 
@@ -32,10 +28,9 @@
 CHECK-INDEX-NEXT: bug1
 CHECK-INDEX-NEXT: cat2
 CHECK-INDEX-NEXT: bug2
-CHECK-INDEX-NEXT: cat4
-CHECK-INDEX-NEXT: bug4
+CHECK-INDEX-NEXT: cat3
+CHECK-INDEX-NEXT: bug3
 
 CHECK-INDEX:     report-1.html#EndPath
 CHECK-INDEX:     report-2.html#EndPath
-CHECK-INDEX-NOT: report-3.html#EndPath
-CHECK-INDEX:     subdirectory/report-4.html#EndPath
+CHECK-INDEX:     subdirectory/report-3.html#EndPath
Index: clang/test/Analysis/scan-build/deduplication.test
===================================================================
--- /dev/null
+++ clang/test/Analysis/scan-build/deduplication.test
@@ -0,0 +1,40 @@
+// FIXME: Actually, "perl".
+REQUIRES: shell
+
+RUN: rm -rf %t.output_dir && mkdir %t.output_dir
+RUN: %scan-build -o %t.output_dir \
+RUN:             %clang -S %S/Inputs/deduplication/1.c \
+RUN:                       %S/Inputs/deduplication/2.c \
+RUN:     | FileCheck %s -check-prefix CHECK-STDOUT
+
+RUN: ls %t.output_dir/*/ | FileCheck %s -check-prefix CHECK-FILENAMES
+
+RUN: rm -rf %t.output_dir && mkdir %t.output_dir
+RUN: %scan-build -o %t.output_dir \
+RUN:             -analyzer-config stable-report-filename=true \
+RUN:             %clang -S %S/Inputs/deduplication/1.c \
+RUN:                       %S/Inputs/deduplication/2.c \
+RUN:     | FileCheck %s -check-prefix CHECK-STDOUT
+
+RUN: ls %t.output_dir/*/ | FileCheck %s -check-prefix CHECK-FILENAMES
+
+RUN: rm -rf %t.output_dir && mkdir %t.output_dir
+RUN: %scan-build -o %t.output_dir \
+RUN:             -analyzer-config verbose-report-filename=true \
+RUN:             %clang -S %S/Inputs/deduplication/1.c \
+RUN:                       %S/Inputs/deduplication/2.c \
+RUN:     | FileCheck %s -check-prefix CHECK-STDOUT
+
+RUN: ls %t.output_dir/*/ | FileCheck %s -check-prefix CHECK-FILENAMES
+
+// Only one report file should be generated.
+
+CHECK-STDOUT: scan-build: Using '{{.*}}' for static analysis
+CHECK-STDOUT: scan-build: 1 bug found.
+CHECK-STDOUT: scan-build: Run 'scan-view {{.*}}' to examine bug reports.
+
+
+CHECK-FILENAMES: index.html
+CHECK-FILENAMES-NEXT: report-{{.*}}.html
+CHECK-FILENAMES-NEXT: scanview.css
+CHECK-FILENAMES-NEXT: sorttable.js
Index: clang/test/Analysis/scan-build/Inputs/deduplication/header.h
===================================================================
--- /dev/null
+++ clang/test/Analysis/scan-build/Inputs/deduplication/header.h
@@ -0,0 +1,4 @@
+int foo() {
+  int x = 0;
+  return 1 / x;
+}
Index: clang/test/Analysis/scan-build/Inputs/deduplication/2.c
===================================================================
--- /dev/null
+++ clang/test/Analysis/scan-build/Inputs/deduplication/2.c
@@ -0,0 +1,5 @@
+#include "header.h"
+
+void bar() {
+  foo();
+}
Index: clang/test/Analysis/scan-build/Inputs/deduplication/1.c
===================================================================
--- /dev/null
+++ clang/test/Analysis/scan-build/Inputs/deduplication/1.c
@@ -0,0 +1,5 @@
+#include "header.h"
+
+void bar() {
+  foo();
+}
Index: clang/test/Analysis/analyzer-config.c
===================================================================
--- clang/test/Analysis/analyzer-config.c
+++ clang/test/Analysis/analyzer-config.c
@@ -119,4 +119,5 @@
 // CHECK-NEXT: unix.DynamicMemoryModeling:AddNoOwnershipChangeNotes = false
 // CHECK-NEXT: unix.DynamicMemoryModeling:Optimistic = false
 // CHECK-NEXT: unroll-loops = false
+// CHECK-NEXT: verbose-report-filename = false
 // CHECK-NEXT: widen-loops = false
Index: clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
+++ clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
@@ -245,6 +245,18 @@
     ReportDiag(*Diag, filesMade);
 }
 
+static llvm::SmallString<32> getIssueHash(const PathDiagnostic &D,
+                                          const Preprocessor &PP) {
+  SourceManager &SMgr = PP.getSourceManager();
+  PathDiagnosticLocation UPDLoc = D.getUniqueingLoc();
+  FullSourceLoc L(SMgr.getExpansionLoc(UPDLoc.isValid()
+                                           ? UPDLoc.asLocation()
+                                           : D.getLocation().asLocation()),
+                  SMgr);
+  return getIssueHash(L, D.getCheckerName(), D.getBugType(),
+                      D.getDeclWithIssue(), PP.getLangOpts());
+}
+
 void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D,
                                  FilesMade *filesMade) {
   // Create the HTML directory if it is missing.
@@ -271,11 +283,6 @@
   // Create a new rewriter to generate HTML.
   Rewriter R(const_cast<SourceManager&>(SMgr), PP.getLangOpts());
 
-  // The file for the first path element is considered the main report file, it
-  // will usually be equivalent to SMgr.getMainFileID(); however, it might be a
-  // header when -analyzer-opt-analyze-headers is used.
-  FileID ReportFile = path.front()->getLocation().asLocation().getExpansionLoc().getFileID();
-
   // Get the function/method name
   SmallString<128> declName("unknown");
   int offsetDecl = 0;
@@ -302,46 +309,52 @@
 
   // Create a path for the target HTML file.
   int FD;
-  SmallString<128> Model, ResultPath;
-
-  if (!DiagOpts.ShouldWriteStableReportFilename) {
-      llvm::sys::path::append(Model, Directory, "report-%%%%%%.html");
-      if (std::error_code EC =
-          llvm::sys::fs::make_absolute(Model)) {
-          llvm::errs() << "warning: could not make '" << Model
-                       << "' absolute: " << EC.message() << '\n';
-        return;
-      }
-      if (std::error_code EC = llvm::sys::fs::createUniqueFile(
-              Model, FD, ResultPath, llvm::sys::fs::OF_Text)) {
-        llvm::errs() << "warning: could not create file in '" << Directory
-                     << "': " << EC.message() << '\n';
-        return;
-      }
-  } else {
-      int i = 1;
-      std::error_code EC;
-      do {
-          // Find a filename which is not already used
-          const FileEntry* Entry = SMgr.getFileEntryForID(ReportFile);
-          std::stringstream filename;
-          Model = "";
-          filename << "report-"
-                   << llvm::sys::path::filename(Entry->getName()).str()
-                   << "-" << declName.c_str()
-                   << "-" << offsetDecl
-                   << "-" << i << ".html";
-          llvm::sys::path::append(Model, Directory,
-                                  filename.str());
-          EC = llvm::sys::fs::openFileForReadWrite(
-              Model, FD, llvm::sys::fs::CD_CreateNew, llvm::sys::fs::OF_None);
-          if (EC && EC != llvm::errc::file_exists) {
-              llvm::errs() << "warning: could not create file '" << Model
-                           << "': " << EC.message() << '\n';
-              return;
-          }
-          i++;
-      } while (EC);
+
+  SmallString<128> FileNameStr;
+  llvm::raw_svector_ostream FileName(FileNameStr);
+  FileName << "report-";
+
+  // Historically, neither the stable report filename nor the unstable report
+  // filename were actually stable. That said, the stable report filename
+  // was more stable because it was mostly composed of information
+  // about the bug report instead of being completely random.
+  // Now both stable and unstable report filenames are in fact stable
+  // but the stable report filename is still more verbose.
+  if (DiagOpts.ShouldWriteVerboseReportFilename) {
+    // FIXME: This code relies on knowing what constitutes the issue hash.
+    // Otherwise deduplication won't work correctly.
+    FileID ReportFile =
+        path.back()->getLocation().asLocation().getExpansionLoc().getFileID();
+
+    const FileEntry *Entry = SMgr.getFileEntryForID(ReportFile);
+
+    FileName << llvm::sys::path::filename(Entry->getName()).str() << "-"
+             << declName.c_str() << "-" << offsetDecl << "-";
+  }
+
+  FileName << StringRef(getIssueHash(D, PP)).substr(0, 6).str() << ".html";
+
+  SmallString<128> ResultPath;
+  llvm::sys::path::append(ResultPath, Directory, FileName.str());
+  if (std::error_code EC = llvm::sys::fs::make_absolute(ResultPath)) {
+    llvm::errs() << "warning: could not make '" << ResultPath
+                 << "' absolute: " << EC.message() << '\n';
+    return;
+  }
+
+  if (std::error_code EC = llvm::sys::fs::openFileForReadWrite(
+          ResultPath, FD, llvm::sys::fs::CD_CreateNew,
+          llvm::sys::fs::OF_None)) {
+    // Existence of the file corresponds to the situation where a different
+    // Clang instance has emitted a bug report with the same issue hash.
+    // This is an entirely normal situation that does not deserve a warning,
+    // as apart from hash collisions this can happen because the reports
+    // are in fact similar enough to be considered duplicates of each other.
+    if (EC != llvm::errc::file_exists) {
+      llvm::errs() << "warning: could not create file in '" << Directory
+                   << "': " << EC.message() << '\n';
+    }
+    return;
   }
 
   llvm::raw_fd_ostream os(FD, true);
@@ -638,7 +651,6 @@
                                              ? UPDLoc.asLocation()
                                              : D.getLocation().asLocation()),
                     SMgr);
-    const Decl *DeclWithIssue = D.getDeclWithIssue();
 
     StringRef BugCategory = D.getCategory();
     if (!BugCategory.empty())
@@ -650,9 +662,7 @@
 
     os  << "\n<!-- FUNCTIONNAME " <<  declName << " -->\n";
 
-    os << "\n<!-- ISSUEHASHCONTENTOFLINEINCONTEXT "
-       << getIssueHash(L, D.getCheckerName(), D.getBugType(), DeclWithIssue,
-                       PP.getLangOpts())
+    os << "\n<!-- ISSUEHASHCONTENTOFLINEINCONTEXT " << getIssueHash(D, PP)
        << " -->\n";
 
     os << "\n<!-- BUGLINE "
Index: clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
===================================================================
--- clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
+++ clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.h
@@ -395,7 +395,11 @@
     return {FullCompilerInvocation,
             ShouldDisplayMacroExpansions,
             ShouldSerializeStats,
-            ShouldWriteStableReportFilename,
+            // The stable report filename option is deprecated because
+            // file names are now always stable. Now the old option acts as
+            // an alias to the new verbose filename option because this
+            // closely mimics the behavior under the old option.
+            ShouldWriteStableReportFilename || ShouldWriteVerboseReportFilename,
             AnalyzerWerror,
             ShouldApplyFixIts,
             ShouldDisplayCheckerNameForText};
Index: clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
===================================================================
--- clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
+++ clang/include/clang/StaticAnalyzer/Core/AnalyzerOptions.def
@@ -190,7 +190,13 @@
                 false)
 
 ANALYZER_OPTION(bool, ShouldWriteStableReportFilename, "stable-report-filename",
-                "Whether or not the report filename should be random or not.",
+                "Deprecated: report filenames are now always stable. "
+                "See also 'verbose-report-filename'.",
+                false)
+
+ANALYZER_OPTION(bool, ShouldWriteVerboseReportFilename, "verbose-report-filename",
+                "Whether or not the report filename should contain extra "
+                "information about the issue.",
                 false)
 
 ANALYZER_OPTION(
Index: clang/include/clang/Analysis/PathDiagnostic.h
===================================================================
--- clang/include/clang/Analysis/PathDiagnostic.h
+++ clang/include/clang/Analysis/PathDiagnostic.h
@@ -75,14 +75,8 @@
   bool ShouldSerializeStats = false;
 
   /// If the consumer intends to produce multiple output files, should it
-  /// use randomly generated file names for these files (with the tiny risk of
-  /// having random collisions) or deterministic human-readable file names
-  /// (with a larger risk of deterministic collisions or invalid characters
-  /// in the file name). We should not really give this choice to the users
-  /// because deterministic mode is always superior when done right, but
-  /// for some consumers this mode is experimental and needs to be
-  /// off by default.
-  bool ShouldWriteStableReportFilename = false;
+  /// use a pseudo-random file name name or a human-readable file name.
+  bool ShouldWriteVerboseReportFilename = false;
 
   /// Whether the consumer should treat consumed diagnostics as hard errors.
   /// Useful for breaking your build when issues are found.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to