martong created this revision.
martong added reviewers: Szelethus, NoQ.
Herald added subscribers: cfe-commits, steakhal, Charusso, gamesh411, dkrupp, 
donat.nagy, mikhail.ramalho, a.sidorin, rnkovacs, szepet, baloghadamsoftware, 
kristof.beyls, xazax.hun, whisperity.
Herald added a project: clang.
martong added a child revision: D73898: [analyzer] StdLibraryFunctionsChecker: 
Add argument constraints.

Both EOF and the max value of unsigned char is platform dependent. In this
patch we try our best to deduce the value of EOF from the Preprocessor,
if we can't we fall back to -1.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D74473

Files:
  clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
  clang/test/Analysis/std-c-library-functions-eof.c

Index: clang/test/Analysis/std-c-library-functions-eof.c
===================================================================
--- /dev/null
+++ clang/test/Analysis/std-c-library-functions-eof.c
@@ -0,0 +1,26 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=apiModeling.StdCLibraryFunctions,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s
+// RUN: %clang_analyze_cc1 -triple i686-unknown-linux -analyzer-checker=apiModeling.StdCLibraryFunctions,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s
+// RUN: %clang_analyze_cc1 -triple x86_64-unknown-linux -analyzer-checker=apiModeling.StdCLibraryFunctions,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s
+// RUN: %clang_analyze_cc1 -triple armv7-a15-linux -analyzer-checker=apiModeling.StdCLibraryFunctions,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s
+// RUN: %clang_analyze_cc1 -triple thumbv7-a15-linux -analyzer-checker=apiModeling.StdCLibraryFunctions,debug.ExprInspection -verify -analyzer-config eagerly-assume=false %s
+
+void clang_analyzer_eval(int);
+
+typedef struct FILE FILE;
+// Unorthodox EOF value.
+#define EOF -2
+
+int getc(FILE *);
+void test_getc(FILE *fp) {
+
+  int x;
+  while ((x = getc(fp)) != EOF) {
+    clang_analyzer_eval(x > 255); // expected-warning{{FALSE}}
+    clang_analyzer_eval(x >= 0); // expected-warning{{TRUE}}
+  }
+
+  int y = getc(fp);
+  if (y < 0) {
+    clang_analyzer_eval(y == EOF); // expected-warning{{TRUE}}
+  }
+}
Index: clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
+++ clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp
@@ -241,7 +241,7 @@
                                         const CallExpr *CE,
                                         CheckerContext &C) const;
 
-  void initFunctionSummaries(BasicValueFactory &BVF) const;
+  void initFunctionSummaries(CheckerContext &C) const;
 };
 } // end of anonymous namespace
 
@@ -312,10 +312,11 @@
     for (size_t I = 1; I != E; ++I) {
       const llvm::APSInt &Min = BVF.getValue(R[I - 1].second + 1ULL, T);
       const llvm::APSInt &Max = BVF.getValue(R[I].first - 1ULL, T);
-      assert(Min <= Max);
-      State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
-      if (!State)
-        return nullptr;
+      if (Min <= Max) {
+        State = CM.assumeInclusiveRange(State, *N, Min, Max, false);
+        if (!State)
+          return nullptr;
+      }
     }
   }
 
@@ -449,9 +450,7 @@
   if (!FD)
     return None;
 
-  SValBuilder &SVB = C.getSValBuilder();
-  BasicValueFactory &BVF = SVB.getBasicValueFactory();
-  initFunctionSummaries(BVF);
+  initFunctionSummaries(C);
 
   IdentifierInfo *II = FD->getIdentifier();
   if (!II)
@@ -478,11 +477,13 @@
 }
 
 void StdLibraryFunctionsChecker::initFunctionSummaries(
-    BasicValueFactory &BVF) const {
+    CheckerContext &C) const {
   if (!FunctionSummaryMap.empty())
     return;
 
-  ASTContext &ACtx = BVF.getContext();
+  SValBuilder &SVB = C.getSValBuilder();
+  BasicValueFactory &BVF = SVB.getBasicValueFactory();
+  const ASTContext &ACtx = BVF.getContext();
 
   // These types are useful for writing specifications quickly,
   // New specifications should probably introduce more types.
@@ -491,15 +492,47 @@
   // of function summary for common cases (eg. ssize_t could be int or long
   // or long long, so three summary variants would be enough).
   // Of course, function variants are also useful for C++ overloads.
-  QualType Irrelevant; // A placeholder, whenever we do not care about the type.
-  QualType IntTy = ACtx.IntTy;
-  QualType LongTy = ACtx.LongTy;
-  QualType LongLongTy = ACtx.LongLongTy;
-  QualType SizeTy = ACtx.getSizeType();
-
-  RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue();
-  RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue();
-  RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue();
+  const QualType
+      Irrelevant; // A placeholder, whenever we do not care about the type.
+  const QualType IntTy = ACtx.IntTy;
+  const QualType LongTy = ACtx.LongTy;
+  const QualType LongLongTy = ACtx.LongLongTy;
+  const QualType SizeTy = ACtx.getSizeType();
+
+  const RangeInt IntMax = BVF.getMaxValue(IntTy).getLimitedValue();
+  const RangeInt LongMax = BVF.getMaxValue(LongTy).getLimitedValue();
+  const RangeInt LongLongMax = BVF.getMaxValue(LongLongTy).getLimitedValue();
+
+  const RangeInt UCharMax =
+      BVF.getMaxValue(ACtx.UnsignedCharTy).getLimitedValue();
+
+  const Preprocessor &PP = C.getPreprocessor();
+
+  // The platform dependent value of EOF.
+  // Try our best to parse this from the Preprocessor, otherwise fallback to -1.
+  const auto EOFv = [&PP]() -> RangeInt {
+    const auto EOFMacroIt = llvm::find_if(
+        PP.macros(), [](const auto &K) { return K.first->getName() == "EOF"; });
+    if (EOFMacroIt == PP.macro_end())
+      return -1;
+    const MacroInfo *MI = PP.getMacroInfo(EOFMacroIt->first);
+
+    // Parse an integer at the end of the macro definition.
+    const Token &T = MI->tokens().back();
+    StringRef ValueStr = StringRef(T.getLiteralData(), T.getLength());
+    llvm::APInt IntValue;
+    constexpr unsigned AutoSenseRadix = 0;
+    if (ValueStr.getAsInteger(AutoSenseRadix, IntValue))
+      return -1;
+
+    // Parse an optional minus sign.
+    if (MI->tokens().size() == 2)
+      if (MI->tokens().front().is(tok::minus))
+        IntValue = -IntValue;
+
+    IntValue.dump();
+    return IntValue.getSExtValue();
+  }();
 
   // We are finally ready to define specifications for all supported functions.
   //
@@ -552,7 +585,8 @@
   // Templates for summaries that are reused by many functions.
   auto Getc = [&]() {
     return Summary(ArgTypes{Irrelevant}, RetType{IntTy}, NoEvalCall)
-        .Case({ReturnValueCondition(WithinRange, Range(-1, 255))});
+        .Case(
+            {ReturnValueCondition(WithinRange, {{EOFv, EOFv}, {0, UCharMax}})});
   };
   auto Read = [&](RetType R, RangeInt Max) {
     return Summary(ArgTypes{Irrelevant, Irrelevant, SizeTy}, RetType{R},
@@ -587,10 +621,12 @@
                   // The locale-specific range.
                   // No post-condition. We are completely unaware of
                   // locale-specific return values.
-                  .Case({ArgumentCondition(0U, WithinRange, {{128, 255}})})
-                  .Case({ArgumentCondition(
-                             0U, OutOfRange,
-                             {{'0', '9'}, {'A', 'Z'}, {'a', 'z'}, {128, 255}}),
+                  .Case({ArgumentCondition(0U, WithinRange, {{128, UCharMax}})})
+                  .Case({ArgumentCondition(0U, OutOfRange,
+                                           {{'0', '9'},
+                                            {'A', 'Z'},
+                                            {'a', 'z'},
+                                            {128, UCharMax}}),
                          ReturnValueCondition(WithinRange, SingleValue(0))})},
       },
       {
@@ -601,11 +637,11 @@
                                            {{'A', 'Z'}, {'a', 'z'}}),
                          ReturnValueCondition(OutOfRange, SingleValue(0))})
                   // The locale-specific range.
-                  .Case({ArgumentCondition(0U, WithinRange, {{128, 255}})})
-                  .Case(
-                      {ArgumentCondition(0U, OutOfRange,
-                                         {{'A', 'Z'}, {'a', 'z'}, {128, 255}}),
-                       ReturnValueCondition(WithinRange, SingleValue(0))})},
+                  .Case({ArgumentCondition(0U, WithinRange, {{128, UCharMax}})})
+                  .Case({ArgumentCondition(
+                             0U, OutOfRange,
+                             {{'A', 'Z'}, {'a', 'z'}, {128, UCharMax}}),
+                         ReturnValueCondition(WithinRange, SingleValue(0))})},
       },
       {
           "isascii",
@@ -668,9 +704,9 @@
                          ArgumentCondition(0U, OutOfRange, Range('a', 'z')),
                          ReturnValueCondition(WithinRange, SingleValue(0))})
                   // The locale-specific range.
-                  .Case({ArgumentCondition(0U, WithinRange, {{128, 255}})})
+                  .Case({ArgumentCondition(0U, WithinRange, {{128, UCharMax}})})
                   // Is not an unsigned char.
-                  .Case({ArgumentCondition(0U, OutOfRange, Range(0, 255)),
+                  .Case({ArgumentCondition(0U, OutOfRange, Range(0, UCharMax)),
                          ReturnValueCondition(WithinRange, SingleValue(0))})},
       },
       {
@@ -704,9 +740,10 @@
                                            {{9, 13}, {' ', ' '}}),
                          ReturnValueCondition(OutOfRange, SingleValue(0))})
                   // The locale-specific range.
-                  .Case({ArgumentCondition(0U, WithinRange, {{128, 255}})})
-                  .Case({ArgumentCondition(0U, OutOfRange,
-                                           {{9, 13}, {' ', ' '}, {128, 255}}),
+                  .Case({ArgumentCondition(0U, WithinRange, {{128, UCharMax}})})
+                  .Case({ArgumentCondition(
+                             0U, OutOfRange,
+                             {{9, 13}, {' ', ' '}, {128, UCharMax}}),
                          ReturnValueCondition(WithinRange, SingleValue(0))})},
       },
       {
@@ -717,10 +754,10 @@
                   .Case({ArgumentCondition(0U, WithinRange, Range('A', 'Z')),
                          ReturnValueCondition(OutOfRange, SingleValue(0))})
                   // The locale-specific range.
-                  .Case({ArgumentCondition(0U, WithinRange, {{128, 255}})})
+                  .Case({ArgumentCondition(0U, WithinRange, {{128, UCharMax}})})
                   // Other.
                   .Case({ArgumentCondition(0U, OutOfRange,
-                                           {{'A', 'Z'}, {128, 255}}),
+                                           {{'A', 'Z'}, {128, UCharMax}}),
                          ReturnValueCondition(WithinRange, SingleValue(0))})},
       },
       {
@@ -740,9 +777,10 @@
       // The getc() family of functions that returns either a char or an EOF.
       {"getc", Summaries{Getc()}},
       {"fgetc", Summaries{Getc()}},
-      {"getchar", Summaries{Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall)
-                                .Case({ReturnValueCondition(WithinRange,
-                                                            Range(-1, 255))})}},
+      {"getchar",
+       Summaries{Summary(ArgTypes{}, RetType{IntTy}, NoEvalCall)
+                     .Case({ReturnValueCondition(
+                         WithinRange, {{EOFv, EOFv}, {0, UCharMax}})})}},
 
       // read()-like functions that never return more than buffer size.
       // We are not sure how ssize_t is defined on every platform, so we
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to