On Wed, Jan 23, 2013 at 04:24:01PM +0400, Evgeniy Stepanov wrote:
> > > What if glibc adds a scanf hook (like it has already printf hooks), apps
> > > could then register their own stuff and the above would then break.  It
> > > really should be very conservative, and should be checked e.g. with all
> > > glibc's *scanf tests (e.g. stdio-common/scanf[0-9]*.c,
> > > stdio-common/tst-sscanf.c).
> 
> I'll add support for the missing %specs. About the testing, these
> files seem like GPL, so I'd prefer not to look at them at all. We've
> got a smallish test for the scanf implementation in sanitizer_common,
> but it would be really great to run it on full glibc scanf tests.
> Would you be willing to setup such testing gnu-side?

Seems the code in llvm repo looks much better now to me than it used to,
still it breaks on:

#define _GNU_SOURCE 1
#include <stdio.h>
#include <string.h>

int
main ()
{
  char *p;
  long double q;
  if (sscanf ("abcdefghijklmno 1.0", "%ms %Lf", &p, &q) != 2
      || strcmp (p, "abcdefghijklmno") != 0
      || q != 1.0L)
    return 1;
  return 0;
}

because it mishandles %ms.

Attached patch fixes that and also handles %a when possible.
There are cases where it is unambiguous, e.g.
s%Las is s %La s, reading long double, or %ar is %a r, reading
float, other cases where we can check at least something and keep checking
the rest of the format string, e.g. for:
%as
we know it will either store float, or char * pointer, so just assume
conservatively the smaller size, and other cases where we have to punt,
%a[a%d]
(where % appears in the [...] list).

I've tested various glibc *scanf testcases with the GCC libsanitizer
+ enabling of # define SANITIZER_INTERCEPT_SCANF SI_NOT_WINDOWS
+ llvm svn diff between last merge point to GCC and current llvm trunk
+ this patch, and it looked good.

There is one further issue, I'd say you need to pass down to scanf_common
the result from the intercepted call, and use it to see if it is valid
to call strlen on the ptr, or if just 1 should be assumed for SSS_STRLEN.
Note 'n' doesn't count towards that.  Because, it is unsafe to call strlen
on arguments that haven't been assigned yet.  Testcase that still fails:

#define _GNU_SOURCE 1
#include <stdio.h>
#include <string.h>

int
main ()
{
  int a, b, c, d;
  char e[3], f[10];
  memset (e, ' ', sizeof e);
  memset (f, ' ', sizeof f);
  if (sscanf ("1 2 a", "%d%n%n%d %s %s", &a, &b, &c, &d, e, f) != 3
      || a != 1
      || b != 1
      || c != 1
      || d != 2
      || strcmp (e, "a") != 0)
    return 1;
  return 0;
}

Oh, one more thing, on Linux for recent enough glibc's it would be
desirable to also intercept:
__isoc99_sscanf, __isoc99_scanf, __isoc99_vsscanf, __isoc99_fscanf,
__isoc99_vfscanf, __isoc99_vscanf
functions (guard the interception with
#if defined __GLIBC_PREREQ && __GLIBC_PREREQ (2, 7)
).  All these work exactly like the corresponding non-__isoc99_
functions, just %a followed by s, S or [ always writes float, thus
there is no ambiguity.

        Jakub
--- sanitizer_common_interceptors_scanf.inc.jj  2013-02-08 13:21:51.000000000 
+0100
+++ sanitizer_common_interceptors_scanf.inc     2013-02-11 12:15:27.575871424 
+0100
@@ -18,11 +18,12 @@
 
 struct ScanfDirective {
   int argIdx;      // argument index, or -1 of not specified ("%n$")
-  bool suppressed; // suppress assignment ("*")
   int fieldWidth;
+  bool suppressed; // suppress assignment ("*")
   bool allocate; // allocate space ("m")
   char lengthModifier[2];
   char convSpecifier;
+  bool maybeGnuMalloc;
 };
 
 static const char *parse_number(const char *p, int *out) {
@@ -119,6 +120,31 @@ static const char *scanf_parse_next(cons
                   // Consume the closing ']'.
       ++p;
     }
+    // This is unfortunately ambiguous between old GNU extension
+    // of %as, %aS and %a[...] and newer POSIX %a followed by
+    // letters s, S or [.
+    if (dir->convSpecifier == 'a' && !dir->lengthModifier[0]) {
+      if (*p == 's' || *p == 'S') {
+       dir->maybeGnuMalloc = true;
+       ++p;
+      } else if (*p == '[') {
+       // Watch for %a[h-j%d], if % appears in the
+       // [...] range, then we need to give up, we don't know
+       // if scanf will parse it as POSIX %a [h-j %d ] or
+       // GNU allocation of string with range dh-j plus %.
+       const char *q = p + 1;
+       if (*q == '^')
+         ++q;
+       if (*q == ']')
+         ++q;
+       while (*q && *q != ']' && *q != '%')
+         ++q;
+       if (*q == 0 || *q == '%')
+         return 0;
+       p = q + 1; // Consume the closing ']'.
+       dir->maybeGnuMalloc = true;
+      }
+    }
     break;
   }
   return p;
@@ -131,9 +157,7 @@ static bool scanf_is_integer_conv(char c
 
 // Returns true if the character is an floating point conversion specifier.
 static bool scanf_is_float_conv(char c) {
-  return char_is_one_of(c, "AeEfFgG");
-  // NOTE: c == 'a' is ambiguous between POSIX and GNU and, therefore,
-  // unsupported.
+  return char_is_one_of(c, "aAeEfFgG");
 }
 
 // Returns string output character size for string-like conversions,
@@ -168,6 +192,21 @@ enum ScanfStoreSize {
 // Returns the store size of a scanf directive (if >0), or a value of
 // ScanfStoreSize.
 static int scanf_get_store_size(ScanfDirective *dir) {
+  if (dir->allocate) {
+    if (!char_is_one_of(dir->convSpecifier, "cCsS["))
+      return SSS_INVALID;
+    return sizeof(char *);
+  }
+
+  if (dir->maybeGnuMalloc) {
+    if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
+      return SSS_INVALID;
+    // This is ambiguous, so check the smaller size of char * (if it is
+    // a GNU extension of %as, %aS or %a[...]) and float (if it is
+    // POSIX %a followed by s, S or [ letters).
+    return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
+  }
+
   if (scanf_is_integer_conv(dir->convSpecifier)) {
     switch (dir->lengthModifier[0]) {
     case 'h':
@@ -256,10 +295,6 @@ static void scanf_common(void *ctx, cons
     }
     if (dir.suppressed)
       continue;
-    if (dir.allocate) {
-      // Unsupported;
-      continue;
-    }
 
     int size = scanf_get_store_size(&dir);
     if (size == SSS_INVALID)

Reply via email to