Although it is perhaps not in the true Unix spirit of fixed-size
buffers, I've written a patch that adds automatic storage allocation
to the scanf() family of functions when dealing with strings.  I later
found out that GNU has a very similar interface in its glibc
scanf() functions, so I made sure that the behavior was similar in
an effort to maximize portability.  The result is attached.

With this patch, something like this is now possible:

  char *buf;
  sscanf(string, "%as bar", &buf);
or
  sscanf(string, "%a[^ ]", &buf);

  (Provided that string was "foo bar", buf would now contain "foo".)

Resulting strings can be freed by free(3).

The only difference between my implementation and the one in
GNU libc is that glibc will return with an error if one of the
stdlib memory allocation functions fails, whereas this will
continue to run through the string and simply place a NULL at
the address where the resulting string would normally have gone.
This could be changed if one were so inclined.

If this sort of thing is frowned upon for whatever reason, feel
free to ignore this message -- I just thought it may be useful.

(By the way, this patch is meant for RELENG_4, not HEAD.)
--- /usr/src/lib/libc/stdio/vfscanf.c.orig      Sun Apr  8 21:19:16 2001
+++ /usr/src/lib/libc/stdio/vfscanf.c   Sun Apr  8 21:19:21 2001
@@ -69,6 +69,7 @@
 #define        SUPPRESS        0x08    /* suppress assignment */
 #define        POINTER         0x10    /* weird %p pointer (`fake hex') */
 #define        NOSKIP          0x20    /* do not skip blanks */
+#define        ALLOC           0x800   /* allocate space for string assignment */
 #define        QUAD            0x400
 
 /*
@@ -170,6 +171,9 @@
                case 'L':
                        flags |= LONGDBL;
                        goto again;
+               case 'a':
+                       flags |= ALLOC;
+                       goto again;
                case 'h':
                        flags |= SHORT;
                        goto again;
@@ -361,7 +365,7 @@
                        if (flags & SUPPRESS) {
                                n = 0;
                                while (ccltab[*fp->_p]) {
-                                       n++, fp->_r--, fp->_p++;
+                                       n++, nread++, fp->_r--, fp->_p++;
                                        if (--width == 0)
                                                break;
                                        if (fp->_r <= 0 && __srefill(fp)) {
@@ -373,10 +377,32 @@
                                if (n == 0)
                                        goto match_failure;
                        } else {
-                               p0 = p = va_arg(ap, char *);
+                               if (flags & ALLOC) {
+                                       /*
+                                        * 64 is optimistic.  If malloc fails,
+                                        * we'll skip this string as if SUPPRESS
+                                        * was set in ``flags."
+                                        */
+                                       n = 64;
+                                       p0 = p = (char *)malloc(n);
+                               } else
+                                       p0 = p = va_arg(ap, char *);
+
                                while (ccltab[*fp->_p]) {
                                        fp->_r--;
-                                       *p++ = *fp->_p++;
+                                       if (p0)
+                                               *p++ = *fp->_p;
+                                       fp->_p++, nread++;
+                                       if (flags & ALLOC && p0 && p == p0 + n){
+                                               size_t p0_off = p - p0;
+                                               /*
+                                                * Retain the offset of p, and
+                                                * reallocate p0.
+                                                */
+                                               n += 64;
+                                               p0 = (char *)reallocf(p0, n);
+                                               p  = p0 + p0_off;
+                                       }
                                        if (--width == 0)
                                                break;
                                        if (fp->_r <= 0 && __srefill(fp)) {
@@ -385,13 +411,14 @@
                                                break;
                                        }
                                }
-                               n = p - p0;
-                               if (n == 0)
+                               if (p == p0)
                                        goto match_failure;
-                               *p = 0;
+                               if (p0)
+                                       *p = 0;
+                               if (flags & ALLOC)
+                                       *va_arg(ap, char **) = p0;
                                nassigned++;
                        }
-                       nread += n;
                        nconversions++;
                        break;
 
@@ -410,17 +437,36 @@
                                }
                                nread += n;
                        } else {
-                               p0 = p = va_arg(ap, char *);
+                               if (flags & ALLOC) {
+                                       n = 64;
+                                       p0 = p = (char *)malloc(n);
+                               } else
+                                       p0 = p = va_arg(ap, char *);
+
                                while (!isspace(*fp->_p)) {
                                        fp->_r--;
-                                       *p++ = *fp->_p++;
+                                       if (p0)
+                                               *p++ = *fp->_p;
+                                       fp->_p++, nread++;
+                                       if (flags & ALLOC && p0 && p == p0 + n){
+                                               size_t p0_off = p - p0;
+                                               /*
+                                                * We ran out of buffer;
+                                                * reallocate.
+                                                */
+                                               n += 64;
+                                               p0 = (char *)reallocf(p0, n);
+                                               p  = p0 + p0_off;
+                                       }
                                        if (--width == 0)
                                                break;
                                        if (fp->_r <= 0 && __srefill(fp))
                                                break;
                                }
-                               *p = 0;
-                               nread += p - p0;
+                               if (p0)
+                                       *p = 0;
+                               if (flags & ALLOC)
+                                       *va_arg(ap, char **) = p0;
                                nassigned++;
                        }
                        nconversions++;
--- /usr/src/lib/libc/stdio/scanf.3.orig        Sun Apr  8 22:37:14 2001
+++ /usr/src/lib/libc/stdio/scanf.3     Sun Apr  8 22:26:18 2001
@@ -174,6 +174,12 @@
 (This type is not implemented; the
 .Cm L
 flag is currently ignored.)
+.It Cm a
+(Applicable only to string conversions; see below.)  Indicates that storage for
+any resulting string should be allocated automatically.  The next pointer must
+be a pointer to a string pointer
+.Em ( "char **" ) ;
+a pointer to the newly allocated storage will be placed at this address.
 .It Cm q
 Indicates either that the conversion will be one of
 .Cm dioux

Reply via email to