On 01/23/2013 12:34 PM, Pádraig Brady wrote:
> On 01/23/2013 02:32 AM, Lei Zhang wrote:
>> Hi All,
>>
>> We found a bug in the `head' program of coreutils 8.20:
>>
>> Invoking `head -c -P' or `head -c -E' will cause memory exhaustion.
>>
>> However, smaller units (e.g., b, K, M) work fine; bigger units (e.g., Z, Y)
>> fail properly
>> (by outputing "number of bytes is so large that it is not representable").
>> And `-n' also
>> works fine.
>>
>> A bit dig reveals that the bug is introduced at line 322 of head.c
>> (coreutils 8.20):
>>
>> 204: elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t
>> n_elide_0)
>> 205: {
>> 322:     b = xcalloc (n_bufs, sizeof *b); /** this statement fails **/
>> 398: }
>>
>> We also examined n_elide and n_bufs before that statement. Actually, they
>> are very
>> large:
>>
>> n_elide: 1125899906842624
>> n_bufs: 137438953474
>>
>> According to the following comment in the source file:
>>
>>> CAUTION: do not fail (out of memory) when asked to elide
>>> a ridiculous amount, but when given only a small input.  */
>>
>> We think this is a bug and bring this issue to your attention. Thanks!
> 
> There is the argument that we _should_ allocate
> everything up front to indicate immediately
> that the system can't (currently) support the requested operation,
> but given the 'currently' caveat above I guess it's
> more general to fail when we actually run out of mem?
> 
> So to do that we can either realloc our pointer array
> in chunks or use the simpler approach in the patch below,
> and take advantage of kernel overcommit strategies.
> (calloc is problematic for overcommit as zeroing the
> memory fully allocates it to the process).
> The caveat with that though is that it's dependent
> on the overcommit config for the kernel and possibly
> current memory conditions.
> 
> thanks,
> Pádraig.
> 
> diff --git a/src/head.c b/src/head.c
> index cf84a95..909be04 100644
> --- a/src/head.c
> +++ b/src/head.c
> @@ -197,7 +197,7 @@ copy_fd (int src_fd, FILE *o_stream, uintmax_t n_bytes)
>    return COPY_FD_OK;
>  }
> 
> -/* Print all but the last N_ELIDE lines from the input available via
> +/* Print all but the last N_ELIDE bytes from the input available via
>     the non-seekable file descriptor FD.  Return true upon success.
>     Give a diagnostic and return false upon error.  */
>  static bool
> @@ -314,18 +314,22 @@ elide_tail_bytes_pipe (const char *filename, int fd, 
> uintmax_t n_elide_0)
>        size_t n_read;
>        bool buffered_enough;
>        size_t i, i_next;
> +      size_t n_alloc = 0;
>        char **b;
>        /* Round n_elide up to a multiple of READ_BUFSIZE.  */
>        size_t rem = READ_BUFSIZE - (n_elide % READ_BUFSIZE);
>        size_t n_elide_round = n_elide + rem;
>        size_t n_bufs = n_elide_round / READ_BUFSIZE + 1;
> -      b = xcalloc (n_bufs, sizeof *b);
> +      b = xnmalloc (n_bufs, sizeof *b);
> 
>        buffered_enough = false;
>        for (i = 0, i_next = 1; !eof; i = i_next, i_next = (i_next + 1) % 
> n_bufs)
>          {
> -          if (b[i] == NULL)
> -            b[i] = xmalloc (READ_BUFSIZE);
> +          if (! buffered_enough)
> +            {
> +              b[i] = xmalloc (READ_BUFSIZE);
> +              n_alloc = i + 1;
> +            }
>            n_read = full_read (fd, b[i], READ_BUFSIZE);
>            if (n_read < READ_BUFSIZE)
>              {
> @@ -389,7 +393,7 @@ elide_tail_bytes_pipe (const char *filename, int fd, 
> uintmax_t n_elide_0)
>          }
> 
>      free_mem:
> -      for (i = 0; i < n_bufs; i++)
> +      for (i = 0; i < n_alloc; i++)
>          free (b[i]);
>        free (b);

I expect to push soon, the attached more complete fix to realloc the array.

thanks,
Pádraig.

>From 57d55485f0d2014c076f2cbfe0b340d1f2046952 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Wed, 23 Jan 2013 12:34:46 +0000
Subject: [PATCH] head: with --bytes=-N only allocate memory as needed

* src/head.c (elide_tail_bytes_pipe): Don't use calloc as that
bypasses memory overcommit due to the zeroing requirement.
Also realloc rather than malloc the pointer array to avoid
dependence on overcommit entirely.
* tests/misc/head-c.sh: Add a test case.
Fixes http://bugs.gnu.org/13530
---
 src/head.c           |   28 ++++++++++++++++++++++------
 tests/misc/head-c.sh |    9 +++++++--
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/src/head.c b/src/head.c
index d79d5f7..00e1be1 100644
--- a/src/head.c
+++ b/src/head.c
@@ -196,7 +196,7 @@ copy_fd (int src_fd, FILE *o_stream, uintmax_t n_bytes)
   return COPY_FD_OK;
 }
 
-/* Print all but the last N_ELIDE lines from the input available via
+/* Print all but the last N_ELIDE bytes from the input available via
    the non-seekable file descriptor FD.  Return true upon success.
    Give a diagnostic and return false upon error.  */
 static bool
@@ -313,18 +313,34 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
       size_t n_read;
       bool buffered_enough;
       size_t i, i_next;
-      char **b;
+      char **b = NULL;
       /* Round n_elide up to a multiple of READ_BUFSIZE.  */
       size_t rem = READ_BUFSIZE - (n_elide % READ_BUFSIZE);
       size_t n_elide_round = n_elide + rem;
       size_t n_bufs = n_elide_round / READ_BUFSIZE + 1;
-      b = xcalloc (n_bufs, sizeof *b);
+      size_t n_alloc = 0;
+      size_t n_array_alloc = 0;
 
       buffered_enough = false;
       for (i = 0, i_next = 1; !eof; i = i_next, i_next = (i_next + 1) % n_bufs)
         {
-          if (b[i] == NULL)
-            b[i] = xmalloc (READ_BUFSIZE);
+          if (n_array_alloc == i)
+            {
+              /* reallocate between 16 and n_bufs entries.  */
+              if (n_array_alloc == 0)
+                n_array_alloc = MIN (n_bufs, 16);
+              else if (n_array_alloc <= n_bufs / 2)
+                n_array_alloc *= 2;
+              else
+                n_array_alloc = n_bufs;
+              b = xnrealloc (b, n_array_alloc, sizeof *b);
+            }
+
+          if (! buffered_enough)
+            {
+              b[i] = xmalloc (READ_BUFSIZE);
+              n_alloc = i + 1;
+            }
           n_read = full_read (fd, b[i], READ_BUFSIZE);
           if (n_read < READ_BUFSIZE)
             {
@@ -388,7 +404,7 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
         }
 
     free_mem:
-      for (i = 0; i < n_bufs; i++)
+      for (i = 0; i < n_alloc; i++)
         free (b[i]);
       free (b);
 
diff --git a/tests/misc/head-c.sh b/tests/misc/head-c.sh
index 6807c4d..eada8d5 100755
--- a/tests/misc/head-c.sh
+++ b/tests/misc/head-c.sh
@@ -1,5 +1,5 @@
 #!/bin/sh
-# exercise the fix of 2001-08-18, based on test case from Ian Bruce
+# exercise head -c
 
 # Copyright (C) 2001-2013 Free Software Foundation, Inc.
 
@@ -19,12 +19,17 @@
 . "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
 print_ver_ head
 
+# exercise the fix of 2001-08-18, based on test case from Ian Bruce
 echo abc > in || framework_failure_
-
 (head -c1; head -c1) < in > out || fail=1
 case "$(cat out)" in
   ab) ;;
   *) fail=1 ;;
 esac
 
+# Only allocate memory as needed.
+# Coreutils <= 8.21 would allocate memory up front
+# based on the value passed to -c
+(ulimit -v 20000; head --bytes=-E < /dev/null) || fail=1
+
 Exit $fail
-- 
1.7.7.6

Reply via email to