On 07/03/15 12:10, Pádraig Brady wrote:
> On 07/03/15 11:49, Ole Tange wrote:
>> These two commands give the same output:
>>
>> $ yes `echo {1..1000}` | head -c 2300M | md5sum
>> a0241f2247e9a37db60e7def3e4f7038  -
>>
>> $ yes "`echo {1..1000}`" | head -c 2300M | md5sum
>> a0241f2247e9a37db60e7def3e4f7038  -
>>
>> But the time to run is quite different:
>>
>> $ time yes "`echo {1..1000}`" | head -c 2300M >/dev/null
>>
>> real    0m0.897s
>> user    0m0.384s
>> sys     0m1.343s
>>
>> $ time yes `echo {1..1000}` | head -c 2300M >/dev/null
>>
>> real    0m11.352s
>> user    0m10.571s
>> sys     0m2.590s
>>
>> WTF?!
>>
>> I imagine 'yes' spends a lot of time collecting the 1000 args. But why
>> does it do that more than once?
> 
> The stdio interactions dominate here.
> The slow case has 1000 times more fputs_unlocked() calls.
> Yes we could build the line up once and output that.
> If doing that we could also build up a BUFSIZ of complete lines
> to output at a time, in which case you'd probably avoid stdio altogether.

The attached should make things more efficient here.

thanks,
Pádraig.

>From 7959bbf19307705e98f08cfa32a9dcf67672590c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <[email protected]>
Date: Mon, 9 Mar 2015 19:27:32 +0000
Subject: [PATCH] yes: output data more efficiently

yes(1) may be used to generate repeating patterns of text
for test inputs etc., so adjust to be more efficient.

Profiling the case where yes(1) is outputting small items
through stdio (which was the default case), shows the overhead
continuously processing small items in main() and in stdio:

    $ yes >/dev/null & perf top -p $!
    31.02%  yes           [.] main
    27.36%  libc-2.20.so  [.] _IO_file_xsputn@@GLIBC_2.2.5
    14.51%  libc-2.20.so  [.] fputs_unlocked
    13.50%  libc-2.20.so  [.] strlen
    10.66%  libc-2.20.so  [.] __GI___mempcpy
     1.98%  yes           [.] fputs_unlocked@plta

Sending more data per stdio call improves the situation,
but still, there is significant stdio overhead due to memory copies,
and the repeated string length checking:

    $ yes "`echo {1..1000}`" >/dev/null & perf top -p $!
    42.26%  libc-2.20.so  [.] __GI___mempcpy
    17.38%  libc-2.20.so  [.] strlen
     5.21%  [kernel]      [k] __srcu_read_lock
     4.58%  [kernel]      [k] __srcu_read_unlock
     4.27%  libc-2.20.so  [.] _IO_file_xsputn@@GLIBC_2.2.5
     2.50%  libc-2.20.so  [.] __GI___libc_write
     2.45%  [kernel]      [k] system_call
     2.40%  [kernel]      [k] system_call_after_swapgs
     2.27%  [kernel]      [k] vfs_write
     2.09%  libc-2.20.so  [.] _IO_do_write@@GLIBC_2.2.5
     2.01%  [kernel]      [k] fsnotify
     1.95%  libc-2.20.so  [.] _IO_file_write@@GLIBC_2.2.5
     1.44%  yes           [.] main

We can avoid all stdio overhead by building up the buffer
_once_ and outputting that, and the profile below shows
the bottleneck moved to the kernel:

    $ src/yes >/dev/null & perf top -p $!
    15.42%  [kernel]      [k] __srcu_read_lock
    12.98%  [kernel]      [k] __srcu_read_unlock
     9.41%  libc-2.20.so  [.] __GI___libc_write
     9.11%  [kernel]      [k] vfs_write
     8.35%  [kernel]      [k] fsnotify
     8.02%  [kernel]      [k] system_call
     5.84%  [kernel]      [k] system_call_after_swapgs
     4.54%  [kernel]      [k] __fget_light
     3.98%  [kernel]      [k] sys_write
     3.65%  [kernel]      [k] selinux_file_permission
     3.44%  [kernel]      [k] rw_verify_area
     2.94%  [kernel]      [k] __fsnotify_parent
     2.76%  [kernel]      [k] security_file_permission
     2.39%  yes           [.] main
     2.17%  [kernel]      [k] __fdget_pos
     2.13%  [kernel]      [k] sysret_check
     0.81%  [kernel]      [k] write_null
     0.36%  yes           [.] write@plt

Note this change also ensures that yes will only write complete lines
for lines softer than BUFSIZ.

* src/yes.c (main): Build up a BUFSIZ buffer of lines,
and output that, rather than having stdio process each item.
* tests/misc/yes.sh: Add a new test for various buffer sizes.
* tests/local.mk: Reference the new test.
Fixes http://bugs.gnu.org/20029
---
 src/yes.c         | 43 +++++++++++++++++++++++++++++++++++++++++--
 tests/local.mk    |  1 +
 tests/misc/yes.sh | 28 ++++++++++++++++++++++++++++
 3 files changed, 70 insertions(+), 2 deletions(-)
 create mode 100755 tests/misc/yes.sh

diff --git a/src/yes.c b/src/yes.c
index b35b13f..91dea11 100644
--- a/src/yes.c
+++ b/src/yes.c
@@ -58,6 +58,10 @@ Repeatedly output a line with all specified STRING(s), or 'y'.\n\
 int
 main (int argc, char **argv)
 {
+  char buf[BUFSIZ];
+  char *pbuf = buf;
+  int i;
+
   initialize_main (&argc, &argv);
   set_program_name (argv[0]);
   setlocale (LC_ALL, "");
@@ -77,9 +81,44 @@ main (int argc, char **argv)
       argv[argc++] = bad_cast ("y");
     }
 
-  while (true)
+  /* Buffer data locally once, rather than having the
+     large overhead of stdio buffering each item.   */
+  for (i = optind; i < argc; i++)
+    {
+      size_t len = strlen (argv[i]);
+      if (BUFSIZ < len || BUFSIZ - len <= pbuf - buf)
+        break;
+      memcpy (pbuf, argv[i], len);
+      pbuf += len;
+      *pbuf++ = i == argc - 1 ? '\n' : ' ';
+    }
+  if (i < argc)
+    pbuf = NULL;
+  else
+    {
+      size_t line_len = pbuf - buf;
+      size_t lines = BUFSIZ / line_len;
+      while (--lines)
+        {
+          memcpy (pbuf, pbuf - line_len, line_len);
+          pbuf += line_len;
+        }
+    }
+
+  /* The normal case is to continuously output the local buffer.  */
+  while (pbuf)
+    {
+      if (write (STDOUT_FILENO, buf, pbuf - buf) == -1)
+        {
+          error (0, errno, _("standard output"));
+          return EXIT_FAILURE;
+        }
+    }
+
+  /* If the data doesn't fit in BUFSIZ then it's large
+     and not too onerous to output using stdio in any case.  */
+  while (! pbuf)
     {
-      int i;
       for (i = optind; i < argc; i++)
         if (fputs (argv[i], stdout) == EOF
             || putchar (i == argc - 1 ? '\n' : ' ') == EOF)
diff --git a/tests/local.mk b/tests/local.mk
index 9a52080..56cba69 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -402,6 +402,7 @@ all_tests =					\
   tests/misc/uniq.pl				\
   tests/misc/uniq-perf.sh			\
   tests/misc/xattr.sh				\
+  tests/misc/yes.sh				\
   tests/tail-2/wait.sh				\
   tests/tail-2/retry.sh				\
   tests/tail-2/symlink.sh			\
diff --git a/tests/misc/yes.sh b/tests/misc/yes.sh
new file mode 100755
index 0000000..8828030
--- /dev/null
+++ b/tests/misc/yes.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+# Validate yes buffer handling
+
+# Copyright (C) 2015 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ yes
+
+for size in 1 4095 4096 8191 8192 16383 16384; do
+  printf "%${size}s\n" '' > out.1
+  yes "$(printf %${size}s '')" | head -n2 | uniq > out.2
+  compare out.1 out.2 || fail=1
+done
+
+Exit $fail
-- 
2.1.0

Reply via email to