This removes a tiny amount of overhead:

    $ seq 10000000 > input
    $ perf stat -e cpu-clock --repeat 1000 taskset 1 ./src/tac \
        input 2>&1 > /dev/null | grep -F 'seconds time'
              0.095707 +- 0.000223 seconds time elapsed  ( +-  0.23% )
    $ perf stat -e cpu-clock --repeat 1000 taskset 1 ./src/tac-prev \
        input 2>&1 > /dev/null | grep -F 'seconds time'
             0.1009378 +- 0.0000995 seconds time elapsed  ( +-  0.10% )

* src/tac.c (output): Use full_write instead of fwrite since we already
buffer the output ourselves.
---
 src/tac.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/tac.c b/src/tac.c
index e63f70fc3..1845791ab 100644
--- a/src/tac.c
+++ b/src/tac.c
@@ -45,6 +45,7 @@ tac -r -s '.\|
 
 #include "filenamecat.h"
 #include "full-read.h"
+#include "full-write.h"
 #include "temp-stream.h"
 #include "xbinary-io.h"
 
@@ -158,7 +159,8 @@ output (char const *start, char const *past_end)
 
   if (!start)
     {
-      if (fwrite (buffer, 1, bytes_in_buffer, stdout) != bytes_in_buffer)
+      if (full_write (STDOUT_FILENO, buffer, bytes_in_buffer)
+          != bytes_in_buffer)
         write_error ();
       bytes_in_buffer = 0;
       return;
@@ -170,7 +172,7 @@ output (char const *start, char const *past_end)
       memcpy (buffer + bytes_in_buffer, start, bytes_available);
       bytes_to_add -= bytes_available;
       start += bytes_available;
-      if (fwrite (buffer, 1, WRITESIZE, stdout) != WRITESIZE)
+      if (full_write (STDOUT_FILENO, buffer, WRITESIZE) != WRITESIZE)
         write_error ();
       bytes_in_buffer = 0;
       bytes_available = WRITESIZE;
-- 
2.53.0


Reply via email to