On 03/20/2014 10:11 AM, Pádraig Brady wrote: > On 03/20/2014 02:39 AM, Eldon wrote: >> Hello all, >> I am attempting the following in a bash shell on a 3.8.13 linux kernel: >> sudo tcpdump -nn >> |grep --line-buffered NTPv2 >> |split -u --lines=10 --filter=date >> >> Clearly date would be replaced with some more useful script, but for the >> mean time I am trying to use it to debug what I see as unexpected >> buffering. Since the traffic I am looking at is fairly consistent (when >> I pipe to cat instead, I see a steady stream), I would expect to see >> regular ticks as date executions each time 10 lines are sent to >> split. Instead I see flashes of many executions that seem to me to be a >> buffer flush. I looked in the code in the git repo, and it seems that >> the buffer is in fact filled prior to execution. Is it worth making a >> patch or expanding the meaning of -u to pass this through in an >> unbuffered fashion? Would there be a reason to reject such a patch if >> well-formed? >> >> Thoughts? > > split(1) doesn't use stdio so we're not hitting that buffering. > What's happening is that we're explicitly buffering input > (into a 64K buffer on my x86_64 system) when using full_read() since > coreutils 4.5.8 with this commit: > http://git.sv.gnu.org/gitweb/?p=coreutils.git;a=commit;h=23f6d41 > > Now we really shouldn't be delaying processing like that, > so I propose we switch back to safe_read(). > > With the attached, the following command line gives immediate output: > > while true; do seq 5; sleep 1; done | src/split --lines=5 --filter=date
Full patch attached which completely removes full_read() from all cases. thanks, Pádraig.
>From e59c2d9516c07a7be5d82c815970e1d9013d23de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draig=20Brady?= <[email protected]> Date: Thu, 20 Mar 2014 10:00:13 +0000 Subject: [PATCH] split: avoid unecessary input buffering Input buffering is best avoided because it introduces delayed processing of output for intermittent input, especially when the output size is less than that of the input buffer. This is significant when output is being further processed which could happen if split is writing to precreated fifos, or through --filter. If input is arriving fast from a pipe then this will already be buffered before we read it, so fast arriving input shouldn't be a performance issue. * src/split.c (lines_split): s/full_read/safe_read/. (lines_bytes_split): Likewise. (bytes_split): Likewise. (lines_chunk_split): Likewise. (bytes_chunk_extract): Likewise. * NEWS: Mention the improvement. --- NEWS | 3 +++ src/split.c | 29 +++++++++++++---------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/NEWS b/NEWS index c2caa42..f68ab4f 100644 --- a/NEWS +++ b/NEWS @@ -45,6 +45,9 @@ GNU coreutils NEWS -*- outline -*- causing name look-up errors. Also look-ups are first done outside the chroot, in case the look-up within the chroot fails due to library conflicts etc. + split avoids unnecessary input buffering, immediately writing input to output + which is significant with --filter or when writing to fifos or stdout etc. + stat and tail work better with HFS+ and HFSX. stat -f --format=%T now reports the file system type, and tail -f now uses inotify for files, rather than the default of issuing a warning and reverting to polling. diff --git a/src/split.c b/src/split.c index 29d3dbf..dacacaa 100644 --- a/src/split.c +++ b/src/split.c @@ -33,7 +33,6 @@ #include "error.h" #include "fd-reopen.h" #include "fcntl--.h" -#include "full-read.h" #include "full-write.h" #include "ioblksize.h" #include "quote.h" @@ -526,8 +525,8 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files) do { - n_read = full_read (STDIN_FILENO, buf, bufsize); - if (n_read < bufsize && errno) + n_read = safe_read (STDIN_FILENO, buf, bufsize); + if (n_read == SAFE_READ_ERROR) error (EXIT_FAILURE, errno, "%s", infile); bp_out = buf; to_read = n_read; @@ -562,7 +561,7 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files) } } } - while (n_read == bufsize); + while (n_read); /* Ensure NUMBER files are created, which truncates any existing files or notifies any consumers on fifos. @@ -584,8 +583,8 @@ lines_split (uintmax_t n_lines, char *buf, size_t bufsize) do { - n_read = full_read (STDIN_FILENO, buf, bufsize); - if (n_read < bufsize && errno) + n_read = safe_read (STDIN_FILENO, buf, bufsize); + if (n_read == SAFE_READ_ERROR) error (EXIT_FAILURE, errno, "%s", infile); bp = bp_out = buf; eob = bp + n_read; @@ -614,7 +613,7 @@ lines_split (uintmax_t n_lines, char *buf, size_t bufsize) } } } - while (n_read == bufsize); + while (n_read); } /* Split into pieces that are as large as possible while still not more @@ -633,8 +632,8 @@ line_bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize) do { - n_read = full_read (STDIN_FILENO, buf, bufsize); - if (n_read < bufsize && errno) + n_read = safe_read (STDIN_FILENO, buf, bufsize); + if (n_read == SAFE_READ_ERROR) error (EXIT_FAILURE, errno, "%s", infile); size_t n_left = n_read; char *sob = buf; @@ -718,7 +717,7 @@ line_bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize) } } } - while (n_read == bufsize); + while (n_read); /* Handle no eol at end of file. */ if (n_hold) @@ -762,8 +761,8 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, while (n_written < file_size) { char *bp = buf, *eob; - size_t n_read = full_read (STDIN_FILENO, buf, bufsize); - if (n_read < bufsize && errno) + size_t n_read = safe_read (STDIN_FILENO, buf, bufsize); + if (n_read == SAFE_READ_ERROR) error (EXIT_FAILURE, errno, "%s", infile); else if (n_read == 0) break; /* eof. */ @@ -857,8 +856,8 @@ bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize, while (start < end) { - size_t n_read = full_read (STDIN_FILENO, buf, bufsize); - if (n_read < bufsize && errno) + size_t n_read = safe_read (STDIN_FILENO, buf, bufsize); + if (n_read == SAFE_READ_ERROR) error (EXIT_FAILURE, errno, "%s", infile); else if (n_read == 0) break; /* eof. */ @@ -998,8 +997,6 @@ lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize) while (true) { char *bp = buf, *eob; - /* Use safe_read() rather than full_read() here - so that we process available data immediately. */ size_t n_read = safe_read (STDIN_FILENO, buf, bufsize); if (n_read == SAFE_READ_ERROR) error (EXIT_FAILURE, errno, "%s", infile); -- 1.7.7.6
