split: avoid unnecessary input buffering

Input buffering is best avoided because it delays the
processing of output for intermittent input, especially
when the output size is less than that of the input
buffer.  This is significant when the output is being
further processed, which can happen if split is writing
to precreated fifos or through --filter.

If input is arriving quickly from a pipe, then it will
already be buffered before we read it, so fast-arriving
input shouldn't be a performance issue.

* src/split.c (lines_split, line_bytes_split, bytes_split,
lines_chunk_split, bytes_chunk_extract): s/full_read/safe_read/.
* THANKS.in: Mention the reporter.
* NEWS: Mention the improvement.
This commit is contained in:
Pádraig Brady 2014-03-20 10:00:13 +00:00
parent b010481e1f
commit 5f9a5b3f03
3 changed files with 17 additions and 16 deletions

3
NEWS
View File

@ -45,6 +45,9 @@ GNU coreutils NEWS -*- outline -*-
causing name look-up errors. Also look-ups are first done outside the chroot,
in case the look-up within the chroot fails due to library conflicts etc.
split avoids unnecessary input buffering, immediately writing input to output,
which is significant with --filter or when writing to fifos or stdout etc.
stat and tail work better with HFS+ and HFSX. stat -f --format=%T now reports
the file system type, and tail -f now uses inotify for files, rather than the
default of issuing a warning and reverting to polling.

View File

@ -184,6 +184,7 @@ Egmont Koblinger egmont@uhulinux.hu
Eirik Fuller eirik@hackrat.com
Eivind eivindt@multinet.no
Elbert Pol elbert.pol@gmail.com
Eldon Stegall eldon@eldondev.com
Eli Zaretskii eliz@is.elta.co.il
Emile LeBlanc leblanc@math.toronto.edu
Emmanuel Lacour elacour@home-dn.net

View File

@ -33,7 +33,6 @@
#include "error.h"
#include "fd-reopen.h"
#include "fcntl--.h"
#include "full-read.h"
#include "full-write.h"
#include "ioblksize.h"
#include "quote.h"
@ -526,8 +525,8 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files)
do
{
n_read = full_read (STDIN_FILENO, buf, bufsize);
if (n_read < bufsize && errno)
n_read = safe_read (STDIN_FILENO, buf, bufsize);
if (n_read == SAFE_READ_ERROR)
error (EXIT_FAILURE, errno, "%s", infile);
bp_out = buf;
to_read = n_read;
@ -562,7 +561,7 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files)
}
}
}
while (n_read == bufsize);
while (n_read);
/* Ensure NUMBER files are created, which truncates
any existing files or notifies any consumers on fifos.
@ -584,8 +583,8 @@ lines_split (uintmax_t n_lines, char *buf, size_t bufsize)
do
{
n_read = full_read (STDIN_FILENO, buf, bufsize);
if (n_read < bufsize && errno)
n_read = safe_read (STDIN_FILENO, buf, bufsize);
if (n_read == SAFE_READ_ERROR)
error (EXIT_FAILURE, errno, "%s", infile);
bp = bp_out = buf;
eob = bp + n_read;
@ -614,7 +613,7 @@ lines_split (uintmax_t n_lines, char *buf, size_t bufsize)
}
}
}
while (n_read == bufsize);
while (n_read);
}
/* Split into pieces that are as large as possible while still not more
@ -633,8 +632,8 @@ line_bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize)
do
{
n_read = full_read (STDIN_FILENO, buf, bufsize);
if (n_read < bufsize && errno)
n_read = safe_read (STDIN_FILENO, buf, bufsize);
if (n_read == SAFE_READ_ERROR)
error (EXIT_FAILURE, errno, "%s", infile);
size_t n_left = n_read;
char *sob = buf;
@ -718,7 +717,7 @@ line_bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize)
}
}
}
while (n_read == bufsize);
while (n_read);
/* Handle no eol at end of file. */
if (n_hold)
@ -762,8 +761,8 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
while (n_written < file_size)
{
char *bp = buf, *eob;
size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
if (n_read < bufsize && errno)
size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
if (n_read == SAFE_READ_ERROR)
error (EXIT_FAILURE, errno, "%s", infile);
else if (n_read == 0)
break; /* eof. */
@ -857,8 +856,8 @@ bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
while (start < end)
{
size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
if (n_read < bufsize && errno)
size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
if (n_read == SAFE_READ_ERROR)
error (EXIT_FAILURE, errno, "%s", infile);
else if (n_read == 0)
break; /* eof. */
@ -998,8 +997,6 @@ lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize)
while (true)
{
char *bp = buf, *eob;
/* Use safe_read() rather than full_read() here
so that we process available data immediately. */
size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
if (n_read == SAFE_READ_ERROR)
error (EXIT_FAILURE, errno, "%s", infile);