mirror of
https://git.savannah.gnu.org/git/coreutils.git
synced 2026-01-27 01:44:21 +00:00
split: add --number to generate a particular number of files
* src/split.c (usage, long_options, main): New options --number, --unbuffered, --elide-empty-files. (set_suffix_length): New function to auto increase suffix length to handle a specified number of files. (create): New function. Refactored from cwrite() and ofile_open(). (bytes_split): Add max_files argument to support byte chunking. (lines_chunk_split): New function. Split file into chunks of lines. (bytes_chunk_extract): New function. Extract a chunk of file. (of_info): New struct. Used by functions lines_rr and ofile_open to keep track of file descriptors associated with output files. (ofile_open): New function. Shuffle file descriptors when there are more output files than available file descriptors. (lines_rr): New function to distribute lines round-robin to files. (parse_chunk): New function. Parses K/N syntax. * tests/misc/split-bchunk: New test for byte chunking. * tests/misc/split-lchunk: New test for line delimited chunking. * tests/misc/split-rchunk: New test for round-robin chunking. * tests/Makefile.am: Reference new tests. * tests/misc/split-fail: Add failure scenarios for new options. * tests/misc/split-l: Fix a typo. s/ln/split/. * doc/coreutils.texi (split invocation): Document --number. * NEWS: Mention the new feature. * .mailmap: Map new email address for shortlog. Signed-off-by: Pádraig Brady <P@draigBrady.com>
This commit is contained in:
parent
dadca988af
commit
be107398e5
1
.mailmap
1
.mailmap
@ -8,3 +8,4 @@ Paul R. Eggert <eggert@cs.ucla.edu> <eggert@CS.UCLA.EDU>
|
||||
Evan Hunt <ethanol@armory.com> Evan Hunt <jim@meyering.net>
|
||||
<P@draigBrady.com> <P@draigBrady.com (trivial change)>
|
||||
Pádraig Brady <p@draigbrady.com> <p@draigBrady.com>
|
||||
<chen.guo.0625@gmail.com> <chenguo4@yahoo.com>
|
||||
|
||||
4
NEWS
4
NEWS
@ -7,6 +7,10 @@ GNU coreutils NEWS -*- outline -*-
|
||||
od now prints floating-point numbers without losing information, and
|
||||
it no longer omits spaces between floating-point columns in some cases.
|
||||
|
||||
** New features
|
||||
|
||||
split accepts the --number option to generate a specific number of files.
|
||||
|
||||
|
||||
* Noteworthy changes in release 8.7 (2010-11-13) [stable]
|
||||
|
||||
|
||||
@ -104,7 +104,7 @@
|
||||
* shuf: (coreutils)shuf invocation. Shuffling text files.
|
||||
* sleep: (coreutils)sleep invocation. Delay for a specified time.
|
||||
* sort: (coreutils)sort invocation. Sort text files.
|
||||
* split: (coreutils)split invocation. Split into fixed-size pieces.
|
||||
* split: (coreutils)split invocation. Split into pieces.
|
||||
* stat: (coreutils)stat invocation. Report file(system) status.
|
||||
* stdbuf: (coreutils)stdbuf invocation. Modify stdio buffering.
|
||||
* stty: (coreutils)stty invocation. Print/change terminal settings.
|
||||
@ -2624,7 +2624,7 @@ These commands output pieces of the input.
|
||||
@menu
|
||||
* head invocation:: Output the first part of files.
|
||||
* tail invocation:: Output the last part of files.
|
||||
* split invocation:: Split a file into fixed-size pieces.
|
||||
* split invocation:: Split a file into pieces.
|
||||
* csplit invocation:: Split a file into context-determined pieces.
|
||||
@end menu
|
||||
|
||||
@ -2920,15 +2920,15 @@ mean either @samp{tail ./+4} or @samp{tail -n +4}.
|
||||
|
||||
|
||||
@node split invocation
|
||||
@section @command{split}: Split a file into fixed-size pieces
|
||||
@section @command{split}: Split a file into pieces.
|
||||
|
||||
@pindex split
|
||||
@cindex splitting a file into pieces
|
||||
@cindex pieces, splitting a file into
|
||||
|
||||
@command{split} creates output files containing consecutive sections of
|
||||
@var{input} (standard input if none is given or @var{input} is
|
||||
@samp{-}). Synopsis:
|
||||
@command{split} creates output files containing consecutive or interleaved
|
||||
sections of @var{input} (standard input if none is given or @var{input}
|
||||
is @samp{-}). Synopsis:
|
||||
|
||||
@example
|
||||
split [@var{option}] [@var{input} [@var{prefix}]]
|
||||
@ -2941,10 +2941,9 @@ left over for the last section), into each output file.
|
||||
The output files' names consist of @var{prefix} (@samp{x} by default)
|
||||
followed by a group of characters (@samp{aa}, @samp{ab}, @dots{} by
|
||||
default), such that concatenating the output files in traditional
|
||||
sorted order by file name produces
|
||||
the original input file. If the output file names are exhausted,
|
||||
@command{split} reports an error without deleting the output files
|
||||
that it did create.
|
||||
sorted order by file name produces the original input file (except
|
||||
@option{-r}). If the output file names are exhausted, @command{split}
|
||||
reports an error without deleting the output files that it did create.
|
||||
|
||||
The program accepts the following options. Also see @ref{Common options}.
|
||||
|
||||
@ -2976,6 +2975,41 @@ possible without exceeding @var{size} bytes. Individual lines longer than
|
||||
@var{size} bytes are broken into multiple files.
|
||||
@var{size} has the same format as for the @option{--bytes} option.
|
||||
|
||||
@item -n @var{chunks}
|
||||
@itemx --number=@var{chunks}
|
||||
@opindex -n
|
||||
@opindex --number
|
||||
|
||||
Split @var{input} to @var{chunks} output files where @var{chunks} may be:
|
||||
|
||||
@example
|
||||
@var{n} generate @var{n} files based on current size of @var{input}
|
||||
@var{k}/@var{n} only output @var{k}th of @var{n} to stdout
|
||||
l/@var{n} generate @var{n} files without splitting lines
|
||||
l/@var{k}/@var{n} likewise but only output @var{k}th of @var{n} to stdout
|
||||
r/@var{n} like @samp{l} but use round robin distribution
|
||||
r/@var{k}/@var{n} likewise but only output @var{k}th of @var{n} to stdout
|
||||
@end example
|
||||
|
||||
Any excess bytes remaining after dividing the @var{input}
|
||||
into @var{n} chunks, are assigned to the last chunk.
|
||||
Any excess bytes appearing after the initial calculation are discarded
|
||||
(except when using @samp{r} mode).
|
||||
|
||||
All @var{n} files are created even if there are fewer than @var{n} lines,
|
||||
or the @var{input} is truncated.
|
||||
|
||||
For @samp{l} mode, chunks are approximately @var{input} size / @var{n}.
|
||||
The @var{input} is partitioned into @var{n} equal sized portions, with
|
||||
the last assigned any excess. If a line @emph{starts} within a partition
|
||||
it is written completely to the corresponding file. Since lines
|
||||
are not split even if they overlap a partition, the files written
|
||||
can be larger or smaller than the partition size, and even empty
|
||||
if a line is so long as to completely overlap the partition.
|
||||
|
||||
For @samp{r} mode, the size of @var{input} is irrelevant,
|
||||
and so can be a pipe for example.
|
||||
|
||||
@item -a @var{length}
|
||||
@itemx --suffix-length=@var{length}
|
||||
@opindex -a
|
||||
@ -2988,6 +3022,23 @@ Use suffixes of length @var{length}. The default @var{length} is 2.
|
||||
@opindex --numeric-suffixes
|
||||
Use digits in suffixes rather than lower-case letters.
|
||||
|
||||
@item -e
|
||||
@itemx --elide-empty-files
|
||||
@opindex -e
|
||||
@opindex --elide-empty-files
|
||||
Suppress the generation of zero-length output files. This can happen
|
||||
with the @option{--number} option if a file is (truncated to be) shorter
|
||||
than the number requested, or if a line is so long as to completely
|
||||
span a chunk. The output file sequence numbers always run consecutively
|
||||
even when this option is specified.
|
||||
|
||||
@item -u
|
||||
@itemx --unbuffered
|
||||
@opindex -u
|
||||
@opindex --unbuffered
|
||||
Immediately copy input to output in @option{--number r/...} mode,
|
||||
which is a much slower mode of operation.
|
||||
|
||||
@itemx --verbose
|
||||
@opindex --verbose
|
||||
Write a diagnostic just before each output file is opened.
|
||||
|
||||
524
src/split.c
524
src/split.c
@ -22,6 +22,7 @@
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <getopt.h>
|
||||
#include <sys/types.h>
|
||||
@ -33,6 +34,7 @@
|
||||
#include "full-read.h"
|
||||
#include "full-write.h"
|
||||
#include "quote.h"
|
||||
#include "safe-read.h"
|
||||
#include "xfreopen.h"
|
||||
#include "xstrtol.h"
|
||||
|
||||
@ -43,8 +45,6 @@
|
||||
proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \
|
||||
proper_name ("Richard M. Stallman")
|
||||
|
||||
#define DEFAULT_SUFFIX_LENGTH 2
|
||||
|
||||
/* Base name of output files. */
|
||||
static char const *outbase;
|
||||
|
||||
@ -56,7 +56,7 @@ static char *outfile;
|
||||
static char *outfile_mid;
|
||||
|
||||
/* Length of OUTFILE's suffix. */
|
||||
static size_t suffix_length = DEFAULT_SUFFIX_LENGTH;
|
||||
static size_t suffix_length;
|
||||
|
||||
/* Alphabet of characters to use in suffix. */
|
||||
static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";
|
||||
@ -65,12 +65,19 @@ static char const *suffix_alphabet = "abcdefghijklmnopqrstuvwxyz";
|
||||
static char *infile;
|
||||
|
||||
/* Descriptor on which output file is open. */
|
||||
static int output_desc;
|
||||
static int output_desc = -1;
|
||||
|
||||
/* If true, print a diagnostic on standard error just before each
|
||||
output file is opened. */
|
||||
static bool verbose;
|
||||
|
||||
/* If true, don't generate zero length output files. */
|
||||
static bool elide_empty_files;
|
||||
|
||||
/* If true, in round robin mode, immediately copy
|
||||
input to output, which is much slower, so disabled by default. */
|
||||
static bool unbuffered;
|
||||
|
||||
/* For long options that have no equivalent short option, use a
|
||||
non-character as a pseudo short option, starting with CHAR_MAX + 1. */
|
||||
enum
|
||||
@ -83,6 +90,9 @@ static struct option const longopts[] =
|
||||
{"bytes", required_argument, NULL, 'b'},
|
||||
{"lines", required_argument, NULL, 'l'},
|
||||
{"line-bytes", required_argument, NULL, 'C'},
|
||||
{"number", required_argument, NULL, 'n'},
|
||||
{"elide-empty-files", no_argument, NULL, 'e'},
|
||||
{"unbuffered", no_argument, NULL, 'u'},
|
||||
{"suffix-length", required_argument, NULL, 'a'},
|
||||
{"numeric-suffixes", no_argument, NULL, 'd'},
|
||||
{"verbose", no_argument, NULL, VERBOSE_OPTION},
|
||||
@ -91,6 +101,32 @@ static struct option const longopts[] =
|
||||
{NULL, 0, NULL, 0}
|
||||
};
|
||||
|
||||
static void
|
||||
set_suffix_length (uintmax_t n_units)
|
||||
{
|
||||
#define DEFAULT_SUFFIX_LENGTH 2
|
||||
|
||||
size_t suffix_needed = 0;
|
||||
size_t alphabet_len = strlen (suffix_alphabet);
|
||||
bool alphabet_slop = (n_units % alphabet_len) != 0;
|
||||
while (n_units /= alphabet_len)
|
||||
suffix_needed++;
|
||||
suffix_needed += alphabet_slop;
|
||||
|
||||
if (suffix_length) /* set by user */
|
||||
{
|
||||
if (suffix_length < suffix_needed)
|
||||
{
|
||||
error (EXIT_FAILURE, 0,
|
||||
_("the suffix length needs to be at least %zu"),
|
||||
suffix_needed);
|
||||
}
|
||||
return;
|
||||
}
|
||||
else
|
||||
suffix_length = MAX (DEFAULT_SUFFIX_LENGTH, suffix_needed);
|
||||
}
|
||||
|
||||
void
|
||||
usage (int status)
|
||||
{
|
||||
@ -117,7 +153,10 @@ Mandatory arguments to long options are mandatory for short options too.\n\
|
||||
-b, --bytes=SIZE put SIZE bytes per output file\n\
|
||||
-C, --line-bytes=SIZE put at most SIZE bytes of lines per output file\n\
|
||||
-d, --numeric-suffixes use numeric suffixes instead of alphabetic\n\
|
||||
-e, --elide-empty-files do not generate empty output files with `-n'\n\
|
||||
-l, --lines=NUMBER put NUMBER lines per output file\n\
|
||||
-n, --number=CHUNKS generate CHUNKS output files. See below\n\
|
||||
-u, --unbuffered immediately copy input to output with `-n r/...'\n\
|
||||
"), DEFAULT_SUFFIX_LENGTH);
|
||||
fputs (_("\
|
||||
--verbose print a diagnostic just before each\n\
|
||||
@ -126,6 +165,15 @@ Mandatory arguments to long options are mandatory for short options too.\n\
|
||||
fputs (HELP_OPTION_DESCRIPTION, stdout);
|
||||
fputs (VERSION_OPTION_DESCRIPTION, stdout);
|
||||
emit_size_note ();
|
||||
fputs (_("\n\
|
||||
CHUNKS may be:\n\
|
||||
N split into N files based on size of input\n\
|
||||
K/N output Kth of N to stdout\n\
|
||||
l/N split into N files without splitting lines\n\
|
||||
l/K/N output Kth of N to stdout without splitting lines\n\
|
||||
r/N like `l' but use round robin distribution\n\
|
||||
r/K/N likewise but only output Kth of N to stdout\n\
|
||||
"), stdout);
|
||||
emit_ancillary_info ();
|
||||
}
|
||||
exit (status);
|
||||
@ -187,6 +235,17 @@ next_file_name (void)
|
||||
}
|
||||
}
|
||||
|
||||
/* Create or truncate a file. */
|
||||
|
||||
static int
|
||||
create (const char* name)
|
||||
{
|
||||
if (verbose)
|
||||
fprintf (stdout, _("creating file %s\n"), quote (name));
|
||||
return open (name, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
|
||||
(S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH));
|
||||
}
|
||||
|
||||
/* Write BYTES bytes at BP to an output file.
|
||||
If NEW_FILE_FLAG is true, open the next output file.
|
||||
Otherwise add to the same output file already in use. */
|
||||
@ -198,15 +257,8 @@ cwrite (bool new_file_flag, const char *bp, size_t bytes)
|
||||
{
|
||||
if (output_desc >= 0 && close (output_desc) < 0)
|
||||
error (EXIT_FAILURE, errno, "%s", outfile);
|
||||
|
||||
next_file_name ();
|
||||
if (verbose)
|
||||
fprintf (stdout, _("creating file %s\n"), quote (outfile));
|
||||
output_desc = open (outfile,
|
||||
O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
|
||||
(S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP
|
||||
| S_IROTH | S_IWOTH));
|
||||
if (output_desc < 0)
|
||||
if ((output_desc = create (outfile)) < 0)
|
||||
error (EXIT_FAILURE, errno, "%s", outfile);
|
||||
}
|
||||
if (full_write (output_desc, bp, bytes) != bytes)
|
||||
@ -217,13 +269,14 @@ cwrite (bool new_file_flag, const char *bp, size_t bytes)
|
||||
Use buffer BUF, whose size is BUFSIZE. */
|
||||
|
||||
static void
|
||||
bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize)
|
||||
bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files)
|
||||
{
|
||||
size_t n_read;
|
||||
bool new_file_flag = true;
|
||||
size_t to_read;
|
||||
uintmax_t to_write = n_bytes;
|
||||
char *bp_out;
|
||||
uintmax_t opened = 0;
|
||||
|
||||
do
|
||||
{
|
||||
@ -239,6 +292,7 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize)
|
||||
if (to_read) /* do not write 0 bytes! */
|
||||
{
|
||||
cwrite (new_file_flag, bp_out, to_read);
|
||||
opened += new_file_flag;
|
||||
to_write -= to_read;
|
||||
new_file_flag = false;
|
||||
}
|
||||
@ -248,14 +302,21 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize)
|
||||
{
|
||||
size_t w = to_write;
|
||||
cwrite (new_file_flag, bp_out, w);
|
||||
opened += new_file_flag;
|
||||
new_file_flag = !max_files || (opened < max_files);
|
||||
bp_out += w;
|
||||
to_read -= w;
|
||||
new_file_flag = true;
|
||||
to_write = n_bytes;
|
||||
}
|
||||
}
|
||||
}
|
||||
while (n_read == bufsize);
|
||||
|
||||
/* Ensure NUMBER files are created, which truncates
|
||||
any existing files or notifies any consumers on fifos.
|
||||
FIXME: Should we do this before EXIT_FAILURE? */
|
||||
while (!elide_empty_files && opened++ < max_files)
|
||||
cwrite (true, NULL, 0);
|
||||
}
|
||||
|
||||
/* Split into pieces of exactly N_LINES lines.
|
||||
@ -361,6 +422,331 @@ line_bytes_split (size_t n_bytes)
|
||||
free (buf);
|
||||
}
|
||||
|
||||
/* -n l/[K/]N: Write lines to files of approximately file size / N.
|
||||
The file is partitioned into file size / N sized portions, with the
|
||||
last assigned any excess. If a line _starts_ within a partition
|
||||
it is written completely to the corresponding file. Since lines
|
||||
are not split even if they overlap a partition, the files written
|
||||
can be larger or smaller than the partition size, and even empty
|
||||
if a line is so long as to completely overlap the partition. */
|
||||
|
||||
static void
|
||||
lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
|
||||
off_t file_size)
|
||||
{
|
||||
assert (n && k <= n && n <= file_size);
|
||||
|
||||
const off_t chunk_size = file_size / n;
|
||||
uintmax_t chunk_no = 1;
|
||||
off_t chunk_end = chunk_size - 1;
|
||||
off_t n_written = 0;
|
||||
bool new_file_flag = true;
|
||||
|
||||
if (k > 1)
|
||||
{
|
||||
/* Start reading 1 byte before kth chunk of file. */
|
||||
off_t start = (k - 1) * chunk_size - 1;
|
||||
if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
|
||||
error (EXIT_FAILURE, errno, "%s", infile);
|
||||
n_written = start;
|
||||
chunk_no = k - 1;
|
||||
chunk_end = chunk_no * chunk_size - 1;
|
||||
}
|
||||
|
||||
while (n_written < file_size)
|
||||
{
|
||||
char *bp = buf, *eob;
|
||||
size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
|
||||
n_read = MIN (n_read, file_size - n_written);
|
||||
if (n_read < bufsize && errno)
|
||||
error (EXIT_FAILURE, errno, "%s", infile);
|
||||
else if (n_read == 0)
|
||||
break; /* eof. */
|
||||
eob = buf + n_read;
|
||||
|
||||
while (bp != eob)
|
||||
{
|
||||
size_t to_write;
|
||||
bool next = false;
|
||||
|
||||
/* Begin looking for '\n' at last byte of chunk. */
|
||||
off_t skip = MIN (n_read, MAX (0, chunk_end - n_written));
|
||||
char *bp_out = memchr (bp + skip, '\n', n_read - skip);
|
||||
if (bp_out++)
|
||||
next = true;
|
||||
else
|
||||
bp_out = eob;
|
||||
to_write = bp_out - bp;
|
||||
|
||||
if (k == chunk_no)
|
||||
{
|
||||
/* We don't use the stdout buffer here since we're writing
|
||||
large chunks from an existing file, so it's more efficient
|
||||
to write out directly. */
|
||||
if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
|
||||
error (EXIT_FAILURE, errno, "%s", _("write error"));
|
||||
}
|
||||
else
|
||||
cwrite (new_file_flag, bp, to_write);
|
||||
n_written += to_write;
|
||||
bp += to_write;
|
||||
n_read -= to_write;
|
||||
new_file_flag = next;
|
||||
|
||||
/* A line could have been so long that it skipped
|
||||
entire chunks. So create empty files in that case. */
|
||||
while (next || chunk_end <= n_written - 1)
|
||||
{
|
||||
if (!next && bp == eob)
|
||||
break; /* replenish buf, before going to next chunk. */
|
||||
chunk_no++;
|
||||
if (k && chunk_no > k)
|
||||
return;
|
||||
if (chunk_no == n)
|
||||
chunk_end = file_size - 1; /* >= chunk_size. */
|
||||
else
|
||||
chunk_end += chunk_size;
|
||||
if (!elide_empty_files && chunk_end <= n_written - 1)
|
||||
cwrite (true, NULL, 0);
|
||||
else
|
||||
next = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Ensure NUMBER files are created, which truncates
|
||||
any existing files or notifies any consumers on fifos.
|
||||
FIXME: Should we do this before EXIT_FAILURE? */
|
||||
while (!k && !elide_empty_files && chunk_no++ <= n)
|
||||
cwrite (true, NULL, 0);
|
||||
}
|
||||
|
||||
/* -n K/N: Extract Kth of N chunks. */
|
||||
|
||||
static void
|
||||
bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
|
||||
off_t file_size)
|
||||
{
|
||||
off_t start;
|
||||
off_t end;
|
||||
|
||||
assert (k && n && k <= n && n <= file_size);
|
||||
|
||||
start = (k - 1) * (file_size / n);
|
||||
end = (k == n) ? file_size : k * (file_size / n);
|
||||
|
||||
if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
|
||||
error (EXIT_FAILURE, errno, "%s", infile);
|
||||
|
||||
while (start < end)
|
||||
{
|
||||
size_t n_read = full_read (STDIN_FILENO, buf, bufsize);
|
||||
n_read = MIN (n_read, end - start);
|
||||
if (n_read < bufsize && errno)
|
||||
error (EXIT_FAILURE, errno, "%s", infile);
|
||||
else if (n_read == 0)
|
||||
break; /* eof. */
|
||||
if (full_write (STDOUT_FILENO, buf, n_read) != n_read)
|
||||
error (EXIT_FAILURE, errno, "%s", quote ("-"));
|
||||
start += n_read;
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct of_info
|
||||
{
|
||||
char *of_name;
|
||||
int ofd;
|
||||
FILE* ofile;
|
||||
} of_t;
|
||||
|
||||
enum
|
||||
{
|
||||
OFD_NEW = -1,
|
||||
OFD_APPEND = -2
|
||||
};
|
||||
|
||||
/* Rotate file descriptors when we're writing to more output files than we
|
||||
have available file descriptors.
|
||||
Return whether we came under file resource pressure.
|
||||
If so, it's probably best to close each file when finished with it. */
|
||||
|
||||
static bool
|
||||
ofile_open (of_t *files, size_t i_check, size_t nfiles)
|
||||
{
|
||||
bool file_limit = false;
|
||||
|
||||
if (files[i_check].ofd <= OFD_NEW)
|
||||
{
|
||||
int fd;
|
||||
size_t i_reopen = i_check ? i_check - 1 : nfiles - 1;
|
||||
|
||||
/* Another process could have opened a file in between the calls to
|
||||
close and open, so we should keep trying until open succeeds or
|
||||
we've closed all of our files. */
|
||||
while (true)
|
||||
{
|
||||
if (files[i_check].ofd == OFD_NEW)
|
||||
fd = create (files[i_check].of_name);
|
||||
else /* OFD_APPEND */
|
||||
{
|
||||
/* Attempt to append to previously opened file.
|
||||
We use O_NONBLOCK to support writing to fifos,
|
||||
where the other end has closed because of our
|
||||
previous close. In that case we'll immediately
|
||||
get an error, rather than waiting indefinitely.
|
||||
In specialised cases the consumer can keep reading
|
||||
from the fifo, terminating on conditions in the data
|
||||
itself, or perhaps never in the case of `tail -f`.
|
||||
I.E. for fifos it is valid to attempt this reopen. */
|
||||
fd = open (files[i_check].of_name,
|
||||
O_WRONLY | O_BINARY | O_APPEND | O_NONBLOCK);
|
||||
}
|
||||
|
||||
if (-1 < fd)
|
||||
break;
|
||||
|
||||
if (!(errno == EMFILE || errno == ENFILE))
|
||||
error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
|
||||
|
||||
file_limit = true;
|
||||
|
||||
/* Search backwards for an open file to close. */
|
||||
while (files[i_reopen].ofd < 0)
|
||||
{
|
||||
i_reopen = i_reopen ? i_reopen - 1 : nfiles - 1;
|
||||
/* No more open files to close, exit with E[NM]FILE. */
|
||||
if (i_reopen == i_check)
|
||||
error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
|
||||
}
|
||||
|
||||
if (fclose (files[i_reopen].ofile) != 0)
|
||||
error (EXIT_FAILURE, errno, "%s", files[i_reopen].of_name);
|
||||
files[i_reopen].ofd = OFD_APPEND;
|
||||
}
|
||||
|
||||
files[i_check].ofd = fd;
|
||||
if (!(files[i_check].ofile = fdopen (fd, "a")))
|
||||
error (EXIT_FAILURE, errno, "%s", files[i_check].of_name);
|
||||
}
|
||||
|
||||
return file_limit;
|
||||
}
|
||||
|
||||
/* -n r/[K/]N: Divide file into N chunks in round robin fashion.
|
||||
When K == 0, we try to keep the files open in parallel.
|
||||
If we run out of file resources, then we revert
|
||||
to opening and closing each file for each line. */
|
||||
|
||||
static void
|
||||
lines_rr (uintmax_t k, uintmax_t n, char *buf, size_t bufsize)
|
||||
{
|
||||
bool file_limit;
|
||||
size_t i_file;
|
||||
of_t *files;
|
||||
uintmax_t line_no;
|
||||
|
||||
if (k)
|
||||
line_no = 1;
|
||||
else
|
||||
{
|
||||
if (SIZE_MAX < n)
|
||||
error (exit_failure, 0, "%s", _("memory exhausted"));
|
||||
files = xnmalloc (n, sizeof *files);
|
||||
|
||||
/* Generate output file names. */
|
||||
for (i_file = 0; i_file < n; i_file++)
|
||||
{
|
||||
next_file_name ();
|
||||
files[i_file].of_name = xstrdup (outfile);
|
||||
files[i_file].ofd = OFD_NEW;
|
||||
files[i_file].ofile = NULL;
|
||||
}
|
||||
i_file = 0;
|
||||
file_limit = false;
|
||||
}
|
||||
|
||||
while (true)
|
||||
{
|
||||
char *bp = buf, *eob;
|
||||
/* Use safe_read() rather than full_read() here
|
||||
so that we process available data immediately. */
|
||||
size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
|
||||
if (n_read == SAFE_READ_ERROR)
|
||||
error (EXIT_FAILURE, errno, "%s", infile);
|
||||
else if (n_read == 0)
|
||||
break; /* eof. */
|
||||
eob = buf + n_read;
|
||||
|
||||
while (bp != eob)
|
||||
{
|
||||
size_t to_write;
|
||||
bool next = false;
|
||||
|
||||
/* Find end of line. */
|
||||
char *bp_out = memchr (bp, '\n', eob - bp);
|
||||
if (bp_out)
|
||||
{
|
||||
bp_out++;
|
||||
next = true;
|
||||
}
|
||||
else
|
||||
bp_out = eob;
|
||||
to_write = bp_out - bp;
|
||||
|
||||
if (k)
|
||||
{
|
||||
if (line_no == k && unbuffered)
|
||||
{
|
||||
if (full_write (STDOUT_FILENO, bp, to_write) != to_write)
|
||||
error (EXIT_FAILURE, errno, "%s", _("write error"));
|
||||
}
|
||||
else if (line_no == k && fwrite (bp, to_write, 1, stdout) != 1)
|
||||
{
|
||||
clearerr (stdout); /* To silence close_stdout(). */
|
||||
error (EXIT_FAILURE, errno, "%s", _("write error"));
|
||||
}
|
||||
if (next)
|
||||
line_no = (line_no == n) ? 1 : line_no + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Secure file descriptor. */
|
||||
file_limit |= ofile_open (files, i_file, n);
|
||||
if (unbuffered)
|
||||
{
|
||||
/* Note writing to fd, rather than flushing the FILE gives
|
||||
an 8% performance benefit, due to reduced data copying. */
|
||||
if (full_write (files[i_file].ofd, bp, to_write) != to_write)
|
||||
error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
|
||||
}
|
||||
else if (fwrite (bp, to_write, 1, files[i_file].ofile) != 1)
|
||||
error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
|
||||
if (file_limit)
|
||||
{
|
||||
if (fclose (files[i_file].ofile) != 0)
|
||||
error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
|
||||
files[i_file].ofd = OFD_APPEND;
|
||||
}
|
||||
if (next && ++i_file == n)
|
||||
i_file = 0;
|
||||
}
|
||||
|
||||
bp = bp_out;
|
||||
}
|
||||
}
|
||||
|
||||
/* Ensure all files created, so that any existing files are truncated,
|
||||
and to signal any waiting fifo consumers.
|
||||
Also, close any open file descriptors.
|
||||
FIXME: Should we do this before EXIT_FAILURE? */
|
||||
for (i_file = 0; !k && !elide_empty_files && i_file < n; i_file++)
|
||||
{
|
||||
file_limit |= ofile_open (files, i_file, n);
|
||||
if (fclose (files[i_file].ofile) != 0)
|
||||
error (EXIT_FAILURE, errno, "%s", files[i_file].of_name);
|
||||
}
|
||||
}
|
||||
|
||||
#define FAIL_ONLY_ONE_WAY() \
|
||||
do \
|
||||
{ \
|
||||
@ -369,21 +755,41 @@ line_bytes_split (size_t n_bytes)
|
||||
} \
|
||||
while (0)
|
||||
|
||||
/* Parse K/N syntax of chunk options. */
|
||||
|
||||
static void
|
||||
parse_chunk (uintmax_t *k_units, uintmax_t *n_units, char *slash)
|
||||
{
|
||||
*slash = '\0';
|
||||
if (xstrtoumax (slash+1, NULL, 10, n_units, "") != LONGINT_OK
|
||||
|| *n_units == 0)
|
||||
error (EXIT_FAILURE, 0, _("%s: invalid number of chunks"), slash+1);
|
||||
if (slash != optarg /* a leading number is specified. */
|
||||
&& (xstrtoumax (optarg, NULL, 10, k_units, "") != LONGINT_OK
|
||||
|| *k_units == 0 || *n_units < *k_units))
|
||||
error (EXIT_FAILURE, 0, _("%s: invalid chunk number"), optarg);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
struct stat stat_buf;
|
||||
enum
|
||||
{
|
||||
type_undef, type_bytes, type_byteslines, type_lines, type_digits
|
||||
type_undef, type_bytes, type_byteslines, type_lines, type_digits,
|
||||
type_chunk_bytes, type_chunk_lines, type_rr
|
||||
} split_type = type_undef;
|
||||
size_t in_blk_size; /* optimal block size of input file device */
|
||||
char *buf; /* file i/o buffer */
|
||||
size_t page_size = getpagesize ();
|
||||
uintmax_t k_units = 0;
|
||||
uintmax_t n_units;
|
||||
|
||||
static char const multipliers[] = "bEGKkMmPTYZ0";
|
||||
int c;
|
||||
int digits_optind = 0;
|
||||
off_t file_size;
|
||||
|
||||
initialize_main (&argc, &argv);
|
||||
set_program_name (argv[0]);
|
||||
@ -395,15 +801,16 @@ main (int argc, char **argv)
|
||||
|
||||
/* Parse command line options. */
|
||||
|
||||
infile = bad_cast ( "-");
|
||||
infile = bad_cast ("-");
|
||||
outbase = bad_cast ("x");
|
||||
|
||||
while (1)
|
||||
while (true)
|
||||
{
|
||||
/* This is the argv-index of the option we will read next. */
|
||||
int this_optind = optind ? optind : 1;
|
||||
char *slash;
|
||||
|
||||
c = getopt_long (argc, argv, "0123456789C:a:b:dl:", longopts, NULL);
|
||||
c = getopt_long (argc, argv, "0123456789C:a:b:del:n:u", longopts, NULL);
|
||||
if (c == -1)
|
||||
break;
|
||||
|
||||
@ -468,6 +875,35 @@ main (int argc, char **argv)
|
||||
_("%s: invalid number of bytes"), optarg);
|
||||
break;
|
||||
|
||||
case 'n':
|
||||
if (split_type != type_undef)
|
||||
FAIL_ONLY_ONE_WAY ();
|
||||
/* skip any whitespace */
|
||||
while (isspace (to_uchar (*optarg)))
|
||||
optarg++;
|
||||
if (strncmp (optarg, "r/", 2) == 0)
|
||||
{
|
||||
split_type = type_rr;
|
||||
optarg += 2;
|
||||
}
|
||||
else if (strncmp (optarg, "l/", 2) == 0)
|
||||
{
|
||||
split_type = type_chunk_lines;
|
||||
optarg += 2;
|
||||
}
|
||||
else
|
||||
split_type = type_chunk_bytes;
|
||||
if ((slash = strchr (optarg, '/')))
|
||||
parse_chunk (&k_units, &n_units, slash);
|
||||
else if (xstrtoumax (optarg, NULL, 10, &n_units, "") != LONGINT_OK
|
||||
|| n_units == 0)
|
||||
error (EXIT_FAILURE, 0, _("%s: invalid number of chunks"), optarg);
|
||||
break;
|
||||
|
||||
case 'u':
|
||||
unbuffered = true;
|
||||
break;
|
||||
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
@ -501,6 +937,10 @@ main (int argc, char **argv)
|
||||
suffix_alphabet = "0123456789";
|
||||
break;
|
||||
|
||||
case 'e':
|
||||
elide_empty_files = true;
|
||||
break;
|
||||
|
||||
case VERBOSE_OPTION:
|
||||
verbose = true;
|
||||
break;
|
||||
@ -523,10 +963,12 @@ main (int argc, char **argv)
|
||||
|
||||
if (n_units == 0)
|
||||
{
|
||||
error (0, 0, _("invalid number of lines: 0"));
|
||||
error (0, 0, _("%s: invalid number of lines"), "0");
|
||||
usage (EXIT_FAILURE);
|
||||
}
|
||||
|
||||
set_suffix_length (n_units);
|
||||
|
||||
/* Get out the filename arguments. */
|
||||
|
||||
if (optind < argc)
|
||||
@ -547,18 +989,35 @@ main (int argc, char **argv)
|
||||
error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
|
||||
quote (infile));
|
||||
|
||||
/* Binary I/O is safer when bytecounts are used. */
|
||||
/* Binary I/O is safer when byte counts are used. */
|
||||
if (O_BINARY && ! isatty (STDIN_FILENO))
|
||||
xfreopen (NULL, "rb", stdin);
|
||||
|
||||
/* No output file is open now. */
|
||||
output_desc = -1;
|
||||
|
||||
/* Get the optimal block size of input device and make a buffer. */
|
||||
|
||||
if (fstat (STDIN_FILENO, &stat_buf) != 0)
|
||||
error (EXIT_FAILURE, errno, "%s", infile);
|
||||
in_blk_size = io_blksize (stat_buf);
|
||||
file_size = stat_buf.st_size;
|
||||
|
||||
if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
|
||||
{
|
||||
off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR);
|
||||
if (input_offset < 0)
|
||||
error (EXIT_FAILURE, 0, _("%s: cannot determine file size"),
|
||||
quote (infile));
|
||||
file_size -= input_offset;
|
||||
/* Overflow, and sanity checking. */
|
||||
if (OFF_T_MAX < n_units)
|
||||
{
|
||||
char buffer[INT_BUFSIZE_BOUND (uintmax_t)];
|
||||
error (EXIT_FAILURE, EFBIG, _("%s: invalid number of chunks"),
|
||||
umaxtostr (n_units, buffer));
|
||||
}
|
||||
/* increase file_size to n_units here, so that we still process
|
||||
any input data, and create empty files for the rest. */
|
||||
file_size = MAX (file_size, n_units);
|
||||
}
|
||||
|
||||
buf = ptr_align (xmalloc (in_blk_size + 1 + page_size - 1), page_size);
|
||||
|
||||
@ -570,13 +1029,30 @@ main (int argc, char **argv)
|
||||
break;
|
||||
|
||||
case type_bytes:
|
||||
bytes_split (n_units, buf, in_blk_size);
|
||||
bytes_split (n_units, buf, in_blk_size, 0);
|
||||
break;
|
||||
|
||||
case type_byteslines:
|
||||
line_bytes_split (n_units);
|
||||
break;
|
||||
|
||||
case type_chunk_bytes:
|
||||
if (k_units == 0)
|
||||
bytes_split (file_size / n_units, buf, in_blk_size, n_units);
|
||||
else
|
||||
bytes_chunk_extract (k_units, n_units, buf, in_blk_size, file_size);
|
||||
break;
|
||||
|
||||
case type_chunk_lines:
|
||||
lines_chunk_split (k_units, n_units, buf, in_blk_size, file_size);
|
||||
break;
|
||||
|
||||
case type_rr:
|
||||
/* Note, this is like `sed -n ${k}~${n}p` when k > 0,
|
||||
but the functionality is provided for symmetry. */
|
||||
lines_rr (k_units, n_units, buf, in_blk_size);
|
||||
break;
|
||||
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
|
||||
@ -240,8 +240,11 @@ TESTS = \
|
||||
misc/sort-unique \
|
||||
misc/sort-version \
|
||||
misc/split-a \
|
||||
misc/split-bchunk \
|
||||
misc/split-fail \
|
||||
misc/split-l \
|
||||
misc/split-lchunk \
|
||||
misc/split-rchunk \
|
||||
misc/stat-birthtime \
|
||||
misc/stat-fmt \
|
||||
misc/stat-hyphen \
|
||||
|
||||
50
tests/misc/split-bchunk
Executable file
50
tests/misc/split-bchunk
Executable file
@ -0,0 +1,50 @@
|
||||
#!/bin/sh
|
||||
# test splitting into 3 chunks
|
||||
|
||||
# Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
. "${srcdir=.}/init.sh"; path_prepend_ ../src
|
||||
print_ver_ split
|
||||
|
||||
# N can be greater than the file size
|
||||
# in which case no data is extracted, or empty files are written
|
||||
split -n 10 /dev/null || fail=1
|
||||
test "$(stat -c %s x* | uniq -c | sed 's/^ *//; s/ /x/')" = "10x0" || fail=1
|
||||
rm x??
|
||||
|
||||
# Ensure --elide-empty-files is honored
|
||||
split -e -n 10 /dev/null || fail=1
|
||||
stat x?? 2>/dev/null && fail=1
|
||||
|
||||
printf '1\n2\n3\n4\n5\n' > in || framework_failure
|
||||
|
||||
split -n 3 in > out || fail=1
|
||||
split -n 1/3 in > b1 || fail=1
|
||||
split -n 2/3 in > b2 || fail=1
|
||||
split -n 3/3 in > b3 || fail=1
|
||||
printf '1\n2' > exp-1
|
||||
printf '\n3\n' > exp-2
|
||||
printf '4\n5\n' > exp-3
|
||||
|
||||
compare xaa exp-1 || fail=1
|
||||
compare xab exp-2 || fail=1
|
||||
compare xac exp-3 || fail=1
|
||||
compare b1 exp-1 || fail=1
|
||||
compare b2 exp-2 || fail=1
|
||||
compare b3 exp-3 || fail=1
|
||||
test -f xad && fail=1
|
||||
|
||||
Exit $fail
|
||||
@ -27,6 +27,10 @@ split -a 0 in 2> /dev/null || fail=1
|
||||
split -b 0 in 2> /dev/null && fail=1
|
||||
split -C 0 in 2> /dev/null && fail=1
|
||||
split -l 0 in 2> /dev/null && fail=1
|
||||
split -n 0 in 2> /dev/null && fail=1
|
||||
split -n 1/0 in 2> /dev/null && fail=1
|
||||
split -n 0/1 in 2> /dev/null && fail=1
|
||||
split -n 2/1 in 2> /dev/null && fail=1
|
||||
|
||||
# Make sure -C doesn't create empty files.
|
||||
rm -f x?? || fail=1
|
||||
@ -44,6 +48,12 @@ split --lines=$UINTMAX_MAX in || fail=1
|
||||
split --bytes=$OFF_T_MAX in || fail=1
|
||||
split --line-bytes=$OFF_T_OFLOW 2> /dev/null in && fail=1
|
||||
split --line-bytes=$SIZE_OFLOW 2> /dev/null in && fail=1
|
||||
if truncate -s$SIZE_OFLOW large; then
|
||||
# Ensure we can split chunks of a large file on 32 bit hosts
|
||||
split --number=$SIZE_OFLOW/$SIZE_OFLOW large >/dev/null || fail=1
|
||||
fi
|
||||
split --number=r/$UINTMAX_MAX/$UINTMAX_MAX </dev/null >/dev/null || fail=1
|
||||
split --number=r/$UINTMAX_OFLOW </dev/null 2>/dev/null && fail=1
|
||||
|
||||
# Make sure that a huge obsolete option evokes the right failure.
|
||||
split -99999999999999999991 2> out && fail=1
|
||||
|
||||
@ -17,7 +17,7 @@
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
. "${srcdir=.}/init.sh"; path_prepend_ ../src
|
||||
print_ver_ ln
|
||||
print_ver_ split
|
||||
|
||||
printf '1\n2\n3\n4\n5\n' > in || framework_failure
|
||||
|
||||
|
||||
117
tests/misc/split-lchunk
Executable file
117
tests/misc/split-lchunk
Executable file
@ -0,0 +1,117 @@
|
||||
#!/bin/sh
|
||||
# test splitting into newline delineated chunks (-n l/...)
|
||||
|
||||
# Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
. "${srcdir=.}/init.sh"; path_prepend_ ../src
|
||||
print_ver_ split
|
||||
|
||||
# invalid number of chunks
|
||||
echo 'split: 1o: invalid number of chunks' > exp
|
||||
split -n l/1o 2>err && fail=1
|
||||
compare err exp || fail=1
|
||||
|
||||
echo "split: \`-': cannot determine file size" > exp
|
||||
echo | split -n l/1 2>err && fail=1
|
||||
compare err exp || fail=1
|
||||
|
||||
# N can be greater than the file size
|
||||
# in which case no data is extracted, or empty files are written
|
||||
split -n l/10 /dev/null || fail=1
|
||||
test "$(stat -c %s x* | uniq -c | sed 's/^ *//; s/ /x/')" = "10x0" || fail=1
|
||||
rm x??
|
||||
|
||||
# Ensure --elide-empty-files is honored
|
||||
split -e -n l/10 /dev/null || fail=1
|
||||
stat x?? 2>/dev/null && fail=1
|
||||
|
||||
# 80 bytes. ~ transformed to \n below
|
||||
lines=\
|
||||
12345~1~12345~1~12345~1~12345~1~12345~~~12345~1~12345~1~12345~1~12345~1~12345~1~
|
||||
|
||||
printf "%s" "$lines" | tr '~' '\n' > in || framework_failure
|
||||
|
||||
echo 'split: 16: invalid chunk number' > exp
|
||||
split -n l/16/15 in 2>err && fail=1
|
||||
compare err exp || fail=1
|
||||
|
||||
: > out
|
||||
printf '%s' "\
|
||||
14 16 09 15 16 10
|
||||
14 08 08 10 14 08 08 10
|
||||
06 08 08 02 06 08 08 02 06 08 08 10
|
||||
06 08 02 06 08 00 08 02 06 08 02 06 08 00 10
|
||||
06 00 08 00 02 06 00 02 06 00 08 00 01 07 00 02 06 00 08 00 02 16
|
||||
" > exp || framework_failure
|
||||
|
||||
# Note for full testing one should set bufsize to 1,2,5,10,80,100
|
||||
# at the start of lines_chunk_split(), for each run of this test.
|
||||
|
||||
DEBUGGING=
|
||||
test "$DEBUGGING" && test "$VERBOSE" && set +x
|
||||
for N in 6 8 12 15 22; do
|
||||
rm -f x*
|
||||
split -n l/$N in
|
||||
echo $(stat -c "%02s" x*) >> out
|
||||
|
||||
if test "$DEBUGGING"; then
|
||||
# Output partition pattern
|
||||
size=$(printf "%s" "$lines" | wc -c)
|
||||
chunk_size=$(($size/$N))
|
||||
end_size=$(($chunk_size + ($size % $N)))
|
||||
{
|
||||
yes "$(printf %${chunk_size}s ])" | head -n$(($N-1))
|
||||
printf %${end_size}s ]
|
||||
} | tr -d '\n' | sed "s/\\(^.\\{1,$size\\}\\).*/\\1/"
|
||||
echo
|
||||
|
||||
# Output pattern generated for comparison
|
||||
for s in $(stat -c "%s" x*); do
|
||||
#s=0 transitions are not shown
|
||||
test "$m" = "_" && m=- || m=_
|
||||
printf "%${s}s" '' | tr ' ' $m
|
||||
done
|
||||
echo
|
||||
|
||||
# Output lines for reference
|
||||
echo "$lines"
|
||||
fi
|
||||
done
|
||||
test "$DEBUGGING" && test "$VERBOSE" && set -x
|
||||
|
||||
compare out exp || fail=1
|
||||
|
||||
# Check extraction of particular chunks
|
||||
: > out
|
||||
printf '1\n12345\n' > exp
|
||||
split -n l/13/15 in > out
|
||||
compare out exp || fail=1
|
||||
: > out
|
||||
printf '' > exp
|
||||
split -n l/14/15 in > out
|
||||
compare out exp || fail=1
|
||||
: > out
|
||||
printf '1\n12345\n1\n' > exp
|
||||
split -n l/15/15 in > out
|
||||
compare out exp || fail=1
|
||||
|
||||
# test input with no \n at end
|
||||
printf '12\n34\n5' > in
|
||||
printf '5' > exp
|
||||
split -n l/7/7 in > out
|
||||
compare out exp || fail=1
|
||||
|
||||
Exit $fail
|
||||
66
tests/misc/split-rchunk
Executable file
66
tests/misc/split-rchunk
Executable file
@ -0,0 +1,66 @@
|
||||
#!/bin/sh
|
||||
# test splitting into round-robin chunks
|
||||
|
||||
# Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
. "${srcdir=.}/init.sh"; path_prepend_ ../src
|
||||
print_ver_ split
|
||||
|
||||
require_ulimit_
|
||||
|
||||
# N can be greater than the file size
|
||||
# in which case no data is extracted, or empty files are written
|
||||
split -n r/10 /dev/null || fail=1
|
||||
test "$(stat -c %s x* | uniq -c | sed 's/^ *//; s/ /x/')" = "10x0" || fail=1
|
||||
rm x??
|
||||
|
||||
# Ensure --elide-empty-files is honored
|
||||
split -e -n r/10 /dev/null || fail=1
|
||||
stat x?? 2>/dev/null && fail=1
|
||||
|
||||
printf '1\n2\n3\n4\n5\n' > in || framework_failure
|
||||
|
||||
split -n r/3 in > out || fail=1
|
||||
test -s out && fail=1
|
||||
|
||||
split -n r/1/3 in > r1 || fail=1
|
||||
split -n r/2/3 in > r2 || fail=1
|
||||
split -n r/3/3 in > r3 || fail=1
|
||||
|
||||
printf '1\n4\n' > exp-1
|
||||
printf '2\n5\n' > exp-2
|
||||
printf '3\n' > exp-3
|
||||
|
||||
compare xaa exp-1 || fail=1
|
||||
compare xab exp-2 || fail=1
|
||||
compare xac exp-3 || fail=1
|
||||
compare r1 exp-1 || fail=1
|
||||
compare r2 exp-2 || fail=1
|
||||
compare r3 exp-3 || fail=1
|
||||
test -f xad && fail=1
|
||||
|
||||
# Test input without trailing \n
|
||||
printf '1\n2\n3\n4\n5' | split -n r/2/3 > out
|
||||
printf '2\n5' > exp
|
||||
compare out exp || fail=1
|
||||
|
||||
# Ensure we fall back to appending to a file at a time
|
||||
# if we hit the limit for the number of open files.
|
||||
rm x*
|
||||
(ulimit -n 20 && yes | head -n90 | split -n r/30 ) || fail=1
|
||||
test "$(stat -c %s x* | uniq -c | sed 's/^ *//; s/ /x/')" = "30x6" || fail=1
|
||||
|
||||
Exit $fail
|
||||
Loading…
x
Reference in New Issue
Block a user