mirror of
https://https.git.savannah.gnu.org/git/coreutils.git
synced 2026-01-27 01:44:21 +00:00
copy,dd: simplify and optimize NUL bytes detection
* src/factor.c: Move LIKELY() definition to... * src/system.h: ...here. (is_nul): Reimplement with a version that doesn't require a sentinel after the buffer, and which calls down to (the system optimized) memcmp. Performance analyzed at http://rusty.ozlabs.org/?p=560 * src/dd.c (alloc_obuf): Simplify the is_nul() call by not needing to write the sentinel. * src/copy.c (sparse_copy): Likewise. (copy_reg): Simplify the buffer allocation by avoiding consideration of the sentinel in the buffer size calculation.
This commit is contained in:
parent
6796698c99
commit
9459d9d811
22
src/copy.c
22
src/copy.c
@ -245,17 +245,7 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
|
||||
csize = MIN (csize, n_read);
|
||||
|
||||
if (hole_size && csize)
|
||||
{
|
||||
/* Setup sentinel required by is_nul(). */
|
||||
typedef uintptr_t word;
|
||||
word isnul_tmp;
|
||||
memcpy (&isnul_tmp, cbuf + csize, sizeof (word));
|
||||
memset (cbuf + csize, 1, sizeof (word));
|
||||
|
||||
make_hole = is_nul (cbuf, csize);
|
||||
|
||||
memcpy (cbuf + csize, &isnul_tmp, sizeof (word));
|
||||
}
|
||||
make_hole = is_nul (cbuf, csize);
|
||||
|
||||
bool transition = (make_hole != prev_hole) && psize;
|
||||
bool last_chunk = (n_read == csize && ! make_hole) || ! csize;
|
||||
@ -1201,11 +1191,8 @@ copy_reg (char const *src_name, char const *dst_name,
|
||||
|
||||
if (data_copy_required)
|
||||
{
|
||||
typedef uintptr_t word;
|
||||
|
||||
/* Choose a suitable buffer size; it may be adjusted later. */
|
||||
size_t buf_alignment = lcm (getpagesize (), sizeof (word));
|
||||
size_t buf_alignment_slop = sizeof (word) + buf_alignment - 1;
|
||||
size_t buf_alignment = getpagesize ();
|
||||
size_t buf_size = io_blksize (sb);
|
||||
size_t hole_size = ST_BLKSIZE (sb);
|
||||
|
||||
@ -1236,7 +1223,7 @@ copy_reg (char const *src_name, char const *dst_name,
|
||||
{
|
||||
/* Compute the least common multiple of the input and output
|
||||
buffer sizes, adjusting for outlandish values. */
|
||||
size_t blcm_max = MIN (SIZE_MAX, SSIZE_MAX) - buf_alignment_slop;
|
||||
size_t blcm_max = MIN (SIZE_MAX, SSIZE_MAX) - buf_alignment;
|
||||
size_t blcm = buffer_lcm (io_blksize (src_open_sb), buf_size,
|
||||
blcm_max);
|
||||
|
||||
@ -1254,8 +1241,7 @@ copy_reg (char const *src_name, char const *dst_name,
|
||||
buf_size = blcm;
|
||||
}
|
||||
|
||||
/* Make a buffer with space for a sentinel at the end. */
|
||||
buf_alloc = xmalloc (buf_size + buf_alignment_slop);
|
||||
buf_alloc = xmalloc (buf_size + buf_alignment);
|
||||
buf = ptr_align (buf_alloc, buf_alignment);
|
||||
|
||||
if (sparse_src)
|
||||
|
||||
6
src/dd.c
6
src/dd.c
@ -20,7 +20,6 @@
|
||||
|
||||
#define SWAB_ALIGN_OFFSET 2
|
||||
|
||||
#include <assert.h>
|
||||
#include <sys/types.h>
|
||||
#include <signal.h>
|
||||
#include <getopt.h>
|
||||
@ -728,11 +727,6 @@ alloc_obuf (void)
|
||||
alloc_ibuf ();
|
||||
obuf = ibuf;
|
||||
}
|
||||
|
||||
/* Write a sentinel to the slop after the buffer,
|
||||
to allow efficient checking for NUL blocks. */
|
||||
assert (sizeof (uintptr_t) <= OUTPUT_BLOCK_SLOP);
|
||||
memset (obuf + output_blocksize, 1, sizeof (uintptr_t));
|
||||
}
|
||||
|
||||
static void
|
||||
|
||||
@ -705,14 +705,6 @@ static bool flag_prove_primality = PROVE_PRIMALITY;
|
||||
/* Number of Miller-Rabin tests to run when not proving primality. */
|
||||
#define MR_REPS 25
|
||||
|
||||
#ifdef __GNUC__
|
||||
# define LIKELY(cond) __builtin_expect ((cond), 1)
|
||||
# define UNLIKELY(cond) __builtin_expect ((cond), 0)
|
||||
#else
|
||||
# define LIKELY(cond) (cond)
|
||||
# define UNLIKELY(cond) (cond)
|
||||
#endif
|
||||
|
||||
static void
|
||||
factor_insert_refind (struct factors *factors, uintmax_t p, unsigned int i,
|
||||
unsigned int off)
|
||||
|
||||
66
src/system.h
66
src/system.h
@ -427,6 +427,15 @@ enum
|
||||
# define ATTRIBUTE_WARN_UNUSED_RESULT __attribute__ ((__warn_unused_result__))
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
# define LIKELY(cond) __builtin_expect ((cond), 1)
|
||||
# define UNLIKELY(cond) __builtin_expect ((cond), 0)
|
||||
#else
|
||||
# define LIKELY(cond) (cond)
|
||||
# define UNLIKELY(cond) (cond)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined strdupa
|
||||
# define ASSIGN_STRDUPA(DEST, S) \
|
||||
do { DEST = strdupa (S); } while (0)
|
||||
@ -487,27 +496,54 @@ ptr_align (void const *ptr, size_t alignment)
|
||||
}
|
||||
|
||||
/* Return whether the buffer consists entirely of NULs.
|
||||
Note the word after the buffer must be non NUL. */
|
||||
Based on memeqzero in CCAN by Rusty Russell under CC0 (Public domain). */
|
||||
|
||||
static inline bool _GL_ATTRIBUTE_PURE
|
||||
is_nul (void const *buf, size_t bufsize)
|
||||
is_nul (void const *buf, size_t length)
|
||||
{
|
||||
typedef uintptr_t word;
|
||||
void const *vp;
|
||||
char const *cbuf = buf;
|
||||
word const *wp = buf;
|
||||
const unsigned char *p = buf;
|
||||
/* Using possibly unaligned access for the first 16 bytes
|
||||
saves about 30-40 cycles, though it is strictly undefined behavior
|
||||
and so would need __attribute__ ((__no_sanitize_undefined__))
|
||||
to avoid -fsanitize=undefined warnings.
|
||||
Considering coreutils is mainly concerned with relatively
|
||||
large buffers, we'll just use the defined behavior. */
|
||||
#if 0 && _STRING_ARCH_unaligned
|
||||
unsigned long word;
|
||||
#else
|
||||
unsigned char word;
|
||||
#endif
|
||||
|
||||
/* Find first nonzero *word*, or the word with the sentinel. */
|
||||
while (*wp++ == 0)
|
||||
continue;
|
||||
if (! length)
|
||||
return true;
|
||||
|
||||
/* Find the first nonzero *byte*, or the sentinel. */
|
||||
vp = wp - 1;
|
||||
char const *cp = vp;
|
||||
while (*cp++ == 0)
|
||||
continue;
|
||||
/* Check len bytes not aligned on a word. */
|
||||
while (UNLIKELY (length & (sizeof word - 1)))
|
||||
{
|
||||
if (*p)
|
||||
return false;
|
||||
p++;
|
||||
length--;
|
||||
if (! length)
|
||||
return true;
|
||||
}
|
||||
|
||||
return cbuf + bufsize < cp;
|
||||
/* Check up to 16 bytes a word at a time. */
|
||||
for (;;)
|
||||
{
|
||||
memcpy (&word, p, sizeof word);
|
||||
if (word)
|
||||
return false;
|
||||
p += sizeof word;
|
||||
length -= sizeof word;
|
||||
if (! length)
|
||||
return true;
|
||||
if (UNLIKELY (length & 15) == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
/* Now we know first 16 bytes are NUL, memcmp with self. */
|
||||
return memcmp (buf, p, length) == 0;
|
||||
}
|
||||
|
||||
/* If 10*Accum + Digit_val is larger than the maximum value for Type,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user