Implement xargs --process-slot-var.

* xargs/xargs.c (set_slot_var): New function; sets an environment
variable to the index of the entry in pids[] that represents the
relevant child process.  This can be used in rudimentary load
distribution systems.
(slot_var_name): the name of the variable to use (selected by
--process-slot-var).
(enum LongOptionIdentifier): Unique identifiers for long options
with no short option equivalent (--process-slot-var is the first).
(longopts): Add --process-slot-var.
(add_proc): return the index within pids[] that we selected.
(main): Pass &option_index to getopt_long (option_index is a new
variable) in order to identify which long option was passed.
Handle --process-slot-var.
(prep_child_for_exec): Call set_slot_var.
(usage): Mention --process-slot-var.
* doc/find.texi (xargs options): Document --process-slot-var.
* xargs/xargs.1: Likewise.
* NEWS: Mention this change.

Signed-off-by: James Youngman <jay@gnu.org>
This commit is contained in:
James Youngman 2010-04-11 23:27:45 +01:00
parent 61f4abb5be
commit 6f7b74d052
5 changed files with 141 additions and 5 deletions

View File

@ -1,5 +1,25 @@
2011-05-15 James Youngman <jay@gnu.org>
Implement xargs --process-slot-var.
* xargs/xargs.c (set_slot_var): New function; sets an environment
variable to the index of the entry in pids[] that represents the
relevant child process. This can be used in rudimentary load
distribution systems.
(slot_var_name): the name of the variable to use (selected by
--process-slot-var).
(enum LongOptionIdentifier): Unique identifiers for long options
with no short option equivalent (--process-slot-var is the first).
(longopts): Add --process-slot-var.
(add_proc): return the index within pids[] that we selected.
(main): Pass &option_index to getopt_long (option_index is a new
variable) in order to identify which long option was passed.
Handle --process-slot-var.
(prep_child_for_exec): Call set_slot_var.
(usage): Mention --process-slot-var.
* doc/find.texi (xargs options): Document --process-slot-var.
* xargs/xargs.1: Likewise.
* NEWS: Mention this change and that it was Savannah bug #29512.
Describe xargs options in alphabetical order.
* xargs/xargs.1: Re-order the options to place them in
alphabetical order. Put --help and --version at the end.

8
NEWS
View File

@ -10,6 +10,14 @@ GNU findutils NEWS - User visible changes. -*- outline -*- (allout)
#32043: find -name [ doesn't obey posix
** Functional Enhancements to xargs
A new option is provided, --process-slot-var. If you set this, xargs
will set the indicated environment variable in each child. The values
are re-used, but no executing child process will have the same value
as another executing child process. This wishlist item was Savannah
bug #29512.
* Major changes in release 4.5.10, 2011-05-11
** Documentation Changes

View File

@ -3689,6 +3689,12 @@ Exit if the size (see the @samp{-s} option) is exceeded.
Run up to @var{max-procs} processes at a time; the default is 1. If
@var{max-procs} is 0, @code{xargs} will run as many processes as
possible simultaneously.
@item --process-slot-var=@var{environment-variable-name}
Set the environment variable @var{environment-variable-name} to a
unique value in each running child process. Each value is a decimal
integer. Values are reused once child processes exit. This can be
used in a rudimentary load distribution scheme, for example.
@end table
@node Invoking the shell from xargs

View File

@ -23,6 +23,7 @@ xargs \- build and execute command lines from standard input
[\fB\-\-max\-chars\fR=\fImax-chars\fR]
[\fB\-P \fImax-procs\fR]
[\fB\-\-max\-procs\fR=\fImax-procs\fR]
[\fB\-\-process\-slot\-var\fR=\fIname\fR]
[\fB\-\-interactive\fR]
[\fB\-\-verbose\fR]
[\fB\-\-exit\fR]
@ -243,6 +244,14 @@ with `y' or `Y'. Implies
.BR -t .
.TP
.PD
.BR \-\-process\-slot\-var "=\fIname\fR"
Set the environment variable
.I name
to a unique value in each running child process.  Each value is a
decimal integer.  Values are reused once child processes exit.  This
can be used in a rudimentary load distribution scheme, for example.
.TP
.PD
.B \-r, \-\-no\-run\-if\-empty
If the standard input does not contain any nonblanks, do not run the
command. Normally, the command is run once even if there is no input.

View File

@ -177,6 +177,18 @@ static bool query_before_executing = false;
static char input_delimiter = '\0';
/* When non-NULL, this is the name of the environment variable that
 * tells a child process which 'slot' it occupies.  Callers can use
 * the slot number for simple load distribution: we guarantee that no
 * two children running at the same time see the same value.
 * NULL means no slot variable has been selected.
 */
static char *slot_var_name = NULL;
/* Identifiers for long options which have no single-character
 * equivalent.  The values start at CHAR_MAX+1, which lies outside the
 * range of any `char', so they cannot be confused with a short option
 * letter when switching on the value returned by getopt_long.
 */
enum LongOptionIdentifier
{
PROCESS_SLOT_VAR = CHAR_MAX+1
};
static struct option const longopts[] =
{
{"null", no_argument, NULL, '0'},
@ -193,6 +205,7 @@ static struct option const longopts[] =
{"show-limits", no_argument, NULL, 'S'},
{"exit", no_argument, NULL, 'x'},
{"max-procs", required_argument, NULL, 'P'},
{"process-slot-var", required_argument, NULL, PROCESS_SLOT_VAR},
{"version", no_argument, NULL, 'v'},
{"help", no_argument, NULL, 'h'},
{NULL, no_argument, NULL, 0}
@ -219,7 +232,7 @@ static bool print_args (bool ask);
/* static void do_exec (void); */
static int xargs_do_exec (struct buildcmd_control *ctl, void *usercontext, int argc, char **argv);
static void exec_if_possible (void);
static void add_proc (pid_t pid);
static unsigned int add_proc (pid_t pid);
static void wait_for_proc (bool all, unsigned int minreap);
static void wait_for_proc_all (void);
static void increment_proc_max (int);
@ -228,7 +241,6 @@ static long parse_num (char *str, int option, long min, long max, int fatal);
static void usage (FILE * stream);
static char
get_char_oct_or_hex_escape (const char *s)
{
@ -367,7 +379,7 @@ smaller_of (size_t a, size_t b)
int
main (int argc, char **argv)
{
int optc;
int optc, option_index;
int show_limits = 0; /* --show-limits */
int always_run_command = 1;
char *input_file = "-"; /* "-" is stdin */
@ -480,7 +492,7 @@ main (int argc, char **argv)
}
while ((optc = getopt_long (argc, argv, "+0a:E:e::i::I:l::L:n:prs:txP:d:",
longopts, (int *) 0)) != -1)
longopts, &option_index)) != -1)
{
switch (optc)
{
@ -601,6 +613,27 @@ main (int argc, char **argv)
display_findutils_version ("xargs");
return 0;
case PROCESS_SLOT_VAR:
if (strchr (optarg, '='))
{
error (EXIT_FAILURE, 0,
_("option --%s may not be set to a value which includes `='"),
longopts[option_index]);
}
slot_var_name = optarg;
if (0 != unsetenv (slot_var_name))
{
/* This is a fatal error, otherwise some child process
may not be able to guarantee that no two children
have the same value for this variable; see
set_slot_var.
*/
error (EXIT_FAILURE, errno,
_("failed to unset environment variable %s"),
slot_var_name);
}
break;
default:
usage (stderr);
return 1;
@ -1044,6 +1077,55 @@ print_args (bool ask)
return false;
}
/* Publish the slot number N, formatted as an unsigned decimal string,
   in the environment under the name held in slot_var_name.  Problems
   are reported with error() but are never fatal.  */
static void
set_slot_var (unsigned int n)
{
  static const char *fmt = "%u";
  char *text;
  int len;

  /* Ask snprintf how many characters the formatted value occupies.
     A result that is zero or negative would mean our assumptions
     about snprintf (or about UINT_MAX) are wrong, which is a design
     error rather than an environmental one; hence the assertion.  */
  len = snprintf (NULL, 0u, fmt, n);
  assert (len > 0);

  /* Running out of memory here is undesirable but tolerable: the
     parent unset the variable at startup, so this child still cannot
     carry a value that duplicates another child's.  */
  text = malloc (len+1);
  if (NULL == text)
    {
      error (0, errno, _("unable to allocate memory"));
      return;
    }

  snprintf (text, len+1, fmt, n);

  /* With no variable name selected there is nothing to export.  The
     check is made this late on purpose, so that the formatting code
     above is exercised on every call (better test coverage).  */
  if (slot_var_name && setenv (slot_var_name, text, 1) < 0)
    {
      error (0, errno,
	     _("failed to set environment variable %s"), slot_var_name);
    }

  free (text);
}
/* Close stdin and attach /dev/null to it.
* This resolves Savannah bug #3992.
@ -1051,6 +1133,14 @@ print_args (bool ask)
static void
prep_child_for_exec (void)
{
/* The parent will call add_proc to allocate a slot. We do the same in the
child to make sure we get the same value.
We use 0 here in order to avoid generating a data structure that appears
to indicate that we (the child) have a child. */
unsigned int slot = add_proc (0);
set_slot_var (slot);
if (!keep_stdin)
{
const char inputfile[] = "/dev/null";
@ -1257,7 +1347,7 @@ exec_if_possible (void)
/* Add the process with id PID to the list of processes that have
been executed. */
static void
static unsigned int
add_proc (pid_t pid)
{
unsigned int i, j;
@ -1282,6 +1372,7 @@ add_proc (pid_t pid)
pids[i] = pid;
procs_executing++;
procs_executed = true;
return i;
}
@ -1535,6 +1626,8 @@ Non-mandatory arguments are indicated by [square brackets]\n\
line\n\
-P, --max-procs=MAX-PROCS Run up to max-procs processes at a time\n\
-p, --interactive Prompt before running commands\n\
--process-slot-var=VAR Set environment variable VAR in child\n\
processes\n\
-r, --no-run-if-empty If there are no arguments, run no command.\n\
If this option is not given, COMMAND will be\n\
run at least once.\n\