From 6f7b74d052e3344d2e23a151ecb3bf65daca6338 Mon Sep 17 00:00:00 2001 From: James Youngman Date: Sun, 11 Apr 2010 23:27:45 +0100 Subject: [PATCH] Implement xargs --process-slot-var. * xargs/xargs.c (set_slot_var): New function; sets an environment variable to the index of the entry in pids[] that represents the relevant child process. This can be used in rudimentary load distribution systems. (slot_var_name): the name of the variable to use (selected by --process-slot-var). (enum LongOptionIdentifier): Unique identifiers for long options with no short option equivalent (--process-slot-var is the first). (longopts): Add --process-slot-var. (add_proc): return the index within pids[] that we selected. (main): Pass &option_index to getopt_long (option_index is a new variable) in order to identify which long option was passed. Handle --process-slot-var. (prep_child_for_exec): Call set_slot_var. (usage): Mention --process-slot-var. * doc/find.texi (xargs options): Document --process-slot-var. * xargs/xargs.1: Likewise. * NEWS: Mention this change. Signed-off-by: James Youngman --- ChangeLog | 20 ++++++++++ NEWS | 8 ++++ doc/find.texi | 6 +++ xargs/xargs.1 | 9 +++++ xargs/xargs.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 141 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index a27d688b..cae72653 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,25 @@ 2011-05-15 James Youngman + Implement xargs --process-slot-var. + * xargs/xargs.c (set_slot_var): New function; sets an environment + variable to the index of the entry in pids[] that represents the + relevant child process. This can be used in rudimentary load + distribution systems. + (slot_var_name): the name of the variable to use (selected by + --process-slot-var). + (enum LongOptionIdentifier): Unique identifiers for long options + with no short option equivalent (--process-slot-var is the first). + (longopts): Add --process-slot-var. 
+ (add_proc): return the index within pids[] that we selected. + (main): Pass &option_index to getopt_long (option_index is a new + variable) in order to identify which long option was passed. + Handle --process-slot-var. + (prep_child_for_exec): Call set_slot_var. + (usage): Mention --process-slot-var. + * doc/find.texi (xargs options): Document --process-slot-var. + * xargs/xargs.1: Likewise. + * NEWS: Mention this change and that it was Savannah bug #29512. + Describe xargs options in alphabetical order. * xargs/xargs.1: Re-order the options to place them in alphabetical order. Put --help and --version at the end. diff --git a/NEWS b/NEWS index 0e186753..bde307b1 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,14 @@ GNU findutils NEWS - User visible changes. -*- outline -*- (allout) #32043: find -name [ doesn't obey posix +** Functional Enhancements to xargs + +A new option is provided, --process-slot-var. If you set this, xargs +will set the indicated environment variable in each child. The values +are re-used, but no executing child process will have the same value +as another executing child process. This wishlist item was Savannah +bug #29512. + * Major changes in release 4.5.10, 2011-05-11 ** Documentation Changes diff --git a/doc/find.texi b/doc/find.texi index 1f278de0..a088c017 100644 --- a/doc/find.texi +++ b/doc/find.texi @@ -3689,6 +3689,12 @@ Exit if the size (see the @samp{-s} option) is exceeded. Run simultaneously up to @var{max-procs} processes at once; the default is 1. If @var{max-procs} is 0, @code{xargs} will run as many processes as possible simultaneously. + +@item --process-slot-var=@var{environment-variable-name} +Set the environment variable @var{environment-variable-name} to a +unique value in each running child process. Each value is a decimal +integer. Values are reused once child processes exit. This can be +used in a rudimentary load distribution scheme, for example. 
@end table @node Invoking the shell from xargs diff --git a/xargs/xargs.1 b/xargs/xargs.1 index 0bf1d09e..01360dcd 100644 --- a/xargs/xargs.1 +++ b/xargs/xargs.1 @@ -23,6 +23,7 @@ xargs \- build and execute command lines from standard input [\fB\-\-max\-chars\fR=\fImax-chars\fR] [\fB\-P \fImax-procs\fR] [\fB\-\-max\-procs\fR=\fImax-procs\fR] +[\fB\-\-process\-slot\-var\fR=\fIname\fR] [\fB\-\-interactive\fR] [\fB\-\-verbose\fR] [\fB\-\-exit\fR] @@ -243,6 +244,14 @@ with `y' or `Y'. Implies .BR -t . .TP .PD +.BR \-\-process\-slot\-var "=\fIname\fR" +Set the environment variable +.I name +to a unique value in each running child process. Values are reused +once child processes exit. This can be used in a rudimentary load +distribution scheme, for example. +.TP +.PD .B \-r, \-\-no\-run\-if\-empty If the standard input does not contain any nonblanks, do not run the command. Normally, the command is run once even if there is no input. diff --git a/xargs/xargs.c b/xargs/xargs.c index 9932224b..219b09f6 100644 --- a/xargs/xargs.c +++ b/xargs/xargs.c @@ -177,6 +177,18 @@ static bool query_before_executing = false; static char input_delimiter = '\0'; +/* Name of the environment variable which indicates which 'slot' + * the child process is in. This can be used to do some kind of basic + * load distribution. We guarantee not to allow two processes to run + * at the same time with the same value of this variable. 
+ */ +static char* slot_var_name = NULL; + +enum LongOptionIdentifier + { + PROCESS_SLOT_VAR = CHAR_MAX+1 + }; + static struct option const longopts[] = { {"null", no_argument, NULL, '0'}, @@ -193,6 +205,7 @@ static struct option const longopts[] = {"show-limits", no_argument, NULL, 'S'}, {"exit", no_argument, NULL, 'x'}, {"max-procs", required_argument, NULL, 'P'}, + {"process-slot-var", required_argument, NULL, PROCESS_SLOT_VAR}, {"version", no_argument, NULL, 'v'}, {"help", no_argument, NULL, 'h'}, {NULL, no_argument, NULL, 0} @@ -219,7 +232,7 @@ static bool print_args (bool ask); /* static void do_exec (void); */ static int xargs_do_exec (struct buildcmd_control *ctl, void *usercontext, int argc, char **argv); static void exec_if_possible (void); -static void add_proc (pid_t pid); +static unsigned int add_proc (pid_t pid); static void wait_for_proc (bool all, unsigned int minreap); static void wait_for_proc_all (void); static void increment_proc_max (int); @@ -228,7 +241,6 @@ static long parse_num (char *str, int option, long min, long max, int fatal); static void usage (FILE * stream); - static char get_char_oct_or_hex_escape (const char *s) { @@ -367,7 +379,7 @@ smaller_of (size_t a, size_t b) int main (int argc, char **argv) { - int optc; + int optc, option_index; int show_limits = 0; /* --show-limits */ int always_run_command = 1; char *input_file = "-"; /* "-" is stdin */ @@ -480,7 +492,7 @@ main (int argc, char **argv) } while ((optc = getopt_long (argc, argv, "+0a:E:e::i::I:l::L:n:prs:txP:d:", - longopts, (int *) 0)) != -1) + longopts, &option_index)) != -1) { switch (optc) { @@ -601,6 +613,27 @@ main (int argc, char **argv) display_findutils_version ("xargs"); return 0; + case PROCESS_SLOT_VAR: + if (strchr (optarg, '=')) + { + error (EXIT_FAILURE, 0, + _("option --%s may not be set to a value which includes `='"), + longopts[option_index]); + } + slot_var_name = optarg; + if (0 != unsetenv (slot_var_name)) + { + /* This is a fatal error, otherwise some 
child process + may not be able to guarantee that no two children + have the same value for this variable; see + set_slot_var. + */ + error (EXIT_FAILURE, errno, + _("failed to unset environment variable %s"), + slot_var_name); + } + break; + default: usage (stderr); return 1; @@ -1044,6 +1077,55 @@ print_args (bool ask) return false; } +/* Set the variable named by slot_var_name, if any, to N (in decimal). */ +static void +set_slot_var (unsigned int n) +{ + static const char *fmt = "%u"; + int size; + char *buf; + + + /* Determine the length of the buffer we need. + + If the result would be zero-length or have length (not value) > + INT_MAX, the assumptions we made about how snprintf behaves (or + what UINT_MAX is) are wrong. Hence we have a design error (not + an environmental error). + */ + size = snprintf (NULL, 0u, fmt, n); + assert (size > 0); + + + /* Failures here are undesirable but not fatal, since we can still + guarantee that this child does not have a duplicate value of the + indicated environment variable set (since the parent unset it on + startup). + */ + if (NULL == (buf = malloc (size+1))) /* +1 for the terminating NUL */ + { + error (0, errno, _("unable to allocate memory")); + } + else + { + snprintf (buf, size+1, fmt, n); + + /* If the user doesn't want us to set the variable, there is + nothing to do. However, we defer the bail-out until this + point in order to get better test coverage. + */ + if (slot_var_name) + { + if (setenv (slot_var_name, buf, 1) < 0) /* 1: overwrite any existing value */ + { + error (0, errno, + _("failed to set environment variable %s"), slot_var_name); + } + } + free (buf); + } +} + /* Close stdin and attach /dev/null to it. * This resolves Savannah bug #3992. @@ -1051,6 +1133,14 @@ print_args (bool ask) static void prep_child_for_exec (void) { + /* The parent will call add_proc to allocate a slot. We do the same in the + child to make sure we get the same value. + + We use 0 here in order to avoid generating a data structure that appears + to indicate that we (the child) have a child. 
*/ + unsigned int slot = add_proc (0); + set_slot_var (slot); + if (!keep_stdin) { const char inputfile[] = "/dev/null"; @@ -1257,7 +1347,7 @@ exec_if_possible (void) /* Add the process with id PID to the list of processes that have been executed. */ -static void +static unsigned int add_proc (pid_t pid) { unsigned int i, j; @@ -1282,6 +1372,7 @@ add_proc (pid_t pid) pids[i] = pid; procs_executing++; procs_executed = true; + return i; /* index of the pids[] entry just filled in */ } @@ -1535,6 +1626,8 @@ Non-mandatory arguments are indicated by [square brackets]\n\ line\n\ -P, --max-procs=MAX-PROCS Run up to max-procs processes at a time\n\ -p, --interactive Prompt before running commands\n\ + --process-slot-var=VAR Set environment variable VAR in child\n\ + processes\n\ -r, --no-run-if-empty If there are no arguments, run no command.\n\ If this option is not given, COMMAND will be\n\ run at least once.\n\