1LIBPIPELINE(3)           GNU Library Functions Manual           LIBPIPELINE(3)
2

NAME

4     libpipeline — pipeline manipulation library
5

SYNOPSIS

7     #include <pipeline.h>
8

DESCRIPTION

10     libpipeline is a C library for setting up and running pipelines of pro‐
11     cesses, without needing to involve shell command-line parsing which is
12     often error-prone and insecure.  This relieves programmers of the need to
13     laboriously construct pipelines using lower-level primitives such as fork
14     and execve.
15
16     The general way to use libpipeline involves constructing a pipeline
17     structure and adding one or more pipecmd structures to it.  A pipecmd
18     represents a subprocess (or “command”), while a pipeline represents a
19     sequence of subprocesses each of whose outputs is connected to the next
20     one's input, as in the example ls | grep pattern | less.  The calling
21     program may adjust certain properties of each command independently, such
22     as its environment and nice(3) priority, as well as properties of the
23     entire pipeline such as its input and output and the way signals are han‐
24     dled while executing it.  The calling program may then start the pipe‐
25     line, read output from it, wait for it to complete, and gather its exit
26     status.
27
28     Strings passed as const char * function arguments will be copied by the
29     library.
30
31   Functions to build individual commands
32     pipecmd *pipecmd_new(const char *name)
33
34           Construct a new command representing execution of a program called
35           name.
36
37     pipecmd *pipecmd_new_argv(const char *name, va_list argv)
38     pipecmd *pipecmd_new_args(const char *name, ...)
39
40           Convenience constructors wrapping pipecmd_new() and pipecmd_arg().
41           Construct a new command representing execution of a program called
42           name with arguments.  Terminate arguments with NULL.
43
44     pipecmd *pipecmd_new_argstr(const char *argstr)
45
46           Split argstr on whitespace to construct a command and arguments,
47           honouring shell-style single-quoting, double-quoting, and back‐
48           slashes, but not other shell evilness like wildcards, semicolons,
49           or backquotes.  This is included only to support situations where
50           command arguments are encoded into configuration files and the
51           like.  While it is safer than system(3), it still involves signifi‐
52           cant string parsing which is inherently riskier than avoiding it
53           altogether.  Please try to avoid using it in new code.
54
55     typedef void pipecmd_function_type (void *);
56     typedef void pipecmd_function_free_type (void *);
57     pipecmd *pipecmd_new_function(const char *name,
58           pipecmd_function_type *func, pipecmd_function_free_type *free_func,
59           void *data)
60
61           Construct a new command that calls a given function rather than
62           executing a process.
63
64           The data argument is passed as the function's only argument, and
65           will be freed before returning using free_func (if non-NULL).
66
67           pipecmd_* functions that deal with arguments cannot be used with
68           the command returned by this function.
69
70     pipecmd *pipecmd_new_sequencev(const char *name, va_list cmdv)
71     pipecmd *pipecmd_new_sequence(const char *name, ...)
72
73           Construct a new command that itself runs a sequence of commands,
74           supplied as pipecmd * arguments following name and terminated by
75           NULL.  The commands will be executed in forked children; if any
76           exits non-zero then it will terminate the sequence, as with "&&" in
77           shell.
78
79           pipecmd_* functions that deal with arguments cannot be used with
80           the command returned by this function.
81
82     pipecmd *pipecmd_new_passthrough(void)
83
84           Return a new command that just passes data from its input to its
85           output.
86
87     pipecmd *pipecmd_dup(pipecmd *cmd)
88
89           Return a duplicate of a command.
90
91     void pipecmd_arg(pipecmd *cmd, const char *arg)
92
93           Add an argument to a command.
94
95     void pipecmd_argf(pipecmd *cmd, const char *format, ...)
96
97           Convenience function to add an argument with printf substitutions.
98
99     void pipecmd_argv(pipecmd *cmd, va_list argv)
100     void pipecmd_args(pipecmd *cmd, ...)
101
102           Convenience functions wrapping pipecmd_arg() to add multiple argu‐
103           ments at once.  Terminate arguments with NULL.
104
105     void pipecmd_argstr(pipecmd *cmd, const char *argstr)
106
107           Split argstr on whitespace to add a list of arguments, honouring
108           shell-style single-quoting, double-quoting, and backslashes, but
109           not other shell evilness like wildcards, semicolons, or backquotes.
110           This is included only to support situations where command arguments
111           are encoded into configuration files and the like.  While it is
112           safer than system(3), it still involves significant string parsing
113           which is inherently riskier than avoiding it altogether.  Please
114           try to avoid using it in new code.
115
116     int pipecmd_get_nargs(pipecmd *cmd)
117
118           Return the number of arguments to this command.  Note that this
119           includes the command name as the first argument, so the command
120           ‘echo foo bar’ is counted as having three arguments.
121
122     void pipecmd_nice(pipecmd *cmd, int value)
123
124           Set the nice(3) value for this command.  Defaults to 0.  Errors
125           while attempting to set the nice value are ignored, aside from
126           emitting a debug message.
127
128     void pipecmd_discard_err(pipecmd *cmd, int discard_err)
129
130           If discard_err is non-zero, redirect this command's standard error
131           to /dev/null.  Otherwise, and by default, pass it through.
132           Discarding standard error is usually a bad idea.
133
134     void pipecmd_chdir(pipecmd *cmd, const char *directory)
135
136           Change the working directory to directory while running this com‐
137           mand.
138
139     void pipecmd_fchdir(pipecmd *cmd, int directory_fd)
140
141           Change the working directory to the directory given by the open
142           file descriptor directory_fd while running this command.
143
144     void pipecmd_setenv(pipecmd *cmd, const char *name, const char *value)
145
146           Set environment variable name to value while running this command.
147
148     void pipecmd_unsetenv(pipecmd *cmd, const char *name)
149
150           Unset environment variable name while running this command.
151
152     void pipecmd_clearenv(pipecmd *cmd)
153
154           Clear the environment while running this command.  (Note that envi‐
155           ronment operations work in sequence; pipecmd_clearenv followed by
156           pipecmd_setenv causes the command to have just a single environment
157           variable set.)  Beware that this may cause unexpected failures, for
158           example if some of the contents of the environment are necessary to
159           execute programs at all (say, PATH).
160
161     void pipecmd_pre_exec(pipecmd *cmd, pipecmd_function_type *func,
162           pipecmd_function_free_type *free_func, void *data)
163
164           Install a pre-exec handler.  This will be run immediately before
165           executing the command's payload (process or function).  Pass NULL
166           to clear any existing pre-exec handler.  The data argument is
167           passed as the function's only argument, and will be freed before
168           returning using free_func (if non-NULL).
169
170           This is similar to pipeline_install_post_fork, except that it is
171           specific to a single command rather than installing a global handler,
172           and it runs slightly later (immediately before exec rather than
173           immediately after fork).
174
175     void pipecmd_sequence_command(pipecmd *cmd, pipecmd *child)
176
177           Add a command to a sequence created using pipecmd_new_sequence().
178
179     void pipecmd_dump(pipecmd *cmd, FILE *stream)
180
181           Dump a string representation of a command to stream.
182
183     char *pipecmd_tostring(pipecmd *cmd)
184
185           Return a string representation of a command.  The caller should
186           free the result.
187
188     void pipecmd_exec(pipecmd *cmd)
189
190           Execute a single command, replacing the current process.  Never
191           returns, instead exiting non-zero on failure.
192
193     void pipecmd_free(pipecmd *cmd)
194
195           Destroy a command.  Safely does nothing if cmd is NULL.
196
197   Functions to build pipelines
198     pipeline *pipeline_new(void)
199
200           Construct a new pipeline.
201
202     pipeline *pipeline_new_commandv(pipecmd *cmd1, va_list cmdv)
203     pipeline *pipeline_new_commands(pipecmd *cmd1, ...)
204
205           Convenience constructors wrapping pipeline_new() and
206           pipeline_command().  Construct a new pipeline consisting of the
207           given list of commands.  Terminate commands with NULL.
208
209     pipeline *pipeline_new_command_argv(const char *name, va_list argv)
210     pipeline *pipeline_new_command_args(const char *name, ...)
211
212           Construct a new pipeline and add a single command to it.
213
214     pipeline *pipeline_join(pipeline *p1, pipeline *p2)
215
216           Joins two pipelines, neither of which may already have been started.
217           Discards want_out, want_outfile, and outfd from p1, and want_in,
218           want_infile, and infd from p2.
219
220     void pipeline_connect(pipeline *source, pipeline *sink, ...)
221
222           Connect the input of one or more sink pipelines to the output of a
223           source pipeline.  The source pipeline may be started, but in that
224           case pipeline_want_out() must have been called with a negative fd;
225           otherwise, calls pipeline_want_out(source, -1).  In any event,
226           calls pipeline_want_in(sink, -1) on all sinks, none of which are
227           allowed to be started.  Terminate arguments with NULL.
228
229           This is an application-level connection; data may be intercepted
230           between the pipelines by the program before calling
231           pipeline_pump(), which sets data flowing from the source to the
232           sinks.  It is primarily useful when more than one sink pipeline is
233           involved, in which case the pipelines cannot simply be concatenated
234           into one.
235
236           The result is similar to tee(1), except that output can be sent to
237           more than two places and can easily be sent to multiple processes.
238
239     void pipeline_command(pipeline *p, pipecmd *cmd)
240
241           Add a command to a pipeline.
242
243     void pipeline_command_argv(pipeline *p, const char *name, va_list argv)
244     void pipeline_command_args(pipeline *p, const char *name, ...)
245
246           Construct a new command and add it to a pipeline in one go.
247
248     void pipeline_command_argstr(pipeline *p, const char *argstr)
249
250           Construct a new command from a shell-quoted string and add it to a
251           pipeline in one go.  See the comment against pipecmd_new_argstr()
252           above if you're tempted to use this function.
253
254     void pipeline_commandv(pipeline *p, va_list cmdv)
255     void pipeline_commands(pipeline *p, ...)
256
257           Convenience functions wrapping pipeline_command() to add multiple
258           commands at once.  Terminate arguments with NULL.
259
260     void pipeline_want_in(pipeline *p, int fd)
261     void pipeline_want_out(pipeline *p, int fd)
262
263           Set file descriptors to use as the input and output of the whole
264           pipeline.  If non-negative, fd is used directly as a file descrip‐
265           tor.  If negative, pipeline_start() will create pipes and store the
266           input writing half and the output reading half in the pipeline's
267           infd or outfd field as appropriate.  The default is to leave input
268           and output as stdin and stdout unless pipeline_want_infile() or
269           pipeline_want_outfile() respectively has been called.
270
271           Calling these functions supersedes any previous call to
272           pipeline_want_infile() or pipeline_want_outfile() respectively.
273
274     void pipeline_want_infile(pipeline *p, const char *file)
275     void pipeline_want_outfile(pipeline *p, const char *file)
276
277           Set file names to open and use as the input and output of the whole
278           pipeline.  This may be more convenient than supplying file descrip‐
279           tors, and guarantees that the files are opened with the same privi‐
280           leges under which the pipeline is run.
281
282           Calling these functions (even with NULL, which returns to the
283           default of leaving input and output as stdin and stdout) supersedes
284           any previous call to pipeline_want_in() or pipeline_want_out()
285           respectively.
286
287           The given files will be opened when the pipeline is started.  If an
288           output file does not already exist, it is created (with mode 0666
289           modified in the usual way by umask); if it does exist, then it is
290           truncated.
291
292     void pipeline_ignore_signals(pipeline *p, int ignore_signals)
293
294           If ignore_signals is non-zero, ignore SIGINT and SIGQUIT in the
295           calling process while the pipeline is running, like system(3).
296           Otherwise, and by default, leave their dispositions unchanged.
297
298     int pipeline_get_ncommands(pipeline *p)
299
300           Return the number of commands in this pipeline.
301
302     pipecmd *pipeline_get_command(pipeline *p, int n)
303
304           Return command number n from this pipeline, counting from zero, or
305           NULL if n is out of range.
306
307     pipecmd *pipeline_set_command(pipeline *p, int n, pipecmd *cmd)
308
309           Set command number n in this pipeline, counting from zero, to cmd,
310           and return the previous command in that position.  Do nothing and
311           return NULL if n is out of range.
312
313     pid_t pipeline_get_pid(pipeline *p, int n)
314
315           Return the process ID of command number n from this pipeline,
316           counting from zero.  The pipeline must be started.  Return -1 if n
317           is out of range or if the command has already exited and been
318           reaped.
319
320     FILE *pipeline_get_infile(pipeline *p)
321     FILE *pipeline_get_outfile(pipeline *p)
322
323           Get streams corresponding to infd and outfd respectively.  The
324           pipeline must be started.
325
326     void pipeline_dump(pipeline *p, FILE *stream)
327
328           Dump a string representation of p to stream.
329
330     char *pipeline_tostring(pipeline *p)
331
332           Return a string representation of p.  The caller should free the
333           result.
334
335     void pipeline_free(pipeline *p)
336
337           Destroy a pipeline and all its commands.  Safely does nothing if p
338           is NULL.  May wait for the pipeline to complete if it has not
339           already done so.
340
341   Functions to run pipelines and handle signals
342     typedef void pipeline_post_fork_fn (void);
343     void pipeline_install_post_fork(pipeline_post_fork_fn *fn)
344
345           Install a post-fork handler.  This will be run in any child process
346           immediately after it is forked.  For instance, this may be used for
347           cleaning up application-specific signal handlers.  Pass NULL to
348           clear any existing post-fork handler.
349
350           See pipecmd_pre_exec for a similar facility limited to a single
351           command rather than global to the calling process.
352
353     void pipeline_start(pipeline *p)
354
355           Start the processes in a pipeline.  Installs this library's SIGCHLD
356           handler if not already installed.  Calls error (FATAL) on error.
357
358           The standard file descriptors (0, 1, and 2) must be open before
359           calling this function.
360
361     int pipeline_wait_all(pipeline *p, int **statuses, int *n_statuses)
362
363           Wait for a pipeline to complete.  Set *statuses to a newly-allo‐
364           cated array of wait statuses, as returned by waitpid(2), and
365           *n_statuses to the length of that array.  The return value is simi‐
366           lar to the exit status that a shell would return, with some modifi‐
367           cations.  If the last command exits with a signal (other than
368           SIGPIPE, which is considered equivalent to exiting zero), then the
369           return value is 128 plus the signal number; if the last command
370           exits normally but non-zero, then the return value is its exit sta‐
371           tus; if any other command exits non-zero, then the return value is
372           127; otherwise, the return value is 0.  This means that the return
373           value is only 0 if all commands in the pipeline exit successfully.
374
375     int pipeline_wait(pipeline *p)
376
377           Wait for a pipeline to complete and return its combined exit sta‐
378           tus, calculated as for pipeline_wait_all().
379
380     int pipeline_run(pipeline *p)
381
382           Start a pipeline, wait for it to complete, and free it, all in one
383           go.
384
385     void pipeline_pump(pipeline *p, ...)
386
387           Pump data among one or more pipelines connected using
388           pipeline_connect() until all source pipelines have reached end-of-
389           file and all data has been written to all sinks (or failed).  All
390           relevant pipelines must be supplied: that is, no pipeline that has
391           been connected to a source pipeline may be supplied unless that
392           source pipeline is also supplied.  Automatically starts all pipe‐
393           lines if they are not already started, but does not wait for them.
394           Terminate arguments with NULL.
395
396   Functions to read output from pipelines
397     In general, output is returned as a pointer into a buffer owned by the
398     pipeline, which is automatically freed when pipeline_free() is called.
399     This saves the caller from having to explicitly free individual blocks of
400     output data.
401
402     const char *pipeline_read(pipeline *p, size_t *len)
403
404           Read *len bytes of data from the pipeline, returning the data block.
405           *len is updated with the number of bytes read.
406
407     const char *pipeline_peek(pipeline *p, size_t *len)
408
409           Look ahead in the pipeline's output for *len bytes of data, return‐
410           ing the data block.  *len is updated with the number of bytes read.
411           The starting position of the next read or peek is not affected by
412           this call.
413
414     size_t pipeline_peek_size(pipeline *p)
415
416           Return the number of bytes of data that can be read using
417           pipeline_read() or pipeline_peek() solely from the peek cache,
418           without having to read from the pipeline itself (and thus poten‐
419           tially block).
420
421     void pipeline_peek_skip(pipeline *p, size_t len)
422
423           Skip over and discard len bytes of data from the peek cache.
424           Asserts that enough data is available to skip, so you may want to
425           check using pipeline_peek_size() first.
426
427     const char *pipeline_readline(pipeline *p)
428
429           Read a line of data from the pipeline, returning it.
430
431     const char *pipeline_peekline(pipeline *p)
432
433           Look ahead in the pipeline's output for a line of data, returning
434           it.  The starting position of the next read or peek is not affected
435           by this call.
436
437   Signal handling
438     libpipeline installs a signal handler for SIGCHLD, and collects the exit
439     status of child processes in pipeline_wait().  Applications using this
440     library must either refrain from changing the disposition of SIGCHLD (in
441     other words, must rely on libpipeline for all child process handling) or
442     else must make sure to restore libpipeline's SIGCHLD handler before call‐
443     ing any of its functions.
444
445     If the ignore_signals flag is set in a pipeline (which is not the default),
446     then the SIGINT and SIGQUIT signals will be ignored in the parent process
447     while child processes are running.  This mirrors the behaviour of
448     system(3).
449
450     libpipeline leaves child processes with the default disposition of
451     SIGPIPE, namely to terminate the process.  It ignores SIGPIPE in the par‐
452     ent process while running pipeline_pump().
453
454   Reaping of child processes
455     libpipeline installs a SIGCHLD handler that will attempt to reap child
456     processes which have exited.  This calls waitpid(2) with -1, so it will
457     reap any child process, not merely those created by way of this library.
458     At present, this means that if the calling program forks other child pro‐
459     cesses which may exit while a pipeline is running, the program is not
460     guaranteed to be able to collect exit statuses of those processes.
461
462     You should not rely on this behaviour, and in future it may be modified
463     either to reap only child processes created by this library or to provide
464     a way to return foreign statuses to the application.  Please contact the
465     author if you have an example application and would like to help design
466     such an interface.
467

ENVIRONMENT

469     If the PIPELINE_DEBUG environment variable is set to “1”, then
470     libpipeline will emit debugging messages on standard error.
471
472     If the PIPELINE_QUIET environment variable is set to any value, then
473     libpipeline will refrain from printing an error message when a subprocess
474     is terminated by a signal.
475

EXAMPLES

477     In the following examples, function names starting with pipecmd_ or
478     pipeline_ are real libpipeline functions, while any other function names
479     are pseudocode.
480
481     The simplest case is simple.  To run a single command, such as mv source
482     dest:
483
484           pipeline *p = pipeline_new_command_args ("mv", source, dest, NULL);
485           int status = pipeline_run (p);
486
487     libpipeline is often used to mimic shell pipelines, such as the following
488     example:
489
490           zsoelim < input-file | tbl | nroff -mandoc -Tutf8
491
492     The code to construct this would be:
493
494           pipeline *p;
495           int status;
496
497           p = pipeline_new ();
498           pipeline_want_infile (p, "input-file");
499           pipeline_command_args (p, "zsoelim", NULL);
500           pipeline_command_args (p, "tbl", NULL);
501           pipeline_command_args (p, "nroff", "-mandoc", "-Tutf8", NULL);
502           status = pipeline_run (p);
503
504     You might want to construct a command more dynamically:
505
506           pipecmd *manconv = pipecmd_new_args ("manconv", "-f", from_code,
507                                                "-t", "UTF-8", NULL);
508           if (quiet)
509                   pipecmd_arg (manconv, "-q");
510           pipeline_command (p, manconv);
511
512     Perhaps you want an environment variable set only while running a certain
513     command:
514
515           pipecmd *less = pipecmd_new ("less");
516           pipecmd_setenv (less, "LESSCHARSET", lesscharset);
517
518     You might find yourself needing to pass the output of one pipeline to
519     several other pipelines, in a “tee” arrangement:
520
521           pipeline *source, *sink1, *sink2;
522
523           source = make_source ();
524           sink1 = make_sink1 ();
525           sink2 = make_sink2 ();
526           pipeline_connect (source, sink1, sink2, NULL);
527           /* Pump data among these pipelines until there's nothing left. */
528           pipeline_pump (source, sink1, sink2, NULL);
529           pipeline_free (sink2);
530           pipeline_free (sink1);
531           pipeline_free (source);
532
533     Maybe one of your commands is actually an in-process function, rather
534     than an external program:
535
536           pipecmd *inproc = pipecmd_new_function ("in-process", &func,
537                                                   NULL, NULL);
538           pipeline_command (p, inproc);
539
540     Sometimes your program needs to consume the output of a pipeline, rather
541     than sending it all to some other subprocess:
542
543           pipeline *p = make_pipeline ();
544           const char *line;
545
546           pipeline_want_out (p, -1);
547           pipeline_start (p);
548           line = pipeline_peekline (p);
549           if (!strstr (line, "coding: UTF-8"))
550                   printf ("Unicode text follows:\n");
551           while (line = pipeline_readline (p))
552                   printf ("  %s", line);
553           pipeline_free (p);
554

SEE ALSO

556     fork(2), execve(2), system(3), popen(3).
557

AUTHORS

559     Most of libpipeline was written by Colin Watson <cjwatson@debian.org>,
560     originally for use in man-db.  The initial version was based very loosely
561     on the run_pipeline() function in GNU groff, written by James Clark
562     <jjc@jclark.com>.  It also contains library code by Markus Armbruster,
563     and by various contributors to Gnulib.
564
565     libpipeline is licensed under the GNU General Public License, version 3
566     or later.  See the README file for full details.
567

BUGS

569     Using this library in a program which runs any other child processes
570     and/or installs its own SIGCHLD handler is unlikely to work.
571
572GNU                            October 11, 2010                            GNU
Impressum