1Slurm API(3)            Slurm job step launch functions           Slurm API(3)
2
3
4

NAME

6       slurm_step_launch_params_t_init,                     slurm_step_launch,
7       slurm_step_launch_fwd_signal,             slurm_step_launch_wait_start,
8       slurm_step_launch_wait_finish, slurm_step_launch_abort - Slurm job step
9       launch functions
10
11

SYNTAX

13       #include <slurm/slurm.h>
14
15       void slurm_step_launch_params_t_init (
16            slurm_step_launch_params_t *launch_req
17       );
18
19       int slurm_step_launch (
20            slurm_step_ctx ctx,
21            const slurm_step_launch_params_t *launch_req,
22            const slurm_step_launch_callbacks_t *callbacks
23       );
24
25       void slurm_step_launch_fwd_signal (
26            slurm_step_ctx ctx,
27            int signo
28       );
29
30       int slurm_step_launch_wait_start (
31            slurm_step_ctx ctx
32       );
33
34       void slurm_step_launch_wait_finish (
35            slurm_step_ctx ctx
36       );
37
38       void slurm_step_launch_abort (
39            slurm_step_ctx ctx
40       );
41
42

ARGUMENTS

44       callbacks
45              Identify functions to be called when various events occur.
46
47       ctx    Job step context. Created by slurm_step_ctx_create, used in sub‐
48              sequent function calls, and destroyed by slurm_step_ctx_destroy.
49
50       launch_req
51              Pointer to a structure allocated by the user containing specifi‐
52              cations of the job step to be launched.
53
54

DESCRIPTION

56       slurm_step_launch_params_t_init    Initialize     a     user-allocated
57       slurm_step_launch_params_t  structure  with  default  values.   This
58       function will NOT allocate any new memory.
59
60       slurm_step_launch Launch a parallel job step.
61
62       slurm_step_launch_fwd_signal Forward a signal to all those  nodes  with
63       running tasks.
64
65       slurm_step_launch_wait_start Block until all tasks have started.
66
67       slurm_step_launch_wait_finish  Block  until all tasks have finished (or
68       failed to start altogether).
69
70       slurm_step_launch_abort Abort an in-progress launch, or  terminate  the
71       fully launched job step. Can be called from a signal handler.
72
73

IO Redirection

75       Use  the local_fds entry in  slurm_step_launch_params_t to specify file
76       descriptors to be used  for  standard  input,  output  and  error.  Any
77       local_fds  not  specified  will  result in the launched tasks using the
78       calling process's standard input, output and error.  Threads created by
79       slurm_step_launch  will  completely  handle  copying  data  between the
80       remote processes and the specified local file descriptors.
81
82       Use the substructure in slurm_step_io_fds_t to restrict the redirection
83       of I/O to a specific node or task ID. For example, to redirect standard
84       output only from task 0, set
85
86       params.local_fds.out.taskid=0;
87
88       Use the remote_*_filename fields in slurm_step_launch_params_t to  have
89       launched  tasks  read  and/or write directly to local files rather than
90       transferring data over the  network  to  the  calling  process.   These
91       strings  support  many  of the same format options as the srun command.
92       Any remote_*_filename  fields  set  will  supersede  the  corresponding
93       local_fds  entries.  For  example,  the following code will direct each
94       task to write standard output and standard error to  local  files  with
95       names  containing the task ID (e.g.  "/home/bob/test_output/run1.out.0"
96       and "/home/bob/test_output/run1.err.0" for task 0).
97
98       params.remote_output_filename = "/home/bob/test_output/run1.out.%t";
99       params.remote_error_filename  = "/home/bob/test_output/run1.err.%t";
100
101

RETURN VALUE

103       slurm_step_launch   and   slurm_step_launch_wait_start   will    return
104       SLURM_SUCCESS  when all tasks have successfully started, or SLURM_ERROR
105       if the job step is aborted during launch.
106
107

ERRORS

109       EINVAL Invalid argument
110
111       SLURM_PROTOCOL_VERSION_ERROR Protocol version has changed, re-link your
112       code.
113
114       ESLURM_INVALID_JOB_ID the requested job id does not exist.
115
116       ESLURM_ALREADY_DONE the specified job has already completed and can not
117       be modified.
118
119       ESLURM_ACCESS_DENIED the requesting user lacks  authorization  for  the
120       requested action (e.g. trying to delete or modify another user's job).
121
122       ESLURM_INTERCONNECT_FAILURE failed to configure the node interconnect.
123
124       ESLURM_BAD_DIST task distribution specification is invalid.
125
126       SLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT  Timeout in communicating with Slurm
127       controller.
128
129

EXAMPLE

131       /*
132        * To compile:
133        * gcc test.c -o test -g -pthread -lslurm
134        *
135        * Or if Slurm is not in your default search paths:
136        * gcc test.c -o test -g -pthread -I{$SLURM_DIR}/include \
137        *     -Wl,--rpath={$SLURM_DIR}/lib -L{$SLURM_DIR}/lib -lslurm
138        */
139       #include <stdio.h>
140       #include <stdlib.h>
141       #include <string.h>
142       #include <slurm/slurm.h>
143       #include <slurm/slurm_errno.h>
144
145       static void _task_start(launch_tasks_response_msg_t *msg)
146       {
147            printf("%d tasks started on node %s\n",
148                 msg->count_of_pids, msg->node_name);
149       }
150
151       static void _task_finish(task_exit_msg_t *msg)
152       {
153            printf("%d tasks finished\n", msg->num_tasks);
154       }
155
156       int main (int argc, char *argv[])
157       {
158            slurm_step_ctx_params_t step_params;
159            slurm_step_ctx step_ctx;
160            slurm_step_launch_params_t params;
161            slurm_step_launch_callbacks_t callbacks;
162            uint32_t job_id, step_id;
163
164            slurm_step_ctx_params_t_init(&step_params);
165            step_params.node_count = 1;
166            step_params.task_count = 4;
167            step_params.overcommit = true;
168
169            step_ctx = slurm_step_ctx_create(&step_params);
170            if (step_ctx == NULL) {
171                 slurm_perror("slurm_step_ctx_create");
172                 exit(1);
173            }
174            slurm_step_ctx_get(step_ctx, SLURM_STEP_CTX_JOBID, &job_id);
175            slurm_step_ctx_get(step_ctx, SLURM_STEP_CTX_STEPID, &step_id);
176            printf("Ready to start job %u step %u\n", job_id, step_id);
177
178            slurm_step_launch_params_t_init(&params);
179            params.argc = argc - 1;
180            params.argv = argv + 1;
181            callbacks.task_start = _task_start;
182            callbacks.task_finish = _task_finish;
183            if (slurm_step_launch(step_ctx, &params, &callbacks)
184                      != SLURM_SUCCESS) {
185                 slurm_perror("slurm_step_launch");
186                 exit(1);
187            }
188            printf("Sent step launch RPC\n");
189
190            if (slurm_step_launch_wait_start(step_ctx) != SLURM_SUCCESS) {
191                 fprintf(stderr, "job step was aborted during launch\n");
192            } else {
193                 printf("All tasks have started\n");
194            }
195
196            slurm_step_launch_wait_finish(step_ctx);
197            printf("All tasks have finished\n");
198
199            slurm_step_ctx_destroy(step_ctx);
200            exit(0);
201       }
202
203

NOTE

205       These functions are included in the libslurm  library,  which  must  be
206       linked to your process for use (e.g. "cc -lslurm myprog.c").
207
208

COPYING

210       Copyright  (C)  2006-2007  The Regents of the University of California.
211       Copyright (C) 2008 Lawrence Livermore National Security.   Produced  at
212       Lawrence    Livermore    National    Laboratory    (cf,    DISCLAIMER).
213       CODE-OCEC-09-009. All rights reserved.
214
215       This file is  part  of  Slurm,  a  resource  management  program.   For
216       details, see <https://slurm.schedmd.com/>.
217
218       Slurm  is free software; you can redistribute it and/or modify it under
219       the terms of the GNU General Public License as published  by  the  Free
220       Software  Foundation;  either  version  2  of  the License, or (at your
221       option) any later version.
222
223       Slurm is distributed in the hope that it will be  useful,  but  WITHOUT
224       ANY  WARRANTY;  without even the implied warranty of MERCHANTABILITY or
225       FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General  Public  License
226       for more details.
227

SEE ALSO

229       slurm_step_ctx_create(3),                    slurm_step_ctx_destroy(3),
230       slurm_get_errno(3),  slurm_perror(3),   slurm_strerror(3),   salloc(1),
231       srun(1)
232
233
234
235April 2015              Slurm job step launch functions           Slurm API(3)
Impressum