Slurm API(3)            Slurm job initiation functions            Slurm API(3)

NAME

       slurm_allocate_pack_job_blocking, slurm_allocate_resources,
       slurm_allocate_resources_blocking, slurm_allocation_msg_thr_create,
       slurm_allocation_msg_thr_destroy, slurm_allocation_lookup,
       slurm_pack_job_lookup, slurm_confirm_allocation,
       slurm_free_resource_allocation_response_msg,
       slurm_free_submit_response_response_msg, slurm_init_job_desc_msg,
       slurm_job_will_run, slurm_pack_job_will_run, slurm_job_will_run2,
       slurm_read_hostfile, slurm_submit_batch_job,
       slurm_submit_batch_pack_job - Slurm job initiation functions

SYNTAX

       #include <slurm/slurm.h>

       int slurm_allocate_resources (
            job_desc_msg_t *job_desc_msg_ptr,
            resource_allocation_response_msg_t **slurm_alloc_msg_pptr
       );

       resource_allocation_response_msg_t *slurm_allocate_resources_blocking (
            job_desc_msg_t *job_desc_msg_ptr,
            time_t timeout, void (*pending_callback)(uint32_t job_id)
       );

       List slurm_allocate_pack_job_blocking (
            List job_desc_msg_list,
            time_t timeout, void (*pending_callback)(uint32_t job_id)
       );

       allocation_msg_thread_t *slurm_allocation_msg_thr_create (
            uint16_t *port,
            slurm_allocation_callbacks_t *callbacks
       );

       void slurm_allocation_msg_thr_destroy (
            allocation_msg_thread_t *slurm_alloc_msg_thr_ptr
       );

       int slurm_allocation_lookup (
            uint32_t jobid,
            resource_allocation_response_msg_t **slurm_alloc_msg_pptr
       );

       int slurm_pack_job_lookup (
            uint32_t jobid,
            List *slurm_alloc_msg_list
       );

       int slurm_confirm_allocation (
            old_job_alloc_msg_t *old_job_desc_msg_ptr,
            resource_allocation_response_msg_t **slurm_alloc_msg_pptr
       );

       void slurm_free_resource_allocation_response_msg (
            resource_allocation_response_msg_t *slurm_alloc_msg_ptr
       );

       void slurm_free_submit_response_response_msg (
            submit_response_msg_t *slurm_submit_msg_ptr
       );

       void slurm_init_job_desc_msg (
            job_desc_msg_t *job_desc_msg_ptr
       );

       int slurm_job_will_run (
            job_desc_msg_t *job_desc_msg_ptr
       );

       int slurm_pack_job_will_run (
            List job_desc_msg_list
       );

       int slurm_job_will_run2 (
            job_desc_msg_t *job_desc_msg_ptr,
            will_run_response_msg_t **will_run_resp
       );

       char *slurm_read_hostfile (
            const char *filename, int n
       );

       int slurm_submit_batch_job (
            job_desc_msg_t *job_desc_msg_ptr,
            submit_response_msg_t **slurm_submit_msg_pptr
       );

       int slurm_submit_batch_pack_job (
            List job_desc_msg_list,
            submit_response_msg_t **slurm_submit_msg_pptr
       );

ARGUMENTS

       job_desc_msg_list
              List of job request specifications (of type job_desc_msg_t)
              for a heterogeneous job in an ordered list.  See slurm.h for
              full details on the data structure's contents.

       job_desc_msg_ptr
              Specifies the pointer to a job request specification.  See
              slurm.h for full details on the data structure's contents.

       callbacks
              Specifies the pointer to an allocation callbacks structure.
              See slurm.h for full details on the data structure's contents.

       old_job_desc_msg_ptr
              Specifies the pointer to a description of an existing job.
              See slurm.h for full details on the data structure's contents.

       slurm_alloc_msg_list
              Specifies a pointer to a List structure to be created and
              filled with a list of pointers to resource allocation data (of
              type resource_allocation_response_msg_t).

       slurm_alloc_msg_pptr
              Specifies the double pointer to the structure to be created
              and filled with a description of the created resource
              allocation (job): job ID, list of allocated nodes, processor
              count per allocated node, etc.  See slurm.h for full details
              on the data structure's contents.

       slurm_alloc_msg_ptr
              Specifies the pointer to the structure to be created and
              filled in by the function slurm_allocate_resources,
              slurm_allocate_resources_blocking, slurm_allocation_lookup,
              slurm_confirm_allocation, or slurm_job_will_run.

       slurm_alloc_msg_thr_ptr
              Specifies the pointer to the structure created and returned by
              the function slurm_allocation_msg_thr_create.  Must be
              destroyed with function slurm_allocation_msg_thr_destroy.

       slurm_submit_msg_pptr
              Specifies the double pointer to the structure to be created
              and filled with a description of the created job: job ID, etc.
              See slurm.h for full details on the data structure's contents.

       slurm_submit_msg_ptr
              Specifies the pointer to the structure to be created and
              filled in by the function slurm_submit_batch_job.

       will_run_resp
              Specifies when and where the specified job descriptor could be
              started.

DESCRIPTION

       slurm_allocate_resources Request a resource allocation for a job.  If
       successful, a job entry is created.  Note that if the job's requested
       node count or time allocation is outside of the partition's limits
       then a job entry will be created, a warning indication will be placed
       in the error_code field of the response message, and the job will be
       left queued until the partition's limits are changed.  Always release
       the response message when no longer required using the function
       slurm_free_resource_allocation_response_msg.  This function only
       makes the request once.  If the allocation is not available
       immediately, the node_cnt variable in the response will be 0.  If you
       want a function that will block until either an error is received or
       an allocation is granted, use the slurm_allocate_resources_blocking
       function described below.

       slurm_allocate_resources_blocking Request a resource allocation for a
       job.  This call will block until the allocation is granted, an error
       occurs, or the specified timeout limit is reached.  The
       pending_callback parameter will be called if the allocation is not
       available immediately and the immediate flag is not set in the
       request.  This can be used to obtain the job ID of the job while
       waiting for the allocation to become available.  On failure NULL is
       returned and errno is set.

       slurm_allocate_pack_job_blocking Request a set of resource
       allocations for a heterogeneous job.  This call will block until the
       allocation is granted, an error occurs, or the specified timeout
       limit is reached.  The pending_callback parameter will be called if
       the allocation is not available immediately and the immediate flag is
       not set in the request.  This can be used to obtain the job ID of the
       job while waiting for the allocation to become available.  On failure
       NULL is returned and errno is set.  The returned list should be freed
       using the list_destroy function.
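
       For example, a two-component heterogeneous job can be requested as
       sketched below.  This is a minimal sketch, not a complete program: it
       assumes the list_create, list_append and list_destroy functions of
       the list API referenced above, sets only a few of the available job
       options, and omits the headers and error handling shown in the
       EXAMPLE sections.

            job_desc_msg_t desc[2];
            List job_list, resp_list;
            int i;

            /* Describe each component of the heterogeneous job. */
            for (i = 0; i < 2; i++) {
                 slurm_init_job_desc_msg(&desc[i]);
                 desc[i].name = "pack_job";
                 desc[i].min_nodes = i + 1;
                 desc[i].user_id = getuid();
                 desc[i].group_id = getgid();
            }

            /* Build the ordered component list (assumed list API). */
            job_list = list_create(NULL);
            list_append(job_list, &desc[0]);
            list_append(job_list, &desc[1]);

            /* Block until the entire pack allocation is granted. */
            resp_list = slurm_allocate_pack_job_blocking(job_list, 0, NULL);
            if (resp_list == NULL)
                 slurm_perror("slurm_allocate_pack_job_blocking error");
            else
                 list_destroy(resp_list);
            list_destroy(job_list);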

       slurm_allocation_msg_thr_create Start a message handler thread that
       receives messages from the controller during an allocation.  Callback
       functions are declared in the callbacks parameter and will be called
       when a corresponding message is received from the controller.  This
       message thread is needed to receive messages from the controller
       about node failures in an allocation and other important messages.
       Although technically not required, it can be very helpful for
       learning about problems with the allocation.
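
       A minimal lifecycle sketch follows (not a complete program; it
       additionally needs <string.h> for memset).  The callback structure is
       left zero-initialized here rather than populated; real code would
       point its members (declared in slurm.h) at handler functions, and
       would typically store the returned port in the job request before
       requesting the allocation.

            uint16_t port = 0;
            allocation_msg_thread_t *msg_thr;
            slurm_allocation_callbacks_t callbacks;

            /* No handlers registered in this sketch. */
            memset(&callbacks, 0, sizeof(callbacks));

            msg_thr = slurm_allocation_msg_thr_create(&port, &callbacks);
            if (msg_thr == NULL)
                 slurm_perror("slurm_allocation_msg_thr_create error");

            /* ... request and use the allocation here ... */

            if (msg_thr)
                 slurm_allocation_msg_thr_destroy(msg_thr);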

       slurm_allocation_msg_thr_destroy Shut down the message handler thread
       created by slurm_allocation_msg_thr_create to communicate with the
       controller during an allocation.

       slurm_confirm_allocation Return detailed information on a specific
       existing job allocation.  OBSOLETE FUNCTION: Use
       slurm_allocation_lookup instead.  This function may only be
       successfully executed by the job's owner or user root.

       slurm_allocation_lookup Returns detailed information about an
       existing job allocation.
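
       For example, the following program looks up an existing allocation
       and prints its nodes.  The job ID used is a placeholder; substitute
       the ID of a real job owned by the calling user.

            #include <stdio.h>
            #include <stdlib.h>
            #include <slurm/slurm.h>
            #include <slurm/slurm_errno.h>

            int main (int argc, char *argv[])
            {
                 resource_allocation_response_msg_t *alloc_ptr;
                 uint32_t job_id = 1234;  /* placeholder job ID */

                 if (slurm_allocation_lookup(job_id, &alloc_ptr)) {
                      slurm_perror ("slurm_allocation_lookup error");
                      exit (1);
                 }
                 printf ("job_id %u has nodes %s\n",
                         alloc_ptr->job_id, alloc_ptr->node_list);
                 slurm_free_resource_allocation_response_msg(alloc_ptr);
                 exit (0);
            }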

       slurm_pack_job_lookup Returns detailed information about an existing
       heterogeneous job allocation.  Each element in the list represents a
       component of the job in sequential order.  The returned list should
       be freed using the list_destroy function.
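
       A sketch of walking the components of a heterogeneous job follows
       (a fragment, not a complete program).  It assumes the iterator
       functions of the same list API (list_iterator_create, list_next and
       list_iterator_destroy); the job ID is a placeholder for the ID of
       the pack leader.

            List resp_list;
            ListIterator iter;
            resource_allocation_response_msg_t *comp;

            if (slurm_pack_job_lookup(1234, &resp_list)) {
                 slurm_perror("slurm_pack_job_lookup error");
                 exit(1);
            }
            /* Components are returned in sequential order. */
            iter = list_iterator_create(resp_list);
            while ((comp = list_next(iter)))
                 printf("component job_id %u: nodes %s\n",
                        comp->job_id, comp->node_list);
            list_iterator_destroy(iter);
            list_destroy(resp_list);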

       slurm_free_resource_allocation_response_msg Release the storage
       generated in response to a call of the function
       slurm_allocate_resources or slurm_allocation_lookup.

       slurm_free_submit_response_response_msg Release the storage generated
       in response to a call of the function slurm_submit_batch_job.

       slurm_init_job_desc_msg Initialize the contents of a job descriptor
       with default values.  Execute this function before issuing a request
       to submit or modify a job.

       slurm_job_will_run Report when and where the supplied job description
       can be executed.

       slurm_pack_job_will_run Report when and where the supplied
       heterogeneous job description can be executed.

       slurm_job_will_run2 Determine when and where the supplied job
       description can be executed.
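
       For example, the following program asks where and when a one-node job
       could start.  The response fields used below (job_id, node_list,
       start_time) and the release function slurm_free_will_run_response_msg
       are assumptions based on the will_run_response_msg_t declaration in
       slurm.h; consult that header for the full structure.

            #include <stdio.h>
            #include <stdlib.h>
            #include <time.h>
            #include <unistd.h>
            #include <slurm/slurm.h>
            #include <slurm/slurm_errno.h>

            int main (int argc, char *argv[])
            {
                 job_desc_msg_t job_desc_msg;
                 will_run_response_msg_t *will_run_resp = NULL;

                 slurm_init_job_desc_msg( &job_desc_msg );
                 job_desc_msg.name = "job01";
                 job_desc_msg.min_nodes = 1;
                 job_desc_msg.user_id = getuid();
                 job_desc_msg.group_id = getgid();
                 if (slurm_job_will_run2(&job_desc_msg, &will_run_resp)) {
                      slurm_perror ("slurm_job_will_run2 error");
                      exit (1);
                 }
                 /* ctime's result already ends with a newline. */
                 printf ("job %u could start on %s at %s",
                         will_run_resp->job_id, will_run_resp->node_list,
                         ctime(&will_run_resp->start_time));
                 slurm_free_will_run_response_msg(will_run_resp);
                 exit (0);
            }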

       slurm_read_hostfile Read a Slurm hostfile specified by "filename".
       "filename" must contain a list of Slurm NodeNames, one per line.
       Reads up to "n" hostnames from the file.  Returns a string
       representing a hostlist ranged string of the contents of the file.
       This is a helper function, it does not contact any Slurm daemons.
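
       For example, the following program reads up to 8 node names from a
       hostfile and prints the resulting ranged hostlist string.  The file
       path is a placeholder, and releasing the returned string with free is
       an assumption.

            #include <stdio.h>
            #include <stdlib.h>
            #include <slurm/slurm.h>

            int main (int argc, char *argv[])
            {
                 /* One NodeName per line in the (placeholder) file. */
                 char *hosts = slurm_read_hostfile("/tmp/hostfile", 8);

                 if (hosts == NULL) {
                      fprintf (stderr, "slurm_read_hostfile failed\n");
                      exit (1);
                 }
                 printf ("hostlist: %s\n", hosts);
                 free (hosts);
                 exit (0);
            }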

       slurm_submit_batch_job Submit a job for later execution.  Note that
       if the job's requested node count or time allocation is outside of
       the partition's limits then a job entry will be created, a warning
       indication will be placed in the error_code field of the response
       message, and the job will be left queued until the partition's limits
       are changed and resources are available.  Always release the response
       message when no longer required using the function
       slurm_free_submit_response_response_msg.
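
       For example, the following program submits a short batch script.
       Beyond the fields shown in the EXAMPLE sections, it sets the script,
       work_dir, environment and env_size members of job_desc_msg_t; treat
       these field names as assumptions to be checked against slurm.h.

            #include <stdio.h>
            #include <stdlib.h>
            #include <unistd.h>
            #include <slurm/slurm.h>
            #include <slurm/slurm_errno.h>

            int main (int argc, char *argv[])
            {
                 job_desc_msg_t job_desc_msg;
                 submit_response_msg_t *submit_resp;
                 char *env[] = { "PATH=/bin:/usr/bin", NULL };

                 slurm_init_job_desc_msg( &job_desc_msg );
                 job_desc_msg.name = "job01";
                 job_desc_msg.min_nodes = 1;
                 job_desc_msg.user_id = getuid();
                 job_desc_msg.group_id = getgid();
                 /* The batch script itself, plus its working
                  * directory and environment (batch jobs are normally
                  * submitted with an environment, as sbatch does). */
                 job_desc_msg.script = "#!/bin/sh\nsleep 30\n";
                 job_desc_msg.work_dir = "/tmp";
                 job_desc_msg.environment = env;
                 job_desc_msg.env_size = 1;
                 if (slurm_submit_batch_job(&job_desc_msg,
                                            &submit_resp)) {
                      slurm_perror ("slurm_submit_batch_job error");
                      exit (1);
                 }
                 printf ("submitted job_id %u\n", submit_resp->job_id);
                 slurm_free_submit_response_response_msg(submit_resp);
                 exit (0);
            }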

       slurm_submit_batch_pack_job Submit a heterogeneous job for later
       execution.  Note that if the job's requested node count or time
       allocation is outside of the partition's limits then a job entry will
       be created, a warning indication will be placed in the error_code
       field of the response message, and the job will be left queued until
       the partition's limits are changed and resources are available.
       Always release the response message when no longer required using the
       function slurm_free_submit_response_response_msg.

RETURN VALUE

       On success, zero is returned.  On error, -1 is returned, and the
       Slurm error code is set appropriately.  Functions documented above as
       returning a pointer instead return NULL on error and set the error
       code.

ERRORS

       SLURM_PROTOCOL_VERSION_ERROR Protocol version has changed, re-link
       your code.

       ESLURM_CAN_NOT_START_IMMEDIATELY the job can not be started
       immediately as requested.

       ESLURM_DEFAULT_PARTITION_NOT_SET the system lacks a valid default
       partition.

       ESLURM_JOB_MISSING_PARTITION_KEY use of this partition is restricted
       through a credential provided only to user root.  This job lacks such
       a valid credential.

       ESLURM_JOB_MISSING_REQUIRED_PARTITION_GROUP use of this partition is
       restricted to certain groups.  This user is not a member of an
       authorized group.

       ESLURM_REQUESTED_NODES_NOT_IN_PARTITION the job requested use of
       specific nodes which are not in the requested (or default) partition.

       ESLURM_TOO_MANY_REQUESTED_CPUS the job requested use of more
       processors than can be made available in the requested (or default)
       partition.

       ESLURM_TOO_MANY_REQUESTED_NODES the job requested use of more nodes
       than can be made available in the requested (or default) partition.

       ESLURM_ERROR_ON_DESC_TO_RECORD_COPY unable to create the job due to
       internal resources being exhausted.  Try again later.

       ESLURM_JOB_MISSING_SIZE_SPECIFICATION the job failed to specify some
       size specification.  At least one of the following must be supplied:
       required processor count, required node count, or required node list.

       ESLURM_JOB_SCRIPT_MISSING failed to identify executable program to be
       queued.

       ESLURM_USER_ID_MISSING identification of the job's owner was not
       provided.

       ESLURM_DUPLICATE_JOB_ID the requested job id is already in use.

       ESLURM_NOT_TOP_PRIORITY job can not be started immediately because
       higher priority jobs are waiting to use this partition.

       ESLURM_NOT_PACK_JOB_LEADER the job ID does not represent a
       heterogeneous job leader as required by the function.

       ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE the requested node
       configuration is not available (at least not in sufficient quantity)
       to satisfy the request.

       ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE the requested partition
       configuration is not available to satisfy the request.  This is not a
       fatal error, but indicates that the job will be left queued until the
       partition's configuration is changed.  This typically indicates that
       the job's requested node count is outside of the node count range its
       partition is configured to support (e.g. the job wants 64 nodes and
       the partition will only schedule jobs using between 1 and 32 nodes).
       Alternately, the job's time limit exceeds the partition's time limit.

       ESLURM_NODES_BUSY the requested nodes are already in use.

       ESLURM_INVALID_FEATURE the requested feature(s) do not exist.

       ESLURM_INVALID_JOB_ID the requested job id does not exist.

       ESLURM_INVALID_NODE_COUNT the requested node count is not valid.

       ESLURM_INVALID_NODE_NAME the requested node name(s) is/are not valid.

       ESLURM_INVALID_PARTITION_NAME the requested partition name is not
       valid.

       ESLURM_TRANSITION_STATE_NO_UPDATE the requested job configuration
       change can not take place at this time.  Try again later.

       ESLURM_ALREADY_DONE the specified job has already completed and can
       not be modified.

       ESLURM_ACCESS_DENIED the requesting user lacks authorization for the
       requested action (e.g. trying to delete or modify another user's
       job).

       ESLURM_INTERCONNECT_FAILURE failed to configure the node
       interconnect.

       ESLURM_BAD_DIST task distribution specification is invalid.

       SLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT Timeout in communicating with
       Slurm controller.
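
       These codes can be retrieved with slurm_get_errno(3) and rendered as
       text with slurm_strerror(3), or reported directly with
       slurm_perror(3).  A typical fragment, assuming a job descriptor
       initialized as in the examples below:

            if (slurm_job_will_run(&job_desc_msg) != SLURM_SUCCESS)
                 fprintf (stderr, "will_run: %s\n",
                          slurm_strerror(slurm_get_errno()));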

NON-BLOCKING EXAMPLE

       #include <stdio.h>
       #include <stdlib.h>
       #include <signal.h>
       #include <unistd.h>
       #include <slurm/slurm.h>
       #include <slurm/slurm_errno.h>

       int main (int argc, char *argv[])
       {
            job_desc_msg_t job_desc_msg;
            resource_allocation_response_msg_t *slurm_alloc_msg_ptr;

            slurm_init_job_desc_msg( &job_desc_msg );
            job_desc_msg.name = "job01";
            /* Minimum memory per node (MB), time limit (minutes) and
             * node count for the request. */
            job_desc_msg.pn_min_memory = 1024;
            job_desc_msg.time_limit = 200;
            job_desc_msg.min_nodes = 400;
            job_desc_msg.user_id = getuid();
            job_desc_msg.group_id = getgid();
            if (slurm_allocate_resources(&job_desc_msg,
                                         &slurm_alloc_msg_ptr)) {
                 slurm_perror ("slurm_allocate_resources error");
                 exit (1);
            }
            printf ("Allocated nodes %s to job_id %u\n",
                    slurm_alloc_msg_ptr->node_list,
                    slurm_alloc_msg_ptr->job_id);
            if (slurm_kill_job(slurm_alloc_msg_ptr->job_id, SIGKILL, 0)) {
                 printf ("kill errno %d\n", slurm_get_errno());
                 exit (1);
            }
            printf ("canceled job_id %u\n",
                    slurm_alloc_msg_ptr->job_id);
            slurm_free_resource_allocation_response_msg(
                      slurm_alloc_msg_ptr);
            exit (0);
       }

BLOCKING EXAMPLE

       #include <stdio.h>
       #include <stdlib.h>
       #include <signal.h>
       #include <unistd.h>
       #include <slurm/slurm.h>
       #include <slurm/slurm_errno.h>

       int main (int argc, char *argv[])
       {
            job_desc_msg_t job_desc_msg;
            resource_allocation_response_msg_t *slurm_alloc_msg_ptr;

            slurm_init_job_desc_msg( &job_desc_msg );
            job_desc_msg.name = "job01";
            /* Minimum memory per node (MB), time limit (minutes) and
             * node count for the request. */
            job_desc_msg.pn_min_memory = 1024;
            job_desc_msg.time_limit = 200;
            job_desc_msg.min_nodes = 400;
            job_desc_msg.user_id = getuid();
            job_desc_msg.group_id = getgid();
            /* Block until granted, with no timeout and no callback. */
            if (!(slurm_alloc_msg_ptr =
                  slurm_allocate_resources_blocking(&job_desc_msg, 0,
                                                    NULL))) {
                 slurm_perror ("slurm_allocate_resources_blocking error");
                 exit (1);
            }
            printf ("Allocated nodes %s to job_id %u\n",
                    slurm_alloc_msg_ptr->node_list,
                    slurm_alloc_msg_ptr->job_id);
            if (slurm_kill_job(slurm_alloc_msg_ptr->job_id, SIGKILL, 0)) {
                 printf ("kill errno %d\n", slurm_get_errno());
                 exit (1);
            }
            printf ("canceled job_id %u\n",
                    slurm_alloc_msg_ptr->job_id);
            slurm_free_resource_allocation_response_msg(
                      slurm_alloc_msg_ptr);
            exit (0);
       }

NOTE

       These functions are included in the libslurm library, which must be
       linked to your process for use (e.g. "cc -lslurm myprog.c").

COPYING

       Copyright (C) 2010-2017 SchedMD LLC.  Copyright (C) 2002-2006 The
       Regents of the University of California.  Produced at Lawrence
       Livermore National Laboratory (cf, DISCLAIMER).  CODE-OCEC-09-009.
       All rights reserved.

       This file is part of Slurm, a resource management program.  For
       details, see <https://slurm.schedmd.com/>.

       Slurm is free software; you can redistribute it and/or modify it
       under the terms of the GNU General Public License as published by the
       Free Software Foundation; either version 2 of the License, or (at
       your option) any later version.

       Slurm is distributed in the hope that it will be useful, but WITHOUT
       ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       for more details.

SEE ALSO

       hostlist_create(3), hostlist_shift(3), hostlist_destroy(3),
       scancel(1), srun(1), slurm_free_job_info_msg(3), slurm_get_errno(3),
       slurm_load_jobs(3), slurm_perror(3), slurm_strerror(3)

March 2019              Slurm job initiation functions            Slurm API(3)