1Slurm API(3) Slurm job initiation functions Slurm API(3)
2
3
4
6 slurm_allocate_het_job_blocking, slurm_allocate_resources, slurm_allo‐
7 cate_resources_blocking, slurm_allocation_msg_thr_create, slurm_alloca‐
8 tion_msg_thr_destroy, slurm_allocation_lookup, slurm_het_job_lookup,
9 slurm_confirm_allocation, slurm_free_submit_response_response_msg,
10 slurm_init_job_desc_msg, slurm_job_will_run, slurm_het_job_will_run,
11 slurm_job_will_run2, slurm_read_hostfile, slurm_submit_batch_job,
12 slurm_submit_batch_het_job - Slurm job initiation functions
13
15 #include <slurm/slurm.h>
16
17 int slurm_allocate_resources (
18 job_desc_msg_t *job_desc_msg_ptr,
19 resource_allocation_response_msg_t **slurm_alloc_msg_pptr
20 );
21
22 resource_allocation_response_msg_t *slurm_allocate_resources_blocking (
23 job_desc_msg_t *job_desc_msg_ptr,
24 time_t timeout, void (*pending_callback)(uint32_t job_id)
25 );
26
27 List slurm_allocate_het_job_blocking (
28 List job_desc_msg_list,
29 time_t timeout, void (*pending_callback)(uint32_t job_id)
30 );
31
32 allocation_msg_thread_t *slurm_allocation_msg_thr_create (
33 uint16_t *port,
34 slurm_allocation_callbacks_t *callbacks
35 );
36
37 void slurm_allocation_msg_thr_destroy (
38 allocation_msg_thread_t *slurm_alloc_msg_thr_ptr
39 );
40
41 int slurm_allocation_lookup (
42 uint32_t jobid,
43 resource_allocation_response_msg_t **slurm_alloc_msg_pptr
44 );
45
46 int slurm_het_job_lookup (
47 uint32_t jobid,
48 List *slurm_alloc_msg_list
49 );
50
51 int slurm_confirm_allocation (
52 old_job_alloc_msg_t *old_job_desc_msg_ptr,
53 resource_allocation_response_msg_t **slurm_alloc_msg_pptr
54 );
55
56 void slurm_free_resource_allocation_response_msg (
57 resource_allocation_response_msg_t *slurm_alloc_msg_ptr
58 );
59
60 void slurm_free_submit_response_response_msg (
61 submit_response_msg_t *slurm_submit_msg_ptr
62 );
63
64 void slurm_init_job_desc_msg (
65 job_desc_msg_t *job_desc_msg_ptr
66 );
67
68 int slurm_job_will_run (
69 job_desc_msg_t *job_desc_msg_ptr
70 );
71
72 int slurm_het_job_will_run (
73 List job_desc_msg_list
74 );
75
76 int slurm_job_will_run2 (
77 job_desc_msg_t *job_desc_msg_ptr,
78 will_run_response_msg_t **will_run_resp
79 );
80
81 char *slurm_read_hostfile (
82 const char *filename, int n
83 );
84
85 int slurm_submit_batch_job (
86 job_desc_msg_t *job_desc_msg_ptr,
87 submit_response_msg_t **slurm_submit_msg_pptr
88 );
89
90 int slurm_submit_batch_het_job (
91 List job_desc_msg_list,
92 submit_response_msg_t **slurm_submit_msg_pptr
93 );
94
96 job_desc_msg_list
97 List of job request specifications (of type job_desc_msg_t) for
98 a heterogeneous job in an ordered list. See slurm.h for full
99 details on the data structure's contents.
100
101 job_desc_msg_ptr
102 Specifies the pointer to a job request specification. See
103 slurm.h for full details on the data structure's contents.
104
105 callbacks
106 Specifies the pointer to an allocation callbacks structure. See
107 slurm.h for full details on the data structure's contents.
108
109 old_job_desc_msg_ptr
110 Specifies the pointer to a description of an existing job. See
111 slurm.h for full details on the data structure's contents.
112
113 slurm_alloc_msg_list
114 Specifies a pointer to a List structure to be created and filled
115 with a list of pointers to resource allocation data (of type
116 resource_allocation_response_msg_t).
117
118 slurm_alloc_msg_pptr
119 Specifies the double pointer to the structure to be created and
120 filled with a description of the created resource allocation
121 (job): job ID, list of allocated nodes, processor count per
122 allocated node, etc. See slurm.h for full details on the data
123 structure's contents.
124
125 slurm_alloc_msg_ptr
126 Specifies the pointer to the structure to be created and filled
127 in by the function slurm_allocate_resources, slurm_allo‐
128 cate_resources_blocking, slurm_allocation_lookup, slurm_con‐
129 firm_allocation or slurm_job_will_run.
130
131 slurm_alloc_msg_thr_ptr
132 Specifies the pointer to the structure created and returned by
133 the function slurm_allocation_msg_thr_create. Must be destroyed
134 with function slurm_allocation_msg_thr_destroy.
135
136 slurm_submit_msg_pptr
137 Specifies the double pointer to the structure to be created and
138 filled with a description of the created job: job ID, etc. See
139 slurm.h for full details on the data structure's contents.
140
141 slurm_submit_msg_ptr
142 Specifies the pointer to the structure to be created and filled
143 in by the function slurm_submit_batch_job.
144
145 will_run_resp
146 Specifies when and where the specified job descriptor could be
147 started.
148
150 slurm_allocate_resources Request a resource allocation for a job. If
151 successful, a job entry is created. Note that if the job's requested
152 node count or time allocation are outside of the partition's limits
153 then a job entry will be created, a warning indication will be placed
154 in the error_code field of the response message, and the job will be
155 left queued until the partition's limits are changed. Always release
156 the response message when no longer required using the function
157 slurm_free_resource_allocation_response_msg. This function only makes
158 the request once. If the allocation is not available immediately the
159 node_cnt variable in the resp will be 0. If you want a function that
160 will block until either an error is received or an allocation is
161 granted you can use the slurm_allocate_resources_blocking function
162 described below.
163
164 slurm_allocate_resources_blocking Request a resource allocation for a
165 job. This call will block until the allocation is granted, an error
166 occurs, or the specified timeout limit is reached. The pending_call‐
167 back parameter will be called if the allocation is not available imme‐
168 diately and the immediate flag is not set in the request. This can be
169 used to get the jobid of the job while waiting for the allocation to
170 become available. On failure NULL is returned and errno is set.
171
172 slurm_allocate_het_job_blocking Request a set of resource allocations
173 for a heterogeneous job. This call will block until the allocation is
174 granted, an error occurs, or the specified timeout limit is reached.
175 The pending_callback parameter will be called if the allocation is not
176 available immediately and the immediate flag is not set in the request.
177 This can be used to get the jobid of the job while waiting for the
178 allocation to become available. On failure NULL is returned and errno
179 is set. The returned list should be freed using the list_destroy func‐
180 tion.
181
182 slurm_allocation_msg_thr_create Startup a message handler talking with
183 the controller dealing with messages from the controller during an
184 allocation. Callback functions are declared in the callbacks parameter
185 and will be called when a corresponding message is received from the
186 controller. This message thread is needed to receive messages from the
187 controller about node failure in an allocation and other important mes‐
188 sages. Although technically not required, it could be very helpful to
189 inform about problems with the allocation.
190
191 slurm_allocation_msg_thr_destroy Shutdown the message handler talking
192 with the controller dealing with messages from the controller during an
193 allocation.
195
196 slurm_confirm_allocation Return detailed information on a specific
197 existing job allocation. OBSOLETE FUNCTION: Use slurm_allocation_lookup
198 instead. This function may only be successfully executed by the job's
199 owner or user root.
200
201 slurm_allocation_lookup Returns detailed information about an existing
202 job allocation.
203
204 slurm_het_job_lookup Returns detailed information about an existing
205 heterogeneous job allocation. Each element in the list represents a
206 component of the job in sequential order. The returned list should be
207 freed using the list_destroy function.
208
209 slurm_free_resource_allocation_response_msg Release the storage gener‐
210 ated in response to a call of the function slurm_allocate_resources or
211 slurm_allocation_lookup.
212
213 slurm_free_submit_response_response_msg Release the storage generated
214 in response to a call of the function slurm_submit_batch_job.
215
216 slurm_init_job_desc_msg Initialize the contents of a job descriptor
217 with default values. Execute this function before issuing a request to
218 submit or modify a job.
219
220 slurm_job_will_run Report when and where the supplied job description
221 can be executed.
222
223 slurm_het_job_will_run Report when and where the supplied heterogeneous
224 job description can be executed.
225
226 slurm_job_will_run2 Determine when and where the supplied job descrip‐
227 tion can be executed.
228
229 slurm_read_hostfile Read a Slurm hostfile specified by "filename".
230 "filename" must contain a list of Slurm NodeNames, one per line. Reads
231 up to "n" number of hostnames from the file. Returns a string repre‐
232 senting a hostlist ranged string of the contents of the file. This is
233 a helper function, it does not contact any Slurm daemons.
234
235 slurm_submit_batch_job Submit a job for later execution. Note that if
236 the job's requested node count or time allocation are outside of the
237 partition's limits then a job entry will be created, a warning indica‐
238 tion will be placed in the error_code field of the response message,
239 and the job will be left queued until the partition's limits are
240 changed and resources are available. Always release the response mes‐
241 sage when no longer required using the function
242 slurm_free_submit_response_response_msg.
243
244 slurm_submit_batch_het_job Submit a heterogeneous job for later execu‐
245 tion. Note that if the job's requested node count or time allocation
246 are outside of the partition's limits then a job entry will be created,
247 a warning indication will be placed in the error_code field of the
248 response message, and the job will be left queued until the partition's
249 limits are changed and resources are available. Always release the
250 response message when no longer required using the function
251 slurm_free_submit_response_response_msg.
252
254 On success, zero is returned. On error, -1 is returned, and the Slurm error
255 code is set appropriately.
256
258 SLURM_PROTOCOL_VERSION_ERROR Protocol version has changed, re-link your
259 code.
260
261 ESLURM_CAN_NOT_START_IMMEDIATELY the job can not be started immediately
262 as requested.
263
264 ESLURM_DEFAULT_PARTITION_NOT_SET the system lacks a valid default par‐
265 tition.
266
267 ESLURM_JOB_MISSING_PARTITION_KEY use of this partition is restricted
268 through a credential provided only to user root. This job lacks such a
269 valid credential.
270
271 ESLURM_JOB_MISSING_REQUIRED_PARTITION_GROUP use of this partition is
272 restricted to certain groups. This user is not a member of an autho‐
273 rized group.
274
275 ESLURM_REQUESTED_NODES_NOT_IN_PARTITION the job requested use of spe‐
276 cific nodes which are not in the requested (or default) partition.
277
278 ESLURM_TOO_MANY_REQUESTED_CPUS the job requested use of more processors
279 than can be made available in the requested (or default) partition.
280
281 ESLURM_TOO_MANY_REQUESTED_NODES the job requested use of more nodes
282 than can be made available in the requested (or default) partition.
283
284 ESLURM_ERROR_ON_DESC_TO_RECORD_COPY unable to create the job due to
285 internal resources being exhausted. Try again later.
286
287 ESLURM_JOB_MISSING_SIZE_SPECIFICATION the job failed to specify some
288 size specification. At least one of the following must be supplied:
289 required processor count, required node count, or required node list.
290
291 ESLURM_JOB_SCRIPT_MISSING failed to identify executable program to be
292 queued.
293
294 ESLURM_USER_ID_MISSING identification of the job's owner was not pro‐
295 vided.
296
297 ESLURM_DUPLICATE_JOB_ID the requested job id is already in use.
298
299 ESLURM_NOT_TOP_PRIORITY job can not be started immediately because
300 higher priority jobs are waiting to use this partition.
301
302 ESLURM_NOT_HET_JOB_LEADER the job ID does not represent a heterogeneous
303 job leader as required by the function.
304
305 ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE the requested node configura‐
306 tion is not available (at least not in sufficient quantity) to satisfy
307 the request.
308
309 ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE the requested partition con‐
310 figuration is not available to satisfy the request. This is not a fatal
311 error, but indicates that the job will be left queued until the parti‐
312 tion's configuration is changed. This typically indicates that the
313 job's requested node count is outside of the node count range its par‐
314 tition is configured to support (e.g. the job wants 64 nodes and the
315 partition will only schedule jobs using between 1 and 32 nodes). Alter‐
316 nately, the job's time limit exceeds the partition's time limit.
317
318 ESLURM_NODES_BUSY the requested nodes are already in use.
319
320 ESLURM_INVALID_FEATURE the requested feature(s) does not exist.
321
322 ESLURM_INVALID_JOB_ID the requested job id does not exist.
323
324 ESLURM_INVALID_NODE_COUNT the requested node count is not valid.
325
326 ESLURM_INVALID_NODE_NAME the requested node name(s) is/are not valid.
327
328 ESLURM_INVALID_PARTITION_NAME the requested partition name is not
329 valid.
330
331 ESLURM_TRANSITION_STATE_NO_UPDATE the requested job configuration
332 change can not take place at this time. Try again later.
333
334 ESLURM_ALREADY_DONE the specified job has already completed and can not
335 be modified.
336
337 ESLURM_ACCESS_DENIED the requesting user lacks authorization for the
338 requested action (e.g. trying to delete or modify another user's job).
339
340 ESLURM_INTERCONNECT_FAILURE failed to configure the node interconnect.
341
342 ESLURM_BAD_DIST task distribution specification is invalid.
343
344 SLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT Timeout in communicating with Slurm
345 controller.
346
348 #include <stdio.h>
349 #include <stdlib.h>
350 #include <signal.h>
351 #include <slurm/slurm.h>
352 #include <slurm/slurm_errno.h>
353
354 int main (int argc, char *argv[])
355 {
356 job_desc_msg_t job_desc_msg;
357 resource_allocation_response_msg_t* slurm_alloc_msg_ptr ;
358
359 slurm_init_job_desc_msg( &job_desc_msg );
360 job_desc_msg. name = ("job01 ");
361 job_desc_msg. job_min_memory = 1024;
362 job_desc_msg. time_limit = 200;
363 job_desc_msg. min_nodes = 400;
364 job_desc_msg. user_id = getuid();
365 job_desc_msg. group_id = getgid();
366 if (slurm_allocate_resources(&job_desc_msg,
367 &slurm_alloc_msg_ptr)) {
368 slurm_perror ("slurm_allocate_resources error");
369 exit (1);
370 }
371 printf ("Allocated nodes %s to job_id %u\n",
372 slurm_alloc_msg_ptr->node_list,
373 slurm_alloc_msg_ptr->job_id );
374 if (slurm_kill_job(slurm_alloc_msg_ptr->job_id, SIGKILL, 0)) {
375 printf ("kill errno %d\n", slurm_get_errno());
376 exit (1);
377 }
378 printf ("canceled job_id %u\n",
379 slurm_alloc_msg_ptr->job_id );
380 slurm_free_resource_allocation_response_msg(
381 slurm_alloc_msg_ptr);
382 exit (0);
383 }
384
385
387 #include <stdio.h>
388 #include <stdlib.h>
389 #include <signal.h>
390 #include <slurm/slurm.h>
391 #include <slurm/slurm_errno.h>
392
393 int main (int argc, char *argv[])
394 {
395 job_desc_msg_t job_desc_msg;
396 resource_allocation_response_msg_t* slurm_alloc_msg_ptr ;
397
398 slurm_init_job_desc_msg( &job_desc_msg );
399 job_desc_msg. name = ("job01 ");
400 job_desc_msg. job_min_memory = 1024;
401 job_desc_msg. time_limit = 200;
402 job_desc_msg. min_nodes = 400;
403 job_desc_msg. user_id = getuid();
404 job_desc_msg. group_id = getgid();
405 if (!(slurm_alloc_msg_ptr =
406 slurm_allocate_resources_blocking(&job_desc_msg, 0, NULL)))
407 {
408 slurm_perror ("slurm_allocate_resources_blocking error");
409 exit (1);
410 }
411 printf ("Allocated nodes %s to job_id %u\n",
412 slurm_alloc_msg_ptr->node_list,
413 slurm_alloc_msg_ptr->job_id );
414 if (slurm_kill_job(slurm_alloc_msg_ptr->job_id, SIGKILL, 0)) {
415 printf ("kill errno %d\n", slurm_get_errno());
416 exit (1);
417 }
418 printf ("canceled job_id %u\n",
419 slurm_alloc_msg_ptr->job_id );
420 slurm_free_resource_allocation_response_msg(
421 slurm_alloc_msg_ptr);
422 exit (0);
423 }
424
425
427 These functions are included in the libslurm library, which must be
428 linked to your process for use (e.g. "cc myprog.c -lslurm").
429
430
432 Copyright (C) 2010-2017 SchedMD LLC. Copyright (C) 2002-2006 The
433 Regents of the University of California. Produced at Lawrence Liver‐
434 more National Laboratory (cf, DISCLAIMER). CODE-OCEC-09-009. All
435 rights reserved.
436
437 This file is part of Slurm, a resource management program. For
438 details, see <https://slurm.schedmd.com/>.
439
440 Slurm is free software; you can redistribute it and/or modify it under
441 the terms of the GNU General Public License as published by the Free
442 Software Foundation; either version 2 of the License, or (at your
443 option) any later version.
444
445 Slurm is distributed in the hope that it will be useful, but WITHOUT
446 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
447 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
448 for more details.
449
451 hostlist_create(3), hostlist_shift(3), hostlist_destroy(3), scancel(1),
452 srun(1), slurm_free_job_info_msg(3), slurm_get_errno(3),
453 slurm_load_jobs(3), slurm_perror(3), slurm_strerror(3)
454
455
456
457March 2019 Slurm job initiation functions Slurm API(3)