1MCE::Loop(3) User Contributed Perl Documentation MCE::Loop(3)
2
3
4
6 MCE::Loop - MCE model for building parallel loops
7
9 This document describes MCE::Loop version 1.884
10
12 This module provides a parallel loop implementation through Many-Core
13 Engine. MCE::Loop is not MCE::Map but more along the lines of an easy
14 way to spin up a MCE instance and have user_func pointing to your code
15 block. If you want something similar to map, then see MCE::Map.
16
17 ## Construction when chunking is not desired
18
19 use MCE::Loop;
20
21 MCE::Loop->init(
22 max_workers => 5, chunk_size => 1
23 );
24
25 mce_loop {
26 my ($mce, $chunk_ref, $chunk_id) = @_;
27 MCE->say("$chunk_id: $_");
28 } 40 .. 48;
29
30 -- Output
31
32 3: 42
33 1: 40
34 2: 41
35 4: 43
36 5: 44
37 6: 45
38 7: 46
39 8: 47
40 9: 48
41
42 ## Construction for 'auto' or greater than 1
43
44 use MCE::Loop;
45
46 MCE::Loop->init(
47 max_workers => 5, chunk_size => 'auto'
48 );
49
50 mce_loop {
51 my ($mce, $chunk_ref, $chunk_id) = @_;
52 for (@{ $chunk_ref }) {
53 MCE->say("$chunk_id: $_");
54 }
55 } 40 .. 48;
56
57 -- Output
58
59 1: 40
60 2: 42
61 1: 41
62 4: 46
63 2: 43
64 5: 48
65 3: 44
66 4: 47
67 3: 45
68
70 All models in MCE default to 'auto' for chunk_size. The arguments for
71 the block are the same as writing a user_func block using the Core API.
72
73 Beginning with MCE 1.5, the next input item is placed into the input
74 scalar variable $_ when chunk_size equals 1. Otherwise, $_ points to
75 $chunk_ref containing many items. Basically, line 2 below may be
76 omitted from your code when using $_. One can call MCE->chunk_id to
77 obtain the current chunk id.
78
79 line 1: user_func => sub {
80 line 2: my ($mce, $chunk_ref, $chunk_id) = @_;
81 line 3:
82 line 4: $_ points to $chunk_ref->[0]
83 line 5: in MCE 1.5 when chunk_size == 1
84 line 6:
85 line 7: $_ points to $chunk_ref
86 line 8: in MCE 1.5 when chunk_size > 1
87 line 9: }
88
89 Follow this synopsis when chunk_size equals one. Looping is not
90 required from inside the block. Hence, the block is called once per
91 item.
92
93 ## Exports mce_loop, mce_loop_f, and mce_loop_s
94 use MCE::Loop;
95
96 MCE::Loop->init(
97 chunk_size => 1
98 );
99
100 ## Array or array_ref
101 mce_loop { do_work($_) } 1..10000;
102 mce_loop { do_work($_) } \@list;
103
104 ## Important; pass an array_ref for deeply nested input data
105 mce_loop { do_work($_) } [ [ 0, 1 ], [ 0, 2 ], ... ];
106 mce_loop { do_work($_) } \@deeply_list;
107
108 ## File path, glob ref, IO::All::{ File, Pipe, STDIO } obj, or scalar ref
109 ## Workers read directly and do not involve the manager process
110 mce_loop_f { chomp; do_work($_) } "/path/to/file"; # efficient
111
112 ## Involves the manager process, therefore slower
113 mce_loop_f { chomp; do_work($_) } $file_handle;
114 mce_loop_f { chomp; do_work($_) } $io;
115 mce_loop_f { chomp; do_work($_) } \$scalar;
116
117 ## Sequence of numbers (begin, end [, step, format])
118 mce_loop_s { do_work($_) } 1, 10000, 5;
119 mce_loop_s { do_work($_) } [ 1, 10000, 5 ];
120
121 mce_loop_s { do_work($_) } {
122 begin => 1, end => 10000, step => 5, format => undef
123 };
124
126 Follow this synopsis when chunk_size equals 'auto' or greater than 1.
127 This means having to loop through the chunk from inside the block.
128
129 use MCE::Loop;
130
131 MCE::Loop->init( ## Chunk_size defaults to 'auto' when
132 chunk_size => 'auto' ## not specified. Therefore, the init
133 ); ## function may be omitted.
134
135 ## Syntax is shown for mce_loop for demonstration purposes.
136 ## Looping inside the block is the same for mce_loop_f and
137 ## mce_loop_s.
138
139 ## Array or array_ref
140 mce_loop { do_work($_) for (@{ $_ }) } 1..10000;
141 mce_loop { do_work($_) for (@{ $_ }) } \@list;
142
143 ## Important; pass an array_ref for deeply nested input data
144 mce_loop { do_work($_) for (@{ $_ }) } [ [ 0, 1 ], [ 0, 2 ], ... ];
145 mce_loop { do_work($_) for (@{ $_ }) } \@deeply_list;
146
147 ## Resembles code using the core MCE API
148 mce_loop {
149 my ($mce, $chunk_ref, $chunk_id) = @_;
150
151 for (@{ $chunk_ref }) {
152 do_work($_);
153 }
154
155 } 1..10000;
156
157 Chunking reduces the number of IPC calls behind the scenes. Think in
158 terms of chunks whenever processing a large amount of data. For
159 relatively small data, choosing 1 for chunk_size is fine.
160
162 The following lists options which may be overridden when loading the
163 module.
164
165 use Sereal qw( encode_sereal decode_sereal );
166 use CBOR::XS qw( encode_cbor decode_cbor );
167 use JSON::XS qw( encode_json decode_json );
168
169 use MCE::Loop
170 max_workers => 4, # Default 'auto'
171 chunk_size => 100, # Default 'auto'
172 tmp_dir => "/path/to/app/tmp", # $MCE::Signal::tmp_dir
173 freeze => \&encode_sereal, # \&Storable::freeze
174 thaw => \&decode_sereal, # \&Storable::thaw
175 init_relay => 0, # Default undef; MCE 1.882+
176 use_threads => 0, # Default undef; MCE 1.882+
177 ;
178
179 From MCE 1.8 onwards, Sereal 3.015+ is loaded automatically if
180 available. Specify "Sereal => 0" to use Storable instead.
181
182 use MCE::Loop Sereal => 0;
183
185 MCE::Loop->init ( options )
186 MCE::Loop::init { options }
187
188 The init function accepts a hash of MCE options.
189
190 use MCE::Loop;
191
192 MCE::Loop->init(
193 chunk_size => 1, max_workers => 4,
194
195 user_begin => sub {
196 print "## ", MCE->wid, " started\n";
197 },
198
199 user_end => sub {
200 print "## ", MCE->wid, " completed\n";
201 }
202 );
203
204 my %a = mce_loop { MCE->gather($_, $_ * $_) } 1..100;
205
206 print "\n", "@a{1..100}", "\n";
207
208 -- Output
209
210 ## 3 started
211 ## 1 started
212 ## 2 started
213 ## 4 started
214 ## 1 completed
215 ## 2 completed
216 ## 3 completed
217 ## 4 completed
218
219 1 4 9 16 25 36 49 64 81 100 121 144 169 196 225 256 289 324 361
220 400 441 484 529 576 625 676 729 784 841 900 961 1024 1089 1156
221 1225 1296 1369 1444 1521 1600 1681 1764 1849 1936 2025 2116 2209
222 2304 2401 2500 2601 2704 2809 2916 3025 3136 3249 3364 3481 3600
223 3721 3844 3969 4096 4225 4356 4489 4624 4761 4900 5041 5184 5329
224 5476 5625 5776 5929 6084 6241 6400 6561 6724 6889 7056 7225 7396
225 7569 7744 7921 8100 8281 8464 8649 8836 9025 9216 9409 9604 9801
226 10000
227
229 The following assumes chunk_size equals 1 in order to demonstrate all
230 the possibilities for providing input data.
231
232 MCE::Loop->run ( sub { code }, list )
233 mce_loop { code } list
234
235 Input data may be defined using a list, an array ref, or a hash ref.
236
237 # $_ contains the item when chunk_size => 1
238
239 mce_loop { do_work($_) } 1..1000;
240 mce_loop { do_work($_) } \@list;
241
242 # Important; pass an array_ref for deeply nested input data
243
244 mce_loop { do_work($_) } [ [ 0, 1 ], [ 0, 2 ], ... ];
245 mce_loop { do_work($_) } \@deeply_list;
246
247 # Chunking; any chunk_size => 1 or greater
248
249 my %res = mce_loop {
250 my ($mce, $chunk_ref, $chunk_id) = @_;
251 my %ret;
252 for my $item (@{ $chunk_ref }) {
253 $ret{$item} = $item * 2;
254 }
255 MCE->gather(%ret);
256 }
257 \@list;
258
259 # Input hash; current API available since 1.828
260
261 my %res = mce_loop {
262 my ($mce, $chunk_ref, $chunk_id) = @_;
263 my %ret;
264 for my $key (keys %{ $chunk_ref }) {
265 $ret{$key} = $chunk_ref->{$key} * 2;
266 }
267 MCE->gather(%ret);
268 }
269 \%hash;
270
271 MCE::Loop->run_file ( sub { code }, file )
272 mce_loop_f { code } file
273
274 The fastest of these is the /path/to/file. Workers communicate the next
275 offset position among themselves with zero interaction by the manager
276 process.
277
278 "IO::All" { File, Pipe, STDIO } is supported since MCE 1.845.
279
280 # $_ contains the line when chunk_size => 1
281
282 mce_loop_f { $_ } "/path/to/file"; # faster
283 mce_loop_f { $_ } $file_handle;
284 mce_loop_f { $_ } $io; # IO::All
285 mce_loop_f { $_ } \$scalar;
286
287 # chunking, any chunk_size => 1 or greater
288
289 my %res = mce_loop_f {
290 my ($mce, $chunk_ref, $chunk_id) = @_;
291 my $buf = '';
292 for my $line (@{ $chunk_ref }) {
293 $buf .= $line;
294 }
295 MCE->gather($chunk_id, $buf);
296 }
297 "/path/to/file";
298
299 MCE::Loop->run_seq ( sub { code }, $beg, $end [, $step, $fmt ] )
300 mce_loop_s { code } $beg, $end [, $step, $fmt ]
301
302 Sequence may be defined as a list, an array reference, or a hash
303 reference. The functions require both begin and end values to run.
304 Step and format are optional. The format is passed to sprintf (% may be
305 omitted below).
306
307 my ($beg, $end, $step, $fmt) = (10, 20, 0.1, "%4.1f");
308
309 # $_ contains the sequence number when chunk_size => 1
310
311 mce_loop_s { $_ } $beg, $end, $step, $fmt;
312 mce_loop_s { $_ } [ $beg, $end, $step, $fmt ];
313
314 mce_loop_s { $_ } {
315 begin => $beg, end => $end,
316 step => $step, format => $fmt
317 };
318
319 # chunking, any chunk_size => 1 or greater
320
321 my %res = mce_loop_s {
322 my ($mce, $chunk_ref, $chunk_id) = @_;
323 my $buf = '';
324 for my $seq (@{ $chunk_ref }) {
325 $buf .= "$seq\n";
326 }
327 MCE->gather($chunk_id, $buf);
328 }
329 [ $beg, $end ];
330
331 With the bounds_only option, the sequence engine computes the 'begin'
332 and 'end' items only for each chunk, and not the items in between (hence
333 boundaries only). This option applies to sequence only and has no effect
334 when chunk_size equals 1.
335
336 The time to run is 0.006s below. This becomes 0.827s without the
337 bounds_only option due to computing all items in between, thus creating
338 a very large array. Basically, specify bounds_only => 1 when boundaries
339 are all you need for looping inside the block; e.g. Monte Carlo
340 simulations.
341
342 Time was measured using 1 worker to emphasize the difference.
343
344 use MCE::Loop;
345
346 MCE::Loop->init(
347 max_workers => 1, chunk_size => 1_250_000,
348 bounds_only => 1
349 );
350
351 # Typically, the input scalar $_ contains the sequence number
352 # when chunk_size => 1, unless the bounds_only option is set
353 # which is the case here. Thus, $_ points to $chunk_ref.
354
355 mce_loop_s {
356 my ($mce, $chunk_ref, $chunk_id) = @_;
357
358 # $chunk_ref contains 2 items, not 1_250_000
359 # my ( $begin, $end ) = ( $_->[0], $_->[1] );
360
361 my $begin = $chunk_ref->[0];
362 my $end = $chunk_ref->[1];
363
364 # for my $seq ( $begin .. $end ) {
365 # ...
366 # }
367
368 MCE->printf("%7d .. %8d\n", $begin, $end);
369 }
370 [ 1, 10_000_000 ];
371
372 -- Output
373
374 1 .. 1250000
375 1250001 .. 2500000
376 2500001 .. 3750000
377 3750001 .. 5000000
378 5000001 .. 6250000
379 6250001 .. 7500000
380 7500001 .. 8750000
381 8750001 .. 10000000
382
383 MCE::Loop->run ( sub { code }, iterator )
384 mce_loop { code } iterator
385
386 An iterator reference may be specified for input_data. Iterators are
387 described under section "SYNTAX for INPUT_DATA" at MCE::Core.
388
389 mce_loop { $_ } make_iterator(10, 30, 2);
390
392 Unlike MCE::Map, where gathering and output order are handled for you
393 automatically, MCE::Loop requires calling the gather method to have
394 results sent back to the manager process.
395
396 use MCE::Loop chunk_size => 1;
397
398 ## Output order is not guaranteed.
399 my @a1 = mce_loop { MCE->gather($_ * 2) } 1..100;
400 print "@a1\n\n";
401
402 ## Outputs to a hash instead (key, value).
403 my %h1 = mce_loop { MCE->gather($_, $_ * 2) } 1..100;
404 print "@h1{1..100}\n\n";
405
406 ## This does the same thing due to chunk_id starting at one.
407 my %h2 = mce_loop { MCE->gather(MCE->chunk_id, $_ * 2) } 1..100;
408 print "@h2{1..100}\n\n";
409
410 The gather method may be called multiple times within the block unlike
411 return which would leave the block. Therefore, think of gather as
412 yielding results immediately to the manager process without actually
413 leaving the block.
414
415 use MCE::Loop chunk_size => 1, max_workers => 3;
416
417 my @hosts = qw(
418 hosta hostb hostc hostd hoste
419 );
420
421 my %h3 = mce_loop {
422 my ($output, $error, $status); my $host = $_;
423
424 ## Do something with $host;
425 $output = "Worker ". MCE->wid .": Hello from $host";
426
427 if (MCE->chunk_id % 3 == 0) {
428 ## Simulating an error condition
429 local $? = 1; $status = $?;
430 $error = "Error from $host"
431 }
432 else {
433 $status = 0;
434 }
435
436 ## Ensure unique keys (key, value) when gathering to
437 ## a hash.
438 MCE->gather("$host.out", $output);
439 MCE->gather("$host.err", $error) if (defined $error);
440 MCE->gather("$host.sta", $status);
441
442 } @hosts;
443
444 foreach my $host (@hosts) {
445 print $h3{"$host.out"}, "\n";
446 print $h3{"$host.err"}, "\n" if (exists $h3{"$host.err"});
447 print "Exit status: ", $h3{"$host.sta"}, "\n\n";
448 }
449
450 -- Output
451
452 Worker 2: Hello from hosta
453 Exit status: 0
454
455 Worker 1: Hello from hostb
456 Exit status: 0
457
458 Worker 3: Hello from hostc
459 Error from hostc
460 Exit status: 1
461
462 Worker 2: Hello from hostd
463 Exit status: 0
464
465 Worker 1: Hello from hoste
466 Exit status: 0
467
468 The following uses an anonymous array containing 3 elements when
469 gathering data. Serialization is automatic behind the scenes.
470
471 my %h3 = mce_loop {
472 ...
473
474 MCE->gather($host, [$output, $error, $status]);
475
476 } @hosts;
477
478 foreach my $host (@hosts) {
479 print $h3{$host}->[0], "\n";
480 print $h3{$host}->[1], "\n" if (defined $h3{$host}->[1]);
481 print "Exit status: ", $h3{$host}->[2], "\n\n";
482 }
483
484 Although MCE::Map comes to mind, one may want additional control when
485 gathering data such as retaining output order.
486
487 use MCE::Loop;
488
489 sub preserve_order {
490 my %tmp; my $order_id = 1; my $gather_ref = $_[0];
491
492 return sub {
493 $tmp{ (shift) } = \@_;
494
495 while (1) {
496 last unless exists $tmp{$order_id};
497 push @{ $gather_ref }, @{ delete $tmp{$order_id++} };
498 }
499
500 return;
501 };
502 }
503
504 my @m2;
505
506 MCE::Loop->init(
507 chunk_size => 'auto', max_workers => 'auto',
508 gather => preserve_order(\@m2)
509 );
510
511 mce_loop {
512 my @a; my ($mce, $chunk_ref, $chunk_id) = @_;
513
514 ## Compute the entire chunk data at once.
515 push @a, map { $_ * 2 } @{ $chunk_ref };
516
517 ## Afterwards, invoke the gather feature, which
518 ## will direct the data to the callback function.
519 MCE->gather(MCE->chunk_id, @a);
520
521 } 1..100000;
522
523 MCE::Loop->finish;
524
525 print scalar @m2, "\n";
526
527 All 6 models support 'auto' for chunk_size unlike the Core API. Think
528 of the models as the basis for providing JIT for MCE. They create the
529 instance, tune max_workers, and tune chunk_size automatically
530 regardless of the hardware.
531
532 The following does the same thing using the Core API.
533
534 use MCE;
535
536 sub preserve_order {
537 ...
538 }
539
540 my $mce = MCE->new(
541 max_workers => 'auto', chunk_size => 8000,
542
543 user_func => sub {
544 my @a; my ($mce, $chunk_ref, $chunk_id) = @_;
545
546 ## Compute the entire chunk data at once.
547 push @a, map { $_ * 2 } @{ $chunk_ref };
548
549 ## Afterwards, invoke the gather feature, which
550 ## will direct the data to the callback function.
551 MCE->gather(MCE->chunk_id, @a);
552 }
553 );
554
555 my @m2;
556
557 $mce->process({ gather => preserve_order(\@m2) }, [1..100000]);
558 $mce->shutdown;
559
560 print scalar @m2, "\n";
561
563 MCE::Loop->finish
564 MCE::Loop::finish
565
566 Workers remain persistent as much as possible after running. Shutdown
567 occurs automatically when the script terminates. Call finish when
568 workers are no longer needed.
569
570 use MCE::Loop;
571
572 MCE::Loop->init(
573 chunk_size => 20, max_workers => 'auto'
574 );
575
576 mce_loop { ... } 1..100;
577
578 MCE::Loop->finish;
579
581 MCE, MCE::Core
582
584 Mario E. Roy, <marioeroy AT gmail DOT com>
585
586
587
588perl v5.36.0 2023-01-20 MCE::Loop(3)