1MCE::Loop(3) User Contributed Perl Documentation MCE::Loop(3)
2
3
4
6 MCE::Loop - MCE model for building parallel loops
7
9 This document describes MCE::Loop version 1.879
10
12 This module provides a parallel loop implementation through the
13 Many-Core Engine. MCE::Loop is not MCE::Map but more along the lines of
14 an easy way to spin up an MCE instance and have user_func pointing to
15 your code block. If you want something similar to map, then see MCE::Map.
16
17 ## Construction when chunking is not desired
18
19 use MCE::Loop;
20
21 MCE::Loop->init(
22 max_workers => 5, chunk_size => 1
23 );
24
25 mce_loop {
26 my ($mce, $chunk_ref, $chunk_id) = @_;
27 MCE->say("$chunk_id: $_");
28 } 40 .. 48;
29
30 -- Output
31
32 3: 42
33 1: 40
34 2: 41
35 4: 43
36 5: 44
37 6: 45
38 7: 46
39 8: 47
40 9: 48
41
42 ## Construction when chunk_size is 'auto' or greater than 1
43
44 use MCE::Loop;
45
46 MCE::Loop->init(
47 max_workers => 5, chunk_size => 'auto'
48 );
49
50 mce_loop {
51 my ($mce, $chunk_ref, $chunk_id) = @_;
52 for (@{ $chunk_ref }) {
53 MCE->say("$chunk_id: $_");
54 }
55 } 40 .. 48;
56
57 -- Output
58
59 1: 40
60 2: 42
61 1: 41
62 4: 46
63 2: 43
64 5: 48
65 3: 44
66 4: 47
67 3: 45
68
70 All models in MCE default to 'auto' for chunk_size. The arguments for
71 the block are the same as writing a user_func block using the Core API.
72
73 Beginning with MCE 1.5, the next input item is placed into the input
74 scalar variable $_ when chunk_size equals 1. Otherwise, $_ points to
75 $chunk_ref containing many items. Basically, line 2 below may be
76 omitted from your code when using $_. One can call MCE->chunk_id to
77 obtain the current chunk id.
78
79 line 1: user_func => sub {
80 line 2: my ($mce, $chunk_ref, $chunk_id) = @_;
81 line 3:
82 line 4: $_ points to $chunk_ref->[0]
83 line 5: in MCE 1.5 when chunk_size == 1
84 line 6:
85 line 7: $_ points to $chunk_ref
86 line 8: in MCE 1.5 when chunk_size > 1
87 line 9: }
88
89 Follow this synopsis when chunk_size equals one. Looping is not
90 required from inside the block. Hence, the block is called once per
91 item.
92
93 ## Exports mce_loop, mce_loop_f, and mce_loop_s
94 use MCE::Loop;
95
96 MCE::Loop->init(
97 chunk_size => 1
98 );
99
100 ## Array or array_ref
101 mce_loop { do_work($_) } 1..10000;
102 mce_loop { do_work($_) } \@list;
103
104 ## Important; pass an array_ref for deeply nested input data
105 mce_loop { do_work($_) } [ [ 0, 1 ], [ 0, 2 ], ... ];
106 mce_loop { do_work($_) } \@deeply_list;
107
108 ## File path, glob ref, IO::All::{ File, Pipe, STDIO } obj, or scalar ref
109 ## Workers read directly and do not involve the manager process
110 mce_loop_f { chomp; do_work($_) } "/path/to/file"; # efficient
111
112 ## Involves the manager process, therefore slower
113 mce_loop_f { chomp; do_work($_) } $file_handle;
114 mce_loop_f { chomp; do_work($_) } $io;
115 mce_loop_f { chomp; do_work($_) } \$scalar;
116
117 ## Sequence of numbers (begin, end [, step, format])
118 mce_loop_s { do_work($_) } 1, 10000, 5;
119 mce_loop_s { do_work($_) } [ 1, 10000, 5 ];
120
121 mce_loop_s { do_work($_) } {
122 begin => 1, end => 10000, step => 5, format => undef
123 };
124
126 Follow this synopsis when chunk_size equals 'auto' or greater than 1.
127 This means having to loop through the chunk from inside the block.
128
129 use MCE::Loop;
130
131 MCE::Loop->init( ## Chunk_size defaults to 'auto' when
132 chunk_size => 'auto' ## not specified. Therefore, the init
133 ); ## function may be omitted.
134
135 ## Syntax is shown for mce_loop for demonstration purposes.
136 ## Looping inside the block is the same for mce_loop_f and
137 ## mce_loop_s.
138
139 ## Array or array_ref
140 mce_loop { do_work($_) for (@{ $_ }) } 1..10000;
141 mce_loop { do_work($_) for (@{ $_ }) } \@list;
142
143 ## Important; pass an array_ref for deeply nested input data
144 mce_loop { do_work($_) for (@{ $_ }) } [ [ 0, 1 ], [ 0, 2 ], ... ];
145 mce_loop { do_work($_) for (@{ $_ }) } \@deeply_list;
146
147 ## Resembles code using the core MCE API
148 mce_loop {
149 my ($mce, $chunk_ref, $chunk_id) = @_;
150
151 for (@{ $chunk_ref }) {
152 do_work($_);
153 }
154
155 } 1..10000;
156
157 Chunking reduces the number of IPC calls behind the scenes. Think in
158 terms of chunks whenever processing a large amount of data. For
159 relatively small data, choosing 1 for chunk_size is fine.
160
162 The following lists the options that may be overridden when loading
163 the module.
164
165 use Sereal qw( encode_sereal decode_sereal );
166 use CBOR::XS qw( encode_cbor decode_cbor );
167 use JSON::XS qw( encode_json decode_json );
168
169 use MCE::Loop
170 max_workers => 4, # Default 'auto'
171 chunk_size => 100, # Default 'auto'
172 tmp_dir => "/path/to/app/tmp", # $MCE::Signal::tmp_dir
173 freeze => \&encode_sereal, # \&Storable::freeze
174 thaw => \&decode_sereal # \&Storable::thaw
175 ;
176
177 From MCE 1.8 onwards, Sereal 3.015+ is loaded automatically if
178 available. Specify "Sereal => 0" to use Storable instead.
179
180 use MCE::Loop Sereal => 0;
181
183 MCE::Loop->init ( options )
184 MCE::Loop::init { options }
185
186 The init function accepts a hash of MCE options.
187
188 use MCE::Loop;
189
190 MCE::Loop->init(
191 chunk_size => 1, max_workers => 4,
192
193 user_begin => sub {
194 print "## ", MCE->wid, " started\n";
195 },
196
197 user_end => sub {
198 print "## ", MCE->wid, " completed\n";
199 }
200 );
201
202 my %a = mce_loop { MCE->gather($_, $_ * $_) } 1..100;
203
204 print "\n", "@a{1..100}", "\n";
205
206 -- Output
207
208 ## 3 started
209 ## 1 started
210 ## 2 started
211 ## 4 started
212 ## 1 completed
213 ## 2 completed
214 ## 3 completed
215 ## 4 completed
216
217 1 4 9 16 25 36 49 64 81 100 121 144 169 196 225 256 289 324 361
218 400 441 484 529 576 625 676 729 784 841 900 961 1024 1089 1156
219 1225 1296 1369 1444 1521 1600 1681 1764 1849 1936 2025 2116 2209
220 2304 2401 2500 2601 2704 2809 2916 3025 3136 3249 3364 3481 3600
221 3721 3844 3969 4096 4225 4356 4489 4624 4761 4900 5041 5184 5329
222 5476 5625 5776 5929 6084 6241 6400 6561 6724 6889 7056 7225 7396
223 7569 7744 7921 8100 8281 8464 8649 8836 9025 9216 9409 9604 9801
224 10000
225
227 The following assumes chunk_size equals 1 in order to demonstrate all
228 the possibilities for providing input data.
229
230 MCE::Loop->run ( sub { code }, list )
231 mce_loop { code } list
232
233 Input data may be defined using a list, an array ref, or a hash ref.
234
235 # $_ contains the item when chunk_size => 1
236
237 mce_loop { do_work($_) } 1..1000;
238 mce_loop { do_work($_) } \@list;
239
240 # Important; pass an array_ref for deeply nested input data
241
242 mce_loop { do_work($_) } [ [ 0, 1 ], [ 0, 2 ], ... ];
243 mce_loop { do_work($_) } \@deeply_list;
244
245 # Chunking; any chunk_size => 1 or greater
246
247 my %res = mce_loop {
248 my ($mce, $chunk_ref, $chunk_id) = @_;
249 my %ret;
250 for my $item (@{ $chunk_ref }) {
251 $ret{$item} = $item * 2;
252 }
253 MCE->gather(%ret);
254 }
255 \@list;
256
257 # Input hash; current API available since 1.828
258
259 my %res = mce_loop {
260 my ($mce, $chunk_ref, $chunk_id) = @_;
261 my %ret;
262 for my $key (keys %{ $chunk_ref }) {
263 $ret{$key} = $chunk_ref->{$key} * 2;
264 }
265 MCE->gather(%ret);
266 }
267 \%hash;
268
269 MCE::Loop->run_file ( sub { code }, file )
270 mce_loop_f { code } file
271
272 The fastest of these is the /path/to/file. Workers communicate the next
273 offset position among themselves with zero interaction by the manager
274 process.
275
276 "IO::All" { File, Pipe, STDIO } is supported since MCE 1.845.
277
278 # $_ contains the line when chunk_size => 1
279
280 mce_loop_f { $_ } "/path/to/file"; # faster
281 mce_loop_f { $_ } $file_handle;
282 mce_loop_f { $_ } $io; # IO::All
283 mce_loop_f { $_ } \$scalar;
284
285 # chunking, any chunk_size => 1 or greater
286
287 my %res = mce_loop_f {
288 my ($mce, $chunk_ref, $chunk_id) = @_;
289 my $buf = '';
290 for my $line (@{ $chunk_ref }) {
291 $buf .= $line;
292 }
293 MCE->gather($chunk_id, $buf);
294 }
295 "/path/to/file";
296
297 MCE::Loop->run_seq ( sub { code }, $beg, $end [, $step, $fmt ] )
298 mce_loop_s { code } $beg, $end [, $step, $fmt ]
299
300 Sequence may be defined as a list, an array reference, or a hash
301 reference. The functions require both begin and end values to run.
302 Step and format are optional. The format is passed to sprintf (% may be
303 omitted below).
304
305 my ($beg, $end, $step, $fmt) = (10, 20, 0.1, "%4.1f");
306
307 # $_ contains the sequence number when chunk_size => 1
308
309 mce_loop_s { $_ } $beg, $end, $step, $fmt;
310 mce_loop_s { $_ } [ $beg, $end, $step, $fmt ];
311
312 mce_loop_s { $_ } {
313 begin => $beg, end => $end,
314 step => $step, format => $fmt
315 };
316
317 # chunking, any chunk_size => 1 or greater
318
319 my %res = mce_loop_s {
320 my ($mce, $chunk_ref, $chunk_id) = @_;
321 my $buf = '';
322 for my $seq (@{ $chunk_ref }) {
323 $buf .= "$seq\n";
324 }
325 MCE->gather($chunk_id, $buf);
326 }
327 [ $beg, $end ];
328
329 The sequence engine can compute 'begin' and 'end' items only, for the
330 chunk, and not the items in between (hence boundaries only). The
331 bounds_only option applies to sequences only and has no effect when
332 chunk_size equals 1.
333
334 The time to run is 0.006s below. This becomes 0.827s without the
335 bounds_only option due to computing all items in between, thus creating
336 a very large array. Basically, specify bounds_only => 1 when boundaries
337 are all you need for looping inside the block; e.g. Monte Carlo
338 simulations.
339
340 Time was measured using 1 worker to emphasize the difference.
341
342 use MCE::Loop;
343
344 MCE::Loop->init(
345 max_workers => 1, chunk_size => 1_250_000,
346 bounds_only => 1
347 );
348
349 # Typically, the input scalar $_ contains the sequence number
350 # when chunk_size => 1, unless the bounds_only option is set
351 # which is the case here. Thus, $_ points to $chunk_ref.
352
353 mce_loop_s {
354 my ($mce, $chunk_ref, $chunk_id) = @_;
355
356 # $chunk_ref contains 2 items, not 1_250_000
357 # my ( $begin, $end ) = ( $_->[0], $_->[1] );
358
359 my $begin = $chunk_ref->[0];
360 my $end = $chunk_ref->[1];
361
362 # for my $seq ( $begin .. $end ) {
363 # ...
364 # }
365
366 MCE->printf("%7d .. %8d\n", $begin, $end);
367 }
368 [ 1, 10_000_000 ];
369
370 -- Output
371
372 1 .. 1250000
373 1250001 .. 2500000
374 2500001 .. 3750000
375 3750001 .. 5000000
376 5000001 .. 6250000
377 6250001 .. 7500000
378 7500001 .. 8750000
379 8750001 .. 10000000
380
381 MCE::Loop->run ( sub { code }, iterator )
382 mce_loop { code } iterator
383
384 An iterator reference may be specified for input_data. Iterators are
385 described under section "SYNTAX for INPUT_DATA" at MCE::Core.
386
387 mce_loop { $_ } make_iterator(10, 30, 2);
388
390 Unlike MCE::Map, where gathering and output order are handled for you
391 automatically, MCE::Loop requires calling the gather method to have
392 results sent back to the manager process.
393
394 use MCE::Loop chunk_size => 1;
395
396 ## Output order is not guaranteed.
397 my @a1 = mce_loop { MCE->gather($_ * 2) } 1..100;
398 print "@a1\n\n";
399
400 ## Outputs to a hash instead (key, value).
401 my %h1 = mce_loop { MCE->gather($_, $_ * 2) } 1..100;
402 print "@h1{1..100}\n\n";
403
404 ## This does the same thing due to chunk_id starting at one.
405 my %h2 = mce_loop { MCE->gather(MCE->chunk_id, $_ * 2) } 1..100;
406 print "@h2{1..100}\n\n";
407
408 The gather method may be called multiple times within the block unlike
409 return which would leave the block. Therefore, think of gather as
410 yielding results immediately to the manager process without actually
411 leaving the block.
412
413 use MCE::Loop chunk_size => 1, max_workers => 3;
414
415 my @hosts = qw(
416 hosta hostb hostc hostd hoste
417 );
418
419 my %h3 = mce_loop {
420 my ($output, $error, $status); my $host = $_;
421
422 ## Do something with $host;
423 $output = "Worker ". MCE->wid .": Hello from $host";
424
425 if (MCE->chunk_id % 3 == 0) {
426 ## Simulating an error condition
427 local $? = 1; $status = $?;
428 $error = "Error from $host"
429 }
430 else {
431 $status = 0;
432 }
433
434 ## Ensure unique keys (key, value) when gathering to
435 ## a hash.
436 MCE->gather("$host.out", $output);
437 MCE->gather("$host.err", $error) if (defined $error);
438 MCE->gather("$host.sta", $status);
439
440 } @hosts;
441
442 foreach my $host (@hosts) {
443 print $h3{"$host.out"}, "\n";
444 print $h3{"$host.err"}, "\n" if (exists $h3{"$host.err"});
445 print "Exit status: ", $h3{"$host.sta"}, "\n\n";
446 }
447
448 -- Output
449
450 Worker 2: Hello from hosta
451 Exit status: 0
452
453 Worker 1: Hello from hostb
454 Exit status: 0
455
456 Worker 3: Hello from hostc
457 Error from hostc
458 Exit status: 1
459
460 Worker 2: Hello from hostd
461 Exit status: 0
462
463 Worker 1: Hello from hoste
464 Exit status: 0
465
466 The following uses an anonymous array containing 3 elements when
467 gathering data. Serialization is automatic behind the scenes.
468
469 my %h3 = mce_loop {
470 ...
471
472 MCE->gather($host, [$output, $error, $status]);
473
474 } @hosts;
475
476 foreach my $host (@hosts) {
477 print $h3{$host}->[0], "\n";
478 print $h3{$host}->[1], "\n" if (defined $h3{$host}->[1]);
479 print "Exit status: ", $h3{$host}->[2], "\n\n";
480 }
481
482 Although MCE::Map comes to mind, one may want additional control when
483 gathering data such as retaining output order.
484
485 use MCE::Loop;
486
487 sub preserve_order {
488 my %tmp; my $order_id = 1; my $gather_ref = $_[0];
489
490 return sub {
491 $tmp{ (shift) } = \@_;
492
493 while (1) {
494 last unless exists $tmp{$order_id};
495 push @{ $gather_ref }, @{ delete $tmp{$order_id++} };
496 }
497
498 return;
499 };
500 }
501
502 my @m2;
503
504 MCE::Loop->init(
505 chunk_size => 'auto', max_workers => 'auto',
506 gather => preserve_order(\@m2)
507 );
508
509 mce_loop {
510 my @a; my ($mce, $chunk_ref, $chunk_id) = @_;
511
512 ## Compute the entire chunk data at once.
513 push @a, map { $_ * 2 } @{ $chunk_ref };
514
515 ## Afterwards, invoke the gather feature, which
516 ## will direct the data to the callback function.
517 MCE->gather(MCE->chunk_id, @a);
518
519 } 1..100000;
520
521 MCE::Loop->finish;
522
523 print scalar @m2, "\n";
524
525 All 6 models support 'auto' for chunk_size unlike the Core API. Think
526 of the models as the basis for providing JIT for MCE. They create the
527 instance, tune max_workers, and tune chunk_size automatically
528 regardless of the hardware.
529
530 The following does the same thing using the Core API.
531
532 use MCE;
533
534 sub preserve_order {
535 ...
536 }
537
538 my $mce = MCE->new(
539 max_workers => 'auto', chunk_size => 8000,
540
541 user_func => sub {
542 my @a; my ($mce, $chunk_ref, $chunk_id) = @_;
543
544 ## Compute the entire chunk data at once.
545 push @a, map { $_ * 2 } @{ $chunk_ref };
546
547 ## Afterwards, invoke the gather feature, which
548 ## will direct the data to the callback function.
549 MCE->gather(MCE->chunk_id, @a);
550 }
551 );
552
553 my @m2;
554
555 $mce->process({ gather => preserve_order(\@m2) }, [1..100000]);
556 $mce->shutdown;
557
558 print scalar @m2, "\n";
559
561 MCE::Loop->finish
562 MCE::Loop::finish
563
564 Workers remain persistent as much as possible after running. Shutdown
565 occurs automatically when the script terminates. Call finish when
566 workers are no longer needed.
567
568 use MCE::Loop;
569
570 MCE::Loop->init(
571 chunk_size => 20, max_workers => 'auto'
572 );
573
574 mce_loop { ... } 1..100;
575
576 MCE::Loop->finish;
577
579 MCE, MCE::Core
580
582 Mario E. Roy, <marioeroy AT gmail DOT com>
583
584
585
586perl v5.36.0 2022-07-22 MCE::Loop(3)