1MCE::Loop(3) User Contributed Perl Documentation MCE::Loop(3)
2
3
4
6 MCE::Loop - MCE model for building parallel loops
7
9 This document describes MCE::Loop version 1.837
10
12 This module provides a parallel loop implementation through Many-Core
13 Engine. MCE::Loop is not MCE::Map but more along the lines of an easy
14 way to spin up an MCE instance and have user_func pointing to your code
15 block. If you want something similar to map, then see MCE::Map.
16
17 ## Construction when chunking is not desired
18
19 use MCE::Loop;
20
21 MCE::Loop::init {
22 max_workers => 5, chunk_size => 1
23 };
24
25 mce_loop {
26 my ($mce, $chunk_ref, $chunk_id) = @_;
27 MCE->say("$chunk_id: $_");
28 } 40 .. 48;
29
30 -- Output
31
32 3: 42
33 1: 40
34 2: 41
35 4: 43
36 5: 44
37 6: 45
38 7: 46
39 8: 47
40 9: 48
41
42 ## Construction for 'auto' or greater than 1
43
44 use MCE::Loop;
45
46 MCE::Loop::init {
47 max_workers => 5, chunk_size => 'auto'
48 };
49
50 mce_loop {
51 my ($mce, $chunk_ref, $chunk_id) = @_;
52 for (@{ $chunk_ref }) {
53 MCE->say("$chunk_id: $_");
54 }
55 } 40 .. 48;
56
57 -- Output
58
59 1: 40
60 2: 42
61 1: 41
62 4: 46
63 2: 43
64 5: 48
65 3: 44
66 4: 47
67 3: 45
68
70 All models in MCE default to 'auto' for chunk_size. The arguments for
71 the block are the same as writing a user_func block using the Core API.
72
73 Beginning with MCE 1.5, the next input item is placed into the input
74 scalar variable $_ when chunk_size equals 1. Otherwise, $_ points to
75 $chunk_ref containing many items. Basically, line 2 below may be
76 omitted from your code when using $_. One can call MCE->chunk_id to
77 obtain the current chunk id.
78
79 line 1: user_func => sub {
80 line 2: my ($mce, $chunk_ref, $chunk_id) = @_;
81 line 3:
82 line 4: $_ points to $chunk_ref->[0]
83 line 5: in MCE 1.5 when chunk_size == 1
84 line 6:
85 line 7: $_ points to $chunk_ref
86 line 8: in MCE 1.5 when chunk_size > 1
87 line 9: }
88
89 Follow this synopsis when chunk_size equals one. Looping is not
90 required from inside the block, because the block is called once
91 for each item.
92
93 ## Exports mce_loop, mce_loop_f, and mce_loop_s
94 use MCE::Loop;
95
96 MCE::Loop::init {
97 chunk_size => 1
98 };
99
100 ## Array or array_ref
101 mce_loop { do_work($_) } 1..10000;
102 mce_loop { do_work($_) } [ 1..10000 ];
103
104 ## File_path, glob_ref, or scalar_ref
105 mce_loop_f { chomp; do_work($_) } "/path/to/file";
106 mce_loop_f { chomp; do_work($_) } $file_handle;
107 mce_loop_f { chomp; do_work($_) } \$scalar;
108
109 ## Sequence of numbers (begin, end [, step, format])
110 mce_loop_s { do_work($_) } 1, 10000, 5;
111 mce_loop_s { do_work($_) } [ 1, 10000, 5 ];
112
113 mce_loop_s { do_work($_) } {
114 begin => 1, end => 10000, step => 5, format => undef
115 };
116
118 Follow this synopsis when chunk_size equals 'auto' or greater than 1.
119 This means having to loop through the chunk from inside the block.
120
121 use MCE::Loop;
122
123 MCE::Loop::init { ## Chunk_size defaults to 'auto' when
124 chunk_size => 'auto' ## not specified. Therefore, the init
125 }; ## function may be omitted.
126
127 ## Syntax is shown for mce_loop for demonstration purposes.
128 ## Looping inside the block is the same for mce_loop_f and
129 ## mce_loop_s.
130
131 mce_loop { do_work($_) for (@{ $_ }) } 1..10000;
132
133 ## Same as above, resembles code using the Core API.
134
135 mce_loop {
136 my ($mce, $chunk_ref, $chunk_id) = @_;
137
138 for (@{ $chunk_ref }) {
139 do_work($_);
140 }
141
142 } 1..10000;
143
144 Chunking reduces the number of IPC calls behind the scenes. Think in
145 terms of chunks whenever processing a large amount of data. For
146 relatively small data, choosing 1 for chunk_size is fine.
147
149 The following lists the options which may be overridden when loading
150 the module.
151
152 use Sereal qw( encode_sereal decode_sereal );
153 use CBOR::XS qw( encode_cbor decode_cbor );
154 use JSON::XS qw( encode_json decode_json );
155
156 use MCE::Loop
157 max_workers => 4, # Default 'auto'
158 chunk_size => 100, # Default 'auto'
159 tmp_dir => "/path/to/app/tmp", # $MCE::Signal::tmp_dir
160 freeze => \&encode_sereal, # \&Storable::freeze
161 thaw => \&decode_sereal # \&Storable::thaw
162 ;
163
164 From MCE 1.8 onwards, Sereal 3.015+ is loaded automatically if
165 available. Specify "Sereal => 0" to use Storable instead.
166
167 use MCE::Loop Sereal => 0;
168
170 MCE::Loop->init ( options )
171 MCE::Loop::init { options }
172 The init function accepts a hash of MCE options.
173
174 use MCE::Loop;
175
176 MCE::Loop::init {
177 chunk_size => 1, max_workers => 4,
178
179 user_begin => sub {
180 print "## ", MCE->wid, " started\n";
181 },
182
183 user_end => sub {
184 print "## ", MCE->wid, " completed\n";
185 }
186 };
187
188 my %a = mce_loop { MCE->gather($_, $_ * $_) } 1..100;
189
190 print "\n", "@a{1..100}", "\n";
191
192 -- Output
193
194 ## 3 started
195 ## 1 started
196 ## 2 started
197 ## 4 started
198 ## 1 completed
199 ## 2 completed
200 ## 3 completed
201 ## 4 completed
202
203 1 4 9 16 25 36 49 64 81 100 121 144 169 196 225 256 289 324 361
204 400 441 484 529 576 625 676 729 784 841 900 961 1024 1089 1156
205 1225 1296 1369 1444 1521 1600 1681 1764 1849 1936 2025 2116 2209
206 2304 2401 2500 2601 2704 2809 2916 3025 3136 3249 3364 3481 3600
207 3721 3844 3969 4096 4225 4356 4489 4624 4761 4900 5041 5184 5329
208 5476 5625 5776 5929 6084 6241 6400 6561 6724 6889 7056 7225 7396
209 7569 7744 7921 8100 8281 8464 8649 8836 9025 9216 9409 9604 9801
210 10000
211
213 The following assumes chunk_size equals 1 in order to demonstrate all
214 the possibilities for providing input data.
215
216 MCE::Loop->run ( sub { code }, list )
217 mce_loop { code } list
218 Input data may be defined using a list, an array ref, or a hash ref.
219
220 # $_ contains the item when chunk_size => 1
221
222 mce_loop { $_ } 1..1000;
223 mce_loop { $_ } \@list;
224
225 # chunking, any chunk_size => 1 or higher
226
227 my %res = mce_loop {
228 my ($mce, $chunk_ref, $chunk_id) = @_;
229 my %ret;
230 for my $item (@{ $chunk_ref }) {
231 $ret{$item} = $item * 2;
232 }
233 MCE->gather(%ret);
234 }
235 \@list;
236
237 # input hash, current API available since 1.828
238
239 my %res = mce_loop {
240 my ($mce, $chunk_ref, $chunk_id) = @_;
241 my %ret;
242 for my $key (keys %{ $chunk_ref }) {
243 $ret{$key} = $chunk_ref->{$key} * 2;
244 }
245 MCE->gather(%ret);
246 }
247 \%hash;
248
249 MCE::Loop->run_file ( sub { code }, file )
250 mce_loop_f { code } file
251 The fastest of these is the /path/to/file. Workers communicate the
252 next offset position among themselves with zero interaction by the
253 manager process.
254
255 # $_ contains the line when chunk_size => 1
256
257 mce_loop_f { $_ } "/path/to/file"; # faster
258 mce_loop_f { $_ } $file_handle;
259 mce_loop_f { $_ } \$scalar;
260
261 # chunking, any chunk_size => 1 or higher
262
263 my %res = mce_loop_f {
264 my ($mce, $chunk_ref, $chunk_id) = @_;
265 my $buf = '';
266 for my $line (@{ $chunk_ref }) {
267 $buf .= $line;
268 }
269 MCE->gather($chunk_id, $buf);
270 }
271 "/path/to/file";
272
273 MCE::Loop->run_seq ( sub { code }, $beg, $end [, $step, $fmt ] )
274 mce_loop_s { code } $beg, $end [, $step, $fmt ]
275 Sequence may be defined as a list, an array reference, or a hash
276 reference. The functions require both begin and end values to run.
277 Step and format are optional. The format is passed to sprintf (% may
278 be omitted below).
279
280 my ($beg, $end, $step, $fmt) = (10, 20, 0.1, "%4.1f");
281
282 # $_ contains the sequence number when chunk_size => 1
283
284 mce_loop_s { $_ } $beg, $end, $step, $fmt;
285 mce_loop_s { $_ } [ $beg, $end, $step, $fmt ];
286
287 mce_loop_s { $_ } {
288 begin => $beg, end => $end,
289 step => $step, format => $fmt
290 };
291
292 # chunking, any chunk_size => 1 or higher
293
294 my %res = mce_loop_s {
295 my ($mce, $chunk_ref, $chunk_id) = @_;
296 my $buf = '';
297 for my $seq (@{ $chunk_ref }) {
298 $buf .= "$seq\n";
299 }
300 MCE->gather($chunk_id, $buf);
301 }
302 [ $beg, $end ];
303
304 With the bounds_only option, the sequence engine computes only the
305 'begin' and 'end' items for each chunk, and not the items in between
306 (hence boundaries only). This option applies to sequences only and
307 has no effect when chunk_size equals 1.
308
309 The time to run is 0.006s below. This becomes 0.827s without the
310 bounds_only option due to computing all items in between, thus
311 creating a very large array. Basically, specify bounds_only => 1
312 when boundaries are all you need for looping inside the block; e.g.
313 Monte Carlo simulations.
314
315 Time was measured using 1 worker to emphasize the difference.
316
317 use MCE::Loop;
318
319 MCE::Loop::init {
320 max_workers => 1, chunk_size => 1_250_000,
321 bounds_only => 1
322 };
323
324 # Typically, the input scalar $_ contains the sequence number
325 # when chunk_size => 1, unless the bounds_only option is set
326 # which is the case here. Thus, $_ points to $chunk_ref.
327
328 mce_loop_s {
329 my ($mce, $chunk_ref, $chunk_id) = @_;
330
331 # $chunk_ref contains 2 items, not 1_250_000
332 # my ( $begin, $end ) = ( $_->[0], $_->[1] );
333
334 my $begin = $chunk_ref->[0];
335 my $end = $chunk_ref->[1];
336
337 # for my $seq ( $begin .. $end ) {
338 # ...
339 # }
340
341 MCE->printf("%7d .. %8d\n", $begin, $end);
342 }
343 [ 1, 10_000_000 ];
344
345 -- Output
346
347 1 .. 1250000
348 1250001 .. 2500000
349 2500001 .. 3750000
350 3750001 .. 5000000
351 5000001 .. 6250000
352 6250001 .. 7500000
353 7500001 .. 8750000
354 8750001 .. 10000000
355
356 MCE::Loop->run ( sub { code }, iterator )
357 mce_loop { code } iterator
358 An iterator reference may be specified for input_data. Iterators are
359 described under section "SYNTAX for INPUT_DATA" at MCE::Core.
360
361 mce_loop { $_ } make_iterator(10, 30, 2);
362
364 Unlike MCE::Map, where gathering and output order are handled for you
365 automatically, with MCE::Loop the gather method is used to have
366 results sent back to the manager process.
367
368 use MCE::Loop chunk_size => 1;
369
370 ## Output order is not guaranteed.
371 my @a1 = mce_loop { MCE->gather($_ * 2) } 1..100;
372 print "@a1\n\n";
373
374 ## Outputs to a hash instead (key, value).
375 my %h1 = mce_loop { MCE->gather($_, $_ * 2) } 1..100;
376 print "@h1{1..100}\n\n";
377
378 ## This does the same thing due to chunk_id starting at one.
379 my %h2 = mce_loop { MCE->gather(MCE->chunk_id, $_ * 2) } 1..100;
380 print "@h2{1..100}\n\n";
381
382 The gather method may be called multiple times within the block,
383 unlike return, which would leave the block. Therefore, think of gather
384 as yielding results immediately to the manager process without actually
385 leaving the block.
386
387 use MCE::Loop chunk_size => 1, max_workers => 3;
388
389 my @hosts = qw(
390 hosta hostb hostc hostd hoste
391 );
392
393 my %h3 = mce_loop {
394 my ($output, $error, $status); my $host = $_;
395
396 ## Do something with $host;
397 $output = "Worker ". MCE->wid .": Hello from $host";
398
399 if (MCE->chunk_id % 3 == 0) {
400 ## Simulating an error condition
401 local $? = 1; $status = $?;
402 $error = "Error from $host"
403 }
404 else {
405 $status = 0;
406 }
407
408 ## Ensure unique keys (key, value) when gathering to
409 ## a hash.
410 MCE->gather("$host.out", $output);
411 MCE->gather("$host.err", $error) if (defined $error);
412 MCE->gather("$host.sta", $status);
413
414 } @hosts;
415
416 foreach my $host (@hosts) {
417 print $h3{"$host.out"}, "\n";
418 print $h3{"$host.err"}, "\n" if (exists $h3{"$host.err"});
419 print "Exit status: ", $h3{"$host.sta"}, "\n\n";
420 }
421
422 -- Output
423
424 Worker 2: Hello from hosta
425 Exit status: 0
426
427 Worker 1: Hello from hostb
428 Exit status: 0
429
430 Worker 3: Hello from hostc
431 Error from hostc
432 Exit status: 1
433
434 Worker 2: Hello from hostd
435 Exit status: 0
436
437 Worker 1: Hello from hoste
438 Exit status: 0
439
440 The following uses an anonymous array containing 3 elements when
441 gathering data. Serialization is automatic behind the scenes.
442
443 my %h3 = mce_loop {
444 ...
445
446 MCE->gather($host, [$output, $error, $status]);
447
448 } @hosts;
449
450 foreach my $host (@hosts) {
451 print $h3{$host}->[0], "\n";
452 print $h3{$host}->[1], "\n" if (defined $h3{$host}->[1]);
453 print "Exit status: ", $h3{$host}->[2], "\n\n";
454 }
455
456 Although MCE::Map comes to mind, one may want additional control when
457 gathering data such as retaining output order.
458
459 use MCE::Loop;
460
461 sub preserve_order {
462 my %tmp; my $order_id = 1; my $gather_ref = $_[0];
463
464 return sub {
465 $tmp{ (shift) } = \@_;
466
467 while (1) {
468 last unless exists $tmp{$order_id};
469 push @{ $gather_ref }, @{ delete $tmp{$order_id++} };
470 }
471
472 return;
473 };
474 }
475
476 my @m2;
477
478 MCE::Loop::init {
479 chunk_size => 'auto', max_workers => 'auto',
480 gather => preserve_order(\@m2)
481 };
482
483 mce_loop {
484 my @a; my ($mce, $chunk_ref, $chunk_id) = @_;
485
486 ## Compute the entire chunk data at once.
487 push @a, map { $_ * 2 } @{ $chunk_ref };
488
489 ## Afterwards, invoke the gather feature, which
490 ## will direct the data to the callback function.
491 MCE->gather(MCE->chunk_id, @a);
492
493 } 1..100000;
494
495 MCE::Loop::finish;
496
497 print scalar @m2, "\n";
498
499 All 6 models support 'auto' for chunk_size unlike the Core API. Think
500 of the models as the basis for providing JIT for MCE. They create the
501 instance, tune max_workers, and tune chunk_size automatically
502 regardless of the hardware.
503
504 The following does the same thing using the Core API.
505
506 use MCE;
507
508 sub preserve_order {
509 ...
510 }
511
512 my $mce = MCE->new(
513 max_workers => 'auto', chunk_size => 8000,
514
515 user_func => sub {
516 my @a; my ($mce, $chunk_ref, $chunk_id) = @_;
517
518 ## Compute the entire chunk data at once.
519 push @a, map { $_ * 2 } @{ $chunk_ref };
520
521 ## Afterwards, invoke the gather feature, which
522 ## will direct the data to the callback function.
523 MCE->gather(MCE->chunk_id, @a);
524 }
525 );
526
527 my @m2;
528
529 $mce->process({ gather => preserve_order(\@m2) }, [1..100000]);
530 $mce->shutdown;
531
532 print scalar @m2, "\n";
533
535 MCE::Loop->finish
536 MCE::Loop::finish
537 Workers remain persistent as much as possible after running.
538 Shutdown occurs automatically when the script terminates. Call
539 finish when workers are no longer needed.
540
541 use MCE::Loop;
542
543 MCE::Loop::init {
544 chunk_size => 20, max_workers => 'auto'
545 };
546
547 mce_loop { ... } 1..100;
548
549 MCE::Loop::finish;
550
552 MCE, MCE::Core
553
555 Mario E. Roy, <marioeroy AT gmail DOT com>
556
557
558
559perl v5.28.0 2018-08-25 MCE::Loop(3)