MCE::Loop(3pm)

1MCE::Loop(3)          User Contributed Perl Documentation         MCE::Loop(3)
2
3
4

NAME

6       MCE::Loop - MCE model for building parallel loops
7

VERSION

9       This document describes MCE::Loop version 1.874
10

DESCRIPTION

12       This module provides a parallel loop implementation through Many-Core
13       Engine.  MCE::Loop is not MCE::Map but more along the lines of an easy
14       way to spin up an MCE instance and have user_func pointing to your code
15       block. If you want something similar to map, then see MCE::Map.
16
17        ## Construction when chunking is not desired
18
19        use MCE::Loop;
20
21        MCE::Loop->init(
22           max_workers => 5, chunk_size => 1
23        );
24
25        mce_loop {
26           my ($mce, $chunk_ref, $chunk_id) = @_;
27           MCE->say("$chunk_id: $_");
28        } 40 .. 48;
29
30        -- Output
31
32        3: 42
33        1: 40
34        2: 41
35        4: 43
36        5: 44
37        6: 45
38        7: 46
39        8: 47
40        9: 48
41
42        ## Construction when chunk_size is 'auto' or greater than 1
43
44        use MCE::Loop;
45
46        MCE::Loop->init(
47           max_workers => 5, chunk_size => 'auto'
48        );
49
50        mce_loop {
51           my ($mce, $chunk_ref, $chunk_id) = @_;
52           for (@{ $chunk_ref }) {
53              MCE->say("$chunk_id: $_");
54           }
55        } 40 .. 48;
56
57        -- Output
58
59        1: 40
60        2: 42
61        1: 41
62        4: 46
63        2: 43
64        5: 48
65        3: 44
66        4: 47
67        3: 45
68

SYNOPSIS when CHUNK_SIZE EQUALS 1

70       All models in MCE default to 'auto' for chunk_size. The arguments for
71       the block are the same as writing a user_func block using the Core API.
72
73       Beginning with MCE 1.5, the next input item is placed into the input
74       scalar variable $_ when chunk_size equals 1. Otherwise, $_ points to
75       $chunk_ref containing many items. Basically, line 2 below may be
76       omitted from your code when using $_. One can call MCE->chunk_id to
77       obtain the current chunk id.
78
79        line 1:  user_func => sub {
80        line 2:     my ($mce, $chunk_ref, $chunk_id) = @_;
81        line 3:
82        line 4:     $_ points to $chunk_ref->[0]
83        line 5:        in MCE 1.5 when chunk_size == 1
84        line 6:
85        line 7:     $_ points to $chunk_ref
86        line 8:        in MCE 1.5 when chunk_size  > 1
87        line 9:  }
88
89       Follow this synopsis when chunk_size equals one. Looping is not
90       required inside the block because the block is called once for each
91       item.
92
93        ## Exports mce_loop, mce_loop_f, and mce_loop_s
94        use MCE::Loop;
95
96        MCE::Loop->init(
97           chunk_size => 1
98        );
99
100        ## Array or array_ref
101        mce_loop { do_work($_) } 1..10000;
102        mce_loop { do_work($_) } \@list;
103
104        ## Important; pass an array_ref for deeply nested input data
105        mce_loop { do_work($_) } [ [ 0, 1 ], [ 0, 2 ], ... ];
106        mce_loop { do_work($_) } \@deeply_list;
107
108        ## File path, glob ref, IO::All::{ File, Pipe, STDIO } obj, or scalar ref
109        ## Workers read directly and do not involve the manager process
110        mce_loop_f { chomp; do_work($_) } "/path/to/file"; # efficient
111
112        ## Involves the manager process, therefore slower
113        mce_loop_f { chomp; do_work($_) } $file_handle;
114        mce_loop_f { chomp; do_work($_) } $io;
115        mce_loop_f { chomp; do_work($_) } \$scalar;
116
117        ## Sequence of numbers (begin, end [, step, format])
118        mce_loop_s { do_work($_) } 1, 10000, 5;
119        mce_loop_s { do_work($_) } [ 1, 10000, 5 ];
120
121        mce_loop_s { do_work($_) } {
122           begin => 1, end => 10000, step => 5, format => undef
123        };
124

SYNOPSIS when CHUNK_SIZE is GREATER THAN 1

126       Follow this synopsis when chunk_size equals 'auto' or greater than 1.
127       This means having to loop through the chunk from inside the block.
128
129        use MCE::Loop;
130
131        MCE::Loop->init(           ## Chunk_size defaults to 'auto' when
132           chunk_size => 'auto'    ## not specified. Therefore, the init
133        );                         ## function may be omitted.
134
135        ## Syntax is shown for mce_loop for demonstration purposes.
136        ## Looping inside the block is the same for mce_loop_f and
137        ## mce_loop_s.
138
139        ## Array or array_ref
140        mce_loop { do_work($_) for (@{ $_ }) } 1..10000;
141        mce_loop { do_work($_) for (@{ $_ }) } \@list;
142
143        ## Important; pass an array_ref for deeply nested input data
144        mce_loop { do_work($_) for (@{ $_ }) } [ [ 0, 1 ], [ 0, 2 ], ... ];
145        mce_loop { do_work($_) for (@{ $_ }) } \@deeply_list;
146
147        ## Resembles code using the core MCE API
148        mce_loop {
149           my ($mce, $chunk_ref, $chunk_id) = @_;
150
151           for (@{ $chunk_ref }) {
152              do_work($_);
153           }
154
155        } 1..10000;
156
157       Chunking reduces the number of IPC calls behind the scenes. Think in
158       terms of chunks whenever processing a large amount of data. For
159       relatively small data, choosing 1 for chunk_size is fine.
160

OVERRIDING DEFAULTS

162       The following lists the options which may be overridden when loading
163       the module.
164
165        use Sereal qw( encode_sereal decode_sereal );
166        use CBOR::XS qw( encode_cbor decode_cbor );
167        use JSON::XS qw( encode_json decode_json );
168
169        use MCE::Loop
170            max_workers => 4,                # Default 'auto'
171            chunk_size => 100,               # Default 'auto'
172            tmp_dir => "/path/to/app/tmp",   # $MCE::Signal::tmp_dir
173            freeze => \&encode_sereal,       # \&Storable::freeze
174            thaw => \&decode_sereal          # \&Storable::thaw
175        ;
176
177       From MCE 1.8 onwards, Sereal 3.015+ is loaded automatically if
178       available.  Specify "Sereal => 0" to use Storable instead.
179
180        use MCE::Loop Sereal => 0;
181

CUSTOMIZING MCE

183       MCE::Loop->init ( options )
184       MCE::Loop::init { options }
185
186       The init function accepts a hash of MCE options.
187
188        use MCE::Loop;
189
190        MCE::Loop->init(
191           chunk_size => 1, max_workers => 4,
192
193           user_begin => sub {
194              print "## ", MCE->wid, " started\n";
195           },
196
197           user_end => sub {
198              print "## ", MCE->wid, " completed\n";
199           }
200        );
201
202        my %a = mce_loop { MCE->gather($_, $_ * $_) } 1..100;
203
204        print "\n", "@a{1..100}", "\n";
205
206        -- Output
207
208        ## 3 started
209        ## 1 started
210        ## 2 started
211        ## 4 started
212        ## 1 completed
213        ## 2 completed
214        ## 3 completed
215        ## 4 completed
216
217        1 4 9 16 25 36 49 64 81 100 121 144 169 196 225 256 289 324 361
218        400 441 484 529 576 625 676 729 784 841 900 961 1024 1089 1156
219        1225 1296 1369 1444 1521 1600 1681 1764 1849 1936 2025 2116 2209
220        2304 2401 2500 2601 2704 2809 2916 3025 3136 3249 3364 3481 3600
221        3721 3844 3969 4096 4225 4356 4489 4624 4761 4900 5041 5184 5329
222        5476 5625 5776 5929 6084 6241 6400 6561 6724 6889 7056 7225 7396
223        7569 7744 7921 8100 8281 8464 8649 8836 9025 9216 9409 9604 9801
224        10000
225

API DOCUMENTATION

227       The following assumes chunk_size equals 1 in order to demonstrate all
228       the possibilities for providing input data.
229
230       MCE::Loop->run ( sub { code }, list )
231       mce_loop { code } list
232
233       Input data may be defined using a list, an array ref, or a hash ref.
234
235        # $_ contains the item when chunk_size => 1
236
237        mce_loop { do_work($_) } 1..1000;
238        mce_loop { do_work($_) } \@list;
239
240        # Important; pass an array_ref for deeply nested input data
241
242        mce_loop { do_work($_) } [ [ 0, 1 ], [ 0, 2 ], ... ];
243        mce_loop { do_work($_) } \@deeply_list;
244
245        # Chunking; any chunk_size => 1 or greater
246
247        my %res = mce_loop {
248           my ($mce, $chunk_ref, $chunk_id) = @_;
249           my %ret;
250           for my $item (@{ $chunk_ref }) {
251              $ret{$item} = $item * 2;
252           }
253           MCE->gather(%ret);
254        }
255        \@list;
256
257        # Input hash; current API available since 1.828
258
259        my %res = mce_loop {
260           my ($mce, $chunk_ref, $chunk_id) = @_;
261           my %ret;
262           for my $key (keys %{ $chunk_ref }) {
263              $ret{$key} = $chunk_ref->{$key} * 2;
264           }
265           MCE->gather(%ret);
266        }
267        \%hash;
268
269       MCE::Loop->run_file ( sub { code }, file )
270       mce_loop_f { code } file
271
272       The fastest of these is the /path/to/file. Workers communicate the next
273       offset position among themselves with zero interaction by the manager
274       process.
275
276       "IO::All" { File, Pipe, STDIO } is supported since MCE 1.845.
277
278        # $_ contains the line when chunk_size => 1
279
280        mce_loop_f { $_ } "/path/to/file";  # faster
281        mce_loop_f { $_ } $file_handle;
282        mce_loop_f { $_ } $io;              # IO::All
283        mce_loop_f { $_ } \$scalar;
284
285        # chunking, any chunk_size => 1 or greater
286
287        my %res = mce_loop_f {
288           my ($mce, $chunk_ref, $chunk_id) = @_;
289           my $buf = '';
290           for my $line (@{ $chunk_ref }) {
291              $buf .= $line;
292           }
293           MCE->gather($chunk_id, $buf);
294        }
295        "/path/to/file";
296
297       MCE::Loop->run_seq ( sub { code }, $beg, $end [, $step, $fmt ] )
298       mce_loop_s { code } $beg, $end [, $step, $fmt ]
299
300       Sequence may be defined as a list, an array reference, or a hash
301       reference.  The functions require both begin and end values to run.
302       Step and format are optional. The format is passed to sprintf (% may be
303       omitted below).
304
305        my ($beg, $end, $step, $fmt) = (10, 20, 0.1, "%4.1f");
306
307        # $_ contains the sequence number when chunk_size => 1
308
309        mce_loop_s { $_ } $beg, $end, $step, $fmt;
310        mce_loop_s { $_ } [ $beg, $end, $step, $fmt ];
311
312        mce_loop_s { $_ } {
313           begin => $beg, end => $end,
314           step => $step, format => $fmt
315        };
316
317        # chunking, any chunk_size => 1 or greater
318
319        my %res = mce_loop_s {
320           my ($mce, $chunk_ref, $chunk_id) = @_;
321           my $buf = '';
322           for my $seq (@{ $chunk_ref }) {
323              $buf .= "$seq\n";
324           }
325           MCE->gather($chunk_id, $buf);
326        }
327        [ $beg, $end ];
328
329       With the bounds_only option, the sequence engine computes the 'begin'
330       and 'end' items only for each chunk, and not the items in between
331       (hence boundaries only). This option applies to sequence only and has
332       no effect when chunk_size equals 1.
333
334       The example below runs in 0.006s. This becomes 0.827s without the
335       bounds_only option due to computing all items in between, thus creating
336       a very large array. Basically, specify bounds_only => 1 when boundaries
337       are all you need for looping inside the block; e.g. Monte Carlo
338       simulations.
339
340       Time was measured using 1 worker to emphasize the difference.
341
342        use MCE::Loop;
343
344        MCE::Loop->init(
345           max_workers => 1, chunk_size => 1_250_000,
346           bounds_only => 1
347        );
348
349        # Typically, the input scalar $_ contains the sequence number
350        # when chunk_size => 1, unless the bounds_only option is set
351        # which is the case here. Thus, $_ points to $chunk_ref.
352
353        mce_loop_s {
354           my ($mce, $chunk_ref, $chunk_id) = @_;
355
356           # $chunk_ref contains 2 items, not 1_250_000
357           # my ( $begin, $end ) = ( $_->[0], $_->[1] );
358
359           my $begin = $chunk_ref->[0];
360           my $end   = $chunk_ref->[1];
361
362           # for my $seq ( $begin .. $end ) {
363           #    ...
364           # }
365
366           MCE->printf("%7d .. %8d\n", $begin, $end);
367        }
368        [ 1, 10_000_000 ];
369
370        -- Output
371
372              1 ..  1250000
373        1250001 ..  2500000
374        2500001 ..  3750000
375        3750001 ..  5000000
376        5000001 ..  6250000
377        6250001 ..  7500000
378        7500001 ..  8750000
379        8750001 .. 10000000
380
381       MCE::Loop->run ( sub { code }, iterator )
382       mce_loop { code } iterator
383
384       An iterator reference may be specified for input_data. Iterators are
385       described under section "SYNTAX for INPUT_DATA" in MCE::Core.
386
387        mce_loop { $_ } make_iterator(10, 30, 2);
388

GATHERING DATA

390       Unlike MCE::Map, where gathering and output order are handled for you
391       automatically, MCE::Loop relies on the gather method to have results
392       sent back to the manager process.
393
394        use MCE::Loop chunk_size => 1;
395
396        ## Output order is not guaranteed.
397        my @a1 = mce_loop { MCE->gather($_ * 2) } 1..100;
398        print "@a1\n\n";
399
400        ## Outputs to a hash instead (key, value).
401        my %h1 = mce_loop { MCE->gather($_, $_ * 2) } 1..100;
402        print "@h1{1..100}\n\n";
403
404        ## This does the same thing due to chunk_id starting at one.
405        my %h2 = mce_loop { MCE->gather(MCE->chunk_id, $_ * 2) } 1..100;
406        print "@h2{1..100}\n\n";
407
408       The gather method may be called multiple times within the block unlike
409       return which would leave the block. Therefore, think of gather as
410       yielding results immediately to the manager process without actually
411       leaving the block.
412
413        use MCE::Loop chunk_size => 1, max_workers => 3;
414
415        my @hosts = qw(
416           hosta hostb hostc hostd hoste
417        );
418
419        my %h3 = mce_loop {
420           my ($output, $error, $status); my $host = $_;
421
422           ## Do something with $host;
423           $output = "Worker ". MCE->wid .": Hello from $host";
424
425           if (MCE->chunk_id % 3 == 0) {
426              ## Simulating an error condition
427              local $? = 1; $status = $?;
428              $error = "Error from $host"
429           }
430           else {
431              $status = 0;
432           }
433
434           ## Ensure unique keys (key, value) when gathering to
435           ## a hash.
436           MCE->gather("$host.out", $output);
437           MCE->gather("$host.err", $error) if (defined $error);
438           MCE->gather("$host.sta", $status);
439
440        } @hosts;
441
442        foreach my $host (@hosts) {
443           print $h3{"$host.out"}, "\n";
444           print $h3{"$host.err"}, "\n" if (exists $h3{"$host.err"});
445           print "Exit status: ", $h3{"$host.sta"}, "\n\n";
446        }
447
448        -- Output
449
450        Worker 2: Hello from hosta
451        Exit status: 0
452
453        Worker 1: Hello from hostb
454        Exit status: 0
455
456        Worker 3: Hello from hostc
457        Error from hostc
458        Exit status: 1
459
460        Worker 2: Hello from hostd
461        Exit status: 0
462
463        Worker 1: Hello from hoste
464        Exit status: 0
465
466       The following uses an anonymous array containing 3 elements when
467       gathering data. Serialization is automatic behind the scenes.
468
469        my %h3 = mce_loop {
470           ...
471
472           MCE->gather($host, [$output, $error, $status]);
473
474        } @hosts;
475
476        foreach my $host (@hosts) {
477           print $h3{$host}->[0], "\n";
478           print $h3{$host}->[1], "\n" if (defined $h3{$host}->[1]);
479           print "Exit status: ", $h3{$host}->[2], "\n\n";
480        }
481
482       Although MCE::Map comes to mind, one may want additional control when
483       gathering data such as retaining output order.
484
485        use MCE::Loop;
486
487        sub preserve_order {
488           my %tmp; my $order_id = 1; my $gather_ref = $_[0];
489
490           return sub {
491              $tmp{ (shift) } = \@_;
492
493              while (1) {
494                 last unless exists $tmp{$order_id};
495                 push @{ $gather_ref }, @{ delete $tmp{$order_id++} };
496              }
497
498              return;
499           };
500        }
501
502        my @m2;
503
504        MCE::Loop->init(
505           chunk_size => 'auto', max_workers => 'auto',
506           gather => preserve_order(\@m2)
507        );
508
509        mce_loop {
510           my @a; my ($mce, $chunk_ref, $chunk_id) = @_;
511
512           ## Compute the entire chunk data at once.
513           push @a, map { $_ * 2 } @{ $chunk_ref };
514
515           ## Afterwards, invoke the gather feature, which
516           ## will direct the data to the callback function.
517           MCE->gather(MCE->chunk_id, @a);
518
519        } 1..100000;
520
521        MCE::Loop->finish;
522
523        print scalar @m2, "\n";
524
525       All 6 models support 'auto' for chunk_size unlike the Core API. Think
526       of the models as the basis for providing JIT for MCE. They create the
527       instance, tune max_workers, and tune chunk_size automatically
528       regardless of the hardware.
529
530       The following does the same thing using the Core API.
531
532        use MCE;
533
534        sub preserve_order {
535           ...
536        }
537
538        my $mce = MCE->new(
539           max_workers => 'auto', chunk_size => 8000,
540
541           user_func => sub {
542              my @a; my ($mce, $chunk_ref, $chunk_id) = @_;
543
544              ## Compute the entire chunk data at once.
545              push @a, map { $_ * 2 } @{ $chunk_ref };
546
547              ## Afterwards, invoke the gather feature, which
548              ## will direct the data to the callback function.
549              MCE->gather(MCE->chunk_id, @a);
550           }
551        );
552
553        my @m2;
554
555        $mce->process({ gather => preserve_order(\@m2) }, [1..100000]);
556        $mce->shutdown;
557
558        print scalar @m2, "\n";
559

MANUAL SHUTDOWN

561       MCE::Loop->finish
562       MCE::Loop::finish
563
564       Workers remain persistent as much as possible after running. Shutdown
565       occurs automatically when the script terminates. Call finish when
566       workers are no longer needed.
567
568        use MCE::Loop;
569
570        MCE::Loop->init(
571           chunk_size => 20, max_workers => 'auto'
572        );
573
574        mce_loop { ... } 1..100;
575
576        MCE::Loop->finish;
577

INDEX

579       MCE, MCE::Core
580

AUTHOR

582       Mario E. Roy, <marioeroy AT gmail DOT com>
583
584
585
586perl v5.32.1                      2021-01-27                      MCE::Loop(3)