MCE::Loop(3pm)

1MCE::Loop(3)          User Contributed Perl Documentation         MCE::Loop(3)
2
3
4

NAME

6       MCE::Loop - MCE model for building parallel loops
7

VERSION

9       This document describes MCE::Loop version 1.884
10

DESCRIPTION

12       This module provides a parallel loop implementation through Many-Core
13       Engine.  MCE::Loop is not MCE::Map but more along the lines of an easy
14       way to spin up an MCE instance and have user_func pointing to your code
15       block. If you want something similar to map, then see MCE::Map.
16
17        ## Construction when chunking is not desired
18
19        use MCE::Loop;
20
21        MCE::Loop->init(
22           max_workers => 5, chunk_size => 1
23        );
24
25        mce_loop {
26           my ($mce, $chunk_ref, $chunk_id) = @_;
27           MCE->say("$chunk_id: $_");
28        } 40 .. 48;
29
30        -- Output
31
32        3: 42
33        1: 40
34        2: 41
35        4: 43
36        5: 44
37        6: 45
38        7: 46
39        8: 47
40        9: 48
41
42        ## Construction when chunk_size is 'auto' or greater than 1
43
44        use MCE::Loop;
45
46        MCE::Loop->init(
47           max_workers => 5, chunk_size => 'auto'
48        );
49
50        mce_loop {
51           my ($mce, $chunk_ref, $chunk_id) = @_;
52           for (@{ $chunk_ref }) {
53              MCE->say("$chunk_id: $_");
54           }
55        } 40 .. 48;
56
57        -- Output
58
59        1: 40
60        2: 42
61        1: 41
62        4: 46
63        2: 43
64        5: 48
65        3: 44
66        4: 47
67        3: 45
68

SYNOPSIS when CHUNK_SIZE EQUALS 1

70       All models in MCE default to 'auto' for chunk_size. The arguments for
71       the block are the same as writing a user_func block using the Core API.
72
73       Beginning with MCE 1.5, the next input item is placed into the input
74       scalar variable $_ when chunk_size equals 1. Otherwise, $_ points to
75       $chunk_ref containing many items. Basically, line 2 below may be
76       omitted from your code when using $_. One can call MCE->chunk_id to
77       obtain the current chunk id.
78
79        line 1:  user_func => sub {
80        line 2:     my ($mce, $chunk_ref, $chunk_id) = @_;
81        line 3:
82        line 4:     $_ points to $chunk_ref->[0]
83        line 5:        in MCE 1.5 when chunk_size == 1
84        line 6:
85        line 7:     $_ points to $chunk_ref
86        line 8:        in MCE 1.5 when chunk_size  > 1
87        line 9:  }
88
89       Follow this synopsis when chunk_size equals one. Looping is not
90       required from inside the block. Hence, the block is called once per
91       item.
92
93        ## Exports mce_loop, mce_loop_f, and mce_loop_s
94        use MCE::Loop;
95
96        MCE::Loop->init(
97           chunk_size => 1
98        );
99
100        ## Array or array_ref
101        mce_loop { do_work($_) } 1..10000;
102        mce_loop { do_work($_) } \@list;
103
104        ## Important; pass an array_ref for deeply nested input data
105        mce_loop { do_work($_) } [ [ 0, 1 ], [ 0, 2 ], ... ];
106        mce_loop { do_work($_) } \@deeply_list;
107
108        ## File path, glob ref, IO::All::{ File, Pipe, STDIO } obj, or scalar ref
109        ## Workers read directly and do not involve the manager process
110        mce_loop_f { chomp; do_work($_) } "/path/to/file"; # efficient
111
112        ## Involves the manager process, therefore slower
113        mce_loop_f { chomp; do_work($_) } $file_handle;
114        mce_loop_f { chomp; do_work($_) } $io;
115        mce_loop_f { chomp; do_work($_) } \$scalar;
116
117        ## Sequence of numbers (begin, end [, step, format])
118        mce_loop_s { do_work($_) } 1, 10000, 5;
119        mce_loop_s { do_work($_) } [ 1, 10000, 5 ];
120
121        mce_loop_s { do_work($_) } {
122           begin => 1, end => 10000, step => 5, format => undef
123        };
124

SYNOPSIS when CHUNK_SIZE is GREATER THAN 1

126       Follow this synopsis when chunk_size equals 'auto' or greater than 1.
127       This means having to loop through the chunk from inside the block.
128
129        use MCE::Loop;
130
131        MCE::Loop->init(           ## Chunk_size defaults to 'auto' when
132           chunk_size => 'auto'    ## not specified. Therefore, the init
133        );                         ## function may be omitted.
134
135        ## Syntax is shown for mce_loop for demonstration purposes.
136        ## Looping inside the block is the same for mce_loop_f and
137        ## mce_loop_s.
138
139        ## Array or array_ref
140        mce_loop { do_work($_) for (@{ $_ }) } 1..10000;
141        mce_loop { do_work($_) for (@{ $_ }) } \@list;
142
143        ## Important; pass an array_ref for deeply nested input data
144        mce_loop { do_work($_) for (@{ $_ }) } [ [ 0, 1 ], [ 0, 2 ], ... ];
145        mce_loop { do_work($_) for (@{ $_ }) } \@deeply_list;
146
147        ## Resembles code using the core MCE API
148        mce_loop {
149           my ($mce, $chunk_ref, $chunk_id) = @_;
150
151           for (@{ $chunk_ref }) {
152              do_work($_);
153           }
154
155        } 1..10000;
156
157       Chunking reduces the number of IPC calls behind the scenes. Think in
158       terms of chunks whenever processing a large amount of data. For
159       relatively small data, choosing 1 for chunk_size is fine.
160

OVERRIDING DEFAULTS

162       The following lists the options which may be overridden when loading
163       the module.
164
165        use Sereal qw( encode_sereal decode_sereal );
166        use CBOR::XS qw( encode_cbor decode_cbor );
167        use JSON::XS qw( encode_json decode_json );
168
169        use MCE::Loop
170            max_workers => 4,                # Default 'auto'
171            chunk_size => 100,               # Default 'auto'
172            tmp_dir => "/path/to/app/tmp",   # $MCE::Signal::tmp_dir
173            freeze => \&encode_sereal,       # \&Storable::freeze
174            thaw => \&decode_sereal,         # \&Storable::thaw
175            init_relay => 0,                 # Default undef; MCE 1.882+
176            use_threads => 0,                # Default undef; MCE 1.882+
177        ;
178
179       From MCE 1.8 onwards, Sereal 3.015+ is loaded automatically if
180       available.  Specify "Sereal => 0" to use Storable instead.
181
182        use MCE::Loop Sereal => 0;
183

CUSTOMIZING MCE

185       MCE::Loop->init ( options )
186       MCE::Loop::init { options }
187
188       The init function accepts a hash of MCE options.
189
190        use MCE::Loop;
191
192        MCE::Loop->init(
193           chunk_size => 1, max_workers => 4,
194
195           user_begin => sub {
196              print "## ", MCE->wid, " started\n";
197           },
198
199           user_end => sub {
200              print "## ", MCE->wid, " completed\n";
201           }
202        );
203
204        my %a = mce_loop { MCE->gather($_, $_ * $_) } 1..100;
205
206        print "\n", "@a{1..100}", "\n";
207
208        -- Output
209
210        ## 3 started
211        ## 1 started
212        ## 2 started
213        ## 4 started
214        ## 1 completed
215        ## 2 completed
216        ## 3 completed
217        ## 4 completed
218
219        1 4 9 16 25 36 49 64 81 100 121 144 169 196 225 256 289 324 361
220        400 441 484 529 576 625 676 729 784 841 900 961 1024 1089 1156
221        1225 1296 1369 1444 1521 1600 1681 1764 1849 1936 2025 2116 2209
222        2304 2401 2500 2601 2704 2809 2916 3025 3136 3249 3364 3481 3600
223        3721 3844 3969 4096 4225 4356 4489 4624 4761 4900 5041 5184 5329
224        5476 5625 5776 5929 6084 6241 6400 6561 6724 6889 7056 7225 7396
225        7569 7744 7921 8100 8281 8464 8649 8836 9025 9216 9409 9604 9801
226        10000
227

API DOCUMENTATION

229       The following assumes chunk_size equals 1 in order to demonstrate all
230       the possibilities for providing input data.
231
232       MCE::Loop->run ( sub { code }, list )
233       mce_loop { code } list
234
235       Input data may be defined using a list, an array ref, or a hash ref.
236
237        # $_ contains the item when chunk_size => 1
238
239        mce_loop { do_work($_) } 1..1000;
240        mce_loop { do_work($_) } \@list;
241
242        # Important; pass an array_ref for deeply nested input data
243
244        mce_loop { do_work($_) } [ [ 0, 1 ], [ 0, 2 ], ... ];
245        mce_loop { do_work($_) } \@deeply_list;
246
247        # Chunking; any chunk_size => 1 or greater
248
249        my %res = mce_loop {
250           my ($mce, $chunk_ref, $chunk_id) = @_;
251           my %ret;
252           for my $item (@{ $chunk_ref }) {
253              $ret{$item} = $item * 2;
254           }
255           MCE->gather(%ret);
256        }
257        \@list;
258
259        # Input hash; current API available since 1.828
260
261        my %res = mce_loop {
262           my ($mce, $chunk_ref, $chunk_id) = @_;
263           my %ret;
264           for my $key (keys %{ $chunk_ref }) {
265              $ret{$key} = $chunk_ref->{$key} * 2;
266           }
267           MCE->gather(%ret);
268        }
269        \%hash;
270
271       MCE::Loop->run_file ( sub { code }, file )
272       mce_loop_f { code } file
273
274       The fastest of these is the /path/to/file. Workers communicate the next
275       offset position among themselves with zero interaction by the manager
276       process.
277
278       "IO::All" { File, Pipe, STDIO } is supported since MCE 1.845.
279
280        # $_ contains the line when chunk_size => 1
281
282        mce_loop_f { $_ } "/path/to/file";  # faster
283        mce_loop_f { $_ } $file_handle;
284        mce_loop_f { $_ } $io;              # IO::All
285        mce_loop_f { $_ } \$scalar;
286
287        # chunking, any chunk_size => 1 or greater
288
289        my %res = mce_loop_f {
290           my ($mce, $chunk_ref, $chunk_id) = @_;
291           my $buf = '';
292           for my $line (@{ $chunk_ref }) {
293              $buf .= $line;
294           }
295           MCE->gather($chunk_id, $buf);
296        }
297        "/path/to/file";
298
299       MCE::Loop->run_seq ( sub { code }, $beg, $end [, $step, $fmt ] )
300       mce_loop_s { code } $beg, $end [, $step, $fmt ]
301
302       Sequence may be defined as a list, an array reference, or a hash
303       reference.  The functions require both begin and end values to run.
304       Step and format are optional. The format is passed to sprintf (% may be
305       omitted below).
306
307        my ($beg, $end, $step, $fmt) = (10, 20, 0.1, "%4.1f");
308
309        # $_ contains the sequence number when chunk_size => 1
310
311        mce_loop_s { $_ } $beg, $end, $step, $fmt;
312        mce_loop_s { $_ } [ $beg, $end, $step, $fmt ];
313
314        mce_loop_s { $_ } {
315           begin => $beg, end => $end,
316           step => $step, format => $fmt
317        };
318
319        # chunking, any chunk_size => 1 or greater
320
321        my %res = mce_loop_s {
322           my ($mce, $chunk_ref, $chunk_id) = @_;
323           my $buf = '';
324           for my $seq (@{ $chunk_ref }) {
325              $buf .= "$seq\n";
326           }
327           MCE->gather($chunk_id, $buf);
328        }
329        [ $beg, $end ];
330
331       With the bounds_only option, the sequence engine computes the 'begin'
332       and 'end' items only, for the chunk, and not the items in between
333       (hence boundaries only). This option applies to sequence only and has
334       no effect when chunk_size equals 1.
335
336       The time to run is 0.006s below. This becomes 0.827s without the
337       bounds_only option due to computing all items in between, thus creating
338       a very large array. Basically, specify bounds_only => 1 when boundaries
339       are all you need for looping inside the block; e.g. Monte Carlo
340       simulations.
341
342       Time was measured using 1 worker to emphasize the difference.
343
344        use MCE::Loop;
345
346        MCE::Loop->init(
347           max_workers => 1, chunk_size => 1_250_000,
348           bounds_only => 1
349        );
350
351        # Typically, the input scalar $_ contains the sequence number
352        # when chunk_size => 1, unless the bounds_only option is set
353        # which is the case here. Thus, $_ points to $chunk_ref.
354
355        mce_loop_s {
356           my ($mce, $chunk_ref, $chunk_id) = @_;
357
358           # $chunk_ref contains 2 items, not 1_250_000
359           # my ( $begin, $end ) = ( $_->[0], $_->[1] );
360
361           my $begin = $chunk_ref->[0];
362           my $end   = $chunk_ref->[1];
363
364           # for my $seq ( $begin .. $end ) {
365           #    ...
366           # }
367
368           MCE->printf("%7d .. %8d\n", $begin, $end);
369        }
370        [ 1, 10_000_000 ];
371
372        -- Output
373
374              1 ..  1250000
375        1250001 ..  2500000
376        2500001 ..  3750000
377        3750001 ..  5000000
378        5000001 ..  6250000
379        6250001 ..  7500000
380        7500001 ..  8750000
381        8750001 .. 10000000
382
383       MCE::Loop->run ( sub { code }, iterator )
384       mce_loop { code } iterator
385
386       An iterator reference may be specified for input_data. Iterators are
387       described under section "SYNTAX for INPUT_DATA" at MCE::Core.
388
389        mce_loop { $_ } make_iterator(10, 30, 2);
390

GATHERING DATA

392       Unlike MCE::Map where gather and output order are done for you
393       automatically, the gather method is used to have results sent back to
394       the manager process.
395
396        use MCE::Loop chunk_size => 1;
397
398        ## Output order is not guaranteed.
399        my @a1 = mce_loop { MCE->gather($_ * 2) } 1..100;
400        print "@a1\n\n";
401
402        ## Outputs to a hash instead (key, value).
403        my %h1 = mce_loop { MCE->gather($_, $_ * 2) } 1..100;
404        print "@h1{1..100}\n\n";
405
406        ## This does the same thing due to chunk_id starting at one.
407        my %h2 = mce_loop { MCE->gather(MCE->chunk_id, $_ * 2) } 1..100;
408        print "@h2{1..100}\n\n";
409
410       The gather method may be called multiple times within the block unlike
411       return which would leave the block. Therefore, think of gather as
412       yielding results immediately to the manager process without actually
413       leaving the block.
414
415        use MCE::Loop chunk_size => 1, max_workers => 3;
416
417        my @hosts = qw(
418           hosta hostb hostc hostd hoste
419        );
420
421        my %h3 = mce_loop {
422           my ($output, $error, $status); my $host = $_;
423
424           ## Do something with $host;
425           $output = "Worker ". MCE->wid .": Hello from $host";
426
427           if (MCE->chunk_id % 3 == 0) {
428              ## Simulating an error condition
429              local $? = 1; $status = $?;
430              $error = "Error from $host"
431           }
432           else {
433              $status = 0;
434           }
435
436           ## Ensure unique keys (key, value) when gathering to
437           ## a hash.
438           MCE->gather("$host.out", $output);
439           MCE->gather("$host.err", $error) if (defined $error);
440           MCE->gather("$host.sta", $status);
441
442        } @hosts;
443
444        foreach my $host (@hosts) {
445           print $h3{"$host.out"}, "\n";
446           print $h3{"$host.err"}, "\n" if (exists $h3{"$host.err"});
447           print "Exit status: ", $h3{"$host.sta"}, "\n\n";
448        }
449
450        -- Output
451
452        Worker 2: Hello from hosta
453        Exit status: 0
454
455        Worker 1: Hello from hostb
456        Exit status: 0
457
458        Worker 3: Hello from hostc
459        Error from hostc
460        Exit status: 1
461
462        Worker 2: Hello from hostd
463        Exit status: 0
464
465        Worker 1: Hello from hoste
466        Exit status: 0
467
468       The following uses an anonymous array containing 3 elements when
469       gathering data. Serialization is automatic behind the scenes.
470
471        my %h3 = mce_loop {
472           ...
473
474           MCE->gather($host, [$output, $error, $status]);
475
476        } @hosts;
477
478        foreach my $host (@hosts) {
479           print $h3{$host}->[0], "\n";
480           print $h3{$host}->[1], "\n" if (defined $h3{$host}->[1]);
481           print "Exit status: ", $h3{$host}->[2], "\n\n";
482        }
483
484       Although MCE::Map comes to mind, one may want additional control when
485       gathering data such as retaining output order.
486
487        use MCE::Loop;
488
489        sub preserve_order {
490           my %tmp; my $order_id = 1; my $gather_ref = $_[0];
491
492           return sub {
493              $tmp{ (shift) } = \@_;
494
495              while (1) {
496                 last unless exists $tmp{$order_id};
497                 push @{ $gather_ref }, @{ delete $tmp{$order_id++} };
498              }
499
500              return;
501           };
502        }
503
504        my @m2;
505
506        MCE::Loop->init(
507           chunk_size => 'auto', max_workers => 'auto',
508           gather => preserve_order(\@m2)
509        );
510
511        mce_loop {
512           my @a; my ($mce, $chunk_ref, $chunk_id) = @_;
513
514           ## Compute the entire chunk data at once.
515           push @a, map { $_ * 2 } @{ $chunk_ref };
516
517           ## Afterwards, invoke the gather feature, which
518           ## will direct the data to the callback function.
519           MCE->gather(MCE->chunk_id, @a);
520
521        } 1..100000;
522
523        MCE::Loop->finish;
524
525        print scalar @m2, "\n";
526
527       All 6 models support 'auto' for chunk_size unlike the Core API. Think
528       of the models as the basis for providing JIT for MCE. They create the
529       instance, tune max_workers, and tune chunk_size automatically
530       regardless of the hardware.
531
532       The following does the same thing using the Core API.
533
534        use MCE;
535
536        sub preserve_order {
537           ...
538        }
539
540        my $mce = MCE->new(
541           max_workers => 'auto', chunk_size => 8000,
542
543           user_func => sub {
544              my @a; my ($mce, $chunk_ref, $chunk_id) = @_;
545
546              ## Compute the entire chunk data at once.
547              push @a, map { $_ * 2 } @{ $chunk_ref };
548
549              ## Afterwards, invoke the gather feature, which
550              ## will direct the data to the callback function.
551              MCE->gather(MCE->chunk_id, @a);
552           }
553        );
554
555        my @m2;
556
557        $mce->process({ gather => preserve_order(\@m2) }, [1..100000]);
558        $mce->shutdown;
559
560        print scalar @m2, "\n";
561

MANUAL SHUTDOWN

563       MCE::Loop->finish
564       MCE::Loop::finish
565
566       Workers remain persistent as much as possible after running. Shutdown
567       occurs automatically when the script terminates. Call finish when
568       workers are no longer needed.
569
570        use MCE::Loop;
571
572        MCE::Loop->init(
573           chunk_size => 20, max_workers => 'auto'
574        );
575
576        mce_loop { ... } 1..100;
577
578        MCE::Loop->finish;
579

INDEX

581       MCE, MCE::Core
582

AUTHOR

584       Mario E. Roy, <marioeroy AT gmail DOT com>
585
586
587
588perl v5.36.0                      2023-01-20                      MCE::Loop(3)