MCE::Loop(3pm)

1MCE::Loop(3)          User Contributed Perl Documentation         MCE::Loop(3)
2
3
4

NAME

6       MCE::Loop - MCE model for building parallel loops
7

VERSION

9       This document describes MCE::Loop version 1.837
10

DESCRIPTION

12       This module provides a parallel loop implementation through Many-Core
13       Engine.  MCE::Loop is not MCE::Map, but rather an easy way to spin up
14       an MCE instance with user_func pointing to your code block. If you
15       want something similar to map, then see MCE::Map.
16
17        ## Construction when chunking is not desired
18
19        use MCE::Loop;
20
21        MCE::Loop::init {
22           max_workers => 5, chunk_size => 1
23        };
24
25        mce_loop {
26           my ($mce, $chunk_ref, $chunk_id) = @_;
27           MCE->say("$chunk_id: $_");
28        } 40 .. 48;
29
30        -- Output
31
32        3: 42
33        1: 40
34        2: 41
35        4: 43
36        5: 44
37        6: 45
38        7: 46
39        8: 47
40        9: 48
41
42        ## Construction for 'auto' or greater than 1
43
44        use MCE::Loop;
45
46        MCE::Loop::init {
47           max_workers => 5, chunk_size => 'auto'
48        };
49
50        mce_loop {
51           my ($mce, $chunk_ref, $chunk_id) = @_;
52           for (@{ $chunk_ref }) {
53              MCE->say("$chunk_id: $_");
54           }
55        } 40 .. 48;
56
57        -- Output
58
59        1: 40
60        2: 42
61        1: 41
62        4: 46
63        2: 43
64        5: 48
65        3: 44
66        4: 47
67        3: 45
68

SYNOPSIS when CHUNK_SIZE EQUALS 1

70       All models in MCE default to 'auto' for chunk_size. The arguments for
71       the block are the same as writing a user_func block using the Core API.
72
73       Beginning with MCE 1.5, the next input item is placed into the input
74       scalar variable $_ when chunk_size equals 1. Otherwise, $_ points to
75       $chunk_ref containing many items. Basically, line 2 below may be
76       omitted from your code when using $_. One can call MCE->chunk_id to
77       obtain the current chunk id.
78
79        line 1:  user_func => sub {
80        line 2:     my ($mce, $chunk_ref, $chunk_id) = @_;
81        line 3:
82        line 4:     $_ points to $chunk_ref->[0]
83        line 5:        in MCE 1.5 when chunk_size == 1
84        line 6:
85        line 7:     $_ points to $chunk_ref
86        line 8:        in MCE 1.5 when chunk_size  > 1
87        line 9:  }
88
89       Follow this synopsis when chunk_size equals one. Looping is not
90       required from inside the block. Hence, the block is called once per
91       item.
92
93        ## Exports mce_loop, mce_loop_f, and mce_loop_s
94        use MCE::Loop;
95
96        MCE::Loop::init {
97           chunk_size => 1
98        };
99
100        ## Array or array_ref
101        mce_loop { do_work($_) } 1..10000;
102        mce_loop { do_work($_) } [ 1..10000 ];
103
104        ## File_path, glob_ref, or scalar_ref
105        mce_loop_f { chomp; do_work($_) } "/path/to/file";
106        mce_loop_f { chomp; do_work($_) } $file_handle;
107        mce_loop_f { chomp; do_work($_) } \$scalar;
108
109        ## Sequence of numbers (begin, end [, step, format])
110        mce_loop_s { do_work($_) } 1, 10000, 5;
111        mce_loop_s { do_work($_) } [ 1, 10000, 5 ];
112
113        mce_loop_s { do_work($_) } {
114           begin => 1, end => 10000, step => 5, format => undef
115        };
116

SYNOPSIS when CHUNK_SIZE is GREATER THAN 1

118       Follow this synopsis when chunk_size equals 'auto' or greater than 1.
119       This means having to loop through the chunk from inside the block.
120
121        use MCE::Loop;
122
123        MCE::Loop::init {          ## Chunk_size defaults to 'auto' when
124           chunk_size => 'auto'    ## not specified. Therefore, the init
125        };                         ## function may be omitted.
126
127        ## Syntax is shown for mce_loop for demonstration purposes.
128        ## Looping inside the block is the same for mce_loop_f and
129        ## mce_loop_s.
130
131        mce_loop { do_work($_) for (@{ $_ }) } 1..10000;
132
133        ## Same as above, resembles code using the Core API.
134
135        mce_loop {
136           my ($mce, $chunk_ref, $chunk_id) = @_;
137
138           for (@{ $chunk_ref }) {
139              do_work($_);
140           }
141
142        } 1..10000;
143
144       Chunking reduces the number of IPC calls behind the scenes. Think in
145       terms of chunks whenever processing a large amount of data. For
146       relatively small data, choosing 1 for chunk_size is fine.
147

OVERRIDING DEFAULTS

149       The following lists the options which may be overridden when loading
150       the module.
151
152        use Sereal qw( encode_sereal decode_sereal );
153        use CBOR::XS qw( encode_cbor decode_cbor );
154        use JSON::XS qw( encode_json decode_json );
155
156        use MCE::Loop
157            max_workers => 4,                # Default 'auto'
158            chunk_size => 100,               # Default 'auto'
159            tmp_dir => "/path/to/app/tmp",   # $MCE::Signal::tmp_dir
160            freeze => \&encode_sereal,       # \&Storable::freeze
161            thaw => \&decode_sereal          # \&Storable::thaw
162        ;
163
164       From MCE 1.8 onwards, Sereal 3.015+ is loaded automatically if
165       available.  Specify "Sereal => 0" to use Storable instead.
166
167        use MCE::Loop Sereal => 0;
168

CUSTOMIZING MCE

170       MCE::Loop->init ( options )
171       MCE::Loop::init { options }
172          The init function accepts a hash of MCE options.
173
174           use MCE::Loop;
175
176           MCE::Loop::init {
177              chunk_size => 1, max_workers => 4,
178
179              user_begin => sub {
180                 print "## ", MCE->wid, " started\n";
181              },
182
183              user_end => sub {
184                 print "## ", MCE->wid, " completed\n";
185              }
186           };
187
188           my %a = mce_loop { MCE->gather($_, $_ * $_) } 1..100;
189
190           print "\n", "@a{1..100}", "\n";
191
192           -- Output
193
194           ## 3 started
195           ## 1 started
196           ## 2 started
197           ## 4 started
198           ## 1 completed
199           ## 2 completed
200           ## 3 completed
201           ## 4 completed
202
203           1 4 9 16 25 36 49 64 81 100 121 144 169 196 225 256 289 324 361
204           400 441 484 529 576 625 676 729 784 841 900 961 1024 1089 1156
205           1225 1296 1369 1444 1521 1600 1681 1764 1849 1936 2025 2116 2209
206           2304 2401 2500 2601 2704 2809 2916 3025 3136 3249 3364 3481 3600
207           3721 3844 3969 4096 4225 4356 4489 4624 4761 4900 5041 5184 5329
208           5476 5625 5776 5929 6084 6241 6400 6561 6724 6889 7056 7225 7396
209           7569 7744 7921 8100 8281 8464 8649 8836 9025 9216 9409 9604 9801
210           10000
211

API DOCUMENTATION

213       The following assumes chunk_size equals 1 in order to demonstrate all
214       the possibilities for providing input data.
215
216       MCE::Loop->run ( sub { code }, list )
217       mce_loop { code } list
218          Input data may be defined using a list, an array ref, or a hash ref.
219
220           # $_ contains the item when chunk_size => 1
221
222           mce_loop { $_ } 1..1000;
223           mce_loop { $_ } \@list;
224
225           # chunking, any chunk_size => 1 or higher
226
227           my %res = mce_loop {
228              my ($mce, $chunk_ref, $chunk_id) = @_;
229              my %ret;
230              for my $item (@{ $chunk_ref }) {
231                 $ret{$item} = $item * 2;
232              }
233              MCE->gather(%ret);
234           }
235           \@list;
236
237           # input hash, current API available since 1.828
238
239           my %res = mce_loop {
240              my ($mce, $chunk_ref, $chunk_id) = @_;
241              my %ret;
242              for my $key (keys %{ $chunk_ref }) {
243                 $ret{$key} = $chunk_ref->{$key} * 2;
244              }
245              MCE->gather(%ret);
246           }
247           \%hash;
248
249       MCE::Loop->run_file ( sub { code }, file )
250       mce_loop_f { code } file
251          The fastest of these is the /path/to/file. Workers communicate the
252          next offset position among themselves with zero interaction by the
253          manager process.
254
255           # $_ contains the line when chunk_size => 1
256
257           mce_loop_f { $_ } "/path/to/file";  # faster
258           mce_loop_f { $_ } $file_handle;
259           mce_loop_f { $_ } \$scalar;
260
261           # chunking, any chunk_size => 1 or higher
262
263           my %res = mce_loop_f {
264              my ($mce, $chunk_ref, $chunk_id) = @_;
265              my $buf = '';
266              for my $line (@{ $chunk_ref }) {
267                 $buf .= $line;
268              }
269              MCE->gather($chunk_id, $buf);
270           }
271           "/path/to/file";
272
273       MCE::Loop->run_seq ( sub { code }, $beg, $end [, $step, $fmt ] )
274       mce_loop_s { code } $beg, $end [, $step, $fmt ]
275          Sequence may be defined as a list, an array reference, or a hash
276          reference.  The functions require both begin and end values to run.
277          Step and format are optional. The format is passed to sprintf (% may
278          be omitted below).
279
280           my ($beg, $end, $step, $fmt) = (10, 20, 0.1, "%4.1f");
281
282           # $_ contains the sequence number when chunk_size => 1
283
284           mce_loop_s { $_ } $beg, $end, $step, $fmt;
285           mce_loop_s { $_ } [ $beg, $end, $step, $fmt ];
286
287           mce_loop_s { $_ } {
288              begin => $beg, end => $end,
289              step => $step, format => $fmt
290           };
291
292           # chunking, any chunk_size => 1 or higher
293
294           my %res = mce_loop_s {
295              my ($mce, $chunk_ref, $chunk_id) = @_;
296              my $buf = '';
297              for my $seq (@{ $chunk_ref }) {
298                 $buf .= "$seq\n";
299              }
300              MCE->gather($chunk_id, $buf);
301           }
302           [ $beg, $end ];
303
304          With the bounds_only option, the sequence engine computes the
305          'begin' and 'end' items only, for the chunk, and not the items in
306          between (hence boundaries only). This option applies to sequence
307          only and has no effect when chunk_size equals 1.
308
309          The time to run is 0.006s below. This becomes 0.827s without the
310          bounds_only option due to computing all items in between, thus
311          creating a very large array. Basically, specify bounds_only => 1
312          when boundaries are all you need for looping inside the block; e.g.
313          Monte Carlo simulations.
314
315          Time was measured using 1 worker to emphasize the difference.
316
317           use MCE::Loop;
318
319           MCE::Loop::init {
320              max_workers => 1, chunk_size => 1_250_000,
321              bounds_only => 1
322           };
323
324           # Typically, the input scalar $_ contains the sequence number
325           # when chunk_size => 1, unless the bounds_only option is set
326           # which is the case here. Thus, $_ points to $chunk_ref.
327
328           mce_loop_s {
329              my ($mce, $chunk_ref, $chunk_id) = @_;
330
331              # $chunk_ref contains 2 items, not 1_250_000
332              # my ( $begin, $end ) = ( $_->[0], $_->[1] );
333
334              my $begin = $chunk_ref->[0];
335              my $end   = $chunk_ref->[1];
336
337              # for my $seq ( $begin .. $end ) {
338              #    ...
339              # }
340
341              MCE->printf("%7d .. %8d\n", $begin, $end);
342           }
343           [ 1, 10_000_000 ];
344
345           -- Output
346
347                 1 ..  1250000
348           1250001 ..  2500000
349           2500001 ..  3750000
350           3750001 ..  5000000
351           5000001 ..  6250000
352           6250001 ..  7500000
353           7500001 ..  8750000
354           8750001 .. 10000000
355
356       MCE::Loop->run ( sub { code }, iterator )
357       mce_loop { code } iterator
358          An iterator reference may be specified for input_data. Iterators are
359          described under section "SYNTAX for INPUT_DATA" at MCE::Core.
360
361           mce_loop { $_ } make_iterator(10, 30, 2);
362

GATHERING DATA

364       Unlike MCE::Map, where gathering and output order are handled for you
365       automatically, with MCE::Loop you call the gather method to have
366       results sent back to the manager process.
367
368        use MCE::Loop chunk_size => 1;
369
370        ## Output order is not guaranteed.
371        my @a1 = mce_loop { MCE->gather($_ * 2) } 1..100;
372        print "@a1\n\n";
373
374        ## Outputs to a hash instead (key, value).
375        my %h1 = mce_loop { MCE->gather($_, $_ * 2) } 1..100;
376        print "@h1{1..100}\n\n";
377
378        ## This does the same thing due to chunk_id starting at one.
379        my %h2 = mce_loop { MCE->gather(MCE->chunk_id, $_ * 2) } 1..100;
380        print "@h2{1..100}\n\n";
381
382       The gather method may be called multiple times within the block unlike
383       return which would leave the block. Therefore, think of gather as
384       yielding results immediately to the manager process without actually
385       leaving the block.
386
387        use MCE::Loop chunk_size => 1, max_workers => 3;
388
389        my @hosts = qw(
390           hosta hostb hostc hostd hoste
391        );
392
393        my %h3 = mce_loop {
394           my ($output, $error, $status); my $host = $_;
395
396           ## Do something with $host;
397           $output = "Worker ". MCE->wid .": Hello from $host";
398
399           if (MCE->chunk_id % 3 == 0) {
400              ## Simulating an error condition
401              local $? = 1; $status = $?;
402              $error = "Error from $host"
403           }
404           else {
405              $status = 0;
406           }
407
408           ## Ensure unique keys (key, value) when gathering to
409           ## a hash.
410           MCE->gather("$host.out", $output);
411           MCE->gather("$host.err", $error) if (defined $error);
412           MCE->gather("$host.sta", $status);
413
414        } @hosts;
415
416        foreach my $host (@hosts) {
417           print $h3{"$host.out"}, "\n";
418           print $h3{"$host.err"}, "\n" if (exists $h3{"$host.err"});
419           print "Exit status: ", $h3{"$host.sta"}, "\n\n";
420        }
421
422        -- Output
423
424        Worker 2: Hello from hosta
425        Exit status: 0
426
427        Worker 1: Hello from hostb
428        Exit status: 0
429
430        Worker 3: Hello from hostc
431        Error from hostc
432        Exit status: 1
433
434        Worker 2: Hello from hostd
435        Exit status: 0
436
437        Worker 1: Hello from hoste
438        Exit status: 0
439
440       The following uses an anonymous array containing 3 elements when
441       gathering data. Serialization is automatic behind the scenes.
442
443        my %h3 = mce_loop {
444           ...
445
446           MCE->gather($host, [$output, $error, $status]);
447
448        } @hosts;
449
450        foreach my $host (@hosts) {
451           print $h3{$host}->[0], "\n";
452           print $h3{$host}->[1], "\n" if (defined $h3{$host}->[1]);
453           print "Exit status: ", $h3{$host}->[2], "\n\n";
454        }
455
456       Although MCE::Map comes to mind, one may want additional control when
457       gathering data such as retaining output order.
458
459        use MCE::Loop;
460
461        sub preserve_order {
462           my %tmp; my $order_id = 1; my $gather_ref = $_[0];
463
464           return sub {
465              $tmp{ (shift) } = \@_;
466
467              while (1) {
468                 last unless exists $tmp{$order_id};
469                 push @{ $gather_ref }, @{ delete $tmp{$order_id++} };
470              }
471
472              return;
473           };
474        }
475
476        my @m2;
477
478        MCE::Loop::init {
479           chunk_size => 'auto', max_workers => 'auto',
480           gather => preserve_order(\@m2)
481        };
482
483        mce_loop {
484           my @a; my ($mce, $chunk_ref, $chunk_id) = @_;
485
486           ## Compute the entire chunk data at once.
487           push @a, map { $_ * 2 } @{ $chunk_ref };
488
489           ## Afterwards, invoke the gather feature, which
490           ## will direct the data to the callback function.
491           MCE->gather(MCE->chunk_id, @a);
492
493        } 1..100000;
494
495        MCE::Loop::finish;
496
497        print scalar @m2, "\n";
498
499       All 6 models support 'auto' for chunk_size unlike the Core API. Think
500       of the models as the basis for providing JIT for MCE. They create the
501       instance, tune max_workers, and tune chunk_size automatically
502       regardless of the hardware.
503
504       The following does the same thing using the Core API.
505
506        use MCE;
507
508        sub preserve_order {
509           ...
510        }
511
512        my $mce = MCE->new(
513           max_workers => 'auto', chunk_size => 8000,
514
515           user_func => sub {
516              my @a; my ($mce, $chunk_ref, $chunk_id) = @_;
517
518              ## Compute the entire chunk data at once.
519              push @a, map { $_ * 2 } @{ $chunk_ref };
520
521              ## Afterwards, invoke the gather feature, which
522              ## will direct the data to the callback function.
523              MCE->gather(MCE->chunk_id, @a);
524           }
525        );
526
527        my @m2;
528
529        $mce->process({ gather => preserve_order(\@m2) }, [1..100000]);
530        $mce->shutdown;
531
532        print scalar @m2, "\n";
533

MANUAL SHUTDOWN

535       MCE::Loop->finish
536       MCE::Loop::finish
537          Workers remain persistent as much as possible after running.
538          Shutdown occurs automatically when the script terminates. Call
539          finish when workers are no longer needed.
540
541           use MCE::Loop;
542
543           MCE::Loop::init {
544              chunk_size => 20, max_workers => 'auto'
545           };
546
547           mce_loop { ... } 1..100;
548
549           MCE::Loop::finish;
550

INDEX

552       MCE, MCE::Core
553

AUTHOR

555       Mario E. Roy, <marioeroy AT gmail DOT com>
556
557
558
559perl v5.28.0                      2018-08-25                      MCE::Loop(3)