1WWW::Mechanize::ExampleUss(e3r)Contributed Perl DocumentWaWtWi:o:nMechanize::Examples(3)
2
3
4

NAME

6       WWW::Mechanize::Examples - Sample programs that use WWW::Mechanize
7

VERSION

9       version 2.00
10

SYNOPSIS

12       Plenty of people have learned WWW::Mechanize, and now, you can too!
13
14       Following are user-supplied samples of WWW::Mechanize in action.  If
15       you have samples you'd like to contribute, please send 'em to
16       "<andy@petdance.com>".
17
18       You can also look at the t/*.t files in the distribution.
19
20       Please note that these examples are not intended to do any specific
21       task.  For all I know, they're no longer functional because the sites
22       they hit have changed.  They're here to give examples of how people
23       have used WWW::Mechanize.
24
25       Note that the examples are in reverse order of my having received them,
26       so the freshest examples are always at the top.
27
28   Starbucks Density Calculator, by Nat Torkington
29       Here's a pair of programs from Nat Torkington, editor for O'Reilly
30       Media and co-author of the Perl Cookbook.
31
32           Rael [Dornfest] discovered that you can easily find out how many
33           Starbucks there are in an area by searching for "Starbucks".  So I
34           wrote a silly scraper for some old census data and came up with
35           some Starbucks density figures.  There's no meaning to these
36           numbers thanks to errors from using old census data coupled with
37           false positives in Yahoo search (e.g., "Dodie Starbuck-Your Style
38           Desgn" in Portland OR).  But it was fun to waste a night on.
39
40           Here are the top twenty cities in descending order of population,
41           with the amount of territory each Starbucks has.  E.g., A New York
42           NY Starbucks covers 1.7 square miles of ground.
43
44               New York, NY        1.7
45               Los Angeles, CA     1.2
46               Chicago, IL         1.0
47               Houston, TX         4.6
48               Philadelphia, PA    6.8
49               San Diego, CA       2.7
50               Detroit, MI        19.9
51               Dallas, TX          2.7
52               Phoenix, AZ         4.1
53               San Antonio, TX    12.3
54               San Jose, CA        1.1
55               Baltimore, MD       3.9
56               Indianapolis, IN   12.1
57               San Francisco, CA   0.5
58               Jacksonville, FL   39.9
59               Columbus, OH        7.3
60               Milwaukee, WI       5.1
61               Memphis, TN        15.1
62               Washington, DC      1.4
63               Boston, MA          0.5
64
65       "get_pop_data"
66
67           #!/usr/bin/perl -w
68
69           use WWW::Mechanize;
70           use Storable;
71
72           $url = 'http://www.census.gov/population/www/documentation/twps0027.html';
73           $m = WWW::Mechanize->new();
74           $m->get($url);
75
76           $c = $m->content;
77
78           $c =~ m{<A NAME=.tabA.>(.*?)</TABLE>}s
79             or die "Can't find the population table\n";
80           $t = $1;
81           @outer = $t =~ m{<TR.*?>(.*?)</TR>}gs;
82           shift @outer;
83           foreach $r (@outer) {
84             @bits = $r =~ m{<TD.*?>(.*?)</TD>}gs;
85             for ($x = 0; $x < @bits; $x++) {
86               $b = $bits[$x];
87               @v = split /\s*<BR>\s*/, $b;
88               foreach (@v) { s/^\s+//; s/\s+$// }
89               push @{$data[$x]}, @v;
90             }
91           }
92
93           for ($y = 0; $y < @{$data[0]}; $y++) {
94               $data{$data[1][$y]} = {
95                   NAME => $data[1][$y],
96                   RANK => $data[0][$y],
97                   POP  => comma_free($data[2][$y]),
98                   AREA => comma_free($data[3][$y]),
99                   DENS => comma_free($data[4][$y]),
100               };
101           }
102
103           store(\%data, "cities.dat");
104
105           sub comma_free {
106             my $n = shift;
107             $n =~ s/,//;
108             return $n;
109           }
110
111       "plague_of_coffee"
112
113           #!/usr/bin/perl -w
114
115           use WWW::Mechanize;
116           use strict;
117           use Storable;
118
119           $SIG{__WARN__} = sub {} ;  # ssssssh
120
121           my $Cities = retrieve("cities.dat");
122
123           my $m = WWW::Mechanize->new();
124           $m->get("http://local.yahoo.com/");
125
126           my @cities = sort { $Cities->{$a}{RANK} <=> $Cities->{$b}{RANK} } keys %$Cities;
127           foreach my $c ( @cities ) {
128             my $fields = {
129               'stx' => "starbucks",
130               'csz' => $c,
131             };
132
133             my $r = $m->submit_form(form_number => 2,
134                                     fields => $fields);
135             die "Couldn't submit form" unless $r->is_success;
136
137             my $hits = number_of_hits($r);
138             #  my $ppl  = sprintf("%d", 1000 * $Cities->{$c}{POP} / $hits);
139             #  print "$c has $hits Starbucks.  That's one for every $ppl people.\n";
140             my $density = sprintf("%.1f", $Cities->{$c}{AREA} / $hits);
141             print "$c : $density\n";
142           }
143
144           sub number_of_hits {
145             my $r = shift;
146             my $c = $r->content;
147             if ($c =~ m{\d+ out of <b>(\d+)</b> total results for}) {
148               return $1;
149             }
150             if ($c =~ m{Sorry, no .*? found in or near}) {
151               return 0;
152             }
153             if ($c =~ m{Your search matched multiple cities}) {
154               warn "Your search matched multiple cities\n";
155               return 0;
156             }
157             if ($c =~ m{Sorry we couldn.t find that location}) {
158               warn "No cities\n";
159               return 0;
160             }
161             if ($c =~ m{Could not find.*?, showing results for}) {
162               warn "No matches\n";
163               return 0;
164             }
165             die "Unknown response\n$c\n";
166           }
167
168   pb-upload, by John Beppu
169       This program takes filenames of images from the command line and
170       uploads them to a www.photobucket.com folder.  John Beppu, the author,
171       says:
172
173           I had 92 pictures I wanted to upload, and doing it through a
174           browser would've been torture.  But thanks to mech, all I had to do
175           was `./pb.upload *.jpg` and watch it do its thing.  It felt good.
176           If I had more time, I'd implement WWW::Photobucket on top of
177           WWW::Mechanize.
178
179           #!/usr/bin/perl -w -T
180
181           use strict;
182           use WWW::Mechanize;
183
184           my $login    = "login_name";
185           my $password = "password";
186           my $folder   = "folder";
187
188           my $url = "http://img78.photobucket.com/albums/v281/$login/$folder/";
189
190           # login to your photobucket.com account
191           my $mech = WWW::Mechanize->new();
192           $mech->get($url);
193           $mech->submit_form(
194               form_number => 1,
195               fields      => { password => $password },
196           );
197           die unless ($mech->success);
198
199           # upload image files specified on command line
200           foreach (@ARGV) {
201               print "$_\n";
202               $mech->form_number(2);
203               $mech->field('the_file[]' => $_);
204               $mech->submit();
205           }
206
207   listmod, by Ian Langworth
208       Ian Langworth contributes this little gem that will bring joy to
209       beleaguered mailing list admins.  It discards spam messages through
210       mailman's web interface.
211
212           #!/arch/unix/bin/perl
213           use strict;
214           use warnings;
215           #
216           # listmod - fast alternative to mailman list interface
217           #
218           # usage: listmod crew XXXXXXXX
219           #
220
221           die "usage: $0 <listname> <password>\n" unless @ARGV == 2;
222           my ($listname, $password) = @ARGV;
223
224           use CGI qw(unescape);
225
226           use WWW::Mechanize;
227           my $m = WWW::Mechanize->new( autocheck => 1 );
228
229           use Term::ReadLine;
230           my $term = Term::ReadLine->new($0);
231
232           # submit the form, get the cookie, go to the list admin page
233           $m->get("https://lists.ccs.neu.edu/bin/admindb/$listname");
234           $m->set_visible( $password );
235           $m->click;
236
237           # exit if nothing to do
238           print "There are no pending requests.\n" and exit
239               if $m->content =~ /There are no pending requests/;
240
241           # select the first form and examine its contents
242           $m->form_number(1);
243           my $f = $m->current_form or die "Couldn't get first form!\n";
244
245           # get me the base form element for each email item
246           my @items = map {m/^.+?-(.+)/} grep {m/senderbanp/} $f->param
247               or die "Couldn't get items in first form!\n";
248
249           # iterate through items, prompt user, commit actions
250           foreach my $item (@items) {
251
252               # show item info
253               my $sender = unescape($item);
254               my ($subject) = [$f->find_input("senderbanp-$item")->value_names]->[1]
255                   =~ /Subject:\s+(.+?)\s+Size:/g;
256
257               # prompt user
258               my $choice = '';
259               while ( $choice !~ /^[DAX]$/ ) {
260                   print "$sender\: '$subject'\n";
261                   $choice = uc $term->readline("Action: defer/accept/discard [dax]: ");
262                   print "\n\n";
263               }
264
265               # set button
266               $m->field("senderaction-$item" => {D=>0,A=>1,X=>3}->{$choice});
267           }
268
269           # submit actions
270           $m->click;
271
272   ccdl, by Andy Lester
273       Steve McConnell, author of the landmark Code Complete has put up the
274       chapters for the 2nd edition in PDF format on his website.  I needed to
275       download them to take to Kinko's to have printed.  This little program
276       did it for me.
277
278           #!/usr/bin/perl -w
279
280           use strict;
281           use WWW::Mechanize;
282
283           my $start = "http://www.stevemcconnell.com/cc2/cc.htm";
284
285           my $mech = WWW::Mechanize->new( autocheck => 1 );
286           $mech->get( $start );
287
288           my @links = $mech->find_all_links( url_regex => qr/\d+.+\.pdf$/ );
289
290           for my $link ( @links ) {
291               my $url = $link->url_abs;
292               my $filename = $url;
293               $filename =~ s[^.+/][];
294
295               print "Fetching $url";
296               $mech->get( $url, ':content_file' => $filename );
297
298               print "   ", -s $filename, " bytes\n";
299           }
300
301   quotes.pl, by Andy Lester
302       This was a program that was going to get a hack in Spidering Hacks, but
303       got cut at the last minute, probably because it's against IMDB's TOS to
304       scrape from it.  I present it here as an example, not a suggestion that
305       you break their TOS.
306
307       Last I checked, it didn't work because their HTML didn't match, but
308       it's still good as sample code.
309
310           #!/usr/bin/perl -w
311
312           use strict;
313
314           use WWW::Mechanize;
315           use Getopt::Long;
316           use Text::Wrap;
317
318           my $match = undef;
319           my $random = undef;
320           GetOptions(
321               "match=s" => \$match,
322               "random" => \$random,
323           ) or exit 1;
324
325           my $movie = shift @ARGV or die "Must specify a movie\n";
326
327           my $quotes_page = get_quotes_page( $movie );
328           my @quotes = extract_quotes( $quotes_page );
329
330           if ( $match ) {
331               $match = quotemeta($match);
332               @quotes = grep /$match/i, @quotes;
333           }
334
335           if ( $random ) {
336               print $quotes[rand @quotes];
337           }
338           else {
339               print join( "\n", @quotes );
340           }
341
342
343           sub get_quotes_page {
344               my $movie = shift;
345
346               my $mech = WWW::Mechanize->new;
347               $mech->get( "http://www.imdb.com/search" );
348               $mech->success or die "Can't get the search page";
349
350               $mech->submit_form(
351                   form_number => 2,
352                   fields => {
353                       title   => $movie,
354                       restrict    => "Movies only",
355                   },
356               );
357
358               my @links = $mech->find_all_links( url_regex => qr[^/Title] )
359                   or die "No matches for \"$movie\" were found.\n";
360
361               # Use the first link
362               my ( $url, $title ) = @{$links[0]};
363
364               warn "Checking $title...\n";
365
366               $mech->get( $url );
367               my $link = $mech->find_link( text_regex => qr/Memorable Quotes/i )
368                   or die qq{"$title" has no quotes in IMDB!\n};
369
370               warn "Fetching quotes...\n\n";
371               $mech->get( $link->[0] );
372
373               return $mech->content;
374           }
375
376
377           sub extract_quotes {
378               my $page = shift;
379
380               # Nibble away at the unwanted HTML at the beginnning...
381               $page =~ s/.+Memorable Quotes//si;
382               $page =~ s/.+?(<a name)/$1/si;
383
384               # ... and the end of the page
385               $page =~ s/Browse titles in the movie quotes.+$//si;
386               $page =~ s/<p.+$//g;
387
388               # Quotes separated by an <HR> tag
389               my @quotes = split( /<hr.+?>/, $page );
390
391               for my $quote ( @quotes ) {
392                   my @lines = split( /<br>/, $quote );
393                   for ( @lines ) {
394                       s/<[^>]+>//g;   # Strip HTML tags
395                       s/\s+/ /g;          # Squash whitespace
396                       s/^ //;     # Strip leading space
397                       s/ $//;     # Strip trailing space
398                       s/&#34;/"/g;    # Replace HTML entity quotes
399
400                       # Word-wrap to fit in 72 columns
401                       $Text::Wrap::columns = 72;
402                       $_ = wrap( '', '    ', $_ );
403                   }
404                   $quote = join( "\n", @lines );
405               }
406
407               return @quotes;
408           }
409
410   cpansearch.pl, by Ed Silva
411       A quick little utility to search the CPAN and fire up a browser with a
412       results page.
413
414           #!/usr/bin/perl
415
416           # turn on perl's safety features
417           use strict;
418           use warnings;
419
420           # work out the name of the module we're looking for
421           my $module_name = $ARGV[0]
422             or die "Must specify module name on command line";
423
424           # create a new browser
425           use WWW::Mechanize;
426           my $browser = WWW::Mechanize->new();
427
428           # tell it to get the main page
429           $browser->get("http://search.cpan.org/");
430
431           # okay, fill in the box with the name of the
432           # module we want to look up
433           $browser->form_number(1);
434           $browser->field("query", $module_name);
435           $browser->click();
436
437           # click on the link that matches the module name
438           $browser->follow_link( text_regex => $module_name );
439
440           my $url = $browser->uri;
441
442           # launch a browser...
443           system('galeon', $url);
444
445           exit(0);
446
447   lj_friends.cgi, by Matt Cashner
448           #!/usr/bin/perl
449
450           # Provides an rss feed of a paid user's LiveJournal friends list
451           # Full entries, protected entries, etc.
452           # Add to your favorite rss reader as
453           # http://your.site.com/cgi-bin/lj_friends.cgi?user=USER&password=PASSWORD
454
455           use warnings;
456           use strict;
457
458           use WWW::Mechanize;
459           use CGI;
460
461           my $cgi = CGI->new();
462           my $form = $cgi->Vars;
463
464           my $agent = WWW::Mechanize->new();
465
466           $agent->get('http://www.livejournal.com/login.bml');
467           $agent->form_number('3');
468           $agent->field('user',$form->{user});
469           $agent->field('password',$form->{password});
470           $agent->submit();
471           $agent->get('http://www.livejournal.com/customview.cgi?user='.$form->{user}.'&styleid=225596&checkcookies=1');
472           print "Content-type: text/plain\n\n";
473           print $agent->content();
474
475   Hacking Movable Type, by Dan Rinzel
476           use strict;
477           use WWW::Mechanize;
478
479           # a tool to automatically post entries to a moveable type weblog, and set arbitrary creation dates
480
481           my $mech = WWW::Mechanize->new();
482           my $entry;
483           $entry->{title} = "Test AutoEntry Title";
484           $entry->{btext} = "Test AutoEntry Body";
485           $entry->{date} = '2002-04-15 14:18:00';
486           my $start = qq|http://my.blog.site/mt.cgi|;
487
488           $mech->get($start);
489           $mech->field('username','und3f1n3d');
490           $mech->field('password','obscur3d');
491           $mech->submit(); # to get login cookie
492           $mech->get(qq|$start?__mode=view&_type=entry&blog_id=1|);
493           $mech->form_name('entry_form');
494           $mech->field('title',$entry->{title});
495           $mech->field('category_id',1); # adjust as needed
496           $mech->field('text',$entry->{btext});
497           $mech->field('status',2); # publish, or 1 = draft
498           $results = $mech->submit();
499
500           # if we're ok with this entry being datestamped "NOW" (no {date} in %entry)
501           # we're done. Otherwise, time to be tricksy
502           # MT returns a 302 redirect from this form. the redirect itself contains a <body onload=""> handler
503           # which takes the user to an editable version of the form where the create date can be edited
504           # MT date format of YYYY-MM-DD HH:MI:SS is the only one that won't error out
505
506           if ($entry->{date} && $entry->{date} =~ /^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}/) {
507               # travel the redirect
508               $results = $mech->get($results->{_headers}->{location});
509               $results->{_content} =~ /<body onLoad="([^\"]+)"/is;
510               my $js = $1;
511               $js =~ /\'([^']+)\'/;
512               $results = $mech->get($start.$1);
513               $mech->form_name('entry_form');
514               $mech->field('created_on_manual',$entry->{date});
515               $mech->submit();
516           }
517
518   get-despair, by Randal Schwartz
519       Randal submitted this bot that walks the despair.com site sucking down
520       all the pictures.
521
522           use strict;
523           $|++;
524
525           use WWW::Mechanize;
526           use File::Basename;
527
528           my $m = WWW::Mechanize->new;
529
530           $m->get("http://www.despair.com/indem.html");
531
532           my @top_links = @{$m->links};
533
534           for my $top_link_num (0..$#top_links) {
535               next unless $top_links[$top_link_num][0] =~ /^http:/;
536
537               $m->follow_link( n=>$top_link_num ) or die "can't follow $top_link_num";
538
539               print $m->uri, "\n";
540               for my $image (grep m{^http://store4}, map $_->[0], @{$m->links}) {
541                   my $local = basename $image;
542                   print " $image...", $m->mirror($image, $local)->message, "\n"
543               }
544
545               $m->back or die "can't go back";
546           }
547

AUTHOR

549       Andy Lester <andy at petdance.com>
550
552       This software is copyright (c) 2004 by Andy Lester.
553
554       This is free software; you can redistribute it and/or modify it under
555       the same terms as the Perl 5 programming language system itself.
556
557
558
559perl v5.32.0                      2020-07-28       WWW::Mechanize::Examples(3)
Impressum