WWW::Mechanize::Examples(3)  User Contributed Perl Documentation  WWW::Mechanize::Examples(3)


NAME

       WWW::Mechanize::Examples - Sample programs that use WWW::Mechanize


SYNOPSIS

       Plenty of people have learned WWW::Mechanize, and now, you can too!

       Following are user-supplied samples of WWW::Mechanize in action.  If
       you have samples you'd like to contribute, please send 'em to
       "<andy@petdance.com>".

       You can also look at the t/*.t files in the distribution.

       Please note that these examples are not intended to do any specific
       task.  For all I know, they're no longer functional because the sites
       they hit have changed.  They're here to give examples of how people
       have used WWW::Mechanize.

       Note that the examples are in reverse order of my having received them,
       so the freshest examples are always at the top.

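       Before diving into the contributed scripts, it may help to see the
       basic pattern they all share: create a WWW::Mechanize object, get() a
       page, then fill in forms or follow links.  Here is a minimal sketch of
       that pattern; the URL, form name, and field names are placeholders,
       not a real site.

           use strict;
           use warnings;
           use WWW::Mechanize;

           my $mech = WWW::Mechanize->new( autocheck => 1 );  # die on HTTP errors

           # Fetch a page, fill in a search form, and submit it
           $mech->get( 'http://www.example.com/' );
           $mech->submit_form(
               form_name => 'search',            # placeholder form name
               fields    => { q => 'perl' },     # placeholder field and value
           );

           # Follow a link on the result page and print what came back
           $mech->follow_link( text_regex => qr/next/i );
           print $mech->content;
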
   Starbucks Density Calculator, by Nat Torkington

       Here's a pair of scripts from Nat Torkington, editor for O'Reilly Media
       and co-author of the Perl Cookbook.

           Rael [Dornfest] discovered that you can easily find out how many
           Starbucks there are in an area by searching for "Starbucks".  So I
           wrote a silly scraper for some old census data and came up with
           some Starbucks density figures.  There's no meaning to these numbers
           thanks to errors from using old census data coupled with false
           positives in Yahoo search (e.g., "Dodie Starbuck-Your Style Desgn"
           in Portland OR).  But it was fun to waste a night on.

           Here are the top twenty cities in descending order of population,
           with the amount of territory each Starbucks has.  E.g., a New
           York, NY Starbucks covers 1.7 square miles of ground.

               New York, NY        1.7
               Los Angeles, CA     1.2
               Chicago, IL         1.0
               Houston, TX         4.6
               Philadelphia, PA    6.8
               San Diego, CA       2.7
               Detroit, MI        19.9
               Dallas, TX          2.7
               Phoenix, AZ         4.1
               San Antonio, TX    12.3
               San Jose, CA        1.1
               Baltimore, MD       3.9
               Indianapolis, IN   12.1
               San Francisco, CA   0.5
               Jacksonville, FL   39.9
               Columbus, OH        7.3
               Milwaukee, WI       5.1
               Memphis, TN        15.1
               Washington, DC      1.4
               Boston, MA          0.5

63       "get_pop_data"
64
           #!/usr/bin/perl -w

           use WWW::Mechanize;
           use Storable;

           $url = 'http://www.census.gov/population/www/documentation/twps0027.html';
           $m = WWW::Mechanize->new();
           $m->get($url);

           $c = $m->content;

           $c =~ m{<A NAME=.tabA.>(.*?)</TABLE>}s
             or die "Can't find the population table\n";
           $t = $1;
           @outer = $t =~ m{<TR.*?>(.*?)</TR>}gs;
           shift @outer;
           foreach $r (@outer) {
             @bits = $r =~ m{<TD.*?>(.*?)</TD>}gs;
             for ($x = 0; $x < @bits; $x++) {
               $b = $bits[$x];
               @v = split /\s*<BR>\s*/, $b;
               foreach (@v) { s/^\s+//; s/\s+$// }
               push @{$data[$x]}, @v;
             }
           }

           for ($y = 0; $y < @{$data[0]}; $y++) {
               $data{$data[1][$y]} = {
                   NAME => $data[1][$y],
                   RANK => $data[0][$y],
                   POP  => comma_free($data[2][$y]),
                   AREA => comma_free($data[3][$y]),
                   DENS => comma_free($data[4][$y]),
               };
           }

           store(\%data, "cities.dat");

           sub comma_free {
             my $n = shift;
             $n =~ s/,//g;   # remove all commas, not just the first
             return $n;
           }

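       (The table scraping above leans on hand-rolled regexes.  If you have
       HTML::TableExtract installed, the same sort of extraction can be done
       against the parsed table instead.  The sketch below is illustrative
       only, and the depth/count values are guesses that would need adjusting
       to the page's real structure.)

           use WWW::Mechanize;
           use HTML::TableExtract;

           my $m = WWW::Mechanize->new();
           $m->get('http://www.census.gov/population/www/documentation/twps0027.html');

           # depth/count pick out which table on the page to extract
           my $te = HTML::TableExtract->new( depth => 0, count => 0 );
           $te->parse( $m->content );

           for my $ts ( $te->tables ) {
               for my $row ( $ts->rows ) {
                   # each $row is an arrayref of cell text
                   print join( ' | ', map { defined $_ ? $_ : '' } @$row ), "\n";
               }
           }
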
109       "plague_of_coffee"
110
           #!/usr/bin/perl -w

           use WWW::Mechanize;
           use strict;
           use Storable;

           $SIG{__WARN__} = sub {} ;  # ssssssh

           my $Cities = retrieve("cities.dat");

           my $m = WWW::Mechanize->new();
           $m->get("http://local.yahoo.com/");

           my @cities = sort { $Cities->{$a}{RANK} <=> $Cities->{$b}{RANK} } keys %$Cities;
           foreach my $c ( @cities ) {
             my $fields = {
               'stx' => "starbucks",
               'csz' => $c,
             };

             my $r = $m->submit_form(form_number => 2,
                                     fields => $fields);
             die "Couldn't submit form" unless $r->is_success;

             my $hits = number_of_hits($r);
             #  my $ppl  = sprintf("%d", 1000 * $Cities->{$c}{POP} / $hits);
             #  print "$c has $hits Starbucks.  That's one for every $ppl people.\n";
             my $density = sprintf("%.1f", $Cities->{$c}{AREA} / $hits);
             print "$c : $density\n";
           }

           sub number_of_hits {
             my $r = shift;
             my $c = $r->content;
             if ($c =~ m{\d+ out of <b>(\d+)</b> total results for}) {
               return $1;
             }
             if ($c =~ m{Sorry, no .*? found in or near}) {
               return 0;
             }
             if ($c =~ m{Your search matched multiple cities}) {
               warn "Your search matched multiple cities\n";
               return 0;
             }
             if ($c =~ m{Sorry we couldn.t find that location}) {
               warn "No cities\n";
               return 0;
             }
             if ($c =~ m{Could not find.*?, showing results for}) {
               warn "No matches\n";
               return 0;
             }
             die "Unknown response\n$c\n";
           }

   pb-upload, by John Beppu

       This script takes filenames of images from the command line and uploads
       them to a www.photobucket.com folder.  John Beppu, the author, says:

           I had 92 pictures I wanted to upload, and doing it through a
           browser would've been torture.  But thanks to mech, all I had to do
           was `./pb.upload *.jpg` and watch it do its thing.  It felt good.
           If I had more time, I'd implement WWW::Photobucket on top of
           WWW::Mechanize.

           #!/usr/bin/perl -w -T

           use strict;
           use WWW::Mechanize;

           my $login    = "login_name";
           my $password = "password";
           my $folder   = "folder";

           my $url = "http://img78.photobucket.com/albums/v281/$login/$folder/";

           # login to your photobucket.com account
           my $mech = WWW::Mechanize->new();
           $mech->get($url);
           $mech->submit_form(
               form_number => 1,
               fields      => { password => $password },
           );
           die unless ($mech->success);

           # upload image files specified on command line
           foreach (@ARGV) {
               print "$_\n";
               $mech->form_number(2);
               $mech->field('the_file[]' => $_);
               $mech->submit();
           }

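       (Beppu's imagined WWW::Photobucket doesn't exist as far as I know, but
       wrapping a site-specific helper around WWW::Mechanize is mostly a
       matter of subclassing it.  The sketch below is purely hypothetical;
       the module name, the methods, and the form details are simply lifted
       from the script above.)

           package WWW::Photobucket;   # hypothetical, not a real CPAN module

           use strict;
           use warnings;
           use base 'WWW::Mechanize';

           sub login {
               my ($self, $login, $password, $folder) = @_;
               $self->get("http://img78.photobucket.com/albums/v281/$login/$folder/");
               $self->submit_form(
                   form_number => 1,
                   fields      => { password => $password },
               );
               return $self->success;
           }

           sub upload {
               my ($self, $file) = @_;
               $self->form_number(2);
               $self->field( 'the_file[]' => $file );
               $self->submit();
               return $self->success;
           }

           1;

       With that, the upload loop above shrinks to a login() call followed by
       upload($_) for each file on the command line.
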
   listmod, by Ian Langworth

       Ian Langworth contributes this little gem that will bring joy to
       beleaguered mailing list admins.  It discards spam messages through
       Mailman's web interface.

           #!/arch/unix/bin/perl
           use strict;
           use warnings;
           #
           # listmod - fast alternative to mailman list interface
           #
           # usage: listmod crew XXXXXXXX
           #

           die "usage: $0 <listname> <password>\n" unless @ARGV == 2;
           my ($listname, $password) = @ARGV;

           use CGI qw(unescape);

           use WWW::Mechanize;
           my $m = WWW::Mechanize->new( autocheck => 1 );

           use Term::ReadLine;
           my $term = Term::ReadLine->new($0);

           # submit the form, get the cookie, go to the list admin page
           $m->get("https://lists.ccs.neu.edu/bin/admindb/$listname");
           $m->set_visible( $password );
           $m->click;

           # exit if nothing to do
           print "There are no pending requests.\n" and exit
               if $m->content =~ /There are no pending requests/;

           # select the first form and examine its contents
           $m->form_number(1);
           my $f = $m->current_form or die "Couldn't get first form!\n";

           # get me the base form element for each email item
           my @items = map {m/^.+?-(.+)/} grep {m/senderbanp/} $f->param
               or die "Couldn't get items in first form!\n";

           # iterate through items, prompt user, commit actions
           foreach my $item (@items) {

               # show item info
               my $sender = unescape($item);
               my ($subject) = [$f->find_input("senderbanp-$item")->value_names]->[1]
                   =~ /Subject:\s+(.+?)\s+Size:/g;

               # prompt user
               my $choice = '';
               while ( $choice !~ /^[DAX]$/ ) {
                   print "$sender\: '$subject'\n";
                   $choice = uc $term->readline("Action: defer/accept/discard [dax]: ");
                   print "\n\n";
               }

               # set button
               $m->field("senderaction-$item" => {D=>0,A=>1,X=>3}->{$choice});
           }

           # submit actions
           $m->click;

   ccdl, by Andy Lester

       Steve McConnell, author of the landmark Code Complete, has put up the
       chapters for the 2nd edition in PDF format on his website.  I needed
       to download them to take to Kinko's to have printed.  This little
       script did it for me.

           #!/usr/bin/perl -w

           use strict;
           use WWW::Mechanize;

           my $start = "http://www.stevemcconnell.com/cc2/cc.htm";

           my $mech = WWW::Mechanize->new( autocheck => 1 );
           $mech->get( $start );

           my @links = $mech->find_all_links( url_regex => qr/\d+.+\.pdf$/ );

           for my $link ( @links ) {
               my $url = $link->url_abs;
               my $filename = $url;
               $filename =~ s[^.+/][];

               print "Fetching $url";
               $mech->get( $url, ':content_file' => $filename );

               print "   ", -s $filename, " bytes\n";
           }

   quotes.pl, by Andy Lester

       This script was going to be a hack in Spidering Hacks, but it got cut
       at the last minute, probably because it's against IMDB's TOS to scrape
       from the site.  I present it here as an example, not as a suggestion
       that you break their TOS.

       Last I checked, it no longer worked because IMDB's HTML had changed
       and no longer matched the script's regexes, but it's still good as
       sample code.

           #!/usr/bin/perl -w

           use strict;

           use WWW::Mechanize;
           use Getopt::Long;
           use Text::Wrap;

           my $match = undef;
           my $random = undef;
           GetOptions(
               "match=s" => \$match,
               "random" => \$random,
           ) or exit 1;

           my $movie = shift @ARGV or die "Must specify a movie\n";

           my $quotes_page = get_quotes_page( $movie );
           my @quotes = extract_quotes( $quotes_page );

           if ( $match ) {
               $match = quotemeta($match);
               @quotes = grep /$match/i, @quotes;
           }

           if ( $random ) {
               print $quotes[rand @quotes];
           } else {
               print join( "\n", @quotes );
           }

           sub get_quotes_page {
               my $movie = shift;

               my $mech = WWW::Mechanize->new;
               $mech->get( "http://www.imdb.com/search" );
               $mech->success or die "Can't get the search page";

               $mech->submit_form(
                   form_number => 2,
                   fields => {
                       title       => $movie,
                       restrict    => "Movies only",
                   },
               );

               my @links = $mech->find_all_links( url_regex => qr[^/Title] )
                   or die "No matches for \"$movie\" were found.\n";

               # Use the first link
               my ( $url, $title ) = @{$links[0]};

               warn "Checking $title...\n";

               $mech->get( $url );
               my $link = $mech->find_link( text_regex => qr/Memorable Quotes/i )
                   or die qq{"$title" has no quotes in IMDB!\n};

               warn "Fetching quotes...\n\n";
               $mech->get( $link->[0] );

               return $mech->content;
           }

           sub extract_quotes {
               my $page = shift;

               # Nibble away at the unwanted HTML at the beginning...
               $page =~ s/.+Memorable Quotes//si;
               $page =~ s/.+?(<a name)/$1/si;

               # ... and the end of the page
               $page =~ s/Browse titles in the movie quotes.+$//si;
               $page =~ s/<p.+$//g;

               # Quotes separated by an <HR> tag
               my @quotes = split( /<hr.+?>/, $page );

               for my $quote ( @quotes ) {
                   my @lines = split( /<br>/, $quote );
                   for ( @lines ) {
                       s/<[^>]+>//g;   # Strip HTML tags
                       s/\s+/ /g;      # Squash whitespace
                       s/^ //;         # Strip leading space
                       s/ $//;         # Strip trailing space
                       s/&#34;/"/g;    # Replace HTML entity quotes

                       # Word-wrap to fit in 72 columns
                       $Text::Wrap::columns = 72;
                       $_ = wrap( '', '    ', $_ );
                   }
                   $quote = join( "\n", @lines );
               }

               return @quotes;
           }

   cpansearch.pl, by Ed Silva

       A quick little utility to search the CPAN and fire up a browser with a
       results page.

           #!/usr/bin/perl

           # turn on perl's safety features
           use strict;
           use warnings;

           # work out the name of the module we're looking for
           my $module_name = $ARGV[0]
             or die "Must specify module name on command line";

           # create a new browser
           use WWW::Mechanize;
           my $browser = WWW::Mechanize->new();

           # tell it to get the main page
           $browser->get("http://search.cpan.org/");

           # okay, fill in the box with the name of the
           # module we want to look up
           $browser->form_number(1);
           $browser->field("query", $module_name);
           $browser->click();

           # click on the link that matches the module name
           $browser->follow_link( text_regex => $module_name );

           my $url = $browser->uri;

           # launch a browser...
           system('galeon', $url);

           exit(0);

   lj_friends.cgi, by Matt Cashner

           #!/usr/bin/perl

           # Provides an rss feed of a paid user's LiveJournal friends list
           # Full entries, protected entries, etc.
           # Add to your favorite rss reader as
           # http://your.site.com/cgi-bin/lj_friends.cgi?user=USER&password=PASSWORD

           use warnings;
           use strict;

           use WWW::Mechanize;
           use CGI;

           my $cgi = CGI->new();
           my $form = $cgi->Vars;

           my $agent = WWW::Mechanize->new();

           $agent->get('http://www.livejournal.com/login.bml');
           $agent->form_number('3');
           $agent->field('user',$form->{user});
           $agent->field('password',$form->{password});
           $agent->submit();
           $agent->get('http://www.livejournal.com/customview.cgi?user='.$form->{user}.'&styleid=225596&checkcookies=1');
           print "Content-type: text/plain\n\n";
           print $agent->content();

   Hacking Movable Type, by Dan Rinzel

           use WWW::Mechanize;

           # a tool to automatically post entries to a Movable Type weblog,
           # and set arbitrary creation dates

           my $mech = WWW::Mechanize->new();
           my $entry = {};
           $entry->{title} = "Test AutoEntry Title";
           $entry->{btext} = "Test AutoEntry Body";
           $entry->{date} = '2002-04-15 14:18:00';
           my $start = qq|http://my.blog.site/mt.cgi|;

           $mech->get($start);
           $mech->field('username','und3f1n3d');
           $mech->field('password','obscur3d');
           $mech->submit(); # to get login cookie
           $mech->get(qq|$start?__mode=view&_type=entry&blog_id=1|);
           $mech->form_name('entry_form');
           $mech->field('title',$entry->{title});
           $mech->field('category_id',1); # adjust as needed
           $mech->field('text',$entry->{btext});
           $mech->field('status',2); # publish, or 1 = draft
           my $results = $mech->submit();

           # if we're ok with this entry being datestamped "NOW" (no {date} in $entry)
           # we're done. Otherwise, time to be tricksy.
           # MT returns a 302 redirect from this form. The redirect itself contains a
           # <body onload=""> handler which takes the user to an editable version of
           # the form where the create date can be edited.
           # MT's date format of YYYY-MM-DD HH:MI:SS is the only one that won't error out.

           if ($entry->{date} && $entry->{date} =~ /^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}/) {
               # travel the redirect
               $results = $mech->get($results->header('Location'));
               $results->content =~ /<body onLoad="([^\"]+)"/is;
               my $js = $1;
               $js =~ /\'([^']+)\'/;
               $results = $mech->get($start.$1);
               $mech->form_name('entry_form');
               $mech->field('created_on_manual',$entry->{date});
               $mech->submit();
           }

   get-despair, by Randal Schwartz

       Randal submitted this bot that walks the despair.com site sucking down
       all the pictures.

           use strict;
           $|++;

           use WWW::Mechanize;
           use File::Basename;

           my $m = WWW::Mechanize->new;

           $m->get("http://www.despair.com/indem.html");

           my @top_links = @{$m->links};

           for my $top_link_num (0..$#top_links) {
               next unless $top_links[$top_link_num][0] =~ /^http:/;

               $m->follow_link( n=>$top_link_num ) or die "can't follow $top_link_num";

               print $m->uri, "\n";
               for my $image (grep m{^http://store4}, map $_->[0], @{$m->links}) {
                   my $local = basename $image;
                   print " $image...", $m->mirror($image, $local)->message, "\n"
               }

               $m->back or die "can't go back";
           }


perl v5.8.8                       2007-10-30       WWW::Mechanize::Examples(3)