NAME

       WWW::Mechanize::Examples - Sample programs that use WWW::Mechanize

SYNOPSIS

       Plenty of people have learned WWW::Mechanize, and now, you can too!

       Following are user-supplied samples of WWW::Mechanize in action.  If
       you have samples you'd like to contribute, please send 'em to
       "<andy@petdance.com>".

       You can also look at the t/*.t files in the distribution.

       Please note that these examples are not intended to do any specific
       task.  For all I know, they're no longer functional because the sites
       they hit have changed.  They're here to give examples of how people
       have used WWW::Mechanize.

       Note that the examples are in reverse order of my having received them,
       so the freshest examples are always at the top.

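       If you've never used the module before, most of these programs share
       the same basic shape: make a mech object, get a page, then fill in
       forms or follow links.  Here's a minimal sketch of that shape; the URL
       and field name are placeholders, not taken from any example below.

           #!/usr/bin/perl
           use strict;
           use warnings;
           use WWW::Mechanize;

           # autocheck => 1 makes every failed request die, so there's no
           # need to test each response by hand
           my $mech = WWW::Mechanize->new( autocheck => 1 );

           # fetch a page and follow a link by its text
           $mech->get( 'http://www.example.com/' );
           $mech->follow_link( text_regex => qr/search/i );

           # fill in and submit the first form on the page
           $mech->submit_form(
               form_number => 1,
               fields      => { q => 'WWW::Mechanize' },
           );

           print $mech->content;
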
   Starbucks Density Calculator, by Nat Torkington
       Here's a pair of programs from Nat Torkington, editor for O'Reilly
       Media and co-author of the Perl Cookbook.

           Rael [Dornfest] discovered that you can easily find out how many
           Starbucks there are in an area by searching for "Starbucks".  So I
           wrote a silly scraper for some old census data and came up with
           some Starbucks density figures.  There's no meaning to these
           numbers thanks to errors from using old census data coupled with
           false positives in Yahoo search (e.g., "Dodie Starbuck-Your Style
           Desgn" in Portland OR).  But it was fun to waste a night on.

           Here are the top twenty cities in descending order of population,
           with the amount of territory each Starbucks has.  E.g., a New York,
           NY Starbucks covers 1.7 square miles of ground.

               New York, NY        1.7
               Los Angeles, CA     1.2
               Chicago, IL         1.0
               Houston, TX         4.6
               Philadelphia, PA    6.8
               San Diego, CA       2.7
               Detroit, MI        19.9
               Dallas, TX          2.7
               Phoenix, AZ         4.1
               San Antonio, TX    12.3
               San Jose, CA        1.1
               Baltimore, MD       3.9
               Indianapolis, IN   12.1
               San Francisco, CA   0.5
               Jacksonville, FL   39.9
               Columbus, OH        7.3
               Milwaukee, WI       5.1
               Memphis, TN        15.1
               Washington, DC      1.4
               Boston, MA          0.5

62       "get_pop_data"
63
           #!/usr/bin/perl -w

           use WWW::Mechanize;
           use Storable;

           $url = 'http://www.census.gov/population/www/documentation/twps0027.html';
           $m = WWW::Mechanize->new();
           $m->get($url);

           $c = $m->content;

           # Pull out the population table, then split it into rows and cells
           $c =~ m{<A NAME=.tabA.>(.*?)</TABLE>}s
             or die "Can't find the population table\n";
           $t = $1;
           @outer = $t =~ m{<TR.*?>(.*?)</TR>}gs;
           shift @outer;    # drop the header row
           foreach $r (@outer) {
             @bits = $r =~ m{<TD.*?>(.*?)</TD>}gs;
             for ($x = 0; $x < @bits; $x++) {
               $b = $bits[$x];
               @v = split /\s*<BR>\s*/, $b;
               foreach (@v) { s/^\s+//; s/\s+$// }
               push @{$data[$x]}, @v;
             }
           }

           # Re-key the columns by city name and save for plague_of_coffee
           for ($y = 0; $y < @{$data[0]}; $y++) {
               $data{$data[1][$y]} = {
                   NAME => $data[1][$y],
                   RANK => $data[0][$y],
                   POP  => comma_free($data[2][$y]),
                   AREA => comma_free($data[3][$y]),
                   DENS => comma_free($data[4][$y]),
               };
           }

           store(\%data, "cities.dat");

           sub comma_free {
             my $n = shift;
             $n =~ s/,//g;    # strip all commas, not just the first
             return $n;
           }

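       "get_pop_data" above and "plague_of_coffee" below talk to each other
       through the cities.dat file written by Storable.  If you want to
       eyeball that data between runs, a little sketch like this (not part of
       Nat's originals) will dump each city's record:

           #!/usr/bin/perl -w
           use strict;
           use Storable;

           my $cities = retrieve("cities.dat");
           for my $name ( sort keys %$cities ) {
               my $c = $cities->{$name};
               printf "%-20s rank %2d  pop %9d  %7.1f sq mi\n",
                   $c->{NAME}, $c->{RANK}, $c->{POP}, $c->{AREA};
           }
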
108       "plague_of_coffee"
109
           #!/usr/bin/perl -w

           use WWW::Mechanize;
           use strict;
           use Storable;

           $SIG{__WARN__} = sub {} ;  # ssssssh

           my $Cities = retrieve("cities.dat");

           my $m = WWW::Mechanize->new();
           $m->get("http://local.yahoo.com/");

           my @cities = sort { $Cities->{$a}{RANK} <=> $Cities->{$b}{RANK} } keys %$Cities;
           foreach my $c ( @cities ) {
             my $fields = {
               'stx' => "starbucks",
               'csz' => $c,
             };

             my $r = $m->submit_form(form_number => 2,
                                     fields => $fields);
             die "Couldn't submit form" unless $r->is_success;

             my $hits = number_of_hits($r);
             next unless $hits;   # avoid dividing by zero when nothing matched
             #  my $ppl  = sprintf("%d", 1000 * $Cities->{$c}{POP} / $hits);
             #  print "$c has $hits Starbucks.  That's one for every $ppl people.\n";
             my $density = sprintf("%.1f", $Cities->{$c}{AREA} / $hits);
             print "$c : $density\n";
           }

           sub number_of_hits {
             my $r = shift;
             my $c = $r->content;
             if ($c =~ m{\d+ out of <b>(\d+)</b> total results for}) {
               return $1;
             }
             if ($c =~ m{Sorry, no .*? found in or near}) {
               return 0;
             }
             if ($c =~ m{Your search matched multiple cities}) {
               warn "Your search matched multiple cities\n";
               return 0;
             }
             if ($c =~ m{Sorry we couldn.t find that location}) {
               warn "No cities\n";
               return 0;
             }
             if ($c =~ m{Could not find.*?, showing results for}) {
               warn "No matches\n";
               return 0;
             }
             die "Unknown response\n$c\n";
           }

   pb-upload, by John Beppu
       This program takes filenames of images from the command line and
       uploads them to a www.photobucket.com folder.  John Beppu, the author,
       says:

           I had 92 pictures I wanted to upload, and doing it through a
           browser would've been torture.  But thanks to mech, all I had to do
           was `./pb.upload *.jpg` and watch it do its thing.  It felt good.
           If I had more time, I'd implement WWW::Photobucket on top of
           WWW::Mechanize.

           #!/usr/bin/perl -w -T

           use strict;
           use WWW::Mechanize;

           my $login    = "login_name";
           my $password = "password";
           my $folder   = "folder";

           my $url = "http://img78.photobucket.com/albums/v281/$login/$folder/";

           # login to your photobucket.com account
           my $mech = WWW::Mechanize->new();
           $mech->get($url);
           $mech->submit_form(
               form_number => 1,
               fields      => { password => $password },
           );
           die unless ($mech->success);

           # upload image files specified on command line
           foreach (@ARGV) {
               print "$_\n";
               $mech->form_number(2);
               $mech->field('the_file[]' => $_);
               $mech->submit();
           }

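       The upload loop above selects the form, sets the file field, and
       submits it as three separate calls.  The same thing can be written as
       one "submit_form" call, as several of the other examples here do; this
       is just a sketch of the equivalent, assuming the same form layout:

           # same loop, condensed into one submit_form call per file
           foreach my $file (@ARGV) {
               print "$file\n";
               $mech->submit_form(
                   form_number => 2,
                   fields      => { 'the_file[]' => $file },
               );
           }
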
   listmod, by Ian Langworth
       Ian Langworth contributes this little gem that will bring joy to
       beleaguered mailing list admins.  It discards spam messages through
       mailman's web interface.

           #!/arch/unix/bin/perl
           use strict;
           use warnings;
           #
           # listmod - fast alternative to mailman list interface
           #
           # usage: listmod crew XXXXXXXX
           #

           die "usage: $0 <listname> <password>\n" unless @ARGV == 2;
           my ($listname, $password) = @ARGV;

           use CGI qw(unescape);

           use WWW::Mechanize;
           my $m = WWW::Mechanize->new( autocheck => 1 );

           use Term::ReadLine;
           my $term = Term::ReadLine->new($0);

           # submit the form, get the cookie, go to the list admin page
           $m->get("https://lists.ccs.neu.edu/bin/admindb/$listname");
           $m->set_visible( $password );
           $m->click;

           # exit if nothing to do
           print "There are no pending requests.\n" and exit
               if $m->content =~ /There are no pending requests/;

           # select the first form and examine its contents
           $m->form_number(1);
           my $f = $m->current_form or die "Couldn't get first form!\n";

           # get me the base form element for each email item
           my @items = map {m/^.+?-(.+)/} grep {m/senderbanp/} $f->param
               or die "Couldn't get items in first form!\n";

           # iterate through items, prompt user, commit actions
           foreach my $item (@items) {

               # show item info
               my $sender = unescape($item);
               my ($subject) = [$f->find_input("senderbanp-$item")->value_names]->[1]
                   =~ /Subject:\s+(.+?)\s+Size:/g;

               # prompt user
               my $choice = '';
               while ( $choice !~ /^[DAX]$/ ) {
                   print "$sender\: '$subject'\n";
                   $choice = uc $term->readline("Action: defer/accept/discard [dax]: ");
                   print "\n\n";
               }

               # set button
               $m->field("senderaction-$item" => {D=>0,A=>1,X=>3}->{$choice});
           }

           # submit actions
           $m->click;

   ccdl, by Andy Lester
       Steve McConnell, author of the landmark Code Complete, has put up the
       chapters for the 2nd edition in PDF format on his website.  I needed to
       download them to take to Kinko's to have printed.  This little program
       did it for me.

           #!/usr/bin/perl -w

           use strict;
           use WWW::Mechanize;

           my $start = "http://www.stevemcconnell.com/cc2/cc.htm";

           my $mech = WWW::Mechanize->new( autocheck => 1 );
           $mech->get( $start );

           my @links = $mech->find_all_links( url_regex => qr/\d+.+\.pdf$/ );

           for my $link ( @links ) {
               my $url = $link->url_abs;
               my $filename = $url;
               $filename =~ s[^.+/][];

               print "Fetching $url";
               $mech->get( $url, ':content_file' => $filename );

               print "   ", -s $filename, " bytes\n";
           }

   quotes.pl, by Andy Lester
       This program was going to be a hack in Spidering Hacks, but it got cut
       at the last minute, probably because it's against IMDB's TOS to scrape
       from it.  I present it here as an example, not a suggestion that you
       break their TOS.

       Last I checked, it didn't work because their HTML no longer matched my
       regexes, but it's still good as sample code.

           #!/usr/bin/perl -w

           use strict;

           use WWW::Mechanize;
           use Getopt::Long;
           use Text::Wrap;

           my $match = undef;
           my $random = undef;
           GetOptions(
               "match=s" => \$match,
               "random" => \$random,
           ) or exit 1;

           my $movie = shift @ARGV or die "Must specify a movie\n";

           my $quotes_page = get_quotes_page( $movie );
           my @quotes = extract_quotes( $quotes_page );

           if ( $match ) {
               $match = quotemeta($match);
               @quotes = grep /$match/i, @quotes;
           }

           if ( $random ) {
               print $quotes[rand @quotes];
           }
           else {
               print join( "\n", @quotes );
           }


           sub get_quotes_page {
               my $movie = shift;

               my $mech = WWW::Mechanize->new;
               $mech->get( "http://www.imdb.com/search" );
               $mech->success or die "Can't get the search page";

               $mech->submit_form(
                   form_number => 2,
                   fields => {
                       title    => $movie,
                       restrict => "Movies only",
                   },
               );

               my @links = $mech->find_all_links( url_regex => qr[^/Title] )
                   or die "No matches for \"$movie\" were found.\n";

               # Use the first link.  A Link object is a blessed array
               # ref, so this pulls out its URL and link text.
               my ( $url, $title ) = @{$links[0]};

               warn "Checking $title...\n";

               $mech->get( $url );
               my $link = $mech->find_link( text_regex => qr/Memorable Quotes/i )
                   or die qq{"$title" has no quotes in IMDB!\n};

               warn "Fetching quotes...\n\n";
               $mech->get( $link->[0] );

               return $mech->content;
           }


           sub extract_quotes {
               my $page = shift;

               # Nibble away at the unwanted HTML at the beginning...
               $page =~ s/.+Memorable Quotes//si;
               $page =~ s/.+?(<a name)/$1/si;

               # ... and the end of the page
               $page =~ s/Browse titles in the movie quotes.+$//si;
               $page =~ s/<p.+$//g;

               # Quotes separated by an <HR> tag
               my @quotes = split( /<hr.+?>/, $page );

               for my $quote ( @quotes ) {
                   my @lines = split( /<br>/, $quote );
                   for ( @lines ) {
                       s/<[^>]+>//g;   # Strip HTML tags
                       s/\s+/ /g;      # Squash whitespace
                       s/^ //;         # Strip leading space
                       s/ $//;         # Strip trailing space
                       s/&#34;/"/g;    # Replace HTML entity quotes

                       # Word-wrap to fit in 72 columns
                       $Text::Wrap::columns = 72;
                       $_ = wrap( '', '    ', $_ );
                   }
                   $quote = join( "\n", @lines );
               }

               return @quotes;
           }

   cpansearch.pl, by Ed Silva
       A quick little utility to search the CPAN and fire up a browser with a
       results page.

           #!/usr/bin/perl

           # turn on perl's safety features
           use strict;
           use warnings;

           # work out the name of the module we're looking for
           my $module_name = $ARGV[0]
             or die "Must specify module name on command line";

           # create a new browser
           use WWW::Mechanize;
           my $browser = WWW::Mechanize->new();

           # tell it to get the main page
           $browser->get("http://search.cpan.org/");

           # okay, fill in the box with the name of the
           # module we want to look up
           $browser->form_number(1);
           $browser->field("query", $module_name);
           $browser->click();

           # click on the link that matches the module name
           # (text_regex wants a real regex, so quote the module name)
           $browser->follow_link( text_regex => qr/\Q$module_name\E/ );

           my $url = $browser->uri;

           # launch a browser...
           system('galeon', $url);

           exit(0);

   lj_friends.cgi, by Matt Cashner
           #!/usr/bin/perl

           # Provides an rss feed of a paid user's LiveJournal friends list
           # Full entries, protected entries, etc.
           # Add to your favorite rss reader as
           # http://your.site.com/cgi-bin/lj_friends.cgi?user=USER&password=PASSWORD

           use warnings;
           use strict;

           use WWW::Mechanize;
           use CGI;

           my $cgi = CGI->new();
           my $form = $cgi->Vars;

           my $agent = WWW::Mechanize->new();

           $agent->get('http://www.livejournal.com/login.bml');
           $agent->form_number('3');
           $agent->field('user',$form->{user});
           $agent->field('password',$form->{password});
           $agent->submit();
           $agent->get('http://www.livejournal.com/customview.cgi?user='.$form->{user}.'&styleid=225596&checkcookies=1');
           print "Content-type: text/plain\n\n";
           print $agent->content();

   Hacking Movable Type, by Dan Rinzel
           use strict;
           use WWW::Mechanize;

           # a tool to automatically post entries to a Movable Type weblog, and set arbitrary creation dates

           my $mech = WWW::Mechanize->new();
           my $entry;
           $entry->{title} = "Test AutoEntry Title";
           $entry->{btext} = "Test AutoEntry Body";
           $entry->{date} = '2002-04-15 14:18:00';
           my $start = qq|http://my.blog.site/mt.cgi|;

           $mech->get($start);
           $mech->field('username','und3f1n3d');
           $mech->field('password','obscur3d');
           $mech->submit(); # to get login cookie
           $mech->get(qq|$start?__mode=view&_type=entry&blog_id=1|);
           $mech->form_name('entry_form');
           $mech->field('title',$entry->{title});
           $mech->field('category_id',1); # adjust as needed
           $mech->field('text',$entry->{btext});
           $mech->field('status',2); # publish, or 1 = draft
           my $results = $mech->submit();

           # if we're ok with this entry being datestamped "NOW" (no {date} in $entry)
           # we're done. Otherwise, time to be tricksy
           # MT returns a 302 redirect from this form. the redirect itself contains a <body onload=""> handler
           # which takes the user to an editable version of the form where the create date can be edited
           # MT date format of YYYY-MM-DD HH:MI:SS is the only one that won't error out

           if ($entry->{date} && $entry->{date} =~ /^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}/) {
               # travel the redirect
               $results = $mech->get($results->header('Location'));
               $results->content =~ /<body onLoad="([^\"]+)"/is;
               my $js = $1;
               $js =~ /\'([^']+)\'/;
               $results = $mech->get($start.$1);
               $mech->form_name('entry_form');
               $mech->field('created_on_manual',$entry->{date});
               $mech->submit();
           }

   get-despair, by Randal Schwartz
       Randal submitted this bot that walks the despair.com site, sucking down
       all the pictures.

           use strict;
           $|++;

           use WWW::Mechanize;
           use File::Basename;

           my $m = WWW::Mechanize->new;

           $m->get("http://www.despair.com/indem.html");

           my @top_links = @{$m->links};

           for my $top_link_num (0..$#top_links) {
               next unless $top_links[$top_link_num][0] =~ /^http:/;

               # follow_link's "n" parm is 1-based, so adjust the 0-based array index
               $m->follow_link( n => $top_link_num + 1 ) or die "can't follow $top_link_num";

               print $m->uri, "\n";
               for my $image (grep m{^http://store4}, map $_->[0], @{$m->links}) {
                   my $local = basename $image;
                   print " $image...", $m->mirror($image, $local)->message, "\n";
               }

               $m->back or die "can't go back";
           }

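       A couple of the examples above poke into the objects returned by
       "links()" and "find_all_links()" as array references ("$link->[0]",
       "@{$links[0]}").  That works because WWW::Mechanize::Link objects are
       blessed array refs, but the accessor methods read more clearly.  A
       small sketch, not tied to any one program above, assuming $mech is a
       WWW::Mechanize object that has already fetched a page:

           for my $link ( $mech->find_all_links( url_regex => qr/^http:/ ) ) {
               printf "%s => %s\n", $link->text, $link->url_abs;
           }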