Wikipedia:WikiProject Missing encyclopedic articles/Mw links
Appearance
#!/usr/bin/env perl
# Author: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
# Copyright: 2006, Ævar Arnfjörð Bjarmason
# License: The DWTFYWWI LICENSE, see http://tools.wikimedia.de/~avar/COPYING
#
# Fetches the page at the URL given as the first command-line argument,
# collects the title attributes of the <a> elements found inside the
# <div id="bodyContent"> element, and prints them as two numbered wikitext
# lists: "Nonexisting" (anchors carrying the class "new") and "Existing"
# (all other anchors whose class does not mention "external" or "extiw").
#
# usage: mw_links url
use strict;
use warnings;

use LWP::UserAgent;
use HTML::TreeBuilder;

my $arg = shift;
help() unless defined $arg;

# NOTE(review): the User-Agent header is deliberately sent empty here;
# presumably this avoids LWP's default agent string. Some servers reject
# empty agents -- confirm this is still what the target site expects.
my $ua  = LWP::UserAgent->new( agent => '' );
my $res = $ua->get($arg);

# Do not try to parse an error page (or an internal LWP failure response)
# as if it were the requested document.
die "Failed to fetch $arg: " . $res->status_line . "\n"
    unless $res->is_success;

# Prefer character data over raw bytes so that entity-decoded characters
# and literal non-ASCII characters in titles end up in the same encoding.
# If decoding fails, fall back to the raw bytes and leave STDOUT untouched,
# so already-encoded bytes are not encoded a second time.
my $html = $res->decoded_content;
if ( defined $html ) {
    binmode STDOUT, ':encoding(UTF-8)';
}
else {
    $html = $res->content;
}

my %links = getlinks($html);

print "==Nonexisting==\n";
print "# [[$_]]\n" for @{ $links{red} };

print "==Existing==\n";
print "# [[$_]]\n" for @{ $links{blue} };

# getlinks($html)
#
# Parses $html and returns a hash (as a flat list) with two keys:
#   red  => array ref of titles of anchors whose class list contains "new"
#   blue => array ref of titles of the remaining anchors, excluding those
#           whose class attribute matches "external" or "extiw"
# Only anchors inside <div id="bodyContent"> are considered. Both lists keep
# document order. If that div is missing, a warning is emitted and both
# lists are empty.
sub getlinks {
    my $tree  = HTML::TreeBuilder->new_from_content(shift);
    my %links = ( blue => [], red => [] );

    my $body = $tree->look_down( '_tag' => 'div', id => 'bodyContent' );

    if ( defined $body ) {
        my ( @blue, @red );

        for my $anchor ( $body->look_down( '_tag' => 'a' ) ) {
            my $class = $anchor->attr('class');
            $class = '' unless defined $class;

            # The class attribute is a whitespace-separated token list, so
            # look for "new" as a token rather than comparing the whole
            # attribute value.
            if ( grep { $_ eq 'new' } split ' ', $class ) {
                push @red, $anchor;
            }
            elsif ( $class !~ / (?: external | extiw ) /x ) {
                push @blue, $anchor;
            }
        }

        @{ $links{blue} } = booya( \@blue );
        @{ $links{red} }  = booya( \@red );
    }
    else {
        warn "No <div id=\"bodyContent\"> found in document\n";
    }

    # Titles have been copied out as plain strings above; release the tree.
    $tree->delete;

    return %links;
}

# booya(\@elements)
#
# Takes an array ref of objects that respond to ->attr('title') and returns
# the list of their title values, with one trailing newline removed.
# Elements whose title is undefined or empty are skipped.
sub booya {
    my $links = shift;
    my @ret;

    for my $link (@$links) {
        my $title = $link->attr('title');
        next unless defined $title;

        chomp $title;
        push @ret, $title unless $title eq '';
    }

    return @ret;
}

# help()
#
# Prints the usage line to STDOUT and terminates the program with exit
# status 1.
sub help {
    print <<HELP;
usage: $0 url
HELP
    exit 1;
}