User:Dantheox/pmlinks.pl
Appearance
#!/usr/local/bin/perl -w use strict; use LWP; if (@ARGV < 2) { print "Usage:\n"; print "$0 <wiki source file> <url>\n"; exit(1); } my $ua = LWP::UserAgent->new( agent => '' ); # First get the wiki source my $srcfile = shift; # Then get the rendered page to establish whether a link is blue or red my $url = shift; my $resp = $ua->get($url) or die "Can't get page '$url'"; my $html = $resp->content; # Extract the relevant links, taking title and text my %links = (); while ($html =~ m!<a href="[^"]+" (class="new" )?title="([^"]+)">(.*?)</a>!gsi) { #" unless ($1) { $links{$2}= $3 } } # Now check the source against the text # Only pull out the first link on a line beginning with ":" # Any subsequent lines which appear "under" this line are included with it # This means everything up to either a blank line or a line beginning with a lone '#' my $good = ""; my $bad = ""; my $lst = 0; open IN, "$srcfile" or die "Can't open $srcfile"; while (<IN>) { if (/^#[^:#*;]/) { my ($lt) = /\[\[(.*?)\]\]/; unless ($lt) { $lst = 0; next } my ($link, $text); if ($lt =~ /\|/) { ($link, $text) = $lt =~ /([^|]+)\|(.+)/ } else { $link = $text = $lt } $lst = $links{$link} ? \$good : \$bad; $$lst.=$_; } elsif (/^\s*$/) { $lst = 0; } elsif ($lst) { $$lst.=$_; } } close IN; print "Red links:\n"; print "$bad\n"; print "Blue links:\n"; print "$good\n";