User:AnomieBOT/source/tasks/ACNClerk.pm
Appearance
Approved 2011-01-24 Wikipedia:Bots/Requests for approval/AnomieBOT 51 |
package tasks::ACNClerk;
=pod
=begin metadata
Bot: AnomieBOT
Task: ACNClerk
BRFA: Wikipedia:Bots/Requests for approval/AnomieBOT 51
Status: Approved 2011-01-24
Created: 2011-01-13
Update crosslinks on [[WP:ACN]] and [[WT:ACN]] when content is archived.
=end metadata
=cut
use utf8;
use strict;
# Apparently this is experimental in the version of Perl on toolforge
no warnings qw( experimental::lexical_subs );
use feature qw( lexical_subs );
use AnomieBOT::Task;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;
my $version=1;
use Data::Dumper;
sub new {
my $class=shift;
my $self=$class->SUPER::new;
bless $self, $class;
return $self;
}
=pod
=for info
Approved 2011-01-24<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 51]]
=cut
sub approved {
return 2;
}
sub run {
my ($self, $api)=@_;
my $res;
$api->task('ACNClerk', 0, 10, qw(d::Timestamp d::Redirects d::Talk d::Templates));
if(($api->store->{'version'}//0) != $version){
foreach my $k (keys %{$api->store}) {
delete $api->store->{$k} if $k=~/^revid /;
}
$api->store->{'version'}=$version;
}
my $starttime=time();
my %slink = $api->redirects_to_resolved( 'Template:Slink' );
if ( exists( $slink{''} ) ) {
$api->warn( "Failed to get redirects to Template:Slink: " . $slink{''}{'error'} . "\n" );
return 60;
}
my %acn = $api->redirects_to_resolved( 'Wikipedia:Arbitration Committee/Noticeboard', 'Wikipedia talk:Arbitration Committee/Noticeboard' );
if ( exists( $acn{''} ) ) {
$api->warn( "Failed to get redirects to WP:ACN: " . $acn{''}{'error'} . "\n" );
return 60;
}
# Figure out which pages need re-scanning
my %scan=();
my @WPpages=();
my @WTpages=();
my $iter=$api->iterator(
generator => 'allpages',
gapnamespace => [4,5],
gapprefix => 'Arbitration Committee/Noticeboard',
gaplimit => 'max',
prop => 'info',
);
while(my $p=$iter->next){
if(!$p->{'_ok_'}){
$api->warn("Could not retrieve page from iterator: ".$p->{'error'}."\n");
return 60;
}
my $title=$p->{'title'};
my $t=$title; $t=~s/^(?:Wikipedia|Wikipedia talk)://;
next unless $title=~/^Wikipedia(?: talk)?:Arbitration Committee\/Noticeboard(?:\/Archive (\d+))?$/;
push @WPpages, $title if $iter->iterval==4;
push @WTpages, $title if $iter->iterval==5;
$scan{$title}=$p->{'lastrevid'} unless exists($api->store->{"toc $title"}) && $p->{'lastrevid'}==($api->store->{"revid $title"}//0);
}
return 3600 unless %scan;
# Load the headers for the needed pages
foreach my $title (keys %scan) {
return 0 if $api->halting;
$api->log("Scanning section headings in $title");
$res=$api->query(
action => 'parse',
title => $title,
text => "__TOC__\n{{:$title}}",
prop => 'sections',
);
if($res->{'code'} ne 'success'){
$api->warn("Failed to retrieve section list for $title: ".$res->{'error'});
return 60;
}
my @s=();
foreach my $s (@{$res->{'parse'}{'sections'}}) {
push @s, { line => $s->{'line'}, anchor => $s->{'anchor'} };
if($s->{'anchor'}=~/_(\d+)$/){
my $n=$1;
unless($s->{'line'}=~/[ _]$n$/){
$s->{'anchor'}=~s/_\d+$//;
push @s, { line => $s->{'line'}, anchor => $s->{'anchor'} } unless $a;
}
}
}
$api->store->{"toc $title"}=\@s;
$api->store->{"revid $title"}=$scan{$title};
}
# Construct the mappings
my %WPmap=();
foreach my $title (@WPpages) {
foreach my $s (@{$api->store->{"toc $title"}}) {
my $tt=$title;
if(exists($WPmap{$s->{'anchor'}})){
my $t=$WPmap{$s->{'anchor'}};
my $n1=($title=~/\/Archive (\d+)$/)?$1:1e100;
my $n2=($t=~/\/Archive (\d+)$/)?$1:1e100;
$tt=($n1>=$n2)?$title:$t;
#$api->warn("Duplicate section heading $s->{anchor} in $title and $t, using $tt");
}
$WPmap{$s->{'anchor'}}=$tt;
}
}
my %WTmap=();
foreach my $title (@WTpages) {
foreach my $s (@{$api->store->{"toc $title"}}) {
my $tt=$title;
if(exists($WTmap{$s->{'anchor'}})){
my $t=$WTmap{$s->{'anchor'}};
my $n1=($title=~/\/Archive (\d+)$/)?$1:1e100;
my $n2=($t=~/\/Archive (\d+)$/)?$1:1e100;
$tt=($n1>=$n2)?$title:$t;
#$api->warn("Duplicate section heading $s->{anchor} in $title and $t, using $tt");
}
$WTmap{$s->{'anchor'}}=$tt;
}
}
# Load the list of pages to scan for links
my %scanlinks=();
$iter=$api->iterator(
generator => 'backlinks',
gblnamespace => '4|5',
gbltitle => [keys %scan],
gbllimit => 'max',
gblredirect => 1,
);
while(my $p=$iter->next){
if(!$p->{'_ok_'}){
$api->warn("Could not retrieve backlinks from iterator: ".$p->{'error'}."\n");
return 60;
}
my $title=$p->{'title'};
next unless $title=~/^Wikipedia(?: talk)?:Arbitration Committee\/Noticeboard(?:\/Archive (\d+))?$/;
$scanlinks{$title}=1;
}
# Find all links to relevant pages, with their anchors.
foreach my $title (keys %scanlinks) {
return 0 if $api->halting;
$api->log("Scanning links in $title");
my $tok=$api->edittoken($title);
if($tok->{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$tok->{'content'}."\n");
return 300;
}
if($tok->{'code'} ne 'success'){
$api->warn("Failed to get edit token for $title: ".$tok->{'error'});
return 60;
}
my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};
# Find all relevant anchors
my %anchors = ();
while ( $intxt=~/\[\[([^#|\]]++)#([^|\]]++)(?=\||\]\])/g ) {
my ($p,$a) = ($1,$2);
next unless exists($acn{fixpage($p)});
$anchors{$a} = undef;
}
my $fail = 0;
$api->process_templates( $intxt, sub {
return undef if $fail;
my $name = shift;
my $params = shift;
return undef unless exists($slink{$name}) || exists($slink{"Template:$name"});
my ($p,$a);
($p,$a,$fail) = procparams( $api, $params );
return undef if $fail;
return undef unless exists($acn{fixpage($p)});
$anchors{$a} = undef;
return undef;
} );
if ( $fail ) {
$api->warn( "{{slink}} with multiple sections is not supported in $title\n" );
next;
}
if ( !%anchors ) {
$api->log("No links to check in $title");
next;
}
# Map all the anchors to encoded versions for $WPmap/$WTmap
my @anchors = sort keys %anchors;
my $txt = '';
for ( my $i = 0; $i < @anchors; $i++ ) {
$txt .= "$i:{{anchorencode:$anchors[$i]}}\n";
}
$res=$api->query(action=>'expandtemplates',title=>$title,text=>$txt,prop=>'wikitext');
if($res->{'code'} ne 'success'){
$api->warn("Failed to retrieve anchor mapping for $title: ".$res->{'error'});
return 60;
}
foreach my $l (split /\n/, $res->{'expandtemplates'}{'wikitext'}) {
unless($l=~/^(\d+):(.+)$/){
$api->warn("Invalid response checking anchor mapping in $title");
return 60;
}
$anchors{$anchors[$1]} = $2;
}
# Replace all relevant links. Flag them with ENQ (U+0005)
my $outtxt = $intxt;
my sub repl {
my ( $z, $p, $a ) = @_;
$p = fixpage($p);
return $z unless exists($acn{$p});
my $new = !exists($anchors{$a}) ? '' : ($p=~/^Wikipedia:/i) ? ($WPmap{$anchors{$a}}//'') : ($WTmap{$anchors{$a}}//'');
if ( $new eq '' ) {
$api->warn("No mapping for \"$a\" in $title");
return $z;
}
return $z unless $new =~ m{/Archive \d+$};
return "\x05[[$new#$a";
};
$outtxt =~ s/\[\[([^#|\]]++)#([^|\]]++)(?=\||\]\])/repl($&,$1,$2)/ge;
$outtxt = $api->process_templates( $outtxt, sub {
my $name = shift;
my $params = shift;
shift;
shift;
my $oname = shift;
return undef unless exists($slink{$name}) || exists($slink{"Template:$name"});
my ($p,$a,$fail,@p) = procparams( $api, $params );
$p = fixpage($p);
return undef unless exists($acn{$p});
my $new = !exists($anchors{$a}) ? '' : ($p=~/^Wikipedia:/i) ? ($WPmap{$anchors{$a}}//'') : ($WTmap{$anchors{$a}}//'');
if ( $new eq '' ) {
$api->warn("No mapping for \"$a\" in $title");
return undef;
}
return undef unless $new =~ m{/Archive \d+$};
unshift @p, $a=~/=/ ? "2=$a" : $a;
unshift @p, $new;
return "\x05{{$oname|" . join( '|', @p ) . '}}';
} );
# Adjust some text, using the ENQ placeholders to flag it. Then remove the ENQs.
$outtxt=~s/Discuss this at: (?=.*\x05)/Archived discussion at: /g;
$outtxt=~s/(\x05.*)(?:Discussion|Discuss|Discuss (?:this|announcement|report))/${1}Archived discussion/g;
$outtxt=~s/\x05//g;
if($intxt ne $outtxt){
$api->log("Adjusting links to archived content in $title");
$res=$api->edit($tok, $outtxt, "Adjusting links to archived content", 1, 1);
if($res->{'code'} ne 'success'){
$api->warn("Failed to edit $title: ".$res->{'error'}."\n");
}
}
}
return 3600;
}
sub fixpage {
my $p = shift;
$p=~s/[ _]+/ /g;
$p=~s/^(?:WP|Project):/Wikipedia:/i;
$p=~s/^(?:WT|Project talk):/Wikipedia talk:/i;
return $p;
}
sub procparams {
my ($api, $params) = @_;
my ($p,$a);
my @p = ();
my $fail = 0;
foreach ( $api->process_paramlist( @$params ) ) {
if ( $_->{'name'} eq '1' ) {
$p = $_->{'value'};
} elsif ( $_->{'name'} eq '2' ) {
$a = $_->{'value'};
} else {
$fail = 1 if $_->{'name'} =~ /^\d+$/;
push @p, $_->{'text'};
}
}
if ( $p =~ s/#(.*)$// ) {
$fail = 1 if defined( $a );
$a = $1;
}
return ($p,$a,$fail,@p);
}
1;