User:AnomieBOT/source/tasks/AccidentalLangLinkFixer.pm
Appearance
Approved 2011-01-02. Wikipedia:Bots/Requests for approval/AnomieBOT 43 |
package tasks::AccidentalLangLinkFixer;
=pod
=begin metadata
Bot: AnomieBOT
Task: AccidentalLangLinkFixer
BRFA: Wikipedia:Bots/Requests for approval/AnomieBOT 43
Status: Approved 2011-01-02
Created: 2010-09-11
Periodically checks pages in [[:Category:Pages automatically checked for
incorrect links]] for category and language links that appear to be in
running text, and adds the leading ":" needed to make them ordinary wikilinks instead.
=end metadata
=cut
use utf8;
use strict;
use Data::Dumper;
use POSIX;
use Date::Parse;
use AnomieBOT::Task qw/:time/;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;
# Titles of the categories whose member pages run() scans (passed to the
# categorymembers generator as gcmtitle).
my @categories = (
'Category:Pages automatically checked for accidental language links',
'Category:Pages automatically checked for incorrect links',
);
# Delay between the start of one full scan and the earliest start of the next.
# run() adds this value directly to time(), so it is effectively in SECONDS
# (600 s = 10 minutes).
# NOTE(review): this comment previously said "minutes"; if a 600-minute
# interval was really intended, the value (not the comment) needs changing.
my $frequency=600; # seconds, since it is added to time()
# Constructor. Builds the object through the parent class constructor, then
# resets the two pieces of per-task state used by run():
#   iter - the in-progress page iterator (undef means "start a new scan")
#   next - the time() value at which the next scan is due (0 initially)
sub new {
    my ($class) = @_;
    my $self = $class->SUPER::new();

    # Assign both state fields in one hash slice: iter first, then next.
    @{$self}{qw/iter next/} = (undef, 0);

    return bless $self, $class;
}
=pod
=for info
Approved 2011-01-02.<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 43]]
=cut
# Approval indicator for this task; always the constant 3.
# NOTE(review): the meaning of the specific value is defined by the bot
# framework, not by this file - confirm there before changing it.
sub approved {
    my $status = 3;
    return $status;
}
# Main entry point of the task.
#
# Walks the members of @categories, and for every page whose set of language
# links or categories differs from the values remembered in $api->store,
# fetches the page text and prefixes a ":" to category / interlanguage links
# that are not already protected by the strip steps below. Pages that need no
# change get their current link sets recorded in the store so they are skipped
# until those sets change again.
#
# Arguments:
#   $self - this task object; uses $self->{'iter'} (the in-progress iterator,
#           kept across calls so a scan can be resumed) and $self->{'next'}
#           (time() value at which the next scan is due).
#   $api  - the API object handed in by the caller.
#
# Returns a number:
#   60   - IWNS maps could not be loaded, or the page list query failed
#   300  - the edit token request reported the task as shut off
#   0    - the bot is halting, or the 5-minute budget for this call is used up
#   else - $self->{'next'} minus the current time, once the scan is complete
# NOTE(review): presumably the caller treats this as the number of seconds to
# wait before calling run() again - confirm against AnomieBOT::Task.
sub run {
    my ($self, $api)=@_;

    $api->task('AccidentalLangLinkFixer', 0, 10, qw/d::IWNS d::Nowiki/);

    # Page linked from every edit summary.
    my $help='User:'.$api->user.'/docs/AccidentalLangLinkFixer';

    # Spend a max of 5 minutes on this task before restarting
    my $endtime=time()+300;

    # Get regular expressions
    return 60 unless $api->load_IWNS_maps();
    my $llre=$api->interlanguage_re();
    my $clre=$api->namespace_re(14);

    # No scan in progress: start a new one and schedule the one after it.
    if(!defined($self->{'iter'})){
        $self->{'iter'}=$api->iterator(
            generator => 'categorymembers',
            gcmtitle => [ @categories ],
            gcmlimit => 100,
            prop => 'langlinks|categories',
            lllimit => 'max',
            cllimit => 'max',
        );
        $self->{'next'}=time()+$frequency;
    }

    while(my $pg=$self->{'iter'}->next){
        my $category = $self->{'iter'}->iterval;
        if(!$pg->{'_ok_'}){
            $api->warn("Failed to retrieve page list for $category: ".$pg->{'error'}."\n");
            return 60;
        }

        return 0 if $api->halting;

        my $page=$pg->{'title'};

        # Get list of langlinks and categories in a standardized format
        my $ll=join '|', sort map $_->{'lang'}.':'.$_->{'*'}, @{$pg->{'langlinks'}};
        my $cl=join '|', sort map $_->{'title'}, @{$pg->{'categories'}};

        # If they haven't changed, we need do nothing more here
        next if(($api->store->{"$page#ll"} // '') eq $ll && ($api->store->{"$page#cl"} // '') eq $cl);

        # Ugh, we need to check the page.
        # The token response also carries the current page text, read below.
        my $tok=$api->edittoken($page, EditRedir => 1);
        if($tok->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if($tok->{'code'} ne 'success'){
            $api->warn("Failed to get edit token for $page: ".$tok->{'error'}."\n");
            next;
        }
        if(exists($tok->{'missing'})){
            $api->warn("WTF? $page does not exist?\n");
            next;
        }

        # Fix any bad links
        my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};

        # Set aside text that must not be touched before doing the
        # substitutions; replace_stripped() puts it back afterwards.
        # NOTE(review): assumes strip_regex/strip_nowiki swap matches for
        # placeholders tracked in $nowiki - confirm in the d::Nowiki helper.
        #  1. Category/language links that are alone on their line (apart
        #     from whitespace, HTML comments and inclusion-control tags).
        my ($outtxt,$nowiki)=$api->strip_regex(qr/^(?:\s|<!--.*?-->|<\/?(?:noinclude|includeonly|onlyinclude)>)*\[\[\s*(?:$llre|$clre)\s*:[^]]*\]\](?:\s|<!--.*?-->|<\/?(?:noinclude|includeonly|onlyinclude)>)*$/m, $intxt);
        #  2. Category links with an empty title (optionally with a sort key).
        ($outtxt,$nowiki)=$api->strip_regex(qr/\[\[\s*(?:$clre)\s*:\s*(?:\|[^]]*)?\]\]/, $outtxt, $nowiki);
        #  3. Whatever strip_nowiki() itself removes.
        ($outtxt,$nowiki)=$api->strip_nowiki($outtxt,$nowiki);

        # Prefix ":" to every remaining category / language link. The
        # interpolated fragments are wrapped in (?:...) - as in the strip
        # patterns above - so that a fragment containing a top-level
        # alternation cannot detach the trailing "\s*:" from its alternatives.
        my @summary=();
        push @summary, 'category' if $outtxt=~s/(\[\[\s*)((?:$clre)\s*:)/$1:$2/g;
        push @summary, 'language' if $outtxt=~s/(\[\[\s*)((?:$llre)\s*:)/$1:$2/g;
        $outtxt=$api->replace_stripped($outtxt,$nowiki);

        if(@summary){
            my $summary="Fixing accidental ".join(' and ', @summary)." links";
            $api->log("$summary in $page");
            my $r=$api->edit($tok, $outtxt, "[[$help|$summary]]", 1, 1);
            if($r->{'code'} ne 'success'){
                $api->warn("Write failed on $page: ".$r->{'error'}."\n");
                next;
            }
            # The store is not updated here, so the page's new link sets will
            # differ from the stored ones and it is examined again on a later
            # pass.
        } else {
            $api->log("Updating saved category and language links for $page");
            $api->store->{"$page#ll"}=$ll;
            $api->store->{"$page#cl"}=$cl;
        }
    } continue {
        # If we've been at it long enough, let another task have a go.
        # This sits in a continue block so that it also runs after every
        # "next" above; otherwise a long run of unchanged or failing pages
        # would never yield. The iterator stays in $self->{'iter'}, so the
        # next call resumes with the following page.
        return 0 if time()>=$endtime;
    }

    # Scan complete: drop the iterator so the next call starts afresh, and
    # report how long remains until that scan is due.
    $self->{'iter'}=undef;
    return $self->{'next'}-time();
}
1;