User:AnomieBOT/source/tasks/TFDClerk.pm
Appearance
Approved 2011-11-08 Wikipedia:Bots/Requests for approval/AnomieBOT 59 |
package tasks::TFDClerk;
=pod
=begin metadata
Bot: AnomieBOT
Task: TFDClerk
BRFA: Wikipedia:Bots/Requests for approval/AnomieBOT 59
Status: Approved 2011-11-08
Created: 2011-10-30
Peform the following tasks at [[WP:TFD]]:
* Create the daily TFD subpage.
* Fix the headers on the daily TFD subpages, if they get removed or damaged.
* Maintain the page [[Wikipedia:Templates for discussion/Old unclosed discussions]].
* Subst {{tl|tfd top}} and {{tl|tfd bottom}}, when editing the page anyway.
=end metadata
=cut
use utf8;
use strict;
use AnomieBOT::Task qw/:time/;
use Data::Dumper;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;
my @months=('','January','February','March','April','May','June','July','August','September','October','November','December');
my $is_closed_re=qr((?:\{\{\s*tfd[_ ]?top\s*[|}]|<div class="[^"]*(?<=[" ])[tx]fd-closed[ "]));
my %db=(
nonsense => 'G1',
test => 'G2',
vandalism => 'G3',
pagemove => 'G3',
hoax => 'G3',
repost => 'G4',
banned => 'G5',
histmerge => 'G6',
move => 'G6',
copypaste => 'G6',
xfd => 'G6',
maintenance => 'G6',
house => 'G6',
disambig => 'G6',
movedab => 'G6',
unpatrolled => 'G6',
author => 'G7',
self => 'G7',
blanked => 'G7',
talk => 'G8',
subpage => 'G8',
imagepage => 'G8',
redirnone => 'G8',
templatecat => 'G8',
attack => 'G10',
blp => 'G10',
attackorg => 'G10',
spam => 'G11',
promo => 'G11',
copyvio => 'G12',
policy => 'T2',
misrepresentation => 'T2',
misrepresent => 'T2',
fake => 'T2',
't3-s' => 'T3',
duplicatetemplate => 'T3',
duplicate => 'T3',
);
sub new {
my $class=shift;
my $self=$class->SUPER::new();
$self->{'lasttime'}=0;
$self->{'broken'}=0;
bless $self, $class;
return $self;
}
=pod
=for info
Approved 2011-11-08<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 59]]
=cut
sub approved {
return 2;
}
sub run {
my ($self, $api)=@_;
my $res;
$api->task('TFDClerk', 0, 10, qw/d::Talk d::Templates d::Redirects d::Sections/);
my %p=$api->redirects_to_resolved('Template:Delrevxfd');
if(exists($p{''})){
if($p{''}{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$p{''}{'content'}."\n");
return 300;
}
$api->warn("Failed to get notice template redirects: ".$p{''}{'error'}."\n");
return 60;
}
my @p=map { s/^Template://; my ($a,$b)=split(//,$_,2); "(?i:\Q$a\E)\Q$b\E"; } keys %p;
my $p=join('|',@p);
my $noticere=qr/(?:(?i:<noinclude>\s*)?\{\{\s*(?i:Template\s*:\s*)?(?:$p)\s*(?:\|.*?)?\}\}\s*(?i:<\/noinclude>\s*)?)/;
my %tosubst=$api->redirects_to_resolved('Template:Tfd top', 'Template:Tfd bottom');
if(exists($tosubst{''})){
if($tosubst{''}{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$tosubst{''}{'content'}."\n");
return 300;
}
$api->warn("Failed to get top/bottom template redirects: ".$tosubst{''}{'error'}."\n");
return 60;
}
my %tfdlinks=$api->redirects_to_resolved('Template:Tfd links');
if(exists($tfdlinks{''})){
if($tfdlinks{''}{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$tfdlinks{''}{'content'}."\n");
return 300;
}
$api->warn("Failed to get {{tfd links}} redirects: ".$tfdlinks{''}{'error'}."\n");
return 60;
}
# Only check once per hour
if($self->{'lasttime'}==0){
if(exists($api->store->{'lasttime'})){
my $t=$api->store->{'lasttime'};
$self->{'lasttime'}=$t if($t=~/^\d+$/ && $t<=time());
}
$self->{'broken'}=$api->store->{'broken'} if(exists($api->store->{'broken'}));
}
my $starttime=time();
my $t=$self->{'lasttime'}+($self->{'broken'}?300:3600)-$starttime;
return $t if $t>0;
# If it's close enough to 23:00, just wait for 23:00.
$t=82800-($starttime%86400);
return $t if($t>0 && $t<($self->{'broken'}?300:3600));
# If it's close enough to 00:00, just wait for 00:00.
$t=86400-($starttime%86400);
return $t if($t>0 && $t<($self->{'broken'}?300:1800));
my $startdate=[1,3,2018];
$startdate=$api->store->{'startdate'} if exists($api->store->{'startdate'});
my $screwup=' Errors? [[User:'.$api->user.'/shutoff/TFDClerk]]';
# Get the content of all versions of "tfd top" since the startdate
my $re='\{\{\s*[tT]fd[ _]?top\s*(?s:\|.*?)?\}\}';
my %cont=();
my $first=1;
while($first || %cont) {
my $t=$api->query(
titles => 'Template:tfd top',
prop => 'revisions',
rvprop => 'timestamp|content',
rvslots => 'main',
rvlimit => 1,
%cont,
);
if($t->{'code'} ne 'success'){
$api->warn("Failed to load revisions for Template:tfd top: ".$t->{'error'}."\n");
return 60;
}
%cont=exists($t->{'query-continue'})?%{$t->{'query-continue'}{'revisions'}}:();
$t=(values(%{$t->{'query'}{'pages'}}))[0]{'revisions'}[0];
%cont=() if $t->{'timestamp'} lt sprintf("%04d-%02d-%02d", reverse @$startdate);
$t=$t->{'slots'}{'main'}{'*'};
$t=~s!<noinclude>.*</noinclude>!!gs;
$t=~s!</?includeonly>!!g;
$t=~s!\{\{\{1\|'''delete'''\.\}\}\}\{\{(?:safe)?subst:#ifeq:\{\{\{2\|\}\}\}\|y\| ~~~~\}\}!\x07!g;
unless($t =~ m/^\s*$is_closed_re/o){
next unless $first;
$api->whine("[[Template:Tfd top]] is broken", "Help! The template {{tl|tfd top}} is missing the \"is_closed\" regex, or this regex is not at the beginning of the template's output. To avoid confusion, I'm not going to process any TFDs until it's fixed or I'm fixed.");
return 300;
}
if($t =~ m/\x07\s*$/){
next unless $first;
$api->whine("[[Template:Tfd top]] is broken", "Help! The template {{tl|tfd top}} does not end with some constant text, i.e. <nowiki>{{{1|}}}</nowiki> is at the very end of the template. To avoid confusion, I'm not going to process any TFDs until it's fixed or I'm fixed.");
return 300;
}
if($t =~ m/\{\{\{/){
next unless $first;
$api->whine("[[Template:Tfd top]] is broken", "Help! The template {{tl|tfd top}} contains unknown parameters. To avoid confusion, I'm not going to process any TFDs until it's fixed or I'm fixed.");
return 300;
}
$t=quotemeta($t);
$t=~s/\\\x07/(?s:.*?)/g;
$re.="|$t";
$first = 0;
}
# Iterate over all our pages
my $broken=0;
my $new_start=_make_date();
my $sevendays=_date_add(_make_date(),-7,0,0);
my @old=();
my @oldsumm=();
my @oldlinks=();
MAINLOOP: for(my $date=_make_date(time+3600); _cmp_date($startdate,$date)<=0; $date=_date_add($date,-1,0,0)){
return 0 if $api->halting;
my $title='Wikipedia:Templates for discussion/Log/'.$date->[2].' '.$months[$date->[1]].' '.$date->[0];
$api->log("Checking TFDs in $title");
my $tok=$api->edittoken($title);
if($tok->{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$tok->{'content'}."\n");
return 300;
}
if($tok->{'code'} ne 'success'){
$api->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");
return 60;
}
my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'} // '';
my $outtxt=$intxt;
# Fix header if necessary
my $fixedhead=0;
my ($pageheader,$err)=_makepagehead($api,$title,$date);
if ( ! defined( $pageheader ) ) {
$api->warn("Failed to get page header for $title: $err\n");
return 60;
}
if($outtxt!~/^\Q$pageheader\E/){
my $dt=$months[$date->[1]].' '.$date->[0];
my $dt2=$date->[2].' '.$months[$date->[1]].' '.$date->[0];
my $oldtxt;
do {
$oldtxt=$outtxt;
$outtxt=~s!^(?:|.*?\n)===\s*(?:\Q[[Wikipedia:Templates for discussion/Log/$dt2|$dt]]\E|\Q$dt\E)\s*===[^\n]*\n!!s;
$outtxt=~s/^\s*\[\[\s*(?i:Category)\s*:\s*Non-talk pages that are automatically signed(?:\|.*?)?\]\].*?\n//;
$outtxt=~s/^\s*<!--.*?-->\s*//s;
$outtxt=~s/^\s*//;
} while($oldtxt ne $outtxt);
$outtxt="$pageheader\n$outtxt";
}
$fixedhead=($outtxt ne $intxt);
# If the page has been edited in the last day, keep watching it in case
# the last closing gets reverted.
my $ts=ISO2timestamp($tok->{'revisions'}[0]{'timestamp'}) // time;
$new_start=[@$date] if(time()-$ts<86400);
# Fix any simple mispositioned headers: armor any good headers, then
# fix any mispositioned ones, then unarmor.
my ($marker, $i)=('', 0);
do {
$marker = "\x02--$i--\x03";
$i++;
} while($outtxt=~/$marker/);
$outtxt=~s/(?:^|(?<=\n))((====+)[^=](?:.*[^=])?\2\s*?\n\s*$noticere*$is_closed_re)/$1$marker/go;
my $fixed=($outtxt=~s/(?:^|(?<=\n))((?>$re).*\n)\s*((====+)[^=](?:.*[^=])?\3\s*?\n)/$2$1/go);
$outtxt=~s/$marker//go;
# Split into level-4+ sections, and check if each is closed
my @sections=$api->split_sections($outtxt, "456");
my $ct=0;
my %tmpl=();
my @secs=();
my @closed=();
my @pageoldlinks=();
for(my $i=0; $i<@sections; $i++){
my $s=$sections[$i];
if($s->{'body'}=~m/^\s*$noticere*$is_closed_re/o){
# Someone closed a section, so merge in all its subsections
my $j;
for($j=$i+1; $j<@sections && $sections[$j]->{'level'} > $s->{'level'}; $j++){}
if($j>$i+1){
$s->{'body'}=~s/\s*$/\n\n/;
$s->{'body'}.=$api->join_sections(splice(@sections, $i+1, $j-$i-1));
}
}
$_=$s->{'body'};
# Check manual closes
my $bad=/(?>^\s*$noticere*\S).*$is_closed_re/so;
next if !$bad && m/^\s*$noticere*$is_closed_re/o;
if($bad || /$is_closed_re/so){
$api->log("Crap, $title is b0rken");
$api->warn("Crap, $title is b0rken\n");
$api->whine("[[$title]] is broken", "Help! A section in [[$title]] contains the \"is_closed\" regex but not at the beginning of the section. Probably someone put the {{tl|tfd top}} before a section header instead of after. Anyway, I can't do anything to that page until someone fixes it.");
if(_cmp_date($date,$sevendays)<0){
push @old, "{{$title}}\n";
unshift @oldsumm, [@$date];
push @oldlinks, "* <b>[[$title]] is broken</b>\n";
}
$new_start=[@$date];
$broken=1;
next MAINLOOP;
}
# Check for bot-closability
my @templates=();
$api->process_templates($_, sub {
my $name=shift;
my $params=shift;
my $ns = 'Template';
my $tpl = undef;
return undef unless exists($tfdlinks{"Template:$name"});
for my $p ($api->process_paramlist(@$params)){
$tpl = $p->{'value'} if $p->{'name'} eq '1';
$ns = 'Module' if ( $p->{'name'} eq 'module' && $p->{'value'} ne '' );
$ns = 'Category' if ( $p->{'name'} eq 'catfd' && $p->{'value'} ne '' );
}
push @templates, "$ns:$tpl" if defined( $tpl );
return undef;
});
unless ( @templates ) {
if ( $s->{'level'} ) {
push @pageoldlinks, "* [[$title#{{anchorencode:" . $s->{'title'} . "}}]] – No {{tl|tfd links}}, a human must close it.\n";
$ct++;
}
next;
}
next unless $s->{'level'};
$ct++;
foreach my $t (@templates){
$tmpl{$t}=undef;
}
push @secs, [ $s, @templates ];
}
next if($ct==0 && @closed==0 && !$fixedhead && !$fixed);
# Check if the unclosed templates still exist.
my @titles=keys %tmpl;
while(@titles){
my @tmpl=splice(@titles, 0, 500);
$res=$api->query(
titles => join('|', @tmpl),
prop => 'info',
);
if($res->{'code'} ne 'success'){
$api->warn("Failed to retrieve template info for $title: ".$res->{'error'}."\n");
return 60;
}
my %map=();
%map=map { $_->{'to'}, $_->{'from'} } @{$res->{'query'}{'normalized'}} if exists($res->{'query'}{'normalized'});
PAGE: foreach (values %{$res->{'query'}{'pages'}}){
next PAGE unless exists($_->{'missing'});
my $tmpl=$api->apply_redirect_map( $_->{'title'}, \%map );
if(!exists($tmpl{$tmpl})){
my $d=Dumper($_); $d=~s/\s+/ /g;
$api->warn("How odd, this was apparently returned even though it wasn't requested: $d\n");
next PAGE;
}
my $msg=undef;
# First, check for a deletion.
my $r=$api->query(
letitle => $_->{'title'},
list => 'logevents',
letype => 'delete',
lelimit => 1,
leprop => 'user|timestamp|comment',
);
if($r->{'code'} ne 'success'){
$api->warn("Failed to retrieve logs for ".$_->{'title'}.": ".$r->{'error'}."\n");
return 60;
}
if(exists($r->{'query'}{'logevents'}[0])){
my $log=$r->{'query'}{'logevents'}[0];
# Skip for now if it was deleted within the past hour, to
# give the closing admin a chance to close it themself.
my $t=ISO2timestamp($log->{'timestamp'});
next PAGE if(time()-$t<3600);
# Check whether the deletion log entry could reasonably
# belong to this TFD: the deletion log entry should be
# dated on or after the TFD page date. Give them a day of
# leeway just in case.
$t=_date_add(_make_date($t),1,0,0);
if(_cmp_date($t,$date)>=0){
# Close it!
$msg="'''Delete'''; deleted";
if($log->{'comment'}=~/CSD(?:#|\]\] | )([GT]\d+)/i ||
$log->{'comment'}=~/db-([gt]\d+)/){
my $c=uc($1);
$msg.=" as [[WP:CSD#$c|$c]]";
} elsif($log->{'comment'}=~/db-([a-z])/ && exists($db{$1})){
my $c=$db{$1};
$msg.=" as [[WP:CSD#$c|$c]]";
}
$msg.=" by {{admin|".$log->{'user'}."}}";
}
}
# Check for a move-without-redirect
$r=$api->query(
letitle => $_->{'title'},
list => 'logevents',
letype => 'move',
lelimit => 1,
leprop => 'user|timestamp|comment|details',
);
if($r->{'code'} ne 'success'){
$api->warn("Failed to retrieve logs for ".$_->{'title'}.": ".$r->{'error'}."\n");
return 60;
}
if(exists($r->{'query'}{'logevents'}[0])){
my $log=$r->{'query'}{'logevents'}[0];
next PAGE unless exists( $log->{'params'}{'suppressredirect'} );
# Skip for now if it was deleted within the past hour, to
# give the closing admin a chance to close it themself.
my $t=ISO2timestamp($log->{'timestamp'});
next PAGE if(time()-$t<3600);
# Check whether the move log entry could reasonably
# belong to this TFD: the move log entry should be
# dated on or after the TFD page date. Give them a day of
# leeway just in case.
$t=_date_add(_make_date($t),1,0,0);
if(_cmp_date($t,$date)>=0){
# Close it!
$msg="'''Moved without redirect'''";
if(($log->{'params'}{'target_title'} // '') ne '') {
$msg .= ' to [[:' . $log->{'params'}{'target_title'} . ']]';
}
if($log->{'comment'} ne '') {
$msg .= ' with comment "<nowiki>' . $log->{'comment'} . '</nowiki>"';
}
$msg.=" by {{admin|".$log->{'user'}."}}";
}
}
if(!defined($msg)){
my $what = $_->{'title'} =~ /^Module:/ ? 'Module' : 'Template';
$msg="'''$what does not exist'''. If the title linked contains a typo, feel free to correct the typo and un-close this discussion.";
}
next if !defined($msg);
$tmpl{$tmpl}=$msg;
}
}
# Now check the discussions to determine if all are closed.
foreach my $sec (@secs){
my @tmpl=@$sec;
my $s = shift @tmpl;
my %msgs=();
my $ok=1;
foreach my $t (@tmpl){
$ok=0 unless defined($tmpl{$t});
last unless $ok;
push @{$msgs{$tmpl{$t}}}, "[[:$t]]";
}
unless ( $ok ) {
push @pageoldlinks, "* [[$title#{{anchorencode:" . $s->{'title'} . "}}]]\n";
next;
}
$sec->[0]{'body'}=~s/^\s+|\s+$//g;
my @msgs=keys %msgs;
my $msg="\x02!!!!\x03\n";
if(@msgs==1){
$msg="{{subst:tfd top|1=".$msgs[0]." ~~~~}}\n";
} else {
$msg="{{subst:tfd top|1='''Delete''' for multiple reasons:\n";
foreach my $m (@msgs){
$msg.="* ".join(", ", @{$msgs{$m}}).": $m\n";
}
$msg.="~~~~}}\n";
}
$sec->[0]{'body'}=$msg.$sec->[0]{'body'}."\n{{subst:tfd bottom}}\n";
$ct--;
push @closed, (@tmpl==1 ? "[[:".$tmpl[0]."]]" : '"'.$sec->[0]{'title'}.'"');
}
# Mark for entry on the list of old TFDs, if applicable
if($ct>0){
if(_cmp_date($date,$sevendays)<0){
push @old, "{{$title}}\n";
unshift @oldsumm, [@$date];
}
$new_start=[@$date];
}
if(_cmp_date($date,$sevendays)<0){
push @oldlinks, @pageoldlinks;
}
# Need to edit?
next unless(@closed || $fixed || $fixedhead);
# Processed, now reconstruct the page
$outtxt=$api->join_sections(@sections);
# Subst templates, if necessary
my $subst=0;
$outtxt=$api->process_templates($outtxt, sub {
my $name=shift;
shift; #$params
my $wikitext=shift;
return undef unless exists($tosubst{"Template:$name"});
$subst++;
$wikitext=~s/^\{\{\s*/\{\{subst:/;
return $wikitext;
});
# Create summary
my @summary=();
if($fixedhead){
if(exists($tok->{'missing'})){
push @summary, "new discussion page: ".$date->[2].' '.$months[$date->[1]].' '.$date->[0];
} else {
push @summary, "fix page header";
}
}
push @summary, "subst {{tfd top}} and/or {{tfd bottom}}" if $subst>0;
push @summary, 'move closing box'.(($fixed>1)?'es':'').' per [[WP:DPR#TFD]]' if $fixed;
my $toomany=@summary;
push @summary, 'close discussions for deleted/nonexistent templates: '.join(', ', @closed) if @closed;
my $summary='(BOT) '.ucfirst(join('; ', @summary)).".$screwup";
$api->log("$summary in $title");
if(length($summary)>500){
$summary[$toomany]='close discussions for deleted/nonexistent templates: [too many to list]' if @closed;
$summary='(BOT) '.ucfirst(join('; ', @summary)).$screwup;
}
my $r=$api->edit($tok, $outtxt, $summary, 0, 1);
if($r->{'code'} ne 'success'){
$api->warn("Write failed on $title: ".$r->{'error'}."\n");
return 60;
}
}
# Ok, we've processed all the subpages. Now update the page of links to old
# unclosed discussions.
my $title='Wikipedia:Templates for discussion/Old unclosed discussions';
$api->log("Updating discussions lists on $title");
my $tok=$api->edittoken($title);
if($tok->{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$tok->{'content'}."\n");
return 300;
}
if($tok->{'code'} ne 'success'){
$api->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");
return 60;
}
my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'} // '';
$intxt=~s/Last updated .*?<!-- Bot edit date -->/Last updated ~~~~~<!-- Bot edit date -->/;
$intxt=~s/\s*$/\n/;
my $outtxt="This is a list of unclosed TfDs over 7 days old. It is automatically maintained by a bot, but humans are free to remove lines when closing discussions if they'd like. Last updated ~~~~~<!-- Bot edit date -->.\n\n";
if ( @oldlinks ) {
$outtxt .= join( '', @oldlinks );
} else {
$outtxt .= "* None at this time\n";
}
if($intxt ne $outtxt){
my $summary;
if(@oldsumm){
my $m=0;
my @oldsumm2=map {
my $ret;
if($_->[1]!=$m){
$m=$_->[1];
$ret=substr($months[$_->[1]],0,3).' '.$_->[0];
} else {
$ret=$_->[0];
}
$ret
} @oldsumm;
$oldsumm2[-1].='.';
$summary='(BOT) Updating discussions: '.join(', ', @oldsumm2).$screwup;
$api->log("$summary in $title");
$summary='(BOT) Updating discussions: major backlog!'.$screwup if length($summary)>500;
} else {
$summary='(BOT) Updating discussions: no old discussions'.$screwup;
}
my $r=$api->edit($tok, $outtxt, $summary, 0, 1);
if($r->{'code'} ne 'success'){
$api->warn("Write failed on $title: ".$r->{'error'}."\n");
return 60;
}
}
# Save checked revision
$self->{'lasttime'}=$starttime;
$self->{'broken'}=$broken;
$api->store->{'startdate'}=$new_start;
$api->store->{'lasttime'}=$starttime;
$api->store->{'broken'}=$broken;
return $starttime+($self->{'broken'}?300:3600)-time;
}
sub _make_date {
my $t=shift || time;
if(ref($t) eq 'ARRAY'){
return _fix_date([@$t]);
} else {
my @t=gmtime($t);
@t=@t[3..5];
$t[1]+=1;
$t[2]+=1900;
return [@t];
}
}
sub _date_add {
my @t=@{$_[0]};
$t[0]+=$_[1];
$t[1]+=$_[2];
$t[2]+=$_[3];
return _fix_date([@t]);
}
sub _fix_date {
my $t=shift;
my @t=gmtime(timegm(0,0,0,$t->[0],$t->[1]-1,$t->[2]-1900));
@t=@t[3..5];
$t[1]+=1;
$t[2]+=1900;
return [@t];
}
sub _cmp_date {
my $a=shift;
my $b=shift;
my $x;
$x=$a->[2]-$b->[2];
$x=$a->[1]-$b->[1] if $x==0;
$x=$a->[0]-$b->[0] if $x==0;
return $x;
}
sub _makepagehead {
my $api = shift;
my $title = shift;
my $date = shift;
my $res = $api->query(
action => 'parse',
title => $title,
text => '{{subst:TfD log day|' . $date->[2] . '|' . $months[$date->[1]] . '|' . $date->[0] . '}}',
onlypst => 1,
formatversion => 2,
);
return ( undef, $res->{'error'} ) if $res->{'code'} ne 'success';
my $txt = $res->{'parse'}{'text'};
$txt =~ s/\s*$/\n/s;
return ($txt, undef);
}
1;