User:AnomieBOT/source/tasks/CFDClerk.pm
Appearance
Approved 2016-11-19 Wikipedia:Bots/Requests for approval/AnomieBOT 75 |
package tasks::CFDClerk;
=pod
=begin metadata
Bot: AnomieBOT
Task: CFDClerk
BRFA: Wikipedia:Bots/Requests for approval/AnomieBOT 75
Status: Approved 2016-11-19
Created: 2016-10-30
Peform the following tasks at [[WP:CFD]]:
* Create the daily CFD subpage.
* Fix the headers on the daily CFD subpages, if they get removed or damaged.
* Remove the "NEW NOMINATIONS" section and related comments from past days' discussions.
* Maintain the list at [[Wikipedia:Categories for discussion/Awaiting closure]].
* Maintain the list at [[Wikipedia:Categories for discussion/Old unclosed discussions]].
* Subst {{tl|cfd top}} and {{tl|cfd bottom}}, when editing the page anyway.
=end metadata
=cut
use utf8;
use strict;
use AnomieBOT::Task qw/:time/;
use Data::Dumper;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;
my @months=('','January','February','March','April','May','June','July','August','September','October','November','December');
my $is_closed_re=qr((?:\{\{\s*cfd[_ ]?top\s*[|}]|<div class="[^"]*(?<=[" ])[tx]fd-closed[ "]));
my %db=(
nonsense => 'G1',
test => 'G2',
vandalism => 'G3',
pagemove => 'G3',
hoax => 'G3',
repost => 'G4',
banned => 'G5',
histmerge => 'G6',
move => 'G6',
copypaste => 'G6',
xfd => 'G6',
maintenance => 'G6',
house => 'G6',
disambig => 'G6',
movedab => 'G6',
unpatrolled => 'G6',
author => 'G7',
self => 'G7',
blanked => 'G7',
talk => 'G8',
subpage => 'G8',
imagepage => 'G8',
redirnone => 'G8',
templatecat => 'G8',
attack => 'G10',
blp => 'G10',
attackorg => 'G10',
spam => 'G11',
promo => 'G11',
copyvio => 'G12',
c1 => 'C1',
catempty => 'C1',
);
sub new {
my $class=shift;
my $self=$class->SUPER::new();
$self->{'lasttime'}=0;
$self->{'broken'}=0;
bless $self, $class;
return $self;
}
=pod
=for info
Approved 2016-11-19<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 75]]
=cut
sub approved {
return 2;
}
sub myquotemeta {
my $r = quotemeta( shift );
$r=~s/\\([\x02\x03])/$1/g;
return $r;
}
sub run {
my ($self, $api)=@_;
my $res;
$api->task('CFDClerk', 0, 10, qw/d::Talk d::Templates d::Redirects d::Sections/);
my %p=$api->redirects_to_resolved('Template:Delrevxfd', 'Template:Move review talk');
if(exists($p{''})){
if($p{''}{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$p{''}{'content'}."\n");
return 300;
}
$api->warn("Failed to get notice template redirects: ".$p{''}{'error'}."\n");
return 60;
}
my @p=map { s/^Template://; my ($a,$b)=split(//,$_,2); "(?i:\Q$a\E)\Q$b\E"; } keys %p;
my $p=join('|',@p);
my $noticere=qr/(?:(?i:<noinclude>\s*)?\{\{\s*(?i:Template\s*:\s*)?(?:$p)\s*(?:\|.*?)?\}\}\s*(?i:<\/noinclude>\s*)?)/;
my %tosubst=$api->redirects_to_resolved('Template:Cfd top', 'Template:Cfd bottom', 'Template:Cfd relisted');
if(exists($tosubst{''})){
if($tosubst{''}{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$tosubst{''}{'content'}."\n");
return 300;
}
$api->warn("Failed to get top/bottom/relisted template redirects: ".$tosubst{''}{'error'}."\n");
return 60;
}
# Only check once per hour
if($self->{'lasttime'}==0){
if(exists($api->store->{'lasttime'})){
my $t=$api->store->{'lasttime'};
$self->{'lasttime'}=$t if($t=~/^\d+$/ && $t<=time());
}
$self->{'broken'}=$api->store->{'broken'} if(exists($api->store->{'broken'}));
}
my $starttime=time();
my $t=$self->{'lasttime'}+($self->{'broken'}?300:3600)-$starttime;
return $t if $t>0;
# If it's close enough to 23:00, just wait for 23:00.
$t=82800-($starttime%86400);
return $t if($t>0 && $t<($self->{'broken'}?300:3600));
# If it's close enough to 00:00, just wait for 00:00.
$t=86400-($starttime%86400);
return $t if($t>0 && $t<($self->{'broken'}?300:1800));
my $startdate=[8,6,2016];
$startdate=$api->store->{'startdate'} if exists($api->store->{'startdate'});
my $screwup=' Errors? [[User:'.$api->user.'/shutoff/CFDClerk]]';
# Get the content of all versions of "cfd top" since the startdate
my $re='\{\{\s*[cC]fd[ _]?top\s*(?s:\|.*?)?\}\}';
my %cont=();
my $first=1;
while($first || %cont) {
my $t=$api->query(
titles => 'Template:cfd top',
prop => 'revisions',
rvprop => 'timestamp|content',
rvslots => 'main',
rvlimit => 1,
%cont,
);
if($t->{'code'} ne 'success'){
$api->warn("Failed to load revisions for Template:cfd top: ".$t->{'error'}."\n");
return 60;
}
%cont=exists($t->{'query-continue'})?%{$t->{'query-continue'}{'revisions'}}:();
$t=(values(%{$t->{'query'}{'pages'}}))[0]{'revisions'}[0];
%cont=() if $t->{'timestamp'} lt sprintf("%04d-%02d-%02d", reverse @$startdate);
$t=$t->{'slots'}{'main'}{'*'};
$t=~s!<noinclude>.*</noinclude>!!gs;
$t=~s!</?includeonly>!!g;
$t=~s!\{\{\{1\|\}\}\}!\x07!g;
my %substedIfs = ();
$t = $api->process_templates( $t, sub {
my ( $name, $params, $wikitext ) = @_;
if ( $name =~ /^\{\{\{\|safesubst:\}\}\}#if:/ ) {
my $p1 = $params->[0];
my $p2 = $params->[1];
if($p1 =~ m/\x07/ || $p2 =~ m/\x07/ ){
$api->whine("[[Template:Cfd top]] is broken", "Help! The template {{tl|cfd top}} has <nowiki>{{{1|}}}</nowiki> in the output of some substed <code>#if</code>. That's too confusing for me to handle. I'm not going to process any CFDs until it's fixed or I'm fixed.");
return 300;
}
if($p1 =~ m/\{\{\{/ || $p2 =~ m/\{\{\{/ ){
$api->whine("[[Template:Cfd top]] is broken", "Help! The template {{tl|cfd top}} contains unknown parameters.<!-- in #if --> To avoid confusion, I'm not going to process any CFDs until it's fixed or I'm fixed.");
return 300;
}
my $tag = $api->get_token_for( $wikitext );
$p1 = $api->replace_stripped( myquotemeta( $p1 ), \%substedIfs );
$p2 = $api->replace_stripped( myquotemeta( $p2 ), \%substedIfs );
$substedIfs{$tag} = '(?:' . $p1 . '|' . $p2 . ')';
return $tag;
}
return undef;
} );
unless($t =~ m/^\s*$is_closed_re/){
next unless $first;
$api->whine("[[Template:Cfd top]] is broken", "Help! The template {{tl|cfd top}} is missing the \"is_closed\" regex, or this regex is not at the beginning of the template's output. To avoid confusion, I'm not going to process any CFDs until it's fixed or I'm fixed.");
return 300;
}
if($t =~ m/\x07\s*$/){
next unless $first;
$api->whine("[[Template:Cfd top]] is broken", "Help! The template {{tl|cfd top}} does not end with some constant text, i.e. <nowiki>{{{1|}}}</nowiki> is at the very end of the template. To avoid confusion, I'm not going to process any CFDs until it's fixed or I'm fixed.");
return 300;
}
if($t =~ m/\{\{\{/){
next unless $first;
$api->whine("[[Template:Cfd top]] is broken", "Help! The template {{tl|cfd top}} contains unknown parameters. To avoid confusion, I'm not going to process any CFDs until it's fixed or I'm fixed.");
return 300;
}
$t=myquotemeta($t);
$t = $api->replace_stripped( $t, \%substedIfs );
$t=~s/\\\x07/(?s:.*?)/g;
$re.="|$t";
$first = 0;
}
# Get the content of all versions of "cfd relisted" since the startdate
my $relisted_re='\{\{\s*[cC]fd[ _]?relisted\s*(?s:\|.*?)?\}\}';
%cont=();
$first=1;
while($first || %cont) {
my $t=$api->query(
titles => 'Template:cfd relisted',
prop => 'revisions',
rvprop => 'timestamp|content',
rvslots => 'main',
rvlimit => 1,
%cont,
);
if($t->{'code'} ne 'success'){
$api->warn("Failed to load revisions for Template:cfd top: ".$t->{'error'}."\n");
return 60;
}
%cont=exists($t->{'query-continue'})?%{$t->{'query-continue'}{'revisions'}}:();
$t=(values(%{$t->{'query'}{'pages'}}))[0]{'revisions'}[0];
%cont=() if $t->{'timestamp'} lt sprintf("%04d-%02d-%02d", reverse @$startdate);
$t=$t->{'slots'}{'main'}{'*'};
$t=~s!<noinclude>.*</noinclude>!!gs;
$t=~s!</?includeonly>!!g;
$t=~s!\Q{{{{{|subst:}}}#if:{{{page| }}}|{{{page}}}|{{{{{|subst:}}}CURRENTYEAR}} {{{{{|subst:}}}CURRENTMONTHNAME}} {{{{{|subst:}}}CURRENTDAY}}}}{{{{{|subst:}}}#if:{{{1|}}}|%23{{{1|}}}}}\E!\x07!g;
while ( $t=~m!(\{\{\{\{\{\|safesubst:\}\}\}require subst\s*\|(.*?)\s*\}\})!s ) {
my ($old,$new) = ($1,$2);
# {{require subst}} strips comments. Sigh.
$new =~ s/<!--.*?-->//gs;
$t=~s!\Q$old\E!$new!;
}
if($t =~ m/\x07\s*$/){
next unless $first;
$api->whine("[[Template:Cfd relisted]] is broken", "Help! The template {{tl|cfd relisted}} does not end with some constant text, i.e. <nowiki>{{{1|}}}</nowiki> is at the very end of the template. To avoid confusion, I'm not going to process any CFDs until it's fixed or I'm fixed.");
return 300;
}
if($t =~ m/\{\{\{/){
next unless $first;
$api->whine("[[Template:Cfd relisted]] is broken", "Help! The template {{tl|cfd relisted}} contains unknown parameters. To avoid confusion, I'm not going to process any CFDs until it's fixed or I'm fixed.");
return 300;
}
$t=quotemeta($t);
$t=~s/\\\x07/(?s:.*?)/g;
$relisted_re.="|$t";
$first = 0;
}
# Iterate over all our pages
my $broken=0;
my $new_start=_make_date();
my $today=_make_date();
my $sevendays=_date_add(_make_date(),-7,0,0);
my @old=();
my @oldsumm=();
my @oldlinks=();
MAINLOOP: for(my $date=_make_date(time+3600); _cmp_date($startdate,$date)<=0; $date=_date_add($date,-1,0,0)){
return 0 if $api->halting;
my $title='Wikipedia:Categories for discussion/Log/'.$date->[2].' '.$months[$date->[1]].' '.$date->[0];
$api->log("Checking CFDs in $title");
my $tok=$api->edittoken($title);
if($tok->{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$tok->{'content'}."\n");
return 300;
}
if($tok->{'code'} ne 'success'){
$api->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");
return 60;
}
my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'} // '';
my $outtxt=$intxt;
# Fix header if necessary
my $iscur = _cmp_date($date,$today) >= 0;
my $fixedhead=0;
my ($pageheader,$err)=_makepagehead($api, $title, $date, $iscur);
if ( ! defined( $pageheader ) ) {
$api->warn("Failed to get page header for $title: $err\n");
return 60;
}
if($outtxt!~/^\Q$pageheader\E/){
my $dt=$months[$date->[1]].' '.$date->[0];
my $oldtxt;
do {
$oldtxt=$outtxt;
$outtxt=~s/^(?:|.*?\n)===\s*\Q$dt\E\s*===[^\n]*(?:\n|$)//s;
$outtxt=~s/^\s*<!--.*?-->\s*//s;
$outtxt=~s/^\s*====[ \t]*NEW NOMINATIONS[ \t]*====\s*(?:\n|$)//s;
$outtxt=~s/^\s*<!--.*?-->\s*//s;
$outtxt=~s/^\s*//;
} while($oldtxt ne $outtxt);
$outtxt="$pageheader\n$outtxt";
}
if ( $iscur ) {
$outtxt .= "\n<!-- Please add the newest nominations to the top -->" unless $outtxt=~/\n<!-- Please add the newest nominations to the top -->\s*$/;
} else {
$outtxt =~ s/\s*\n====[ \t]*NEW NOMINATIONS[ \t]*====\s*\n/\n/;
$outtxt =~ s/\s*\n\s*<!-- Please add the newest nominations below this line -->\s*\n/\n/;
$outtxt =~ s/\s*\n<!-- Please add the newest nominations to the top -->\s*$//;
}
$fixedhead=($outtxt ne $intxt);
# If the page has been edited in the last day, keep watching it in case
# the last closing gets reverted.
my $ts=ISO2timestamp($tok->{'revisions'}[0]{'timestamp'}) // time;
$new_start=[@$date] if(time()-$ts<86400);
# Fix any simple mispositioned headers: armor any good headers, then
# fix any mispositioned ones, then unarmor.
my ($marker, $i)=('', 0);
do {
$marker = "\x02--$i--\x03";
$i++;
} while($outtxt=~/$marker/);
$outtxt=~s/(?:^|(?<=\n))((====+)[^=](?:.*[^=])?\2\s*?\n\s*$noticere*$is_closed_re)/$1$marker/g;
my $fixed=($outtxt=~s/(?:^|(?<=\n))((?>$re).*\n)\s*((====+)[^=](?:.*[^=])?\3\s*?\n)/$2$1/g);
$outtxt=~s/$marker//g;
# Split into level-4+ sections, and check if each is closed
my @sections=$api->split_sections($outtxt, "456");
my $ct=0;
my @secs=();
my @closed=();
my @pageoldlinks=();
for(my $i=0; $i<@sections; $i++){
my $s=$sections[$i];
next if $s->{'body'}=~m/^\s*<!-- CFDClerk: This section heading isn't a discussion -->/;
if($s->{'body'}=~m/^\s*$noticere*$is_closed_re/ || $s->{'body'}=~m/^\s*$relisted_re\s*$/){
# Someone closed a section, so merge in all its subsections
my $j;
for($j=$i+1; $j<@sections && $sections[$j]->{'level'} > $s->{'level'}; $j++){}
if($j>$i+1){
$s->{'body'}=~s/\s*$/\n\n/;
$s->{'body'}.=$api->join_sections(splice(@sections, $i+1, $j-$i-1));
}
}
$_=$s->{'body'};
my $bad=/(?>^\s*$noticere*\S).*$is_closed_re/s;
next if !$bad && ( m/^\s*$noticere*$is_closed_re/ || $s->{'body'}=~m/^\s*$relisted_re\s*$/ );
if($bad || /$is_closed_re/s){
$api->log("Crap, \[\[$title#$s->{title}\]\] is b0rken");
$api->warn("Crap, $title is b0rken\n");
$api->whine("[[$title]] is broken", "Help! A section in [[$title]] contains the \"is_closed\" regex but not at the beginning of the section. Probably someone put the {{tl|cfd top}} before a section header instead of after. Anyway, I can't do anything to that page until someone fixes it.");
if(_cmp_date($date,$sevendays)<0){
push @old, "* [[$title]] (broken)\n";
unshift @oldsumm, [@$date];
push @oldlinks, "* <b>[[$title]] is broken</b>\n";
}
$new_start=[@$date];
$broken=1;
next MAINLOOP;
}
next unless defined($s->{'level'});
$ct++;
push @secs, [ $s ];
}
next if($ct==0 && @closed==0 && !$fixedhead && !$fixed);
# Now check the discussions to determine if all are closed.
foreach my $sec (@secs){
my @cats=@$sec;
my $s = shift @cats;
my $ok=0;
unless ( $ok ) {
push @pageoldlinks, "* [[$title#{{anchorencode:" . $s->{'title'} . "}}]]\n";
next;
}
}
# Mark for entry on the list of old CFDs, if applicable
if($ct>0){
if(_cmp_date($date,$sevendays)<0){
push @old, "* [[$title]] ($ct open)\n";
unshift @oldsumm, [@$date];
}
$new_start=[@$date];
}
if(_cmp_date($date,$sevendays)<0){
push @oldlinks, @pageoldlinks;
}
# Need to edit?
next unless(@closed || $fixed || $fixedhead);
# Processed, now reconstruct the page
$outtxt=$api->join_sections(@sections);
# Subst templates, if necessary
my $subst=0;
$outtxt=$api->process_templates($outtxt, sub {
my $name=shift;
shift; #$params
my $wikitext=shift;
return undef unless exists($tosubst{"Template:$name"});
$subst++;
$wikitext=~s/^\{\{\s*/\{\{subst:/;
return $wikitext;
});
# Create summary
my @summary=();
if($fixedhead){
if(exists($tok->{'missing'})){
push @summary, "new discussion page: ".$date->[2].' '.$months[$date->[1]].' '.$date->[0];
} else {
push @summary, "fix page header";
}
}
push @summary, "subst {{cfd top}} and/or {{cfd bottom}}" if $subst>0;
push @summary, 'move closing box'.(($fixed>1)?'es':'').' per [[WP:DPR#CFD]]' if $fixed;
my $toomany=@summary;
push @summary, 'close discussions for deleted/nonexistent categories: '.join(', ', @closed) if @closed;
my $summary='(BOT) '.ucfirst(join('; ', @summary)).".$screwup";
$api->log("$summary in $title");
if(length($summary)>500){
$summary[$toomany]='close discussions for deleted/nonexistent categories: [too many to list]' if @closed;
$summary='(BOT) '.ucfirst(join('; ', @summary)).$screwup;
}
# Sanity check for whitespace-only edits
my ($intxtSpace, $outtxtSpace) = ( $intxt, $outtxt );
$intxtSpace =~ s/\s*\n\s*/\n/g;
$intxtSpace =~ s/[ \t]+/ /g;
$intxtSpace =~ s/\s*$//g;
$outtxtSpace =~ s/\s*\n\s*/\n/g;
$outtxtSpace =~ s/[ \t]+/ /g;
$outtxtSpace =~ s/\s*$//g;
if ( $intxtSpace eq $outtxtSpace ) {
#$api->log( "Skipping edit to $title because it seems to be whitespace-only: $summary" );
next;
}
my $r=$api->edit($tok, $outtxt, $summary, 0, 1);
if($r->{'code'} ne 'success'){
$api->warn("Write failed on $title: ".$r->{'error'}."\n");
return 60;
}
}
# Ok, we've processed all the subpages. Now update the list of links to old
# unclosed discussions.
if ( 1 ) {
my $title='Wikipedia:Categories for discussion/Old unclosed discussions';
$api->log("Updating discussions lists on $title");
my $tok=$api->edittoken($title);
if($tok->{'code'} eq 'shutoff'){
$api->warn("Task disabled: ".$tok->{'content'}."\n");
return 300;
}
if($tok->{'code'} ne 'success'){
$api->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");
return 60;
}
my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'} // '';
$intxt=~s/Last updated .*?<!-- Bot edit date -->/Last updated ~~~~~<!-- Bot edit date -->/;
$intxt=~s/\s*$/\n/;
my $outtxt="This is a list of unclosed CfDs over 7 days old. It is automatically maintained by a bot, but humans are free to remove lines when closing discussions if they'd like. Last updated ~~~~~<!-- Bot edit date -->.\n\n";
if ( @oldlinks ) {
$outtxt .= join( '', @oldlinks );
} else {
$outtxt .= "* None at this time\n";
}
if($intxt ne $outtxt){
my $summary;
if(@oldsumm){
my $m=0;
my @oldsumm2=map {
my $ret;
if($_->[1]!=$m){
$m=$_->[1];
$ret=substr($months[$_->[1]],0,3).' '.$_->[0];
} else {
$ret=$_->[0];
}
$ret
} @oldsumm;
$oldsumm2[-1].='.';
$summary='(BOT) Updating discussions: '.join(', ', @oldsumm2).$screwup;
$api->log("$summary in $title");
$summary='(BOT) Updating discussions: major backlog!'.$screwup if length($summary)>500;
} else {
$summary='(BOT) Updating discussions: no old discussions'.$screwup;
}
my $r=$api->edit($tok, $outtxt, $summary, 0, 1);
if($r->{'code'} ne 'success'){
$api->warn("Write failed on $title: ".$r->{'error'}."\n");
return 60;
}
}
}
# Save checked revision
$self->{'lasttime'}=$starttime;
$self->{'broken'}=$broken;
$api->store->{'startdate'}=$new_start;
$api->store->{'lasttime'}=$starttime;
$api->store->{'broken'}=$broken;
return $starttime+($self->{'broken'}?300:3600)-time;
}
sub _make_date {
my $t=shift || time;
if(ref($t) eq 'ARRAY'){
return _fix_date([@$t]);
} else {
my @t=gmtime($t);
@t=@t[3..5];
$t[1]+=1;
$t[2]+=1900;
return [@t];
}
}
sub _date_add {
my @t=@{$_[0]};
$t[0]+=$_[1];
$t[1]+=$_[2];
$t[2]+=$_[3];
return _fix_date([@t]);
}
sub _fix_date {
my $t=shift;
my @t=gmtime(timegm(0,0,0,$t->[0],$t->[1]-1,$t->[2]-1900));
@t=@t[3..5];
$t[1]+=1;
$t[2]+=1900;
return [@t];
}
sub _cmp_date {
my $a=shift;
my $b=shift;
my $x;
$x=$a->[2]-$b->[2];
$x=$a->[1]-$b->[1] if $x==0;
$x=$a->[0]-$b->[0] if $x==0;
return $x;
}
sub _makepagehead {
my $api = shift;
my $title = shift;
my $date = shift;
my $iscur = shift;
my $res = $api->query(
action => 'parse',
title => $title,
text => '{{subst:CFD log day|' . $date->[2] . '|' . $months[$date->[1]] . '|' . $date->[0] . '}}',
onlypst => 1,
formatversion => 2,
);
return ( undef, $res->{'error'} ) if $res->{'code'} ne 'success';
my $txt = $res->{'parse'}{'text'};
$txt =~ s/\n\s*<!--(?:[^>]+|[^-]>|[^-]->)*-->\s*$/\n/s;
if ( ! $iscur ) {
$txt =~ s/\n\s*<!--(?:[^>]+|[^-]>|[^-]->)*-->\s*$/\n/s;
$txt =~ s/\n====[ \t]*NEW NOMINATIONS[ \t]*====\s*$/\n/s;
$txt =~ s/\s*$/\n/s;
}
return ($txt, undef);
}
1;