# Source page: User:AnomieBOT/source/tasks/RandomPagePicker.pm
# Approved 2009-01-14. Wikipedia:Bots/Requests for approval/AnomieBOT 21
package tasks::RandomPagePicker;
=pod
=begin metadata
Bot: AnomieBOT
Task: RandomPagePicker
BRFA: Wikipedia:Bots/Requests for approval/AnomieBOT 21
Status: Approved 2009-01-14
Created: 2009-01-09
Periodically choose a random article from a category or union/intersection of
categories and write it to a page, as instructed by {{tlu|User:AnomieBOT/RandomPage}}.
=end metadata
=cut
use utf8;
use strict;
use Data::Dumper;
use POSIX;
use Date::Parse;
use AnomieBOT::Task qw/:time ns2cmtype/;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;
# Title of the template whose transclusions this task rewrites; run() only
# acts on template calls whose name equals this string.
my $template='User:AnomieBOT/RandomPage';
# Category that run() enumerates to find the pages to examine.
my $category='Category:AnomieBOT RandomPage subscriptions';
# Smallest interval, in seconds, that add_frequency() allows between two
# updates of the same page, whatever the template's "frequency" says.
my $minimum_frequency=3600; # 1 hour
# Starting value for the number run() returns; run() only ever lowers it.
my $max_next=1800; # must be less than $minimum_frequency
# Template parameters read by run():
# frequency: How often to edit, for example "2 days" or "2 hours 30 minutes".
# Recognized units: minutes, hours, days, weeks, months, years.
# categories: Either a single "Category:Name", or a prefix-notation
# expression built from nested {{AND|...}}, {{OR|...}}, {{AND NOT|...}} and
# {{SUBCATS|Category:Name|depth}} calls, as parsed by load_pages(). For
# example, "(A or B) and (C or D)" is {{AND|{{OR|A|B}}|{{OR|C|D}}}}.
# namespaces: Comma-separated namespace numbers to choose from; run() joins
# them with "|" before passing them on as the API cmnamespace.
# summary: Edit summary to use.
# minor: Boolean, whether to mark the edit as minor.
# botflag: Boolean.
# template: Copied through to the rewritten template unchanged.
# date: Time of the last pick; written by run() and read back on the next pass.
# NOTE(review): the original comment also listed "repeat" (Boolean, if false
# then articles will not be repeated until all other articles have had a
# chance), but nothing in the code visible here reads a "repeat" parameter.
#
# Only the three parameters below have defaults; the others are used only
# when present in the template call.
my %default_cfg=(
frequency => '1 week',
categories => '',
namespaces => '0',
);
# Cache so we don't look up the same category multiple times.
# Emptied at the start of every run() and filled by load_pages().
my %cache=();
# Constructor. Builds the object through the parent class constructor
# (called without arguments) and blesses it into the requested class.
sub new {
    my ($class)=@_;
    my $obj=$class->SUPER::new();
    return bless($obj, $class);
}
=pod
=for info
Approved 2009-01-14.<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 21]]
=cut
# Approval indicator for this task. Always yields the constant 5; how the
# number is interpreted is defined by the bot framework, not by this file.
sub approved {
    my $value=5;
    return $value;
}
# Main entry point of the task.
#
# Enumerates every page in $category. A page is skipped cheaply when its
# stored record says it has not been edited since the last check and its
# next scheduled update is still in the future. Otherwise the page text is
# loaded and each transclusion of $template is rewritten: parameters are
# normalised, and when an update is due a random page is picked from the
# configured categories (avoiding pages already named in this bot's earlier
# edit summaries on that page) and written into the "page" parameter.
#
# Parameters:
#   $self - the task object
#   $api  - the framework API object; this sub uses its task, query,
#           edittoken, edit, process_templates, store, user, halting, log,
#           warn and namespace_reverse_map methods
#
# Returns a number:
#   60    - the subscription category could not be listed
#   300   - the edit token reported the task as shut off
#   0     - the framework is halting, or the 5 minute budget is used up
#   $next - otherwise; starts at $max_next and is lowered to the shortest
#           wait found for any page (or to 60 after a transient failure)
# NOTE(review): presumably the number of seconds until the framework should
# call run() again; that contract is not visible in this file.
sub run {
    my ($self, $api)=@_;
    my $res;

    $api->task('RandomPagePicker', 0, 10, qw/d::Templates d::IWNS/);

    # Spend a max of 5 minutes on this task before restarting
    my $endtime=time()+300;

    # Load namespaces
    $self->{'namespaces'}={$api->namespace_reverse_map};

    my $next=$max_next;
    %cache=();
    my %q=(
        generator => 'categorymembers',
        gcmtitle  => $category,
        gcmsort   => 'sortkey',
        gcmlimit  => 'max',
        prop      => 'info',
    );
    do {
        $res=$api->query(%q);
        if($res->{'code'} ne 'success'){
            $api->warn("Failed to retrieve transclusion list for $template: ".$res->{'error'}."\n");
            return 60;
        }
        # Remember the continuation token, or drop it so the loop ends
        if(exists($res->{'query-continue'})){
            $q{'gcmcontinue'}=$res->{'query-continue'}{'categorymembers'}{'gcmcontinue'};
        } else {
            delete $q{'gcmcontinue'};
        }

        # Process found pages
        foreach (values %{$res->{'query'}{'pages'}}){
            return 0 if $api->halting;

            my $page=$_->{'title'};
            my $pageid=$_->{'pageid'};
            my $revid=$_->{'lastrevid'} // 0;
            my $check=$api->store->{$pageid} // undef;

            # If the page has been edited, we have to check it because they
            # might have edited the template parameters.
            $check=undef if(defined($check) && $check->{'revid'}!=$revid);

            # If the page hasn't been edited since the last check, we can use
            # the saved data to possibly skip loading the page.
            if(defined($check) && $check->{'nextrun'}>time()){
                my $t=$check->{'nextrun'}-time();
                $next=$t if $t<$next;
                next;
            }
            # Carry over the previously computed minimum interval, if any
            my $min=(defined($check) && exists($check->{'min'}))?$check->{'min'}:0;

            $api->log("Checking for $template in $page");

            # Ok, check the page
            my $tok=$api->edittoken($page, EditRedir => 1);
            if($tok->{'code'} eq 'shutoff'){
                $api->warn("Task disabled: ".$tok->{'content'}."\n");
                return 300;
            }
            if($tok->{'code'} ne 'success'){
                $api->warn("Failed to get edit token for $page: ".$tok->{'error'}."\n");
                next;
            }
            next if exists($tok->{'missing'});

            # Get page text
            my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};

            # Edit settings; the callback below may override them
            my $summary="Automatically updating $template";
            my $minor=0;
            my $bot=0;
            # Set once the first matching template has been handled
            my $done=0;
            # Fresh record for this page, saved to the store after processing
            $check={
                revid => $tok->{'lastrevid'} // 0,
                nextrun => 0,
                min => $min,
            };
            # The callback returns the replacement text for a template call,
            # or undef to leave that call untouched.
            my $outtxt=$api->process_templates($intxt, sub {
                my $name=shift;
                my @params=@{shift()};

                return undef unless $name eq $template;

                # Start from the defaults and overlay "name = value" params
                my %cfg=%default_cfg;
                foreach (@params){
                    $cfg{$1}=$2 if /^\s*([^=]+?)\s*=\s*(.*?)\s*$/s;
                }
                # NOTE(review): the alternation binds looser than the anchors,
                # so this accepts anything starting with "1"/"y" or ending in
                # "yes". Left as is to avoid changing how existing values are
                # interpreted.
                $cfg{'minor'}=($cfg{'minor'}=~/^[1y]|yes$/i)?1:0 if(exists($cfg{'minor'}));
                $cfg{'botflag'}=($cfg{'botflag'}=~/^[1y]|yes$/i)?1:0 if(exists($cfg{'botflag'}));

                # Normalised head of the template; every exit path that
                # rewrites the template appends to this and closes it.
                my $out="{{$template\n";
                $out.=' | frequency = '.$cfg{'frequency'}."\n";
                $out.=' | categories = '.$cfg{'categories'}."\n";
                $out.=' | namespaces = '.$cfg{'namespaces'}."\n";
                $out.=' | summary = '.$cfg{'summary'}."\n" if exists($cfg{'summary'});
                $out.=' | minor = '.($cfg{'minor'}?'yes':'no')."\n" if exists($cfg{'minor'});
                $out.=' | botflag = '.($cfg{'botflag'}?'yes':'no')."\n" if exists($cfg{'botflag'});
                $out.=' | template = '.$cfg{'template'}."\n" if exists($cfg{'template'});
                $out.=" | this page = $page\n";

                if($done){
                    $out.=" | error = Only one $template is allowed per page\n}}";
                    return $out;
                }
                $done=1;

                # Time of the last pick; 0 when absent or unparseable
                if(exists($cfg{'date'})){
                    $cfg{'date'}=str2time($cfg{'date'});
                    $cfg{'date'}=0 unless defined($cfg{'date'});
                } else {
                    $cfg{'date'}=0;
                }
                my $t=add_frequency($cfg{'date'}, $cfg{'frequency'}, $check->{'min'});
                if(!defined($t)){
                    $out.=" | error = Invalid frequency\n}}";
                    $check->{'nextrun'}=time()+86400;
                    $summary="{{[[User:AnomieBOT/RandomPage]]}} error: Invalid frequency";
                    return $out;
                }
                $check->{'nextrun'}=$t;
                # Not due yet: leave the template alone
                if($t>time()){
                    $t-=time();
                    $next=$t if $t<$next;
                    return undef;
                }

                if($cfg{'namespaces'}!~/^\d+(?:,\d+)*$/){
                    $out.=" | error = Invalid namespaces parameter\n}}";
                    $check->{'nextrun'}=time()+86400;
                    $summary="{{[[User:AnomieBOT/RandomPage]]}} error: (Invalid namespaces parameter)";
                    return $out;
                }
                # For each requested namespace, include both members of the
                # even/odd namespace pair when they exist.
                my %ns=();
                foreach (split /,/, $cfg{'namespaces'}){
                    if(exists($self->{'namespaces'}{$_})){
                        $ns{$_&~1}=1 if exists($self->{'namespaces'}{$_&~1});
                        $ns{$_|1}=1 if exists($self->{'namespaces'}{$_|1});
                    } else {
                        $out.=" | error = Invalid namespace number $_, see [[Help:Namespaces#List of namespaces|Help:Namespaces]].\n}}";
                        $check->{'nextrun'}=time()+86400;
                        $summary="{{[[User:AnomieBOT/RandomPage]]}} error: (Invalid namespace number $_, see [[Help:Namespaces#List of namespaces|Help:Namespaces]].)";
                        return $out;
                    }
                }

                # load_pages() counts its API queries in $self->{'lookups'}
                # and dies on a malformed categories expression.
                $self->{'lookups'}=0;
                my $pages;
                eval {
                    $pages=$self->load_pages($api, join('|',sort { $a<=>$b } keys %ns), $cfg{'categories'});
                };
                if($@){
                    my $x=$@;
                    $x=~s/\s+$//;
                    $out.=" | error = Invalid categories parameter: <nowiki>$x</nowiki>\n}}";
                    $check->{'nextrun'}=time()+86400;
                    $summary="{{[[User:AnomieBOT/RandomPage]]}} error: (Invalid categories parameter: <nowiki>$x</nowiki>)";
                    return $out;
                }
                # undef means an API query failed: try again in a minute
                if(!defined($pages)){
                    $next=60 if $next>60;
                    return undef;
                }
                # The more queries a subscription needs, the less often it
                # may run: 10 minutes per query.
                $check->{'min'}=$self->{'lookups'}*600;
                if(!@$pages){
                    $out.=" | error = No pages match.\n}}";
                    $check->{'nextrun'}=time()+$check->{'min'};
                    $summary="{{[[User:AnomieBOT/RandomPage]]}} error: (No pages match.)";
                    return $out;
                }

                # Find the list of pages we've picked in the last 5000 edits
                # (and since the last time we ran out of pages), to avoid
                # picking them again too soon
                my %pages;
                @pages{@$pages}=undef;
                my %qc=(
                    titles  => $page,
                    prop    => "revisions",
                    rvprop  => "comment",
                    rvuser  => $api->user,
                    rvlimit => "max",
                );
                $qc{'rvend'}=$api->store->{"reset$pageid"} if exists($api->store->{"reset$pageid"});
                my $resc=$api->query(%qc);
                if($resc->{'code'} ne 'success'){
                    $api->warn("Failed to retrieve edit summaries for $page: ".$resc->{'error'}."\n");
                    # We are inside the template callback, so whatever is
                    # returned here becomes the template's replacement text.
                    # Leave the template untouched and retry in a minute,
                    # as is done when load_pages() fails.
                    $next=60 if $next>60;
                    return undef;
                }
                # Each earlier pick ends its edit summary with "[[Title]]"
                foreach (@{(values %{$resc->{'query'}{'pages'}})[0]{'revisions'}}){
                    next unless($_->{'comment'} && $_->{'comment'}=~/\[\[([^]]*)\]\]$/);
                    delete $pages{$1};
                    last unless %pages;
                }
                if(%pages){
                    $pages=[keys %pages];
                } else {
                    # Ran out of pages, reset the date for "recently"
                    $api->store->{"reset$pageid"}=$tok->{'revisions'}[0]{'timestamp'};
                }

                my $pg=$pages->[int rand(@$pages)];
                $t=time();
                $out.=" | page = $pg\n";
                $out.=" | date = ".strftime("%F %T +0000", gmtime $t)."\n";

                # Spell out the effective minimum interval in words
                my $min=$check->{'min'};
                $min=$minimum_frequency if($min<$minimum_frequency);
                my @m=();
                if($min>=7*86400){
                    my $w=POSIX::floor($min/(7*86400));
                    $min-=$w*7*86400;
                    push @m, "$w week".(($w==1)?'':'s');
                }
                if($min>=86400){
                    my $d=POSIX::floor($min/86400);
                    $min-=$d*86400;
                    push @m, "$d day".(($d==1)?'':'s');
                }
                if($min>=3600){
                    my $h=POSIX::floor($min/3600);
                    $min-=$h*3600;
                    push @m, "$h hour".(($h==1)?'':'s');
                }
                if($min>0){
                    my $m=POSIX::ceil($min/60);
                    push @m, "$m minute".(($m==1)?'':'s');
                }
                $out.=" | minimum frequency = ".join(' ', @m)."\n";
                $out.="}}";

                $minor=$cfg{'minor'} if exists($cfg{'minor'});
                $bot=$cfg{'botflag'} if exists($cfg{'botflag'});
                $summary=$cfg{'summary'} if exists($cfg{'summary'});
                # Truncate so the summary plus ": [[title]]" fits in 250
                # characters; the trailing link is what later runs parse.
                $summary=substr($summary,0,250-length($pg)-6).": [[$pg]]";
                $check->{'nextrun'}=add_frequency($t, $cfg{'frequency'}, $check->{'min'});
                return $out;
            });

            # Need to edit?
            if($outtxt ne $intxt){
                $api->log("$summary in $page");
                my $r=$api->edit($tok, $outtxt, $summary, $minor, $bot);
                if($r->{'code'} ne 'success'){
                    $api->warn("Write failed on $page: ".$r->{'error'}."\n");
                    next;
                }
                # Record our own revision so the next pass does not treat
                # this edit as a change made by someone else
                $check->{'revid'}=$r->{'edit'}{'newrevid'} // 0;
            } else {
                $api->log("Nothing to do in $page");
            }

            # Store data
            $api->store->{$pageid}=$check;

            # If we've been at it long enough, let another task have a go.
            return 0 if time()>=$endtime;
        }
    } while(exists($q{'gcmcontinue'}));

    return $next;
}
# Compute the time of the next update.
#
# Parameters:
#   $lastrun - epoch time of the previous update
#   $freq    - interval text such as "2 days" or "2 hours 30 minutes"; a
#              sequence of "<number> <unit>" pairs where unit is minute,
#              hour, day, week, month or year (optional trailing "s",
#              case-insensitive)
#   $min     - minimum interval in seconds; raised to $minimum_frequency
#              when smaller
#
# Returns $lastrun advanced by $freq, but never earlier than $lastrun plus
# the minimum interval. Returns undef when $freq contains anything that is
# not a recognised "<number> <unit>" pair.
sub add_frequency {
    my ($lastrun,$freq,$min)=@_;

    # For each unit: index into the gmtime() list, and multiplier
    my %unit=(
        minute => [1, 1],
        hour   => [2, 1],
        day    => [3, 1],
        week   => [3, 7],
        month  => [4, 1],
        year   => [5, 1],
    );

    my @parts=gmtime $lastrun;

    # Leading space lets the first pair match the same pattern as the rest
    my $rest=' '.$freq;
    while($rest=~s/^\s+(\d+)\s+(minute|hour|day|week|month|year)s?//i){
        my ($count,$name)=($1,lc($2));
        my ($field,$scale)=@{$unit{$name}};
        $parts[$field]+=$scale*$count;
    }
    # Anything left over means the text was not fully understood
    return undef unless($rest=~/^\s*$/);

    # NOTE(review): the fields may now be out of their normal ranges (for
    # example day 38); this relies on the imported timegm accepting that.
    my $when=timegm($parts[0],$parts[1],$parts[2],$parts[3],$parts[4],$parts[5]);

    my $floor=($min<$minimum_frequency)?$minimum_frequency:$min;
    my $earliest=$lastrun+$floor;
    return ($when<$earliest)?$earliest:$when;
}
# Evaluate a "categories" expression and return the matching page titles.
#
# Parameters:
#   $self - the task object; $self->{'lookups'} is incremented once per
#           categorymembers API query actually sent
#   $api  - the framework API object (query and warn are used)
#   $ns   - namespace numbers joined with "|", passed as cmnamespace
#   $text - either "Category:Name" or one of
#             {{AND|expr|expr|...}}      pages present in every expr
#             {{OR|expr|expr|...}}       pages present in any expr
#             {{AND NOT|expr|expr|...}}  pages in the first expr and in
#                                        none of the others
#             {{SUBCATS|Category:Name|depth}}  the category plus its
#                                        subcategories down to depth;
#                                        depth defaults to -1, which
#                                        never reaches 0 and so is unlimited
#           where each expr is again a category or a nested expression.
#
# Returns an array reference of titles, with talk page titles mapped to
# their subject pages, or undef when an API query fails.
# Dies with a newline-terminated message when $text is malformed; run()
# copies that message onto the wiki page, so it must not carry Perl's
# "at FILE line N." suffix.
sub load_pages {
    my $self=shift;
    my $api=shift;
    my $ns=shift;
    my $text=shift;

    $text=~s/^\s+|\s+$//g;
    if($text=~/^Category:/i){
        # The member list depends on the namespaces as well as the title, so
        # both go into the cache key. SUBCATS looks categories up with
        # namespace 14 and then again with the caller's namespaces; keyed on
        # the title alone, the second lookup would get the subcategory list.
        my $key=$ns."\n".$text;
        if(!exists($cache{$key})){
            my %q=(
                list        => 'categorymembers',
                cmtitle     => $text,
                cmprop      => 'title',
                cmnamespace => $ns,
                cmtype      => ns2cmtype($ns),
                cmlimit     => 'max'
            );
            my %x=();
            do {
                my $res=$api->query(%q);
                if($res->{'code'} ne 'success'){
                    $api->warn("Failed to retrieve categories for $text: ".$res->{'error'});
                    return undef;
                }
                if(exists($res->{'query-continue'})){
                    $q{'cmcontinue'}=$res->{'query-continue'}{'categorymembers'}{'cmcontinue'};
                } else {
                    delete $q{'cmcontinue'};
                }
                foreach (@{$res->{'query'}{'categorymembers'}}){
                    # Odd namespaces are talk pages: use the subject title
                    $_->{'title'}=~s/^([^:]+) talk:/$1:/ if(($_->{'ns'}&1)==1);
                    $_->{'title'}=~s/^Talk:// if $_->{'ns'}==1;
                    $x{$_->{'title'}}=1;
                }
                $self->{'lookups'}++;
            } while(exists($q{'cmcontinue'}));
            $cache{$key}=[keys %x];
        }
        return $cache{$key};
    }

    die "Invalid parameter \"$text\"\n" unless $text=~/\{\{\s*(AND|OR|AND NOT|SUBCATS)\s*\|(.+?)\}\}$/is;
    my ($op,$params)=(uc($1),$2);
    $params=~s/^\s+|\s+$//g;

    # Split the parameters on "|" at brace depth 0 only, so that nested
    # {{...}} expressions stay in one piece.
    my @params=();
    my $depth=0;
    my $l=length($params);
    my $j=0;
    for(my $i=0; $i<$l; $i++){
        my $c=substr($params,$i,1);
        if($c eq '{'){
            $depth++;
        } elsif($c eq '}'){
            $depth--;
            die "Unexpected '}' in \"$text\"\n" if $depth<0;
        } elsif($c eq '|' && $depth==0){
            push @params, substr($params, $j, $i-$j);
            $j=$i+1;
        }
    }
    push @params, substr($params, $j, $l-$j) if $j<$l;
    die "Unmatched '{' in \"$text\"\n" if $depth!=0;

    if($op eq 'SUBCATS'){
        die "$op first parameter must be a category\n" unless(@params>=1 && $params[0]=~/^Category:/i);
        push @params, -1 if(@params==1);
        die "$op optional second parameter must be an integer depth\n" if(@params>=2 && $params[1]!~/^[+-]?\d+$/);
        die "$op takes only 1 or 2 parameters\n" if @params>=3;

        # Breadth-first walk of the category tree. %cats records the
        # categories already seen, which also protects against cycles.
        my %cats=();
        my @cats=( [$params[0], $params[1]] );
        while(my $x=shift(@cats)){
            my ($cat,$levels)=@$x;
            next if exists($cats{$cat});
            $cats{$cat}=1;
            next if $levels==0;
            # Namespace 14: fetch only the subcategories of this category
            my $res=$self->load_pages($api, '14', $cat);
            return undef unless defined($res);
            push @cats, [ $_, $levels-1 ] foreach (@$res);
        }
        # From here on, treat it as the union of all collected categories
        $op='OR';
        @params=keys %cats;
    }

    # Count, per title, how many parameters contain it. For AND NOT the
    # first parameter counts +1 and every later one counts -1.
    my %pages=();
    my $add=1;
    foreach my $p (@params){
        my $res=$self->load_pages($api, $ns, $p);
        return undef unless defined($res);
        foreach (@$res){
            $pages{$_}=0 unless exists($pages{$_});
            $pages{$_}+=$add;
        }
        $add=-1 if $op eq 'AND NOT';
    }
    if($op eq 'AND'){
        my $ct=@params;
        return [grep($pages{$_}>=$ct, keys %pages)];
    } elsif($op eq 'OR'){
        return [keys %pages];
    } elsif($op eq 'AND NOT'){
        return [grep($pages{$_}>=1, keys %pages)];
    } else {
        die "Invalid op \"$op\"\n";
    }
}
1;