User:AnomieBOT/source/tasks/ArticleCreationGrapher.pm
Appearance
Approved 2010-08-29 Wikipedia:Bots/Requests for approval/AnomieBOT 41
Task discontinued 2022-01-25. The graph has trailed off, the page it's on hasn't been updated in years and gets minimal views, and we now have mw:Extension:Graph that can generate graphs from data instead of having to have an image uploaded monthly. If someone still wants such a graph, it would be better to have a bot create a data table somewhere to be graphed via that extension.
package tasks::ArticleCreationGrapher;
=pod
=begin metadata
Bot: AnomieBOT
Task: ArticleCreationGrapher
BRFA: Wikipedia:Bots/Requests for approval/AnomieBOT 41
Status: Inactive 2022-01-25
Created: 2010-08-25
Creates a graph showing article creation for a project.
=end metadata
=cut
use utf8;
use strict;
use Data::Dumper;
use IPC::Open2;
use AnomieBOT::Task qw/:time/;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;
# Graph generation number. run() regenerates a graph whenever the epoch saved
# in the store is lower than this, so bump it to force fresh graphs (for
# example after installing an improved gnuplot).
my $epoch = 2;

# Command list handed to open2() to start gnuplot.
my @gnuplot = ('/usr/bin/gnuplot');

# When true, run() skips the live queries and graphs only what is already in
# the store.
my $nonet = 0;

# One hash per graph to maintain. Keys as used by run():
#   for           - project name shown in the generated page and file text
#   cats          - categories whose members are graphed
#   page          - statistics page title; also the key prefix for stored data
#   page_cats     - categories appended to the statistics page text
#   writepage     - true to (re)write the statistics page
#   graph         - file title the SVG is uploaded to
#   size          - [width, height] given to gnuplot's svg terminal
#   thumbsize     - pixel width used when embedding the graph in the page
#   graph_cats    - categories appended to the file description
#   x2ticsettings - options for gnuplot's "set x2tics" (used if x2tics is set)
#   x2tics        - explicit x2 tic list; an empty string disables x2 tics
my @pages = (
    {
        'for'           => 'WikiProject Medicine - Dermatology task force',
        'cats'          => [ 'Category:Dermatology task force articles' ],
        'page'          => 'Wikipedia:WikiProject Medicine/Dermatology task force/Articles created',
        'page_cats'     => [],
        'writepage'     => 0,
        'graph'         => 'File:File-WikiProject Medicine - Dermatology task force - Articles created.svg',
        'size'          => [ 1000, 600 ],
        'thumbsize'     => 800,
        'graph_cats'    => [],
        'x2ticsettings' => 'rotate by 0 scale 0.4',
        'x2tics'        => '',
    },
);
# Constructor: build the object via the parent class constructor (called with
# no arguments), then bless it into the requested class.
sub new {
    my ($class) = @_;

    my $task = $class->SUPER::new();
    return bless($task, $class);
}
=pod
=for info
Approved 2010-08-29<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 41]]
=for info
Task discontinued 2022-01-25. The graph has trailed off, the page it's on hasn't been updated in years and gets minimal views, and we now have [[mw:Extension:Graph]] that can generate graphs from data instead of having to have an image uploaded monthly. If someone still wants such a graph, it would be better to have a bot create a data table somewhere to be graphed via that extension.
=cut
# Report this task's approval status code to the framework. The value is the
# constant -6; the POD above records that the task was approved in 2010 and
# discontinued in 2022. NOTE(review): the exact meaning of -6 is defined by
# AnomieBOT::Task, not visible here.
sub approved {
    my $status_code = -6;
    return $status_code;
}
# Private helper: epoch time of 00:00:00 UTC on the first day of next month.
# The December -> January rollover is done explicitly so timegm() is never
# handed month index 12 (Time::Local rejects months outside 0..11).
sub _first_of_next_month {
    my ($mon, $year) = (gmtime())[4, 5];
    if (++$mon > 11) {
        $mon = 0;
        $year++;
    }
    return timegm(0, 0, 0, 1, $mon, $year);
}

# Main task entry point. For every entry in @pages:
#   1. load (or refresh) the creation timestamp, title and redirect flag of
#      each page listed by the configured categories, caching them in the store;
#   2. feed per-month article and redirect counts to gnuplot to produce an SVG;
#   3. optionally rewrite the statistics page, then upload the SVG and update
#      the file description;
#   4. record that the next run is due at the start of next month.
#
# Parameters:
#   $self - the task object
#   $api  - API object supplied by the caller (provides task, halting, store,
#           log, warn, iterator, query, ISO2timestamp, edittoken, edit, upload,
#           user and operator)
#
# Returns a number: 0 when halting was requested, 60 after a failed query,
# 300 when the task has been shut off, otherwise the earliest stored "nextrun"
# minus the current time. NOTE(review): presumably the framework treats this
# as seconds to wait before calling run() again - confirm in AnomieBOT::Task.
sub run {
    my ($self, $api) = @_;

    $api->task('ArticleCreationGrapher', 0, 10, qw/d::Timestamp/);

    foreach my $page (@pages) {
        return 0 if $api->halting;

        # Update: data found as a single hash under the page title is split
        # into the separate "epoch:", "nextrun:" and "pg:" keys used below.
        if ( exists( $api->store->{$page->{'page'}} ) ) {
            my $pgdata = $api->store->{$page->{'page'}};
            $api->store->{'epoch:'.$page->{'page'}}   = $pgdata->{'epoch'} // 0;
            $api->store->{'nextrun:'.$page->{'page'}} = $pgdata->{'nextrun'};
            $api->store->{'pg:'.$page->{'page'}}      = $pgdata->{'pages'};
            delete( $api->store->{$page->{'page'}} );
        }

        my $nextrun = $api->store->{'nextrun:'.$page->{'page'}} // 0;
        my $pages   = $api->store->{'pg:'.$page->{'page'}} // {};

        # Rows to graph: { title, redir, ts } for each known page.
        my @pp = ();

        if ($nonet) {
            # Offline mode: graph whatever is already cached.
            foreach my $v (values %$pages) {
                next unless ref($v) eq 'HASH';
                push @pp, { redir=>$v->{'r'}, ts=>$v->{'t'}, title=>$v->{'tt'} };
            }
        } else {
            # Not due yet, and the stored graph is from the current epoch.
            next if ($nextrun > time() && ($api->store->{'epoch:'.$page->{'page'}} // 0) >= $epoch);

            $api->log( "Loading data to graph " . $page->{'page'} );
            my $i = $api->iterator(
                generator    => 'categorymembers',
                gcmtitle     => $page->{'cats'},
                gcmlimit     => 'max',
                gcmnamespace => '0|1',
                gcmtype      => 'page',
                prop         => 'info',
                inprop       => 'subjectid',
            );
            my @pageids = ();
            while (my $item = $i->next) {
                if (!$item->{'_ok_'}) {
                    $api->warn("Failed to retrieve page list for $page->{page}: ".$item->{'error'}."\n");
                    return 60;
                }
                push @pageids, $item->{'subjectid'} if exists($item->{'subjectid'});
            }

            # Page ids already cached; only title/redirect status is refreshed
            # for these, in batches, further down.
            my @px = ();
            my $ct = 0;
            foreach my $p (@pageids) {
                return 0 if $api->halting;
                if (exists($pages->{$p})) {
                    push @px, $p;
                    next;
                }
                if ($ct++ >= 100) {
                    # periodic save
                    $api->store->{'pg:'.$page->{'page'}} = $pages;
                    $ct = 0;
                }

                # Ask for the oldest revision to learn when the page was created.
                my $res = $api->query(
                    pageids => $p,
                    prop    => 'info|revisions',
                    rvprop  => 'timestamp',
                    rvdir   => 'newer',
                    rvlimit => 1
                );
                if ($res->{'code'} eq 'shutoff') {
                    $api->store->{'pg:'.$page->{'page'}} = $pages;
                    $api->warn("Task disabled: ".$res->{'content'}."\n");
                    return 300;
                }
                if ($res->{'code'} ne 'success') {
                    $api->store->{'pg:'.$page->{'page'}} = $pages;
                    $api->warn("Failed to get info for $p: ".$res->{'error'}."\n");
                    return 60;
                }
                my $pg = $res->{'query'}{'pages'}{$p};
                next unless defined($pg);
                my $is_redir = exists($pg->{'redirect'});
                my $ts = $pg->{'revisions'}[0]{'timestamp'} // '';
                next if $ts eq '';
                $ts = $api->ISO2timestamp($ts);
                $pages->{$p} = {
                    t  => $ts,
                    r  => $is_redir,
                    tt => $pg->{'title'},
                };
                push @pp, {
                    title => $pg->{'title'},
                    redir => $is_redir,
                    ts    => $ts,
                };
            }
            $api->store->{'pg:'.$page->{'page'}} = $pages;

            if (@px) {
                my %q = (
                    pageids => [],
                    prop    => 'info',
                );
                while (@px) {
                    push @{$q{'pageids'}}, join('|', splice(@px, 0, 500));
                }
                $i = $api->iterator(%q);
                while (my $item = $i->next) {
                    if (!$item->{'_ok_'}) {
                        $api->warn("Failed to retrieve page list for $page->{page}: ".$item->{'error'}."\n");
                        return 60;
                    }
                    my $p = $item->{'pageid'};
                    $pages->{$p}{'r'}  = exists($item->{'redirect'});
                    $pages->{$p}{'tt'} = $item->{'title'};
                    push @pp, {
                        title => $item->{'title'},
                        redir => $pages->{$p}{'r'},
                        ts    => $pages->{$p}{'t'},
                    };
                }
                $api->store->{'pg:'.$page->{'page'}} = $pages;
            }
        }

        # Nothing to plot: everything below reads $pp[0], which would
        # autovivify a bogus entry and plot a range starting in 1970. Skip the
        # graph and try again next month instead.
        if (!@pp) {
            $api->warn("No pages found to graph for $page->{page}\n");
            $api->store->{'epoch:'.$page->{'page'}}   = $epoch;
            $api->store->{'nextrun:'.$page->{'page'}} = _first_of_next_month();
            next;
        }

        $api->log( "Graphing data for " . $page->{'page'} );
        # Oldest first; ties broken by title so the order is stable.
        @pp = sort {
            ($a->{'ts'} <=> $b->{'ts'}) || ($a->{'title'} cmp $b->{'title'})
        } @pp;

        my $pid = open2(my $gp_out, my $gp_in, @gnuplot);
        my ($w, $h) = @{$page->{'size'}};
        my $range = '["'.g_tt($pp[0]{'ts'}).'":"'.g_tt(time).'"]';
        print {$gp_in} <<EOH ;
set terminal svg enhanced size $w $h font "DejaVu Sans"
set key horizontal bmargin center
set autoscale
set ytics nomirror out
set yrange [0:*]
set xdata time
set x2data time
set timefmt "%Y-%m"
set xtics nomirror out format "%b %Y"
set xrange $range
set x2range $range
EOH
        print {$gp_in} <<EOH if $page->{x2tics} ne '';
set x2tics nomirror out $page->{x2ticsettings}
set x2tics ($page->{x2tics})
set grid x2tics
EOH

        # One x tic per month from the first creation month through the
        # current month: January gets the year as a major label, every other
        # month an unlabelled minor tic.
        my ($mon, $year) = (gmtime($pp[0]{'ts'}))[4, 5];
        my $end = strftime('%Y-%m', gmtime());
        my @tics = ();
        my $x;
        do {
            $x = strftime('%Y-%m', 0, 0, 0, 1, $mon, $year);
            if (++$mon > 11) {
                $mon = 0;
                $year++;
            }
            push @tics, ($x =~ /^(\d+)-01$/) ? qq("$1" "$x" 0) : qq("" "$x" 1);
        } while ($x lt $end);    # "YYYY-MM" compares correctly as a string
        print {$gp_in} "set xtics (", join(',', @tics), ")\n";

        print {$gp_in} "plot '-' using 1:2 title \"Articles\" w filledcurves x1 fs transparent solid 0.1 lc rgb \"#0000ff\", ";
        print {$gp_in} "'-' using 1:2 title \"Redirects\" w filledcurves x1 fs transparent solid 0.1 lc rgb \"#ff0000\"\n";

        # First inline data set: non-redirects per month.
        my $xx = g_init($pp[0]{'ts'});
        foreach my $p (@pp) {
            $xx = g_update($xx, $p->{'ts'}, 0, $gp_in);
            g_add($xx) unless $p->{'redir'};
        }
        g_update($xx, time, 1, $gp_in);
        print {$gp_in} "e\n";

        # Second inline data set: redirects per month.
        $xx = g_init($pp[0]{'ts'});
        foreach my $p (@pp) {
            $xx = g_update($xx, $p->{'ts'}, 0, $gp_in);
            g_add($xx) if $p->{'redir'};
        }
        g_update($xx, time, 1, $gp_in);
        print {$gp_in} "e\n";
        close $gp_in;

        # Drain gnuplot's output BEFORE waiting for it to exit: if the SVG is
        # larger than the pipe buffer, gnuplot blocks writing it and a
        # waitpid() issued first would never return.
        my $svg = do { local $/ = undef; <$gp_out> };
        close $gp_out;
        waitpid($pid, 0);

        # Never replace the existing image with an empty file.
        if (!defined($svg) || $svg eq '') {
            $api->warn("gnuplot produced no output for $page->{page}\n");
            next;
        }

        if ($page->{'writepage'}) {
            $api->log( "Updating " . $page->{'page'} );
            my $tok = $api->edittoken($page->{'page'});
            if ($tok->{'code'} eq 'shutoff') {
                $api->warn("Task disabled: ".$tok->{'content'}."\n");
                return 300;
            }
            if ($tok->{'code'} ne 'success') {
                $api->warn("Failed to get edit token for $page->{page}: ".$tok->{'error'}."\n");
                next;
            }
            my $txt = "This is a compilation of article creation dates for $page->{for}\n\nThis page is generated by {{User|".$api->user."}} once a month. Last generated: ~~~~~\n\n[[$page->{graph}|".$page->{'thumbsize'}."px|frameless|center|alt=Line graph of article and redirect creation dates by month]]\n\n{| class=\"wikitable\"\n! Article Created !! Article Title !! Type\n";
            foreach my $p (@pp) {
                $txt .= "|-\n| ".strftime('%F, %T', gmtime $p->{'ts'})." || [[:".$p->{'title'}."]] || ".($p->{'redir'}?'Redirect':'Article')."\n";
            }
            $txt .= "|}\n\n";
            foreach my $c (@{$page->{'page_cats'}}) {
                $txt .= "[[Category:$c]]\n";
            }
            my $res = $api->edit($tok, $txt, "Update page statistics", 0, 0);
            if ($res->{'code'} eq 'shutoff') {
                $api->warn("Task disabled: ".$res->{'content'}."\n");
                return 300;
            }
            if ($res->{'code'} ne 'success') {
                $api->warn("Failed to update $page->{page}: ".$res->{'error'}."\n");
                next;
            }
        }

        $api->log( "Uploading image file " . $page->{'graph'} . ' for ' . $page->{'page'} );
        my $tok = $api->edittoken($page->{'graph'});
        if ($tok->{'code'} eq 'shutoff') {
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if ($tok->{'code'} ne 'success') {
            $api->warn("Failed to get edit token for $page->{graph}: ".$tok->{'error'}."\n");
            next;
        }
        my $comment = "Updated graph";
        my $txt = "{{imbox|type=style|image=[[File:Ambox warning yellow.svg|50px]]|imageright=[[File:Crystal Clear action run.svg|50px]]|text=This image is automatically updated by a bot, {{User|".$api->user."}}. Any changes will be overwritten automatically.<center>'''Do not move this file to Wikimedia Commons.'''<br /><small>If for some reason you need to stop the bot, place {{tl|nobots}} on this page or post a message [[User:".$api->user."/shutoff/ArticleCreationGrapher|here]].</small></center>}}\n{{Information\n|description=Bot-generated graph of [[$page->{page}|$page->{for}]] article creation\n|source=Own work, created using [[Gnuplot]]\n|date=~~~~~\n|author={{User|".$api->operator."}} as the author of {{User|".$api->user."}}\n|permission={{PD-self|date=August 2010}}\n}}\n{{esoteric file}}\n\n[[Category:Wikipedia charts]]\n";
        foreach my $c (@{$page->{'graph_cats'}}) {
            $txt .= "[[Category:$c]]\n";
        }
        # When the file does not exist yet, the full description text is
        # passed as the upload comment.
        $comment = $txt if exists($tok->{'missing'});
        my $res = $api->upload($tok, Data=>$svg, Comment=>$comment, IgnoreWarnings=>1);
        if ($res->{'code'} eq 'shutoff') {
            $api->warn("Task disabled: ".$res->{'content'}."\n");
            return 300;
        }
        if ($res->{'code'} ne 'success') {
            $api->warn("Failed to upload new version for $page->{graph}: ".$res->{'error'}."\n");
            next;
        }

        # Upload succeeded: remember the data and schedule next month's run.
        $api->store->{'pg:'.$page->{'page'}}      = $pages;
        $api->store->{'epoch:'.$page->{'page'}}   = $epoch;
        $api->store->{'nextrun:'.$page->{'page'}} = _first_of_next_month();

        if (!exists($tok->{'missing'})) {
            $api->log( "Updating image description for " . $page->{'graph'} );
            $res = $api->edit($tok, $txt, "Update page text", 0, 0);
            if ($res->{'code'} eq 'shutoff') {
                $api->warn("Task disabled: ".$res->{'content'}."\n");
                return 300;
            }
            if ($res->{'code'} ne 'success') {
                $api->warn("Failed to update page text for $page->{graph}: ".$res->{'error'}."\n");
            }
        }
    }

    # No more pages to check, try again later
    my $t = undef;
    foreach my $page (@pages) {
        my $nextrun = $api->store->{'nextrun:'.$page->{'page'}};
        $t = $nextrun if (!defined($t) || $t > $nextrun);
    }
    return $t - time();
}
# Convert an epoch timestamp to the "YYYY-MM" label of its UTC month, the
# format given to gnuplot via "set timefmt" in run().
sub g_tt {
    my ($when) = @_;

    my ($mon, $year) = (gmtime($when))[4, 5];
    return strftime('%Y-%m', 0, 0, 0, 1, $mon, $year);
}
# Create a fresh accumulator for one data series.
#
# The state is an array ref of four slots, as read by g_add() and g_update():
#   [0] running count for the current month
#   [1] comparison count (starts at 0)
#   [2] "YYYY-MM" label of the current month, taken from the given timestamp
#   [3] label of the last month written out (undef until something is written)
sub g_init {
    my ($first_ts) = @_;

    my @state = (0, 0, g_tt($first_ts), undef);
    return \@state;
}
# Count one more page in the current month of the given accumulator (slot 0
# of the state array ref), modifying it in place. As a post-increment, the
# value returned is the count before the increment.
sub g_add {
    my ($state) = @_;
    return $state->[0]++;
}
# Advance a series accumulator to the month containing $ts, writing a gnuplot
# data line for the month just finished if it had a non-zero count.
#
# Parameters (positional):
#   state  - array ref from g_init() or a previous g_update():
#            [count, comparison count, current "YYYY-MM", last written "YYYY-MM"]
#   $ts    - epoch timestamp of the page about to be counted (or "now")
#   $force - true on the final call: always write a line for $ts's month
#            unless that month was the last one written
#   $fh    - filehandle the "YYYY-MM count" lines are printed to
#
# Returns a new state array ref; the one passed in is not modified.
#
# The month cursor is moved on every month change. Previously it only moved
# when the finished month had a non-zero count, so after a month in which
# nothing was counted for this series (common for redirects) later pages were
# attributed to a stale, earlier month.
sub g_update {
    my ($state, $ts, $force, $fh) = @_;
    my ($count, $base, $month, $last_written) = @{$state};

    my $ts_month = g_tt($ts);

    if ($month ne $ts_month) {
        if ($count != $base) {
            # Label of the month before $month. The "YYYY-MM" month is
            # 1-based, strftime wants it 0-based, hence "- 2"; a resulting -1
            # relies on POSIX::strftime normalising to the previous December.
            my ($yyyy, $mm) = split(/-/, $month);
            my $month_before = strftime('%Y-%m', 0, 0, 0, 1, $mm - 2, $yyyy - 1900);

            # After a gap, first write the comparison count for the month
            # before so the curve only rises at the month that has data.
            print {$fh} "$month_before $base\n"
                if defined($last_written) && $last_written ne $month_before;
            print {$fh} "$month $count\n";

            $last_written = $month;
            $count        = 0;
            $base         = $count;
        }
        $month = $ts_month;
    }

    print {$fh} "$ts_month $count\n"
        if $force && (!defined($last_written) || $ts_month ne $last_written);

    return [$count, $base, $month, $last_written];
}
1;