Jump to content

User:Firefly/code

From Wikipedia, the free encyclopedia

Main 'master control' program:

 use strict;
 use Data::Dumper;
 use Time::HiRes qw(sleep);

 my $data_root = 'data/';

 our(%shared_data);
 %shared_data =
 (
  job_list      => [],
  add_job       => sub {my ($r_job , $timing) = @_;push (@{$shared_data{job_list}} , [$r_job , (time()+$timing)]);}
 );
 my(%plugins);

 open(CFG,'HBC_MCP.cfg');
 sysread(CFG, my $cfg, -s(CFG));
 close(CFG);
 eval($cfg);

 warn "Initializing plugins...\n";
 foreach my $name (keys(%plugins))
  {
  my $obj;
  $plugins{$name}{shared} = \%shared_data;
  $plugins{$name}{files} = $data_root.$name.'/';
  mkdir ($data_root.$name.'/') unless (-d($data_root.$name.'/'));
  my $plugin_command = 'use HBCPlugins::'.$name.';$obj = HBCPlugins::'.$name.'->new($plugins{\''.$name.'\'});';
  eval $plugin_command;
  $shared_data{$obj->{label}} = $obj;
  }
 warn "Initialization complete.\n\n";
 until (6 == 9)                               # Infinite loop, a serpent biting it's own tail.
  {
  my $ra_job_list = $shared_data{job_list};
  sleep(.1);                             # Important in all infinite loops to keep it calm
  my (@kept_jobs);                      # A place to put jobs not ready to run yet
  while (my $job = shift(@{$ra_job_list}))    # Go through each job pending
    {
    my($r_job , $timing) = @{$job};
    if ($timing < time())               # If it is time to run it then run it
      {
      if (ref($r_job) eq 'ARRAY')       # Callback style, reference to an array with a sub followed by paramaters
        {
        my $cmd = shift(@{$r_job});
        &{$cmd}(@{$r_job});
        }
      elsif (ref($r_job) eq 'CODE')     # Otherwise just the reference to the sub
        {
        &{$r_job};
        }
      }
    else                                # If it is not time yet, save it for later
      {
      push(@kept_jobs , $job)
      }
    }
  push (@{$ra_job_list} , @kept_jobs);        # Keep jobs that are still pending
  }


RenameChecker:

 package HBCPlugins::RenameChecker;
 use    Encode;
 use     MediaWiki;
 use     strict;
 use     Data::Dumper;
 use     URI::Escape;
 our     $self;
 
 sub new
  {
  shift;
  $self = shift;
  bless($self);
  warn "RenameChecker active.\n";
  my(@pages) =
  (
   'Wikipedia:Changing username/Usurpations',
   'Wikipedia:Changing username'
  );
  my $timing = 0;
  &{$self->{shared}{add_job}}(\&login,0);
  &{$self->{shared}{add_job}}(\&contact_LogWatcher_plugin, 0);
  &{$self->{shared}{add_job}}([\&contact_irc_plugin,\@pages] , 0);
  foreach my $page (@pages)
    {
    &{$self->{shared}{add_job}}([\&parse_page,undef,$page], $timing);
    $timing += 30;
    }
  return $self;
  }
 
 sub login
  {
  warn "Connecting to Wikipedia...\n";
  my $c                  =   MediaWiki->new;
  $c->setup
                          ({
                            'bot' => {'user' => $self->{params}{username},'pass' => $self->{params}{password}},
                            'wiki' => {'host' => 'en.wikipedia.org','path' => 'w'}
                          }) || warn "Failed to log in\n";
  my $whoami              =  $c->user();
  warn "$whoami connected\n";
  $self->{WP_obj} = $c;
  &{$self->{shared}{add_job}}(\&login,3600);
  }
 
 sub contact_LogWatcher_plugin
  {
  $self->{LogMonitor} = $self->{shared}{$self->{params}{log_label}} || die;
  $self->{LogMonitor}->add_job
   (
    type        => 'renameuser',    # The type of log to read
    start_point        => 'all',        # Where to start reading the log from, timestamp or 'all'(for everything) or 'now'(to only   log from now)
    catch_up_frequency    => 0,            # Delay between reading while catching up to current state
    regular_frequency    => 36000,        # Delay between reading after catching up to current state
    step_size        => 500            # How many entries to load per attempt. Limit of 500 for users, 5000 for bots and admins
   );
  }
 
 sub contact_irc_plugin
  {
  my $ra_pages = shift;
  $self->{IRCFeed} = $self->{shared}{$self->{params}{irc_label}} || die;
  my $esc = chr(0x03);
 #  my $rename_pattern = ($esc.'07Special:Log/renameuser'.$esc.'14');
 #  $self->{IRCFeed}->add_hook
 #   ({
 #     'check'    => sub {return $_[0] =~ m|$rename_pattern|i;},
 #     'callback' => sub {
 #                 sleep 3;
 #                 $self->{LogMonitor}->update_now('renameuser');
 #            warn "Rename detected, checking.\n";
 #                 foreach my $page (@{$ra_pages})
 #                {
 #                &{$self->{shared}{add_job}}([\&parse_page,undef,$page],2);
 #                }
 #            warn "Page checks called.\n";
 #                    },
 #   });
  foreach my $page (@{$ra_pages})
    {
    my $pattern = ($esc.'07('.$page.')'.$esc.'14');
    $self->{IRCFeed}->add_hook
     ({
       'check'    => sub {(($_[0] !~ m|HBC RenameClerkBot|) && ($_[0] =~ m|$pattern|) );return $1;},
       'callback' => [\&parse_page,$page],
     });
    }
  return;
  }
 
 sub parse_page
  {
  my $page = $_[1];
  my $ra_name_history;
  unless ($self->{LogMonitor}{params}{jobs}{renameuser}{current})
    {
    warn "Delaying 10 seconds till logs are loaded...\n";
    &{$self->{shared}{add_job}}([\&parse_page,undef,$page], 10);
    return;
    }
  warn "Loading $page\n";
  my $page_obj = $self->{WP_obj}->get($page,'rw');
  my $start_content = $page_obj->{'content'};
  my(@lines) = split("\n", $page_obj->{'content'});
  my @new_content;
  my $current_name;
  my $wanted_name;
  my $report_count;
  my $has_rename_count;
  my $need_save = 0;
  warn "Parsing page.\n";
  my %status_table;
  while (scalar(@lines))
    {
    my $line = shift(@lines);
    if ($line =~ m/\*\s?Current (user)?name:.*\{\{User13\|(.*?)\}\}/i)
      {
      $current_name = $2;
      $lines[0] =~ m/\*\s?(Target|Requested) (user)?name:.*\{\{(User13|Listuser)\|(.*?)\}\}/i;
      $wanted_name = $4;
      unless ($wanted_name)
        {
        $current_name = undef;
        }
      }
    if ($lines[0] =~ m/'''Robot clerk's notes'''/) #'
      {
      $status_table{$current_name} = $lines[0];
      $status_table{$current_name} =~ s/\s\[\[User:HBC RenameClerkBot\|HBC RenameClerkBot\]\] .*$// || die;
      }
    push(@new_content, $line) unless ($line =~ m/'''Robot clerk's notes'''/); #'
 
    if ((($line =~ m/For bureaucrat use/) || ($line =~ m|\* Reason: |) || (scalar(@lines) < 1)) && $current_name)
      {
      my $ra_name_history = [];
      $ra_name_history = find_rename_history($wanted_name, $ra_name_history) if ($wanted_name);
      unless ($ra_name_history)
        {
        $ra_name_history = find_rename_history($current_name, $ra_name_history);
        }
      if ($ra_name_history)
        {
        foreach (@{$ra_name_history}) {$_ = "'''".$_."'''" if ($_ =~ m/\|$current_name\]/);}
        my $rename_string = join(' &larr; ', @{$ra_name_history});
        my $addition = "*'''Robot clerk's notes''': Rename history of \"\[\[User:$current_name|$current_name\]\]\": \"".$rename_string."\"";
#        warn "\n\n$addition\n\n";
        push(@new_content, $addition.' ~~~~');
        if ($addition ne $status_table{$current_name})
          {
          $need_save = 1;
          $has_rename_count++;
          $report_count++;
          }
        }
      else
        {
        my $addition = "*'''Robot clerk's notes''': \[\[User:$current_name|$current_name\]\] does not have any history of being  renamed in the logs";
        push(@new_content, $addition.' ~~~~');
        if ($addition ne $status_table{$current_name})
          {
          $need_save = 1;
          $report_count++;
          }
        }
      $current_name = undef;
      }
    }
  my $new_content = join("\n", @new_content);
  unless ($need_save)
    {
    warn "Don't need change\n";
    return;
    }
  $has_rename_count ||= 0;
  $report_count ||= 0;
  $page_obj->{'content'} = $new_content;
  $page_obj->{'summary'} = "(Testing) Updating rename history on $report_count user".(($report_count != 1) ? ('s') : ('')).",     $has_rename_count user".(($has_rename_count != 1) ? ('s') : (''))." renamed.";
  warn "saving...\n";
  warn $page_obj->save();
 #  warn $page_obj->{'summary'};
 #  warn $page_obj->{'content'};
  warn "I have saved $page\n";
  }
 
 sub find_rename_history
  {
  my $name = shift;
  my $ra_name_history = shift;
  foreach my $check (@{$ra_name_history})
    {
    return $ra_name_history if ($check =~ m/\|$name\]\]/);
    }
 #  warn "Adding: $name\n";
  my $name_string = encode_utf8($name);
  push(@{$ra_name_history}, "\[\[User:$name_string|$name_string\]\]");
  my $ra_logs = $self->{LogMonitor}{params}{jobs}{renameuser}{log};
  foreach my $rh_log (@{$ra_logs})
    {
    ${$rh_log}{comment} =~ m/\[\[User:(.*?)\|.*?\]\].*\[\[User:(.*?)\|.*\]\]/;
    my $old_name = $1;
    my $new_name = $2;
    if ($name eq $new_name)
      {
      find_rename_history($old_name, $ra_name_history);
      }
    }
  if (@{$ra_name_history} > 1)
    {
    return $ra_name_history;
    }
  else
    {
    return undef;
    }
  }
 1;

LogMonitor <syntaxhighlight lang=perl>

package HBCPlugins::LogMonitor;
use     strict;
use    XML::Simple;
use     Data::Dumper;
use     URI::Escape;
use    LWP::UserAgent;
our    $self;

sub new
 {
 shift;
 $self = shift;
 bless($self);
 $self->{params}{jobs} = {};
 $self->{UA} = my $ua = LWP::UserAgent->new('agent' => 'LogMonitor .0001b');
 warn "LogMonitor active.\n";
 return $self;
 }

sub add_job
 {
 my $self = shift;
 my %params = @_;
 my $type = $params{type};
 $self->{params}{jobs}{$type} = \%params;
 $self->{params}{jobs}{$type}{offset} = ((lc($params{start_point}) eq 'all') ? (0) : ($params{start_point}));
 $self->{params}{jobs}{$type}{log} = [];
 $self->{params}{jobs}{$type}{step_size} ||= 250;
 $self->{params}{jobs}{$type}{current} = 0;
 warn "Set initial offset for $type to ".$self->{params}{jobs}{$type}{offset}."\n\n";
 &{$self->{shared}{add_job}}([\&handle_jobs,$type],0);
 }

sub update_now
 {
 my $self = shift;
 my $type = shift;
 warn "Forcing manual update for '$type' log.\n";
 &{$self->{shared}{add_job}}([\&handle_jobs,$type],0);
 }

sub handle_jobs
 {
 my $type = shift;
 my $url_template = 'http://en.wikipedia.org/w/api.php?action=query&format=xml&list=logevents&letype=<TYPE>&lelimit=<GRAB><OFFSET>& ledir=newer';
 my $url = $url_template;
 $self->{params}{jobs}{$type}{offset} ||= 0;
 warn "Reading up to ".$self->{params}{jobs}{$type}{step_size}." log entries from $type starting at: ".$self->{params}{jobs}{$type} {offset}."\n";
 $url =~ s|<TYPE>|$type|;
 $url =~ s|<GRAB>|$self->{params}{jobs}{$type}{step_size}|;
 my $offset_line = ('&lestart='.$self->{params}{jobs}{$type}{offset});
 if ($self->{params}{jobs}{$type}{offset}){$url =~ s|<OFFSET>|$offset_line|} else {$url =~ s|<OFFSET>||}
 my $rh_xml = XMLin($self->{UA}->get($url)->content());
 my $ra_renames = ${$rh_xml}{query}{logevents}{item};
 ($ra_renames = [$ra_renames]) if (ref($ra_renames) eq 'HASH');
 shift(@{$ra_renames}) if ($self->{params}{jobs}{$type}{offset} > 0);
 push(@{$self->{params}{jobs}{$type}{log}}, @{$ra_renames});
 @{$self->{params}{jobs}{$type}{log}} = sort {return ${$a}{timestamp} <=> ${$b}{timestamp}} (@{$self->{params}{jobs}{$type}{log}});
 $self->{params}{jobs}{$type}{offset} = ${${$self->{params}{jobs}{$type}{log}}[scalar(@{$self->{params}{jobs}{$type}{log}}) -  1]}{'timestamp'};
 unless (scalar(@{$ra_renames}) < ($self->{params}{jobs}{$type}{step_size}-1)) # Unless we got less than what we asked for, ask  again using the last timestamp as an offset
   {
   &{$self->{shared}{add_job}}([\&handle_jobs,$type],$self->{params}{jobs}{$type}{catch_up_frequency});
   $self->{params}{jobs}{$type}{current} = 0;
   }
 else
   {
   &{$self->{shared}{add_job}}([\&handle_jobs,$type],$self->{params}{jobs}{$type}{regular_frequency});
   $self->{params}{jobs}{$type}{current} = 1;
   }
 warn "Added ".scalar(@{$ra_renames})." log entries on this pass.\n";
 warn "Current total of: ".scalar(@{$self->{params}{jobs}{$type}{log}})."\n";
 warn ((($self->{params}{jobs}{$type}{current}) ? ('This is current') : ('This is not current'))."\n\n");
 }

1;