User:HighInBC/MCP/NameWatcher
Appearance
< User:HighInBC | MCP
This is a plugin, not a stand alone program. See here[1] for GFDL history prior to 02:54, June 4, 2007.
package HBCPlugins::NameWatcher; use strict; use Data::Dumper; use MediaWiki; use URI::Escape; our $self; sub new { shift; $self = shift; bless($self); $self->{params}{version_number} = '1.0.2'; $self->{params}{VERSION} = 'HBC NameWatcherBot v'.$self->{params}{version_number}; warn $self->{params}{VERSION}." starting up.\n"; $self->{params}{parameter_expected} = { 'Status' => 1, 'Default target' => 1, 'Long name' => 1, 'Repeating character' => 1, 'Write rate' => 1, 'Recheck duration' => 1, 'Recheck interval' => 1, }; $self->{params}{parameter_required} = [ 'Status', 'Default target', ]; $self->{params}{default_params} = { 'Write rate' => 10, 'Long name' => 0, 'Repeating character' => 0, }; $self->{params}{whitelist} = []; $self->{params}{blacklist} = []; $self->{params}{delayed_names} = {}; &{$self->{shared}{add_job}}(\&contact_irc_plugin , 0); &{$self->{shared}{add_job}}(\&login,0); &{$self->{shared}{add_job}}(\&check_homoglyphs,2); &{$self->{shared}{add_job}}(\&check_parameters,2); &{$self->{shared}{add_job}}(\&check_whitelist,2); &{$self->{shared}{add_job}}(\&check_blacklist,2); &{$self->{shared}{add_job}}(\&debug_report,5); return $self; } sub check_version { my ($active_version,$my_version) = @_; my @active_parts = split(/\./, $active_version); my @my_parts = split(/\./, $my_version); return 0 if scalar(@active_parts) > scalar(@my_parts); # should never happen foreach (@active_parts) { my $check_part = shift(@my_parts); last if $check_part > $_; next if $_ <= $check_part; return 0; } return 1; } sub send_info { my $type = shift; if ($self->{params}{send_info_to}) { my %payloads = ( 'client' => $self->{WP_obj}, 'panel' => $self->{params}{param}, 'blacklist' => $self->{params}{blacklist}, 'whitelist' => $self->{params}{whitelist} ); foreach my $target (@{$self->{params}{send_info_to}}) { warn "Sending data to $target: $type\n"; $self->{shared}{$target}->set_info($type, $payloads{$type}); } } } sub login { warn "Connecting to Wikipedia...\n"; my $c = MediaWiki->new; $c->setup ({ 'bot' => {'user' => $self->{params}{username},'pass' => $self->{params}{password}}, 'wiki' => {'host' => 'en.wikipedia.org','path' => 'w'} }) || warn "Failed to log in\n"; my $whoami = $c->user(); warn "$whoami connected\n"; $self->{WP_obj} = $c; send_info('client'); &{$self->{shared}{add_job}}(\&login,3600); } sub debug_report { warn Dumper ({ # 'Whitelist' => $self->{params}{whitelist}, # 'Blacklist' => $self->{params}{blacklist}, 'Params' => $self->{params}{param}, 'Delayed' => $self->{params}{delayed_names}, }); &{$self->{shared}{add_job}}([\&debug_report],300); } sub contact_irc_plugin { $self->{IRCFeed} = $self->{shared}{$self->{params}{irc_label}} || die; $self->{IRCFeed}->add_hook ({ 'check' => sub { my $text = shift; my $pattern = chr(0x03).'03(.*?)'.chr(0x03); my $createpattern = chr(0x03).'4\s*create'.chr(0x03); if (($text =~ m|$createpattern|) && ($text =~ m"$pattern")) { return $1; } else { return 0; } }, 'callback' => \&check_name, } ); $self->{IRCFeed}->add_hook ({ 'check' => sub {$_[0] =~ m|User:HBC NameWatcherBot/Control panel|}, 'callback' => \&check_parameters, } ); $self->{IRCFeed}->add_hook ({ 'check' => sub {$_[0] =~ m|User:HBC NameWatcherBot/Whitelist|}, 'callback' => \&check_whitelist, } ); $self->{IRCFeed}->add_hook ({ 'check' => sub {$_[0] =~ m|User:HBC NameWatcherBot/Blacklist|}, 'callback' => \&check_blacklist, } ); return; } sub check_parameters { warn "Checking parameters...\n"; my $page = $self->{WP_obj}->get('User:HBC NameWatcherBot/Control panel','r')->content(); %{$self->{params}{param}} = (); foreach my $line (split("\n",$page)) { if ($line =~ m|^;(.*?):(.*)$|) { $self->{params}{param}{$1} = $2 if ($self->{params}{parameter_expected}{$1}); } } die "Bot is not turned on in the control panel\n" unless (lc($self->{params}{param}{'Status'}) eq 'on'); foreach my $key (@{$self->{params}{parameter_required}}) { die "Missing required parameter: $key\n" unless ($self->{params}{param}{$key}); } %{$self->{params}{param}} = (%{$self->{params}{default_params}},%{$self->{params}{param}}); send_info('panel'); } sub check_homoglyphs { warn "Checking homoglyphs...\n"; my $page = $self->{WP_obj}->get('User:HBC_NameWatcherBot/Homoglyphs','r')->content(); utf8::upgrade($page); @{$self->{params}{homoglyphs}} = (); foreach my $line (split("\n",$page)) { next unless ($line =~ m|^;(.+?):\s*(.+)$|); my $symbol = $1; my @matches = split(/\s+/,$2); for (@matches) {$_ = quotemeta($_)}; push (@{$self->{params}{homoglyphs}}, $symbol, \@matches) } } sub check_whitelist { warn "Checking whitelist...\n"; my $page = $self->{WP_obj}->get('User:HBC NameWatcherBot/Whitelist','r')->content(); @{$self->{params}{whitelist}} = (); foreach my $line (split("\n",$page)) { push(@{$self->{params}{whitelist}},$1) if ($line =~ m|^;(.*)$|); } send_info('whitelist'); } sub check_blacklist { warn "Checking blacklist...\n"; my $page = $self->{WP_obj}->get('User:HBC NameWatcherBot/Blacklist','r')->content(); @{$self->{params}{blacklist}} = (); foreach my $line (split("\n",$page)) { if ($line =~ m|^;(.*?)(:(.*))?$|) { my $rh_entry = {}; ${$rh_entry}{word} = $1; if ($2) { my @flags = split(',',$3); ${$rh_entry}{flags} = parse_flags(\@flags); } if (${$rh_entry}{'flags'}{'HOMOGLYPH'}) { my $hg_regex = compile_homoglyph_pattern(${$rh_entry}{'word'}); ${$rh_entry}{'flags'}{'LABEL'}[0] = ('Homoglyph of: "'.${$rh_entry}{'word'}.'"'); ${$rh_entry}{'flags'}{'REGEX'} = 1; ${$rh_entry}{'word'} = $hg_regex; } push(@{$self->{params}{blacklist}},$rh_entry); } } send_info('blacklist'); } sub parse_flags { my $ra_flags = shift; my %has_flag; foreach my $flag (@{$ra_flags}) { if ($flag =~ m|(.*)\((.*)\)|) { my (@params) = split('\|',$2); $has_flag{$1} = \@params; } else { $has_flag{$flag} = 1 } } return \%has_flag; } sub check_name { shift; my($name) = shift; my $display_name = "'$name'"; my $master_name = $name; warn "Checking name: $display_name\n"; foreach my $white_word (@{$self->{params}{whitelist}}) {$name =~ s|$white_word||gi;} # Remove occurances of whitelisted words my $ra_offenses = []; foreach my $rh_black_word (@{$self->{params}{blacklist}}) { my $word = ${$rh_black_word}{word}; if (${$rh_black_word}{flags}{'REGEX'}) { if (${$rh_black_word}{flags}{'CASE_SENSITIVE'}) { push(@{$ra_offenses},$rh_black_word) if ($name =~ m|$word|); } else { push(@{$ra_offenses},$rh_black_word) if ($name =~ m|$word|i); } } else { if (${$rh_black_word}{flags}{'CASE_SENSITIVE'}) { push(@{$ra_offenses},$rh_black_word) if (index($name,$word) > -1); } else { push(@{$ra_offenses},$rh_black_word) if (index(lc($name),lc($word)) > -1); } } } &{$self->{shared}{add_job}}([\&bad_name,$master_name,$ra_offenses],0) if ( scalar(@{$ra_offenses}) || (($self->{params}{param}{'Long name'}) && (length($name) > $self->{params}{param}{'Long name'})) || (($self->{params}{param}{'Repeating characters'}) && (detect_repeatition($name,$self->{params}{param}{'Repeating characters'}))) || (($self->{params}{param}{'Repeating numbers'}) && (detect_repeatition_numbers($name,$self->{params}{param}{'Repeating numbers'}))) ); return 1; } sub bad_name { my ($name, $ra_offenses) = @_; my(@offense_reports) = ('* {{user-uaa|1='.$name.'}}'); my $need_rechecking = 0; # default no reason to report my $need_header = 0; my $has_edited = user_has_edited($name); my $is_blocked = is_blocked($name); my $target_page = $self->{params}{param}{'Default target'}; push(@offense_reports,':* This user has edited at least one time.') if ($has_edited); if (($self->{params}{param}{'Long name'}) && (length($name) > $self->{params}{param}{'Long name'})) { push(@offense_reports, ":* At ".length($name)." characters, this name exceeds ".$self->{params}{param}{'Long name'}." characters"); } if (($self->{params}{param}{'Repeating characters'}) && (my $char = detect_repeatition($name,$self->{params}{param}{'Repeating characters'}))) { push(@offense_reports, ":* This name has the character \"'''".$char."'''\" repeating ".$self->{params}{param}{'Repeating characters'}." or more than times in a row."); } if (($self->{params}{param}{'Repeating numbers'}) && (detect_repeatition_numbers($name,$self->{params}{param}{'Repeating numbers'}))) { push(@offense_reports, ":* This name has ".$self->{params}{param}{'Repeating numbers'}." or more numbers in a row."); } foreach my $rh_blackword (@{$ra_offenses}) { my $rh_has_flag = ${$rh_blackword}{flags}; my $label = ((${$rh_has_flag}{'LABEL'}) ? ("called '''".${${$rh_has_flag}{'LABEL'}}[0]."'''") : ("'''${$rh_blackword}{word}'''")); my $name_pattern = '('.${$rh_blackword}{word}.')'; my $match; if (${$rh_has_flag}{'CASE_SENSITIVE'}) { $name =~ m|$name_pattern|; $match = " The portion that matched was '''$1'''." if ($1); } else { $name =~ m|$name_pattern|i; $match = " The portion that matched was '''$1'''." if ($1); } my(@report) = (((${$rh_has_flag}{'REGEX'}) ? (":* Matches the regular expression $label.$match") : (":* Matches the literal pattern $label."))); if (${$rh_has_flag}{'CASE_SENSITIVE'}) { push (@report, "::* This filter is case sensitive."); } if (${$rh_has_flag}{'WAIT_TILL_EDIT'}) { if ($has_edited) { push (@report, "::* This report was delayed until the user edited."); delete(${$self->{params}{param}{delayed_names}}{$name}); } else { warn "\t$name has not edited, not counting match to the string $label at this point, rechecking in ".$self->{params}{param}{'Recheck interval'}." seconds.\n"; $need_rechecking = 1 unless (scalar(@offense_reports) > 1); next; } } if (${$rh_has_flag}{'HOMOGLYPH'}) { push (@report, "::* This filter attempts to see past the use of similar looking characters and creative substitutions, see [[Homoglyphs]]."); } $need_rechecking = 0; warn Dumper ({ 'Name' => $name, 'Flags' => $rh_has_flag, 'Word' => ${$rh_blackword}{'word'}, }); if (${$rh_has_flag}{'HTTP_CHECK'}) { warn "Gotta check '$name' for http server.\n"; my $code = $self->{WP_obj}->{ua}->head('http://'.$name)->code(); if (($code == 500) && (ref(${$rh_has_flag}{'HTTP_CHECK'})) && (lc(${${$rh_has_flag}{'HTTP_CHECK'}}[0]) eq 'ignore_fail')) { warn "Ignoring failed connection.\n"; return; } my $info = ''; if ($code =~ m|^2..$|) { $info = "A response code starting in 2 means '''HTTP server found, succesful request'''."; } elsif ($code =~ m|^3..$|) { $info = "A response code starting in 3 means '''HTTP server found, client redirected'''."; } elsif ($code =~ m|^4..$|) { $info = "A response code starting in 4 means '''HTTP server found, HTTP server gave error'''."; } elsif ($code =~ m|^5..$|) { $info = "A response code starting in 5 means '''HTTP server not found'''."; } push (@report, "::* When using the name '''$name([http://$name link])''' as a http URL I got a response code of '''$code'''.".$info); } if (${$rh_has_flag}{'ALTERNATE_TARGET'}) { $target_page = ${${$rh_has_flag}{'ALTERNATE_TARGET'}}[0]; warn "Alternate target: $target_page\n"; } if (${$rh_has_flag}{'USE_HEADER'}) { $need_header = 1; } if (${$rh_has_flag}{'LOW_CONFIDENCE'}) { push (@report, "::* The string $label is prone to false positives, please take extra care ensuring this name is actually a violation before blocking."); } if (${$rh_has_flag}{'NOTE'}) { push (@report, "::* The string $label has a comment associated with it: ".${${$rh_has_flag}{'NOTE'}}[0]); } if (${$rh_has_flag}{'SOCK_PUPPET'}) { push (@report, "::* The string $label is often associate with sock puppets of [[User:".${${$rh_has_flag}{'SOCK_PUPPET'}}[0]."]] {{#ifexist:Category:Suspected Wikipedia sockpuppets of ".${${$rh_has_flag}{'SOCK_PUPPET'}}[0]."|(see [[:Category:Suspected Wikipedia sockpuppets of ".${${$rh_has_flag}{'SOCK_PUPPET'}}[0]."]])}}"); } push (@offense_reports,@report); } if ($need_rechecking) { ${$self->{params}{param}{delayed_names}}{$name} ||= time(); my $report_age = (time() - ${$self->{params}{param}{delayed_names}}{$name}); if (($report_age > $self->{params}{param}{'Recheck duration'}) || ($is_blocked)) { warn "Forgetting about $name, it has been $report_age seconds since creation and no edits.\n"; warn "\tThe user is blocked you see.\n" if ($is_blocked); delete(${$self->{params}{param}{delayed_names}}{$name}); } else { warn "Scheduling recheck of $name\n"; &{$self->{shared}{add_job}}([\&check_name,$name,$name],$self->{params}{param}{'Recheck interval'}); } return 1; } unshift(@offense_reports, "== Account [[User:$name]] created ==") if $need_header; my $report = (join("\n",@offense_reports)." ~~~~\n"); warn "Writing to: $target_page\n"; my $page = $self->{WP_obj}->get($target_page, 'rw'); if (($page->content() =~ m|<\!-- HBC NameWatcherBot v(\d+\.\d+\.\d+) allowed -->|) && !($is_blocked)) { my $version_number_needed = $1; unless (check_version($version_number_needed,$self->{params}{version_number})) { warn "I need $version_number_needed and my version is ".$self->{params}{version_number}.".\n"; warn "I am too old to edit this page.\n"; return; } while ($page->{'content'} !~ m|\n\n$|) {$page->{'content'} = $page->{'content'}."\n";} $page->{'content'} = ($page->{'content'}.$report); $page->{'summary'} = "Reporting [[Special:Contributions/$name|$name]] ([[Special:Blockip/$name|block]])."; $page->save(); warn ($page->{'summary'}."\n"); # warn ($page->{'content'}."\n\n\n"); sleep($self->{params}{param}{'Write rate'}); warn "\n"; } elsif ($is_blocked) { warn "Skipping, user is already blocked.\n"; } else { warn "I don't have permission to write to '$target_page'\n"; return; } } sub user_has_edited { my ($name) = @_; my $url = 'http://en.wikipedia.org/w/api.php?action=query&list=usercontribs&format=xml&ucuser='.uri_escape($name); my $data = $self->{WP_obj}->{ua}->get($url)->content(); if (!$data || $data =~ m|<usercontribs\s*/>|) { # blank XML container, no contribs, return zero return 0; } else { return 1; } } sub is_blocked { my ($name) = @_; my $url = $self->{WP_obj}->{index}.'?title=Special:Ipblocklist&ip='.uri_escape($name); my $data = $self->{WP_obj}->{ua}->get($url)->content(); # Get blocklist info for user return ($data =~ m|</a>\) blocked <a href|) # If the user is currently blocked } sub detect_repeatition { my $string = shift; my $limit = shift; my $last_char; my $current_rep = 1; while (my $char = substr($string,0,1,'')) { if ($char eq $last_char) { $current_rep++; } else { $last_char = $char; $current_rep = 1; } return $char if ($current_rep >= $limit); } return 0; } sub detect_repeatition_numbers { my $string = shift; my $limit = shift; my $last_char; my $current_rep = 1; while (my $char = substr($string,0,1,'')) { if ($char =~ m|^[0-9]$|) { $current_rep++; } else { $current_rep = 0; } return 1 if ($current_rep >= $limit); } return 0; } sub compile_homoglyph_pattern { my $target_pattern = shift; my @homoglyphs = @{$self->{params}{homoglyphs}}; while (scalar(@homoglyphs)) { my ($str, $ra_values) = (shift(@homoglyphs),shift(@homoglyphs)); next unless (scalar(@{$ra_values})); my $pattern = ('('.join('|', $str,@{$ra_values}).')'); $target_pattern =~ s"$str"$pattern"ig; #" } return $target_pattern; } 1;