User:FairuseBot/libBot.pm
Appearance
#!/usr/bin/perl # libBot: A Perl module of useful routines for running a bot package libBot; use strict; use warnings; use Pearle; use Data::Dumper; use Array::Utils; require Exporter; our @ISA = qw(Exporter); our @EXPORT = qw(config usernotify wikilog botwarnlog notelog LoadInfoboxPatterns FixupLinks MakeWikiRegex DoIHaveMessages GetPageCategories GetPageTemplates GetLinksOnPage GetPageText GetPageList GetFullPageList GetImageNames SaveImage UpdateLink RemoveImageFromPage ReplaceImage IsNotified IsPageNotified isDated getDate getUploadDates getLastEditDate GetImageUploader loadNotificationList saveNotificationList usesTemplate DoesPageExist); our $VERSION = 1.00; my $test_only = 0; my $username = ""; my @infobox_patterns = (); sub config { my %params = @_; $test_only = $params{test_only} if(defined($params{test_only})); $username = $params{username} if(defined($params{username})); } # Log a warning on a user's talkpage, using an existing edit session sub usernotify { my ($wikipage, $text, $user, $summary); $wikipage = $_[1]; $summary = $_[2]; $summary = "Logging warning message" if(!defined($summary)); if(!$wikipage->isa("Pearle::WikiPage")) { Pearle::myLog(0, "usernotify(): Second parameter is not a WikiPage object\n"); die "usernotify(): Second parameter is not a WikiPage object\n"; } # We've been handed an editing session Pearle::myLog(4, "Warning with existing edit session\n"); if($test_only) { print STDERR $_[0]; return; } if($wikipage->getWikiText() =~ /^#redirect/i) { botwarnlog("*User talk page [[User talk:$user]] is a redirect\n"); return; } $text = $wikipage->getEditableText(); $text .= $_[0]; $wikipage->setEditableText($text); Pearle::postPage($wikipage, $summary, 0); print STDERR $_[0]; } # General-purpose on-Wiki logging routine sub wikilog { my($target, $text, $token, $summary); $target = $_[0]; $text = $_[1]; $summary = $_[2] || "Logging note"; chomp($text); $text = "\n$text" if($text !~ /^\n/); # The edit API eats trailing newlines, so prepend a newline if the message doesn't already have one. eval { $token = Pearle::getToken($target); }; if($@) { if($@ =~ /^925/) { Pearle::myLog(1, "Failed to notify: Protected page $target\n"); return; } else { die; } } if($test_only) { print STDERR $_[1]; return; } Pearle::appendToPage($target, $token, $text, $summary, 0); } # Log a warning on the talk page of the bot sub botwarnlog { my ($page, $text, $summary); $text = $_[0]; $summary = $_[1]; $summary = "Logging warning message" if(!defined($summary)); $page = "User talk:${username}/log"; wikilog($page, $text, $summary); } # Log a notification message to the console sub notelog { print STDERR @_; } # Fix all wikilinks in a string so that they shows as a link, not inline, if it's for a category or image sub FixupLinks { my $link = shift; $link =~ s/\[\[(Category|Image|File)/[[:$1/g; return $link; } # Make a string into a Wikipedia-compatible regex sub MakeWikiRegex { my $string = shift; my @chars = split //, $string; my $result = ''; return undef if(!defined($string)); foreach my $char (@chars) { # Escape metacharacters, and add percent-encoding for certain characters if($char eq '\\') {$result .= '\\\\';} elsif($char eq '/') {$result .= '(?i:\/|%2F)';} elsif($char eq '.') {$result .= '\.';} elsif($char eq '(') {$result .= '(?i:\(|%28)';} elsif($char eq ')') {$result .= '(?i:\)|%29)';} elsif($char eq '[') {$result .= '\[';} elsif($char eq ']') {$result .= '\]';} elsif($char eq '+') {$result .= '\+';} elsif($char eq '*') {$result .= '\*';} elsif($char eq '?') {$result .= '(?i:\?|%3F)';} elsif($char eq '^') {$result .= '\^';} elsif($char eq '$') {$result .= '\$';} elsif($char eq '&') {$result .= '(?i:&|%26)';} elsif($char eq '!') {$result .= '(?i:!|%21)';} elsif($char eq '~') {$result .= '(?i:~|%7E)';} elsif($char eq "'") {$result .= "(?i:'|%27)";} elsif($char eq '"') {$result .= '(?i:"|%22)';} elsif($char eq ',') {$result .= '(?i:,|%2C)';} else {$result .= $char;} } # Process the string to match both with spaces and with underscores $result =~ s/[ _]/[ _]+/g; # Process the string to match both upcase and lowercase first characters if($result =~ /^[A-Za-z]/) { $result =~ s/^(.)/"[$1".lc($1)."]"/e; } return $result; } sub HTMLEncode { my $char = shift; return sprintf("&X%X;", ord($char)); } # Make a string into something that can match most image name formats sub MakeFancyRegex { my $string = shift; my @chars = split //, $string; my $result; foreach my $char (@chars) { if($char eq '\\') { $result .= "(\\\\|%5C|%5c|&x5C;)"; } elsif($char eq '.') { } elsif($char eq '(') { } elsif($char eq ')') { } else { $result .= "($char|" . uri_escape_utf8($char) . "|" . lc(uri_escape_utf8($char)) . "|" . HTMLEncode($char) . "|" . lc(HTMLEncode($char)) . ")"; } } return $result; } # Check for new talk page messages sub DoIHaveMessages { my $xml = shift; my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml); if(exists($parsed_xml->{query}->{userinfo}->{messages}) and defined($parsed_xml->{query}->{userinfo}->{messages})) { return 1; } else { return 0; } } sub GetPageCategories { my $xml = shift; my @pages = (); if(defined($xml)) { my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml); Pearle::myLog(4, Dumper($parsed_xml)); if(exists($parsed_xml->{query}->{pages}->{page}->{categories}->{cl}) and defined($parsed_xml->{query}->{pages}->{page}->{categories}->{cl})) { if(ref($parsed_xml->{query}->{pages}->{page}->{categories}->{cl}) eq 'ARRAY') { my @all_pages = @{$parsed_xml->{query}->{pages}->{page}->{categories}->{cl}}; @pages = map {$_->{title}} @all_pages; } else { @pages = ($parsed_xml->{query}->{pages}->{page}->{categories}->{cl}->{title}); } } } return @pages; } sub GetLinksOnPage { my $xml = shift; my @pages = (); if(defined($xml)) { my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml); Pearle::myLog(4, Dumper($parsed_xml)); if(exists($parsed_xml->{query}->{pages}->{page}->{links}->{pl}) and defined($parsed_xml->{query}->{pages}->{page}->{links}->{pl})) { if(ref($parsed_xml->{query}->{pages}->{page}->{links}->{pl}) eq 'ARRAY') { my @all_pages = @{$parsed_xml->{query}->{pages}->{page}->{links}->{pl}}; @pages = map {$_->{title}} @all_pages; } else { @pages = ($parsed_xml->{query}->{pages}->{page}->{links}->{pl}->{title}); } } } return @pages; } sub GetPageText { my $xml = shift; my $text = undef; if(defined($xml)) { my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml); Pearle::myLog(4, Dumper($parsed_xml)); if(exists($parsed_xml->{query}->{pages}->{page}->{revisions}->{rev}->{content}) and defined($parsed_xml->{query}->{pages}->{page}->{revisions}->{rev}->{content})) { if(ref($parsed_xml->{query}->{pages}->{page}->{revisions}->{rev}->{content})) { # The API/XML parser interact to produce a HASH ref if the revision is empty $text = ""; } else { $text = $parsed_xml->{query}->{pages}->{page}->{revisions}->{rev}->{content}; } } } return $text; } # Input: XML from the API, generated with prop => 'templates' and with only a single title # Either as text or as a parsed tree # # Returns: A list of templates used by the page # # Side effects: None sub GetPageTemplates { my $xml = shift; my @templates; if(defined($xml)) { my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml, ForceArray => ['tl']); if(exists($parsed_xml->{query}->{pages}->{page}->{templates}->{tl}) and defined($parsed_xml->{query}->{pages}->{page}->{templates}->{tl})) { @templates = map {$_->{title}} @{$parsed_xml->{query}->{pages}->{page}->{templates}->{tl}}; } } return @templates; } # Input: XML, either a tree produced by parsing, or XML text # # Returns: A list of pages that this image is used on # # Side effects: For pages in certain namespaces, posts on the bot's log page sub GetPageList { my $xml = shift; my $image; my @pages = (); if(defined($xml)) { my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml, ForceArray => ['iu']); my $image = $parsed_xml->{query}->{pages}->{page}->{title}; Pearle::myLog(4, Dumper($parsed_xml)); if(exists($parsed_xml->{query}->{imageusage}->{iu}) and defined($parsed_xml->{query}->{imageusage}->{iu})) { my @bad_pages = grep {$_->{ns} == 10 or $_->{ns} == 12} @{$parsed_xml->{query}->{imageusage}->{iu}}; my @good_pages = grep {$_->{ns} != 10 and $_->{ns} != 12} @{$parsed_xml->{query}->{imageusage}->{iu}}; @pages = map {$_->{title}} @good_pages; if(scalar(@bad_pages) > 0 and defined($image)) # If "image" is undefined, we're probably doing a pure usage check, rather than one in preparation for removal { my $notice; foreach my $page (@bad_pages) { $notice .= "*Found image [[:$image]] in [[$page->{title}]]\n"; } botwarnlog($notice); } } } return @pages; } # Get all pages. Don't filter for bad namespaces. sub GetFullPageList { my $xml = shift; my $image; my @pages = (); if(defined($xml)) { my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml, ForceArray => ['iu']); my $image = $parsed_xml->{query}->{pages}->{page}->{title}; Pearle::myLog(4, Dumper($parsed_xml)); if(exists($parsed_xml->{query}->{imageusage}->{iu}) and defined($parsed_xml->{query}->{imageusage}->{iu})) { @pages = map {$_->{title}} @{$parsed_xml->{query}->{imageusage}->{iu}}; } } return @pages; } # Input: XML from the API, generated with list => 'backlinks'. blfilterredir => 'redirects' is recommended but not mandatory. # Either as text or as a parsed tree # # Returns: A list of redirects to the image # # Side effects: None sub GetImageNames { my $xml = shift; my @names; if(defined($xml)) { my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml, ForceArray => ['bl']); if(exists($parsed_xml->{query}->{backlinks}->{bl}) and defined($parsed_xml->{query}->{backlinks}->{bl})) { @names = map {$_->{title}} grep( {defined($_->{redirect})} @{$parsed_xml->{query}->{backlinks}->{bl}}); } } return @names; } sub UpdateLink { my $page = shift; my $from = shift; my $to = shift; my $summary = shift || "Updating link to bypass a redirect or disambiguation page"; die "No page to edit" if(!defined($page)); die "No link to change" if(!defined($from)); die "No new link" if(!defined($to)); Pearle::myLog(3, "Updating link from $from to $to for page $page\n"); my $wikipage = Pearle::getPage($page); $wikipage->canonicalizeLinks(); my $text = $wikipage->getEditableText(); my $link_regex = MakeWikiRegex($from); my $matches = $text =~ s/\x01($link_regex)\x02/\x01${to}|${1}\x02/gi; $matches += $text =~ s/\x01$link_regex([#|])/\x01${to}${1}/gi; $matches += $text =~ s/([^=]\s*=\s*)$link_regex(\s*[|\n])/${1}$to${2}/gi; $wikipage->setEditableText($text); print $text; if($matches > 0) { Pearle::postPage( $wikipage, $summary, 0); } else { Pearle::myLog(3, "No update\n"); } return $matches; } sub RemoveImageFromPage { my $image = shift; my $page = shift; my $image_regex = shift; my $removal_prefix = shift; my $removal_comment = shift; my $wikipage; my $text; my ($match1, $match2); my $old_length; my $new_length; my $change_len; my $match_len; tryagain: # Fetch an article page $wikipage = Pearle::getPage($page); $wikipage->canonicalizeLinks(); $text = $wikipage->getEditableText(); if(!defined($text)) { Pearle::myLog(1, "Error: Bad edit page [[$page]]\n"); botwarnlog(FixupLinks("*Error: Bad edit page [[$page]]\n")); sleep(300); return 0; } if($text =~ /^\s*$/) { # Might be protected instead of empty Pearle::myLog(1, "Error: Empty or protected page [[$page]]\n"); botwarnlog(FixupLinks("*Error: Empty or protected page [[$page]]\n")); sleep(300); return 0; } if($text =~ /^#redirect/i) { Pearle::myLog(1, "Redirect found for page [[$page]] (image [[:$image]])\n"); botwarnlog(FixupLinks("*Redirect found for page [[$page]] (image [[:$image]])\n")); print $text; return 0; } # Remove the image my $regex3 = "([ \\t]*\x01${image_regex}[^\x01]*?(\x01[^\x02]*?\x02[^\x01]*?|)+\x02[ \\t]*)"; # Regex to match images #my $regex3 = "( # [ \\t]* # Any leading whitespace # \x01 # Open double-bracket for the image # ${image_regex} # The image itself # [^\x01]*? # Anything up to the first link in the caption, or a closing double bracket (minimal match) # (\x01 # Open double-bracket for a link in the caption # [^\x02]*? # Anything but a closing double-bracket # \x02 # The closing double-bracket for the link # [^\x01]*?|) # Any non-link text, or nothing # + # Matches one or more times # \x02 # The closing double-bracket for the image # [ \\t]*) # Any trailing whitespace # "; my $regex3ex = "\\w[ \\t]*${regex3}[ \\t]*\\w"; # Regex to try to spot inline images my $regex3g = "(${image_regex}.*)"; # Regex to match gallery images my ($raw_image) = $image =~ /(?:Image|File):(.*)/; Pearle::myLog(3, "Regex 3: $regex3\n"); notelog("Regex 3 extended: $regex3ex\n"); notelog("Regex 3 gallery: $regex3g\n"); Pearle::myLog(3, "Raw regex: $raw_image\n"); if($text =~ /$regex3ex/) { Pearle::myLog(1, "Possible inline image in [[$page]]\n"); botwarnlog(FixupLinks("*Possible inline image [[:$image]] in [[$page]]\n")); return 0; # Can't do gallery matching because that also matches regular images, and odds are, we don't have an infobox } $text =~ /$regex3/; $match_len = length($1); my @matches = $text =~ /$regex3/g; if(grep {$_ =~ /[\x{F0000}-\x{FFFFF}]/} @matches) # If any images have comments in their captions, we can't remove them { botwarnlog(FixupLinks("*Comment in image in [[$page]]\n")); goto skipregular; } if(defined($removal_prefix)) { $match2 = $text =~ s/$regex3/<!-- $removal_prefix $1 -->/g; } else { $match2 = $text =~ s/$regex3//g; } $new_length = length($text); print "Num: $match2 Len: $match_len\n"; if($match2) { if($match_len < (2 + length($image))) { Pearle::myLog(0, "Short replacement of $match_len bytes (min " . (length($image) + 2) . ") in [[$page]] ($match2 matches). Exiting.\n"); Pearle::myLog(0, "Text:\n$text\n"); print Dumper($1); print Dumper($image); exit; } # If many matches, log a warning if($match2 > 2) { Pearle::myLog(3, "More than one match ($match2) in page [[$page]]\n"); } if($match2 > 100) { Pearle::myLog(1, "Too many matches ($match2) in page [[$page]]. Skipping.\n"); botwarnlog("*Too many matches ($match2) in page [[$page]]. Skipping.\n"); return 0; } } skipregular: # Put the text back and get it again in order to fold any comments resulting from removing non-gallery images. # This is because gallery image matching will also match commented images. $wikipage->setEditableText($text); $text = $wikipage->getEditableText(); if($text =~ /<gallery/i) { Pearle::myLog(3, "*Possible image gallery in page [[$page]]\n"); if(defined($removal_prefix)) { if($text =~ s/$regex3g/<!-- $removal_prefix $1 -->/g) { $match2 += 1; } } else { if($text =~ s/$regex3g//g) { $match2 += 1; } } } if($match2 > 0) { if($text =~ /\[\[(?: |)<!--/) { Pearle::myLog(2, "Possible multiline image in page [[$page]]\n"); botwarnlog(FixupLinks("*Possible multiline image in page [[$page]]\n")); } } # Improved infobox removal my $infobox_regex = "([-A-Za-z0-9_]+[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}]*=)[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*"; my $infobox_regex_full = "([-A-Za-z0-9_]+[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}]*=)[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . '(?:[Ii][Mm][Aa][Gg][Ee]|[Ff][Ii][Ll][Ee])[ _]*:[ _]*' . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*"; if($text =~ /$infobox_regex/) { Pearle::myLog(3, "Matched on infobox regex: $infobox_regex\n"); Pearle::myLog(3, "Infobox parameter: $1\n"); if($& =~ /puic/) { botwarnlog(FixupLinks("*PUIC in page [[$page]]\n")); } else { my $sub = $1; my $match_regex = MakeWikiRegex($sub) . "[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*"; $text =~ s/$match_regex/$sub/g; $match2 += 1; } } if($text =~ /$infobox_regex_full/) { Pearle::myLog(3, "Matched on infobox regex: $infobox_regex_full\n"); Pearle::myLog(3, "Infobox parameter: $1\n"); if($& =~ /puic/) { botwarnlog(FixupLinks("*PUIC in page [[$page]]\n")); } else { my $sub = $1; my $match_regex = MakeWikiRegex($sub) . "[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . '(?:[Ii][Mm][Aa][Gg][Ee]|[Ff][Ii][Ll][Ee])[ _]*:[ _]*' . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*"; $text =~ s/$match_regex/$sub/g; $match2 += 1; } } if($match2) # No need to null-edit articles anymore { if($test_only) { notelog("Test removal from page succeeded\n"); } else { # Submit the changes $wikipage->setEditableText($text); eval { Pearle::postPage($wikipage, $removal_comment, 0); }; if($@) { if($@ =~ /^924 Spam filter: (.*)$/) { botwarnlog("*Spam filter on page [[$page]], url <nowiki>$1\n"); $match2 = 0; # We weren't able to remove it } elsif($@ =~ /^922/) { # Edit conflict. Try editing the page again. botwarnlog("*Edit conflict on page [[$page]]\n"); goto tryagain; } else { die; } } } } return ($match2) } sub ReplaceImage { my $image = shift; my $page = shift; my $image_regex = shift; my $new_image = shift; my $reason = shift; my $wikipage; my $text; my ($match1, $match2); my $old_length; my $new_length; my $change_len; my $match_len; tryagain: # Fetch an article page $wikipage = Pearle::getPage($page); $wikipage->canonicalizeLinks(); $text = $wikipage->getEditableText(); if(!defined($text)) { Pearle::myLog(1, "Error: Bad edit page [[$page]]\n"); botwarnlog(FixupLinks("*Error: Bad edit page [[$page]]\n")); sleep(300); return 0; } if($text =~ /^\s*$/) { # Might be protected instead of empty Pearle::myLog(1, "Error: Empty or protected page [[$page]]\n"); botwarnlog(FixupLinks("*Error: Empty or protected page [[$page]]\n")); sleep(300); return 0; } if($text =~ /^#redirect/i) { Pearle::myLog(1, "Redirect found for page [[$page]] (image [[:$image]])\n"); botwarnlog(FixupLinks("*Redirect found for page [[$page]] (image [[:$image]])\n")); print $text; return 0; } # Remove the image my $regex3 = "([ \\t]*\x01)${image_regex}([^\x01]*?(\x01[^\x02]*?\x02[^\x01]*?|)+\x02[ \\t]*)"; # Regex to match images #my $regex3 = "( # [ \\t]* # Any leading whitespace # \x01 # Open double-bracket for the image # ${image_regex} # The image itself # [^\x01]*? # Anything up to the first link in the caption, or a closing double bracket (minimal match) # (\x01 # Open double-bracket for a link in the caption # [^\x02]*? # Anything but a closing double-bracket # \x02 # The closing double-bracket for the link # [^\x01]*?|) # Any non-link text, or nothing # + # Matches one or more times # \x02 # The closing double-bracket for the image # [ \\t]*) # Any trailing whitespace # "; my $regex3g = "${image_regex}(.*)"; # Regex to match gallery images my ($raw_image) = $image =~ /(?:Image|File):(.*)/; my $regex4m = "(\x01[ _]*[Mm][Ee][Dd][Ii][Aa][ _]*:[ _]*)" . MakeWikiRegex($raw_image) . "([ _]*\\|([^]]*)\x02)"; # Regex to match inline Media: links Pearle::myLog(3, "Regex 3: $regex3\n"); notelog("Regex 3 gallery: $regex3g\n"); Pearle::myLog(3, "Raw regex: $raw_image\n"); notelog("Regex 4 Media: $regex4m\n"); $old_length = length($text); $match2 = $text =~ s/$regex3/$1$new_image$2/g; $new_length = length($text); print "Num: $match2 Len: $match_len\n"; if($match2) { # If the length change isn't right, log a warning and return without saving if($new_length != $old_length - ($match2 * (length($image) - length($new_image)))) { botwarnlog(FixupLinks("*Length mismatch on [[$page]] replacing [[$image]] with [[$new_image]]\n")); Pearle::myLog(1, "Length mismatch on $page replacing [[$image]] with [{$new_image]]\n"); return 0; } # If many matches, log a warning if($match2 > 2) { Pearle::myLog(3, "More than one match ($match2) in page [[$page]]\n"); } if($match2 > 100) { Pearle::myLog(1, "Too many matches ($match2) in page [[$page]]. Skipping.\n"); botwarnlog("*Too many matches ($match2) in page [[$page]]. Skipping.\n"); return 0; } } if($text =~ /<gallery/i) { Pearle::myLog(3, "*Possible image gallery in page [[$page]]\n"); if($new_image =~ /$regex3g/) { Pearle::myLog(2, "New image name $new_image is a substring of old image name $image\n"); botwarnlog("*New image [[$new_image]] is a substring of [[$image]] in page [[$page]]\n"); } else { my $gallery_matches = $text =~ s/$regex3g/$new_image$1/g; $match2 += $gallery_matches; } } # Improved infobox replacement my $infobox_regex = "([-A-Za-z0-9_]+[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}]*=)[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*"; my $infobox_regex_full = "([-A-Za-z0-9_]+[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}]*=)[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . '(?:[Ii][Mm][Aa][Gg][Ee]|[Ff][Ii][Ll][Ee])[[ _]*:[ _]*' . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*"; if($text =~ /$infobox_regex/) { Pearle::myLog(3, "Matched on infobox regex: $infobox_regex\n"); Pearle::myLog(3, "Infobox parameter: $1\n"); if($& =~ /puic/) { botwarnlog(FixupLinks("*PUIC in page [[$page]]\n")); } else { my $sub = $1; my $match_regex = MakeWikiRegex($sub) . "[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*"; my $non_prefixed_new_image = $new_image; $non_prefixed_new_image =~ s/^(Image|File)://; $text =~ s/$match_regex/${sub}${non_prefixed_new_image}/g; $match2 += 1; } } if($text =~ /$infobox_regex_full/) { Pearle::myLog(3, "Matched on infobox regex: $infobox_regex_full\n"); Pearle::myLog(3, "Infobox parameter: $1\n"); if($& =~ /puic/) { botwarnlog(FixupLinks("*PUIC in page [[$page]]\n")); } else { my $sub = $1; my $match_regex = MakeWikiRegex($sub) . "[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . '(?:[Ii][Mm][Aa][Gg][Ee]|[Ff][Ii][Ll][Ee])[ _]*:[ _]*' . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*"; $text =~ s/$match_regex/${sub}$new_image/g; $match2 += 1; } } if($match2) # No need to null-edit articles anymore { if($test_only) { notelog("Test removal from page succeeded\n"); } else { # Submit the changes $wikipage->setEditableText($text); eval { Pearle::postPage($wikipage, $reason, 0); }; if($@) { if($@ =~ /^924 Spam filter: (.*)$/) { botwarnlog("*Spam filter on page [[$page]], url $1\n"); $match2 = 0; # We weren't able to remove it } elsif($@ =~ /^922/) { # Edit conflict. Try editing the page again. botwarnlog("*Edit conflict on page [[$page]]\n"); goto tryagain; } else { die; } } } } return ($match2) } # Returns 1 if the user has been notified, or 0 if they haven't sub IsNotified { my $uploader = shift; my $image_regex = shift; my $image_name = shift; my $notes_ref = shift; my $donts_ref = shift; # Check notification list if(defined($notes_ref) and $notes_ref->{"$uploader,$image_name"}) { Pearle::myLog(2, "Already notified for this image\n"); return 1; } if(defined($donts_ref) and $donts_ref->{$uploader}) { Pearle::myLog(2, "On exception list: $uploader\n"); return 1; } # Check uploader's talkpage my $page_data = Pearle::APIQuery(titles => "User talk:$uploader", prop => ['links', 'templates'], plnamespace => 6, pllimit => 500, tlnamespace => 10, tllimit => 500); $image_regex = MakeWikiRegex($image_name) if(!defined($image_regex)); if($page_data =~ /$image_regex/) { Pearle::myLog(2, "Has a link from userpage\n"); return 1; } if(usesTemplate($page_data, "Template:Nobots")) { Pearle::myLog(2, "Uses {{nobots}}\n"); return 1; } return 0; } # Returns 1 if the page has been notified, or 0 if it hasn't sub IsPageNotified { my $page = shift; my $image_regex = shift; my $image_name = shift; my $notes_ref = shift; my $donts_ref = shift; # Check notification list if($notes_ref->{"$page,$image_name"}) { Pearle::myLog(2, "Already notified for this image\n"); return 1; } if($donts_ref->{$page}) { Pearle::myLog(2, "On exception list: $page\n"); return 1; } # Check page my $page_data = Pearle::APIQuery(titles => $page, prop => 'links', plnamespace => 6); if($page_data =~ /$image_regex/) { Pearle::myLog(2, "Has a link from page\n"); return 1; } return 0; } sub isDated { my $image_text = shift; if($image_text =~ /\((\d\d?) (\w*) (\d\d\d\d)\)/) # Dated template { Pearle::myLog(4, "Dated tag $1 $2 $3\n"); return 1; } # as of 6 October 2006"> elsif($image_text =~ /as of (\d\d?) (\w*) (\d\d\d\d)/) # Template borked, working off category { Pearle::myLog(4, "Template borked; category $1 $2 $3\n"); return 1; } elsif($image_text =~ /{{{day}}} {{{month}}} \d\d\d\d/) # Generic template { Pearle::myLog(4, "Generic tag\n"); return 0; } else { Pearle::myLog(4, "No tag match\n"); return 0; } } # Return the tag date if there is one, the upload date if not # Returns in (day, month, year) format sub getDate { my $image_text = shift; if($image_text =~ /\((\d\d?) (\w*) (\d\d\d\d)\)/) { Pearle::myLog(4, "Template date $1-$2-$3\n"); return ($1, $2, $3); } elsif($image_text =~ /as of (\d\d?) (\w*) (\d\d\d\d)/) # Template borked, working off category { Pearle::myLog(4, "Category date 'as of' $1-$2-$3\n"); return ($1, $2, $3); } elsif($image_text =~ /from (\d\d?) (\w*) (\d\d\d\d)/) # Alternate category naming { Pearle::myLog(4, "Category date 'from' $1-$2-$3\n"); return ($1, $2, $3); } else { Pearle::myLog(4, "No date\n"); return (1, "January", 2007); } } # Return a list of upload dates sub getUploadDates { my @dates; my $image_text = shift; while($image_text =~ />\d\d?:\d\d, (\d\d?) (\w*) (\d\d\d\d)</g) { push @dates, [$1, $2, $3]; } return @dates; } sub getLastEditDate { my ($day, $month, $year); my $image = shift; my @history = Pearle::parseHistory($image); (undef, $day, $month, $year) = @{$history[0]}; return ($day, $month, $year); } # Find the most recent non-vandal, non-revert uploader sub GetImageUploader { my $image_data = shift; my ($uploader, $sha1, $comment); my @uploaders; my $uploader_data; my $i = 0; my $count = 0; my $parsed_xml = Pearle::getXMLParser()->XMLin($image_data); Pearle::myLog(4, Dumper($parsed_xml)); if(exists($parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii}) and defined($parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii})) { if(ref($parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii}) eq 'ARRAY') { @uploaders = @{$parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii}}; } else { return $parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii}->{user}; } } else { return undef; } $uploader = $uploaders[0]->{user}; $sha1 = $uploaders[0]->{sha1}; $comment = $uploaders[0]->{comment} || ""; my $done = 0; while(!$done) { if($comment =~ /^Reverted/) { Pearle::myLog(4, "Revert found\n"); $i += 1; while($uploaders[$i]->{sha1} ne $sha1) { $i = $i + 1; } } elsif($comment =~ /optimi(z|s)ed|adjust|tweak|scale|crop|change|resize|remove|reduc(e|ing)/i) { Pearle::myLog(4, "Tweak found\n"); $i = $i + 1; } elsif(!defined($uploader)) { Pearle::myLog(4, "Something went wrong with finding the uploader\n"); $done = 1; } elsif($count > 500) { Pearle::myLog(4, "Took too long finding the uploader\n"); $uploader = undef; $done = 1; } else { $done = 1; } $uploader = $uploaders[$i]->{user}; $sha1 = $uploaders[$i]->{sha1}; $comment = $uploaders[$i]->{comment} || ""; $count = $count + 1; } if(defined($uploader)) { Pearle::myLog(4, "Uploader: $uploader\n"); return $uploader; } else { return undef; } } sub loadNotificationList { my $file = shift; my %notelist; my $i = 0; notelog("File: $file\n"); open INFILE, "<:utf8", $file; while(<INFILE>) { $_ =~ s/\s*#.*$//g; chomp; $notelist{$_} = 1; $i++; } close INFILE; notelog("$i notifications loaded\n"); return %notelist; } sub saveNotificationList { return if($test_only); my $file = shift; my %notelist = @_; my $key; open OUTFILE, ">:utf8", $file; foreach $key (keys(%notelist)) { print OUTFILE "$key\n"; } close OUTFILE; } # Does a page transclude any of a set of templates? Template names must be in the canonical form, with prefix. sub usesTemplate { my $xml = shift; my @templates = @_; my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml, ForceArray => ['tl']); Pearle::myLog(4, "Templates: " . join(", ", @templates) . "\n"); # Pearle::myLog(4, Dumper($parsed_xml)); if(!exists($parsed_xml->{query}->{pages}->{page}->{templates}->{tl}) or !defined($parsed_xml->{query}->{pages}->{page}->{templates}->{tl})) { Pearle::myLog(4, "No templates or no page\n"); return 0; } my $result = eval { my @used_templates = @{$parsed_xml->{query}->{pages}->{page}->{templates}->{tl}}; @used_templates = map {$_->{title}} @used_templates; Pearle::myLog(4, "Used: " . join(", ", @used_templates) . "\n"); Pearle::myLog(4, "Intersect: " . join(", ", Array::Utils::intersect( @templates, @used_templates )) . "\n"); if(Array::Utils::intersect( @templates, @used_templates )) { return 1; } else { return 0; } }; if($@) { # Probably more than one page in the xml print "usesTemplate error: $@"; return 0; } return $result; } sub DoesPageExist { my $page = shift; my $xml = Pearle::APIQuery(titles => [$page], prop => ['info']); # TODO: Handle API query errors ('undef' return values) if($xml =~ /missing=""/) { return 0; } return 1; } 1; </nowiki>