Jump to content

User:FairuseBot/libBot.pm

From Wikipedia, the free encyclopedia
#!/usr/bin/perl

# libBot: A Perl module of useful routines for running a bot

package libBot;

use strict;
use warnings;

use Pearle;
use Data::Dumper;
use Array::Utils;

require Exporter;

our @ISA = qw(Exporter);
our @EXPORT = qw(config usernotify wikilog botwarnlog notelog LoadInfoboxPatterns FixupLinks MakeWikiRegex DoIHaveMessages GetPageCategories GetPageTemplates GetLinksOnPage GetPageText GetPageList GetFullPageList GetImageNames SaveImage UpdateLink RemoveImageFromPage ReplaceImage IsNotified IsPageNotified isDated getDate getUploadDates getLastEditDate GetImageUploader loadNotificationList saveNotificationList usesTemplate DoesPageExist);
our $VERSION = 1.00;

my $test_only = 0;
my $username = "";
my @infobox_patterns = ();

sub config
{
	my %params = @_;
	
	$test_only = $params{test_only} if(defined($params{test_only}));
	$username = $params{username} if(defined($params{username}));
}

# Log a warning on a user's talkpage, using an existing edit session
sub usernotify
{
	my ($wikipage, $text, $user, $summary);
	$wikipage = $_[1];
	$summary = $_[2];
	$summary = "Logging warning message" if(!defined($summary));
	
	if(!$wikipage->isa("Pearle::WikiPage"))
	{
		Pearle::myLog(0, "usernotify(): Second parameter is not a WikiPage object\n");
		die "usernotify(): Second parameter is not a WikiPage object\n";
	}
	
	# We've been handed an editing session
	Pearle::myLog(4, "Warning with existing edit session\n");

	if($test_only)
	{
		print STDERR $_[0];
		return;
	}
	
	if($wikipage->getWikiText() =~ /^#redirect/i)
	{
		botwarnlog("*User talk page [[User talk:$user]] is a redirect\n");
		return;
	}
	$text = $wikipage->getEditableText();
	$text .= $_[0];
	$wikipage->setEditableText($text);
	Pearle::postPage($wikipage, $summary, 0);
	print STDERR $_[0];
}

# General-purpose on-Wiki logging routine
sub wikilog
{
	my($target, $text, $token, $summary);
	$target = $_[0];
	$text = $_[1];
	$summary = $_[2] || "Logging note";

	chomp($text);
	$text = "\n$text" if($text !~ /^\n/);	# The edit API eats trailing newlines, so prepend a newline if the message doesn't already have one.

	eval
	{
		$token = Pearle::getToken($target);
	};
	if($@)
	{
		if($@ =~ /^925/)
		{
			Pearle::myLog(1, "Failed to notify: Protected page $target\n");
			return;
		}
		else
		{
			die;
		}
	}

	if($test_only)
	{
		print STDERR $_[1];
		return;
	}

	Pearle::appendToPage($target, $token, $text, $summary, 0);
}

# Log a warning on the talk page of the bot
sub botwarnlog
{
	my ($page, $text, $summary);
	$text = $_[0];
	$summary = $_[1];
	$summary = "Logging warning message" if(!defined($summary));
	$page = "User talk:${username}/log";
	
	wikilog($page, $text, $summary);
}

# Log a notification message to the console
sub notelog
{
	print STDERR @_;
}

# Fix all wikilinks in a string so that they shows as a link, not inline, if it's for a category or image
sub FixupLinks
{
	my $link = shift;
	$link =~ s/\[\[(Category|Image|File)/[[:$1/g;
	return $link;
}

# Make a string into a Wikipedia-compatible regex
sub MakeWikiRegex
{
	my $string = shift;
	my @chars = split //, $string;
	my $result = '';

	return undef if(!defined($string));

	foreach my $char (@chars)
	{
		# Escape metacharacters, and add percent-encoding for certain characters
		if($char eq '\\') {$result .= '\\\\';}
		elsif($char eq '/') {$result .= '(?i:\/|%2F)';}
		elsif($char eq '.') {$result .= '\.';}
		elsif($char eq '(') {$result .= '(?i:\(|%28)';}
		elsif($char eq ')') {$result .= '(?i:\)|%29)';}
		elsif($char eq '[') {$result .= '\[';}
		elsif($char eq ']') {$result .= '\]';}
		elsif($char eq '+') {$result .= '\+';}
		elsif($char eq '*') {$result .= '\*';}
		elsif($char eq '?') {$result .= '(?i:\?|%3F)';}
		elsif($char eq '^') {$result .= '\^';}
		elsif($char eq '$') {$result .= '\$';}
		elsif($char eq '&') {$result .= '(?i:&|%26)';}
		elsif($char eq '!') {$result .= '(?i:!|%21)';}
		elsif($char eq '~') {$result .= '(?i:~|%7E)';}
		elsif($char eq "'") {$result .= "(?i:'|%27)";}
		elsif($char eq '"') {$result .= '(?i:"|%22)';}
		elsif($char eq ',') {$result .= '(?i:,|%2C)';}
		else {$result .= $char;}
	}
	# Process the string to match both with spaces and with underscores
	$result =~ s/[ _]/[ _]+/g;

	# Process the string to match both upcase and lowercase first characters
	if($result =~ /^[A-Za-z]/)
	{
		$result =~ s/^(.)/"[$1".lc($1)."]"/e;
	}
	return $result;
}

sub HTMLEncode
{
	my $char = shift;
	return sprintf("&X%X;", ord($char));
}

# Make a string into something that can match most image name formats
sub MakeFancyRegex
{
	my $string = shift;
	my @chars = split //, $string;
	my $result;
	
	foreach my $char (@chars)
	{
		if($char eq '\\')
		{
			$result .= "(\\\\|%5C|%5c|&x5C;)";
		}
		elsif($char eq '.')
		{
		}
		elsif($char eq '(')
		{
		}
		elsif($char eq ')')
		{
		}
		else
		{
			$result .= "($char|" . uri_escape_utf8($char) . "|" . lc(uri_escape_utf8($char)) . "|" . HTMLEncode($char) . "|" . lc(HTMLEncode($char)) . ")";
		}
	}

	return $result;
}

# Check for new talk page messages
sub DoIHaveMessages
{
	my $xml = shift;
	my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml);
	
	if(exists($parsed_xml->{query}->{userinfo}->{messages}) and defined($parsed_xml->{query}->{userinfo}->{messages}))
	{
		return 1;
	}
	else
	{
		return 0;
	}
}


sub GetPageCategories
{
	my $xml = shift;
	my @pages = ();
	
	if(defined($xml))
	{
		my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml);
		
		Pearle::myLog(4, Dumper($parsed_xml));
		
		if(exists($parsed_xml->{query}->{pages}->{page}->{categories}->{cl}) and defined($parsed_xml->{query}->{pages}->{page}->{categories}->{cl}))
		{
			if(ref($parsed_xml->{query}->{pages}->{page}->{categories}->{cl}) eq 'ARRAY')
			{
				my @all_pages = @{$parsed_xml->{query}->{pages}->{page}->{categories}->{cl}};
				
				@pages = map {$_->{title}} @all_pages;
			}
			else
			{
				@pages = ($parsed_xml->{query}->{pages}->{page}->{categories}->{cl}->{title});
			}
		}
	}
	return @pages;
}

sub GetLinksOnPage
{
	my $xml = shift;
	my @pages = ();
	
	if(defined($xml))
	{
		my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml);
		
		Pearle::myLog(4, Dumper($parsed_xml));
		
		if(exists($parsed_xml->{query}->{pages}->{page}->{links}->{pl}) and defined($parsed_xml->{query}->{pages}->{page}->{links}->{pl}))
		{
			if(ref($parsed_xml->{query}->{pages}->{page}->{links}->{pl}) eq 'ARRAY')
			{
				my @all_pages = @{$parsed_xml->{query}->{pages}->{page}->{links}->{pl}};
				
				@pages = map {$_->{title}} @all_pages;
			}
			else
			{
				@pages = ($parsed_xml->{query}->{pages}->{page}->{links}->{pl}->{title});
			}
		}
	}
	return @pages;
}

sub GetPageText
{
	my $xml = shift;
	my $text = undef;
	
	if(defined($xml))
	{
		my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml);
		
		Pearle::myLog(4, Dumper($parsed_xml));
		
		if(exists($parsed_xml->{query}->{pages}->{page}->{revisions}->{rev}->{content}) and defined($parsed_xml->{query}->{pages}->{page}->{revisions}->{rev}->{content}))
		{
			if(ref($parsed_xml->{query}->{pages}->{page}->{revisions}->{rev}->{content}))
			{
				# The API/XML parser interact to produce a HASH ref if the revision is empty
				$text = "";
			}
			else
			{
				$text = $parsed_xml->{query}->{pages}->{page}->{revisions}->{rev}->{content};
			}
		}
	}
	return $text;
}

# Input: XML from the API, generated with prop => 'templates' and with only a single title
#        Either as text or as a parsed tree
#
# Returns: A list of templates used by the page
#
# Side effects: None
sub GetPageTemplates
{
	my $xml = shift;
	my @templates;
	
	if(defined($xml))
	{
		my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml, ForceArray => ['tl']);
		if(exists($parsed_xml->{query}->{pages}->{page}->{templates}->{tl}) and defined($parsed_xml->{query}->{pages}->{page}->{templates}->{tl}))
		{
			@templates = map {$_->{title}} @{$parsed_xml->{query}->{pages}->{page}->{templates}->{tl}};
		}
	}
	return @templates;
}

# Input: XML, either a tree produced by parsing, or XML text
# 
# Returns: A list of pages that this image is used on
#
# Side effects: For pages in certain namespaces, posts on the bot's log page
sub GetPageList
{
	my $xml = shift;
	my $image;
	my @pages = ();

	if(defined($xml))
	{
		my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml, ForceArray => ['iu']);
		my $image = $parsed_xml->{query}->{pages}->{page}->{title};

		Pearle::myLog(4, Dumper($parsed_xml));
		
		if(exists($parsed_xml->{query}->{imageusage}->{iu}) and defined($parsed_xml->{query}->{imageusage}->{iu}))
		{
			my @bad_pages = grep {$_->{ns} == 10 or $_->{ns} == 12} @{$parsed_xml->{query}->{imageusage}->{iu}};
			my @good_pages = grep {$_->{ns} != 10 and $_->{ns} != 12} @{$parsed_xml->{query}->{imageusage}->{iu}};
			
			@pages = map {$_->{title}} @good_pages;
			
			if(scalar(@bad_pages) > 0 and defined($image))	# If "image" is undefined, we're probably doing a pure usage check, rather than one in preparation for removal
			{
				my $notice;
				foreach my $page (@bad_pages)
				{
					$notice .= "*Found image [[:$image]] in [[$page->{title}]]\n";
				}
				botwarnlog($notice);
			}
		}
	}
	
	return @pages;
}

# Get all pages.  Don't filter for bad namespaces.
sub GetFullPageList
{
	my $xml = shift;
	my $image;
	my @pages = ();

	if(defined($xml))
	{
		my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml, ForceArray => ['iu']);
		my $image = $parsed_xml->{query}->{pages}->{page}->{title};

		Pearle::myLog(4, Dumper($parsed_xml));
		
		if(exists($parsed_xml->{query}->{imageusage}->{iu}) and defined($parsed_xml->{query}->{imageusage}->{iu}))
		{
			@pages = map {$_->{title}} @{$parsed_xml->{query}->{imageusage}->{iu}};
		}
	}
	
	return @pages;
}

# Input: XML from the API, generated with list => 'backlinks'.  blfilterredir => 'redirects' is recommended but not mandatory.
#        Either as text or as a parsed tree
#
# Returns: A list of redirects to the image
#
# Side effects: None
sub GetImageNames
{
	my $xml = shift;
	my @names;
	
	if(defined($xml))
	{
		my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml, ForceArray => ['bl']);
		if(exists($parsed_xml->{query}->{backlinks}->{bl}) and defined($parsed_xml->{query}->{backlinks}->{bl}))
		{
			@names = map {$_->{title}} grep( {defined($_->{redirect})} @{$parsed_xml->{query}->{backlinks}->{bl}});
		}
	}
	return @names;
}


sub UpdateLink
{
	my $page = shift;
	my $from = shift;
	my $to = shift;
	my $summary = shift || "Updating link to bypass a redirect or disambiguation page";
	
	die "No page to edit" if(!defined($page));
	die "No link to change" if(!defined($from));
	die "No new link" if(!defined($to));
	
	Pearle::myLog(3, "Updating link from $from to $to for page $page\n");
	
	my $wikipage = Pearle::getPage($page);
	$wikipage->canonicalizeLinks();
	my $text = $wikipage->getEditableText();
	my $link_regex = MakeWikiRegex($from);
		
	my $matches = $text =~ s/\x01($link_regex)\x02/\x01${to}|${1}\x02/gi;
	$matches += $text =~ s/\x01$link_regex([#|])/\x01${to}${1}/gi;
	$matches += $text =~ s/([^=]\s*=\s*)$link_regex(\s*[|\n])/${1}$to${2}/gi;
	
	$wikipage->setEditableText($text);
	print $text;
	if($matches > 0)
	{
		Pearle::postPage( $wikipage, $summary, 0);
	}
	else
	{
		Pearle::myLog(3, "No update\n");
	}
	
	return $matches;
}

sub RemoveImageFromPage
{
	my $image = shift;
	my $page = shift;
	my $image_regex = shift;
	my $removal_prefix = shift;
	my $removal_comment = shift;

	my $wikipage;
	my $text;
	my ($match1, $match2);
	my $old_length;
	my $new_length;
	my $change_len;
	my $match_len;

tryagain:
	# Fetch an article page
	$wikipage = Pearle::getPage($page);
	$wikipage->canonicalizeLinks();
	$text = $wikipage->getEditableText();
	
	if(!defined($text))
	{
		Pearle::myLog(1, "Error: Bad edit page [[$page]]\n");
		botwarnlog(FixupLinks("*Error: Bad edit page [[$page]]\n"));
		sleep(300);
		return 0;
	}
	
	if($text =~ /^\s*$/)
	{
		# Might be protected instead of empty
		Pearle::myLog(1, "Error: Empty or protected page [[$page]]\n");
		botwarnlog(FixupLinks("*Error: Empty or protected page [[$page]]\n"));
		sleep(300);
		return 0;
	}
	
	if($text =~ /^#redirect/i)
	{
		Pearle::myLog(1, "Redirect found for page [[$page]] (image [[:$image]])\n");
		botwarnlog(FixupLinks("*Redirect found for page [[$page]] (image [[:$image]])\n"));
		print $text;
		return 0;
	}

	# Remove the image
	my $regex3 = "([ \\t]*\x01${image_regex}[^\x01]*?(\x01[^\x02]*?\x02[^\x01]*?|)+\x02[ \\t]*)";		# Regex to match images

	#my $regex3 = "(
	#               [ \\t]*                          # Any leading whitespace
	#               \x01                             # Open double-bracket for the image
	#               ${image_regex}                   # The image itself
	#               [^\x01]*?                        # Anything up to the first link in the caption, or a closing double bracket (minimal match)
	#                   (\x01                            # Open double-bracket for a link in the caption
	#                   [^\x02]*?                        # Anything but a closing double-bracket
	#                   \x02                             # The closing double-bracket for the link
	#                   [^\x01]*?|)                      # Any non-link text, or nothing
	#                   +                                # Matches one or more times
	#               \x02                             # The closing double-bracket for the image
	#               [ \\t]*)                         # Any trailing whitespace
	#             ";

	my $regex3ex = "\\w[ \\t]*${regex3}[ \\t]*\\w";								# Regex to try to spot inline images
	my $regex3g = "(${image_regex}.*)";											# Regex to match gallery images
	my ($raw_image) = $image =~ /(?:Image|File):(.*)/;	

	Pearle::myLog(3, "Regex 3: $regex3\n");
	notelog("Regex 3 extended: $regex3ex\n");
	notelog("Regex 3 gallery: $regex3g\n");
	Pearle::myLog(3, "Raw regex: $raw_image\n");
	
	if($text =~ /$regex3ex/)
	{
		Pearle::myLog(1, "Possible inline image in [[$page]]\n");
		botwarnlog(FixupLinks("*Possible inline image [[:$image]] in [[$page]]\n"));
		return 0;	# Can't do gallery matching because that also matches regular images, and odds are, we don't have an infobox
	}
	
	$text =~ /$regex3/;
	$match_len = length($1);
	my @matches = $text =~ /$regex3/g;
	if(grep {$_ =~ /[\x{F0000}-\x{FFFFF}]/} @matches) # If any images have comments in their captions, we can't remove them
	{
		botwarnlog(FixupLinks("*Comment in image in [[$page]]\n"));
		goto skipregular;
	}

	if(defined($removal_prefix))
	{
		$match2 = $text =~ s/$regex3/<!-- $removal_prefix $1 -->/g;
	}
	else
	{
		$match2 = $text =~ s/$regex3//g;
	}

	$new_length = length($text);
	print "Num: $match2 Len: $match_len\n";
	if($match2)
	{
		if($match_len < (2 + length($image)))
		{
			Pearle::myLog(0, "Short replacement of $match_len bytes (min " . (length($image) + 2) . ") in [[$page]] ($match2 matches).  Exiting.\n");
			Pearle::myLog(0, "Text:\n$text\n");
			print Dumper($1);
			print Dumper($image);
			exit;
		}
		# If many matches, log a warning
		if($match2 > 2)
		{
			Pearle::myLog(3, "More than one match ($match2) in page [[$page]]\n");
		}
		if($match2 > 100)
		{
			Pearle::myLog(1, "Too many matches ($match2) in page [[$page]].  Skipping.\n");
			botwarnlog("*Too many matches ($match2) in page [[$page]].  Skipping.\n");
			return 0;
		}
	}

skipregular:
	# Put the text back and get it again in order to fold any comments resulting from removing non-gallery images.
	# This is because gallery image matching will also match commented images.
	$wikipage->setEditableText($text);
	$text = $wikipage->getEditableText();

	if($text =~ /<gallery/i)
	{
		Pearle::myLog(3, "*Possible image gallery in page [[$page]]\n");
		if(defined($removal_prefix))
		{
			if($text =~ s/$regex3g/<!-- $removal_prefix $1 -->/g)
			{
				$match2 += 1;
			}
		}
		else
		{
			if($text =~ s/$regex3g//g)
			{
				$match2 += 1;
			}
		}
	}

	if($match2 > 0)
	{
		if($text =~ /\[\[(?: |)<!--/)
		{
			Pearle::myLog(2, "Possible multiline image in page [[$page]]\n");
			botwarnlog(FixupLinks("*Possible multiline image in page [[$page]]\n"));
		}
	}

	# Improved infobox removal
	my $infobox_regex = "([-A-Za-z0-9_]+[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}]*=)[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*";
	my $infobox_regex_full = "([-A-Za-z0-9_]+[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}]*=)[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . '(?:[Ii][Mm][Aa][Gg][Ee]|[Ff][Ii][Ll][Ee])[ _]*:[ _]*' . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*";
	if($text =~ /$infobox_regex/)
	{
		Pearle::myLog(3, "Matched on infobox regex: $infobox_regex\n");
		Pearle::myLog(3, "Infobox parameter: $1\n");
		if($& =~ /puic/)
		{
			botwarnlog(FixupLinks("*PUIC in page [[$page]]\n"));
		}
		else
		{
			my $sub = $1;
			my $match_regex = MakeWikiRegex($sub) . "[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*";
			$text =~ s/$match_regex/$sub/g;
			$match2 += 1;
		}
	}
	if($text =~ /$infobox_regex_full/)
	{
		Pearle::myLog(3, "Matched on infobox regex: $infobox_regex_full\n");
		Pearle::myLog(3, "Infobox parameter: $1\n");
		if($& =~ /puic/)
		{
			botwarnlog(FixupLinks("*PUIC in page [[$page]]\n"));
		}
		else
		{
			my $sub = $1;
			my $match_regex = MakeWikiRegex($sub) . "[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . '(?:[Ii][Mm][Aa][Gg][Ee]|[Ff][Ii][Ll][Ee])[ _]*:[ _]*' . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*";
			$text =~ s/$match_regex/$sub/g;
			$match2 += 1;
		}
	}

	if($match2)	# No need to null-edit articles anymore
	{
		if($test_only)
		{
			notelog("Test removal from page succeeded\n");
		}
		else
		{
			# Submit the changes
			$wikipage->setEditableText($text);
			eval
			{
				Pearle::postPage($wikipage, $removal_comment, 0);
			};
			if($@)
			{
				if($@ =~ /^924 Spam filter: (.*)$/)
				{
					botwarnlog("*Spam filter on page [[$page]], url <nowiki>$1\n");
					$match2 = 0;	# We weren't able to remove it
				}
				elsif($@ =~ /^922/)
				{
					# Edit conflict.  Try editing the page again.
					botwarnlog("*Edit conflict on page [[$page]]\n");
					goto tryagain;
				}
				else
				{
					die;
				}
			}
		}
	}
	
	return ($match2)
}

sub ReplaceImage
{
	my $image = shift;
	my $page = shift;
	my $image_regex = shift;
	my $new_image = shift;
	my $reason = shift;

	my $wikipage;
	my $text;
	my ($match1, $match2);
	my $old_length;
	my $new_length;
	my $change_len;
	my $match_len;

tryagain:
	# Fetch an article page
	$wikipage = Pearle::getPage($page);
	$wikipage->canonicalizeLinks();
	$text = $wikipage->getEditableText();
			
	if(!defined($text))
	{
		Pearle::myLog(1, "Error: Bad edit page [[$page]]\n");
		botwarnlog(FixupLinks("*Error: Bad edit page [[$page]]\n"));
		sleep(300);
		return 0;
	}
	
	if($text =~ /^\s*$/)
	{
		# Might be protected instead of empty
		Pearle::myLog(1, "Error: Empty or protected page [[$page]]\n");
		botwarnlog(FixupLinks("*Error: Empty or protected page [[$page]]\n"));
		sleep(300);
		return 0;
	}
	
	if($text =~ /^#redirect/i)
	{
		Pearle::myLog(1, "Redirect found for page [[$page]] (image [[:$image]])\n");
		botwarnlog(FixupLinks("*Redirect found for page [[$page]] (image [[:$image]])\n"));
		print $text;
		return 0;
	}

	# Remove the image
	my $regex3 = "([ \\t]*\x01)${image_regex}([^\x01]*?(\x01[^\x02]*?\x02[^\x01]*?|)+\x02[ \\t]*)";		# Regex to match images

	#my $regex3 = "(
	#               [ \\t]*                          # Any leading whitespace
	#               \x01                             # Open double-bracket for the image
	#               ${image_regex}                   # The image itself
	#               [^\x01]*?                        # Anything up to the first link in the caption, or a closing double bracket (minimal match)
	#                   (\x01                            # Open double-bracket for a link in the caption
	#                   [^\x02]*?                        # Anything but a closing double-bracket
	#                   \x02                             # The closing double-bracket for the link
	#                   [^\x01]*?|)                      # Any non-link text, or nothing
	#                   +                                # Matches one or more times
	#               \x02                             # The closing double-bracket for the image
	#               [ \\t]*)                         # Any trailing whitespace
	#             ";

	my $regex3g = "${image_regex}(.*)";											# Regex to match gallery images
	my ($raw_image) = $image =~ /(?:Image|File):(.*)/;	

	my $regex4m = "(\x01[ _]*[Mm][Ee][Dd][Ii][Aa][ _]*:[ _]*)" . MakeWikiRegex($raw_image) . "([ _]*\\|([^]]*)\x02)";	# Regex to match inline Media: links

	Pearle::myLog(3, "Regex 3: $regex3\n");
	notelog("Regex 3 gallery: $regex3g\n");
	Pearle::myLog(3, "Raw regex: $raw_image\n");
	notelog("Regex 4 Media: $regex4m\n");
	
	$old_length = length($text);
	
	$match2 = $text =~ s/$regex3/$1$new_image$2/g;

	$new_length = length($text);
	print "Num: $match2 Len: $match_len\n";
	if($match2)
	{
		# If the length change isn't right, log a warning and return without saving
		if($new_length != $old_length - ($match2 * (length($image) - length($new_image))))
		{
			botwarnlog(FixupLinks("*Length mismatch on [[$page]] replacing [[$image]] with [[$new_image]]\n"));
			Pearle::myLog(1, "Length mismatch on $page replacing [[$image]] with [{$new_image]]\n");
			return 0;
		}
		
		# If many matches, log a warning
		if($match2 > 2)
		{
			Pearle::myLog(3, "More than one match ($match2) in page [[$page]]\n");
		}
		if($match2 > 100)
		{
			Pearle::myLog(1, "Too many matches ($match2) in page [[$page]].  Skipping.\n");
			botwarnlog("*Too many matches ($match2) in page [[$page]].  Skipping.\n");
			return 0;
		}
	}

	if($text =~ /<gallery/i)
	{
		Pearle::myLog(3, "*Possible image gallery in page [[$page]]\n");
		if($new_image =~ /$regex3g/)
		{
			Pearle::myLog(2, "New image name $new_image is a substring of old image name $image\n");
			botwarnlog("*New image [[$new_image]] is a substring of [[$image]] in page [[$page]]\n");
		}
		else
		{
			my $gallery_matches = $text =~ s/$regex3g/$new_image$1/g;
			$match2 += $gallery_matches;
		}
	}

	# Improved infobox replacement
	my $infobox_regex = "([-A-Za-z0-9_]+[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}]*=)[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*";
	my $infobox_regex_full = "([-A-Za-z0-9_]+[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}]*=)[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . '(?:[Ii][Mm][Aa][Gg][Ee]|[Ff][Ii][Ll][Ee])[[ _]*:[ _]*' . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*";
	if($text =~ /$infobox_regex/)
	{
		Pearle::myLog(3, "Matched on infobox regex: $infobox_regex\n");
		Pearle::myLog(3, "Infobox parameter: $1\n");
		if($& =~ /puic/)
		{
			botwarnlog(FixupLinks("*PUIC in page [[$page]]\n"));
		}
		else
		{

			my $sub = $1;
			my $match_regex = MakeWikiRegex($sub) . "[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*";
			my $non_prefixed_new_image = $new_image;
			$non_prefixed_new_image =~ s/^(Image|File)://;
			$text =~ s/$match_regex/${sub}${non_prefixed_new_image}/g;
			$match2 += 1;
		}
	}
	if($text =~ /$infobox_regex_full/)
	{
		Pearle::myLog(3, "Matched on infobox regex: $infobox_regex_full\n");
		Pearle::myLog(3, "Infobox parameter: $1\n");
		if($& =~ /puic/)
		{
			botwarnlog(FixupLinks("*PUIC in page [[$page]]\n"));
		}
		else
		{
			my $sub = $1;
			my $match_regex = MakeWikiRegex($sub) . "[\\p{IsSpace}\x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*" . '(?:[Ii][Mm][Aa][Gg][Ee]|[Ff][Ii][Ll][Ee])[ _]*:[ _]*' . MakeWikiRegex($raw_image) . "[ \x{200E}\x{200F}\x{202A}\x{202B}\x{202C}\x{202D}\x{202E}_]*";
			$text =~ s/$match_regex/${sub}$new_image/g;
			$match2 += 1;
		}
	}

	if($match2)	# No need to null-edit articles anymore
	{
		if($test_only)
		{
			notelog("Test removal from page succeeded\n");
		}
		else
		{
			# Submit the changes
			$wikipage->setEditableText($text);
			eval
			{
				Pearle::postPage($wikipage, $reason, 0);
			};
			if($@)
			{
				if($@ =~ /^924 Spam filter: (.*)$/)
				{
					botwarnlog("*Spam filter on page [[$page]], url $1\n");
					$match2 = 0;	# We weren't able to remove it
				}
				elsif($@ =~ /^922/)
				{
					# Edit conflict.  Try editing the page again.
					botwarnlog("*Edit conflict on page [[$page]]\n");
					goto tryagain;
				}
				else
				{
					die;
				}
			}
		}
	}
	
	return ($match2)
}

# Returns 1 if the user has been notified, or 0 if they haven't
sub IsNotified
{
	my $uploader = shift;
	my $image_regex = shift;
	my $image_name = shift;
	my $notes_ref = shift;
	my $donts_ref = shift;

	# Check notification list
	if(defined($notes_ref) and $notes_ref->{"$uploader,$image_name"})
	{
		Pearle::myLog(2, "Already notified for this image\n");
		return 1;
	}

	if(defined($donts_ref) and $donts_ref->{$uploader})
	{
		Pearle::myLog(2, "On exception list: $uploader\n");
		return 1;
	}
	
	# Check uploader's talkpage
	my $page_data = Pearle::APIQuery(titles => "User talk:$uploader", prop => ['links', 'templates'], plnamespace => 6, pllimit => 500, tlnamespace => 10, tllimit => 500);
	$image_regex = MakeWikiRegex($image_name) if(!defined($image_regex));
	if($page_data =~ /$image_regex/)
	{
		Pearle::myLog(2, "Has a link from userpage\n");
		return 1;
	}
	if(usesTemplate($page_data, "Template:Nobots"))
	{
		Pearle::myLog(2, "Uses {{nobots}}\n");
		return 1;
	}
	return 0;
}

# Returns 1 if the page has been notified, or 0 if it hasn't
sub IsPageNotified
{
	my $page = shift;
	my $image_regex = shift;
	my $image_name = shift;
	my $notes_ref = shift;
	my $donts_ref = shift;
	
	# Check notification list
	if($notes_ref->{"$page,$image_name"})
	{
		Pearle::myLog(2, "Already notified for this image\n");
		return 1;
	}

	if($donts_ref->{$page})
	{
		Pearle::myLog(2, "On exception list: $page\n");
		return 1;
	}

	# Check page
	my $page_data = Pearle::APIQuery(titles => $page, prop => 'links', plnamespace => 6);
	if($page_data =~ /$image_regex/)
	{
		Pearle::myLog(2, "Has a link from page\n");
		return 1;
	}
	return 0;
}


sub isDated
{
	my $image_text = shift;
	if($image_text =~ /\((\d\d?) (\w*) (\d\d\d\d)\)/)	# Dated template
	{
		Pearle::myLog(4, "Dated tag $1 $2 $3\n");
		return 1;
	}
	# as of 6 October 2006">
	elsif($image_text =~ /as of (\d\d?) (\w*) (\d\d\d\d)/) # Template borked, working off category
	{
		Pearle::myLog(4, "Template borked; category $1 $2 $3\n");
		return 1;
	}
	elsif($image_text =~ /{{{day}}} {{{month}}} \d\d\d\d/)	# Generic template
	{
		Pearle::myLog(4, "Generic tag\n");
		return 0;
	}
	else
	{
		Pearle::myLog(4, "No tag match\n");
		return 0;
	}
}

# Return the tag date if there is one, the upload date if not
# Returns in (day, month, year) format
sub getDate
{
	my $image_text = shift;
	if($image_text =~ /\((\d\d?) (\w*) (\d\d\d\d)\)/)
	{
		Pearle::myLog(4, "Template date $1-$2-$3\n");
		return ($1, $2, $3);
	}
	elsif($image_text =~ /as of (\d\d?) (\w*) (\d\d\d\d)/) # Template borked, working off category
	{
		Pearle::myLog(4, "Category date 'as of' $1-$2-$3\n");
		return ($1, $2, $3);
	}
	elsif($image_text =~ /from (\d\d?) (\w*) (\d\d\d\d)/) # Alternate category naming
	{
		Pearle::myLog(4, "Category date 'from' $1-$2-$3\n");
		return ($1, $2, $3);
	}
	else
	{
		Pearle::myLog(4, "No date\n");
		return (1, "January", 2007);
	}
}

# Return a list of upload dates
sub getUploadDates
{
	my @dates;
	my $image_text = shift;
	while($image_text =~ />\d\d?:\d\d, (\d\d?) (\w*) (\d\d\d\d)</g)
	{
		push @dates, [$1, $2, $3];
	}
	return @dates;
}

sub getLastEditDate
{
	my ($day, $month, $year);
	my $image = shift;
	
	my @history = Pearle::parseHistory($image);
	(undef, $day, $month, $year) = @{$history[0]};
	
	return ($day, $month, $year);
}

# Find the most recent non-vandal, non-revert uploader
sub GetImageUploader
{
	my $image_data = shift;
	my ($uploader, $sha1, $comment);
	my @uploaders;
	my $uploader_data;
	my $i = 0;
	my $count = 0;
	
	my $parsed_xml = Pearle::getXMLParser()->XMLin($image_data);

	Pearle::myLog(4, Dumper($parsed_xml));
		
	if(exists($parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii}) and defined($parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii}))
	{
		if(ref($parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii}) eq 'ARRAY')
		{
			@uploaders = @{$parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii}};
		}
		else
		{
			return $parsed_xml->{query}->{pages}->{page}->{imageinfo}->{ii}->{user};
		}
	}
	else
	{
		return undef;
	}
	
	$uploader = $uploaders[0]->{user};
	$sha1 = $uploaders[0]->{sha1};
	$comment = $uploaders[0]->{comment} || "";
	
	my $done = 0;
	while(!$done)
	{
		if($comment =~ /^Reverted/)
		{
			Pearle::myLog(4, "Revert found\n");
			$i += 1;
			while($uploaders[$i]->{sha1} ne $sha1)
			{
				$i = $i + 1;
			}
		}
		elsif($comment =~ /optimi(z|s)ed|adjust|tweak|scale|crop|change|resize|remove|reduc(e|ing)/i)
		{
			Pearle::myLog(4, "Tweak found\n");
			$i = $i + 1;
		}
		elsif(!defined($uploader))
		{
			Pearle::myLog(4, "Something went wrong with finding the uploader\n");
			$done = 1;
		}
		elsif($count > 500)
		{
			Pearle::myLog(4, "Took too long finding the uploader\n");
			$uploader = undef;
			$done = 1;
		}
		else
		{
			$done = 1;
		}
		$uploader = $uploaders[$i]->{user};
		$sha1 = $uploaders[$i]->{sha1};
		$comment = $uploaders[$i]->{comment} || "";
		$count = $count + 1;
	}
	if(defined($uploader))
	{
		Pearle::myLog(4, "Uploader: $uploader\n");
		return $uploader;
	}
	else
	{
		return undef;
	}
}

sub loadNotificationList
{
	my $file = shift;
	my %notelist;
	my $i = 0;
	notelog("File: $file\n");
	open INFILE, "<:utf8", $file;
	while(<INFILE>)
	{
		$_ =~ s/\s*#.*$//g;
		chomp;
		$notelist{$_} = 1;
		$i++;
	}
	close INFILE;
	notelog("$i notifications loaded\n");
	return %notelist;
}

sub saveNotificationList
{
	return if($test_only);
	
	my $file = shift;
	my %notelist = @_;
	my $key;
	
	open OUTFILE, ">:utf8", $file;
	foreach $key (keys(%notelist))
	{
		print OUTFILE "$key\n";
	}
	close OUTFILE;
}

# Does a page transclude any of a set of templates?  Template names must be in the canonical form, with prefix.
sub usesTemplate
{
	my $xml = shift;
	my @templates = @_;
	
	my $parsed_xml = ref($xml)?$xml:Pearle::getXMLParser()->XMLin($xml, ForceArray => ['tl']);
	Pearle::myLog(4, "Templates: " . join(", ", @templates) . "\n");
#	Pearle::myLog(4, Dumper($parsed_xml));
	
	if(!exists($parsed_xml->{query}->{pages}->{page}->{templates}->{tl}) or !defined($parsed_xml->{query}->{pages}->{page}->{templates}->{tl}))
	{
		Pearle::myLog(4, "No templates or no page\n");
		return 0;
	}
	
	my $result = eval
	{
		my @used_templates = @{$parsed_xml->{query}->{pages}->{page}->{templates}->{tl}};
		@used_templates = map {$_->{title}} @used_templates;
		
		Pearle::myLog(4, "Used: " . join(", ", @used_templates) . "\n");
		Pearle::myLog(4, "Intersect: " . join(", ", Array::Utils::intersect( @templates, @used_templates )) . "\n");
		
		if(Array::Utils::intersect( @templates, @used_templates ))
		{
			return 1;
		}
		else
		{
			return 0;
		}
	};
	if($@)
	{
		# Probably more than one page in the xml
		print "usesTemplate error: $@";
		return 0;
	}
	return $result;
}

sub DoesPageExist
{
	my $page = shift;
	
	my $xml = Pearle::APIQuery(titles => [$page], prop => ['info']);
	# TODO: Handle API query errors ('undef' return values)
	if($xml =~ /missing=""/)
	{
		return 0;
	}
	return 1;
}

1;
</nowiki>