User:B-bot/source/No longer orphaned checker
Appearance
< User:B-bot | source
This task will loop through the dated Category:Orphaned non-free use Wikipedia files category that is due to be deleted tomorrow. For each image in the category, it will purge the image and purge any articles for which there is a fair use rationale. If it finds the image to be no longer orphaned, it will remove the {{di-orphaned fair use}} tag.
/// <summary>
/// Format string for the dated category title; {0} takes the date, rendered as day, full month name, and four-digit year
/// </summary>
private const String CategoryNameFormatString = "Category:Orphaned non-free use Wikipedia files as of {0:d MMMM yyyy}";
/// <summary>
/// Category of images that will be deleted tomorrow (if nothing changes).
/// Set by the constructor from the UTC date seven days before construction.
/// </summary>
public String SixDaysAgoCategory;
/// <summary>
/// Dated category set by the constructor from the UTC date five days before construction
/// </summary>
public String FourDaysAgoCategory;
/// <summary>
/// Dated category set by the constructor from the UTC date three days before construction
/// </summary>
public String TwoDaysAgoCategory;
/// <summary>
/// Page that receives the before/after table when UserspaceTest is set; created in PerformTask
/// </summary>
Page m_pgUserspaceTest = null;
/// <summary>
/// Number of images found to be no longer orphaned; incremented in DoTask and reported in the finish log message
/// </summary>
int m_intImagesTagged = 0;
/// <summary>
/// Reports the display name of this job
/// </summary>
/// <returns>The fixed job name string</returns>
public override string GetJobName()
{
    const string strJobName = "No Longer Orphaned Checker";
    return strJobName;
}
/// <summary>
/// Constructor for this class. Initializes the three dated category names
/// (<see cref="SixDaysAgoCategory"/>, <see cref="FourDaysAgoCategory"/> and
/// <see cref="TwoDaysAgoCategory"/>) from the current UTC date.
/// </summary>
public NoLongerOrphaned()
{
    // Read the clock once so that all three names are derived from the same instant;
    // separate reads could straddle midnight UTC and yield inconsistent dates.
    DateTime dtmNow = DateTime.UtcNow;

    // Category titles on the wiki use English month names, so format with the invariant
    // culture rather than whatever culture the host machine happens to be set to.
    System.Globalization.CultureInfo ciInvariant = System.Globalization.CultureInfo.InvariantCulture;

    // Note that it's really seven days ago because we don't delete until the 8th day
    // (after 7 full days would have elapsed). The other two offsets are shifted the same way.
    SixDaysAgoCategory = string.Format(ciInvariant, CategoryNameFormatString, dtmNow.AddDays(-7));
    FourDaysAgoCategory = string.Format(ciInvariant, CategoryNameFormatString, dtmNow.AddDays(-5));
    TwoDaysAgoCategory = string.Format(ciInvariant, CategoryNameFormatString, dtmNow.AddDays(-3));
}
/// <summary>
/// Does the task for one of the categories. For every file page in the category it purges
/// the articles named in the page's rationale(s), asks the imageusage API whether the file
/// is used in any article, and, if it is, removes the orphaned-file deletion tag
/// (or, when UserspaceTest is set, records the proposed change on the test page instead).
/// </summary>
/// <param name="strCat">Full title of the category to process</param>
private void DoTask(String strCat)
{
    // Connect to Wikipedia
    Site site = TryToConnect("https://en.wikipedia.org", Properties.Settings.Default.BotUserName, Properties.Settings.Default.BotPassword);
    // Use a separate connection for our API calls - this seems to time out less frequently
    Site site2 = TryToConnect("https://en.wikipedia.org", Properties.Settings.Default.BotUserName, Properties.Settings.Default.BotPassword);

    // Grab the list of pages in the category
    PageList pl = new PageList(site);
    pl.FillAllFromCategory(strCat);

    System.Threading.Thread.Sleep(1000 * Properties.Settings.Default.CheckStopDelaySeconds);

    if (UserspaceTest)
    {
        // NOTE(review): this assignment replaces the test page text on every call, so when
        // several categories are processed in one run only the last category's table
        // survives - confirm that this is intended.
        m_pgUserspaceTest.text = "Now beginning No Longer Orphaned task on " + DateTime.Now.ToString() + " (local time) ...\r\n\r\n";
        m_pgUserspaceTest.text += "{| class=\"wikitable sortable\"\r\n|-\r\n! Page !! Timestamp !! Former text !! Proposed text\r\n";
        m_pgUserspaceTest.Save();
    }

    // Loop through each page
    foreach (Page pgCurrentImagePage in pl)
    {
        // Namespace 6 is the File namespace; anything else does not belong in this category
        if (6 != pgCurrentImagePage.GetNamespace())
        {
            LogToEventLog(ref site2, MessageType.Error, "Error: non-file [[:" + pgCurrentImagePage.title + "]] found in category [[:" + strCat + "]]", null);
            continue;
        }

        // Event log message for successful removal
        String strMessage = "";
        // Event log message for failed removal
        String strErrorMessage = "";

        if (BotStop(site2))
        {
            LogToEventLog(ref site, MessageType.Error, "I was ordered to abort.", null);
            return;
        }

        pgCurrentImagePage.Load();

        // Loop through and find all of the articles in the rationale
        String strArticleName = "";
        try
        {
            // Match.Success is the documented way to detect the end of the match sequence;
            // Regex.Match never returns null, and a genuine match may sit at index 0.
            for (Match match = Regex.Match(pgCurrentImagePage.text, @"\|\s*(a|A)rticle\s*=");
                match.Success; match = match.NextMatch())
            {
                strArticleName = pgCurrentImagePage.text.Substring(match.Index + match.Length);

                // Now, find the end of the article name. The value ends at the next parameter
                // separator or at the template's closing braces, whichever comes first, so an
                // "Article" parameter that is last in its template is handled as well.
                Match matchEnd = Regex.Match(strArticleName, @"[|}]");
                strArticleName = matchEnd.Success
                    ? strArticleName.Substring(0, matchEnd.Index).Trim()
                    : "";

                // If we found an article name, then purge it
                if (!String.IsNullOrWhiteSpace(strArticleName))
                {
                    PurgeImage(site2, strArticleName);
                }
            }
        }
        catch (Exception ex)
        {
            ErrorLogging.SendEmailMessage(true, "Error calling purge API", "There was an error calling purge api for " + strArticleName + ". Will ignore and move on.\r\n\r\n" + ex.ToString());
            SleepApiDelay();
        }

        // Now use the API to check and see if the image is still an orphan
        try
        {
            String strImageUsage = site2.GetWebPage("https://en.wikipedia.org/w/api.php?action=query&list=imageusage&iutitle=" + Bot.UrlEncode(pgCurrentImagePage.title.Replace(" ", "_")) + "&iunamespace=0&format=json");

            // Without a response we know nothing about the file's usage; never remove the
            // deletion tag on that basis.
            if (String.IsNullOrWhiteSpace(strImageUsage))
            {
                LogToEventLog(ref site2, MessageType.Error, "Could not determine whether [[:" + pgCurrentImagePage.title + "]] is still orphaned: the imageusage API returned an empty response.", null);
                continue;
            }

            // An empty imageusage array means the image is still orphaned - nothing to do
            if (Regex.IsMatch(strImageUsage, @"imageusage.*\[\s*\]"))
            {
                continue;
            }

            // Pull the first using page's title out of the JSON. The capture allows for
            // backslash-escaped characters (including an escaped quote) inside the title.
            Match matchTitle = Regex.Match(strImageUsage, @"""title""\s*:\s*""((?:[^""\\]|\\.)*)""");
            if (!matchTitle.Success)
            {
                // Neither an empty usage list nor a usable title (for example an API error
                // payload): treat the result as unknown rather than as "no longer orphaned".
                LogToEventLog(ref site2, MessageType.Error, "Could not determine whether [[:" + pgCurrentImagePage.title + "]] is still orphaned: the imageusage API response contained no page title.", null);
                continue;
            }

            // The image is no longer orphaned.
            // Convert special characters like \u00e8
            strArticleName = UnescapeString(matchTitle.Groups[1].Value);

            strMessage = "Removed {{tls|orfud}} tag from [[:" + pgCurrentImagePage.title + "]] because the image is no longer orphaned. The image is used in [[" + strArticleName + "]].";
            strErrorMessage = "Failed to remove {{tls|orfud}} tag from [[:" + pgCurrentImagePage.title + "]]. The image is used in [[" + strArticleName + "]]. ";
        }
        catch (Exception ex)
        {
            ErrorLogging.SendEmailMessage(true, "Error calling imageusage API", "There was an error calling imageusage for getting page history for " + pgCurrentImagePage.title + ". Will ignore and move on.\r\n\r\n" + ex.ToString());
            SleepApiDelay();
            continue;
        }

        try
        {
            // If we are here, the image is no longer orphaned
            m_intImagesTagged++;

            // The lazy quantifier stops at the tag's own closing braces instead of running on
            // to a later "}}" that happens to appear before the next "{".
            Match matchOrfud = Regex.Match(pgCurrentImagePage.text, @"\{\{\s*(d|D)i-orphaned (fair|non-free) use[^\{]*?\}\}");
            if (!matchOrfud.Success)
            {
                LogToEventLog(ref site2, MessageType.Error, strErrorMessage + "The regex match was empty.", null);
                continue;
            }

            // Remove the tag from the string, keeping everything before and after it
            // (including a single trailing character), then trim whitespace
            String strNewText = pgCurrentImagePage.text.Remove(matchOrfud.Index, matchOrfud.Length).Trim();

            String strEditSummary = String.Format(Properties.Settings.Default.OrfudRemoveTagComment, "[[" + strArticleName + "]]");

            if (UserspaceTest)
            {
                // Test mode: record the first 300 characters of the old and new text on the
                // test page instead of editing the file page
                m_pgUserspaceTest.text += "|-\r\n| [[:" + pgCurrentImagePage.title + "]] || ~~~~~ || <pre>" +
                    pgCurrentImagePage.text.Substring(0, Math.Min(300, pgCurrentImagePage.text.Length)) +
                    "</pre> || <pre>" + strNewText.Substring(0, Math.Min(300, strNewText.Length)) + "</pre>\r\n";
                m_pgUserspaceTest.Save(strEditSummary, false);
            }
            else
            {
                pgCurrentImagePage.Save(strNewText, strEditSummary, false);
            }

            LogToEventLog(ref site2, MessageType.Informational, strMessage, null);
        }
        catch (Exception ex)
        {
            LogToEventLog(ref site2, MessageType.Error, strErrorMessage, ex);
        }
    }
}
/// <summary>
/// This function will perform the task: it connects to Wikipedia, honours a stop request,
/// logs the start of the run, processes the two-, four- and six-days-ago categories in
/// that order via DoTask, and logs the number of images found to be no longer orphaned.
/// When UserspaceTest is set, it also closes the table on the test page at the end.
/// </summary>
public void PerformTask()
{
    // Connect to Wikipedia
    Site site = TryToConnect("https://en.wikipedia.org", Properties.Settings.Default.BotUserName, Properties.Settings.Default.BotPassword);

    // Do nothing at all if the bot has been told to stop
    if (BotStop(site))
    {
        return;
    }

    // Use a separate connection for our API calls - this seems to time out less frequently
    Site site2 = TryToConnect("https://en.wikipedia.org", Properties.Settings.Default.BotUserName, Properties.Settings.Default.BotPassword);

    // The test page object is created unconditionally; it is only written to in test mode
    m_pgUserspaceTest = new Page(site, Properties.Settings.Default.UserspaceTestPage);

    if (UserspaceTest)
    {
        LogToEventLog(ref site, MessageType.Start, "B-Bot \"no longer orphaned\" process now commencing <font color='red'>'''IN TEST MODE'''</font>. This process checks images from two, four, and six days ago - [[:"
            + TwoDaysAgoCategory + "]], [[:"
            + FourDaysAgoCategory + "]], and [[:"
            + SixDaysAgoCategory + "]] - to remove images that are no longer orphaned.", null);
    }
    else
    {
        LogToEventLog(ref site, MessageType.Start, "B-Bot \"no longer orphaned\" process now commencing. This process checks images from two, four, and six days ago — [[:"
            + TwoDaysAgoCategory + "]], [[:"
            + FourDaysAgoCategory + "]], and [[:"
            + SixDaysAgoCategory + "]] - to remove images that are no longer orphaned.", null);
    }

    // Process the three dated categories, most recent first
    DoTask(TwoDaysAgoCategory);
    DoTask(FourDaysAgoCategory);
    DoTask(SixDaysAgoCategory);

    LogToEventLog(ref site2, MessageType.Finish, "B-Bot orphaned \"no longer orphaned\" process completed. " + m_intImagesTagged.ToString() + " images are no longer orphaned.", null);

    if (UserspaceTest)
    {
        // Close the wikitable that DoTask opened on the test page
        m_pgUserspaceTest.text += "|}\r\n";
        m_pgUserspaceTest.Save();
    }
}