Jump to content

User:B-bot/source/No longer orphaned checker

From Wikipedia, the free encyclopedia

This task will loop through the dated Category:Orphaned non-free use Wikipedia files category whose files are due to be deleted tomorrow (the code below also processes two more recent dated categories in the same way). For each image in the category, it will purge the image and purge any articles for which there is a fair use rationale. If it finds that the image is no longer orphaned, it will remove the {{di-orphaned fair use}} tag.

/// <summary>
/// Format string for the title of a dated orphaned-file category; argument {0} is the
/// date, rendered as day, full month name and four-digit year.
/// </summary>
private const String CategoryNameFormatString = "Category:Orphaned non-free use Wikipedia files as of {0:d MMMM yyyy}";

/// <summary>
/// Images that will be deleted tomorrow (if nothing changes).
/// Set by the constructor from the current UTC time minus seven days.
/// </summary>
public String SixDaysAgoCategory;

/// <summary>
/// Category title set by the constructor from the current UTC time minus five days.
/// </summary>
public String FourDaysAgoCategory;
/// <summary>
/// Category title set by the constructor from the current UTC time minus three days.
/// </summary>
public String TwoDaysAgoCategory;

// Page that receives the before/after report table when UserspaceTest is set;
// assigned in PerformTask before any category is processed.
Page m_pgUserspaceTest = null;

// Incremented in DoTask for each image found to be in use again; reported by PerformTask.
int m_intImagesTagged = 0;

/// <summary>
/// Returns the display name of this job.
/// </summary>
/// <returns>The fixed string "No Longer Orphaned Checker".</returns>
public override string GetJobName()
{
    const string JobName = "No Longer Orphaned Checker";
    return JobName;
}

/// <summary>
/// Constructor for this class.  Computes the titles of the three dated categories
/// (<see cref="SixDaysAgoCategory"/>, <see cref="FourDaysAgoCategory"/> and
/// <see cref="TwoDaysAgoCategory"/>) from the current UTC time.
/// </summary>
public NoLongerOrphaned()
{
    // Category titles on the English Wikipedia use English month names, so the dates
    // must not be formatted with whatever culture the host machine happens to use.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Read the clock once so that all three dates are derived from the same instant,
    // even if the constructor runs across midnight UTC.
    DateTime utcNow = DateTime.UtcNow;

    // Note that it's really seven days ago because we don't delete until the 8th day (after 7 full days would have elapsed)
    DateTime SixDaysAgo = utcNow.AddDays(-7);
    DateTime FourDaysAgo = utcNow.AddDays(-5);
    DateTime TwoDaysAgo = utcNow.AddDays(-3);

    SixDaysAgoCategory = string.Format(invariant, CategoryNameFormatString, SixDaysAgo);
    FourDaysAgoCategory = string.Format(invariant, CategoryNameFormatString, FourDaysAgo);
    TwoDaysAgoCategory = string.Format(invariant, CategoryNameFormatString, TwoDaysAgo);
}

/// <summary>
/// Processes one dated category.  For every file page in the category this purges the
/// articles named in the page's rationale(s), asks the imageusage API whether the file
/// is used in the article namespace and, if it is, removes the orphaned-file deletion
/// tag from the file page (or, when UserspaceTest is set, appends a before/after row
/// to the userspace test page instead of editing the file page).
/// </summary>
/// <param name="strCat">Title of the category to process.</param>
private void DoTask(String strCat)
{
    // Connect to Wikipedia
    Site site = TryToConnect("https://en.wikipedia.org", Properties.Settings.Default.BotUserName, Properties.Settings.Default.BotPassword);

    // Use a separate connection for our API calls - this seems to time out less frequently
    Site site2 = TryToConnect("https://en.wikipedia.org", Properties.Settings.Default.BotUserName, Properties.Settings.Default.BotPassword);

    // Grab the list of pages in the category
    PageList pl = new PageList(site);
    pl.FillAllFromCategory(strCat);

    System.Threading.Thread.Sleep(1000 * Properties.Settings.Default.CheckStopDelaySeconds);

    if (UserspaceTest)
    {
        m_pgUserspaceTest.text = "Now beginning No Longer Orphaned task on " + DateTime.Now.ToString() + " (local time) ...\r\n\r\n";
        m_pgUserspaceTest.text += "{| class=\"wikitable sortable\"\r\n|-\r\n! Page !! Timestamp !! Former text !! Proposed text\r\n";
        m_pgUserspaceTest.Save();
    }

    // Loop through each page
    foreach (Page pgCurrentImagePage in pl)
    {
        // Namespace 6 is the only namespace this task handles; anything else is reported and skipped
        if (6 != pgCurrentImagePage.GetNamespace())
        {
            LogToEventLog(ref site2, MessageType.Error, "Error: non-file [[:" + pgCurrentImagePage.title + "]] found in category [[:" + strCat + "]]", null);
            continue;
        }

        // Event log message for successful removal
        String strMessage = "";

        // Event log message for failed removal
        String strErrorMessage = "";

        // Article in which the image was found to be used
        String strUsingArticle = "";

        if (BotStop(site2))
        {
            LogToEventLog(ref site, MessageType.Error, "I was ordered to abort.", null);
            return;
        }

        pgCurrentImagePage.Load();

        // Loop through and find all of the articles in the rationale
        String strRationaleArticle = "";
        try
        {
            // Regex.Match never returns null; Success is the documented way to detect the end of the matches
            for (Match match = Regex.Match(pgCurrentImagePage.text, @"\|\s*(a|A)rticle\s*=");
                    match.Success; match = match.NextMatch())
            {
                String strRemainder = pgCurrentImagePage.text.Substring(match.Index + match.Length);

                // The article name ends at the next parameter ("|") or, when it is the
                // last parameter of the template, at the closing braces ("}}")
                int intEnd = strRemainder.IndexOfAny(new char[] { '|', '}' });
                if (0 > intEnd)
                {
                    continue;
                }

                strRationaleArticle = strRemainder.Substring(0, intEnd).Trim();

                // If we found an article name, then purge it
                if (!String.IsNullOrWhiteSpace(strRationaleArticle))
                {
                    PurgeImage(site2, strRationaleArticle);
                }
            }
        }
        catch (Exception ex)
        {
            ErrorLogging.SendEmailMessage(true, "Error calling purge API", "There was an error calling purge api for " + strRationaleArticle + ".  Will ignore and move on.\r\n\r\n" + ex.ToString());
            SleepApiDelay();
        }

        // Now use the API to check and see if the image is still an orphan
        try
        {
            String strImageUsage = site2.GetWebPage("https://en.wikipedia.org/w/api.php?action=query&list=imageusage&iutitle=" + Bot.UrlEncode(pgCurrentImagePage.title.Replace(" ", "_")) + "&iunamespace=0&format=json");

            if (String.IsNullOrWhiteSpace(strImageUsage))
            {
                // Without an answer we cannot know whether the image is in use, so leave the tag alone
                LogToEventLog(ref site2, MessageType.Error, "Empty imageusage response for [[:" + pgCurrentImagePage.title + "]].  Leaving the page unchanged.", null);
                continue;
            }

            if (Regex.IsMatch(strImageUsage, @"""imageusage""\s*:\s*\[\s*\]"))
            {
                // Empty usage list: the image is still orphaned
                continue;
            }

            // Take the first reported title.  The pattern steps over JSON-escaped characters
            // (such as \") so that a quotation mark inside a title does not end the capture early.
            Match matchTitle = Regex.Match(strImageUsage, @"""title""\s*:\s*""((?:\\.|[^""\\])*)""");
            if (!matchTitle.Success)
            {
                // No usage entry at all (for example an API error object): do not treat that as "in use"
                LogToEventLog(ref site2, MessageType.Error, "Unexpected imageusage response for [[:" + pgCurrentImagePage.title + "]].  Leaving the page unchanged.", null);
                continue;
            }

            // Convert special characters like \u00e8
            // NOTE(review): assumes UnescapeString also handles other JSON escapes such as \" - confirm
            strUsingArticle = UnescapeString(matchTitle.Groups[1].Value);

            strMessage = "Removed {{tls|orfud}} tag from [[:" + pgCurrentImagePage.title + "]] because the image is no longer orphaned.  The image is used in [[" + strUsingArticle + "]].";
            strErrorMessage = "Failed to remove {{tls|orfud}} tag from [[:" + pgCurrentImagePage.title + "]].  The image is used in [[" + strUsingArticle + "]].  ";
        }
        catch (Exception ex)
        {
            ErrorLogging.SendEmailMessage(true, "Error calling imageusage API", "There was an error calling imageusage for getting page history for " + pgCurrentImagePage.title + ".  Will ignore and move on.\r\n\r\n" + ex.ToString());
            SleepApiDelay();
            continue;
        }

        try
        {
            m_intImagesTagged++;

            // If we are here, the image is no longer orphaned.
            // Braces are excluded from the parameter part so the match always stops at the tag's own "}}"
            Match matchOrfud = Regex.Match(pgCurrentImagePage.text, @"\{\{\s*(d|D)i-orphaned (fair|non-free) use[^\{\}]*\}\}");
            if (!matchOrfud.Success)
            {
                LogToEventLog(ref site2, MessageType.Error, strErrorMessage + "The regex match was empty.", null);
                continue;
            }

            // Remove the tag from the text (keeping everything before and after it) and trim whitespace
            String strNewText = pgCurrentImagePage.text.Remove(matchOrfud.Index, matchOrfud.Length).Trim();

            String strEditSummary = String.Format(Properties.Settings.Default.OrfudRemoveTagComment, "[[" + strUsingArticle + "]]");

            if (UserspaceTest)
            {
                // Test mode: record the first 300 characters of the old and proposed text instead of editing the file page
                m_pgUserspaceTest.text += "|-\r\n| [[:" + pgCurrentImagePage.title + "]] || ~~~~~ || <pre>" +
                                        pgCurrentImagePage.text.Substring(0, Math.Min(300, pgCurrentImagePage.text.Length)) +
                                        "</pre> || <pre>" + strNewText.Substring(0, Math.Min(300, strNewText.Length)) + "</pre>\r\n";
                m_pgUserspaceTest.Save(strEditSummary, false);
            }
            else
            {
                pgCurrentImagePage.Save(strNewText, strEditSummary, false);
            }

            LogToEventLog(ref site2, MessageType.Informational, strMessage, null);
        }
        catch (Exception ex)
        {
            LogToEventLog(ref site2, MessageType.Error, strErrorMessage, ex);
        }
    }
}

/// <summary>
/// Runs the whole job: connects, honours a stop request, logs the start message,
/// processes the two-, four- and six-days-ago categories in that order, then logs
/// how many images were found to be no longer orphaned.  When UserspaceTest is set
/// the report table on the userspace test page is closed and saved at the end.
/// </summary>
public void PerformTask()
{
    // Connect to Wikipedia
    Site site = TryToConnect("https://en.wikipedia.org", Properties.Settings.Default.BotUserName, Properties.Settings.Default.BotPassword);

    if (BotStop(site))
    {
        return;
    }

    // Use a separate connection for our API calls - this seems to time out less frequently
    Site site2 = TryToConnect("https://en.wikipedia.org", Properties.Settings.Default.BotUserName, Properties.Settings.Default.BotPassword);

    m_pgUserspaceTest = new Page(site, Properties.Settings.Default.UserspaceTestPage);

    // Only the opening of the start message depends on the mode; the list of categories is shared
    String strStartIntro;
    if (UserspaceTest)
    {
        strStartIntro = "B-Bot \"no longer orphaned\" process now commencing <font color='red'>'''IN TEST MODE'''</font>.  This process checks images from two, four, and six days ago - [[:";
    }
    else
    {
        strStartIntro = "B-Bot \"no longer orphaned\" process now commencing.  This process checks images from two, four, and six days ago &mdash; [[:";
    }

    LogToEventLog(ref site, MessageType.Start, strStartIntro
                                                + TwoDaysAgoCategory + "]], [[:"
                                                + FourDaysAgoCategory + "]], and [[:"
                                                + SixDaysAgoCategory + "]] - to remove images that are no longer orphaned.", null);

    DoTask(TwoDaysAgoCategory);
    DoTask(FourDaysAgoCategory);
    DoTask(SixDaysAgoCategory);

    LogToEventLog(ref site2, MessageType.Finish, "B-Bot orphaned \"no longer orphaned\" process completed.  " + m_intImagesTagged.ToString() + " images are no longer orphaned.", null);
    if (UserspaceTest)
    {
        // Close the wikitable that DoTask has been appending rows to
        m_pgUserspaceTest.text += "|}\r\n";
        m_pgUserspaceTest.Save();
    }
}