User:CleanupListingBot/Source
Appearance
The source of the bot, written in VB.NET 2.0. Requires the DotNetWikiBot library. Currently alpha; not all exceptions are caught. Smallman12q (talk) 17:43, 30 August 2010 (UTC)
Imports DotNetWikiBot
Imports System.Collections
Imports System.IO.File
Imports System.IO
Module Module1
'Module-level state shared by the routines of this module.
'(Unused, commented-out draft of a structure pairing an article with a subcategory.)
'Structure articlesubcategory
' 'declaring a structure named articlesubcategory
' Dim article As String
' Dim subcategory As String
'End Structure
'Wiki site handle; assigned in Main by the login attempt and passed to every PageList that is created.
Dim enWiki As Site = Nothing
'Running counter used to name the local category directories ("<counter>#"); incremented in createandrecorddirectory.
Dim catfilecounter As Integer = 0
'Base directory of the application; all local files and directories are created below it.
Dim appbase As String = AppDomain.CurrentDomain.SetupInformation.ApplicationBase()
'Titles of the pages of the category being checked; filled in Main and read by intersectcat.
Dim categorytochecktitles As New ArrayList
'Report text accumulated by Main and reportoncat, written to Report.txt at the end of Main.
Dim report As String = Nothing
'Category names loaded in Main; reportoncat indexes it with the numeric prefix of a directory name.
Dim directoryindex() As String = Nothing
'Entry point. Logs in, makes sure the maintenance-category tree is mirrored on disk,
'loads the category the user wants to check, intersects its article titles with every
'mirrored cleanup category, and appends the resulting report to Report.txt.
Sub Main()
    'Log in
    'NOTE(review): the password is echoed to the console because Console.ReadLine is used.
    Dim username, password As String
    Console.Write("Please enter username: ")
    username = Console.ReadLine()
    Console.Write("Please enter password: ")
    password = Console.ReadLine()
    Console.Clear()
    Console.WriteLine("Attempting to log into the english wiki as: " & username)
    Console.WriteLine("Please note that https is not used.")
    Try
        enWiki = New Site("http://en.wikipedia.org", username, password)
    Catch e As Exception
        Console.WriteLine("Login error: " & e.Message)
        quit() 'terminates the program
    End Try
    Console.WriteLine("Log in successful....clearing user name and password for security...")
    username = Nothing
    password = Nothing
    Console.Clear()

    'Get the cleanup categories:
    ' - create a directory for each (sub)category
    ' - copy the articles of each category to a text file
    Console.WriteLine("Loading category: Wikipedia maintenance categories sorted by month")
    Dim categoryname As String = "Wikipedia maintenance categories sorted by month"
    Dim currentdirectory As String = Path.Combine(appbase, categoryname)
    Dim indexpath As String = Path.Combine(appbase, "Category directory.txt")
    If Not Directory.Exists(currentdirectory) Then
        'The directory numbering restarts at 0 for a fresh download, so an index file
        'left over from an earlier download would no longer match the directories.
        If File.Exists(indexpath) Then
            File.Delete(indexpath)
        End If
        Directory.CreateDirectory(currentdirectory)
        Console.WriteLine("Category not found locally...begin downloading...")
        getcat(categoryname, currentdirectory)
        Console.WriteLine("Done downloading cleanup...")
        pause()
    End If

    'Get category to check
    Console.Write("Enter category to check: ")
    Dim categorytocheck As String = Console.ReadLine()
    Dim categorytochecklist As PageList = Nothing
    Try
        Console.WriteLine("Loading category pages...")
        categorytochecklist = New PageList(enWiki)
        categorytochecklist.FillAllFromCategoryTree(categorytocheck) 'Get category titles
        Console.WriteLine("Category loading complete....removing non-articles.")
        categorytochecklist.FilterNamespaces(New Integer() {0}) 'Remove non-articles
        Console.WriteLine("Category filtering...complete.")
        Console.WriteLine("There are " & categorytochecklist.Count.ToString & " articles.")
    Catch ex As Exception
        exceptionquit("Loading category error", ex) 'terminates the program
    End Try

    'Send the page titles to the module-level list used by intersectcat
    For Each article As Page In categorytochecklist
        categorytochecktitles.Add(article.title)
    Next
    categorytochecklist = Nothing 'Clear out pagelist

    'Compare: check each directory for articles.txt and write the matches to 0.txt
    intersectcat(currentdirectory)

    'Report
    report += "The following is a cleanup report generated on " & Date.UtcNow.ToString

    'The index maps the numeric directory prefix to the category name. It is the file
    'createandrecorddirectory appends to ("<number>#<category name>" per line), not an
    'articles.txt file, which holds plain page titles.
    directoryindex = loaddirectoryindex(indexpath)

    'The root folder itself carries no "<number>#" prefix, so reporting starts at its
    'numbered subdirectories (the first one is the root category created by getcat).
    For Each categorydirectory As String In Directory.GetDirectories(currentdirectory)
        Try
            reportoncat(categorydirectory, 1)
        Catch ex As Exception
            Console.WriteLine("Error reporting on " & categorydirectory & ": " & ex.Message)
        End Try
    Next

    'Append the report; Using closes the writer even when writing fails.
    Using objWriter As New StreamWriter(Path.Combine(appbase, "Report.txt"), True)
        objWriter.WriteLine(report)
    End Using
    'Logout missing?
End Sub

'Reads the category index file and returns the category names positioned by their number.
'Each usable line has the form "<number>#<category name>"; other lines are skipped.
'Numbers without an entry yield an empty string. A read error is reported on the console
'and whatever was read up to that point is returned.
Private Function loaddirectoryindex(ByVal indexpath As String) As String()
    Dim index As New ArrayList
    Try
        Using sr As New StreamReader(indexpath)
            Dim line As String = sr.ReadLine()
            While line IsNot Nothing
                'Split only at the first "#" so a "#" inside a name is preserved.
                Dim parts() As String = line.Split(New Char() {"#"c}, 2)
                Dim number As Integer
                If parts.Length = 2 AndAlso Integer.TryParse(parts(0), number) AndAlso number >= 0 Then
                    While index.Count <= number
                        index.Add(String.Empty)
                    End While
                    index(number) = parts(1)
                End If
                line = sr.ReadLine()
            End While
        End Using
    Catch e As Exception
        ' Let the user know what went wrong.
        Console.WriteLine("The file could not be read:")
        Console.WriteLine(e.Message)
    End Try
    Return arraylisttostring(index)
End Function
'Returns the elements that occur in both lists as a String array.
'The result follows the order (and keeps the duplicates) of the shorter list; when both
'lists have the same length, list1 is the one that is walked.
'A list that is Nothing is treated as empty.
Function intersect(ByRef list1 As ArrayList, ByRef list2 As ArrayList) As String()
    If list1 Is Nothing OrElse list2 Is Nothing Then
        Return New String() {}
    End If

    'Walk the shorter list and look each element up in the longer one.
    Dim smaller As ArrayList = list1
    Dim larger As ArrayList = list2
    If list1.Count > list2.Count Then
        smaller = list2
        larger = list1
    End If

    'Hash the longer list once so every lookup is constant time instead of a linear
    'ArrayList.Contains scan. A Hashtable cannot hold a Nothing key, so that case is tracked apart.
    Dim lookup As New Hashtable(larger.Count)
    Dim largerhasnothing As Boolean = False
    For Each entry As Object In larger
        If entry Is Nothing Then
            largerhasnothing = True
        Else
            lookup(entry) = True
        End If
    Next

    Dim intersection As New ArrayList
    For Each piece As Object In smaller
        Dim found As Boolean
        If piece Is Nothing Then
            found = largerhasnothing
        Else
            found = lookup.ContainsKey(piece)
        End If
        If found Then
            intersection.Add(piece)
        End If
    Next
    Return arraylisttostring(intersection)
End Function
'Copies the elements of an ArrayList into a new String array of the same length.
'Every element must be a String (or Nothing); any other element makes the copy fail.
Function arraylisttostring(ByRef array As ArrayList) As String()
    Dim strings(array.Count - 1) As String
    array.CopyTo(strings)
    Return strings
End Function
'Wraps input in depth "=" characters on each side, e.g. header("Foo", 2) gives "==Foo==".
'depth: number of "=" characters per side; 0 returns input unchanged.
'A negative depth raises ArgumentOutOfRangeException.
Function header(ByVal input As String, ByVal depth As Integer) As String
    'Build the marker on demand so any depth works, not only depths up to a fixed-length template.
    Dim equalstoadd As New String("="c, depth)
    Return equalstoadd & input & equalstoadd
End Function
'Appends the section for one local category directory to the module-level report and
'then recurses into its subdirectories with depth + 1.
'currentdirectory: full path of the directory; its name is expected to start with
'                  "<number>#", where <number> is a position in directoryindex.
'depth:            heading level passed to header().
Sub reportoncat(ByVal currentdirectory As String, ByVal depth As Integer)
    Dim Root As New DirectoryInfo(currentdirectory)
    Dim Dirs As DirectoryInfo() = Root.GetDirectories()

    'Find category real name. When the prefix is not a usable index (no numeric prefix,
    'index not loaded, number out of range) the directory name itself is used as heading
    'instead of failing on the conversion.
    Dim rootname As String = Root.Name
    Dim prefix As String = rootname.Split("#"c)(0)
    Dim number As Integer
    If directoryindex IsNot Nothing AndAlso Integer.TryParse(prefix, number) _
       AndAlso number >= 0 AndAlso number < directoryindex.Length Then
        rootname = directoryindex(number)
    End If
    report += vbNewLine + header(rootname, depth) + vbNewLine

    'List this category's matches: 0.txt holds one page title per line.
    Dim resultpath As String = Path.Combine(currentdirectory, "0.txt")
    If File.Exists(resultpath) Then
        Dim lines As New System.Text.StringBuilder
        Try
            ' The using statement also closes the StreamReader.
            Using sr As New StreamReader(resultpath)
                Dim line As String = sr.ReadLine()
                While line IsNot Nothing
                    lines.Append("* [[").Append(line).Append("]]").Append(vbNewLine)
                    line = sr.ReadLine()
                End While
            End Using
        Catch e As Exception
            ' Let the user know what went wrong.
            Console.WriteLine("The file could not be read:")
            Console.WriteLine(e.Message)
        End Try
        'Lines read before a failure are still reported.
        report += lines.ToString()
    End If

    'Each subcat
    For Each DirectoryName As DirectoryInfo In Dirs
        Try
            reportoncat(DirectoryName.FullName, depth + 1)
        Catch E As Exception
            Console.WriteLine("Error accessing " & DirectoryName.FullName & ": " & E.Message)
        End Try
    Next
End Sub
'Intersects the titles in categorytochecktitles with the titles stored in
'articles.txt of the given directory, writes the matches to 0.txt in that directory,
'and then does the same for every subdirectory.
'thecurrentdirectory: full path of a local category directory.
Sub intersectcat(ByVal thecurrentdirectory As String)
    Dim Root As New DirectoryInfo(thecurrentdirectory)
    Dim Dirs As DirectoryInfo() = Root.GetDirectories()
    Dim articlespath As String = Path.Combine(thecurrentdirectory, "articles.txt")
    Dim resultpath As String = Path.Combine(thecurrentdirectory, "0.txt")

    'check this cat's articles
    If File.Exists(articlespath) Then
        Dim pages As New ArrayList
        Try
            ' The using statement also closes the StreamReader.
            Using sr As New StreamReader(articlespath)
                Dim line As String = sr.ReadLine()
                While line IsNot Nothing
                    pages.Add(line)
                    line = sr.ReadLine()
                End While
            End Using
        Catch e As Exception
            ' Let the user know what went wrong.
            Console.WriteLine("The file could not be read:")
            Console.WriteLine(e.Message)
        End Try

        'Now intersect them
        Dim intersection() As String = intersect(categorytochecktitles, pages)
        pages = Nothing

        If intersection.Length > 0 Then
            'We now write the intersection to file
            Dim x As New PageList(enWiki, intersection)
            x.SaveTitlesToFile(resultpath)
        ElseIf File.Exists(resultpath) Then
            'No matches this time: remove the result of an earlier run so that it
            'does not show up in the new report.
            File.Delete(resultpath)
        End If
    End If

    'Each subcat
    For Each DirectoryName As DirectoryInfo In Dirs
        Try
            intersectcat(DirectoryName.FullName)
        Catch E As Exception
            Console.WriteLine("Error accessing " & DirectoryName.FullName & ": " & E.Message)
        End Try
    Next
End Sub
'Mirrors one wiki category on disk and recurses into its subcategories.
'A directory named "<counter>#" is created below thecurrrentdirectory and recorded via
'createandrecorddirectory; if the category has pages, their titles are saved to
'articles.txt inside that directory.
'categoryname:         name of the category to download.
'thecurrrentdirectory: directory below which the category's directory is created.
Sub getcat(ByVal categoryname As String, ByVal thecurrrentdirectory As String)
    Dim members As New PageList(enWiki)
    Dim subcategories As New PageList(enWiki)
    members.FillFromCategory(categoryname)
    subcategories.FillSubsFromCategory(categoryname)

    'From here on the path refers to this category's own directory.
    thecurrrentdirectory = thecurrrentdirectory & "\" & catfilecounter.ToString & "#"
    createandrecorddirectory(categoryname, thecurrrentdirectory)

    'presumably strips the namespace prefix from the subcategory titles - confirm against DotNetWikiBot
    For Each subcategory As Page In subcategories
        subcategory.RemoveNSPrefix()
    Next

    'Write articles to file in directory
    If members.Count > 0 Then
        members.SaveTitlesToFile(thecurrrentdirectory & "\articles.txt")
        Console.WriteLine("Saved files of category" & categoryname)
        members = Nothing 'release the list before descending
    End If

    'Descend into every subcategory
    For Each child As Page In subcategories
        Console.WriteLine("Getting subcategory: '" & child.title & " of '" & categoryname & "'")
        getcat(child.title, thecurrrentdirectory)
    Next
End Sub
'Creates the directory for a category and appends "<counter>#<category name>" to
'"Category directory.txt" in the application base directory, then the counter has
'already been advanced for the next category.
'categoryname:        name written to the index line.
'thecurrentdirectory: full path of the directory to create.
Sub createandrecorddirectory(ByVal categoryname As String, ByVal thecurrentdirectory As String)
    'Capture the current number before advancing the counter.
    Dim piece As String = catfilecounter.ToString & "#"
    catfilecounter += 1
    Directory.CreateDirectory(thecurrentdirectory)
    'Append to the index; Using closes the writer even when writing fails.
    Using objWriter As New System.IO.StreamWriter(Path.Combine(appbase, "Category directory.txt"), True)
        objWriter.WriteLine(piece & categoryname)
    End Using
End Sub
'Shows a prompt, waits for a line of input, then terminates the program.
Sub quit()
    Console.Out.WriteLine("Press any key to quit...")
    Console.In.ReadLine()
    End
End Sub
'Shows a prompt and blocks until a line of input has been read.
Sub pause()
    Console.Out.WriteLine("Press any key to continue...")
    Console.In.ReadLine()
End Sub
'Prints "<errorwith>:<exception message>" and then ends the program through quit().
'errorwith: short description of what failed.
'ex:        the exception whose message is shown.
Sub exceptionquit(ByVal errorwith As String, ByVal ex As Exception)
    Dim message As String = errorwith & ":" & ex.Message
    Console.WriteLine(message)
    quit()
End Sub
End Module