User:LemmeyBOT/RefHistoryFix3
#!/usr/bin/python
# -*- coding: utf-8 -*-
""""
Bot:LemmeyBOT
FileName:RefHistoryFix3.py
Author: Lemmey 3-1-2008
Tagline:Add Value to wikipedia, Be an Editor!
Purpose:Restores references lost due to vandalism, bad editors, massive changes.
Method:Looks to a specified article for the lost reference.
""""
__version__ = '$Id: basic.py 3998 2007-08-07 20:28:27Z wikipedian $'
import wikipedia, catlib
import pagegenerators
import sys
import BeautifulSoup
import urllib
import re
from datetime import date
message = ""
docuReplacements = {
    '&params;': pagegenerators.parameterHelp
}
rtnln = u'''
'''
class BasicBot:
msg = {
'de': u'Bot: Ändere ...',
'en': u'Restored missing content of named reference using article history',
}
def __init__(self, generator, debug):
"""
Constructor. Parameters:
* generator - The page generator that determines on which pages
to work on.
* debug - If True, doesn't do any real changes, but only shows
what would have been changed.
"""
self.generator = generator
self.debug = debug
def run(self):
wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), self.msg))
for page in self.generator:
self.treat(page)
def treat(self, page):
        """
        Loads the given page, makes some changes, and saves it.
        """
        global message
try:
            text = page.get(throttle = False)
            original = text
except wikipedia.NoPage:
wikipedia.output(u"Page %s does not exist; skipping." % page.aslink())
return
except wikipedia.IsRedirectPage:
wikipedia.output(u"Page %s is a redirect; skipping." % page.aslink())
return
except wikipedia.LockedPage:
wikipedia.output(u"Page %s is locked; skipping." % page.aslink())
return
        showtext = text
        # Dead branch: text always equals showtext at this point; kept from an
        # earlier hollow-reference cleanup pass.
        if text != showtext : save_page(page,original,text,"","Removed a hollow reference")
try:
text=fixBroken(text,page)
showtext = text
except wikipedia.IsRedirectPage:
Ignore(page.title())
        if showtext != original:
            wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
            try:
                wikipedia.showDiff(original, showtext)
except:
pass
def save_page(page,oldtext,newtext,ref,message):
print "Message: ",message
wikipedia.showDiff(oldtext, newtext)
choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No'], ['y', 'N'], 'N')
text = newtext
##choice = 'y' #HARD CODED
if choice == 'y':
try:
page.put(newtext,minorEdit=True,comment=message)
except wikipedia.EditConflict:
            wikipedia.output(u'Skipping %s because of edit conflict' % page.title())
        except wikipedia.SpamfilterError, error:
            wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (page.title(), error.url))
newtext=oldtext.replace(ref,u'{{subst:fact-now}}')
message = "original reference: "+ref+" is a blacklisted source - replaced with a Fact tag"
text = newtext
try:
text = save_page(page,oldtext,newtext,ref,message)
except:
pass
return text
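# Note on save_page: it returns the proposed newtext (even when the save is
# declined), so callers keep working from the edited text. On a
# SpamfilterError the blacklisted reference is swapped for {{subst:fact-now}}
# and save_page retries itself with that replacement.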
def broken_refs(text):
    flag = False
    a=Get_Named_Ref_Whole(text)
    b=Get_Named_Ref_Stubs(text)
    s=re.compile('"[\w- ]+"',re.I | re.S)
    stub_array=[]
    for match in b:
        for mname in s.finditer(match.group()):
            if mname.group() not in stub_array: stub_array.append(mname.group())
    whole_array=[]
    for match in a:
        for mname in s.finditer(match.group()):
            if mname.group() not in whole_array: whole_array.append(mname.group())
    print stub_array ##debug
    for stub in stub_array:
        if stub not in whole_array:
            flag = True
    return flag
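# Rough behaviour of broken_refs, following the regexes above (hypothetical input):
#   broken_refs(u'<ref name="a"/>')                               -> True
#   broken_refs(u'<ref name="a">body</ref> and <ref name="a"/>')  -> False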
def get_lost_stubs(text):
flag = False
a=Get_Named_Ref_Whole(text)
b=Get_Named_Ref_Stubs(text)
stub_array=[]
stub_list=[]
for match in b:
name = match.group()
s=re.compile('= *[^/]+? */',re.I | re.S)
search = s.search(name)
if search != None:
tag = search.group()
tag = tag[1:-1]
tag = tag.strip()
if name not in stub_list:
stub_array.append(tag)
stub_list.append(name)
whole_array=[]
for match in a:
name = match.group()
s=re.compile('= *[^/]+? ?>',re.I | re.S)
search = s.search(name)
if search != None:
tag = search.group()
tag = tag[1:-1]
tag = tag.strip()
whole_array.append(tag)
lost_stubs=[]
for x in range(0,len(stub_array)):
stub=stub_array[x]
if stub not in whole_array and (stub,stub_list[x]) not in lost_stubs:
lost_stubs.append((stub,stub_list[x]))
return lost_stubs
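# get_lost_stubs returns (name, stub) pairs for names that have no full ref
# left in the text; e.g. for a page containing only <ref name="a"/> it yields
# [('"a"', '<ref name="a"/>')].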
def fixBroken(text,page):
b=get_lost_stubs(text)
global message
fileflag=False
pageTitle = wikipedia.input(u'Please enter a page: ')
if pageTitle == "refs.txt":
f=open('refs.txt', 'r')
text2=f.read()
f.close()
fileflag=True
else:
page2 = wikipedia.Page(wikipedia.getSite(), pageTitle)
text2 = page2.get()
for item in b:
stub = item[0]
ref = item[1]
a=None
FOUND = False
a=Get_Specific_Named_Whole(text2,stub)
if a!= None and Check_for_Blank(stub,a.group()) == False:
newtext=text.replace(ref,a.group(),1)
if newtext!=text:
FOUND=True
wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % pageTitle)
if fileflag: message = u'Restored missing content of named reference '+ref
else: message = u'Restored missing content of named reference '+ref+' using identical reference in [['+pageTitle+']]'
text = save_page(page,text,newtext,ref,message)
if not FOUND:
print "Hit Bottom: ",stub
Ignore(page.title())
return text
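# Entering the literal name refs.txt at the prompt above makes fixBroken read
# the replacement references from a local file instead of a live article; the
# edit summary then omits the source-page link.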
def Get_Page_List(page):
linked = page.linkedPages()
    categories = page.categories()
    articles = []
    for c in categories:
##print c
cat = catlib.Category(wikipedia.getSite(), str(c).strip("[]") )
listOfArticles = cat.articlesList()
for article in listOfArticles:
if article not in articles: articles.append(article)
getlist=[]
for article in articles:
if article in linked and article not in getlist:
getlist.append(article)
finallist = []
for p in getlist:
name = str(p).strip("[]")
if name != str(page.title()): finallist.append(name)
return finallist
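# Get_Page_List builds candidate source articles: pages that are both linked
# from this page and share one of its categories. (Defined for manual use;
# nothing in this script calls it.)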
def Check_for_Blank(name,tag):
    pattern='< ?ref ?name *= *'+name+' ?> *< ?/ ?ref ?>'
    a=re.compile(pattern,re.I | re.S)##<ref name = "larry"></ref>
    return a.search(tag) is not None
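# Example, using a hypothetical ref (note the name is passed with its quotes):
#   Check_for_Blank(u'"larry"', u'<ref name="larry"></ref>')   -> True
#   Check_for_Blank(u'"larry"', u'<ref name="larry">text</ref>') -> False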
def Get_Specific_Named_Whole(text2,name):
    ##First whole named ref (body plus closing tag) for this name
    pattern='< ?ref ?name *= *'+name+' ?>.+?< ?/ ?ref ?>'
    a=re.compile(pattern,re.I | re.S)##<ref name = "larry">...</ref>
    search = a.search(text2)
    return search
def Get_Named_Ref_Stubs(text):
    ##Self-closing named ref stubs
    ##pattern = '< *ref *name *= *[\w "-]+? */ *>'
    pattern = '< *ref *name *= *[^>]+? */ *>'
    b=re.compile(pattern,re.I | re.S)##<ref name = "larry"/>
    iterator = b.finditer(text)
    return iterator
def Get_Named_Ref_Whole(text):
    ##Whole named refs with body and closing tag
    pattern = '< ?ref ?name *= *[^/]+? ?>.+?< ?/ ?ref ?>'
    a=re.compile(pattern,re.I | re.S)##<ref name = "larry">...</ref>
    iterator = a.finditer(text)
    return iterator
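# Caveat: the [^/] in the name part means whole refs whose name attribute
# contains a slash (e.g. a bare URL used as a name) will not be matched here,
# even though the stub pattern above would still find their self-closing form.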
def Ignore(article):
f=open('list.txt', 'a')
f.write(article + '\n')
f.close()
def main():
genFactory = pagegenerators.GeneratorFactory()
gen = None
pageTitleParts = []
debug = False
for arg in wikipedia.handleArgs():
if arg.startswith("-debug"):
debug = True
else:
generator = genFactory.handleArg(arg)
if generator:
gen = generator
else:
pageTitleParts.append(arg)
if pageTitleParts != []:
pageTitle = ' '.join(pageTitleParts)
page = wikipedia.Page(wikipedia.getSite(), pageTitle)
gen = iter([page])
if gen:
gen = pagegenerators.PreloadingGenerator(gen)
bot = BasicBot(gen, debug)
bot.run()
else:
wikipedia.showHelp()
if __name__ == "__main__":
try:
main()
finally:
wikipedia.stopme()