# Source: User:YpnBot/Vital article talk pages update/source
import pywikibot
from pywikibot import pagegenerators
import re
import string
# Connection to the English Wikipedia shared by every page lookup in this
# script. pywikibot.Site is the supported factory; getSite was only a
# deprecated alias for it.
site = pywikibot.Site('en', 'wikipedia')

# Number of talk pages edited so far; incremented by updateTemplate.
number = 0
# Vital-article list pages to scan, in processing order.
# Each entry is [list page title, level, topic, subpage]. run() reads the
# entries by index; a level-4 entry with an empty topic is handled by run()'s
# dedicated Life/Language branch instead of the plain "all links" branch.
vitalPages = [
    # Levels 1-3
    ['Wikipedia:Vital articles/Level/1', 1, '', ''],
    ['Wikipedia:Vital articles/Level/2', 2, '', ''],
    ['Wikipedia:Vital articles', 3, '', ''],
    # Level 4 (expanded lists, one page per topic)
    ['Wikipedia:Vital articles/Expanded/People', 4, 'People', ''],
    ['Wikipedia:Vital articles/Expanded/History', 4, 'History', ''],
    ['Wikipedia:Vital articles/Expanded/Geography', 4, 'Geography', ''],
    ['Wikipedia:Vital articles/Expanded/Arts', 4, 'Art', ''],
    ['Wikipedia:Vital articles/Expanded/Philosophy and religion', 4, 'Philosophy', ''],
    ['Wikipedia:Vital articles/Expanded/Anthropology, psychology, and everyday life', 4, '', ''],
    ['Wikipedia:Vital articles/Expanded/Society and social sciences', 4, 'Society', ''],
    ['Wikipedia:Vital articles/Expanded/Biology and health sciences', 4, 'Science', 'Biology'],
    ['Wikipedia:Vital articles/Expanded/Physical sciences', 4, 'Science', 'Physics'],
    ['Wikipedia:Vital articles/Expanded/Technology', 4, 'Technology', ''],
    ['Wikipedia:Vital articles/Expanded/Mathematics', 4, 'Mathematics', ''],
]
# Compiled regular expressions shared by the functions below.
#   link      - target of a wikilink, up to the closing bracket or the pipe
#   fa / ga   - quality markers searched for in the article text
#   a .. stub - "|class=X" assessment parameters searched for in talk pages
#   stubTemp  - bare {{Stub}} template in the article text
# The class patterns end with \b so that a value which merely starts with the
# same letters (e.g. "class=Book", "class=Category") is not mistaken for the
# A/B/C/Start/Stub rating.
patterns = {
    'link': re.compile(r'\[\[(.*?)[\]\|]'),
    'fa': re.compile(r'\{\{[Ff]eatured article\}\}'),
    'ga': re.compile(r'\{\{[Gg]ood article\}\}'),
    'a': re.compile(r'\|\s*[Cc]lass\s*\=\s*[Aa]\b'),
    'b': re.compile(r'\|\s*[Cc]lass\s*\=\s*[Bb]\b'),
    'c': re.compile(r'\|\s*[Cc]lass\s*\=\s*[Cc]\b'),
    'start': re.compile(r'\|\s*[Cc]lass\s*\=\s*[Ss]tart\b'),
    'stub': re.compile(r'\|\s*[Cc]lass\s*\=\s*[Ss]tub\b'),
    'stubTemp': re.compile(r'\{\{[Ss]tub\}\}'),
}
# Level-2 list page: section heading text -> topic passed to
# determineTemplates by run(). An empty topic means the articles of that
# section are recorded without a topic.
sections2 = {
    'Art': 'Art',
    'Geography': 'Geography',
    'History': 'History',
    'Everyday life': '',
    'Mathematics': 'Mathematics',
    'Philosophy': 'Philosophy',
    'Science': 'Science',
    'Society': 'Society',
    'Technology': 'Technology',
}
# Level-3 list page: section heading text -> topic passed to
# determineTemplates by run(). For a section with an empty topic, run()
# records its "Language" subsection as 'Language' and then the whole section
# as 'Life'.
sections3 = {
    'People': 'People',
    'History': 'History',
    'Geography': 'Geography',
    'Arts and culture': 'Art',
    'Philosophy and religion': 'Philosophy',
    'Everyday life': '',
    'Society and social sciences': 'Society',
    'Health and medicine': 'Science',
    'Science': 'Science',
    'Technology': 'Technology',
    'Mathematics': 'Mathematics',
}
# Article title -> [page, level as string, topic, article class, subpage].
# Filled in by determineTemplates and consulted by updateTemplate.
vitalArticles = dict()
def _detectArticleClass(page, talkPage):
    """Return the assessment class found for *page*, or '' when none matches.

    The checks run in the order listed below and the first hit wins. Each
    entry names the pattern key, the page whose text is searched, and the
    class string that is recorded.
    """
    checks = (
        ('fa', page, 'FA'),
        ('a', talkPage, 'A'),
        ('ga', page, 'GA'),
        ('b', talkPage, 'B'),
        ('c', talkPage, 'C'),
        ('start', talkPage, 'Start'),
        ('stub', talkPage, 'Stub'),
        ('stubTemp', page, 'Stub'),
    )
    for patternKey, source, articleClass in checks:
        # .text is read only when a check is actually reached.
        if patterns[patternKey].search(source.text):
            return articleClass
    return ''


def determineTemplates(listPages, topic, level, subpage=''):
    """Record the expected template parameters for every article in *listPages*.

    For each main-namespace page that is not yet in ``vitalArticles``, the
    entry ``[page, str(level), topic, articleClass, subpage]`` is stored under
    the page title. Redirects are resolved first and the entry is stored
    under the title of the redirect target.

    listPages -- iterable of page objects to examine
    topic     -- topic string to record (may be empty)
    level     -- vital-article level; stored as a string
    subpage   -- subpage string to record (defaults to '')
    """
    print('level:' + str(level))
    print('topic:' + str(topic))
    for page in listPages:
        if page.namespace() != 0 or page.title() in vitalArticles:
            continue
        if page.isRedirectPage():
            page = page.getRedirectTarget()
            # The target is a different page: it may live outside the article
            # namespace or may already have been recorded by an earlier pass,
            # in which case the earlier entry must not be overwritten.
            if page.namespace() != 0 or page.title() in vitalArticles:
                continue
        talkPage = page.toggleTalkPage()
        articleClass = _detectArticleClass(page, talkPage)
        vitalArticles[page.title()] = [page, str(level), topic, articleClass, subpage]
# Matches a whole {{Vital article}} transclusion or its {{VA}} / {{Va}}
# shortcuts, up to the first closing braces. The template name must be
# followed by a pipe or the closing braces, so templates whose names merely
# start with "Va" are left alone.
_vitalTemplatePattern = re.compile(
    r'\{\{[Vv](?:ital article|A|a)\s*(?:\|[\s\S]*?)?\}\}')


def _templateHasParam(template, name, value):
    """Return True when *template* sets parameter *name* to exactly *value*."""
    paramPattern = (r'\|\s*' + name + r'\s*\=\s*' + re.escape(value)
                    + r'\s*(?=\||\}\})')
    return re.search(paramPattern, template) is not None


def updateTemplate(talkPage):
    """Bring the vital-article template on *talkPage* in line with vitalArticles.

    Pages outside the talk namespace (namespace 1) are ignored. If the
    corresponding article is not in ``vitalArticles`` the template is removed;
    otherwise, when level, topic or class in the existing template differ from
    the recorded values, the template is replaced by a freshly built one.
    Every saved edit increments the module-level ``number`` counter.
    """
    global number
    if talkPage.namespace() != 1:
        return
    page = talkPage.toggleTalkPage()
    talkPageText = talkPage.text
    found = _vitalTemplatePattern.search(talkPageText)
    if found is None:
        # No recognisable template on the page: nothing to remove or update,
        # so do not save an unchanged page or count it as edited.
        return

    if page.title() not in vitalArticles:
        talkPageText = _vitalTemplatePattern.sub('', talkPageText)
        talkPage.put(talkPageText, 'removed {{[[Template:Vital article|Vital article]]}}')
        print('removed')
        number = number + 1
        return

    template = found.group(0)
    level, topic, articleClass, subpage = vitalArticles[page.title()][1:5]
    # An empty expected value means "nothing to enforce" for that parameter.
    levelIsCorrect = not level or _templateHasParam(template, 'level', level)
    topicIsCorrect = not topic or _templateHasParam(template, 'topic', topic)
    classIsCorrect = not articleClass or _templateHasParam(template, 'class', articleClass)
    if levelIsCorrect and topicIsCorrect and classIsCorrect:
        return

    parts = ['{{Vital article|level=' + level]
    if topic:
        parts.append('|topic=' + topic)
    if articleClass:
        parts.append('|class=' + articleClass)
    if subpage:
        parts.append('|subpage=' + subpage)
    parts.append('}}')
    templateToAdd = ''.join(parts)

    talkPageText = talkPageText.replace(template, templateToAdd)
    talkPage.put(talkPageText, 'updated {{[[Template:Vital article|Vital article]]}}')
    # Show which of the three checks triggered the update.
    print(levelIsCorrect)
    print(topicIsCorrect)
    print(classIsCorrect)
    number = number + 1
def _linkedPages(wikitext):
    """Return a Page object for every wikilink target found in *wikitext*."""
    return [pywikibot.Page(site, title)
            for title in patterns['link'].findall(wikitext)]


def _sectionBody(headingMarkup, heading, terminator, wikitext):
    """Return the text between a heading line and the next *terminator* match.

    headingMarkup -- the run of '=' that introduces the heading ('==' or '===')
    heading       -- text the heading starts with
    terminator    -- regular expression marking the end of the section
    wikitext      -- text to search

    Raises ValueError when the section cannot be found. Failing here, before
    any talk page is touched, is deliberate: articles of a skipped section
    would be missing from vitalArticles and updateTemplate would then strip
    their templates.
    """
    sectionPattern = (headingMarkup + r'\s*' + re.escape(heading)
                      + r'.*\n([\s\S]+?)' + terminator)
    found = re.search(sectionPattern, wikitext)
    if found is None:
        raise ValueError('section "' + heading + '" not found in list page')
    return found.group(1)


def run():
    """Build the vitalArticles dictionary, then update every templated talk page.

    Each entry of ``vitalPages`` is scanned according to its level: level 1
    and topical level-4 pages contribute all of their links, level 2 and 3
    pages are split by the headings in ``sections2`` / ``sections3``, and the
    remaining level-4 page is split into 'Language' and 'Life'. Afterwards
    every page in 'Category:All Wikipedia vital articles' is passed to
    updateTemplate.
    """
    print('Running')
    nextHeading = r'[^=]\=\=[^=]'        # start of the next "==" heading
    nextSubHeading = r'[^=]\=\=\=[^=]'   # start of the next "===" heading

    for vitalPage in vitalPages:
        vitalPageName = pywikibot.Page(site, vitalPage[0])
        level = vitalPage[1]
        topic = vitalPage[2]
        if level == 1 or (level == 4 and topic):  # don't search by section
            listPages = pagegenerators.LinkedPageGenerator(vitalPageName)
            determineTemplates(listPages, topic, level, vitalPage[3])
        elif level == 2:
            wikitext = vitalPageName.text
            for sectionName, topicName in sections2.items():
                section = _sectionBody('==', sectionName, r'(?:\=|\Z)', wikitext)
                determineTemplates(_linkedPages(section), topicName, 2)
        elif level == 3:
            wikitext = vitalPageName.text
            for sectionName, topicName in sections3.items():
                section = _sectionBody('==', sectionName, nextHeading, wikitext)
                if topicName:
                    determineTemplates(_linkedPages(section), topicName, 3)
                else:
                    # Language first: determineTemplates never overwrites an
                    # existing entry, so the later 'Life' pass only picks up
                    # the rest of the section.
                    subSection = _sectionBody('===', 'Language', nextSubHeading, section)
                    determineTemplates(_linkedPages(subSection), 'Language', 3)
                    determineTemplates(_linkedPages(section), 'Life', 3)
        else:  # Level 4, Life+Language
            wikitext = vitalPageName.text
            section = _sectionBody('==', 'Language', nextHeading, wikitext)
            determineTemplates(_linkedPages(section), 'Language', 4)
            determineTemplates(_linkedPages(wikitext), 'Life', 4)
    print('finished dictionary')

    vitalCategory = pywikibot.Category(
        pywikibot.Page(site, 'Category:All Wikipedia vital articles'))
    for page in pagegenerators.CategorizedPageGenerator(vitalCategory):
        updateTemplate(page)
# Script entry point: the bot starts as soon as this file is executed.
run()