Jump to content

User:YpnBot/Vital article talk pages update/source

From Wikipedia, the free encyclopedia
import pywikibot
from pywikibot import pagegenerators
import re
import string

# Wiki the bot operates on: the English Wikipedia.
# pywikibot.Site is the supported factory; getSite is only a deprecated alias of it.
site = pywikibot.Site('en', 'wikipedia')
# Running count of talk pages edited; incremented by updateTemplate().
number = 0
def _expandedRow(suffix, topic, subpage='') :
	"""Build the vitalPages row for one level-4 ("Expanded") list page."""
	return ['Wikipedia:Vital articles/Expanded/' + suffix, 4, topic, subpage]

# One row per vital-article list page: [page title, level, topic, subpage].
# run() reads the columns in exactly that order.
vitalPages = [
	['Wikipedia:Vital articles/Level/1', 1, '', ''],
	['Wikipedia:Vital articles/Level/2', 2, '', ''],
	['Wikipedia:Vital articles', 3, '', ''],
	_expandedRow('People', 'People'),
	_expandedRow('History', 'History'),
	_expandedRow('Geography', 'Geography'),
	_expandedRow('Arts', 'Art'),
	_expandedRow('Philosophy and religion', 'Philosophy'),
	# No topic here: run() splits this page into 'Language' and 'Life' itself.
	_expandedRow('Anthropology, psychology, and everyday life', ''),
	_expandedRow('Society and social sciences', 'Society'),
	_expandedRow('Biology and health sciences', 'Science', 'Biology'),
	_expandedRow('Physical sciences', 'Science', 'Physics'),
	_expandedRow('Technology', 'Technology'),
	_expandedRow('Mathematics', 'Mathematics'),
]
# Common prefix of every "|class=<grade>" pattern below.
_classParamPrefix = r'\|\s*[Cc]lass\s*\=\s*'

# Compiled regexes, looked up by short key.
patterns = dict([
	# Wikilink target: everything after "[[" up to the first "]" or "|".
	('link', re.compile(r'\[\[(.*?)[\]\|]')),
	# Quality templates.
	('fa', re.compile(r'\{\{[Ff]eatured article\}\}')),
	('ga', re.compile(r'\{\{[Gg]ood article\}\}')),
	# "|class=..." assessment parameters.
	('a', re.compile(_classParamPrefix + r'[Aa]')),
	('b', re.compile(_classParamPrefix + r'[Bb]')),
	('c', re.compile(_classParamPrefix + r'[Cc]')),
	('start', re.compile(_classParamPrefix + r'[Ss]tart')),
	('stub', re.compile(_classParamPrefix + r'[Ss]tub')),
	# Bare {{stub}} template.
	('stubTemp', re.compile(r'\{\{[Ss]tub\}\}')),
])
# Level-2 list page: section heading -> topic passed to determineTemplates().
# Every heading maps to itself except 'Everyday life', which has no topic.
sections2 = dict((heading, heading) for heading in (
	'Art',
	'Geography',
	'History',
	'Everyday life',
	'Mathematics',
	'Philosophy',
	'Science',
	'Society',
	'Technology',
))
sections2['Everyday life'] = ''
# Level-3 list page: section heading -> topic passed to determineTemplates().
# An empty topic ('Everyday life') makes run() split that section into
# 'Language' and 'Life' instead.
sections3 = dict([
	('People', 'People'),
	('History', 'History'),
	('Geography', 'Geography'),
	('Arts and culture', 'Art'),
	('Philosophy and religion', 'Philosophy'),
	('Everyday life', ''),
	('Society and social sciences', 'Society'),
	('Health and medicine', 'Science'),
	('Science', 'Science'),
	('Technology', 'Technology'),
	('Mathematics', 'Mathematics'),
])

# Article title -> [page, level (as str), topic, assessment class, subpage].
# Filled by determineTemplates() and read by updateTemplate().
vitalArticles = dict()

def _assessArticleClass(page, talkPage) :
	"""Return the assessment class for an article, or '' when none is found.

	The checks run in priority order and the first hit wins: FA (article
	text), A (talk page), GA (article text), then B, C, Start and Stub (talk
	page), and finally a bare {{stub}} template in the article text.
	"""
	checks = (
		('fa', page, 'FA'),
		('a', talkPage, 'A'),
		('ga', page, 'GA'),
		('b', talkPage, 'B'),
		('c', talkPage, 'C'),
		('start', talkPage, 'Start'),
		('stub', talkPage, 'Stub'),
		('stubTemp', page, 'Stub'),
	)
	for key, source, grade in checks :
		# .text is read only when its check is reached, so the talk page is
		# not accessed for articles that already match as FA.
		if patterns[key].search(source.text) :
			return grade
	return ''

def determineTemplates(listPages, topic, level, subpage='') :
	"""Record the template parameters for every new article in listPages.

	For each main-namespace page not yet in vitalArticles, an entry
	[page, str(level), topic, assessment class, subpage] is stored under the
	page title. Redirects are resolved first and stored under the target's
	title.

	listPages -- iterable of page objects
	topic     -- topic string stored in the entry (may be '')
	level     -- vital level; stored as a string
	subpage   -- subpage string stored in the entry (default '')
	"""
	print('level:' + str(level))
	print('topic:' + str(topic))
	for page in listPages :
		if page.namespace() != 0 or page.title() in vitalArticles :
			continue
		if page.isRedirectPage() :
			page = page.getRedirectTarget()
			# The target may already have been recorded by an earlier call
			# (run() processes the lowest-numbered levels first). Keep that
			# entry instead of overwriting it with this level/topic.
			if page.title() in vitalArticles :
				continue
		talkPage = page.toggleTalkPage()
		articleClass = _assessArticleClass(page, talkPage)
		vitalArticles[page.title()] = [page, str(level), topic, articleClass, subpage]
			
# Matches one {{Vital article}} / {{VA}} / {{Va}} transclusion, with or without
# parameters. The name must be followed directly by "|" or "}}" (optional
# whitespace allowed) so that other templates whose names merely start with
# "Va" are not mistaken for it.
_vitalTemplatePattern = re.compile(r'\{\{[Vv](?:ital article|A|a)\s*(?:\|[\s\S]*?)?\}\}')

def _paramIsCorrect(name, expected, template) :
	"""Return True if template sets name to expected, or if expected is empty.

	The expected value is escaped and must not be followed by another word
	character, so a longer value that only starts with it does not count.
	"""
	if not expected :
		return True
	pattern = name + r'\s*\=\s*' + re.escape(expected) + r'(?!\w)'
	return re.search(pattern, template) is not None

def updateTemplate(talkPage) :
	"""Remove or refresh the vital-article template on one talk page.

	Pages outside namespace 1 are ignored. If the subject page is not in
	vitalArticles, every vital-article template is stripped and the page is
	saved. Otherwise, if the page carries the template and its level, topic
	or class disagrees with the recorded values, the template is rewritten
	and the page is saved. Each save increments the module-level counter
	`number`.
	"""
	# Declared once, before any assignment: a second declaration after the
	# assignment is a SyntaxError on Python 3.
	global number
	if talkPage.namespace() != 1 :
		return
	page = talkPage.toggleTalkPage()
	talkPageText = talkPage.text
	if page.title() not in vitalArticles :
		talkPageText = _vitalTemplatePattern.sub('', talkPageText)
		talkPage.put(talkPageText, 'removed {{[[Template:Vital article|Vital article]]}}')
		print('removed')
		number = number + 1
		return
	found = _vitalTemplatePattern.search(talkPageText)
	if found is None :
		return
	template = found.group(0)
	_, level, topic, articleClass, subpage = vitalArticles[page.title()]
	levelIsCorrect = _paramIsCorrect('level', level, template)
	topicIsCorrect = _paramIsCorrect('topic', topic, template)
	classIsCorrect = _paramIsCorrect('class', articleClass, template)
	# NOTE(review): subpage is written below but not compared here, so a
	# template that differs only in subpage is left untouched.
	if levelIsCorrect and topicIsCorrect and classIsCorrect :
		return
	templateToAdd = '{{Vital article|level=' + level
	if topic :
		templateToAdd = templateToAdd + '|topic=' + topic
	if articleClass :
		templateToAdd = templateToAdd + '|class=' + articleClass
	if subpage :
		templateToAdd = templateToAdd + '|subpage=' + subpage
	templateToAdd = templateToAdd + '}}'
	# str.replace swaps every occurrence of the matched template text.
	talkPageText = talkPageText.replace(template, templateToAdd)
	talkPage.put(talkPageText, 'updated {{[[Template:Vital article|Vital article]]}}')
	# Debug output: which of the three checks passed.
	print(levelIsCorrect)
	print(topicIsCorrect)
	print(classIsCorrect)
	number = number + 1
			
def _pagesLinkedIn(wikitext) :
	"""Return a Page object for every wikilink target found in wikitext."""
	return [pywikibot.Page(site, title) for title in patterns['link'].findall(wikitext)]

def _sectionBody(pattern, wikitext, description) :
	"""Return group 1 of pattern in wikitext; raise ValueError if it is absent.

	Stopping with a clear error is deliberate: continuing without a section
	would leave its articles out of vitalArticles, and updateTemplate() would
	then strip the template from their talk pages.
	"""
	found = re.search(pattern, wikitext)
	if found is None :
		raise ValueError('could not find ' + description)
	return found.group(1)

def run() :
	"""Build vitalArticles from every list page, then fix the tagged talk pages.

	Level 1 pages and level 4 pages that have a topic are read through their
	outgoing links. Levels 2 and 3 are split by section heading (sections2 /
	sections3) so each section gets its own topic. The level 4 page without a
	topic is split into 'Language' and 'Life'. Finally every page in
	'Category:All Wikipedia vital articles' is passed to updateTemplate().

	Raises ValueError when an expected section heading is not found.
	"""
	# Section bodies are captured with [\s\S]+? rather than (.|\s)+?. Both
	# match any character, but the alternation backtracks exponentially when
	# the closing heading is missing.
	print('Running')
	for pageTitle, level, topic, subpage in vitalPages :
		vitalPage = pywikibot.Page(site, pageTitle)
		if level == 1 or (level == 4 and topic) : # don't search by section
			listPages = pagegenerators.LinkedPageGenerator(vitalPage)
			determineTemplates(listPages, topic, level, subpage)
		elif level == 2 :
			for sectionName, topicName in sections2.items() :
				section = _sectionBody(
					r'==\s*' + sectionName + r'.*\n([\s\S]+?)(?:\=|\Z)',
					vitalPage.text,
					'section "' + sectionName + '" on ' + pageTitle)
				determineTemplates(_pagesLinkedIn(section), topicName, 2)
		elif level == 3 :
			for sectionName, topicName in sections3.items() :
				section = _sectionBody(
					r'==\s*' + sectionName + r'.*\n([\s\S]+?)[^=]\=\=[^=]',
					vitalPage.text,
					'section "' + sectionName + '" on ' + pageTitle)
				if topicName :
					determineTemplates(_pagesLinkedIn(section), topicName, 3)
				else :
					# Language first: determineTemplates() keeps the first
					# entry per title, so the rest of the section is 'Life'.
					subSection = _sectionBody(
						r'===\s*Language.*\n([\s\S]+?)[^=]\=\=\=[^=]',
						section,
						'subsection "Language" on ' + pageTitle)
					determineTemplates(_pagesLinkedIn(subSection), 'Language', 3)
					determineTemplates(_pagesLinkedIn(section), 'Life', 3)
		else : # Level 4, Life+Language
			pageText = vitalPage.text
			section = _sectionBody(
				r'==\s*Language.*\n([\s\S]+?)[^=]\=\=[^=]',
				pageText,
				'section "Language" on ' + pageTitle)
			determineTemplates(_pagesLinkedIn(section), 'Language', 4)
			determineTemplates(_pagesLinkedIn(pageText), 'Life', 4)
	print('finished dictionary')
	category = pywikibot.Category(pywikibot.Page(site, 'Category:All Wikipedia vital articles'))
	for page in pagegenerators.CategorizedPageGenerator(category) :
		updateTemplate(page)

# Start the bot only when executed as a script, so that importing this module
# does not trigger live edits.
if __name__ == '__main__' :
	run()