Jump to content

User:PotatoBot/Code/7

From Wikipedia, the free encyclopedia
#!/usr/bin/python
# -*- coding: utf-8  -*-

import pywikibot as w
import scripts.noreferences, re, mysave
from datetime import date

# PotatoBot Task 7: Adds Glottolog codes to language infoboxes

def main():
	"""Add Glottolog codes to language infoboxes and write a log page.

	Reads the wiki page named by ``datapage`` line by line. For every line
	that carries an ``[[ISO 639:`` link, the linked language article is
	resolved through ``mysave.resolveredir`` and its {{Infobox language}}
	parameters are inspected. The article then either gets ``glotto`` (and,
	if missing, ``glottorefname``) inserted right after its ``iso3``
	parameter, or is recorded in one of the error lists. At the end all
	lists are saved to 'User:PotatoBot/Lists/Glottolog log'.
	"""
	datapage = 'Wikipedia:WikiProject Languages/Glottolog 2.2 language names'
	# One list of log entries per problem category (see the section headings of the log page below)
	errNobox, errIsSection, errMultiISO, errWrongGlotto, errMultiISOGlotto, errWrongISO, errNoISO = [], [], [], [], [], [], []
	# errIO collects the strings returned by mysave.savepage; refAdded collects newline-terminated entries
	errIO, refAdded = '', ''
	refbot = scripts.noreferences.NoReferencesBot(None, True)
	for glottoline in w.Page(w.getSite(), datapage).get().splitlines():
		# glottoline == ' aari1239 [[ISO 639:aiw|[aiw]]] [[Aari language|Aari]]'
		i = glottoline.find('[[ISO 639:')
		# Lines without the ISO link (find() == -1), or with it at index 0 or 1, are skipped
		if i > 1:
			w.output('')
			# i+24 is where the article link target starts, assuming the fixed layout of the
			# example above (three-letter ISO code, i.e. '[[ISO 639:xxx|[xxx]]] [[' is 24 characters)
			if '|' in glottoline[i+24:]:
				# Piped link: text before the pipe is the article title, text after it the Glottolog name
				pipe = glottoline.find('|', i+24)
				langpage = mysave.resolveredir(w.Page(w.getSite(), glottoline[i+24 : pipe]))
				glottoname = glottoline[pipe+1 : glottoline.find(']', pipe)]
			else:
				# Unpiped link: the link text serves as both article title and Glottolog name
				glottoname = glottoline[i+24 : glottoline.find(']', i+24)]
				langpage = mysave.resolveredir(w.Page(w.getSite(), glottoname))
			# The glottocode runs from index 1 up to the next space; the ISO code is the
			# three characters following '[[ISO 639:'
			glottocode = glottoline[1 : glottoline.find(' ', 1)]
			isocode = glottoline[i+10 : i+13]
			w.output(langpage.title() + ' (' + isocode + '/' + glottocode + '):')
			langbox = False
			# Infobox parameters found on the article: iso3 value, lcN values (plus iso3),
			# quoted ldN values, glotto/glottoN values, glottorefname value
			paramISO3, paramLC, paramLD, paramGlotto, paramGlottoref = '', [], [], [], ''
			if langpage.exists() and '#' not in langpage.title():
				for template in langpage.templatesWithParams():
					if template[0].title() == 'Template:Infobox language' or template[0].title() == 'Template:Infobox Language':
						langbox = True
						for param in template[1]:
							# Split 'name = value' at the first '='
							pp = param.partition('=')
							pname = pp[0].strip()
							pval = pp[2].strip()
							if pname == 'iso3':
								paramISO3 = pval
								paramLC.append(pval)
							elif pname[:2] == 'lc' and pname[2:].isdigit():
								paramLC.append(pval)
							elif pname[:2] == 'ld' and pname[2:].isdigit():
								paramLD.append('"' + pval + '"')
							elif pname[:6] == 'glotto' and (len(pname) == 6 or pname[6:].isdigit()):
								# 'glotto' itself or 'glotto' followed by digits only
								paramGlotto.append(pval)
							elif pname == 'glottorefname':
								paramGlottoref = pval
			# Dialect names count towards the "multiple codes" check below: a single one is
			# listed by name, more than one is summarised
			if len(paramLD) > 0:
				if len(paramLD) == 1: paramLC += paramLD
				else: paramLC.append('several dialect names')
			# Log line for this article, formatted as a numbered wikitext list item
			entry = '#[[' + langpage.title() + ']] (' + isocode + '/' + glottocode + ')'
			
			if not langbox:
				# langbox is also False when the page does not exist or its title contains '#'
				if '#' in langpage.title():
					w.output('  \03{yellow}redirect to section\03{default}')
					errIsSection.append(entry)
				else:
					w.output('  \03{yellow}no language infobox\03{default}')
					errNobox.append(entry)
			elif len(paramLC) > 1:
				# More than one ISO code / dialect entry in the infobox: only logged, never edited
				if len(paramGlotto) == 0:
					w.output('  \03{yellow}multiple ISO codes, but no glottocode\03{default}')
					errMultiISO.append(entry + ': ' + ', '.join(paramLC))
				else:
					w.output('  \03{yellow}multiple ISO codes and glottocode(s)\03{default}')
					errMultiISOGlotto.append(entry + ': ' + ', '.join(paramLC) + ' / ' + ', '.join(paramGlotto))
			else:
				if len(paramGlotto) == 0:
					langtext = langpage.get()
					# First '|iso3=...' in the raw page text, up to (not including) the next '|' or '}'
					isoparam = re.search(r'\|\s*iso3\s*=[^\|\}]*', langtext)
					if isoparam and paramISO3 != '':
						if paramISO3 != isocode:
							w.output('  \03{yellow}mismatching ISO code\03{default}')
							errWrongISO.append(entry + ': ' + paramISO3)
						else:
							# Insert the new parameter(s) directly after the iso3 parameter. The
							# string * bool product adds glottorefname only when the infobox has none yet.
							langtext = langtext[:isoparam.end()] + '|glotto=' + glottocode + '\n' +\
								('|glottorefname=' + glottoname + '\n') * (paramGlottoref == '') + langtext[isoparam.end():]
							summary = 'Add [[Glottolog]] code ' + glottocode
							w.output('  NoReferencesBot says: ', newline = False)
							# NOTE(review): '</ref>' is appended only for the check, presumably so that
							# NoReferencesBot treats the page as containing a footnote -- confirm against
							# scripts.noreferences
							if refbot.lacksReferences(langtext + '</ref>'):
								langtext = refbot.addReferences(langtext)
								summary += ' and &lt;references /&gt;'
								refAdded += entry + '\n'
							errIO += mysave.savepage(langpage, langtext, '7', summary)
					else:
						w.output('  \03{yellow}no ISO code\03{default}')
						errNoISO.append(entry)
				elif len(paramGlotto) > 1 or paramGlotto[0] != glottocode:
					# A single glottocode equal to the database value needs no action and is not logged
					w.output('  \03{yellow}mismatching glottocode(s)\03{default}')
					errWrongGlotto.append(entry + ': ' + ', '.join(paramGlotto))
	errNobox.sort()
	errIsSection.sort()
	errWrongGlotto.sort()
	errMultiISO.sort()
	errMultiISOGlotto.sort()
	errWrongISO.sort()
	errNoISO.sort()
	# Write the log page: intro with today's date and a syntax legend, then one section per list
	mysave.savepage(w.Page(w.getSite(), 'User:PotatoBot/Lists/Glottolog log'), 
		'Log for the addition of [[Glottolog]] codes to language infoboxes ([[WP:Bots/Requests for approval/PotatoBot 7|Task 7]]). Date: ' +\
		mysave.fmtdate(date.today()) + '.\n\n' + "''Syntax'':" + '\n' + '#[[Some language]] ([[ISO 639-3]] code/glottocode, from [[' +\
		datapage + '|database]]): ISO codes and dialect names / glottocodes, from {{tl|Infobox language}} (if applicable)\n\n{{TOC right}}\n' +\
		'=== No language infobox ===\n' + '\n'.join(errNobox) + '\n\n' +\
		'=== Redirect to section ===\n' + '\n'.join(errIsSection) + '\n\n' +\
		'=== One ISO code and mismatching glottocode(s) ===\n' + '\n'.join(errWrongGlotto) + '\n\n' +\
		'=== Multiple ISO codes, but no glottocode ===\n' + '\n'.join(errMultiISO) + '\n\n' +\
		'=== Multiple ISO codes and mismatching, missing or surplus glottocode(s) ===\n' + '\n'.join(errMultiISOGlotto) + '\n\n' +\
		'=== Mismatching ISO code ===\n' + '\n'.join(errWrongISO) + '\n\n' +\
		'=== No ISO code ===\n' + '\n'.join(errNoISO) + '\n\n' +\
		'=== &lt;references /&gt; added ===\n' + refAdded +\
		'=== I/O error ===\n' + errIO, '7', 'Creating [[Glottolog]] log')

# Script entry point: run the task only when this file is executed directly, not when imported.
if __name__ == "__main__":
	try:
		main()
	finally:
		# Executed whether or not main() raised.
		# NOTE(review): w.stopme() presumably deregisters the bot from the
		# framework's throttle -- confirm against the pywikibot documentation.
		w.stopme()

mysave.py

[edit]
#!/usr/bin/python
# -*- coding: utf-8  -*-

import pywikibot as w
import re

# Code for saving redirects and other pages

def savepage(page, text, BRFANo, summary = '', minor = False):
	"""Save text to a page and report failures as log lines instead of raising.

	Parameters:
	page    -- page object to write to (must offer title() and put())
	text    -- new page text
	BRFANo  -- bot approval request number as a string; used in the edit summary link
	summary -- edit summary; when empty, the current edit summary is left untouched
	minor   -- passed on to page.put() as minorEdit

	Returns '' on success. On failure returns a newline-terminated wikitext
	list item ('# [[Page]]: reason\\n') so that callers can concatenate the
	results of several calls into one log.
	"""
	if summary != '':
		w.setAction(summary + '. See [[Wikipedia:Bots/Requests for approval/PotatoBot ' + BRFANo + '|approval]]. Report errors and suggestions at [[User talk:PotatoBot]].')
	try:
		if '#' in page.title():
			# A title containing '#' points at a section, which cannot be saved as a page
			w.output('  \03{red}cannot save %s because it is a section\03{default}' % page.title())
			return '# %s: this is a section title\n' % page.title(aslink=True)
		page.put(text, minorEdit = minor)
		w.output('  \03{green}saving %s -> \03{gray}%s\03{default}' % (page.title(), text))
		return ''
	except w.LockedPage:
		w.output('  \03{red}cannot save %s because it is locked\03{default}' % page.title())
		return '# %s: page was locked\n' % page.title(aslink=True)
	except w.EditConflict:
		w.output('  \03{red}cannot save %s because of edit conflict\03{default}' % page.title())
		return '# %s: edit conflict occurred\n' % page.title(aslink=True)
	except w.SpamfilterError as error:
		w.output('  \03{red}cannot save %s because of spam blacklist entry %s\03{default}' % (page.title(), error.url))
		return '# %s: spam blacklist entry\n' % page.title(aslink=True)
	except Exception:
		# Deliberately broad so one failing page does not abort a whole bot run, but
		# KeyboardInterrupt and SystemExit are no longer swallowed.
		w.output('  \03{red}unknown error on saving %s\03{default}' % page.title())
		return '# %s: unknown error occurred\n' % page.title(aslink=True)

def resolveredir(page):
	"""Return the redirect target if page is a redirect, else return page itself.

	Never raises for ordinary errors; instead a placeholder page is returned:
	- if the redirect target cannot be obtained, a page titled
	  '<title> (broken redirect)';
	- if the redirect check itself fails, a page titled '<title> (bad link)'.
	"""
	try:
		if not page.isRedirectPage():
			return page
		try:
			# Fetch the target once and reuse it for both the message and the result
			target = page.getRedirectTarget()
			w.output('  \03{gray}resolving redir %s to %s\03{default}'\
				% (page.title(), target.title()))
			return target
		except Exception:
			w.output('  \03{yellow}target %s is a broken redir\03{default}' % page.title())
			return w.Page(w.getSite(), page.title() + ' (broken redirect)')
	except Exception:
		# Broad on purpose, but no longer catches KeyboardInterrupt/SystemExit
		w.output('  \03{yellow}target %s is a bad link\03{default}' % page.title())
		return w.Page(w.getSite(), page.title() + ' (bad link)') # workaround for wikipedia.py breaking wikiasite: links

def makeredir(redirpage, page, BRFANo, templates = ''):
	"""Create a redirect from redirpage to page, or report why that was not possible.

	Parameters:
	redirpage -- page that should become the redirect
	page      -- desired target; redirects are resolved first via resolveredir()
	BRFANo    -- bot approval request number as a string, passed on to savepage()
	templates -- text appended after the link in the redirect's wikitext

	Returns '' when nothing needs logging: redirpage already redirects to the
	target, is the target itself, or is (or redirects to) a disambiguation
	page that links to the target. Returns a '# ...' log line when redirpage
	exists with other content. If redirpage does not exist, the redirect is
	saved and the result of savepage() is returned.
	"""
	page = resolveredir(page)
	if not redirpage.exists():
		# Create redirect, or get a log line back if an error occurs
		return savepage(redirpage, '#REDIRECT %s %s' % (page.title(aslink=True), templates), BRFANo, 'Redirect to ' + page.title(aslink=True))

	comment = ''
	dab = None
	if redirpage.isDisambig():
		comment = ' (disambiguation)'
		dab = redirpage
	if redirpage.isRedirectPage():
		try:
			# Fetch the target once instead of once per check
			target = redirpage.getRedirectTarget()
			if target.title() == page.title() or \
					target.sectionFreeTitle() == page.title():
				# Already a redir to the desired article
				return ''
			elif target.isDisambig():
				comment = ' (redirect to disambiguation)'
				dab = target
			else:
				comment = ' (redirect)'
		except Exception:
			# Any failure while inspecting the target is reported as a broken redirect;
			# KeyboardInterrupt/SystemExit are no longer swallowed
			comment = ' (broken redir)'
	# dab is always set whenever the comment mentions a disambiguation page
	if 'disambiguation' in comment and page in [resolveredir(p) for p in dab.linkedPages()]:
		w.output('  link to %s already on dab page %s' % (page.title(), redirpage.title()))
		return ''
	elif redirpage.title() != page.title():
		w.output('  \03{yellow}redir to %s failed, page %s already exists\03{default}' % (page.title(), redirpage.title()))
		return '# %s: redirecting to %s failed, page already exists%s\n' % (redirpage.title(aslink=True), page.title(aslink=True), comment)
	else:
		return ''

def findATCs(page, includeVet = True):
	"""Look for ATC codes in the 'Drugbox' and 'Chembox Identifiers' infoboxes.

	Parameters:
	page       -- page object offering templatesWithParams(); entries are expected
	              to be (template name, list of 'name=value' strings) pairs
	includeVet -- when false, 'ATCvet = yes' does not add the 'Q' prefix to
	              the main code

	Returns a tuple (codes, ATCvetpos, prefixpos, suffixpos, supppos):
	codes is the list of codes found (main code first, then codes from
	{{ATC}}/{{ATCvet}} templates obtained via page.templatesWithParams(supp),
	where supp is the value of the supplemental parameter); the four
	positions are the indices of the respective parameters within their
	template's parameter list, or -1 if the parameter was not found.
	A prefix whose value is 'none' (in any letter case) is treated as absent.
	"""
	ATCvet, prefix, suffix, supp = False, '', '', ''
	ATCvetpos, prefixpos, suffixpos, supppos = -1, -1, -1, -1
	templatenames = ('Drugbox', 'Chembox Identifiers')
	# Parameter names differ between the two templates; index matches templatenames
	prefixnames = ('ATC_prefix', 'ATCCode_prefix')
	suffixnames = ('ATC_suffix', 'ATCCode_suffix')
	suppnames = ('ATC_supplemental', 'ATC_Supplemental')
	for template in page.templatesWithParams():
		if template[0] not in templatenames:
			continue
		idx = templatenames.index(template[0])
		for pos, param in enumerate(template[1]):
			parts = param.partition('=')
			key = parts[0].strip()
			value = parts[2].strip()
			if key == 'ATCvet':
				ATCvet = value == 'yes' and includeVet
				ATCvetpos = pos
			elif key == prefixnames[idx]:
				# lower() must be called: comparing the bound method itself to
				# 'none' is always unequal, so 'none' prefixes were never skipped
				if value.lower() != 'none':
					prefix = value
					prefixpos = pos
			elif key == suffixnames[idx]:
				suffix = value
				suffixpos = pos
			elif key == suppnames[idx]:
				supp = value
				supppos = pos
	codes = []
	if prefix != '':
		codes.append(('Q' if ATCvet else '') + prefix + suffix)
	for template in page.templatesWithParams(supp):
		# Templates with fewer than two parameters cannot form a code and are skipped
		if template[0] in ('ATC', 'ATCvet') and len(template[1]) >= 2:
			codes.append(('Q' if template[0] == 'ATCvet' else '') + template[1][0] + template[1][1])
	return (codes, ATCvetpos, prefixpos, suffixpos, supppos)

def addTemplateParam(page, newtemplates, BRFANo, summary = 'Updating template', minor = False):
	"""Write changed or added template parameters back to a page.

	Parameters:
	page         -- page object offering get(), templatesWithParams() and title()
	newtemplates -- edited copy of page.templatesWithParams(): same templates in
	                the same order, each a (name, list of 'name=value' strings) pair
	BRFANo, summary, minor -- passed on to savepage()

	For each template the parameters are walked in order. A parameter whose
	name matches the next old parameter has its value replaced in the page
	text if it changed; a parameter with a new name is inserted as
	'|name=value' after the previously handled parameter (or directly after
	the template name). Removing or reordering parameters is not supported.

	Returns '' if nothing changed, the result of savepage() if the page was
	edited, or a '# ...' log line if newtemplates cannot be reconciled with
	the page (nothing is saved in that case).

	Limitations carried over from the original design: only values without
	whitespace, '|' or '}' can be located in the text, and unnamed parameters
	cannot be changed (Todo).
	"""
	def mismatch(detail):
		# Report an irreconcilable difference on the console and as a log line
		w.output('\03{yellow}template list does not match page %s: %s\03{default}' % (page.title(), detail))
		return '# %s: template list did not match templates on page\n' % page.title(aslink=True)

	original = page.get()
	text = original
	oldtemplates = page.templatesWithParams()
	if len(newtemplates) < len(oldtemplates):
		return mismatch('%d vs. %d templates' % (len(newtemplates), len(oldtemplates)))
	# Position in text up to which templates/parameters have been handled
	pointer = 0
	for old, new in zip(oldtemplates, newtemplates):
		oldname, oldparams = old[0], list(old[1])
		newname, newparams = new[0], list(new[1])
		# Match '{{Name|' with either case of the first letter and spaces or
		# underscores between words; the name is escaped so it is taken literally
		rest = '( |_)'.join(re.escape(part) for part in oldname[1:].split(' '))
		start = re.compile(r'\{\{\s*(%s|%s)%s\s*\|' % (re.escape(oldname[:1].upper()),
			re.escape(oldname[:1].lower()), rest)).search(text, pointer)
		if start:
			# Point at the '|' that follows the template name
			pointer = start.end() - 1
		if newname == oldname and newparams == oldparams:
			continue
		if newname.strip() != oldname.strip():
			return mismatch('%s vs. %s' % (newname.strip(), oldname.strip()))
		oldkeys = [p.partition('=')[0].strip() for p in oldparams]
		oi = 0 # index of the next old parameter not yet matched
		for newparam in newparams:
			parts = newparam.partition('=')
			newkey = parts[0].strip()
			newvalue = parts[2].strip()
			if oi < len(oldkeys) and newkey == oldkeys[oi]:
				oldvalue = oldparams[oi].partition('=')[2].strip()
				oi += 1
				found = re.compile(r'\|\s*%s\s*=\s*([^|}\s]*)\s*(}|\|)' % re.escape(newkey)).\
					search(text, pointer)
				# Only trust the match if it carries the value the parser reported
				if found and found.group(1) == oldvalue:
					valuestart, valueend = found.span(1)
					if newvalue != oldvalue:
						text = text[:valuestart] + newvalue + text[valueend:]
						valueend = valuestart + len(newvalue)
					pointer = valueend
				elif newvalue != oldvalue:
					return mismatch('parameter %s of %s not found in page text' % (newkey, oldname.strip()))
			elif newkey in oldkeys[oi:]:
				return mismatch('parameters of %s removed or reordered' % oldname.strip())
			elif not start:
				# Without a known template position there is nowhere safe to insert
				return mismatch('%s not found in page text' % oldname.strip())
			else:
				insertion = '|' + newparam.strip()
				text = text[:pointer] + insertion + text[pointer:]
				pointer += len(insertion)
		if oi < len(oldkeys):
			return mismatch('parameters of %s removed' % oldname.strip())
	if text != original:
		return savepage(page, text, BRFANo, summary, minor)
	return ''

def fmtdate(date):
	"""Return date formatted as 'D Month YYYY' with the English month name, e.g. '5 March 2014'."""
	# Index 0 is a placeholder so that a month number can be used as index directly
	monthnames = (
		'',
		'January', 'February', 'March', 'April', 'May', 'June',
		'July', 'August', 'September', 'October', 'November', 'December',
	)
	monthname = monthnames[date.month]
	return '%d %s %d' % (date.day, monthname, date.year)