# User:PointBot/source -- from Wikipedia, the free encyclopedia
# coding: utf-8
import urllib2, time, urllib
import random

#cookielib:
import cookielib
urlopen = urllib2.urlopen
Request = urllib2.Request
cj = cookielib.LWPCookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)

#################################
#	Wikipedia functions	#
#################################

def parse(page, tag):
  """Return the value of the first tag="..." attribute found in page.

  page is an iterable of text lines (the callers pass the result of
  readlines() on an API response); tag is the attribute name to look for.
  Lines are scanned in order and the text between the double quotes of the
  first complete match is returned.

  The name has to match a whole attribute name: asking for 'token' does not
  match inside 'lgtoken="..."', and 'timestamp' does not match inside
  'starttimestamp="..."'.

  Returns None when no line contains a complete tag="value" pair.
  """
  marker = tag + '="'
  for line in page:
    start = line.find(marker)
    while start != -1:
      value_start = start + len(marker)
      # One-character slice: '' when the marker sits at the start of the line.
      previous = line[start - 1:start]
      if not (previous.isalnum() or previous in ('_', '-', ':', '.')):
        end = line.find('"', value_start)
        if end == -1:
          break  # value is not closed on this line; move on to the next line
        return line[value_start:end]
      # The hit was the tail of a longer attribute name; keep looking.
      start = line.find(marker, value_start)
  return None
	
def load(name):
  """Fetch the current wikitext of the page titled name.

  Sends an action=query / prop=revisions / rvprop=content request with
  format=xml and cuts the revision text out of the response: everything
  between the '>' that closes the tag carrying the xml:space attribute and
  the following '</rev>'.

  The extracted text is XML-unescaped before it is returned, so callers get
  '&', '<', '>' and quotes rather than '&amp;', '&lt;', '&gt;' and '&quot;'.
  Without this, text that is written back to the wiki gets escaped one more
  level on every round trip.

  Returns '' when the response holds no revision text (no xml:space marker,
  or no closing '</rev>').
  """
  # Local import: the file's import block does not provide this name.
  from xml.sax.saxutils import unescape

  data = urllib.urlencode({'format': 'xml', 'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': name})
  loadString = 'https://en.wikipedia.org/w/api.php?'
  response = urllib2.urlopen(loadString, data)
  try:
    pagestring = response.read()
  finally:
    response.close()

  marker = pagestring.find('xml:space')
  if marker == -1:
    return ''
  tag_end = pagestring.find('>', marker)
  if tag_end == -1:
    return ''
  content_end = pagestring.find('</rev>', tag_end + 1)
  if content_end == -1:
    # No closing tag after the marker; there is no text to slice out.
    return ''
  return unescape(pagestring[tag_end + 1:content_end], {'&quot;': '"', '&#039;': "'", '&apos;': "'"})
  
def login():
  """Log in as PointBot with the two-request action=login exchange.

  The first request sends the user name and password and reads 'token' and
  'sessionid' from the reply; the second request repeats the credentials
  together with that token and session id. The password is read from the
  module-level name `password`, which must be set before this is called.

  Both requests go over HTTPS so the password is not sent in clear text.
  Cookies set by the server are kept by the cookie-jar opener that the
  module installs at import time.

  Prints the login result and the returned values, and returns the tuple
  (lguserid, lgtoken, sessionid) parsed from the second reply; any value
  missing from the reply is whatever parse() returns for it.
  """
  api_url = 'https://en.wikipedia.org/w/api.php?'

  # Step 1: ask for a login token.
  data = urllib.urlencode({'format': 'xml', 'action': 'login', 'lgname': 'PointBot', 'lgpassword': password})
  response = urllib2.urlopen(api_url, data)
  try:
    loginpage = response.readlines()
  finally:
    response.close()
  token = parse(loginpage, 'token')
  sessionid = parse(loginpage, 'sessionid')

  # Step 2: confirm the login with the token from step 1.
  data = urllib.urlencode({'enwiki_session': sessionid, 'format': 'xml', 'action': 'login', 'lgname': 'PointBot', 'lgpassword': password, 'lgtoken': token})
  response = urllib2.urlopen(api_url, data)
  try:
    loginpage = response.readlines()
  finally:
    response.close()
  lguserid = parse(loginpage, 'lguserid')
  lgtoken = parse(loginpage, 'lgtoken')
  sessionid = parse(loginpage, 'sessionid')

  # Single-argument print() produces the same text under Python 2 and 3.
  print('Login was:  %s' % (parse(loginpage, 'result'),))
  print('%s %s %s' % (lguserid, lgtoken, sessionid))
  return lguserid, lgtoken, sessionid

def get_edit_token(name, lguserid, lgtoken, sessionid):
  """Request an edit token and a timestamp for the page titled name.

  Sends action=query with prop=info|revisions and intoken=edit for `name`
  and returns the tuple (timestamp, edittoken) as extracted by parse() from
  the reply. The query targets `name` itself (not a fixed page), so the
  timestamp describes the page the caller asked about.

  lguserid, lgtoken and sessionid are accepted so existing callers keep
  working, but they are not sent: the session is carried by the cookie-jar
  opener that the module installs at import time.
  """
  data = urllib.urlencode({'format': 'xml', 'action': 'query', 'prop': 'info|revisions', 'intoken': 'edit', 'titles': name})
  loadString = 'https://en.wikipedia.org/w/api.php?'
  response = urllib2.urlopen(urllib2.Request(loadString, data))
  try:
    page = response.readlines()
  finally:
    response.close()
  timestamp = parse(page, 'timestamp')
  edittoken = parse(page, 'edittoken')
  return timestamp, edittoken

def edit_full(name, newcontent, timestamp, edittoken, summary):
  """Submit an action=edit request that sets the text of page name.

  name       -- title of the page to edit
  newcontent -- sent as the 'text' parameter
  timestamp  -- sent as 'basetimestamp'
  edittoken  -- sent as 'token'
  summary    -- edit summary

  The request goes over HTTPS so the edit token is not sent in clear text.
  The reply is not inspected; the connection is closed once the request has
  been made. Returns None.
  """
  data = urllib.urlencode({'format': 'xml', 'action': 'edit', 'title': name, 'summary': summary, 'text': newcontent, 'basetimestamp': timestamp, 'token': edittoken})
  loadString = 'https://en.wikipedia.org/w/api.php?'
  response = urllib2.urlopen(loadString, data)
  response.close()
  
  
def edit_add(name, newcontent, timestamp, edittoken, summary):
  """Submit an action=edit request with section=new for page name.

  Takes the same arguments as edit_full; the only difference in the request
  is the extra 'section': 'new' parameter.

  name       -- title of the page to edit
  newcontent -- sent as the 'text' parameter
  timestamp  -- sent as 'basetimestamp'
  edittoken  -- sent as 'token'
  summary    -- edit summary

  The request goes over HTTPS so the edit token is not sent in clear text.
  The reply is not inspected; the connection is closed once the request has
  been made. Returns None.
  """
  data = urllib.urlencode({'format': 'xml', 'action': 'edit', 'title': name, 'section': 'new', 'summary': summary, 'text': newcontent, 'basetimestamp': timestamp, 'token': edittoken})
  loadString = 'https://en.wikipedia.org/w/api.php?'
  response = urllib2.urlopen(loadString, data)
  response.close()
  
def setup():
  """Log in and fetch an initial edit token.

  Calls login(), then get_edit_token() for 'User:PointBot/log' with the
  login values. The timestamp from that call is discarded.

  Returns the tuple (lguserid, lgtoken, sessionid, edittoken).
  """
  user_id, login_token, session = login()
  token_pair = get_edit_token('User:PointBot/log', user_id, login_token, session)
  _unused_timestamp, edit_token = token_pair
  return (user_id, login_token, session, edit_token)

#################################
#	Analysis functions	#
#################################

def findNextLink(page):
  """Return a randomly chosen [[link]] target from page.

  page is the page text as a string; a list or tuple of characters is also
  accepted and joined first. Every '[[' starts a candidate whose target runs
  up to the next ']' or '|'. Targets containing ':' are left out, so only
  plain links within the same wiki remain. Useful for randomly surfing
  from page to page.

  An opening '[[' that is never terminated is ignored instead of aborting
  the scan, so the valid links found before it are still usable.

  Raises IndexError (from random.choice) when the page has no usable link.
  """
  if isinstance(page, (list, tuple)):
    page = ''.join(page)

  links = []
  opening = page.find('[[')
  while opening != -1:
    target_start = opening + 2
    ends = [pos for pos in (page.find(']', target_start), page.find('|', target_start)) if pos != -1]
    if not ends:
      # No terminator after this point, so none after any later '[[' either.
      break
    target = page[target_start:min(ends)]
    if ':' not in target:
      links.append(target)
    # Advance by one so overlapping '[[' pairs are each considered.
    opening = page.find('[[', opening + 1)
  return random.choice(links)

def getFirstSentence(page):
  #This function trys to get the first sentence of a page, but it uses a lot of rules. There's probably a better way to do this.
  score=0
  found=0
  italics=0
  for i in range(len(page)):
    if page[i] == '{' or page[i] == '[' or page[i] == '(' or page[i] =='<' or page[i:i+4] == '&lt;':score-=1
    if page[i] == '}' or page[i] == ']' or page[i] == ')' or page[i] =='>' or page[i:i+4] == '&gt;':score+=1
    if page[i] == """'""" and page[i+1] == """'""":
      if italics == 0:italics=1
      elif italics == 1:italics=0
    if score == 0 and italics == 0:
      if page[i]=='.' and page[i-2] != ' ' and page[i-2] != '.':
	if page[i-3:i-1] != """''""":
	  found = 1
	  return page[:i+1]
	  
def verb_in_first_sentence(page, verbs=('is', 'are', 'were', 'was', 'will', 'refers')):
  """Return 1 if the first sentence of page contains one of verbs, else 0.

  page  -- page text; its first sentence is taken from getFirstSentence()
  verbs -- words to look for (defaults to the bot's built-in list)

  Each verb has to appear as a whole word: 'is' matches in 'Foo is a bar'
  but not inside 'This' or 'history'. Matching is case-sensitive.

  Raises TypeError when getFirstSentence() finds no sentence (returns None),
  which is the exception type callers already got in that situation.
  """
  # Local import: the file's import block does not provide this name.
  import re

  first = getFirstSentence(page)
  if first is None:
    raise TypeError('no first sentence could be extracted from the page')
  for verb in verbs:
    if re.search(r'\b' + re.escape(verb) + r'\b', first):
      return 1
  return 0

def run(name, names, lguserid, lgtoken, sessionid, edittoken):
  """Check one article and return the title of the next one to visit.

  name      -- title of the article to check
  names     -- titles visited so far; a random one is used as fallback
  lguserid, lgtoken, sessionid -- login values passed to get_edit_token()
  edittoken -- kept for interface compatibility; a fresh token is requested
               whenever an edit is about to be made

  The article is loaded. If it is blank, contains {{disambiguation}} or
  {{disambig}}, or starts with '#', nothing is checked and a random entry
  of names is returned. Otherwise, when verb_in_first_sentence() reports 0,
  a line about the article is appended to User:PointBot/log unless the log
  already links to it, and a notice is printed.

  The next title is a random link from the article, or a random entry of
  names when the article yields no usable link.
  """
  log_title = 'User:PointBot/log'

  page = load(name)
  # Single-argument print() produces the same text under Python 2 and 3.
  print('Checking:  %s' % (name,))

  if page == '':
    return random.choice(names)  # page was blank
  if '{{disambiguation}}' in page or '{{disambig}}' in page or page[0] == '#':
    return random.choice(names)  # disambiguation page, or text starting with '#'

  if verb_in_first_sentence(page) == 0:
    first = getFirstSentence(page)
    # Token and timestamp are requested for the page that is actually
    # edited (the log), not for the article being checked.
    timestamp, edittoken = get_edit_token(log_title, lguserid, lgtoken, sessionid)
    oldpage = load(log_title)
    # Look for the exact link so that a short title is not mistaken for
    # "already logged" just because it occurs inside other log text.
    if '[[' + name + ']]' not in oldpage:
      # Reuse the copy that was just checked instead of loading the log again.
      edit_full(log_title, oldpage + '\n\nArticle [[' + name + ']] lacks a proper descriptive introduction and could use some editing.\n' + str(time.time()), timestamp, edittoken, 'Verb report')
    print('Article ' + name + ' lacks proper descriptive introduction %s' % (first,))

  try:
    return findNextLink(page)
  except IndexError:
    # No usable link on this page; go back to a page seen earlier.
    return random.choice(names)



# Script entry point: log in, then crawl from article to article forever.
if __name__ == '__main__':
  # Module-level on purpose: login() reads this name. The value here is a
  # placeholder and has to be replaced with the real bot password.
  password='*********'
  #good example: urban design
  lguserid, lgtoken, sessionid, edittoken=setup()
  names=['wiki']
  name=run('wiki', names, lguserid, lgtoken, sessionid, edittoken)
  while True:
    try:
      names.append(name)
      name=run(name, names, lguserid, lgtoken, sessionid, edittoken)
    except Exception:
      # Any failure on one page (network error, unparsable article) just
      # restarts from a previously seen title. KeyboardInterrupt and
      # SystemExit are not caught here, so the bot can still be stopped.
      name=random.choice(names)