Utente:Alfiobot/bot.py
(The formatting comes out badly because I use tabs at 4 spaces, while here they seem to be fixed at 8.)
#!/usr/bin/python
#
# Wrapper around the pywikipediabot framework
import os, re, time, sys
import wikipedia, catlib, pagegenerators
# Logs in to the Wikipedia server
def login( password):
    os.system("./login.py -pass:"+password)
# Logs out from the Wikipedia server
def logout():
    os.unlink("login-data/wikipedia-it-Alfiobot-login.data")
    print "Logged out"
# Sets the edit summary (action text) used for subsequent saves
def setaction(text):
    wikipedia.setAction(text)
# Returns a page object for the given page title
def getpage(title):
    return wikipedia.Page(wikipedia.getSite(), title)
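# A minimal usage sketch of the session helpers above (never called from the
# library itself; the password and page title are made up for illustration):
def _example_session():
    login("secret")
    setaction(u"Bot: test run")
    page = getpage(u"Utente:Alfiobot/Sandbox")
    wikipedia.output(page.title())
    logout()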
# Returns a list of links in the specified page, using Page objects
def list_links( title):
    site = wikipedia.getSite()
    return wikipedia.Page(site, title).linkedPages()
# Returns a list of pages in the specified category, using Page objects
def list_cat_pages( title):
    site = wikipedia.getSite()
    return catlib.Category(site, title).articles()
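# Usage sketch for the two listing helpers (hypothetical titles; both return
# lists of Page objects):
def _example_listing():
    for linked in list_links(u"Roma"):
        wikipedia.output(linked.title())
    for article in list_cat_pages(u"Categoria:Comuni italiani"):
        wikipedia.output(article.title())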
# Checks whether a page object is in the specified category.
# Returns True if so, False otherwise
def isincat( page, catTitle):
    if not isinstance(page, wikipedia.Page):
        page = getpage(page)
    site = wikipedia.getSite()
    cat_namespace = site.category_namespaces()[0]
    catTitle = catTitle[:1].upper() + catTitle[1:]
    cats = page.categories()
    catpl = wikipedia.Page(site, cat_namespace + ':' + catTitle)
    return catpl in cats
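# Usage sketch (hypothetical page and category titles; a plain title is
# converted to a Page object automatically):
def _example_isincat():
    if isincat(u"Roma", u"Comuni italiani"):
        wikipedia.output(u"Roma is already categorized")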
# Checks whether the specified category is surrounded by <noinclude> tags
def is_cat_noinclude( page, catTitle):
    if not isinstance(page, wikipedia.Page):
        page = getpage(page)
    text = page.get()
    return re.search("<noinclude>.*?\[\["+wikipedia.getSite().category_namespace()+"\:"+catTitle+"\]\].*?</noinclude>", text)
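# Usage sketch: templates normally keep their categories inside <noinclude>
# tags so that transcluding pages are not categorized too (hypothetical titles):
def _example_noinclude():
    if not is_cat_noinclude(u"Template:Comune", u"Template di servizio"):
        wikipedia.output(u"Category is not wrapped in <noinclude>")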
# Adds a category to the specified page object
# (copy and paste from category.py with small changes)
def addcat( page, catTitle, noinclude = False):
    if not isinstance(page, wikipedia.Page):
        page = getpage(page)
    site = wikipedia.getSite()
    cat_namespace = site.category_namespaces()[0]
    catTitle = catTitle[:1].upper() + catTitle[1:]
    try:
        cats = page.categories()
        catsWithSortKeys = page.categories(withSortKeys = True)
    except wikipedia.NoPage:
        wikipedia.output(u"%s doesn't exist yet. Ignoring." % (page.title()))
    except wikipedia.IsRedirectPage,arg:
        redirTarget = wikipedia.Page(site,arg.args[0])
        wikipedia.output(u"WARNING: %s is redirect to %s. Ignoring." % (page.title(), redirTarget.title()))
    else:
        wikipedia.output(u"Current categories:")
        for cat in cats:
            wikipedia.output(u"* %s" % cat.title())
        catpl = wikipedia.Page(site, cat_namespace + ':' + catTitle)
        if catpl in cats:
            wikipedia.output(u"%s is already in %s." % (page.title(), catpl.title()))
            return True
        else:
            wikipedia.output(u'Adding %s' % catpl.aslink())
            catsWithSortKeys.append(catpl)
            text = page.get()
            text = wikipedia.replaceCategoryLinks(text, catsWithSortKeys)
            if noinclude:
                cat_ns = wikipedia.getSite().category_namespace()
                text = text.replace("[["+cat_ns+":"+catTitle+"]]", "<noinclude>[["+cat_ns+":"+catTitle+"]]</noinclude>")
            page.put(text)
    return False
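# Usage sketch (hypothetical titles; noinclude=True is meant for templates):
def _example_addcat():
    setaction(u"Bot: adding category")
    addcat(u"Roma", u"Comuni italiani")
    addcat(u"Template:Comune", u"Template di servizio", noinclude = True)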
# Removes a category from an article, without touching any other text
def removecat( page, catTitle):
    replace( page, [('\[\['+wikipedia.getSite().category_namespace()+'\:'+catTitle+'.*?\]\]', '')])
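# Usage sketch (hypothetical titles):
def _example_removecat():
    setaction(u"Bot: removing category")
    removecat(u"Roma", u"Comuni italiani")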
# Saves a page object's contents back to the wiki. It only accepts Page
# objects, not titles: a bare title has no cached contents, so an empty
# page would be sent
def sendpage( page):
    page.put( page._contents )
# Appends text to an article
def append( page, text, verbose = False, send = True):
    replace( page, [("$", text)], verbose = verbose, send = send)
# Replaces text inside a page using regular expressions or text substitutions
# <replace_list> is a list of tuples, each with two members:
# - regular expression to match
# - replacement text
#
# Each tuple will be applied to the text, in order
#
# Returns True if some substitutions have been made, False otherwise
def replace( page, replace_list, use_regexp = True, verbose = False, send = True, interactive = False, exclude = None):
    # If a title was passed, get the corresponding page
    if not isinstance(page, wikipedia.Page):
        page = getpage(page)
    text = page.get()
    old_text = text
    # Temporarily tear out the excluded sections, so they are not touched
    if exclude:
        [text, removed] = tearout( text, exclude)
    for regexp, newtext in replace_list:
        if use_regexp:
            text = re.sub('(?u)'+regexp, newtext, text)
        else:
            text = text.replace( regexp, newtext)
    # Put the excluded sections back in place
    if exclude:
        text = placein( text, removed)
    if text != old_text:
        if interactive:
            wikipedia.output(u'>>> %s <<<' % page.title())
            wikipedia.showDiff(old_text, text)
            choice = wikipedia.inputChoice(u'Accept the changes?', ['Yes', 'No'], ['y', 'N'], 'N')
            if choice not in ['y', 'Y']:
                return False
        if verbose:
            log( u'Replacing text in ', onscreen = True, newline = False)
            log( page.aslink(), onscreen = True)
        if send:
            page.put(text)
        # Update local copy anyway
        page._contents = text
        return True
    else:
        wikipedia.output(u'No changes in page %s' % page.aslink())
        if verbose:
            log( u'No changes necessary in', onscreen = True, newline = False)
            log( page.aslink(), onscreen = True)
        return False
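# Usage sketch for replace(): each tuple is (pattern, replacement), applied in
# order; exclude= protects e.g. <nowiki> sections from the substitutions
# (hypothetical title and patterns):
def _example_replace():
    setaction(u"Bot: orthography")
    replace(u"Roma",
            [(u"perchè", u"perché")],
            exclude = [("<nowiki>", "</nowiki>")],
            interactive = True)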
# Removes any text matching a list of delimiter pairs, and places a unique marker in its place
# Returns two values: the modified text and a dictionary of marker/removed text pairs
def tearout( text, delimiters):
    removed = {}
    for start,end in delimiters:
        while True:
            pos1 = text.find(start)
            pos2 = text.find(end)
            if 0 <= pos1 < pos2:
                # Cut out the fragment, end delimiter included
                fragment = text[pos1:pos2+len(end)]
                marker = unique(text, "MARKER%dMARKER")
                text = text[:pos1] + marker + text[pos2+len(end):]
                removed[marker] = fragment
            else:
                break
    return [text, removed]
# Puts back the text removed by tearout()
def placein( text, removed):
    for marker, val in removed.items():
        text = text.replace(marker, val)
    return text
# Returns a unique identifier (one that is not already present in the string)
def unique( text, marker):
    counter = 0
    while True:
        m = marker % counter
        if m not in text:
            return m
        counter += 1
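# Usage sketch for the marker machinery: tear out protected sections, edit the
# rest, then put them back unchanged:
def _example_tearout():
    text = u"a <nowiki>[[a]]</nowiki>"
    [text, removed] = tearout(text, [("<nowiki>", "</nowiki>")])
    text = text.replace(u"a", u"b")    # the torn-out section is untouched
    return placein(text, removed)      # gives u"b <nowiki>[[a]]</nowiki>"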
# Open/close/write log
def openlog(task=''):
    global logfile
    logfile = file("logs/%s" % task, 'a')
    log( "\n"+task.upper()+" run started "+time.asctime(time.gmtime())+" (UTC)")
def closelog(task=''):
    log( task.upper()+" run ended "+time.asctime(time.gmtime())+" (UTC)")
    logfile.close()
def log(text='', newline = True, onscreen = False, page = None):
    if page:
        log( page.aslink()+': ', newline = False)
    try:
        logfile.write(text)
    except UnicodeEncodeError:
        logfile.write("(utf-8 error)")
    if onscreen:
        try:
            wikipedia.output(text)
        except UnicodeEncodeError:
            wikipedia.output(u'(utf-8 error)')
    if newline:
        logfile.write('\n')
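# Usage sketch: one append-mode log file per task under logs/ (hypothetical
# task name; the logs/ directory must already exist):
def _example_logging():
    openlog("categorie")
    log(u"processed one page", onscreen = True)
    closelog("categorie")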
# General exception handler: exits cleanly on a Ctrl-C, logs anything else
def handle_exceptions():
    exc_info = sys.exc_info()
    if exc_info[0] is KeyboardInterrupt:
        sys.exit()
    log( "Error: "+str(exc_info[0]))
# Starts a page generator and executes the given function on each page
def do_generator( generator, function, preload = False):
    if preload:
        generator2 = pagegenerators.PreloadingGenerator(generator, pageNumber = 100)
    else:
        generator2 = generator
    for page in generator2:
        try:
            page.get()
        except (wikipedia.IsRedirectPage, wikipedia.NoPage):
            print "Skipping redirect or missing page"
            continue
        function(page)
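# Usage sketch: apply a function to every article of a category, preloading
# pages in batches (hypothetical category name):
def _example_do_generator():
    cat = catlib.Category(wikipedia.getSite(), u"Categoria:Comuni italiani")
    gen = pagegenerators.CategorizedPageGenerator(cat)
    do_generator(gen, lambda page: addcat(page, u"Comuni italiani"), preload = True)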
class XmlDumpPageGenerator:
    """
    Generator which will yield pages from an XML dump file.
    The yielded object is a class with the following attributes:
      * title
      * id
      * text
      * timestamp
      * editRestriction
      * moveRestriction
    Arguments:
      * xmlfilename - The dump's path, either absolute or relative
    """
    def __init__(self, xmlfilename):
        self.xmlfilename = xmlfilename
    def __iter__(self):
        import xmlreader
        dump = xmlreader.XmlDump(self.xmlfilename)
        for entry in dump.parse():
            yield entry
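# Usage sketch: scan a local XML dump instead of hitting the live wiki
# (hypothetical dump filename):
def _example_dump():
    for entry in XmlDumpPageGenerator("itwiki-latest-pages-articles.xml"):
        if "{{Comune" in entry.text:
            wikipedia.output(entry.title)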