Naudotojas:Vpovilaitis/replacewords.py

# -*- coding: utf-8 -*-
"""
This bot will make direct text replacements. It will retrieve information on
which pages might need changes either from an XML dump or a text file, or only
change a single page.
 
These command line parameters can be used to specify which pages to work on:
 
&params;
 
-xml              Retrieve information from a local XML dump (pages-articles
                  or pages-meta-current, see http://download.wikimedia.org).
                  Argument can also be given as "-xml:filename".
 
-page             Only edit a specific page.
                  Argument can also be given as "-page:pagetitle". You can
                  give this parameter multiple times to edit multiple pages.
 
Furthermore, the following command line parameters are supported:
 
-regex            Make replacements using regular expressions. If this argument
                  isn't given, the bot will make simple text replacements.
 
-nocase           Use case insensitive regular expressions.
 
-dotall           Make the dot match any character at all, including a newline.
                  Without this flag, '.' will match anything except a newline.
 
-multiline        '^' and '$' will now match begin and end of each line.
 
-xmlstart         (Only works with -xml) Skip all articles in the XML dump
                  before the one specified (may also be given as
                  -xmlstart:Article).
 
-addcat:cat_name  Adds "cat_name" category to every altered page.
 
-excepttitle:XYZ  Skip pages with titles that contain XYZ. If the -regex
                  argument is given, XYZ will be regarded as a regular
                  expression.
 
-requiretitle:XYZ Only do pages with titles that contain XYZ. If the -regex
                  argument is given, XYZ will be regarded as a regular
                  expression.
 
-excepttext:XYZ   Skip pages which contain the text XYZ. If the -regex
                  argument is given, XYZ will be regarded as a regular
                  expression.
 
-exceptinside:XYZ Skip occurrences of the to-be-replaced text which lie
                  within XYZ. If the -regex argument is given, XYZ will be
                  regarded as a regular expression.
 
-exceptinsidetag:XYZ Skip occurrences of the to-be-replaced text which lie
                  within an XYZ tag.
 
-summary:XYZ      Set the summary message text for the edit to XYZ, bypassing
                  the predefined message texts with original and replacements
                  inserted.
 
-sleep:123        If you use -fix, several regexes may be checked on every
                  page. Running them back to back can consume a lot of CPU,
                  so this option makes the bot pause for the given number of
                  seconds between one regex and the next.
 
-fix:XYZ          Perform one of the predefined replacement tasks, which are
                  given in the dictionary 'fixes' defined inside the file
                  fixes.py.
                  The -regex and -nocase arguments and any replacements given
                  on the command line will be ignored if you use -fix.
                  Currently available predefined fixes are:
&fixes-help;
 
-always           Don't prompt you for each replacement
 
-recursive        Recurse replacement as long as possible. Be careful, this
                  might lead to an infinite loop.
 
-allowoverlap     When occurrences of the pattern overlap, replace all of them.
                  Be careful, this might lead to an infinite loop.
 
other:            First argument is the old text, second argument is the new
                  text. If the -regex argument is given, the first argument
                  will be regarded as a regular expression, and the second
                  argument might contain expressions like \\1 or \g<name>.
 
Examples:
 
If you want to change templates from the old syntax, e.g. {{msg:Stub}}, to the
new syntax, e.g. {{Stub}}, download an XML dump file (pages-articles) from
http://download.wikimedia.org, then use this command:
 
    python replace.py -xml -regex "{{msg:(.*?)}}" "{{\\1}}"
 
If you have a dump called foobar.xml and want to fix typos in articles, e.g.
Errror -> Error, use this:
 
    python replace.py -xml:foobar.xml "Errror" "Error" -namespace:0
 
If you have a page called 'John Doe' and want to fix the format of ISBNs, use:
 
    python replace.py -page:John_Doe -fix:isbn
 
This command will change 'referer' to 'referrer', but not in pages which
talk about HTTP, where the typo has become part of the standard:
 
    python replace.py referer referrer -file:typos.txt -excepttext:HTTP
"""
#
# (C) Daniel Herding & the Pywikipediabot Team, 2004-2008
#
# Distributed under the terms of the MIT license.
#
 
from __future__ import generators
 
import os, codecs, re, time
from string import Template
import wikipedia, pagegenerators
import editarticle
import webbrowser
 
# Imports predefined replacement tasks from fixeswords.py
import fixeswords #, nesusije
 
# This is required for the text that is shown when you run this script
# with the parameter -help.
docuReplacements = {
    '&params;':     pagegenerators.parameterHelp,
    '&fixes-help;': fixeswords.help,
}
 
__version__='$Id: replace.py 6412 2009-02-22 16:13:01Z nicdumz $'
 
 
# Summary messages in different languages
# NOTE: Predefined replacement tasks might use their own dictionary, see 'fixes'
# below.
msg = {
    'ar': u'%s روبوت : استبدال تلقائي للنص',
    'ca': u'Robot: Reemplaçament automàtic de text %s',
    'cs': u'Robot automaticky nahradil text: %s',
    'de': u'Bot: Automatisierte Textersetzung %s',
    'el': u'Ρομπότ: Αυτόματη αντικατάσταση κειμένου %s',
    'en': u'Robot: Automated text replacement %s',
    'es': u'Robot: Reemplazo automático de texto %s',
    'fa': u'ربات: تغییر خودکار متن %s',
    'fr': u'Bot : Remplacement de texte automatisé %s',
    'he': u'בוט: החלפת טקסט אוטומטית %s',
    'hu': u'Robot: Automatikus szövegcsere %s',
    'ia': u'Robot: Reimplaciamento automatic de texto %s',
    'id': u'Bot: Penggantian teks otomatis %s',
    'is': u'Vélmenni: breyti texta %s',
    'it': u'Bot: Sostituzione automatica %s',
    'ja': u'ロボットによる: 文字置き換え %s',
    'ka': u'რობოტი: ტექსტის ავტომატური შეცვლა %s',
    'kk': u'Бот: Мәтінді өздікті алмастырды: %s',
    'ksh': u'Bot: hät outomatesch Täx jetuusch: %s',
    'lt': u'robotas: Automatinis teksto keitimas %s',
    'nds': u'Bot: Text automaatsch utwesselt: %s',
    'nds-nl': u'Bot: autematisch tekse vervungen %s',
    'nl': u'Bot: automatisch tekst vervangen %s',
    'nn': u'robot: automatisk teksterstatning: %s',
    'no': u'robot: automatisk teksterstatning: %s',
    'pl': u'Robot automatycznie zamienia tekst %s',
    'pt': u'Bot: Mudança automática %s',
    'ru': u'Робот: Автоматизированная замена текста %s',
    'sr': u'Бот: Аутоматска замена текста %s',
    'sv': u'Bot: Automatisk textersättning: %s',
    'zh': u'機器人:執行文字代換作業 %s',
}
 
 
class XmlDumpReplacePageGenerator:
    """
    Iterator that will yield Pages that might contain text to replace.
 
    These pages will be retrieved from a local XML dump file.
    Arguments:
        * xmlFilename  - The dump's path, either absolute or relative
        * xmlStart     - Skip all articles in the dump before this one
        * replacements - A list of 2-tuples of original text (as a
                         compiled regular expression) and replacement
                         text (as a string).
        * exceptions   - A dictionary which defines when to ignore an
                         occurrence. See the documentation of the ReplaceRobot
                         constructor below.
 
    """
    def __init__(self, xmlFilename, xmlStart, replacements, exceptions):
        self.xmlFilename = xmlFilename
        self.replacements = replacements
        self.exceptions = exceptions
        self.xmlStart = xmlStart
        self.skipping = bool(xmlStart)
 
        self.excsInside = []
        if 'inside-tags' in self.exceptions:
            self.excsInside += self.exceptions['inside-tags']
        if 'inside' in self.exceptions:
            self.excsInside += self.exceptions['inside']
        import xmlreader
        self.site = wikipedia.getSite()
        dump = xmlreader.XmlDump(self.xmlFilename)
        self.parser = dump.parse()
 
    def __iter__(self):
        try:
            for entry in self.parser:
                if self.skipping:
                    if entry.title != self.xmlStart:
                        continue
                    self.skipping = False
                if not self.isTitleExcepted(entry.title) \
                        and not self.isTextExcepted(entry.text):
                    new_text = entry.text
                    for old, new in self.replacements:
                        new_text = wikipedia.replaceExcept(new_text, old, new, self.excsInside, self.site)
                    if new_text != entry.text:
                        yield wikipedia.Page(self.site, entry.title)
        except KeyboardInterrupt:
            try:
                if not self.skipping:
                    wikipedia.output(
                        u'To resume, use "-xmlstart:%s" on the command line.'
                        % entry.title)
            except NameError:
                pass
 
    def isTitleExcepted(self, title):
        if 'title' in self.exceptions:
            for exc in self.exceptions['title']:
                if exc.search(title):
                    return True
        if 'require-title' in self.exceptions:
            for req in self.exceptions['require-title']:
                if not req.search(title): # if not all requirements are met:
                    return True
 
        return False
 
    def isTextExcepted(self, text):
        if 'text-contains' in self.exceptions:
            for exc in self.exceptions['text-contains']:
                if exc.search(text):
                    return True
        if 'require-text' in self.exceptions:
            ret = True
            for exc in self.exceptions['require-text']:
                if exc.search(text):
                    return False
            return ret                
 
        return False
 
class PageCreateReader:
    def __init__(self, words):
        self.words = words
 
    def run(self):
        #wikipedia.output('Beginning \'%s\'...' % self.filesinfo)
        for page, contents, wordsk,  anton in self.words:
            yield page, contents, wordsk, anton
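# PageCreateReader simply re-yields prepared tuples.  Judging from how
# PageCreateRobot.put() consumes them further below, each tuple is expected to
# look roughly like this (a sketch with hypothetical field values):
#
#   (u'zodis',
#    {'title': u'...', 'contents': u'...', 'auto': True,
#     'reddir': u'', 'nenaud': False},
#    u'zodis_sk', u'antonimas')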
 
class PageNesusijeReader:
    def __init__(self, words):
        self.words = words
 
    def run(self):
        #wikipedia.output('Beginning \'%s\'...' % self.filesinfo)
        for page, topage in self.words:
            try:
                wikipedia.output('Beginning >>> \03{lightpurple}%s\03{default} <<<>>> \03{lightred}%s\03{default} <<<...' % (page, topage))
            except:
                wikipedia.output(u'Except on Output')
            yield page, topage
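# PageNesusijeReader yields (page, topage) title pairs; PageNesusijeRobot.put()
# below adds a {{see|topage}} reference to the page named by 'page'.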
 
class PageNesusijeRobot:
    """
    Responsible for adding {{see|...}} references to existing pages, for the
    (word, toword) title pairs given by a PageNesusijeReader.
    """
 
    msg = {
        'lt': u'Automated creation of articles',
    }
 
    # The following messages are added to topic when the page already exists
    msg_top = {
        'lt': u'append on top',
    }
 
    msg_bottom = {
        'lt': u'append on bottom',
    }
 
    msg_force = {
        'lt': u'existing text overwritten',
    }
 
    append = ''
 
    def __getattr__(self, append):
        return self.append
 
    def __init__(self, reader = False, force = False, append = False, summary = 'Susiejama', minor = False,
                 autosummary = False, debug = False, acceptall=False, acceptallnew=False, quit=False):
        self.reader = reader
        self.force = force
        self.append = append
        self.summary = summary
        self.minor = minor
        self.autosummary = autosummary
        self.debug = debug
        self.acceptall = acceptall
        self.acceptallnew = acceptallnew
        self.quit = quit
 
    def run(self):
        for word, toword in self.reader.run():
            self.put(word, toword)
            if self.quit:
                return
 
    def put(self, word, toword):
        mysite = wikipedia.getSite()
 
        page = wikipedia.Page(mysite, word)
        # Show the title of the page we're working on.
        # Highlight the title in purple.
        try:
            wikipedia.output(u">>> \03{lightpurple}%s\03{default} <<<" % page.title())
        except:
            wikipedia.output(u'Except on Output')
 
        if self.summary:
            comment = self.summary
        else:
            comment = wikipedia.translate(mysite, self.msg)
 
        if page.exists():
            original_text = page.get()
            isnew = True
            new_text = original_text
            needchange = True
            #wikipedia.output(old)
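            # The loop below extends an existing {{see|...}} template instead
            # of adding a second one.  A hand-made illustration (not from a
            # real page), with toword = u'du':
            #   u'{{see|vienas}}'    -> u'{{see|vienas|du}}'
            #   u'{{see|vienas|du}}' -> left unchanged (toword already listed)
            # If the page has no {{see}} template at all, a fresh
            # u'{{see|du}}' line is prepended further below.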
            temp = re.compile(ur'\{\{see\|(?P<parms>[^\}]*)\}\}', re.MULTILINE)
            while temp.search(new_text) is not None and needchange:
                for m in temp.finditer(new_text):
                    text = m.group()
                    isnew = False
                    parms = m.group('parms').strip()
                    oldsee = parms.split('|')
                    see = '{{see'
                    for wd in oldsee:
                        if wd.strip() == toword:
                            needchange = False
                            new_text = original_text
                            break
                        else:
                            see += '|' + wd.strip()
                    if not needchange:
                        break
                    see += '|' + toword + '}}'
                    new_text = new_text.replace(text, see)
                    break
            if isnew:
                new_text = '{{see|' + toword + u'}}\n' + original_text
 
            while True:
                if new_text == original_text:
                    try:
                        wikipedia.output('No changes were necessary in >>> \03{lightpurple}%s\03{default} <<< %s >>>'
                                     % (page.aslink(), new_text[0:50]))
                    except:
                        wikipedia.output(u'Except on Output')
                    break
                # Show the title of the page we're working on.
                # Highlight the title in purple.
                #wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                #                 % page.title())
                try:
                    wikipedia.showDiff(original_text, new_text)
                except:
                    wikipedia.output(u'Except on Output')
                if self.acceptall:
                    break
                choice = wikipedia.inputChoice(
                            u'Do you want to accept these changes?',
                            ['Yes', 'No', 'Edit', 'open in Browser', 'All', "Quit"],
                            ['y', 'N', 'e', 'b', 'a', 'q'], 'N')
                if choice == 'e':
                    editor = editarticle.TextEditor()
                    as_edited = editor.edit(original_text)
                    # if user didn't press Cancel
                    if as_edited and as_edited != new_text:
                        new_text = as_edited
                    continue
                if choice == 'b':
                    webbrowser.open("http://%s%s" % (
                        page.site().hostname(),
                        page.site().nice_get_address(page.title())
                    ))
                    wikipedia.input("Press Enter when finished in browser.")
                    original_text = page.get(get_redirect=True, force=True)
                    new_text = original_text
                    continue
                if choice == 'q':
                    return
                if choice == 'a':
                    self.acceptall = True
                if choice == 'y':
                    page.put_async(new_text)
                # choice must be 'N'
                break
            if self.acceptall and new_text != original_text:
                try:
                    page.put(new_text, comment = comment, minorEdit = self.minor)
                except wikipedia.LockedPage:
                    wikipedia.output(u"Page %s is locked; skipping." % title)
                except wikipedia.EditConflict:
                    wikipedia.output(u'Skipping %s because of edit conflict' % title)
                except wikipedia.SpamfilterError, error:
                    wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (title, error.url))
 
prot = None
 
class PageCreateRobot:
    """
    Responsible for writing pages to the wiki, with the titles and contents
    given by a PageCreateReader.
    """
 
    msg = {
        'lt': u'Automated creation of articles',
    }
 
    # The following messages are added to topic when the page already exists
    msg_top = {
        'lt': u'append on top',
    }
 
    msg_bottom = {
        'lt': u'append on bottom',
    }
 
    msg_force = {
        'lt': u'existing text overwritten',
    }
 
    append = ''
 
    def __getattr__(self, append):
        return self.append
 
    def __init__(self, reader, force, append, summary, minor = False,
                 autosummary = False, debug = False, acceptall=False, acceptallnew=False, quit=False,
                 acceptallnewnotauto=False, prot=None, test=True, fromword = None):
        self.reader = reader
        self.force = force
        self.append = append
        self.summary = summary
        self.minor = minor
        self.autosummary = autosummary
        self.debug = debug
        self.acceptall = acceptall
        self.acceptallnew = acceptallnew
        self.quit = quit
        self.acceptallnewnotauto=acceptallnewnotauto
        self.prot = prot
        self.test = test
        self.fromword = fromword
        self.skip = False
        if self.fromword is not None:
            self.skip = True
 
    def run(self):
        for word, contents, wordsk, anton in self.reader.run():
            self.put(word, contents['title'], contents['contents'], contents['auto'], contents['reddir'], contents['nenaud'])
            if self.quit:
                return None
        return self.fromword
 
    def put(self, word, title, contents, auto, reddir, nenaud):
        if self.fromword is not None:
            if word == self.fromword:
                self.skip = False
                self.fromword = None
        if self.skip:
            return
        wikipedia.output(u">>> \03{lightpurple}%s\03{default} <<<" % word)
        #wikipedia.output(u">>> \03{lightpurple}%s\03{default} <<<\n%s" % (word,contents))
        if self.test:
            self.prot.write('# [[%s]]\r\n' % word)
            return
        mysite = wikipedia.getSite()
 
        page = wikipedia.Page(mysite, word)
        # Show the title of the page we're working on.
        # Highlight the title in purple.
        # wikipedia.output(u">>> \03{lightpurple}%s\03{default} <<<" % page.title())
 
        if self.summary:
            comment = self.summary
        else:
            comment = wikipedia.translate(mysite, self.msg)
 
        comment_top = comment + " - " + wikipedia.translate(mysite, self.msg_top)
        comment_bottom = comment + " - " + wikipedia.translate(mysite, self.msg_bottom)
        comment_force = comment + " *** " + wikipedia.translate(mysite, self.msg_force) + " ***"

        rezfile = u'Zodziai/'
 
        if page.exists():
            old = page.get(get_redirect=True)
            try:
                wikipedia.output(old)
            except:
                wikipedia.output(u'Except on Output')
                
            if page.isRedirectPage():
                _fnf = os.path.join('', rezfile+word+'.txt')
                wprot = codecs.open(_fnf, "a+", "utf-8")
                wprot.write('%s' % title + '\n' + contents)
                return
            
##            if self.test:
##                self.prot.write('# [[%s]] - egzistavo, reikia patikrinti\r\n' % word)
##                return
            interwiki = wikipedia.interwikiFormat(wikipedia.getLanguageLinks(old))
            #wikipedia.output('Interwiki: %s\n' % interwiki)
            cat = wikipedia.categoryFormat(wikipedia.getCategoryLinks(old,mysite))
            #wikipedia.output('Kategorijos: %s\n' % cat)
            txt = wikipedia.removeLanguageLinks(old)
            txt = wikipedia.removeCategoryLinks(txt,mysite)
            #wikipedia.output('text: %s\n' % txt)
            #wikipedia.output('text: %s\n' % (txt + '\n' + contents + '\n' + cat + '\n' + interwiki))
 
            if self.append == None:
                self.append = wikipedia.inputChoice(
                            u'Page <<%s>> already exists. What should be done?\n'  % word,
                            ['Top', 'Bottom', 'Change', 'Skip'],
                            ['t', 'b', 'c', 's'], 's')
 
            if self.append == "t":
                wikipedia.output(u"Page %s already exists, appending on top!" % word)
                contents = title + '\n' + contents + '\n' + txt + '\n'
                if self.acceptallnewnotauto or self.force:
                    #contents += '{{patikrinti}}\n'
                    pass
                contents += cat + '\n' + interwiki
                comment = comment_top
            elif self.append == "b":
                wikipedia.output(u"Page %s already exists, appending on bottom!" % word)
                contents = txt + '\n' + contents + '\n'
                if self.acceptallnewnotauto or self.force:
                    #contents += '{{patikrinti}}\n'
                    pass
                contents += cat + '\n' + interwiki
                comment = comment_bottom
            elif self.append == "c":
                wikipedia.output(u"Page %s already exists, ***overwriting!" % word)
                contents = title + '\n' + contents + '\n'
                if self.acceptallnewnotauto or self.force:
                    contents += '{{patikrinti}}\n'
                    pass
                contents += cat + '\n' + interwiki
                comment = comment_force
            else:
                contents = old
 
            if not self.acceptall or not self.force or not auto:
                choice = None
                if not auto:
                    if self.acceptallnewnotauto and not self.force:
                        choice = 'y'
                    elif self.force:
                        choice = 'y'
                    else:
                        choice = 'e'
                else:
                    if self.force:
                        choice = 'y'
                    else:
                        choice = wikipedia.inputChoice(
                                u'\nDo you want to accept these changes?\n',
                                ['Yes', 'No', 'Edit', 'Browser', 'All', 'AllOld', 'Skip', 'Change', "Quit"],
                                ['y', 'N', 'e', 'b', 'a', 'ao', 's', 'c', 'q'], 'N')
                if choice == 'e':
                    editor = editarticle.TextEditor()
                    as_edited = editor.edit(contents)
                    # if user didn't press Cancel
                    if as_edited and as_edited != contents:
                        contents = as_edited
                elif choice == 'b':
                    webbrowser.open("http://%s%s" % (
                        page.site().hostname(),
                        page.site().nice_get_address(page.title())
                    ))
                    wikipedia.input("Press Enter when finished in browser.")
                    old = page.get(get_redirect=True, force=True)
                    contents = old
                elif choice == 'a':
                    self.acceptall = True
                elif choice == 'ao':
                    self.acceptall = True
                elif choice == 's':
                    self.append = 's'
                    wikipedia.output(u"Page %s already exists, not adding!" % word)
                    return
                elif choice == 'c':
                    self.append = None
                elif choice == 'q':
                    wikipedia.output(u"Page %s already exists, not adding!" % word)
                    self.quit = True
                    contents = old
                    return
                elif choice != 'y':
                    wikipedia.output(u"Page %s already exists, not adding!" % word)
                    contents = old
                    return
                if choice == 'c':            
                    choice = wikipedia.inputChoice(
                                u'\nDo you want to accept these changes?\n',
                                ['Yes', 'No', 'Edit', 'Browser', 'All', 'AllOld', 'Skip', "Quit"],
                                ['y', 'N', 'e', 'b', 'a', 'ao', 's', 'q'], 'N')
                    if choice == 'e':
                        editor = editarticle.TextEditor()
                        as_edited = editor.edit(contents)
                        # if user didn't press Cancel
                        if as_edited and as_edited != contents:
                            contents = as_edited
                    elif choice == 'a':
                        self.acceptall = True
                    elif choice == 'ao':
                        self.acceptall = True
                    elif choice == 'b':
                        webbrowser.open("http://%s%s" % (
                            page.site().hostname(),
                            page.site().nice_get_address(page.title())
                        ))
                        wikipedia.input("Press Enter when finished in browser.")
                        old = page.get(get_redirect=True, force=True)
                        contents = old
                    elif choice == 's':
                        wikipedia.output(u"Not adding!")
                        self.append = 's'
                        return
                    elif choice == 'q':
                        wikipedia.output(u"Not adding!")
                        self.quit = True
                        return
                    elif choice != 'y':
                        wikipedia.output(u"Not adding!")
                        return
                if old == contents:
                    wikipedia.output(u"Page %s already exists, not adding!" % word)
                    return
        else:
##            if self.test: auto, reddir, nenaud
##                return
 
            if nenaud and auto:
                _fnf = os.path.join('', rezfile+word+'.txt')
                wprot = codecs.open(_fnf, "a+", "utf-8")
                wprot.write('%s' % title + '\n' + contents)
                contents = reddir
            else:
                contents = title + '\n' + contents
            
            if not self.acceptallnew or not auto:            
                choice = None
                if not auto:
                    if self.acceptallnewnotauto:
                        contents = contents #+ '{{patikrinti}}\n'
                        choice = 'y'
                    else:
                        choice = 'e'
                else:
                    choice = wikipedia.inputChoice(
                            u'\nDo you want to accept these changes?\n',
                            ['Yes', 'No', 'Edit', 'All', 'AllNew', 'Skip', 'Change', "Quit"],
                            ['y', 'N', 'e', 'a', 'an', 's', 'c', 'q'], 'N')
                if choice == 'e':
                    editor = editarticle.TextEditor()
                    as_edited = editor.edit(contents)
                    # if user didn't press Cancel
                    if as_edited and as_edited != contents:
                        contents = as_edited
                elif choice == 'a':
                    self.acceptallnew = True
                elif choice == 'an':
                    self.acceptallnew = True
                elif choice == 's':
                    wikipedia.output(u"Not adding!")
                    self.append = 's'
                    return
                elif choice == 'c':
                    self.append = None
                elif choice == 'q':
                    wikipedia.output(u"Not adding!")
                    self.quit = True
                    return
                elif choice != 'y':
                    wikipedia.output(u"Not adding!")
                    return
                if choice == 'c':            
                    choice = wikipedia.inputChoice(
                                u'\nDo you want to accept these changes?\n',
                                ['Yes', 'No', 'Edit', 'All', 'AllNew', 'Skip', "Quit"],
                                ['y', 'N', 'e', 'a', 'an', 's', 'q'], 'N')
                    if choice == 'e':
                        editor = editarticle.TextEditor()
                        as_edited = editor.edit(contents)
                        # if user didn't press Cancel
                        if as_edited and as_edited != contents:
                            contents = as_edited
                    elif choice == 'a':
                        self.acceptallnew = True
                    elif choice == 'an':
                        self.acceptallnew = True
                    elif choice == 's':
                        wikipedia.output(u"Not adding!")
                        self.append = 's'
                        return
                    elif choice == 'q':
                        wikipedia.output(u"Not adding!")
                        self.quit = True
                        return
                    elif choice != 'y':
                        wikipedia.output(u"Not adding!")
                        return
            if self.autosummary:
                comment = ''
                wikipedia.setAction('')
 
        # Remove leading newlines (they cause trouble when creating redirects)
        contents = re.sub('^[\r\n]*','', contents)
 
        if self.debug:
            wikipedia.output("*** Debug mode ***\n" + \
                "\03{lightpurple}word\03{default}: " + word + "\n" + \
                "\03{lightpurple}contents\03{default}:\n" + contents + "\n" \
                "\03{lightpurple}comment\03{default}: " + comment + "\n")
            return
 
        try:
            page.put(contents, comment = comment, minorEdit = self.minor)
        except wikipedia.LockedPage:
            wikipedia.output(u"Page %s is locked; skipping." % title)
        except wikipedia.EditConflict:
            wikipedia.output(u'Skipping %s because of edit conflict' % title)
        except wikipedia.SpamfilterError, error:
            wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (title, error.url))
 
class ReplaceRobot:
    """
    A bot that can do text replacements.
    """
    def __init__(self, generator, replacements, exceptions={},
                 acceptall=False, allowoverlap=False, recursive=False,
                 addedCat=None, sleep=None):
        """
        Arguments:
            * generator    - A generator that yields Page objects.
            * replacements - A list of 2-tuples of original text (as a
                             compiled regular expression) and replacement
                             text (as a string).
            * exceptions   - A dictionary which defines when not to change an
                             occurrence. See below.
            * acceptall    - If True, the user won't be prompted before changes
                             are made.
            * allowoverlap - If True, when matches overlap, all of them are
                             replaced.
            * addedCat     - If set to a value, add this category to every page
                             touched.
 
        Structure of the exceptions dictionary:
        This dictionary can have these keys:
 
            title
                A list of regular expressions. All pages with titles that
                are matched by one of these regular expressions are skipped.
            text-contains
                A list of regular expressions. All pages with text that
                contains a part which is matched by one of these regular
                expressions are skipped.
            inside
                A list of regular expressions. All occurrences which lie within
                a text region matched by one of these regular expressions are
                skipped.
            inside-tags
                A list of strings. These strings must be keys from the
                exceptionRegexes dictionary in wikipedia.replaceExcept().
 
        """
        self.generator = generator
        self.replacements = replacements
        self.exceptions = exceptions
        self.acceptall = acceptall
        self.allowoverlap = allowoverlap
        self.recursive = recursive
        if addedCat:
            site = wikipedia.getSite()
            cat_ns = site.category_namespaces()[0]
            self.addedCat = wikipedia.Page(site,
                                           cat_ns + ':' + addedCat)
        self.sleep = sleep
        #wikipedia.output(u'fix gen = %s' % self.replacements)
 
    def isTitleExcepted(self, title):
        """
        Iff one of the exceptions applies for the given title, returns True.
        """
        if 'title' in self.exceptions:
            for exc in self.exceptions['title']:
                if exc.search(title):
                    return True
        if 'require-title' in self.exceptions:
            for req in self.exceptions['require-title']:
                if not req.search(title):
                    return True
        return False
 
    def isTextExcepted(self, original_text):
        """
        Iff one of the exceptions applies for the given page contents,
        returns True.
        """
        if 'text-contains' in self.exceptions:
            for exc in self.exceptions['text-contains']:
                if exc.search(original_text):
                    return True
        # NOTE: 'require-title' exceptions are handled in isTitleExcepted();
        # they cannot be checked here because only the page text is available.
        if 'require-text' in self.exceptions:
            ret = True
            for exc in self.exceptions['require-text']:
                if exc.search(original_text):
                    return False
            return ret
        return False
 
    def doReplacements(self, original_text):
        """
        Returns the text which is generated by applying all replacements to
        the given text.
        """
        new_text = original_text
        exceptions = []
        if 'inside-tags' in self.exceptions:
            exceptions += self.exceptions['inside-tags']
        if 'inside' in self.exceptions:
            exceptions += self.exceptions['inside']
        for old, new in self.replacements:
            if self.sleep != None:
                time.sleep(self.sleep)
            new_text = wikipedia.replaceExcept(new_text, old, new, exceptions,
                                               allowoverlap=self.allowoverlap)
        return new_text
 
    def run(self):
        """
        Starts the robot.
        """
        # Run the generator which will yield Pages which might need to be
        # changed.
        #wikipedia.output(u'fix gen = %s' % self.generator)
        #return
        for page in self.generator:
            wikipedia.output(
                u'Processing %s ...'
                % page.aslink())
            if self.isTitleExcepted(page.title()):
                wikipedia.output(
                    u'Skipping %s because the title is on the exceptions list.'
                    % page.aslink())
                continue
            try:
                # Load the page's text from the wiki
                original_text = page.get(get_redirect=True)
                if not page.canBeEdited():
                    wikipedia.output(u"You can't edit page %s"
                                     % page.aslink())
                    continue
            except wikipedia.NoPage:
                wikipedia.output(u'Page %s not found' % page.aslink())
                continue
            new_text = original_text
            while True:
                if self.isTextExcepted(new_text):
                    wikipedia.output(
    u'Skipping %s because it contains text that is on the exceptions list.'
                        % page.aslink())
                    break
                new_text = self.doReplacements(new_text)
                if new_text == original_text:
                    wikipedia.output('No changes were necessary in %s'
                                     % page.aslink())
                    break
                if self.recursive:
                    newest_text = self.doReplacements(new_text)
                    while newest_text!=new_text:
                        new_text = newest_text
                        newest_text = self.doReplacements(new_text)
                if hasattr(self, "addedCat"):
                    cats = page.categories(nofollow_redirects=True)
                    if self.addedCat not in cats:
                        cats.append(self.addedCat)
                        new_text = wikipedia.replaceCategoryLinks(new_text,
                                                                  cats)
                # Show the title of the page we're working on.
                # Highlight the title in purple.
                wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                                 % page.title())
                wikipedia.showDiff(original_text, new_text)
                if self.acceptall:
                    break
                choice = wikipedia.inputChoice(
                            u'Do you want to accept these changes?',
                            ['Yes', 'No', 'Edit', 'open in Browser', 'All', "Quit"],
                            ['y', 'N', 'e', 'b', 'a', 'q'], 'N')
                if choice == 'e':
                    editor = editarticle.TextEditor()
                    as_edited = editor.edit(original_text)
                    # if user didn't press Cancel
                    if as_edited and as_edited != new_text:
                        new_text = as_edited
                    continue
                if choice == 'b':
                    webbrowser.open("http://%s%s" % (
                        page.site().hostname(),
                        page.site().nice_get_address(page.title())
                    ))
                    wikipedia.input("Press Enter when finished in browser.")
                    original_text = page.get(get_redirect=True, force=True)
                    new_text = original_text
                    continue
                if choice == 'q':
                    return
                if choice == 'a':
                    self.acceptall = True
                if choice == 'y':
                    page.put_async(new_text)
                # choice must be 'N'
                break
            if self.acceptall and new_text != original_text:
                try:
                    page.put(new_text)
                except wikipedia.EditConflict:
                    wikipedia.output(u'Skipping %s because of edit conflict'
                                     % (page.title(),))
                except wikipedia.SpamfilterError, e:
                    wikipedia.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                except wikipedia.PageNotSaved, error:
                    wikipedia.output(u'Error putting page: %s'
                                     % (error.args,))
                except wikipedia.LockedPage:
                    wikipedia.output(u'Skipping %s (locked page)'
                                     % (page.title(),))
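# A minimal sketch of driving ReplaceRobot directly (the page title and the
# replacement pair are made-up; replace.py normally does this wiring in
# main()):
#
#   site = wikipedia.getSite()
#   pages = [wikipedia.Page(site, u'Pavyzdys')]
#   replacements = [(re.compile(u'senas'), u'naujas')]
#   ReplaceRobot(pages, replacements, exceptions={}, acceptall=True).run()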
 
def prepareRegexForMySQL(pattern):
    pattern = pattern.replace('\s', '[:space:]')
    pattern = pattern.replace('\d', '[:digit:]')
    pattern = pattern.replace('\w', '[:alnum:]')
 
    pattern = pattern.replace("'", "\\" + "'")
    #pattern = pattern.replace('\\', '\\\\')
    #for char in ['[', ']', "'"]:
    #    pattern = pattern.replace(char, '\%s' % char)
    return pattern
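# Rough illustration of the mapping above (a sketch; only the character
# classes handled in the function are translated):
#   prepareRegexForMySQL(r"\d+ o'clock")  ->  "[:digit:]+ o\'clock"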
 
def noskiem(word):
    return word.replace('~', '')
 
def skiem(word):
    return word.replace('~~', '-').replace('~','')
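# Quick illustration of the two helpers above (assuming '~' marks a syllable
# boundary in the word lists and '~~' marks where a hyphen should be kept):
#   noskiem(u'zo~dis')     -> u'zodis'
#   skiem(u'kas~~die~na')  -> u'kas-diena'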
 
def cikleWords (wordsList, force, append, acceptallnew=False, acceptallnewnotauto=False,
                acceptallgroups=False, prot=None, test=True, fromword = None, zodziai={}):
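    # Rough outline, inferred from the code below: each entry of wordsList is
    # a dict describing one base word (stem parts s1..s5, ending parts
    # fs0..fs5, part of speech 'dalis', form 'form', synonym/expression
    # lists, ...).  The matching entry of fixeswords.wordforms is looked up,
    # its templates are expanded with the word's fields, and the derived
    # forms are collected for further processing.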
    acceptallgroup = acceptallgroups
    for word in wordsList:
        #wikipedia.output(u'word <<< %s >>>.' % word)
        wordform = fixeswords.wordforms[word['dalis']][word['form']]
        eti = u''
        if word['fs0'] != u'':
            t1 = Template(wordform[word['fs0']])
            eti = t1.substitute(s1 = noskiem(word['s1']), s2 = noskiem(word['s2']),
                                s3 = noskiem(word['s3']), s4 = noskiem(word['s4']), s5 = noskiem(word['s5']),
                                fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'],
                                fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])
##        if bot.append == 's':
##            bot.append = None
        subwordsList = []
        for word2 in wordform['wordsList']:
            #wikipedia.output(u'word2 <<< %s >>>.' % word2)
            #wikipedia.output(u'word <<< %s >>>.' % word)
            reiksmesimas = ''
            reiksmesejas = ''
            reiksmeseja = ''
            reiksmesojas = ''
            reiksmesoja = ''
            if u'reiksmesimas' in word:
                reiksmesimas = word[u'reiksmesimas']
            if u'reiksmesejas' in word:
                reiksmesejas = word[u'reiksmesejas']
            if u'reiksmeseja' in word:
                reiksmeseja = word[u'reiksmeseja']
            if u'reiksmesojas' in word:
                reiksmesojas = word[u'reiksmesojas']
            if u'reiksmesoja' in word:
                reiksmesoja = word[u'reiksmesoja']
            
            lg1 = Template(word2['g1'])
            lg1 = lg1.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                               fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])
            ls1 = Template(word2['s1'])
            ls1 = ls1.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                               fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])
            ls2 = Template(word2['s2'])
            ls2 = ls2.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                               fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])
            ls3 = Template(word2['s3'])
            ls3 = ls3.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                               fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])
            ls4 = Template(word2['s4'])
            ls4 = ls4.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                               fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])
            ls5 = Template(word2['s5'])
            ls5 = ls5.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                               fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])
            lfs0 = Template(word2['fs0'])
            lfs0 = noskiem(lfs0.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                               fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']))
            lfs1 = Template(word2['fs1'])
            lfs1 = noskiem(lfs1.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                               fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']))
            lfs2 = Template(word2['fs2'])
            lfs2 = noskiem(lfs2.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                               fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']))
            lfs3 = Template(word2['fs3'])
            lfs3 = noskiem(lfs3.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                               fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']))
            lfs4 = Template(word2['fs4'])
            lfs4 = noskiem(lfs4.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                               fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']))
            lfs5 = Template(word2['fs5'])
            lfs5 = noskiem(lfs5.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                               fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']))
            ltipas = Template(word2['tipas'])
            ltipas = noskiem(ltipas.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                               fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']))
            lreiksmes = Template(word2['reiksmes'])
            lreiksmes = noskiem(lreiksmes.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                               fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'],
                                                     tipas = word['tipas'], form = word['form'],
                                                     reiksmesimas = reiksmesimas, reiksmesejas = reiksmesejas, reiksmeseja = reiksmeseja,
                                                     reiksmesojas = reiksmesojas, reiksmesoja = reiksmesoja))
            lpozymis = Template(word2['pozymis'])
            lpozymis = noskiem(lpozymis.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                               fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'],
                                                   pozymis = word['pozymis'], x = u'${x}'))
            sins = word2['sinonimai']
            lsinonimai = []
            for sin in sins:
                lsinonimai.append(sin)
            isrs = word2['israiskos']
            lisraiskos = []
            for isr in isrs:
                lisraiskos.append(isr)
 
            newword = {'dalis': word2['dalis'],
                                'form': word2['form'],
                                'g1': lg1, 's1': ls1, 's2': ls2, 's3': ls3, 's4': ls4, 's5': ls5,
                                'fs0': lfs0, 'fs1': lfs1, 'fs2': lfs2,
                                'pozymis': lpozymis,
                                'israiskos': lisraiskos,
                                'sinonimai': lsinonimai,
                                'reiksmes': lreiksmes,
                                'fs3': lfs3, 'fs4': eti, 'fs5': lfs5, 'tipas': ltipas,
                      }
            if u'List' in word2:
                newword['List'] = word2['List']
            if '[[neig.]] [[neig.]] ' not in ltipas:
                subwordsList.append(newword)
 
        #wikipedia.output(u'subwordsList <<< %s >>>.' % subwordsList)
        #return
        if u'List' in word:
            for word2 in word['List']:
                #wikipedia.output(u'word2 <<< %s >>>.' % word2)
                #wikipedia.output(u'word <<< %s >>>.' % word)
                reiksmesimas = ''
                reiksmesejas = ''
                reiksmeseja = ''
                reiksmesojas = ''
                reiksmesoja = ''
                if u'reiksmesimas' in word2:
                    reiksmesimas = word2[u'reiksmesimas']
                if u'reiksmesejas' in word2:
                    reiksmesejas = word2[u'reiksmesejas']
                if u'reiksmeseja' in word2:
                    reiksmeseja = word2[u'reiksmeseja']
                if u'reiksmesojas' in word2:
                    reiksmesojas = word2[u'reiksmesojas']
                if u'reiksmesoja' in word2:
                    reiksmesoja = word2[u'reiksmesoja']
            
                lg1 = Template(word2['g1'])
                lg1 = lg1.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                                   fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])
                ls1 = Template(word2['s1'])
                ls1 = ls1.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                                   fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])
                ls2 = Template(word2['s2'])
                ls2 = ls2.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                                   fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])
                ls3 = Template(word2['s3'])
                ls3 = ls3.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                                   fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])
                ls4 = Template(word2['s4'])
                ls4 = ls4.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                                   fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])
                ls5 = Template(word2['s5'])
                ls5 = ls5.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                                   fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])
                lfs0 = Template(word2['fs0'])
                lfs0 = noskiem(lfs0.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                                   fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']))
                lfs1 = Template(word2['fs1'])
                lfs1 = noskiem(lfs1.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                                   fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']))
                lfs2 = Template(word2['fs2'])
                lfs2 = noskiem(lfs2.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                                   fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']))
                lfs3 = Template(word2['fs3'])
                lfs3 = noskiem(lfs3.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                                   fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']))
                lfs4 = Template(word2['fs4'])
                lfs4 = noskiem(lfs4.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                                   fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']))
                lfs5 = Template(word2['fs5'])
                lfs5 = noskiem(lfs5.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                                   fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']))
                ltipas = Template(word2['tipas'])
                ltipas = noskiem(ltipas.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                                   fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']))
                lreiksmes = Template(word2['reiksmes'])
                lreiksmes = noskiem(lreiksmes.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                                   fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'],
                                                         tipas = word['tipas'], form = word['form'],
                                                         reiksmesimas = reiksmesimas, reiksmesejas = reiksmesejas, reiksmeseja = reiksmeseja,
                                                         reiksmesojas = reiksmesojas, reiksmesoja = reiksmesoja))
                lpozymis = Template(word2['pozymis'])
                lpozymis = noskiem(lpozymis.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                                   fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'],
                                                       pozymis = word['pozymis'], x = u'${x}'))
                sins = word2['sinonimai']
                lsinonimai = []
                for sin in sins:
                    lsinonimai.append(sin)
                isrs = word2['israiskos']
                lisraiskos = []
                for isr in isrs:
                    lisraiskos.append(isr)
                newword = {'dalis': word2['dalis'],
                           'form': word2['form'],
                           'g1': lg1, 's1': ls1, 's2': ls2, 's3': ls3, 's4': ls4, 's5': ls5,
                           'fs0': lfs0, 'fs1': lfs1, 'fs2': lfs2,
                           'pozymis': lpozymis,
                           'israiskos': lisraiskos,
                           'sinonimai': lsinonimai,
                           'reiksmes': lreiksmes,
                           'fs3': lfs3, 'fs4': lfs4, 'fs5': lfs5, 'tipas': ltipas,
                           }
                if u'List' in word2:
                    newword['List'] = word2['List']
                if '[[neig.]] [[neig.]] ' not in ltipas:
                    subwordsList.append(newword)
 
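        # Collect, as sorted wiki bullet lines ("* ..."), references to the
        # derived words gathered in subwordsList; this list is reused below
        # for the "Išvestiniai žodžiai" (derived words) section of each entry.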
        wordsll = []
        for wrd in subwordsList:
            wrdform = fixeswords.wordforms[wrd['dalis']][wrd['form']]
            wrdref = Template(wrdform['ref'])
            wrdref = wrdref.substitute(s1 = noskiem(wrd['s1']), s2 = noskiem(wrd['s2']), s3 = noskiem(wrd['s3']),
                                       s4 = noskiem(wrd['s4']), s5 = noskiem(wrd['s5']),
                                       fs0 = wrd['fs0'], fs1 = wrd['fs1'], fs2 = wrd['fs2'], fs3 = wrd['fs3'],
                                       fs4 = wrd['fs4'], fs5 = wrd['fs5'], tipas = wrd['tipas'], form = wrd['form'])
            if wrdref != u'':
                wordsllt = u'* '+wrdref
                if wrd[u'tipas'] != u'':
                    wordsllt += u' ('+wrd[u'tipas']+u')'
                pozymis = Template(wrd['pozymis'])
                pozymis = pozymis.substitute(x = u'{{x}}')
                wordsllt += pozymis
                wordsll.append(wordsllt)
                
        wordsll.sort()
        for wrd in wordsll:
            wikipedia.output(u'    <<<\03{lightpurple}%s\03{default}>>> ' % wrd)
 
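        # For every form defined for this word, fill in the form's title and
        # text templates, prepare a #REDIRECT pointing to the base spelling
        # (s1 + g1 without syllable marks) and queue the page for creation.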
        Words = []
        for wordtemp, form in wordform['forms']:
            t1 = Template(wordtemp)
            t1 = t1.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                               fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'],
                               fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])
            skiemt1 = skiem(t1)
            noskiemt1 = noskiem(t1)
            ps1 = noskiem(word['s1'])
            pg1 = noskiem(word['g1'])
            tf = Template(form)
            tf = tf.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'],
                               fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'],
                               fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])
            t2 = Template(wordform[tf]['text'])
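            # "Sinonimai" (synonyms) section: one {{t+|lt|...}} line per
            # synonym, split into two columns with {{sin-mid}}.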
            sinonimai = u''
            if len(word['sinonimai']) > 0:
                sinonimai = u'==== Sinonimai ====\n\n{{sin-top|kalba=lt|vardas='+noskiemt1+u'|tipas=bendrai}}\n'
                for wordii,sin in enumerate(word['sinonimai']):
                    sinonimai += u'* {{t+|lt|'+sin+u'}}\n'
                    if wordii < len(word['sinonimai'])/2.0 <= wordii+1:
                        sinonimai += u'{{sin-mid}}\n'
                sinonimai += u'{{sin-bottom}}\n\n'
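            # "Antonimai" (antonyms) section. For words already carrying the
            # [[ne-]] prefix the antonym is the word without "ne"; for
            # reflexive words ({{sangrž.}}) "nesi" is prepended and the final
            # reflexive particle (-si/-s) is dropped; otherwise "ne" is
            # simply prepended.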
            antonimai = u''
            antonimas = u''
            if noskiemt1.find('ne') == 0 and ('[[ne-]]' in word['fs4']
                                              or '[[ne-]]' in word['fs5']):
                antonimas = noskiemt1[2:]
                antonimai = u'==== Antonimai ====\n\n{{ant-top|kalba=lt|vardas='+noskiemt1+u'|tipas=bendrai}}\n'
                antonimai += u'* {{t+|lt|'+noskiemt1[2:]+u'}}\n'
                antonimai += u'{{ant-mid}}\n{{ant-bottom}}\n\n'
            else:
                if word['fs5'].find(u'{{sangrž.}}') == 0:
                    antonimas = 'nesi'+noskiemt1
                    if antonimas.endswith('si'):
                        antonimas = antonimas[:-2]
                    if antonimas.endswith('s'):
                        antonimas = antonimas[:-1]
                    antonimai = u'==== Antonimai ====\n\n{{ant-top|kalba=lt|vardas='+noskiemt1+u'|tipas=bendrai}}\n'
                    antonimai += u'* {{t+|lt|'+antonimas+u'}}\n'
                    antonimai += u'{{ant-mid}}\n{{ant-bottom}}\n\n'
                else:
                    antonimas = 'ne'+noskiemt1
                    antonimai = u'==== Antonimai ====\n\n{{ant-top|kalba=lt|vardas='+noskiemt1+u'|tipas=bendrai}}\n'
                    antonimai += u'* {{t+|lt|ne'+noskiemt1+u'}}\n'
                    antonimai += u'{{ant-mid}}\n{{ant-bottom}}\n\n'
            isvestiniai = u''
            if len(wordsll) > 0:
                isvestiniai = u'==== Išvestiniai žodžiai ====\n\n{{rel-top|kalba=lt|vardas='+noskiemt1+u'|tipas=Išvestiniai žodžiai}}\n'
                for wordii,wrd in enumerate(wordsll):
                    isvestiniai += wrd+u'\n'
                    if wordii < len(wordsll)/2.0 <= wordii+1:
                        isvestiniai = isvestiniai + u'{{rel-mid}}\n'
                isvestiniai += u'{{rel-bottom}}\n\n'
            israiskos = u''
            if len(word['israiskos']) > 0:
                israiskos = u'==== Išraiškos arba posakiai ====\n\n{{rel-top|kalba=lt|vardas='+noskiemt1+u'|tipas=Išraiškos arba posakiai}}\n'
                for wordii,isr in enumerate(word['israiskos']):
                    israiskos += u'* {{t+|lt|'+isr+u'}}\n'
                    if wordii < len(word['israiskos'])/2.0 <= wordii+1:
                        israiskos += u'{{rel-mid}}\n'
                israiskos += u'{{rel-bottom}}\n\n'
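            # "Vertimai" (translations): scan the meanings ('reiksmes') for
            # numbered sense lines, strip the wiki links from each sense label
            # and emit an empty {{trans-top}}...{{trans-bottom}} table per sense.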
            vertimai = u''
            sabl = re.compile(u'^\#+\s+(?P<tipas>.*?)(?:\:|\.)$', re.M)
            inmatch = 0
            match = sabl.search(word['reiksmes'], inmatch)
            while match != None:
                inmatch = match.end()
                vtipas = match.group('tipas')
                if vtipas != None:
                    sabl1 = re.compile(u'\[\[(?P<n1>[^\[\]\|]*?)\|(?P<n2>[^\[\]\|]*?)\]\]', re.M)
                    match1 = sabl1.search(vtipas, 0)
                    while match1 != None:
                        n1 = match1.group('n1')
                        n2 = match1.group('n2')
                        vtipas = vtipas[:match1.start()] + n2 + vtipas[match1.end():]
                        match1 = sabl1.search(vtipas, 0)
                    sabl1 = re.compile(u'\[\[(?P<n1>[^\[\]\|]*?)\]\]', re.M)
                    match1 = sabl1.search(vtipas, 0)
                    while match1 != None:
                        n1 = match1.group('n1')
                        vtipas = vtipas[:match1.start()] + n1 + vtipas[match1.end():]
                        match1 = sabl1.search(vtipas, 0)
                    vertimai += u'{{trans-top|kalba=lt|vardas=' + noskiemt1 + u'|tipas='+ vtipas + u'}}\n' + \
                                u'{{trans-mid}}\n' + \
                                u'{{trans-bottom}}\n'
                match = sabl.search(word['reiksmes'], inmatch)

            # Remember, per headword, whether any non-automatic form defines
            # it ('pagrf'); once True the flag stays True.
            if noskiemt1 not in zodziai:
                zodziai[noskiemt1] = {}
            if not zodziai[noskiemt1].get(u'pagrf'):
                zodziai[noskiemt1][u'pagrf'] = not wordform[tf]['auto']

            pozymis = Template(word['pozymis'])
            if zodziai[noskiemt1][u'pagrf']:
                pozymis = pozymis.substitute(x = u'{{x|lt|p}}')
            else:
                pozymis = pozymis.substitute(x = u'{{x|lt|}}')

            reddir = Template(u'#REDIRECT [[${reddir}]]')
            reddir = reddir.substitute(reddir = ps1+pg1)

            sabl2 = re.compile(u'\{\{x\|lt\|p?\}\}', re.M)
            match2 = sabl2.search(pozymis, 0)
            nenaud = False
            if match2 != None:
                nenaud = True

            t2 = t2.substitute(s1 = noskiem(word['s1']), s2 = noskiem(word['s2']),
                               s3 = noskiem(word['s3']), s4 = noskiem(word['s4']), s5 = noskiem(word['s5']),
                               fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'],
                               fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'],
                               reiksmes = word['reiksmes'], pozymis = pozymis,
                               sinonimai = sinonimai, antonimai = antonimai, isvestiniai = isvestiniai, israiskos = israiskos,
                               vertimai = vertimai, word = noskiemt1, words = skiemt1)
            sabl3 = re.compile(u'\[\[\]\]\, \[\[\]\]\, \[\[\]\]', re.M)
            match3 = sabl3.search(t2, 0)
            t3 = Template(wordform[tf]['title'])
            if match3 != None:
                t2 = t2[:match3.start()] + t2[match3.end():]
                
            t3 = t3.substitute(s1 = noskiem(word['s1']), s2 = noskiem(word['s2']),
                               s3 = noskiem(word['s3']), s4 = noskiem(word['s4']), s5 = noskiem(word['s5']),
                               fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'],
                               fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'],
                               word = noskiemt1, words = skiemt1)
            Words.append((noskiemt1, {'title':t3, 'contents':t2, 'auto':not zodziai[noskiemt1][u'pagrf'],
                                      'reddir':reddir, 'nenaud':nenaud}, skiemt1, antonimas))
 
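        # Hand the queued pages of this group over to the creation robot.
        # "Bus formuojami:" = "The following will be generated:".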
        preloadingGen = PageCreateReader(Words)
        bot = PageCreateRobot(preloadingGen, force, append, wordform['msg'], acceptallnew=acceptallnew,
                              acceptallnewnotauto=acceptallnewnotauto, prot=prot, test=test, fromword = fromword)
        wikipedia.output(u'Bus formuojami: ')
        for wordl, descr, wordlsk, anton in Words:
            wikipedia.output(u'    <<<\03{lightpurple}%s\03{default}>>> <<<\03{lightpurple}%s\03{default}>>> <<<\03{lightpurple}%s\03{default}>>> ' % (wordl, wordlsk, anton))
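        # "Ar formuoti?" = "Create these entries?"; answering 'ca' accepts
        # all remaining groups without further prompting.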
        if acceptallgroup:
            fromword = bot.run()
        else:
            choice = wikipedia.inputChoice(
                        u'Ar formuoti?',
                        ['Create', 'CreateAll', 'No'],
                        ['c', 'ca', 'N'], 'N')
            if choice == 'c':
                fromword = bot.run()
            elif choice == 'ca':
                acceptallgroup = True
                fromword = bot.run()
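        # Recurse into the derived words so that their own forms (and any
        # further derivatives) are generated as well.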
        (acceptallgroup, fromword, zodziai) = cikleWords (subwordsList, force, bot.append, acceptallnew=acceptallnew,
                                     acceptallnewnotauto=acceptallnewnotauto, acceptallgroups=acceptallgroup, prot=prot,
                                     test=test, fromword = fromword, zodziai=zodziai)
    return (acceptallgroup, fromword, zodziai)
 
def main(*args):
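    """Parse the command line arguments and run the requested task: the
    'word' / 'nesusije' page creation fixes or a plain text replacement."""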
    add_cat = None
    gen = None
    # summary message
    summary_commandline = None
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title':         [],
        'text-contains': [],
        'require-text':  [],
        'inside':        [],
        'inside-tags':   [],
        'require-title': [], # using a separate requirements dict needs some
    }                        # major refactoring of code.
 
    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fix = None
    force = False
    append = 'b'
    summary = None
    minor = False
    autosummary = False
    debug = False
 
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    PageTitles = []
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # Load default summary message.
    # BUG WARNING: This is probably incompatible with the -lang parameter.
    wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg))
    # When using -fix, sleep some time between one regex and the next
    # (so as not to waste too much CPU).
    sleep = None
 
    # Default regular expression flags; the -nocase, -dotall and -multiline
    # modifiers are applied below, once the command line arguments (and any
    # -fix settings) are known.
    flags = re.UNICODE
 
    # Read commandline parameters.
    for arg in wikipedia.handleArgs(*args):
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = wikipedia.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = wikipedia.input(
                    u'Please enter the XML dump\'s filename:')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(wikipedia.input(
                                    u'Which page do you want to change?'))
            else:
                PageTitles.append(arg[6:])
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg == "-appendtop":
            append = "t"
        elif arg == "-appendbottom":
            append = "b"
        elif arg == "-force":
            force = True
        elif arg == '-minor':
            minor = True
        elif arg.startswith("-summary:"):
            summary = arg[9:]
            wikipedia.setAction(summary)
            summary_commandline = True
        elif arg == '-autosummary':
            autosummary = True
        elif arg.startswith('-fix:'):
            fix = arg[5:]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[len('-addcat:'):]
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        else:
            if not genFactory.handleArg(arg):
                commandline_replacements.append(arg)
 
    if len(commandline_replacements) % 2:
        raise wikipedia.Error('An even number of replacement arguments is required.')
    elif len(commandline_replacements) == 2 and fix == None:
        replacements.append((commandline_replacements[0],
                             commandline_replacements[1]))
        if summary_commandline == None:
            wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg )
                                % (' (-' + commandline_replacements[0] + ' +'
                                   + commandline_replacements[1] + ')'))
    elif len(commandline_replacements) > 1:
        if fix == None:
            for i in xrange (0, len(commandline_replacements), 2):
                replacements.append((commandline_replacements[i],
                                     commandline_replacements[i + 1]))
            if summary_commandline == None:
                pairs = [( commandline_replacements[i],
                           commandline_replacements[i + 1] )
                         for i in range(0, len(commandline_replacements), 2)]
                replacementsDescription = '(%s)' % ', '.join(
                    [('-' + pair[0] + ' +' + pair[1]) for pair in pairs])
                wikipedia.setAction(
                    wikipedia.translate(wikipedia.getSite(), msg )
                    % replacementsDescription)
        else:
            raise wikipedia.Error(
                'Specifying -fix with replacements is undefined')
    elif fix == None:
        old = wikipedia.input(u'Please enter the text that should be replaced:')
        new = wikipedia.input(u'Please enter the new text:')
        change = '(-' + old + ' +' + new
        replacements.append((old, new))
        while True:
            old = wikipedia.input(
u'Please enter another text that should be replaced, or press Enter to start:')
            if old == '':
                change = change + ')'
                break
            new = wikipedia.input(u'Please enter the new text:')
            change = change + ' & -' + old + ' +' + new
            replacements.append((old, new))
        if not summary_commandline == True:
            default_summary_message =  wikipedia.translate(wikipedia.getSite(), msg) % change
            wikipedia.output(u'The summary message will default to: %s'
                             % default_summary_message)
            summary_message = wikipedia.input(
u'Press Enter to use this default message, or enter a description of the\nchanges your bot will make:')
            if summary_message == '':
                summary_message = default_summary_message
            wikipedia.setAction(summary_message)
 
    elif fix not in ('word', 'nesusije'):
        # Perform one of the predefined actions.
        try:
            fix = fixeswords.fixes[fix]
        except KeyError:
            wikipedia.output(u'Available predefined fixes are: %s'
                             % fixeswords.fixes.keys())
            return
        if 'regex' in fix:
            regex = fix['regex']
        if 'msg' in fix:
            wikipedia.setAction(
                wikipedia.translate(wikipedia.getSite(), fix['msg']))
        if 'exceptions' in fix:
            exceptions = fix['exceptions']
        if 'nocase' in fix:
            caseInsensitive = fix['nocase']
        replacements = fix['replacements']
 
        #wikipedia.output(u'fix get = %s' % fix)
 
    # Apply the regular expression flags now that the command line arguments
    # (and any -fix settings) are known.
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    for i in range(len(replacements)):
        old, new = replacements[i]
        if not regex:
            old = re.escape(old)
        oldR = re.compile(old, flags)
        replacements[i] = oldR, new
 
    for exceptionCategory in ['title', 'require-title', 'text-contains', 'inside', 'require-text']:
        if exceptionCategory in exceptions:
            patterns = exceptions[exceptionCategory]
            if not regex:
                patterns = [re.escape(pattern) for pattern in patterns]
            patterns = [re.compile(pattern, flags) for pattern in patterns]
            exceptions[exceptionCategory] = patterns
 
    #wikipedia.output(u'fix rep = %s' % replacements)
    #wikipedia.output(u'fix page = %s' % PageTitles)
    #return
 
    if xmlFilename:
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart,
                                          replacements, exceptions)
    elif useSql:
        whereClause = 'WHERE (%s)' % ' OR '.join(
            ["old_text RLIKE '%s'" % prepareRegexForMySQL(old.pattern)
             for (old, new) in replacements])
        if exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join(
                ["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                 for exc in exceptions])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)
    elif PageTitles:
        pages = [wikipedia.Page(wikipedia.getSite(), PageTitle)
                 for PageTitle in PageTitles]
        gen = iter(pages)
 
    if fix not in ('word', 'nesusije'):
        gen = genFactory.getCombinedGenerator(gen)
        if not gen:
            # syntax error, show help text from the top of this file
            wikipedia.showHelp('replace')
            return
    if xmlFilename:
        # XML parsing can be quite slow, so use smaller batches and
        # longer lookahead.
        preloadingGen = pagegenerators.PreloadingGenerator(gen,
                                            pageNumber=20, lookahead=100)
    elif fix not in ('word', 'nesusije'):
        preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)
 
    _fnf = os.path.join('', 'protokolas.txt')
    prot = codecs.open(_fnf, "w+", "utf-8")
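    # The 'word' fix runs cikleWords twice: the first pass (test=True) only
    # fills the 'zodziai' bookkeeping (which headwords get a non-automatic
    # base form), the second pass (test=False) actually creates the pages;
    # each pass writes its own protocol file.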
    if fix == 'word':
        fromword = None
        zodziai = {}
        (acceptallgroup, fromword, zodziai) = cikleWords(fixeswords.wordsList, force, append, acceptallnew=True,
                                                acceptallnewnotauto=True, acceptallgroups=False, prot=prot,
                                                test=True, fromword=fromword, zodziai=zodziai)
        _fnf = os.path.join('', 'protokolas2.txt')
        prot = codecs.open(_fnf, "w+", "utf-8")
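        # "Prasideda straipsnių kūrimo etapas." = "The article creation stage begins."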
        wikipedia.output(u'<<<\03{lightpurple} Prasideda straipsnių kūrimo etapas. \03{default}>>>')
        (acceptallgroup, fromword, zodziai) = cikleWords(fixeswords.wordsList, force, append, acceptallnew=True,
                                                acceptallnewnotauto=True, acceptallgroups=False, prot=prot,
                                                test=False, fromword=fromword, zodziai=zodziai)
    elif fix == 'nesusije':
        wikipedia.output(u'fix = %s' % fix)
        preloadingGen = PageNesusijeReader(nesusije.wordsList)
        bot = PageNesusijeRobot(preloadingGen)
        bot.run()
    else:
        bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall, allowoverlap, recursive, add_cat, sleep)
        #wikipedia.output(u'fix gen = %s' % preloadingGen)
        #return
 
        bot.run()
 
if __name__ == "__main__":
    try:
        main()
    finally:
        wikipedia.stopme()