mediawiki_messages.py :  » Network » Python-Wikipedia-Robot-Framework » pywikipedia » archive » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Network » Python Wikipedia Robot Framework 
Python Wikipedia Robot Framework » pywikipedia » archive » mediawiki_messages.py
# -*- coding: utf-8 -*-
"""
Allows access to the MediaWiki messages, that's the label texts of the MediaWiki
software in the current language. These can be used in other bots.

The function refresh_messages() downloads all the current messages and saves
them to disk. It is run automatically when a bot first tries to access one of
the messages. It can be updated manually by running this script, e.g. when
somebody changed the current message at the wiki. The texts will also be
reloaded automatically once a month.

Syntax: python mediawiki_messages [-all]

Command line options:
    -refresh - Reloads messages for the home wiki or for the one defined via
               the -lang and -family parameters.

    -all     - Reloads messages for all wikis where messages are already present

    If another parameter is given, it will be interpreted as a MediaWiki key.
    The script will then output the respective value, without refreshing..
    
"""

# (C) Daniel Herding, 2004
#
# Distributed under the terms of the MIT license.

##THIS MODULE IS DEPRECATED AND HAS BEEN REPLACED BY NEW FUNCTIONALITY IN
##WIKIPEDIA.PY.  It is being retained solely for compatibility in case any
##custom-written bots rely upon it.  Bot authors should replace any uses
##of this module as follows:
##
##    OLD:    mediawiki_messages.get(key, site)
##    NEW:    site.mediawiki_message(key)
##
##    OLD:    mediawiki_messages.has(key, site)
##    NEW:    site.has_mediawiki_message(key)
##
##    OLD:    mediawiki_messages.makepath(path)
##    NEW:    wikipedia.makepath(path)
##
##########################################################################

import warnings
warnings.warn(
"""The mediawiki_messages module is deprecated and no longer
maintained; see the source code for new methods to replace
calls to this module.""",
            DeprecationWarning, stacklevel=2)


import wikipedia
import re, sys, pickle
import os.path
import time
import codecs
import urllib
from BeautifulSoup import *

__version__='$Id: mediawiki_messages.py 3731 2007-06-20 14:42:55Z russblau $'

loaded = {}

def get(key, site = None, allowreload = True):
    site = site or wikipedia.getSite()
    if site in loaded:
        # Use cached copy if it exists.
        dictionary = loaded[site]
    else:
        fn = 'mediawiki-messages/mediawiki-messages-%s-%s.dat' % (site.family.name, site.lang)
        try:
            # find out how old our saved dump is (in seconds)
            file_age = time.time() - os.path.getmtime(fn)
            # if it's older than 1 month, reload it
            if file_age > 30 * 24 * 60 * 60:
                print 'Current MediaWiki message dump is one month old, reloading'
                refresh_messages(site)
        except OSError:
            # no saved dumped exists yet
            refresh_messages(site)
        f = open(fn, 'r')
        dictionary = pickle.load(f)
        f.close()
        loaded[site] = dictionary
    key = key[0].lower() + key[1:]
    if key in dictionary:
        return dictionary[key]
    elif allowreload:
        refresh_messages(site = site)
        return get(key, site = site, allowreload = False)
    else:
        raise KeyError('MediaWiki Key %s not found' % key)

def has(key, site = None, allowreload = True):
    try:
        get(key, site, allowreload)
        return True
    except KeyError:
        return False

def makepath(path):
    """ creates missing directories for the given path and
        returns a normalized absolute version of the path.

    - if the given path already exists in the filesystem
      the filesystem is not modified.

    - otherwise makepath creates directories along the given path
      using the dirname() of the path. You may append
      a '/' to the path if you want it to be a directory path.

    from holger@trillke.net 2002/03/18
    """
    from os import makedirs
    from os.path import normpath,dirname,exists,abspath

    dpath = normpath(dirname(path))
    if not exists(dpath): makedirs(dpath)
    return normpath(abspath(path))
    
def refresh_messages(site = None):
    site = site or wikipedia.getSite()
    # get 'all messages' special page's path
    path = site.allmessages_address()
    print 'Retrieving MediaWiki messages for %s' % repr(site)
    wikipedia.put_throttle() # It actually is a get, but a heavy one.
    allmessages = site.getUrl(path)

    print 'Parsing MediaWiki messages'
    soup = BeautifulSoup(allmessages,
                         convertEntities=BeautifulSoup.HTML_ENTITIES)
    # The MediaWiki namespace in URL-encoded format, as it can contain
    # non-ASCII characters and spaces.
    quotedMwNs = urllib.quote(site.namespace(8).replace(' ', '_').encode(site.encoding()))
    mw_url = site.path() + "?title=" + quotedMwNs + ":"
    altmw_url = site.path() + "/" + quotedMwNs + ":"
    nicemw_url = site.nice_get_address(quotedMwNs + ":")
    shortmw_url = "/" + quotedMwNs + ":"
    ismediawiki = lambda url:url and (url.startswith(mw_url)
                                      or url.startswith(altmw_url)
                                      or url.startswith(nicemw_url)
                                      or url.startswith(shortmw_url))
    # we will save the found key:value pairs here
    dictionary = {}

    try:
        for keytag in soup('a', href=ismediawiki):
            # Key strings only contain ASCII characters, so we can save them as
            # strs
            key = str(keytag.find(text=True))
            keyrow = keytag.parent.parent
            if keyrow['class'] == "orig":
                valrow = keyrow.findNextSibling('tr')
                assert valrow['class'] == "new"
                value = unicode(valrow.td.string).strip()
            elif keyrow['class'] == 'def':
                value = unicode(keyrow('td')[1].string).strip()
            else:
                raise AssertionError("Unknown tr class value: %s" % keyrow['class'])
            dictionary[key] = value
    except Exception, e:
        wikipedia.debugDump( 'MediaWiki_Msg', site, u'%s: %s while processing URL: %s' % (repr(e), str(e), unicode(path)), allmessages)
        raise

    # Save the dictionary to disk
    # The file is stored in the mediawiki_messages subdir. Create if necessary. 
    if dictionary == {}:
        wikipedia.debugDump( 'MediaWiki_Msg', site, u'Error URL: '+unicode(path), allmessages )
        sys.exit()
    else:
        f = open(makepath('mediawiki-messages/mediawiki-messages-%s-%s.dat' % (site.family.name, site.lang)), 'w')
        pickle.dump(dictionary, f)
        f.close()
    print "Loaded %i values from %s" % (len(dictionary.keys()), site)
    #print dictionary['sitestatstext']

def refresh_all_messages():
    import dircache, time
    filenames = dircache.listdir('mediawiki-messages')
    message_filenameR = re.compile('mediawiki-messages-([a-z:]+)-([a-z:]+).dat')
    for filename in filenames:
        match = message_filenameR.match(filename)
        if match:
            family = match.group(1)
            lang = match.group(2)
            site = wikipedia.getSite(code = lang, fam = family)
            refresh_messages(site)

def main():
    refresh_all = False
    refresh = False
    key = None
    for arg in wikipedia.handleArgs():
        if arg == '-all':
            refresh_all = True
        elif arg == '-refresh':
            refresh = True
        else:
            key = arg
    if key:
        wikipedia.output(get(key), toStdout = True)
    elif refresh_all:
        refresh_all_messages()
    elif refresh:
        refresh_messages(wikipedia.getSite())
    else:
        wikipedia.showHelp('mediawiki_messages')

if __name__ == "__main__":
    try:
        main()
    except:
        wikipedia.stopme()
        raise
    else:
        wikipedia.stopme()

www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.