google.py :  » Search » PyGoogle » pygoogle-0.6 » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Search » PyGoogle 
PyGoogle » pygoogle 0.6 » google.py
"""
Python wrapper for Google web APIs

This module allows you to access Google's web APIs through SOAP,
to do things like search Google and get the results programmatically.
Described U{here <http://www.google.com/apis/>}
  
You need a Google-provided license key to use these services.
Follow the link above to get one.  These functions will look in
several places (in this order) for the license key:

    - the "license_key" argument of each function
    - the module-level LICENSE_KEY variable (call setLicense once to set it)
    - an environment variable called GOOGLE_LICENSE_KEY
    - a file called ".googlekey" in the current directory
    - a file called "googlekey.txt" in the current directory
    - a file called ".googlekey" in your home directory
    - a file called "googlekey.txt" in your home directory
    - a file called ".googlekey" in the same directory as google.py
    - a file called "googlekey.txt" in the same directory as google.py

Sample usage::
    
    >>> import google
    >>> google.setLicense('...') # must get your own key!
    >>> data = google.doGoogleSearch('python')
    >>> data.meta.searchTime
    0.043221000000000002
    
    >>> data.results[0].URL
    'http://www.python.org/'
    
    >>> data.results[0].title
    '<b>Python</b> Language Website'

@newfield contrib: Contributors
@author:   Mark Pilgrim <f8dy@diveintomark.org>
@author:   Brian Landers <brian@bluecoat93.org>
@license:  Python
@version:  0.6
@contrib:  David Ascher, for the install script
@contrib:  Erik Max Francis, for the command line interface
@contrib:  Michael Twomey, for HTTP proxy support
@contrib:  Mark Recht, for patches to support SOAPpy
"""

__author__ = "Mark Pilgrim (f8dy@diveintomark.org)"
__version__ = "0.6"
__cvsversion__ = "$Revision: 1.5 $"[11:-2]
__date__ = "$Date: 2004/02/25 23:46:07 $"[7:-2]
__copyright__ = "Copyright (c) 2002 Mark Pilgrim"
__license__ = "Python"
__credits__ = """David Ascher, for the install script
Erik Max Francis, for the command line interface
Michael Twomey, for HTTP proxy support"""

import os, sys, getopt
import GoogleSOAPFacade

LICENSE_KEY = None
HTTP_PROXY  = None

#
# Constants
#
_url         = 'http://api.google.com/search/beta2'
_namespace   = 'urn:GoogleSearch'
_googlefile1 = ".googlekey"
_googlefile2 = "googlekey.txt"

_false = GoogleSOAPFacade.false
_true  = GoogleSOAPFacade.true

_licenseLocations = (
    ( lambda key: key,
      'passed to the function in license_key variable' ),
    ( lambda key: LICENSE_KEY, 
      'module-level LICENSE_KEY variable (call setLicense to set it)' ),
    ( lambda key: os.environ.get( 'GOOGLE_LICENSE_KEY', None ),
      'an environment variable called GOOGLE_LICENSE_KEY' ),
    ( lambda key: _contentsOf( os.getcwd(), _googlefile1 ), 
      '%s in the current directory' % _googlefile1),
    ( lambda key: _contentsOf( os.getcwd(), _googlefile2 ),
      '%s in the current directory' % _googlefile2),
    ( lambda key: _contentsOf( os.environ.get( 'HOME', '' ), _googlefile1 ),
      '%s in your home directory' % _googlefile1),
    ( lambda key: _contentsOf( os.environ.get( 'HOME', '' ), _googlefile2 ),
      '%s in your home directory' % _googlefile2 ),
    ( lambda key: _contentsOf( _getScriptDir(), _googlefile1 ),
      '%s in the google.py directory' % _googlefile1 ),
    ( lambda key: _contentsOf( _getScriptDir(), _googlefile2 ),
      '%s in the google.py directory' % _googlefile2 )
)

## ----------------------------------------------------------------------
## Exceptions
## ----------------------------------------------------------------------

class NoLicenseKey(Exception): 
    """
    Thrown when the API is unable to find a valid license key.
    """
    pass

## ----------------------------------------------------------------------
## administrative functions (non-API)
## ----------------------------------------------------------------------

def _version():
    """
    Display a formatted version string for the module
    """
    print """PyGoogle %(__version__)s
%(__copyright__)s
released %(__date__)s

Thanks to:
%(__credits__)s""" % globals()

    
def _usage():
    """
    Display usage information for the command-line interface
    """
    program = os.path.basename(sys.argv[0])
    print """Usage: %(program)s [options] [querytype] query

options:
  -k, --key= <license key> Google license key (see important note below)
  -1, -l, --lucky          show only first hit
  -m, --meta               show meta information
  -r, --reverse            show results in reverse order
  -x, --proxy= <url>       use HTTP proxy
  -h, --help               print this help
  -v, --version            print version and copyright information
  -t, --test               run test queries

querytype:
  -s, --search= <query>    search (default)
  -c, --cache= <url>       retrieve cached page
  -p, --spelling= <word>   check spelling

IMPORTANT NOTE: all Google functions require a valid license key;
visit http://www.google.com/apis/ to get one.  %(program)s will look in
these places (in order) and use the first license key it finds:
  * the key specified on the command line""" % vars()
    for get, location in _licenseLocations[2:]:
        print "  *", location

## ----------------------------------------------------------------------
## utility functions (API)
## ----------------------------------------------------------------------

def setLicense(license_key):
    """
    Set the U{Google APIs <http://www.google.com/api>} license key
    
    @param license_key: The new key to use
    @type  license_key: String
    @todo: validate the key?
    """
    global LICENSE_KEY
    LICENSE_KEY = license_key
    
    
def getLicense(license_key = None):
    """
    Get the U{Google APIs <http://www.google.com/api>} license key
    
    The key can be read from any number of locations.  See the module-leve
    documentation for the search order.
    
    @return: the license key
    @rtype:  String
    @raise NoLicenseKey: if no valid key could be found
    """
    for get, location in _licenseLocations:
        rc = get(license_key)
        if rc: return rc
    _usage()
    raise NoLicenseKey, 'get a license key at http://www.google.com/apis/'


def setProxy(http_proxy):
    """
    Set the HTTP proxy to be used when accessing Google
    
    @param http_proxy: the proxy to use
    @type  http_proxy: String
    @todo: validiate the input?
    """
    global HTTP_PROXY
    HTTP_PROXY = http_proxy


def getProxy(http_proxy = None):
    """
    Get the HTTP proxy we use for accessing Google
    
    @return: the proxy
    @rtype:  String
    """
    return http_proxy or HTTP_PROXY


def _contentsOf(dirname, filename):
    filename = os.path.join(dirname, filename)
    if not os.path.exists(filename): return None
    fsock = open(filename)
    contents = fsock.read()
    fsock.close()
    return contents


def _getScriptDir():
    if __name__ == '__main__':
        return os.path.abspath(os.path.dirname(sys.argv[0]))
    else:
        return os.path.abspath(os.path.dirname(sys.modules[__name__].__file__))


def _marshalBoolean(value):
    if value:
        return _true
    else:
        return _false


def _getRemoteServer( http_proxy ):
    return GoogleSOAPFacade.getProxy( _url, _namespace, http_proxy )
    

## ----------------------------------------------------------------------
## search results classes
## ----------------------------------------------------------------------

class _SearchBase:
    def __init__(self, params):
        for k, v in params.items():
            if isinstance(v, GoogleSOAPFacade.structType):
                v = GoogleSOAPFacade.toDict( v )
                
            try:
                if isinstance(v[0], GoogleSOAPFacade.structType):
                    v = [ SOAPProxy.toDict( node ) for node in v ]

            except:
                pass
            self.__dict__[str(k)] = v

## ----------------------------------------------------------------------

class SearchResultsMetaData(_SearchBase):
    """
    Container class for metadata about a given search query's results.

    @ivar documentFiltering: is duplicate page filtering active?

    @ivar searchComments: human-readable informational message

        example::

             "'the' is a very common word and was not included in your search"

    @ivar estimatedTotalResultsCount: estimated total number of results 
        for this query.

    @ivar estimateIsExact: is estimatedTotalResultsCount an exact value?

    @ivar searchQuery: search string that initiated this search

    @ivar startIndex: index of the first result returned (zero-based)

    @ivar endIndex: index of the last result returned (zero-based)

    @ivar searchTips: human-readable informational message on how to better
       use Google.

    @ivar directoryCategories: list of categories for the search results

       This field is a list of dictionaries, like so::

           { 'fullViewableName': 'the Open Directory category',
             'specialEncoding':  'encoding scheme of this directory category'
           }

    @ivar searchTime: total search time, in seconds
    """    
    pass

## ----------------------------------------------------------------------

class SearchResult(_SearchBase):
    """
    Encapsulates the results from a search.

    @ivar URL: URL

    @ivar title: title (HTML)

    @ivar snippet: snippet showing query context (HTML

    @ivar cachedSize: size of cached version of this result, (KB)

    @ivar relatedInformationPresent: is the "related:" keyword supported?

        Flag indicates that the "related:" keyword is supported for this URL

    @ivar hostName:  used when filtering occurs

        When filtering occurs, a maximum of two results from any given
        host is returned.  When this occurs, the second resultElement
        that comes from that host contains the host name in this parameter.

    @ivar directoryCategory: Open Directory category information

        This field is a dictionary with the following values::

            { 'fullViewableName': 'the Open Directory category',
              'specialEncoding' : 'encoding scheme of this directory category'
            }

    @ivar directoryTitle: Open Directory title of this result (or blank)

    @ivar summary: Open Directory summary for this result (or blank)
    """
    pass

## ----------------------------------------------------------------------

class SearchReturnValue:
    """
    complete search results for a single query

    @ivar meta: L{SearchResultsMetaData} instance for this query

    @ivar results: list of L{SearchResult} objects for this query 
    """
    def __init__( self, metadata, results ):
        self.meta    = metadata
        self.results = results

## ----------------------------------------------------------------------
## main functions
## ----------------------------------------------------------------------

def doGoogleSearch( q, start = 0, maxResults = 10, filter = 1,
                    restrict='', safeSearch = 0, language = '',
                    inputencoding = '', outputencoding = '',\
                    license_key = None, http_proxy = None ):
    """
    Search Google using the SOAP API and return the results.

    You need a license key to call this function; see the
    U{Google APIs <http://www.google.com/apis/>} site to get one.
    Then you can either pass it to this function every time, or
    set it globally; see the L{google} module-level docs for details.
    
    See U{http://www.google.com/help/features.html}
    for examples of advanced features.  Anything that works at the 
    Google web site will work as a query string in this method.
    
    You can use the C{start} and C{maxResults} parameters to page
    through multiple pages of results.  Note that 'maxResults' is
    currently limited by Google to 10.
            
    See the API reference for more advanced examples and a full list of
    country codes and topics for use in the C{restrict} parameter, along
    with legal values for the C{language}, C{inputencoding}, and
    C{outputencoding} parameters.
    
    You can download the API documentation 
    U{http://www.google.com/apis/download.html <here>}.
    
    @param q: search string.  
    @type  q: String

    @param start: (optional) zero-based index of first desired result.
    @type  start: int

    @param maxResults: (optional) maximum number of results to return.
    @type  maxResults: int

    @param filter: (optional) flag to request filtering of similar results
    @type  filter: int

    @param restrict: (optional) restrict results by country or topic.
    @type  restrict: String    

    @param safeSearch: (optional)
    @type  safeSearch: int

    @param language: (optional)
    @type  language: String

    @param inputencoding: (optional)
    @type  inputencoding: String

    @param outputencoding: (optional)
    @type  outputencoding: String

    @param license_key: (optional) the Google API license key to use
    @type  license_key: String

    @param http_proxy: (optional) the HTTP proxy to use for talking to Google
    @type  http_proxy: String
    
    @return: the search results encapsulated in an object
    @rtype:  L{SearchReturnValue}
    """
    license_key  = getLicense( license_key )    
    http_proxy   = getProxy( http_proxy )
    remoteserver = _getRemoteServer( http_proxy )
                                   
    filter     = _marshalBoolean( filter )
    safeSearch = _marshalBoolean( safeSearch )
    
    data = remoteserver.doGoogleSearch( license_key, q, start, maxResults,
                                        filter, restrict, safeSearch,
                                        language, inputencoding, 
                                        outputencoding )

    metadata = GoogleSOAPFacade.toDict( data )
    del metadata["resultElements"]
    
    metadata = SearchResultsMetaData( metadata )
    
    results = [ SearchResult( GoogleSOAPFacade.toDict( node ) ) \
                    for node in data.resultElements ]
    
    return SearchReturnValue( metadata, results )

## ----------------------------------------------------------------------

def doGetCachedPage( url, license_key = None, http_proxy = None ):
    """
    Retrieve a page from the Google cache.

    You need a license key to call this function; see the
    U{Google APIs <http://www.google.com/apis/>} site to get one.
    Then you can either pass it to this function every time, or
    set it globally; see the L{google} module-level docs for details.
    
    @param url: full URL to the page to retrieve
    @type  url: String
    
    @param license_key: (optional) the Google API key to use
    @type  license_key: String
    
    @param http_proxy:  (optional) the HTTP proxy server to use
    @type  http_proxy:  String
    
    @return: full text of the cached page
    @rtype:  String
    """
    license_key  = getLicense( license_key )
    http_proxy   = getProxy( http_proxy )
    remoteserver = _getRemoteServer( http_proxy )
                                   
    return remoteserver.doGetCachedPage( license_key, url )

## ----------------------------------------------------------------------

def doSpellingSuggestion( phrase, license_key = None, http_proxy = None ):
    """
    Get spelling suggestions from Google

    You need a license key to call this function; see the
    U{Google APIs <http://www.google.com/apis/>} site to get one.
    Then you can either pass it to this function every time, or
    set it globally; see the L{google} module-level docs for details.

    @param phrase: word or phrase to spell-check
    @type  phrase: String

    @param license_key: (optional) the Google API key to use
    @type  license_key: String
    
    @param http_proxy: (optional) the HTTP proxy to use
    @type  http_proxy: String
    
    @return: text of any suggested replacement, or None
    """
    license_key  = getLicense( license_key )    
    http_proxy   = getProxy( http_proxy) 
    remoteserver = _getRemoteServer( http_proxy )
                                   
    return remoteserver.doSpellingSuggestion( license_key, phrase )

## ----------------------------------------------------------------------
## functional test suite (see googletest.py for unit test suite)
## ----------------------------------------------------------------------

def _test():
    """
    Run functional test suite.
    """
    try:
        getLicense(None)
    except NoLicenseKey:
        return
        
    print "Searching for Python at google.com..."
    data = doGoogleSearch( "Python" )
    _output( data, { "func": "doGoogleSearch"} )

    print "\nSearching for 5 _French_ pages about Python, "
    print "encoded in ISO-8859-1..."

    data = doGoogleSearch( "Python", language = 'lang_fr',                 
                                     outputencoding = 'ISO-8859-1',
                                     maxResults = 5 )
                                     
    _output( data, { "func": "doGoogleSearch" } )

    phrase = "Pyhton programming languager"
    print "\nTesting spelling suggestions for '%s'..." % phrase
    
    data = doSpellingSuggestion( phrase )
    
    _output( data, { "func": "doSpellingSuggestion" } )

## ----------------------------------------------------------------------
## Command-line interface
## ----------------------------------------------------------------------

class _OutputFormatter:
    def boil(self, data):
        if type(data) == type(u""):
            return data.encode("ISO-8859-1", "replace")
        else:
            return data

class _TextOutputFormatter(_OutputFormatter):
    def common(self, data, params):
        if params.get("showMeta", 0):
            meta = data.meta
            for category in meta.directoryCategories:
                print "directoryCategory: %s" % \
                  self.boil(category["fullViewableName"])
            for attr in [node for node in dir(meta) if \
              node <> "directoryCategories" and node[:2] <> '__']:
                print "%s:" % attr, self.boil(getattr(meta, attr))
        
    def doGoogleSearch(self, data, params):
        results = data.results
        if params.get("feelingLucky", 0):
            results = results[:1]
        if params.get("reverseOrder", 0):
            results.reverse()
        for result in results:
            for attr in dir(result):
                if attr == "directoryCategory":
                    print "directoryCategory:", \
                      self.boil(result.directoryCategory["fullViewableName"])
                elif attr[:2] <> '__':
                    print "%s:" % attr, self.boil(getattr(result, attr))
            print
        self.common(data, params)
    
    def doGetCachedPage(self, data, params):
        print data
        self.common(data, params)

    doSpellingSuggestion = doGetCachedPage

def _makeFormatter(outputFormat):
    classname = "_%sOutputFormatter" % outputFormat.capitalize()
    return globals()[classname]()

def _output(results, params):
    formatter = _makeFormatter(params.get("outputFormat", "text"))
    outputmethod = getattr(formatter, params["func"])
    outputmethod(results, params)

def main(argv):
    """
    Command-line interface.
    """
    if not argv:
        _usage()
        return
    q = None
    func = None
    http_proxy = None
    license_key = None
    feelingLucky = 0
    showMeta = 0
    reverseOrder = 0
    runTest = 0
    outputFormat = "text"
    try:
        opts, args = getopt.getopt(argv, "s:c:p:k:lmrx:hvt1",
            ["search=", "cache=", "spelling=", "key=", "lucky", "meta",
             "reverse", "proxy=", "help", "version", "test"])
    except getopt.GetoptError:
        _usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-s", "--search"):
            q = arg
            func = "doGoogleSearch"
        elif opt in ("-c", "--cache"):
            q = arg
            func = "doGetCachedPage"
        elif opt in ("-p", "--spelling"):
            q = arg
            func = "doSpellingSuggestion"
        elif opt in ("-k", "--key"):
            license_key = arg
        elif opt in ("-l", "-1", "--lucky"):
            feelingLucky = 1
        elif opt in ("-m", "--meta"):
            showMeta = 1
        elif opt in ("-r", "--reverse"):
            reverseOrder = 1
        elif opt in ("-x", "--proxy"):
            http_proxy = arg
        elif opt in ("-h", "--help"):
            _usage()
        elif opt in ("-v", "--version"):
            _version()
        elif opt in ("-t", "--test"):
            runTest = 1
    if runTest:
        setLicense(license_key)
        setProxy(http_proxy)
        _test()
    if args and not q:
        q = args[0]
        func = "doGoogleSearch"
    if func:
        results = globals()[func]( q, http_proxy=http_proxy, 
                                   license_key=license_key )
        _output(results, locals())

if __name__ == '__main__':
    main(sys.argv[1:])
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.