# Module wntools.py
#
# Original author: Oliver Steele <steele@osteele.com>
# Project Page: http://sourceforge.net/projects/pywordnet
#
# Copyright (c) 1998-2004 by Oliver Steele.  Use is permitted under
# the Artistic License
# <http://www.opensource.org/licenses/artistic-license.html>

"""Utility functions to use with the wordnet module.

Usage
-----
    >>> dog = N['dog'][0]

    # (First 10) adjectives that are transitively SIMILAR to the main sense of 'red'
    >>> closure(ADJ['red'][0], SIMILAR)[:10]
    ['red' in {adjective: red, reddish, ruddy, blood-red, carmine, cerise, cherry, cherry-red, crimson, ruby, ruby-red, scarlet}, {adjective: chromatic}, {adjective: amber, brownish-yellow, yellow-brown}, {adjective: amethyst}, {adjective: aureate, gilded, gilt, gold, golden}, {adjective: azure, cerulean, sky-blue, bright blue}, {adjective: blue, bluish, blueish, light-blue, dark-blue, blue-black}, {adjective: bluish green, blue-green, cyan, teal}, {adjective: blushful, rosy}, {adjective: bottle-green}]

    >>> # Adjectives that are transitively SIMILAR to any of the senses of 'red'
    >>> #flatten1(map(lambda sense:closure(sense, SIMILAR), ADJ['red']))    # too verbose

    >>> # Hyponyms of the main sense of 'dog'(n.) that are homophonous with verbs
    >>> filter(lambda sense:V.get(sense.form), flatten1(map(lambda e:e.getSenses(), hyponyms(N['dog'][0]))))
    ['dog' in {noun: dog, domestic dog, Canis familiaris}, 'pooch' in {noun: pooch, doggie, doggy, barker, bow-wow}, 'toy' in {noun: toy dog, toy}, 'hound' in {noun: hound, hound dog}, 'basset' in {noun: basset, basset hound}, 'cocker' in {noun: cocker spaniel, English cocker spaniel, cocker}, 'bulldog' in {noun: bulldog, English bulldog}]

    >>> # Find the senses of 'raise'(v.) and 'lower'(v.) that are antonyms
    >>> filter(lambda p:p[0] in p[1].pointerTargets(ANTONYM), product(V['raise'].getSenses(), V['lower'].getSenses()))
    [('raise' in {verb: raise, lift, elevate, get up, bring up}, 'lower' in {verb: lower, take down, let down, get down, bring down})]
"""

__author__  = "Oliver Steele <steele@osteele.com>"
__version__ = "2.0"

from wordnet import *

#
# Domain utilities
#

def _requireSource(entity):
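    """Raise a TypeError unless _entity_ has pointers, i.e. is a Sense or
    Synset.  Used by tree() and closure(); the error message steers callers
    who pass a whole Word (e.g. N['dog']) toward a single sense (e.g.
    N['dog'][0])."""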
    if not hasattr(entity, 'pointers'):
        if isinstance(entity, Word):
            raise TypeError, `entity` + " is not a Sense or Synset.  Try " + `entity` + "[0] instead."
        else:
            raise TypeError, `entity` + " is not a Sense or Synset"

def tree(source, pointerType):
    """
    >>> dog = N['dog'][0]
    >>> from pprint import pprint
    >>> pprint(tree(dog, HYPERNYM))
    ['dog' in {noun: dog, domestic dog, Canis familiaris},
     [{noun: canine, canid},
      [{noun: carnivore},
       [{noun: placental, placental mammal, eutherian, eutherian mammal},
        [{noun: mammal},
         [{noun: vertebrate, craniate},
          [{noun: chordate},
           [{noun: animal, animate being, beast, brute, creature, fauna},
            [{noun: organism, being},
             [{noun: living thing, animate thing},
              [{noun: object, physical object}, [{noun: entity}]]]]]]]]]]]]
    >>> #pprint(tree(dog, HYPONYM)) # too verbose to include here
    """
    if isinstance(source, Word):
        return map(lambda s, t=pointerType:tree(s,t), source.getSenses())
    _requireSource(source)
    return [source] + map(lambda s, t=pointerType:tree(s,t), source.pointerTargets(pointerType))

def closure(source, pointerType, accumulator=None):
    """Return the transitive closure of source under the pointerType
    relationship.  If source is a Word, return the union of the
    closures of its senses.
    
    >>> dog = N['dog'][0]
    >>> closure(dog, HYPERNYM)
    ['dog' in {noun: dog, domestic dog, Canis familiaris}, {noun: canine, canid}, {noun: carnivore}, {noun: placental, placental mammal, eutherian, eutherian mammal}, {noun: mammal}, {noun: vertebrate, craniate}, {noun: chordate}, {noun: animal, animate being, beast, brute, creature, fauna}, {noun: organism, being}, {noun: living thing, animate thing}, {noun: object, physical object}, {noun: entity}]
    """
    if isinstance(source, Word):
        return reduce(union, map(lambda s, t=pointerType:closure(s,t), source.getSenses()))
    _requireSource(source)
    if accumulator is None:
        accumulator = []
    if source not in accumulator:
        accumulator.append(source)
        for target in source.pointerTargets(pointerType):
            closure(target, pointerType, accumulator)
    return accumulator

def hyponyms(source):
    """Return source and its hyponyms.  If source is a Word, return
    the union of the hyponyms of its senses."""
    return closure(source, HYPONYM)

def hypernyms(source):
    """Return source and its hypernyms.  If source is a Word, return
    the union of the hypernyms of its senses."""

    return closure(source, HYPERNYM)
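
# Example (cf. the closure() doctest above): hypernyms(N['dog'][0]) is just
# closure(N['dog'][0], HYPERNYM), so it returns the chain from
# {noun: dog, domestic dog, Canis familiaris} through {noun: canine, canid},
# {noun: carnivore}, ... up to {noun: entity}.  hyponyms() walks the same
# relation in the opposite direction and typically yields a far longer list.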

def meet(a, b, pointerType=HYPERNYM):
    """Return the meet of a and b under the pointerType relationship.
    
    >>> meet(N['dog'][0], N['cat'][0])
    {noun: carnivore}
    >>> meet(N['dog'][0], N['person'][0])
    {noun: organism, being}
    >>> meet(N['thought'][0], N['belief'][0])
    {noun: content, cognitive content, mental object}
    """
    return (intersection(closure(a, pointerType), closure(b, pointerType)) + [None])[0]


#
# String Utility Functions
#
def startsWith(str, prefix):
    """Return true iff _str_ starts with _prefix_.
    
    >>> startsWith('unclear', 'un')
    1
    """
    return str[:len(prefix)] == prefix

def endsWith(str, suffix):
    """Return true iff _str_ ends with _suffix_.
    
    >>> endsWith('clearly', 'ly')
    1
    """
    return str[-len(suffix):] == suffix

def equalsIgnoreCase(a, b):
    """Return true iff a and b have the same lowercase representation.
    
    >>> equalsIgnoreCase('dog', 'Dog')
    1
    >>> equalsIgnoreCase('dOg', 'DOG')
    1
    """
    # test a == b first as an optimization where they're equal
    return a == b or string.lower(a) == string.lower(b)


#
# Sequence Utility Functions
#
def issequence(item):
    """Return true iff _item_ is a Sequence (a List, String, or Tuple).
    
    >>> issequence((1,2))
    1
    >>> issequence([1,2])
    1
    >>> issequence('12')
    1
    >>> issequence(1)
    0
    """
    return type(item) in (ListType, StringType, TupleType)

def intersection(u, v):
    """Return the intersection of _u_ and _v_.
    
    >>> intersection((1,2,3), (2,3,4))
    [2, 3]
    """
    w = []
    for e in u:
        if e in v:
            w.append(e)
    return w

def union(u, v):
    """Return the union of _u_ and _v_.
    
    >>> union((1,2,3), (2,3,4))
    [1, 2, 3, 4]
    """
    w = list(u)
    if w is u:
        import copy
        w = copy.copy(w)
    for e in v:
        if e not in w:
            w.append(e)
    return w

def product(u, v):
    """Return the Cartesian product of u and v.
    
    >>> product("123", "abc")
    [('1', 'a'), ('1', 'b'), ('1', 'c'), ('2', 'a'), ('2', 'b'), ('2', 'c'), ('3', 'a'), ('3', 'b'), ('3', 'c')]
    """
    return flatten1(map(lambda a, v=v:map(lambda b, a=a:(a,b), v), u))

def removeDuplicates(sequence):
    """Return a copy of _sequence_ with equal items removed.
    
    >>> removeDuplicates("this is a test")
    ['t', 'h', 'i', 's', ' ', 'a', 'e']
    >>> removeDuplicates(map(lambda tuple:apply(meet, tuple), product(N['story'].getSenses(), N['joke'].getSenses())))
    [{noun: message, content, subject matter, substance}, None, {noun: abstraction}, {noun: communication}]
    """
    accumulator = []
    for item in sequence:
        if item not in accumulator:
            accumulator.append(item)
    return accumulator


#
# Tree Utility Functions
#

def flatten1(sequence):
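    """Return a list containing the items of _sequence_, with one level of
    nested lists and tuples flattened.

    >>> flatten1([1, [2, 3], (4, 5), 6])
    [1, 2, 3, 4, 5, 6]
    """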
    accumulator = []
    for item in sequence:
        if type(item) == TupleType:
            item = list(item)
        if type(item) == ListType:
            accumulator.extend(item)
        else:
            accumulator.append(item)
    return accumulator


#
# WordNet utilities
#

GET_INDEX_SUBSTITUTIONS = ((' ', '-'), ('-', ' '), ('-', ''), (' ', ''), ('.', ''))

def getIndex(form, pos='noun'):
    """Search for _form_ in the dictionary corresponding to _pos_.
    getIndex generates variant spellings of _form_ by replacing spaces
    with hyphens, hyphens with spaces, and by removing hyphens, spaces,
    and periods (the substitutions listed in GET_INDEX_SUBSTITUTIONS).
    Each variant is looked up in turn until a match is found or all the
    variants have been tried.  It returns a Word or None."""
    def trySubstitutions(trySubstitutions, form, substitutions, lookup=1, dictionary=dictionaryFor(pos)):
        if lookup and dictionary.has_key(form):
            return dictionary[form]
        elif substitutions:
            (old, new) = substitutions[0]
            substitute = string.replace(form, old, new)
            if substitute == form:
                substitute = None
            if substitute and dictionary.has_key(substitute):
                return dictionary[substitute]
            return              trySubstitutions(trySubstitutions, form, substitutions[1:], lookup=0) or \
                (substitute and trySubstitutions(trySubstitutions, substitute, substitutions[1:]))
    return trySubstitutions(trySubstitutions, form, GET_INDEX_SUBSTITUTIONS)
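
# Usage sketch (not a doctest; the Word returned, if any, depends on the
# installed WordNet index).  For a hypothetical query such as
# getIndex('bull dog'), the variants tried include 'bull dog', 'bull-dog',
# and 'bulldog'; the first one present in the noun dictionary is returned,
# otherwise the result is None.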


MORPHOLOGICAL_SUBSTITUTIONS = {
    NOUN:
    [('s', ''),
     ('ses', 's'),
     ('ves', 'f'),
     ('xes', 'x'),
     ('zes', 'z'),
     ('ches', 'ch'),
     ('shes', 'sh'),
     ('men', 'man'),
     ('ies', 'y')],
    VERB:
    [('s', ''),
     ('ies', 'y'),
     ('es', 'e'),
     ('es', ''),
     ('ed', 'e'),
     ('ed', ''),
     ('ing', 'e'),
     ('ing', '')],
    ADJECTIVE:
    [('er', ''),
     ('est', ''),
     ('er', 'e'),
     ('est', 'e')],
    ADVERB: []}
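
# For example, the NOUN rule ('ches', 'ch') maps 'churches' to 'church' and
# ('ves', 'f') maps 'aardwolves' to 'aardwolf', matching the morphy()
# doctests below.  Irregular forms that no suffix rule covers (such as
# 'abaci' -> 'abacus') are handled by the *.exc exception files, which
# morphy() consults before applying these substitutions.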

def morphy(form, pos='noun', collect=0):
    """Recursively uninflect _form_, and return the first form found
    in the dictionary.  If _collect_ is true, a sequence of all forms
    is returned, instead of just the first one.
    
    >>> morphy('dogs')
    'dog'
    >>> morphy('churches')
    'church'
    >>> morphy('aardwolves')
    'aardwolf'
    >>> morphy('abaci')
    'abacus'
    >>> morphy('hardrock', 'adv')
    """
    from wordnet import _normalizePOS, _dictionaryFor
    pos = _normalizePOS(pos)
    fname = os.path.join(WNSEARCHDIR, {NOUN: 'noun', VERB: 'verb', ADJECTIVE: 'adj', ADVERB: 'adv'}[pos] + '.exc')
    excfile = open(fname)
    substitutions = MORPHOLOGICAL_SUBSTITUTIONS[pos]
    def trySubstitutions(trySubstitutions,  # workaround for lack of nested closures in Python < 2.1
                         form,        # reduced form
                         substitutions,    # remaining substitutions
                         lookup=1,
                         dictionary=_dictionaryFor(pos),
                         excfile=excfile,
                         collect=collect,
                         collection=[]):
        import string
        exceptions = binarySearchFile(excfile, form)
        if exceptions:
            form = exceptions[string.find(exceptions, ' ')+1:-1]
        if lookup and dictionary.has_key(form):
            if collect:
                collection.append(form)
            else:
                return form
        elif substitutions:
            old, new = substitutions[0]
            substitutions = substitutions[1:]
            substitute = None
            if endsWith(form, old):
                substitute = form[:-len(old)] + new
                #if dictionary.has_key(substitute):
                #   return substitute
            form =              trySubstitutions(trySubstitutions, form, substitutions) or \
                (substitute and trySubstitutions(trySubstitutions, substitute, substitutions))
            return (collect and collection) or form
        elif collect:
            return collection
    return trySubstitutions(trySubstitutions, form, substitutions)

#
# Testing
#
def _test(reset=0):
    import doctest, wntools
    if reset:
        doctest.master = None # This keeps doctest from complaining after a reload.
    return doctest.testmod(wntools)
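
# Run the doctests when this file is executed directly (a small convenience
# added here; it is not required for using the module as a library).
if __name__ == '__main__':
    _test()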