Iri.py :  » XML » 4Suite » 4Suite-XML-1.0.2 » Ft » Lib » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » XML » 4Suite 
4Suite » 4Suite XML 1.0.2 » Ft » Lib » Iri.py
########################################################################
# $Header: /var/local/cvsroot/4Suite/Ft/Lib/Iri.py,v 1.4.4.1 2006/08/23 06:46:34 jkloth Exp $
"""
Classes and functions related to IRI processing

Copyright 2004 Fourthought, Inc. (USA).
Detailed license and copyright information: http://4suite.org/COPYRIGHT
Project home, documentation, distributions: http://4suite.org/
"""

import sys

def IriToUri(iri, convertHost=False):
    r"""
    Converts an IRI or IRI reference to a URI or URI reference,
    implementing sec. 3.1 of draft-duerst-iri-10.

    The convertHost flag indicates whether to perform conversion of
    the ireg-name (host) component of the IRI to an RFC 2396-compatible
    URI reg-name (IDNA encoded), e.g.
    IriToUri(u'http://r\xe9sum\xe9.example.org/', convertHost=False)
    => u'http://r%C3%A9sum%C3%A9.example.org/'
    IriToUri(u'http://r\xe9sum\xe9.example.org/', convertHost=True)
    => u'http://xn--rsum-bpad.example.org/'

    Ordinarily, the IRI should be given as a unicode string, which is
    NFC-normalized before conversion. If the IRI is instead given as a
    byte string, then it will be assumed to be UTF-8 encoded, will be
    decoded accordingly, and as per the requirements of the conversion
    algorithm, will NOT be normalized.

    Every character above U+007F is replaced by the percent-encoded
    octets of its UTF-8 form; ASCII characters are copied unchanged.
    The result is always a unicode string.

    Raises ValueError if the IRI contains a C1 control character
    (U+0080 through U+009F) or an unpaired surrogate code unit, and
    UnicodeDecodeError (a ValueError subclass) if a byte string is not
    valid UTF-8.
    """
    if isinstance(iri, type(u'')):
        iri = NfcNormalize(iri)
    else:
        # byte string: decode, but deliberately do not normalize
        iri = iri.decode('utf-8')

    if convertHost and sys.version_info[0:2] >= (2,3):
        # only the host part of the authority is IDNA-converted; every
        # other component goes through the percent-encoding step below
        from Ft.Lib.Uri import SplitUriRef,UnsplitUriRef
        (scheme, auth, path, query, frag) = SplitUriRef(iri)
        if auth:
            auth = _ConvertAuthorityHost(auth)
        iri = UnsplitUriRef((scheme, auth, path, query, frag))

    return _PercentEncodeNonAscii(iri)


def _ConvertAuthorityHost(auth):
    """
    Returns the given authority component with its host converted by
    ConvertIregName; userinfo and port are carried over untouched.
    """
    # split on the last '@' so a stray '@' in userinfo cannot break us
    at = auth.rfind('@')
    userinfo, hostport = auth[:at + 1], auth[at + 1:]
    if hostport.startswith('['):
        # IP-literal such as [::1]:8080 -- it has no reg-name to convert,
        # and its colons must not be mistaken for the port delimiter
        return auth
    colon = hostport.find(':')
    if colon > -1:
        host, port = hostport[:colon], hostport[colon:]
    else:
        host, port = hostport, u''
    if not host:
        return auth
    host = ConvertIregName(host)
    if not isinstance(host, type(u'')):
        # the idna codec hands back an (ASCII-only) byte string
        host = host.decode('ascii')
    return userinfo + host + port


def _PercentEncodeNonAscii(iri):
    """
    Returns the given unicode string with each non-ASCII character
    replaced by the percent-encoded octets of its UTF-8 encoding.
    """
    pieces = []
    high = None  # code unit of a high surrogate awaiting its partner
    for pos, c in enumerate(iri):
        cp = ord(c)
        if high is not None:
            # iterating the string may hand us a surrogate pair one
            # code unit at a time; the unit after a high surrogate
            # must be a low surrogate (dc00-dfff)
            if not 0xDC00 <= cp <= 0xDFFF:
                raise ValueError("Illegal surrogate pair in %r" % (iri,))
            cp = 0x10000 + ((high - 0xD800) << 10) + (cp - 0xDC00)
            high = None
        elif 0xD800 <= cp <= 0xDBFF:
            # d800-dbff
            high = cp
            continue
        elif 0xDC00 <= cp <= 0xDFFF:
            # low surrogate with no preceding high surrogate
            raise ValueError("Illegal surrogate pair in %r" % (iri,))

        if cp < 0x80:
            pieces.append(c)
        elif cp < 0xA0:
            # C1 control characters are not allowed in an IRI
            # FIXME: i18n
            raise ValueError("Illegal character at position %d (0-based) of IRI %r" % (pos, iri))
        else:
            for octet in _Utf8Octets(cp):
                pieces.append(u'%%%02X' % octet)
    if high is not None:
        # the string ended in the middle of a surrogate pair
        raise ValueError("Illegal surrogate pair in %r" % (iri,))
    return u''.join(pieces)


def _Utf8Octets(cp):
    """
    Returns the UTF-8 encoding of the code point cp (0x80 or above) as
    a list of integers.
    """
    # Computed arithmetically rather than with str.encode so that a
    # combined surrogate pair produces the 4-octet form on every
    # interpreter, whatever its internal string representation.
    if cp < 0x800:
        return [0xC0 | (cp >> 6),
                0x80 | (cp & 0x3F)]
    if cp < 0x10000:
        return [0xE0 | (cp >> 12),
                0x80 | ((cp >> 6) & 0x3F),
                0x80 | (cp & 0x3F)]
    return [0xF0 | (cp >> 18),
            0x80 | ((cp >> 12) & 0x3F),
            0x80 | ((cp >> 6) & 0x3F),
            0x80 | (cp & 0x3F)]


def NfcNormalize(iri):
    """
    Returns the given unicode string converted to Unicode Normalization
    Form C (NFC), making it usable as an IRI or IRI reference.

    When unicodedata.normalize cannot be imported (Python releases
    older than 2.3), the string is handed back unchanged.
    """
    try:
        from unicodedata import normalize as to_normal_form
    except ImportError:
        # no normalization support on this interpreter
        return iri
    return to_normal_form('NFC', iri)


def ConvertIregName(iregname):
    """
    On Python 2.3 and higher, converts the given ireg-name component
    of an IRI to a string suitable for use as a URI reg-name in pre-
    rfc2396bis schemes and resolvers. Returns the ireg-name
    unmodified on Python 2.2.

    The conversion is best-effort: if the name cannot be IDNA-encoded
    (for example, a label that is empty or too long), or if the
    argument is not a text string at all, it is returned unmodified
    rather than raising. A successful conversion always yields a
    unicode string, so the result has the same type whether or not the
    conversion took place.
    """
    try:
        # I have not yet verified that the default IDNA encoding
        # matches the algorithm required by the IRI spec, but it
        # does work on the one simple example in the spec.
        encoded = iregname.encode('idna')
    except (LookupError, UnicodeError, AttributeError):
        # LookupError: this interpreter has no 'idna' codec.
        # UnicodeError: the name is not encodable as IDNA.
        # AttributeError: the argument has no encode() method.
        return iregname
    if not isinstance(encoded, type(u'')):
        # the codec produces an ASCII-only byte string
        encoded = encoded.decode('ascii')
    return encoded
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.