pyval_repr.py :  » Development » Epydoc » epydoc-3.0.1 » epydoc » markup » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Development » Epydoc 
Epydoc » epydoc 3.0.1 » epydoc » markup » pyval_repr.py
# epydoc -- Marked-up Representations for Python Values
#
# Copyright (C) 2005 Edward Loper
# Author: Edward Loper <edloper@loper.org>
# URL: <http://epydoc.sf.net>
#
# $Id: apidoc.py 1448 2007-02-11 00:05:34Z dvarrazzo $

"""
Syntax highlighter for Python values.  Currently provides special
colorization support for:

  - lists, tuples, sets, frozensets, dicts
  - numbers
  - strings
  - compiled regexps

The highlighter also takes care of line-wrapping, and automatically
stops generating repr output as soon as it has exceeded the specified
number of lines (which should make it faster than pprint for large
values).  It does I{not} bother to do automatic cycle detection,
because maxlines is typically around 5, so it's really not worth it.

The syntax-highlighted output is encoded using a
L{ParsedEpytextDocstring}, which can then be used to generate output in
a variety of formats.
"""
__docformat__ = 'epytext en'

# Implementation note: we use exact tests for classes (list, etc)
# rather than using isinstance, because subclasses might override
# __repr__.

import types, re
import epydoc.apidoc
from epydoc.util import decode_with_backslashreplace
from epydoc.util import plaintext_to_html,plaintext_to_latex
from epydoc.compat import *
import sre_parse, sre_constants

from epydoc.markup.epytext import Element,ParsedEpytextDocstring

def is_re_pattern(pyval):
    return type(pyval).__name__ == 'SRE_Pattern'

class _ColorizerState:
    """
    An object uesd to keep track of the current state of the pyval
    colorizer.  The L{mark()}/L{restore()} methods can be used to set
    a backup point, and restore back to that backup point.  This is
    used by several colorization methods that first try colorizing
    their object on a single line (setting linebreakok=False); and
    then fall back on a multi-line output if that fails.  The L{score}
    variable is used to keep track of a 'score', reflecting how good
    we think this repr is.  E.g., unhelpful values like '<Foo instance
    at 0x12345>' get low scores.  If the score is too low, we'll use
    the parse-derived repr instead.
    """
    def __init__(self):
        self.result = []
        self.charpos = 0
        self.lineno = 1
        self.linebreakok = True
        
        #: How good this represention is?
        self.score = 0

    def mark(self):
        return (len(self.result), self.charpos,
                self.lineno, self.linebreakok, self.score)

    def restore(self, mark):
        n, self.charpos, self.lineno, self.linebreakok, self.score = mark
        del self.result[n:]

class _Maxlines(Exception):
    """A control-flow exception that is raised when PyvalColorizer
    exeeds the maximum number of allowed lines."""
    
class _Linebreak(Exception):
    """A control-flow exception that is raised when PyvalColorizer
    generates a string containing a newline, but the state object's
    linebreakok variable is False."""

class ColorizedPyvalRepr(ParsedEpytextDocstring):
    """
    @ivar score: A score, evaluating how good this repr is.
    @ivar is_complete: True if this colorized repr completely describes
       the object.
    """
    def __init__(self, tree, score, is_complete):
        ParsedEpytextDocstring.__init__(self, tree)
        self.score = score
        self.is_complete = is_complete

def colorize_pyval(pyval, parse_repr=None, min_score=None,
                   linelen=75, maxlines=5, linebreakok=True, sort=True):
    return PyvalColorizer(linelen, maxlines, linebreakok, sort).colorize(
        pyval, parse_repr, min_score)

class PyvalColorizer:
    """
    Syntax highlighter for Python values.
    """

    def __init__(self, linelen=75, maxlines=5, linebreakok=True, sort=True):
        self.linelen = linelen
        self.maxlines = maxlines
        self.linebreakok = linebreakok
        self.sort = sort

    #////////////////////////////////////////////////////////////
    # Colorization Tags & other constants
    #////////////////////////////////////////////////////////////

    GROUP_TAG = 'variable-group'     # e.g., "[" and "]"
    COMMA_TAG = 'variable-op'        # The "," that separates elements
    COLON_TAG = 'variable-op'        # The ":" in dictionaries
    CONST_TAG = None                 # None, True, False
    NUMBER_TAG = None                # ints, floats, etc
    QUOTE_TAG = 'variable-quote'     # Quotes around strings.
    STRING_TAG = 'variable-string'   # Body of string literals

    RE_CHAR_TAG = None
    RE_GROUP_TAG = 're-group'
    RE_REF_TAG = 're-ref'
    RE_OP_TAG = 're-op'
    RE_FLAGS_TAG = 're-flags'

    ELLIPSIS = Element('code', u'...', style='variable-ellipsis')
    LINEWRAP = Element('symbol', u'crarr')
    UNKNOWN_REPR = Element('code', u'??', style='variable-unknown')
    
    GENERIC_OBJECT_RE = re.compile(r'^<.* at 0x[0-9a-f]+>$', re.IGNORECASE)

    ESCAPE_UNICODE = False # should we escape non-ascii unicode chars?

    #////////////////////////////////////////////////////////////
    # Entry Point
    #////////////////////////////////////////////////////////////

    def colorize(self, pyval, parse_repr=None, min_score=None):
        """
        @return: A L{ColorizedPyvalRepr} describing the given pyval.
        """
        UNKNOWN = epydoc.apidoc.UNKNOWN
        # Create an object to keep track of the colorization.
        state = _ColorizerState()
        state.linebreakok = self.linebreakok
        # Colorize the value.  If we reach maxlines, then add on an
        # ellipsis marker and call it a day.
        try:
            if pyval is not UNKNOWN:
                self._colorize(pyval, state)
            elif parse_repr not in (None, UNKNOWN):
                self._output(parse_repr, None, state)
            else:
                state.result.append(PyvalColorizer.UNKNOWN_REPR)
            is_complete = True
        except (_Maxlines, _Linebreak):
            if self.linebreakok:
                state.result.append('\n')
                state.result.append(self.ELLIPSIS)
            else:
                if state.result[-1] is self.LINEWRAP:
                    state.result.pop()
                self._trim_result(state.result, 3)
                state.result.append(self.ELLIPSIS)
            is_complete = False
        # If we didn't score high enough, then try again.
        if (pyval is not UNKNOWN and parse_repr not in (None, UNKNOWN)
            and min_score is not None and state.score < min_score):
            return self.colorize(UNKNOWN, parse_repr)
        # Put it all together.
        tree = Element('epytext', *state.result)
        return ColorizedPyvalRepr(tree, state.score, is_complete)

    def _colorize(self, pyval, state):
        pyval_type = type(pyval)
        state.score += 1
        
        if pyval is None or pyval is True or pyval is False:
            self._output(unicode(pyval), self.CONST_TAG, state)
        elif pyval_type in (int, float, long, types.ComplexType):
            self._output(unicode(pyval), self.NUMBER_TAG, state)
        elif pyval_type is str:
            self._colorize_str(pyval, state, '', 'string-escape')
        elif pyval_type is unicode:
            if self.ESCAPE_UNICODE:
                self._colorize_str(pyval, state, 'u', 'unicode-escape')
            else:
                self._colorize_str(pyval, state, 'u', None)
        elif pyval_type is list:
            self._multiline(self._colorize_iter, pyval, state, '[', ']')
        elif pyval_type is tuple:
            self._multiline(self._colorize_iter, pyval, state, '(', ')')
        elif pyval_type is set:
            self._multiline(self._colorize_iter, self._sort(pyval),
                            state, 'set([', '])')
        elif pyval_type is frozenset:
            self._multiline(self._colorize_iter, self._sort(pyval),
                            state, 'frozenset([', '])')
        elif pyval_type is dict:
            self._multiline(self._colorize_dict, self._sort(pyval.items()),
                            state, '{', '}')
        elif is_re_pattern(pyval):
            self._colorize_re(pyval, state)
        else:
            try:
                pyval_repr = repr(pyval)
                if not isinstance(pyval_repr, (str, unicode)):
                    pyval_repr = unicode(pyval_repr)
                pyval_repr_ok = True
            except KeyboardInterrupt:
                raise
            except:
                pyval_repr_ok = False
                state.score -= 100

            if pyval_repr_ok:
                if self.GENERIC_OBJECT_RE.match(pyval_repr):
                    state.score -= 5
                self._output(pyval_repr, None, state)
            else:
                state.result.append(self.UNKNOWN_REPR)

    def _sort(self, items):
        if not self.sort: return items
        try: return sorted(items)
        except KeyboardInterrupt: raise
        except: return items
        
    def _trim_result(self, result, num_chars):
        while num_chars > 0:
            if not result: return 
            if isinstance(result[-1], Element):
                assert len(result[-1].children) == 1
                trim = min(num_chars, len(result[-1].children[0]))
                result[-1].children[0] = result[-1].children[0][:-trim]
                if not result[-1].children[0]: result.pop()
                num_chars -= trim
            else:
                trim = min(num_chars, len(result[-1]))
                result[-1] = result[-1][:-trim]
                if not result[-1]: result.pop()
                num_chars -= trim

    #////////////////////////////////////////////////////////////
    # Object Colorization Functions
    #////////////////////////////////////////////////////////////

    def _multiline(self, func, pyval, state, *args):
        """
        Helper for container-type colorizers.  First, try calling
        C{func(pyval, state, *args)} with linebreakok set to false;
        and if that fails, then try again with it set to true.
        """
        linebreakok = state.linebreakok
        mark = state.mark()
        
        try:
            state.linebreakok = False
            func(pyval, state, *args)
            state.linebreakok = linebreakok

        except _Linebreak:
            if not linebreakok:
                raise
            state.restore(mark)
            func(pyval, state, *args)
            
    def _colorize_iter(self, pyval, state, prefix, suffix):
        self._output(prefix, self.GROUP_TAG, state)
        indent = state.charpos
        for i, elt in enumerate(pyval):
            if i>=1:
                if state.linebreakok:
                    self._output(',', self.COMMA_TAG, state)
                    self._output('\n'+' '*indent, None, state)
                else:
                    self._output(', ', self.COMMA_TAG, state)
            self._colorize(elt, state)
        self._output(suffix, self.GROUP_TAG, state)

    def _colorize_dict(self, items, state, prefix, suffix):
        self._output(prefix, self.GROUP_TAG, state)
        indent = state.charpos
        for i, (key, val) in enumerate(items):
            if i>=1:
                if state.linebreakok:
                    self._output(',', self.COMMA_TAG, state)
                    self._output('\n'+' '*indent, None, state)
                else:
                    self._output(', ', self.COMMA_TAG, state)
            self._colorize(key, state)
            self._output(': ', self.COLON_TAG, state)
            self._colorize(val, state)
        self._output(suffix, self.GROUP_TAG, state)

    def _colorize_str(self, pyval, state, prefix, encoding):
        # Decide which quote to use.
        if '\n' in pyval and state.linebreakok: quote = "'''"
        else: quote = "'"
        # Divide the string into lines.
        if state.linebreakok:
            lines = pyval.split('\n')
        else:
            lines = [pyval]
        # Open quote.
        self._output(prefix+quote, self.QUOTE_TAG, state)
        # Body
        for i, line in enumerate(lines):
            if i>0: self._output('\n', None, state)
            if encoding: line = line.encode(encoding)
            self._output(line, self.STRING_TAG, state)
        # Close quote.
        self._output(quote, self.QUOTE_TAG, state)

    def _colorize_re(self, pyval, state):
        # Extract the flag & pattern from the regexp.
        pat, flags = pyval.pattern, pyval.flags
        # If the pattern is a string, decode it to unicode.
        if isinstance(pat, str):
            pat = decode_with_backslashreplace(pat)
        # Parse the regexp pattern.
        tree = sre_parse.parse(pat, flags)
        groups = dict([(num,name) for (name,num) in
                       tree.pattern.groupdict.items()])
        # Colorize it!
        self._output("re.compile(r'", None, state)
        self._colorize_re_flags(tree.pattern.flags, state)
        self._colorize_re_tree(tree, state, True, groups)
        self._output("')", None, state)

    def _colorize_re_flags(self, flags, state):
        if flags:
            flags = [c for (c,n) in sorted(sre_parse.FLAGS.items())
                     if (n&flags)]
            flags = '(?%s)' % ''.join(flags)
            self._output(flags, self.RE_FLAGS_TAG, state)

    def _colorize_re_tree(self, tree, state, noparen, groups):
        assert noparen in (True, False)
        if len(tree) > 1 and not noparen:
            self._output('(', self.RE_GROUP_TAG, state)
        for elt in tree:
            op = elt[0]
            args = elt[1]
    
            if op == sre_constants.LITERAL:
                c = unichr(args)
                # Add any appropriate escaping.
                if c in '.^$\\*+?{}[]|()\'': c = '\\'+c
                elif c == '\t': c = '\\t'
                elif c == '\r': c = '\\r'
                elif c == '\n': c = '\\n'
                elif c == '\f': c = '\\f'
                elif c == '\v': c = '\\v'
                elif ord(c) > 0xffff: c = r'\U%08x' % ord(c)
                elif ord(c) > 0xff: c = r'\u%04x' % ord(c)
                elif ord(c)<32 or ord(c)>=127: c = r'\x%02x' % ord(c)
                self._output(c, self.RE_CHAR_TAG, state)
            
            elif op == sre_constants.ANY:
                self._output('.', self.RE_CHAR_TAG, state)
                
            elif op == sre_constants.BRANCH:
                if args[0] is not None:
                    raise ValueError('Branch expected None arg but got %s'
                                     % args[0])
                for i, item in enumerate(args[1]):
                    if i > 0:
                        self._output('|', self.RE_OP_TAG, state)
                    self._colorize_re_tree(item, state, True, groups)
                
            elif op == sre_constants.IN:
                if (len(args) == 1 and args[0][0] == sre_constants.CATEGORY):
                    self._colorize_re_tree(args, state, False, groups)
                else:
                    self._output('[', self.RE_GROUP_TAG, state)
                    self._colorize_re_tree(args, state, True, groups)
                    self._output(']', self.RE_GROUP_TAG, state)
                    
            elif op == sre_constants.CATEGORY:
                if args == sre_constants.CATEGORY_DIGIT: val = r'\d'
                elif args == sre_constants.CATEGORY_NOT_DIGIT: val = r'\D'
                elif args == sre_constants.CATEGORY_SPACE: val = r'\s'
                elif args == sre_constants.CATEGORY_NOT_SPACE: val = r'\S'
                elif args == sre_constants.CATEGORY_WORD: val = r'\w'
                elif args == sre_constants.CATEGORY_NOT_WORD: val = r'\W'
                else: raise ValueError('Unknown category %s' % args)
                self._output(val, self.RE_CHAR_TAG, state)
                
            elif op == sre_constants.AT:
                if args == sre_constants.AT_BEGINNING_STRING: val = r'\A'
                elif args == sre_constants.AT_BEGINNING: val = r'^'
                elif args == sre_constants.AT_END: val = r'$'
                elif args == sre_constants.AT_BOUNDARY: val = r'\b'
                elif args == sre_constants.AT_NON_BOUNDARY: val = r'\B'
                elif args == sre_constants.AT_END_STRING: val = r'\Z'
                else: raise ValueError('Unknown position %s' % args)
                self._output(val, self.RE_CHAR_TAG, state)
                
            elif op in (sre_constants.MAX_REPEAT, sre_constants.MIN_REPEAT):
                minrpt = args[0]
                maxrpt = args[1]
                if maxrpt == sre_constants.MAXREPEAT:
                    if minrpt == 0:   val = '*'
                    elif minrpt == 1: val = '+'
                    else: val = '{%d,}' % (minrpt)
                elif minrpt == 0:
                    if maxrpt == 1: val = '?'
                    else: val = '{,%d}' % (maxrpt)
                elif minrpt == maxrpt:
                    val = '{%d}' % (maxrpt)
                else:
                    val = '{%d,%d}' % (minrpt, maxrpt)
                if op == sre_constants.MIN_REPEAT:
                    val += '?'
                    
                self._colorize_re_tree(args[2], state, False, groups)
                self._output(val, self.RE_OP_TAG, state)
                
            elif op == sre_constants.SUBPATTERN:
                if args[0] is None:
                    self._output('(?:', self.RE_GROUP_TAG, state)
                elif args[0] in groups:
                    self._output('(?P<', self.RE_GROUP_TAG, state)
                    self._output(groups[args[0]], self.RE_REF_TAG, state)
                    self._output('>', self.RE_GROUP_TAG, state)
                elif isinstance(args[0], (int, long)):
                    # This is cheating:
                    self._output('(', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?P<', self.RE_GROUP_TAG, state)
                    self._output(args[0], self.RE_REF_TAG, state)
                    self._output('>', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)
    
            elif op == sre_constants.GROUPREF:
                self._output('\\%d' % args, self.RE_REF_TAG, state)
    
            elif op == sre_constants.RANGE:
                self._colorize_re_tree( ((sre_constants.LITERAL, args[0]),),
                                        state, False, groups )
                self._output('-', self.RE_OP_TAG, state)
                self._colorize_re_tree( ((sre_constants.LITERAL, args[1]),),
                                        state, False, groups )
                
            elif op == sre_constants.NEGATE:
                self._output('^', self.RE_OP_TAG, state)
    
            elif op == sre_constants.ASSERT:
                if args[0] > 0:
                    self._output('(?=', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?<=', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)
                               
            elif op == sre_constants.ASSERT_NOT:
                if args[0] > 0:
                    self._output('(?!', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?<!', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)
    
            elif op == sre_constants.NOT_LITERAL:
                self._output('[^', self.RE_GROUP_TAG, state)
                self._colorize_re_tree( ((sre_constants.LITERAL, args),),
                                        state, False, groups )
                self._output(']', self.RE_GROUP_TAG, state)
            else:
                log.error("Error colorizing regexp: unknown elt %r" % elt)
        if len(tree) > 1 and not noparen: 
            self._output(')', self.RE_GROUP_TAG, state)
                           
    #////////////////////////////////////////////////////////////
    # Output function
    #////////////////////////////////////////////////////////////

    def _output(self, s, tag, state):
        """
        Add the string `s` to the result list, tagging its contents
        with tag `tag`.  Any lines that go beyond `self.linelen` will
        be line-wrapped.  If the total number of lines exceeds
        `self.maxlines`, then raise a `_Maxlines` exception.
        """
        # Make sure the string is unicode.
        if isinstance(s, str):
            s = decode_with_backslashreplace(s)
        
        # Split the string into segments.  The first segment is the
        # content to add to the current line, and the remaining
        # segments are new lines.
        segments = s.split('\n')

        for i, segment in enumerate(segments):
            # If this isn't the first segment, then add a newline to
            # split it from the previous segment.
            if i > 0:
                if (state.lineno+1) > self.maxlines:
                    raise _Maxlines()
                if not state.linebreakok:
                    raise _Linebreak()
                state.result.append(u'\n')
                state.lineno += 1
                state.charpos = 0

            # If the segment fits on the current line, then just call
            # markup to tag it, and store the result.
            if state.charpos + len(segment) <= self.linelen:
                state.charpos += len(segment)
                if tag:
                    segment = Element('code', segment, style=tag)
                state.result.append(segment)

            # If the segment doesn't fit on the current line, then
            # line-wrap it, and insert the remainder of the line into
            # the segments list that we're iterating over.  (We'll go
            # the the beginning of the next line at the start of the
            # next iteration through the loop.)
            else:
                split = self.linelen-state.charpos
                segments.insert(i+1, segment[split:])
                segment = segment[:split]
                if tag:
                    segment = Element('code', segment, style=tag)
                state.result += [segment, self.LINEWRAP]

www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.