main.py :  » Network » Grail-Internet-Browser » grail-0.6 » printing » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Network » Grail Internet Browser 
Grail Internet Browser » grail 0.6 » printing » main.py
"""HTML to PostScript translator.

This module uses the AbstractWriter class interface defined in the
standard formatter module to generate PostScript corresponding to a
stream of HTML text.  The HTMLParser class scans the HTML stream,
generating high-level calls to an AbstractWriter object.

Note that this module can be run as a standalone script for command
line conversion of HTML files to PostScript.  Use the '-h' option to
see information about all-too-many command-line options.

"""

import os
import sys
import posixpath
import string
import traceback
import urllib
import urlparse

from types import TupleType

# local modules:
import epstools
import fonts                            # nested package
import utils
import PSParser
import PSWriter

from grailbase.uricontext import URIContext


# When true, run() ends the current page and starts a new one (bumping the
# page number) before feeding each subdocument in multi-document mode;
# when false it only switches the context and writer URL.
MULTI_DO_PAGE_BREAK = 1                 # changing this breaks stuff




#  The main program.  Really needs to be broken up a bit!


def run(app):
    """Convert the HTML named on the command line (or stdin) to PostScript.

    Parses sys.argv, adjusts the settings object obtained from
    settings.get_settings(app.prefs) according to the options, opens the
    input and output streams, and feeds the HTML through the "text/html"
    printing parser into a PSWriter.  In multi-document mode (-m, or more
    than one file argument) every collected subdocument is fed through
    the same parser and writer as well.

    app -- application object; its prefs attribute and its
           find_type_extension() method are used here, and it is passed
           on to load_tag_handler() and get_ctype().

    Side effects: sets the module-global 'logfile' to the --logfile
    argument (or None) and, when a log file is given and can be opened,
    points sys.stderr at it.

    Calls sys.exit() after printing the usage message when help is
    requested (status 0), option parsing fails (status 1) or --tags
    cannot be loaded (status 2), and when no parser for "text/html" can
    be found.
    """
    global logfile
    import getopt
    import paper
    import settings
    settings = settings.get_settings(app.prefs)
    # do this after loading the settings so the user can just call
    # get_settings() w/out an arg to get a usable object.
    load_rcscript()
    context = None
    help = None
    error = 0
    logfile = None
    title = ''
    url = ''
    tabstop = None
    multi = 0
    verbose = 0
    printer = None
    copies = 1
    levels = None
    outfile = None
    #
    try:
        # 'i' must be listed here for the '-i' branch below to be reachable.
        options, args = getopt.getopt(sys.argv[1:],
                                      'mvhdcaiUl:u:t:sp:o:f:C:P:T:',
                                      ['color',
                                       'copies=',
                                       'debug',
                                       'fontsize=',
                                       'footnote-anchors',
                                       'help',
                                       'images',
                                       'logfile=',
                                       'multi',
                                       'orientation=',
                                       'output=',
                                       'papersize=',
                                       'paragraph-indent=',
                                       'paragraph-skip=',
                                       'printer=',
                                       'strict-parsing',
                                       'tab-width=',
                                       'tags=',
                                       'title=',
                                       'underline-anchors',
                                       'url=',
                                       'verbose',
                                       ])
    except getopt.error, err:
        error = 1
        help = 1
        options = ()
        sys.stderr.write("option failure: %s\n" % err)
    for opt, arg in options:
        if opt in ('-h', '--help'):
            help = 1
        elif opt in ('-a', '--footnote-anchors'):
            settings.footnoteflag = not settings.footnoteflag
        elif opt in ('-i', '--images'):
            settings.imageflag = not settings.imageflag
        elif opt in ('-d', '--debug'):
            utils.set_debugging(1)
        elif opt in ('-l', '--logfile'):
            logfile = arg
        elif opt in ('-o', '--orientation'):
            settings.orientation = arg
        elif opt in ('-f', '--fontsize'):
            settings.set_fontsize(arg)
        elif opt in ('-t', '--title'):
            title = arg
        elif opt in ('-u', '--url'):
            url = arg
        elif opt in ('-U', '--underline-anchors'):
            settings.underflag = not settings.underflag
        elif opt in ('-c', '--color'):
            settings.greyscale = not settings.greyscale
        elif opt in ('-p', '--papersize'):
            settings.papersize = arg
        elif opt in ('-s', '--strict-parsing'):
            settings.strict_parsing = not settings.strict_parsing
        elif opt in ('-C', '--copies'):
            copies = string.atoi(arg)
        elif opt in ('-P', '--printer'):
            printer = arg
        elif opt in ('-T', '--tab-width'):
            tabstop = string.atof(arg)
        elif opt in ('-m', '--multi'):
            multi = 1
        elif opt in ('-v', '--verbose'):
            verbose = verbose + 1
        elif opt == '--output':
            outfile = arg
        elif opt == '--tags':
            if not load_tag_handler(app, arg):
                error = 2
                help = 1
        elif opt == '--paragraph-indent':
            # negative indents should indicate hanging indents, but we don't
            # do those yet, so force to normal interpretation
            settings.paragraph_indent = max(string.atof(arg), 0.0)
        elif opt == '--paragraph-skip':
            settings.paragraph_skip = max(string.atof(arg), 0.0)
    if help:
        usage(settings)
        sys.exit(error)
    # crack open log file if given
    stderr = sys.stderr
    if logfile:
        try: sys.stderr = open(logfile, 'a')
        except IOError: sys.stderr = stderr
    utils.debug("Using Python version " + sys.version)
    # crack open the input file, or stdin
    outfp = None
    if printer:
        if copies < 1:
            copies = 1
        outfile = "|lpr -#%d -P%s" % (copies, printer)
    if args:
        infile = args[0]
        if args[1:]:
            # extra file arguments are explicit subdocuments
            multi = 1
        infp, outfn = open_source(infile)
        if not outfile:
            outfile = (os.path.splitext(outfn)[0] or 'index') + '.ps'
    else:
        infile = None
        infp = sys.stdin
        # NOTE(review): this discards any --output / -P choice when
        # reading from stdin -- confirm that is intended.
        outfile = '-'
    #
    # open the output file
    #
    if outfile[0] == '|':
        cmd = string.strip(outfile[1:])
        outfile = '|' + cmd
        outfp = os.popen(cmd, 'w')
    elif outfile == '-':
        outfp = sys.stdout
    else:
        outfp = open(outfile, 'w')
    if outfile != '-':
        print 'Outputting PostScript to', outfile

    if url:
        context = URIContext(url)
    elif infile:
        url = infile
        context = URIContext(url)
    else:
        # BOGOSITY: reading from stdin
        context = URIContext("file:/index.html")
    context.app = app
    # Use the paper module imported at the top of this function; no
    # 'printing' name is bound in this scope.
    paperinfo = paper.PaperInfo(settings.papersize,
                                margins=settings.margins,
                                rotation=settings.orientation)
    if tabstop and tabstop > 0:
        paperinfo.TabStop = tabstop
    if utils.get_debugging('paper'):
        paperinfo.dump()
    # create the writer & parser
    fontsize, leading = settings.get_fontsize()
    w = PSWriter.PSWriter(outfp, title or None, url or '',
                          #varifamily='Palatino',
                          paper=paperinfo, settings=settings)
    ctype = "text/html"
    mod = app.find_type_extension("printing.filetypes", ctype)
    if not mod.parse:
        sys.exit("cannot load printing support for " + ctype)
    p = mod.parse(w, settings, context)
    if multi:
        if args[1:]:
            xform = explicit_multi_transform(args[1:])
        else:
            xform = multi_transform(context, levels)
        p.add_anchor_transform(xform)
        p.feed(infp.read())
        docs = [(context.get_url(), 1, w.ps.get_title(), 1)]
        #
        # This relies on xform.get_subdocs() returning the list used
        # internally to accumulate subdocs.  Make a copy to go only one
        # level deep.
        #
        for url in xform.get_subdocs():
            xform.set_basedoc(url)
            # close whatever elements the previous document left open
            while p.sgml_parser.get_depth():
                p.sgml_parser.lex_endtag(p.sgml_parser.get_stack()[0])
            try:
                infp, fn = open_source(url)
            except IOError, err:
                if verbose and outfp is not sys.stdout:
                    print "Error opening subdocument", url
                    print "   ", err
            else:
                new_ctype = get_ctype(app, url, infp)
                if new_ctype != ctype:
                    # don't leak the stream of a document we won't print
                    infp.close()
                    # keep progress chatter out of PostScript sent to
                    # stdout, like the other messages in this loop
                    if verbose and outfp is not sys.stdout:
                        print "skipping", url
                        print "  wrong content type:", new_ctype
                    continue
                if verbose and outfp is not sys.stdout:
                    print "Subdocument", url
                w.ps.close_line()
                if MULTI_DO_PAGE_BREAK: # must be true for now, not sure why
                    pageend = w.ps.push_page_end()
                    context.set_url(url)
                    w.ps.set_pageno(w.ps.get_pageno() + 1)
                    w.ps.set_url(url)
                    w.ps.push_page_start(pageend)
                else:
                    context.set_url(url)
                    w.ps.set_url(url)
                pageno = w.ps.get_pageno()
                p.feed(infp.read())
                infp.close()
                title = w.ps.get_title()
                p._set_docinfo(url, pageno, title)
                spec = (url, pageno, title, xform.get_level(url))
                docs.append(spec)
    else:
        p.feed(infp.read())
    p.close()
    w.close()



#  Lots of helper functions....


def load_tag_handler(app, arg):
    """Register extra tag handlers named by the --tags option.

    app -- application object; app.get_loader("html.postscript") must
           return a loader with add_directory() and load_tag_handlers().
    arg -- path (relative to the current directory, or absolute) of a
           directory to add to the loader, or of a '.py' file whose
           module is imported and handed to the loader.

    Returns 1 on success and 0 when arg is neither a directory nor a
    '.py' file.  On failure an explanation is written to sys.stderr and
    sys.stdout is left pointing at sys.stderr, so output printed
    afterwards (such as the usage message) goes there too.
    """
    loader = app.get_loader("html.postscript")
    narg = os.path.join(os.getcwd(), arg)
    if os.path.isdir(narg):
        loader.add_directory(narg)
    elif os.path.isfile(narg):
        basename, ext = os.path.splitext(narg)
        if ext != ".py":
            sys.stdout = sys.stderr
            sys.stdout.write("Extra tags must be defined in a"
                             " Python source file with '.py' extension.\n")
            sys.stdout.write("\n")
            return 0
        dirname, modname = os.path.split(basename)
        oldpath = sys.path
        try:
            # Put the file's directory first so it wins over other modules
            # of the same name; the original path object is restored below.
            sys.path = [dirname] + oldpath
            # Import by name rather than exec'ing a statement built from
            # the file name; sys.modules yields the module itself even if
            # the name contains dots.
            __import__(modname)
            loader.load_tag_handlers(sys.modules[modname])
        finally:
            sys.path = oldpath
    else:
        sys.stdout = sys.stderr
        sys.stdout.write("Could not locate tag handler %s\n" % arg)
        sys.stdout.write(
            "\n"
            "Argument to --tags must be a directory to be added to the html\n"
            "package or a file containing tag handler functions.  The tag\n"
            "handlers defined in the directory or file will take precedence\n"
            "over any defined in other extensions.\n"
            "\n")
        return 0
    return 1


def get_ctype(app, url, infp):
    """Return the MIME content type for an opened document.

    The "content-type" entry of infp.info() is preferred; when infp has
    no info() method or the entry is missing, the first element of
    app.guess_type(url) is returned instead.
    """
    try:
        headers = infp.info()
        ctype = headers["content-type"]
    except (AttributeError, KeyError):
        # plain file objects have no info(); fall back to guessing
        ctype = app.guess_type(url)[0]
    return ctype


def load_rcscript():
    """Import the user's optional html2psrc module, if one can be found.

    Does nothing when grailutil cannot be imported or when the "user"
    directory below grailutil.getgraildir() does not exist.  Otherwise
    that directory is put at the front of sys.path and html2psrc is
    imported; a missing module is ignored, and any other failure inside
    it is reported on sys.stderr without being propagated.
    """
    try:
        import grailutil
    except ImportError:
        return
    userdir = os.path.join(grailutil.getgraildir(), "user")
    if not os.path.isdir(userdir):
        return
    sys.path.insert(0, userdir)
    try:
        import html2psrc
    except ImportError:
        # no rc script; nothing to do
        pass
    except:
        # best effort: a broken rc script must not stop the conversion
        traceback.print_exc()
        sys.stderr.write("[Traceback generated in html2psrc module.]\n")


def open_source(infile):
    """Open infile as a local file or, failing that, as a URL.

    Returns a (stream, name) pair.  For a local file the name is infile
    itself; for a URL it is the last component of the URL's path.
    """
    try:
        stream = open(infile, 'r')
    except IOError:
        pass
    else:
        return stream, infile
    # derive file object via URL; still needs to be HTML.
    stream = urllib.urlopen(infile)
    # use posixpath since URLs are expected to be POSIX-like; don't risk
    # that we're running on NT and os.path.basename() doesn't "do the
    # right thing."
    url_path = urlparse.urlparse(infile)[2]
    return stream, posixpath.basename(url_path)


class multi_transform:
    """Anchor transform that collects linked documents for multi mode.

    run() registers an instance with the parser via
    add_anchor_transform(); presumably the parser then calls it for each
    anchor.  URLs are always returned unchanged.  A URL is recorded as a
    subdocument when it has no parameters or query, shares the base
    document's scheme and (case-insensitive) host, has a path that starts
    with the base document's directory path, has not been seen before,
    and the level limit (if any) has not been reached.
    """

    # Position in the subdocument list of the document currently being
    # processed; None until set_basedoc() finds it there.
    __base_index = None

    def __init__(self, context, levels=None):
        """Remember the base document taken from context.get_baseurl().

        context -- object providing an 'app' attribute and get_baseurl().
        levels  -- maximum link depth to follow; None or 0 means no limit.
        """
        self.__app = context.app
        baseurl = context.get_baseurl()
        scheme, netloc, path, params, query, frag = urlparse.urlparse(baseurl)
        self.__scheme = scheme
        self.__netloc = netloc.lower()
        # URL paths are POSIX-like whatever the host platform, so split
        # them with posixpath rather than os.path.
        self.__path = posixpath.dirname(path)
        self.__subdocs = []
        self.__max_levels = levels
        self.__level = 0
        # maps each known document URL to its link depth
        self.__docs = {baseurl: 0}

    def __call__(self, url, attrs):
        """Record url as a subdocument if it qualifies; return url as is.

        attrs is accepted but not used.
        """
        scheme, netloc, path, params, query, frag = urlparse.urlparse(url)
        if params or query:             # safety restraint
            return url
        netloc = netloc.lower()
        if scheme != self.__scheme or netloc != self.__netloc:
            return url
        # check the paths:
        stored_url = urlparse.urlunparse((scheme, netloc, path, '', '', ''))
        if stored_url in self.__docs:
            return url
        # plain string-prefix test against the base directory path
        if path[:len(self.__path)] != self.__path:
            return url
        if not self.__max_levels or self.__level < self.__max_levels:
            self.__docs[stored_url] = self.__level + 1
            self.insert(stored_url)
        return url

    def get_subdocs(self):
        """Return the internal (live) list of collected subdocument URLs."""
        return self.__subdocs

    def set_basedoc(self, url):
        """Make url the document whose links are currently being collected.

        Its recorded depth (1 if unknown) becomes the current level, and
        its position in the subdocument list, if present, is remembered
        for insert().
        """
        self.__level = self.__docs.get(url, 1)
        self.__current_base = url
        try:
            self.__base_index = self.__subdocs.index(url)
        except ValueError:
            self.__base_index = None

    def insert(self, url):
        """Add url to the subdocument list.

        With no current base position the URL is appended.  Otherwise the
        entries after the base document are scanned while their directory
        equals url's directory, and url is inserted at the resulting
        index.
        """
        if self.__base_index is None:
            self.__subdocs.append(url)
            return
        i = self.__base_index + 1
        basepath = posixpath.dirname(urlparse.urlparse(url)[2])
        while i < len(self.__subdocs):
            path = posixpath.dirname(urlparse.urlparse(self.__subdocs[i])[2])
            # NOTE(review): the index is advanced before the comparison,
            # so url lands after the first entry from a different
            # directory -- confirm this is intended.
            i = i + 1
            if path != basepath:
                break
        self.__subdocs.insert(i, url)

    def get_level(self, url):
        """Return the recorded link depth of url (KeyError if unknown)."""
        return self.__docs[url]


class explicit_multi_transform:
    """Anchor transform for an explicitly given list of subdocuments.

    Offers the same methods run() uses on multi_transform, but never
    collects anything from anchors: the subdocuments are exactly those
    passed to the constructor, all reported at level 1.
    """

    def __init__(self, subdocs):
        """Keep a private copy of the sequence of subdocument URLs."""
        self.__subdocs = list(subdocs)

    def __call__(self, url, attrs):
        """Return url unchanged; attrs is ignored."""
        return url

    def get_subdocs(self):
        """Return a fresh list of the subdocument URLs."""
        return list(self.__subdocs)

    def set_basedoc(self, url):
        """Accepted for interface compatibility; does nothing."""
        pass

    def get_level(self, url):
        """Return 1 for every URL."""
        return 1


def usage(settings):
    import printing.paper
    #
    progname = os.path.basename(sys.argv[0])
    print 'Usage:', progname, '[options] [file-or-url]'
    print '    -u: URL for footer'
    print '    -t: title for header'
    print '    -a: toggle anchor footnotes (default is %s)' \
          % _onoff(settings.footnoteflag)
    print '    -U: toggle anchor underlining (default is %s)' \
          % _onoff(settings.underflag)
    print '    -o: orientation; portrait, landscape, or seascape'
    print '    -p: paper size; letter, legal, a4, etc.',
    print '(default is %s)' % settings.papersize
    print '    -f: font size, in points (default is %s/%s)' \
          % settings.get_fontsize()
    print '    -d: turn on debugging'
    print '    -l: logfile for debugging, otherwise stderr'
    print '    -s: toggle "advanced" SGML recognition (default is %s)'\
          % _onoff(settings.strict_parsing)
    print '    -T: size of tab stop in points (default is %s)' \
          % printing.paper.PaperInfo.TabStop
    print '    -P: specify output printer'
    print '    -m: descend tree starting from specified document,'
    print '        printing all HTML documents found'
    print '    -h: this help message'
    print '[file]: file to convert, otherwise from stdin'


def _onoff(bool):
    return bool and "ON" or "OFF"


#  main() & relations....


import BaseApplication


class Application(BaseApplication.BaseApplication):
    """Application object created by main() for command-line runs."""

    def __init__(self, prefs=None):
        """Initialise the base class and attach a read-only history."""
        BaseApplication.BaseApplication.__init__(self, prefs)
        import GlobalHistory
        history = GlobalHistory.GlobalHistory(self, readonly=1)
        self.global_history = history

    def exception_dialog(self, message='', *args):
        """Print the current traceback, then message if it is non-empty.

        Extra positional arguments are accepted and ignored.
        """
        traceback.print_exc()
        if not message:
            return
        sys.stderr.write(message + "\n")


def main():
    """Create the Application and run the converter.

    A KeyboardInterrupt ends the process with exit status 1; the
    traceback is shown first when debugging is enabled.
    """
    app = Application()
    try:
        run(app)
    except KeyboardInterrupt:
        show_traceback = utils.get_debugging()
        if show_traceback:
            app.exception_dialog()
        sys.exit(1)


def profile_main(n=18):
    import profile, pstats
    print "Running under profiler...."
    profiler = profile.Profile()
    try:
        profiler.runctx('main()', globals(), locals())
    finally:
        sys.stdout = logfile
        profiler.dump_stats('@html2ps.prof')
        p = pstats.Stats('@html2ps.prof')
        p.strip_dirs().sort_stats('time').print_stats(n)
        p.print_callers(n)
        p.sort_stats('cum').print_stats(n)
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.