# Sketch - A Python-based interactive drawing program
# Copyright (C) 1998, 2000, 2004 by Bernhard Herzog
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# A simple parser for PostScript files that conform to the Document
# Structuring Conventions (DSC).
#
# In its current form this is mainly intended for parsing EPS files and
# extracting the information necessary for Sketch (BoundingBox and
# resource dependencies).
#
import re, string
from string import split,strip,atof
import streamfilter
# Use warn() and INTERNAL from the warn module when it can be imported
# (it is not part of the standard library; presumably Sketch's own
# warning module).  Otherwise define stand-ins so that this parser still
# works: INTERNAL becomes None and warn() accepts any arguments and does
# nothing.
try:
    from warn import warn, INTERNAL
except ImportError:
    INTERNAL = None

    def warn(*args):
        # silently discard the warning
        return None
# Match a line containing a DSC comment: '%%' at the start of the line,
# followed by the keyword (letters, or '+' which parse_header treats as
# a continuation of the previous comment) and an optional colon.  Group
# 1 is the keyword.
rx_dsccomment = re.compile('^%%([a-zA-Z+]+):?')

# Match the beginning of an EPS file: a first line starting with '%!'
# that contains 'EPSF'.
rx_eps_magic = re.compile('^%!.*EPSF')

# The characters with the codes 33 to 126, i.e. the printable ASCII
# characters without the space.  parse_header uses this to recognize
# the end of the header comments.
endcommentchars = ''.join(map(chr, range(33, 127)))

# Value of a header comment whose actual value is deferred;
# parse_eps_file reads the %%Trailer section when it was seen.
ATEND = '(atend)'
class DSCError(Exception):
    """Error raised by the DSC parsing functions.

    skip_to_comment raises it when the requested DSC comment is not
    found, parse_eps_file when the file does not start like an EPS
    file.
    """
#
# Class EpsInfo
#
# The instance variables of this class are the key/value pairs extracted
# from the header comments of an EPS file.
#
# BoundingBox:
#
# The bounding box of the document as a 4-tuple of floats. The DSC
# say that the BoundingBox should be given in UINTs but since some
# programs (incorrectly) use floats here we also use float here.
#
# DocumentNeededResources:
#
# A dictionary describing the resources needed by the document.
# The information is stored in the *keys* of the dictionary.
#
# A key has the form (TYPE, VALUE) where TYPE is a string giving
# the resource type (such as 'font') and value is a string
# describing the resource (such as 'Times-Roman')
#
# DocumentSuppliedResources:
#
# The resources supplied by the document in the same format as
# DocumentNeededResources.
#
# atend:
#
# True, if any comment in the header had a value of `(atend)'.
# (Used internally by the parsing functions)
class EpsInfo:
    # Holds the key/value pairs extracted from the header comments of an
    # EPS file as instance variables.
    #
    # BoundingBox is None until a bounding box was set.  The two
    # resource dictionaries carry their information in the keys, which
    # are (TYPE, VALUE) tuples; the values are always 1.  atend is true
    # once a header comment with the value `(atend)' was seen.

    def __init__(self):
        self.DocumentSuppliedResources = {}
        self.DocumentNeededResources = {}
        self.BoundingBox = None
        self.atend = 0

    def NeedResources(self, type, resources):
        # Record every item of the sequence RESOURCES as a needed
        # resource of the type TYPE (a string such as 'font').
        for res in resources:
            self.DocumentNeededResources[(type, res)] = 1

    def SupplyResources(self, type, resources):
        # Record every item of the sequence RESOURCES as a resource of
        # the type TYPE that is supplied by the document.
        for res in resources:
            self.DocumentSuppliedResources[(type, res)] = 1

    def print_info(self):
        # Print the contents of self in a readable manner (for
        # debugging).
        #
        # Each line is formatted completely before it is printed, so
        # that print is only ever called with a single parenthesized
        # string.  That form is valid as a print statement and as a
        # call of the print function.
        print('BoundingBox:\t%s' % repr(self.BoundingBox))

        needed = [str(res) for res in self.DocumentNeededResources]
        print(' '.join(['DocumentNeededResources: ['] + needed + [']']))

        supplied = [str(res) for res in self.DocumentSuppliedResources]
        print(' '.join(['DocumentSuppliedResources: ['] + supplied + [']']))

        # All remaining instance variables are header comments that were
        # stored under their own name.
        already_printed = ('BoundingBox', 'DocumentNeededResources',
                           'DocumentSuppliedResources', 'atend')
        for key, value in self.__dict__.items():
            if key not in already_printed:
                print('%s\t%s' % (key, value))
def IsEpsFileStart(data):
    # Check whether the string DATA might be the beginning of an
    # Encapsulated PostScript file.  The result is the match object of
    # rx_eps_magic (which is true) if DATA starts with the EPS magic
    # and None otherwise.
    magic = rx_eps_magic.match(data)
    return magic
def _split_resource_value(value):
    # Split VALUE, the non-empty stripped value of a resource comment,
    # into its first word (the resource type) and the rest of the text.
    # The rest is '' if VALUE consists of the resource type only.
    fields = value.split(None, 1)
    if len(fields) == 2:
        return fields[0], fields[1]
    return fields[0], ''


def _record_resources(value, record, description, info):
    # Handle the value of a DocumentNeededResources or
    # DocumentSuppliedResources comment.
    #
    # RECORD is the method of INFO that stores the resources
    # (NeedResources or SupplyResources), DESCRIPTION the word used for
    # this kind of comment in the warning for resources that are
    # ignored.  Only font resources are stored.
    if value == ATEND:
        info.atend = 1
    elif value:
        restype, names = _split_resource_value(value)
        if restype == 'font':
            record(restype, names.split())
        else:
            # XXX: might occasionally be interesting for the user
            warn(INTERNAL, description + ' resource %s %s ignored',
                 restype, names)


def parse_header(file, info):
    # Parse the header section of FILE and store the information found
    # in the INFO object which is assumed to be an instance of EpsInfo.
    #
    # FILE only needs a readline method.  Parsing stops at an
    # %%EndComments or %%EOF comment, at the first line that does not
    # look like a header comment, or at the end of the file.
    #
    # BoundingBox, DocumentNeededResources, DocumentNeededFonts and
    # DocumentSuppliedResources are interpreted; the value of any other
    # DSC comment is stored as a string in an attribute of INFO named
    # like the comment.  If one of the interpreted comments has the
    # value `(atend)', INFO.atend is set to 1 instead.
    #
    # A BoundingBox whose value does not consist of numbers raises a
    # ValueError.
    #
    # This works for the %%Trailer section as well so that parsing the
    # beginning (until %%EndComments) and end (from %%Trailer) if
    # necessary with the same INFO object should get all information
    # available.
    last_key = ''
    line = file.readline()
    while line:
        match = rx_dsccomment.match(line)
        if match:
            key = match.group(1)
            value = line[match.end(0):].strip()
            if key in ('EndComments', 'EOF'):
                break
            if key == '+':
                # continuation line: the value belongs to the comment
                # of the previous line
                key = last_key

            if key == 'BoundingBox':
                if value != ATEND:
                    # the bounding box should be given in UINTs
                    # but may also (incorrectly) be float.
                    info.BoundingBox = tuple(map(float, value.split()))
                else:
                    info.atend = 1
            elif key == 'DocumentNeededResources':
                _record_resources(value, info.NeedResources, 'needed', info)
            elif key == 'DocumentNeededFonts':
                if value != ATEND:
                    info.NeedResources('font', value.split())
                else:
                    info.atend = 1
            elif key == 'DocumentSuppliedResources':
                # resources that come with the document belong into
                # info.DocumentSuppliedResources, not into the needed
                # resources
                _record_resources(value, info.SupplyResources, 'supplied',
                                  info)
            else:
                setattr(info, key, value)
            last_key = key
        else:
            # the header comments end at a line not beginning with %X,
            # where X is a printable character not in SPACE, TAB, NL
            # XXX: It is probably wrong to do this in the %%Trailer
            if line[0] != '%':
                break
            if len(line) == 1 or line[1] not in endcommentchars:
                break
        line = file.readline()
def _skip_data(file, fields):
    # Skip the data announced by a %%BeginData comment.  FIELDS is the
    # list of words that followed the comment keyword on its line.
    #
    # The first word is the amount of data.  It counts lines if there
    # are exactly three words and the third is 'Lines', and is passed
    # to file.read as the amount to read otherwise.  A first word that
    # is not an integer raises a ValueError.
    if not fields:
        # should never happen in a conforming document...
        return
    count = int(fields[0])
    if count <= 0:
        return
    if len(fields) == 3 and fields[2] == 'Lines':
        for i in range(count):
            file.readline()
    else:
        blocksize = 4000
        while count > 0:
            data = file.read(min(count, blocksize))
            if not data:
                # The file ends before all of the announced data was
                # read.  Without this check the loop would never
                # terminate because count does not change anymore.
                break
            count = count - len(data)


def skip_to_comment(file, comment):
    # Read lines from FILE until a line with a DSC comment COMMENT is
    # found.  Handles (it should at least) (binary) data and embedded
    # documents correctly (i.e. isn't confused by embedded documents
    # containing COMMENT as well, if they are enclosed in
    # Begin/EndDocument comments).
    #
    # FILE needs the methods readline and read, COMMENT is the keyword
    # of the comment without the leading '%%' and without the colon.
    #
    # The file is positioned right after the line containing the
    # comment.  Raise a DSCError if the comment is not found before the
    # end of the file.
    line = file.readline()
    while line:
        match = rx_dsccomment.match(line)
        if match:
            key = match.group(1)
            if key == comment:
                return
            if key == 'BeginDocument':
                # skip embedded document
                skip_to_comment(file, 'EndDocument')
            elif key == 'BeginData':
                _skip_data(file, line[match.end(0):].split())
        line = file.readline()
    raise DSCError('DSC-Comment %s not found' % comment)
def parse_eps_file(filename):
    # Extract information from the EPS file FILENAME.  Return an
    # instance of EpsInfo with the appropriate parameters.  Raise a
    # DSCError if the file is not an EPS file, i.e. if its first line
    # is not accepted by IsEpsFileStart.
    #
    # If a header comment had the value `(atend)', the comments
    # following %%Trailer are parsed as well; skip_to_comment raises a
    # DSCError if there is no %%Trailer.
    file = streamfilter.LineDecode(open(filename, 'r'))
    try:
        line = file.readline()
        if not IsEpsFileStart(line):
            raise DSCError('%s is not an EPS file' % filename)
        info = EpsInfo()
        parse_header(file, info)
        if info.atend:
            skip_to_comment(file, 'Trailer')
            parse_header(file, info)
    finally:
        # close the file even if the file is rejected or parsing fails
        file.close()
    return info
#
#
#
if __name__ == '__main__':
    # Debugging aid: parse the header comments of the file named by the
    # first command line argument and print what was found.  Unlike
    # parse_eps_file this reads the file directly and does not check
    # that it starts like an EPS file.
    import sys
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s FILE\n' % sys.argv[0])
        sys.exit(1)
    file = open(sys.argv[1], 'r')
    try:
        info = EpsInfo()
        parse_header(file, info)
        if info.atend:
            skip_to_comment(file, 'Trailer')
            parse_header(file, info)
    finally:
        # close the file even if parsing fails
        file.close()
    info.print_info()
|