pygram.py : » Web-Frameworks » Zope » Zope-2.6.0 » lib » python » Products » ZGadflyDA » gadfly » Python Open Source

1.	3.1.2 Python
2.	Ajax
3.	Aspect Oriented
4.	Blog
5.	Build
6.	Business Application
7.	Chart Report
8.	Content Management Systems
9.	Cryptographic
10.	Database
11.	Development
12.	Editor
13.	Email
14.	ERP
15.	Game 2D 3D
16.	GIS
17.	GUI
18.	IDE
19.	Installer
20.	IRC
21.	Issue Tracker
22.	Language Interface
23.	Log
24.	Math
25.	Media Sound Audio
26.	Mobile
27.	Network
28.	Parser
29.	PDF
30.	Project Management
31.	RSS
32.	Search
33.	Security
34.	Template Engines
35.	Test
36.	UML
37.	USB Serial
38.	Web Frameworks
39.	Web Server
40.	Web Services
41.	Web Unit
42.	Wiki
43.	Windows
44.	XML
Python Open Source » Web Frameworks » Zope
Zope » Zope 2.6.0 » lib » python » Products » ZGadflyDA » gadfly » pygram.py
# rules for python
# based on grammar given in Programming Python by Mark Lutz

# EDIT THIS: ARCHIVE IS THE DIRECTORY IN WHICH THE MARSHALLED
# GRAMMAR DATA STRUCTURES ARE KEPT.
#
ARCHIVE = "."

# file inside ARCHIVE that GrammarBuild writes and unMarshalpygram reads
marshalfilename = "%s/pygram.mar" % ARCHIVE

pyrules = """

all ::

## input terminates with "fake" dedent (forces read of all file)

@R all1 :: all >> file_input DEDENT

## 1 term newline

##@R lead_blank :: file_input >> NEWLINE file_input

@R top_stmt :: file_input >> file_input stmt
@R file_input :: file_input >> stmt


## 2
@R simple :: stmt >> simple_stmt
@R compound :: stmt >> compound_stmt

## 3 punct ; term NEWLINE
@R one_small :: simple_stmt >> small_stmt NEWLINE
@R more_small :: simple_stmt >> small_stmt ; simple_stmt
@R small_semi :: simple_stmt >> small_stmt ; NEWLINE

## 4 kw pass
@R smexpr :: small_stmt >> expr_stmt
@R smassn :: small_stmt >> assn
@R smprint :: small_stmt >> print_stmt
@R smdel :: small_stmt >> del_stmt
@R smpass :: small_stmt >> pass
@R smflow :: small_stmt >> flow_stmt
@R smimport :: small_stmt >> import_stmt
@R smglobal :: small_stmt >> global_stmt
## access ignored
@R smexec :: small_stmt >> exec_stmt

## 5
@R cmif :: compound_stmt >> if_stmt
@R cmwhile :: compound_stmt >> while_stmt
@R cmfor :: compound_stmt >> for_stmt
@R cmtry :: compound_stmt >> try_stmt
@R cmdef :: compound_stmt >> funcdef
@R cmclass :: compound_stmt >> classdef

##6
@R exprlist :: expr_stmt >> testlist
##@R assignment :: expr_stmt >> assn
@R assn1 :: assn >> testlist = testlist

@R assnn :: assn >> testlist = assn

@R assn1c :: assn >> testlist , = testlist

@R assn1c2 :: assn >> testlist , = testlist ,

@R assnnc :: assn >> testlist , = assn

##testing @R exprassn :: expr_stmt >> expr_stmt = testlist 

@R exprlistc :: expr_stmt >> testlist ,

##testing @R exprassnc :: expr_stmt >> expr_stmt = testlist ,

##7 kw print
@R rprint0 :: print_stmt >> print
@R rprint :: print_stmt >> print testlist 
@R rprintc :: print_stmt >> print testlist ,

##8 kw del
@R rdel :: del_stmt >> del exprlist

##9 trivially handled in #4

##10 kw raise continue break return

## eliminates 11 12 13 14
@R rbreak  :: flow_stmt >> break
@R rcontinue :: flow_stmt >> continue
@R rreturn0 :: flow_stmt >> return
@R rreturn :: flow_stmt >> return testlist 
@R rreturnc :: flow_stmt >> return testlist ,
@R rraise1 :: flow_stmt >> raise test
@R rraise2 :: flow_stmt >> raise test , test
@R rraise3 :: flow_stmt >> raise test , test , test

## 11 12 13 14 skipped

## 15 kw import from
@R rimport :: import_stmt >> import dotted_name_list 
@R rimportc :: import_stmt >> import dotted_name_list ,
@R dnlist1 :: dotted_name_list >> dotted_name
@R dnlistn :: dotted_name_list >> dotted_name_list , dotted_name
@R rfrom :: import_stmt >> from dotted_name import name_list 
@R rfroms :: import_stmt >> from dotted_name import *
@R rfromc :: import_stmt >> from dotted_name import name_list ,
@R nlistn :: name_list >> name_list  , NAME
@R nlist1 :: name_list >> NAME

##16 nt NAME
@R dn1 :: dotted_name >> NAME
@R dnn :: dotted_name >> dotted_name . NAME

##17 kw global
@R global1 :: global_stmt >> global NAME 
@R globaln :: global_stmt >> global_stmt , NAME 

## 18 19 ignored

##20 kw exec in
@R exec1 :: exec_stmt >> exec expr
@R exec2 :: exec_stmt >> exec expr in test
@R exec3 :: exec_stmt >> exec expr in test , test

##21  kw if elif else punct :
@R ifr :: if_stmt >> if test : suite elifs
@R elifs0 :: elifs >>
@R relse :: elifs >> else : suite
@R elifsn :: elifs >> elif test : suite elifs

##22 kw while
@R while1 :: while_stmt >> 
while test : 
    suite
@R while2 :: while_stmt >> 
while test : 
   suite 
else : 
   suite

##23 kw for
@R for1 :: for_stmt >> 
for exprlist in testlist  : 
     suite
@R for2 :: for_stmt >> 
for exprlist in testlist  : 
     suite 
else : 
     suite

##24 kw try
@R tryr :: try_stmt >> try : suite excepts
@R excepts1 :: excepts >> except_clause : suite
@R excepts2 :: excepts >> except_clause : suite else : suite
@R exceptsn :: excepts >> except_clause : suite excepts
@R tryf :: try_stmt >> try : suite finally : suite

##25 kw except
@R except0 :: except_clause >> except 
@R except1 :: except_clause >> except test
@R except2 :: except_clause >> except test , test

##26
@R class1 :: classdef  >> class NAME : suite
@R class2 :: classdef  >> class NAME ( testlist ) : suite

##27 kw def
@R rdef :: funcdef >> def NAME parameters : suite

##28, 29 punct = * 

## (modified from grammar presented)
@R params1 :: parameters >> ( varargslist )
@R params1c :: parameters >> ( varargslist , )
@R params2 :: varargslist >> 

## this is way too permissive: fix at semantic level
@R params3 :: varargslist >> arg
@R params4 :: varargslist >> varargslist , arg
@R argd :: arg >> NAME = test
@R arg2 :: arg >> fpdef
@R arg3 :: arg >> * NAME
@R arg4 :: arg >> ** NAME

## 30
@R fpdef1 :: fpdef  >> NAME
@R fpdef2 :: fpdef  >>  ( fplist )
@R fpdef2c :: fpdef  >>  ( fplist , )

##31
@R fplist1 :: fplist >> fpdef
@R fplistn :: fplist >> fplist , fpdef

##32 t INDENT DEDENT
@R ssuite :: suite >> simple_stmt
@R csuite :: suite >> NEWLINE INDENT stmtseq DEDENT
@R stmtseq1 :: stmtseq >> stmt
@R stmtseqn :: stmtseq >> stmtseq stmt

##33 kw or cancels 53
@R testor :: test >> or_test
@R testand :: or_test >> and_test
@R testor1 :: or_test >> or_test or and_test
## @R testlambda0 :: test >> lambda : test REDUNDANT
@R testlambda1 :: test >> lambda varargslist : test

##34 kw and
@R andnot :: and_test >> not_test
@R andand :: and_test >> and_test and not_test

##35 kw not
@R notnot :: not_test >> not not_test
@R notcmp :: not_test >> comparison

##36 NOTE KWS == >= <= <> !=
@R cmpexpr :: comparison >> expr
@R cmplt :: comparison >> comparison < expr
@R cmpgt :: comparison >> comparison > expr
@R cmpeq :: comparison >> comparison == expr
@R cmpge :: comparison >> comparison >= expr
@R cmple :: comparison >> comparison <=  expr
@R cmpnep :: comparison >> comparison <> expr
@R cmpne :: comparison >> comparison != expr
@R cmpin :: comparison >> comparison in expr
@R cmpnotin :: comparison >> comparison not in expr
@R cmpis :: comparison >> comparison is expr
@R cmpisnot :: comparison >> comparison is not expr

##37 kw is not punct > < ! (eliminated)

##38 p |
@R expr_xor :: expr >> xor_expr
@R expr_lor :: expr >> expr | xor_expr

##39 p ^
@R xor_and :: xor_expr >> and_expr
@R xor_xor :: xor_expr >> xor_expr ^ and_expr

##40
@R and_shift :: and_expr >> shift_expr
@R and_and :: and_expr >> and_expr & shift_expr

##41 note kw's << >x> note goofy x to avoid confusing the grammar
@R shift_arith :: shift_expr >> arith_expr
@R shift_left :: shift_expr >> shift_expr << arith_expr
@R shift_right :: shift_expr >> shift_expr >x> arith_expr

##42
@R arith_term :: arith_expr >> term
@R arith_plus :: arith_expr >> arith_expr + term
@R arith_minus :: arith_expr >> arith_expr - term

##43 p */%
@R termfactor :: term >> factor
@R termmul :: term >> term * factor
@R termdiv :: term >> term / factor
@R termmod :: term >> term % factor

## stuff for power
@R factorpower :: factor >> power
@R factorexp :: factor >> factor ** power

##44 p ~
@R powera :: power >> atom trailerlist
@R trailerlist0 :: trailerlist >> 
@R trailerlistn :: trailerlist >> trailer trailerlist
@R powerp :: power >> + power
@R powerm :: power >> - power
@R poweri :: power >> ~ power

##45 t NUMBER STRING
@R nulltup :: atom >> ( )
@R parens :: atom >> ( testlist )
@R parensc :: atom >> ( testlist , )
@R nulllist :: atom >> [ ]
@R list :: atom >> [ testlist  ]
@R listc :: atom >> [ testlist , ]
@R nulldict :: atom >> { }
@R dict :: atom >> { dictmaker   }
@R dictc :: atom >> { dictmaker , }
@R repr :: atom >> ` testlist  `
## @R reprc :: atom >> ` testlist , ` doesn't work, apparently
@R aname :: atom >> NAME
## note number to be broken out into FLOAT OCTINT HEXINT INT
@R anumber :: atom >> NUMBER
@R astring :: atom >> stringseq
@R stringseq0 :: stringseq >> STRING
@R stringseqn :: stringseq >> stringseq STRING

##46 
@R nullcall :: trailer >> ( )
@R call :: trailer >> ( arglist  )
@R callc :: trailer >> ( arglist , )
@R index :: trailer >> [ subscriptdots ]
@R getattr :: trailer >> . NAME

##47
@R arg1 :: arglist >> argument
@R argn :: arglist >> arglist , argument
##@R argn1 :: arglist >> arglist , NAME = test

##48 ( !!!! is this wrong in PP?)

@R posarg :: argument >> test

## here the left test should be a NAME always, but parser doesn't like it
@R namearg :: argument >> test = test

##49 this IS wrong in PP (numeric ext)
@R nodots :: subscriptdots >> subscriptseq
@R yesdots :: subscriptdots >> subscriptseq , . . . , subscriptseq
@R subscript1 :: subscriptseq >> subscript
@R subscriptn :: subscriptseq >> subscriptseq , subscript
@R subscriptt :: subscript >> test
@R subscripts0 :: subscript >> :
@R subscriptsL :: subscript >> test :
@R subscriptsR :: subscript >> : test
@R subscripts :: subscript >> test : test

##50
@R exprlist1 :: exprlist >> expr
@R exprlistn :: exprlist >> exprlist , expr

##51
@R testlist0 :: testlist >> test
@R testlistn :: testlist >> testlist , test

##52
@R dictmaker1 :: dictmaker >> test : test
@R dictmaker2 :: dictmaker >> dictmaker , test : test

"""

# Whitespace-separated names of the grammar's nonterminals; GrammarBuild
# hands this string to pyg.Nonterms() before declaring the rules.
nonterms = """
subscriptdots subscript arg
argument arglist subscriptseq params trailerlist
factor atom trailer dictmaker stringseq power
xor_expr and_expr shift_expr arith_expr term
and_test or_test not_test comparison comp_op expr
fplist stmtseq varargslist assn
expr elifs suite excepts parameters pbasic pdefault pspecial
testlist exprlist test dotted_name_list dotted_name name_list
if_stmt while_stmt for_stmt try_stmt funcdef classdef
expr_stmt print_stmt del_stmt flow_stmt import_stmt global_stmt
small_stmt compound_stmt stmt simple_stmt exec_stmt
file_input except_clause fpdef cmp_op
all
"""

import string
# python needs special handling for the lexical stuff
#
# These are the patterns DeclareTerminals registers with Addterm for the
# NAME, NUMBER, STRING, INDENT and DEDENT terminals.

# Identifier letters are ASCII only.  string.letters is locale dependent
# (and gone in Python 3), so prefer string.ascii_letters and fall back to
# string.letters only on interpreters too old to have it.
_letters = getattr(string, "ascii_letters", None) or string.letters

# first char: letter or "_"; following chars: letter, digit or "_"
# (the "_" was missing from the second class, so "foo_bar" stopped at "foo")
NAMEre = "[" + _letters + "_][" + _letters + string.digits + "_]*"
NUMBERre = "[" + string.digits + "]+" # temporary!
STRINGre = '"[^"\n]*"' # to be overridden in lexdict
#NEWLINEre = "\n" # to be overridden in lexdict
INDENTre = "#" # a fake! to be overridden
DEDENTre = "#" # a fake! to be overridden

def echo(str):
    """Identity function: give the argument back untouched.

    DeclareTerminals passes it as the third argument of every
    Grammar.Addterm call.
    """
    return str

def DeclareTerminals(Grammar):
    """Register the five terminals of the python grammar on Grammar.

    Calls Grammar.Addterm(name, regex, echo) for NAME, NUMBER, STRING,
    INDENT and DEDENT, in that order.  NEWLINE is deliberately not a
    terminal here: it is listed among the keywords instead.
    """
    #Grammar.Addterm("NEWLINE", NEWLINEre, echo) # newline is kw!
    terminals = (
        ("NAME", NAMEre),
        ("NUMBER", NUMBERre),
        ("STRING", STRINGre),
        ("INDENT", INDENTre),
        ("DEDENT", DEDENTre),
    )
    for termname, pattern in terminals:
        Grammar.Addterm(termname, pattern, echo)

# Whitespace-separated keyword list handed to pyg.Keywords() in GrammarBuild.
# note: >x> is a fake! It stands for the right-shift operator: the kwmap
# table reports ">x>" as the keyword whenever the lexer reads ">>".
keywords = """
and break class continue def del elif else except exec
finally for from global if import in is lambda not or pass
print raise return try while == >= <= <> != >x> << NEWLINE
**
"""

import kjParser, string, re
from kjParser import KEYFLAG,ENDOFFILETERM

# Identifier letters are ASCII only.  string.letters is locale dependent
# (and gone in Python 3), so prefer string.ascii_letters and fall back to
# string.letters only on interpreters too old to have it.
_letters = getattr(string, "ascii_letters", None) or string.letters

# every character that may appear inside an identifier
alphanumunder = _letters + string.digits + "_"
# every character that may start an identifier
alpha = _letters + "_"

# components that are part of a identifier (cannot be next to kw).
# Kept as a list of single characters: the lexer tests one-character (or
# empty) slices against it with "in".
id_letters = list(alphanumunder)

# terminator re for names
nametermre = "[^" + alphanumunder + "]"
nameterm = re.compile(nametermre)

# terminator re for numbers (same as above but allow "." in num).
numtermre = "[^" + alphanumunder + r"\.]"
numterm = re.compile(numtermre)


class parseerror(Exception):
    """Lexical error raised by the python lexer.

    This used to be the string "parseerror"; string exceptions cannot be
    raised on Python 2.6 and later.  `raise parseerror, "message"` and
    `except parseerror:` keep working unchanged with the class.
    """

# a comment: "#" up to (not including) the end of the line
pycommentre = r"(#.*)"

# whitespace regex outside of brackets
#  (blank char or backslash-newline continuation) maybe repeated,
#  then an optional comment
#  DON'T EAT NEWLINE!!
# The continuation alternative is written \\\n (literal backslash, then
# newline).  The earlier spelling "[\]\n" left the character class
# unterminated, so re.compile raised an error as soon as the module loaded.
pywhiteoutre = r"([ \t\r\014]|\\\n)*%s?" % pycommentre
pywhiteout = re.compile(pywhiteoutre)

# whitespace regex inside brackets
#  currently the very same pattern as outside brackets
pywhiteinre = pywhiteoutre #"[ \t\r]*(\\\\\n)*%s?" % pycommentre
pywhitein = re.compile(pywhiteinre)

# totally blank lines (only recognize if next char is newline)
#allblankre = "\n" + pywhiteinre
#allblank = re.compile(allblankre)

# re for indentation (might accept empty string)
indentp = re.compile(r"[\t ]*")

# two char kws and puncts
char2kw = ["if", "or", "in", "is"]
punct2 = ["<>", "<<", ">>", "<=", ">=", "!=", "**", "=="]

# >two char kws as map of first 3 chars to others
char3k_data = """
  and break class continue def del elif else except
  finally for from global import lambda not pass print
  raise return try while exec
"""

# str.split() instead of the deprecated string.split() function
char3kw = char3k_data.split()
# first three characters of a keyword -> the whole keyword
char3kwdict = {}
for kw in char3kw:
    char3kwdict[kw[:3]] = kw

# NOTE: newline is treated same as a punctuation
# NOTE: "' ARE NOT PUNCTS
punct = "~!#%^&*()-+=|{}<>,.;:/[]{}\n`"
# one list entry per punctuation character
punctlist = list(punct)

# Lexer result for every keyword and punctuation token, keyed by the text
# as it appears in the source: (((KEYFLAG, token), token), 1).
kwmap = {}
for x in char2kw + punct2 + char3kw + punctlist:
    # everything parses as length 1 to the outer world.
    kwmap[x] = (((KEYFLAG, x), x), 1)

# special hack: right shift is reported under the fake name >x>
kwmap[">>"] = (((KEYFLAG, ">x>"), ">x>"), 1)

# a newline is reported as the NEWLINE keyword; the lexer recognises this
# exact object by identity, so it is shared with the table entry
newlineresult = (((KEYFLAG, "NEWLINE"), "NEWLINE"), 1)
kwmap["\n"] = newlineresult

#finaldedent = (((TERMFLAG, "DEDENT"), ""), 1)

# Python lexical dictionary.

### MUST HANDLE WHOLELY BLANK LINES CORRECTLY!

def RMATCH(re, key, start=0):
    """Length of the match of compiled pattern `re` anchored at key[start].

    Returns -1 when the pattern does not match at that position (a match
    of the empty string gives 0).
    """
    found = re.match(key, start)
    if found is not None:
        return found.end() - found.start()
    return -1

class pylexdict(kjParser.LexDictionary):
   def __init__(self):
       kjParser.LexDictionary.__init__(self)
       # need to add special map for >>
       self.brackets = 0 # count of active brackets
       self.realindex = 0 # where to start
       self.indents = [""] # stack of indents (start with a fake one)
       self.lineno = 0
       self.atdedent = 0
       ### handle multiple dedents correctly!!!
       ### translate tabs to 8 spaces...
       from kjParser import TERMFLAG
       self.NAMEflag = (TERMFLAG, "NAME")
       self.STRINGflag = (TERMFLAG, "STRING")
       self.NEWLINEflag = (TERMFLAG, "NEWLINE")
       self.INDENTflag = (TERMFLAG, "INDENT")
       self.DEDENTflag = (TERMFLAG, "DEDENT")
       self.NUMBERflag = (TERMFLAG, "NUMBER")

   def endoffile(self, String):
       # pop off all indentations!
       indents = self.indents
       #lastresult = self.lastresult
       self.realindex = len(String)
       if not indents:
          # pop indents
          #print "eof after dedent"
          result = self.lastresult = (ENDOFFILETERM, 0)
       else:
          #print "eof as dedent after", self.lastresult
          del indents[-1]
          if indents:
             dedent = indents[-1]
          else:
             dedent = ""
          result = self.lastresult = ((self.DEDENTflag, dedent), 1)
       #print "returning eof", result, "after", lastresult
       return result

   def Token(self, String, StartPosition):
       #print "Token", (StartPosition, 
       #  `String[self.realindex:self.realindex+20]`, self.lastresult)
       # HAVE TO FAKE OUT LEXER FOR DEDENTS
       # STARTPOSITION COUNTS # OF TOKEN, NOT STRING POSITION
       # STRING POSITION IS MAINTAINED IN LexDict object.
       lastindex = self.lastindex
       lastresult = self.lastresult
       if self.laststring is not String:
          #print "parsing new string"
          self.laststring = String
          # special hack: skip lead whitespace
          cursor = 0
          self.lineno = 1
          while 1:
             test = RMATCH(pywhitein,String, cursor)
             if test<0: break
             next = cursor + test
             #print "lead skip:", next, String[cursor:next]
             if String[next]!="\n": break
             #skipped = String[cursor:next]
             #if "\n" in skipped:
             #   self.lineno = (
             #    self.lineno + len(string.splitfields(skipped, "\n")))
             #self.lineno = self.lineno+1
             cursor = next + 1
          self.realindex = cursor
          self.saveindex = 0
          self.indents = [""] # stack of indents (start with a fake one)
          # pretend we saw a newline
          self.lastresult = newlineresult
          if StartPosition!=0:
             self.laststring = None
             raise ValueError, "python lexical parsing must start at zero"
          lastindex = self.lastindex
          lastresult = None
       elif lastindex == StartPosition:
          #print "returning lastresult ", lastresult
          return lastresult
       elif lastindex != StartPosition-1:
          raise ValueError, "python lexer can't skip tokens"

       #print "parsing", StartPosition, lastresult
       # do newline counting here!
       delta = String[self.saveindex: self.realindex]
       #print "delta", `delta`
       if "\n" in delta:
          #print self.lineno, self.saveindex, self.realindex, `delta`
          self.lineno = self.lineno + len(
            string.splitfields(delta, "\n")) - 1
       realindex = self.saveindex = self.realindex
       self.lastindex = StartPosition

       # skip whitespace (including comments)
       ### needs to be improved to parse blank lines, count line numbers...
       # skip all totally blank lines (don't eat last newline)
       atlineend = (String[realindex:realindex+1] == "\n"
                    or lastresult is newlineresult
                    or self.atdedent)
       skipnewlines = (lastresult is newlineresult or
                       self.atdedent or
                       self.brackets>0)
       if atlineend: #String[realindex:realindex+1]=="\n":
          #print "trying to skip blank lines", String[realindex:realindex+10]
          while 1:
             #if String[realindex:realindex+1]=="\n":
             #   start = realindex+1 # move past current newline
             #   self.lineno = self.lineno + 1
             #else:
             #   start = realindex
             start = realindex
             if skipnewlines:
                while String[start:start+1]=="\n":
                   start = start+1
                   #self.lineno = self.lineno+1
             #print "matching", `String[start:start+10]`
             skip = RMATCH(pywhitein,String, start)
             #print "skip=", skip
             if skip<0: break
             rs = skip + realindex + (start-realindex)
             if rs==realindex: break
             #print "at", rs, `String[rs]`
             if (rs<len(String) and 
                 (String[rs] == "\n" or 
                  (skipnewlines and String[rs-1:rs]=="\n"))):
                #print "skipping blank line"
                #if lastresult is newlineresult or self.brackets>0: 
                #   rs = rs + 1
                #skipped = String[start:rs]
                #if "\n" in skipped:
                   #self.lineno = self.lineno + len(
                   #   string.splitfields(skipped, "\n"))
                self.realindex = realindex = rs
                #self.lineno = self.lineno+1
             else:
                if skipnewlines: self.realindex = realindex = start
                break
       #print "after skipping blank lines", `String[realindex:realindex+20]`
       skipto = realindex
       skip = 0
       if self.brackets>0:
          while 1:
             #print "skipping white in brackets", skipto
             if realindex>len(String):
                break
             if String[skipto]=="\n":
                #self.lineno = self.lineno+1
                skipto = skipto + 1
                self.realindex = realindex = skipto
                continue
             skip = RMATCH(pywhiteout,String, skipto)
             nextskipto = skipto+skip
             #skipped = String[skipto:nextskipto]
             #if "\n" in skipped:
             #   self.lineno = self.lineno+len(
             #       string.splitfields(skipped, "\n"))
             if skip>0:
                skipto = nextskipto
             else: break
          skip = skipto - realindex
       elif not atlineend:
          skip = RMATCH(pywhitein,String, realindex)
       if skip<=0: 
          skip = 0
       else:
          #print "skipping", skip
          nextri = realindex + skip
          #skipped = String[realindex:nextri]
          #if "\n" in skipped:
          #   self.lineno = self.lineno + len(
          #    string.splitfields(skipped, "\n"))
          realindex = self.realindex = nextri
       if realindex>=len(String):
          return self.endoffile(String)
       # now look for a keyword, name, number, punctuation, 
       # INDENT, DEDENT, NEWLINE
       first = String[realindex]
       #if last parse was newline and not in brackets:
       #   look for indent/dedent
       if (self.brackets<=0 and (lastresult is newlineresult or self.atdedent)
           and first != "\n"):
          #print "looking for dent", realindex, `String[realindex:realindex+20]`
          match = RMATCH(indentp,String, realindex)
          if match>=0:
             dent = String[realindex: realindex+match]
             #print "dent match", match, `dent`
             oldindex = realindex
             self.realindex = realindex = realindex+match
             # replace tabs with 8 spaces
             dent = string.joinfields(string.splitfields(dent, "\t"),
                                      "        ")
             dents = self.indents
             lastdent = dents[-1]
             ldl = len(lastdent)
             dl = len(dent)
             #print "last", ldl, dents
             if ldl<dl:
                self.atdedent = 0
                result = self.lastresult = ((self.INDENTflag, dent), 1)
                dents.append(dent)
                #print "indent ", result, dents
                return result
             if ldl>dl:
                self.realindex = oldindex # back up, may have to see it again!
                self.atdedent = 1
                result = self.lastresult = ((self.DEDENTflag, dent), 1)
                del dents[-1]
                #print "dedent ", result, dl, dents
                return result
             # otherwise, indentation is same, keep looking
             # might be at eof now:
             if realindex>=len(String):
                #print "returning eof"
                return self.endoffile(String)
             first = String[realindex]
       self.atdedent = 0
       from string import digits#, letters
if(firstinpunctlistand
           # special case for .123 numbers (yuck!)
           (first!="." or String[realindex+1] not in digits)):
          # is it a 2 char punct?
          first2 = String[realindex:realindex+2]
          if first2 in punct2:
             result = self.lastresult = kwmap[first2]
             self.realindex = realindex+2
             #print "2 digit punct", result
             return result
          # otherwise, just return normal punct
          result = self.lastresult = kwmap[first]
          self.realindex = self.realindex + 1
          ### special bookkeeping
          if first=="\n":
             result = newlineresult
             #print "newline!"
             #self.lineno = self.lineno+1
          elif first in "[{(":
             #print "bracket!"
             self.brackets = self.brackets + 1
          elif first in "]})":
             #print "close bracket!"
             self.brackets = self.brackets - 1
          #print "1 digit punct", result
          return result
       if first in digits or first==".":
          # parse a number...
          skip = numterm.search(String, realindex)
          if skip<=realindex:
             raise parseerror, "number length<1 (!)"
          thenumber = String[realindex:skip]
          self.realindex = skip
          ### note don't interpret number here!!
          result = self.lastresult = ((self.NUMBERflag, thenumber), 1)
          #print "number", result
          return result
       if first in alpha:
          # try keyword...
          first2 = String[realindex: realindex+2]
          if first2 in char2kw:
             if String[realindex+2:realindex+3] not in id_letters:
                # parse a 2 char kw first2
                result = self.lastresult = kwmap[first2]
                self.realindex = self.realindex+2
                #print "keyword 2", result
                return result
          first3 = String[realindex: realindex+3]
          if char3kwdict.has_key(first3):
             the_kw = char3kwdict[first3]
             the_end = realindex+len(the_kw)
             if ((the_end<len(String)) and 
                 (String[the_end] not in id_letters) and
                 (String[realindex:the_end]==the_kw)):
                # parse the_kw
                self.realindex = the_end
                result = self.lastresult = kwmap[the_kw]
                #print "keyword +", result
                return result
          #otherwise parse an identifier
          #print "looking for name:", `String[realindex:realindex+10]`
          skip = nameterm.search(String, realindex)
          if skip<=realindex:
             raise parseerror, "identifier length<1 (!)"
          theid = String[realindex:skip]
          self.realindex = skip
          ### note don't interpret number here!!
          result = self.lastresult = ((self.NAMEflag, theid), 1)
          #print "id", result
          return result
       if first in "\"'":
          # check for triplequotes
          first3 = first*3
          if String[realindex: realindex+3] == first3:
             # parse triple quotes
             start = place = realindex+3
             while 1:
                last = string.find(String, first3, place)
                if last<0:
                   raise parseerror, "failed to terminate triple quotes"
                if String[last-1:last]=="\\" and String[last-2:last-1]!="\\":
                   place = last+1
                else: break
             the_string = String[start: last]
             self.realindex = last+3
             result = self.lastresult = ((self.STRINGflag, the_string), 1)
             #print "3q string", result
             # count the newlines!
             #newlinecount = len(string.splitfields(the_string, "\n"))
             #self.lineno = self.lineno+newlinecount
             #print "triple quotes", result
             return result
          else:
             # parse single quotes
             sanity = start = place = realindex+1
             done = 0
             while 1:
                sanity = min(string.find(String, "\n", sanity), len(String))
                if sanity<start: 
                   sanity=len(String)
                   break
                if String[sanity-1]!="\\":
                   break
                else:
                   #self.lineno = self.lineno+1
                   sanity = sanity + 1
             while 1:
                last = string.find(String, first, place)
                if last<0 or last>sanity:
                   raise parseerror, "failed to terminate single quotes"
                if String[last-1:last]=="\\":
                   # are we at the end of an odd number of backslashes? (yuck!)
                   bplace = last-1
                   while String[bplace:bplace+1]=="\\":
                      bplace = bplace-1
                   if (last-bplace)%2==1: 
                      break # the end quote is real!
                   place = last+1
                else: break
             the_string = String[start:last]
             self.realindex = last+1
             result = self.lastresult = ((self.STRINGflag, the_string), 1)
             #print "1q string", result
             return result
       #print (String[realindex-20:realindex-1], String[realindex],
       #       String[realindex+1:realindex+20])
       raise parseerror, "invalid first: " + `first`

# use a modified lexstringwalker
class pylexstringwalker(kjParser.LexStringWalker):
   """LexStringWalker whose DUMP shows the string window at the position
   kept by the lexical dictionary (its realindex attribute)."""

   def DUMP(self):
       text = self.String
       position = self.LexDict.realindex
       kjParser.DumpStringWindow(text, position)

## a HORRIBLE HACK! of a hack: override the DoParse of Grammar
## to give Python line numbers.  RELIES ON GLOBAL pyg
##
def hackDoParse(String, Context=None, DoReductions=1):
    import sys, kjParser
    try:
        # construct the ParserObj
        # add a newline to front to avoid problem with leading comment
        #String = "\n%s\n" % String
        Stream = pylexstringwalker( String, pyg.LexD )
        Stack = [] # {-1:0} #Walkers.SimpleStack()
        ParseOb = kjParser.ParserObj( pyg.RuleL, Stream, pyg.DFA, Stack, \
                         DoReductions, Context )
        # do the parse
        ParseResult = ParseOb.GO()
        # return final result of reduction and the context
        return (ParseResult[1], Context)
        #return kjParser.Grammar.DoParse(pyg, String, Context, DoReductions)
    except: ### for testing!!
        t, v = sys.exc_type, sys.exc_value
        v = ("near line", pyg.LexD.lineno, v)
        raise t, v

# Notice printed by GrammarBuild() each time the grammar is rebuilt;
# the %s is filled in with this module's __name__.
buildinfo = """
Please edit the ARCHIVE parameter of this module (%s)
to place the python grammar archive in a standard
directory to prevent the module from rebuilding
the python grammar over and over and over...
""" % __name__

def GrammarBuild():
    global pyg
    import kjParseBuild
    pyg = kjParseBuild.NullCGrammar()
    pyg.DoParse = hackDoParse
    # override lexical dict here
    pyg.LexD = pylexdict()
    DeclareTerminals(pyg)
    pyg.Keywords(keywords)
    pyg.punct("~!#%^&*()-+=|{}'`<>,.;:/[]{}")
    pyg.Nonterms(nonterms)
    pyg.Declarerules(pyrules)
    print buildinfo
    print "compiling... this may take a while..."
    pyg.Compile()
    print "dumping"
    outfile = open(marshalfilename, "wb")
    pyg.MarshalDump(outfile)
    outfile.close()
    print "self testing the grammar"
    test(pyg)
    print "\n\ndone with regeneration"
    return pyg

def unMarshalpygram():
    global pyg
    import kjParser
    print "loading"
    try:
       infile = open(marshalfilename, "rb")
    except IOError:
       print marshalfilename, "not found, attempting creation"
       pyg = GrammarBuild()
    else:
       pyg = kjParser.UnMarshalGram(infile)
       infile.close()
    pyg.DoParse = hackDoParse
    # lexical override
    pyg.LexD = pylexdict()
    DeclareTerminals(pyg)
    # BindRules(pyg)
    if dotest: 
       print "self testing the grammar"
       test(pyg)
    return pyg


# not used, commented
#### interpretation rules/classes
#
#def zeroth(list, Context):
#    return list[0] # eg, for all1, ignore all but first
#
## file_input, stmt, simple_stmt, compound_stmt give list of statement_ob
#def append(list, Context):
#    "eg, for top_stmt, conjoin two smt lists"
#    return list[0] + list[1]
#
## file_input >zeroth
#
## simple, compound, one_small, small_semi: echol
#def echol(list, Context):
#    return list
#
## more_small > seq_sep
#def seq_sep(list, Context):
#    list[0].append(list[2])
#    return list[0]
#
## smexpr, smassn, smpring, smdel, smflow, smimport, smglobal, smexec
##  > zeroth
#
## cmif, cmwhile, cmfor, cmtry, cmdef, cmclass > zeroth
#
#
#def BindRules(pyg):
#    for name in string.split("""
#        all1 file_input cmif cmwhile cmfor cmtry cmdef cmclass
#        smexpr smassn smprint smdel smflow smimport smglobal smexec
#        """):
#        pyg.Bind(name, zeroth)
#    for name in string.split("""
#        simple compound one_small small_semi
#        """):
#        pyg.Bind(name, echol)
#    pyg.Bind("top_stmt", append)
#    pyg.Bind("more_small", seq_sep)

teststring = """#
#
# a test string
#
from string import join, split
'''
import re

for a in l:
    a.attr, a[x], b = c
else:
    d = b
'''
class zzz:
   ''' 
   #doc string 
   '''
   '''
   global re, join
   
   d = {} 
   for i in range(10): d[i] = i
   '''
   def test(c,s):
       return "this" 
       while not done:
             print done
             break
       list = [1,2,3]
         # comment
       return 5
   
   
   n,x = 89 >> 90 + 6 / 7 % x + z << 6 + 2 ** 8

if x==5:
   while y:
     for i in range(6):
         raise SystemError, "oops"


"""

#teststring ="""\
## comment
#if x in y: print z
#elif 1: print w
#"""

'''
teststring="""
exec "print 1"
"""
'''

def test(grammar, context=None, teststring=teststring):
       from time import time
       now = time()
       x = grammar.DoParse1(teststring, context)
       elapsed = time()-now
       print x
       print elapsed
       return x
   
# regen: when true, running this module as a script rebuilds the grammar first
regen = 0
# dotest: when true, unMarshalpygram() self-tests the grammar it loaded
dotest = 0

if __name__ == "__main__":
    if regen:
        GrammarBuild()
    unMarshalpygram()
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.