kqb_parser.py : » Language-Interface » Python-Knowledge-Engine » pyke-1.1.1 » pyke » krb_compiler » Python Open Source

1.	3.1.2 Python
2.	Ajax
3.	Aspect Oriented
4.	Blog
5.	Build
6.	Business Application
7.	Chart Report
8.	Content Management Systems
9.	Cryptographic
10.	Database
11.	Development
12.	Editor
13.	Email
14.	ERP
15.	Game 2D 3D
16.	GIS
17.	GUI
18.	IDE
19.	Installer
20.	IRC
21.	Issue Tracker
22.	Language Interface
23.	Log
24.	Math
25.	Media Sound Audio
26.	Mobile
27.	Network
28.	Parser
29.	PDF
30.	Project Management
31.	RSS
32.	Search
33.	Security
34.	Template Engines
35.	Test
36.	UML
37.	USB Serial
38.	Web Frameworks
39.	Web Server
40.	Web Services
41.	Web Unit
42.	Wiki
43.	Windows
44.	XML
Python Open Source » Language Interface » Python Knowledge Engine
Python Knowledge Engine » pyke 1.1.1 » pyke » krb_compiler » kqb_parser.py
# $Id: kqb_parser.py 49507964ae64 2010-03-27 mtnyogi $
# coding=utf-8
# 
# Copyright  2008 Bruce Frederiksen
# 
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# 
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

from __future__ import with_statement
from string import Template
import re
import os.path
from pyke import question_base
from pyke import user_question
from pyke import qa_helpers
from pyke.krb_compiler import scanner

class kqb_parser(object):
    blank_line = re.compile(ur'(\s*#)|(\s*$)', re.UNICODE)
    tokenizer = re.compile(ur''' [ \t\f\r\v]* (?: \#.* )? (?:
            (["']) (?P<str> (?: \\. | .)*? ) \1 |        # this must be first!
            [[] (?P<prompt> (?: \\. | .)*? ) []] |
            [$] (?P<param> [a-zA-Z_] [a-zA-Z_0-9]* ) |
            / (?P<regexp1> (?: \\. | .)*? ) / |
            /// (?P<regexp2> (?: \\. | \n | .)*? ) /// | # FIX: this won't work!
            (?P<const> True | False | None ) |
            (?P<id> [a-zA-Z_] [a-zA-Z_0-9]* ) |
            (?P<number> (?: \d+ (?: \.\d* )? |
                            \.\d+ )
                        (?: [eE][-+]?\d+ )? ) |
            (?P<lparen> [(] ) |
            (?P<rparen> [)] ) |
            (?P<comma> , ) |
            (?P<bar> [|] ) |
            (?P<bang> ! ) |
            (?P<equal> = ) |
            (?P<hyphen> - ) |
            (?P<colon> : ) 
        ) [ \t\f\r\v]* (?: \#.* )? ''', re.UNICODE | re.VERBOSE)
    pushed_token = None

    def __init__(self, f):
        # f needs readline() and name.
        self.f = f
        self.lineno = 0
        self.line = ''
        self.column = 0
        self.eof = False

    def readline(self):
        r'''
            >>> from StringIO import StringIO
            >>> p = kqb_parser(StringIO("""
            ... line 1
            ...     # this should be ignored
            ...  line 2
            ...
            ...   line 3
            ... """))
            >>> p.readline()
            >>> p.indent, p.line, p.lineno, p.column
            (0, 'line 1', 2, 0)
            >>> p.readline()
            >>> p.indent, p.line, p.lineno, p.column
            (1, ' line 2', 4, 1)
            >>> p.readline()
            >>> p.indent, p.line, p.lineno, p.column
            (2, '  line 3', 6, 2)
            >>> p.eof
            False
            >>> p.readline()
            >>> p.eof
            True
        '''
        while True:
            line = self.f.readline()
            if line == '':
                self.eof = True
                break
            self.lineno += 1
            line = line.rstrip('\n')
            if not self.blank_line.match(line):
                self.indent, self.column = scanner.count_indent(line)
                self.line = line
                break

    def SyntaxError(self, msg, last_token=True):
        if last_token:
            raise SyntaxError(msg,
                              (self.f.name, self.last_lineno,
                               self.last_column + 1, self.last_line))
        raise SyntaxError(msg,
                          (self.f.name, self.lineno, self.column + 1,
                           self.line))

    def push_token(self):
        #print "push_token:", self.last_token  # FIX
        self.pushed_token = self.last_token
        self.pushed_indent = self.indent
        self.pushed_column = self.column
        self.indent = self.last_indent
        self.column = self.last_column

    def get_token(self, check_token=None):
        r'''
            >>> from StringIO import StringIO
            >>> f = StringIO(r"""
            ...   line 1=2.5: ( /\s* /, $foo) # comment
            ... ,|!-True  "hi\n"  [mom]
            ... """)
            >>> f.name = 'StringIO'
            >>> p = kqb_parser(f)
            >>> p.get_token()
            ('id', 'line')
            >>> p.get_token()
            ('number', 1)
            >>> p.get_token()
            ('equal', None)
            >>> p.get_token()
            ('number', 2.5)
            >>> p.get_token()
            ('colon', None)
            >>> p.get_token()
            ('lparen', None)
            >>> p.get_token()
            ('regexp', '\\s* ')
            >>> p.get_token()
            ('comma', None)
            >>> p.get_token()
            ('param', 'foo')
            >>> p.get_token()
            ('rparen', None)
            >>> p.get_token()
            ('comma', None)
            >>> p.get_token()
            ('bar', None)
            >>> p.get_token()
            ('bang', None)
            >>> p.get_token()
            ('hyphen', None)
            >>> p.get_token()
            ('const', True)
            >>> p.get_token()
            ('str', 'hi\n')
            >>> p.get_token()
            ('prompt', 'mom')
            >>> p.get_token()
            (None, None)
        '''
        if self.pushed_token:
            ans = self.pushed_token
            self.indent = self.pushed_indent
            self.column = self.pushed_column
            self.pushed_token = self.pushed_column = self.pushed_indent = None
            if check_token and check_token != ans[0]:
                self.SyntaxError("expected %s, got %s" % (check_token, ans[0]))
            #print "get_token: returning pushed_token", ans  # FIX
            return ans
        if self.column < len(self.line): self.skip_spaces()
        if self.column >= len(self.line):
            self.readline()
            if self.eof:
                self.last_token = None, None
                #print "get_token: returning EOF"  # FIX
                return self.last_token
        self.last_line = self.line
        self.last_lineno = self.lineno
        self.last_column = self.column
        self.last_indent = self.indent
        match = self.tokenizer.match(self.line, self.column)
        if not match: self.SyntaxError("Scanner error: no legal token")
        token = match.lastgroup
        chars = match.group(token)
        end = match.end()
        indent, ignore = scanner.count_indent(self.line[self.column:end], True)
        self.indent += indent
        self.column = end
        if token == 'str' or token == 'prompt':
            value = scanner.unescape(chars)
        elif token == 'const':
            value = eval(chars)
        elif token == 'number':
            try:
                value = int(chars)
            except ValueError:
                value = float(chars)
        elif token == 'param' or token == 'id':
            value = chars
        elif token == 'regexp1' or token == 'regexp2':
            # FIX
            token = 'regexp'
            value = chars
            self.lineno += chars.count('\n')
            last_nl = chars.rfind('\n')
            if last_nl >= 0:
                self.column = len(chars) - last_nl + 4
        else:
            value = None
        if check_token and check_token != token:
            self.SyntaxError("expected %s, got %s" % (check_token, token))
        self.last_token = str(token), value
        #print "get_token: returning", self.last_token  # FIX
        return self.last_token

    def get_block_string(self, stop=None, hanging=False, ending_newlines=False):
        r'''
            >>> from StringIO import StringIO
            >>> f = StringIO(r"""
            ...     line 1 # comment
            ...        more stuff
            ...     last line
            ... blah    hanging line 1
            ...
            ...         line 2
            ...           indented
            ...         last line
            ...         ! the end !
            ... """)
            >>> f.name = 'StringIO'
            >>> p = kqb_parser(f)
            >>> p.get_block_string()
            u'line 1 # comment\n   more stuff\nlast line'
            >>> p.column = 4
            >>> p.indent = 4
            >>> p.get_block_string('!', True)
            u'hanging line 1\n\nline 2\n  indented\nlast line'
            >>> f = StringIO(r"""
            ...     ! line 1 # comment
            ...          more stuff
            ...       last line
            ... blah
            ... """)
            >>> f.name = 'StringIO'
            >>> p = kqb_parser(f)
            >>> p.readline()
            >>> p.get_token('bang')
            ('bang', None)
            >>> p.get_block_string(hanging=True)
            u'line 1 # comment\n   more stuff\nlast line'
        '''
        if hanging:
            indent, more_chars = \
                scanner.count_indent(self.line[self.column:])
            self.indent += indent
            self.column += more_chars
        else:
            self.readline()
        indent = self.indent
        if self.eof: self.SyntaxError("expected block string, got EOF", False)
        rest_line = self.line[self.column:]
        if self.blank_line.match(rest_line):
            ans = []
        else:
            ans = [rest_line]
        while not self.eof:
            last_lineno = self.lineno
            self.readline()
            if ending_newlines:
                for i in range(self.lineno - last_lineno - 1): ans.append('')
            if self.eof or self.indent < indent or \
               stop and self.line[self.column:].startswith(stop):
                break
            if not ending_newlines:
                for i in range(self.lineno - last_lineno - 1): ans.append('')
            ans.append(' ' * (self.indent - indent) + self.line[self.column:])
        if not ans: self.SyntaxError("expected block string", False)
        return u'\n'.join(scanner.unescape(str) for str in ans)

    def parse_simple_match(self):
        token, value = self.get_token()
        if token == 'str' or token == 'id' or token == 'number' or \
           token == 'const':
            next_token, next_value = self.get_token()
            if next_token == 'prompt' and token == 'str':
                final_token, final_value = self.get_token('regexp')
                return qa_helpers.regexp(final_value, value, next_value)
            if next_token == 'regexp' and token == 'str':
                return qa_helpers.regexp(next_value, value)
            if next_token == 'equal':
                return qa_helpers.qmap(self.parse_simple_match(), value)
            if next_token == 'hyphen' and token == 'number':
                final_token, final_value = self.get_token()
                if final_token == 'number':
                    return slice(value, final_value)
                self.push_token()
                return slice(value, None)
            self.push_token()
            return value
        if token == 'prompt':
            next_token, next_value = self.get_token('regexp')
            return qa_helpers.regexp(next_value, prompt=value)
        if token == 'lparen':
            ans = self.parse_match()
            self.get_token('rparen')
            return ans
        if token == 'regexp':
            return qa_helpers.regexp(value)
        if token == 'hyphen':
            next_token, next_value = self.get_token('number')
            return slice(None, next_value)
        self.SyntaxError("expected match, got %s" % token)

    def parse_match(self):
        r'''
            >>> from StringIO import StringIO
            >>> def do(str):
            ...    ans = StringIO(str)
            ...    ans.name = 'StringIO'
            ...    return kqb_parser(ans).parse_match()
            >>> do(r'/reg\exp/ bob')
            <regexp /reg\exp/>
            >>> do(r'"msg"/reg\exp/ bob')
            <regexp 'msg'/reg\exp/>
            >>> do(r'[prompt]/reg\exp/ bob')
            <regexp [prompt]/reg\exp/>
            >>> do(r'"msg"[prompt]/reg\exp/ bob')
            <regexp 'msg'[prompt]/reg\exp/>
            >>> do(r"44 = id")
            <qmap 44 = 'id'>
            >>> do(r"-5 bob")
            slice(None, 5, None)
            >>> do(r"0-5 bob")
            slice(0, 5, None)
            >>> do(r"0- bob")
            slice(0, None, None)
            >>> do(r"/regexp/|44|3-5 bob")
            (<regexp /regexp/>, 44, slice(3, 5, None))
            >>> do(r"44 id")
            44
        '''
        ans = [self.parse_simple_match()]
        token, value = self.get_token()
        while token == 'bar':
            ans.append(self.parse_simple_match())
            token, value = self.get_token()
        self.push_token()
        if len(ans) == 1: return ans[0]
        return tuple(ans)

    def get_value(self):
        token, value = self.get_token()
        if token not in ('const', 'number', 'id', 'str'):
            self.SyntaxError("expected value, got %s" % token)
        return value

    def skip_spaces(self, pre_increment=0):
        if pre_increment:
            indent, chars = \
                scanner.count_indent(self.line[self.column:
                                               self.column+pre_increment], True)
            self.indent += indent
            self.column += chars
        indent, chars = scanner.count_indent(self.line[self.column:])
        self.indent += indent
        self.column += chars

    def parse_alternatives(self):
        r'''
            >>> from StringIO import StringIO
            >>> def do(str):
            ...    ans = StringIO(str)
            ...    ans.name = 'StringIO'
            ...    p = kqb_parser(ans)
            ...    alt, review = p.parse_alternatives()
            ...    for tag, msg in alt:
            ...        print '%s: %s' % (repr(tag), repr(msg.template))
            ...    for key, msg in sorted(review, key=lambda x: repr(x[0])):
            ...        print repr(key), '!', repr(msg.template)
            >>> do(r"""
            ...     1: hi mom
            ...        how are you?
            ...        ! Just reward!
            ...     bob: yep this is bob
            ...          ! =1
            ...     44: nope, this is just 44
            ...         ! = bob
            ... next
            ... """)
            1: u'hi mom\nhow are you?'
            'bob': u'yep this is bob'
            44: u'nope, this is just 44'
            (1, 'bob', 44) ! u'Just reward!'
        '''
        if self.column >= len(self.line):
            self.readline()
        if self.eof or self.indent == 0:
            self.SyntaxError("no alternatives", False)
        indent = self.indent
        review = {}
        ans = []
        while not self.eof and self.indent == indent:
            tag = self.get_value()
            if tag in review: self.SyntaxError("duplicate tag: %s" % tag)
            self.get_token('colon')
            ans.append((tag, Template(self.get_block_string(stop='!',
                                                            hanging=True))))
            if self.line[self.column] == '!':
                self.skip_spaces(1)
                if self.line[self.column] == '=':
                    self.indent += 1
                    self.column += 1
                    old_value = self.get_value()
                    while not isinstance(review[old_value], tuple):
                        old_value = review[old_value]
                    review[old_value][0].append(tag)
                    review[tag] = old_value
                    self.readline()
                else:
                    review[tag] = \
                        [tag], Template(self.get_block_string(hanging=True))
        if not self.eof and self.indent > indent:
            self.SyntaxError("unexpected indent", False)
        return tuple(ans), \
               tuple((value[0][0] if len(value[0]) == 1
                                  else tuple(value[0]),
                      value[1])
                     for value in review.itervalues()
                      if isinstance(value, tuple)) \
                  if review \
                  else None

    def parse_review(self):
        r'''
            >>> from StringIO import StringIO
            >>> def do(str):
            ...    ans = StringIO(str)
            ...    ans.name = 'StringIO'
            ...    p = kqb_parser(ans)
            ...    review = p.parse_review()
            ...    for key, msg in sorted(review, key=lambda x: repr(x[0])):
            ...        print repr(key), '!', repr(msg.template)
            >>> do(r"""
            ...     1 ! hi mom
            ...         how are you?
            ...         ! Just reward!
            ...     bob! yep this is bob
            ...     3-5! nope, this is just 44
            ... next
            ... """)
            'bob' ! u'yep this is bob'
            1 ! u'hi mom\nhow are you?\n! Just reward!'
            slice(3, 5, None) ! u'nope, this is just 44'
        '''
        if self.column >= len(self.line):
            self.readline()
        if self.eof or self.indent == 0:
            #print "parse_review: None"  # FIX
            return None
        #print "parse_review: self.indent", self.indent, \
        #      "self.column", self.column   # FIX
        indent = self.indent
        review = []
        while not self.eof and self.indent == indent:
            match = self.parse_match()
            self.get_token('bang')
            review.append(
                (match, Template(self.get_block_string(hanging=True))))
        if not self.eof and self.indent > indent:
            self.SyntaxError("unexpected indent", False)
        #print "parse_review:", tuple(review)   # FIX
        return tuple(review)

    def parse_questions(self):
        r''' question_base.question generator.

            >>> from StringIO import StringIO
            >>> def do(str):
            ...    ans = StringIO(str)
            ...    ans.name = 'StringIO'
            ...    p = kqb_parser(ans)
            ...    for q in p.parse_questions():
            ...        print q
            >>> do(r"""
            ... question1($ans)
            ...     This is the question?
            ...     ---
            ...     $ans = yn
            ...
            ... question2($ans)
            ...     This is the second question?
            ...     ---
            ...     $ans = select_1
            ...         1: first
            ...         2: second
            ...         3: third
            ...
            ... """)
            <question question1($ans): $ans = <yn: u'This is the question?'>>
            <question question2($ans): $ans = <select_1(1: 2: 3:): u'This is the second question?'>>
        '''
        self.readline()
        while not self.eof:
            if self.indent > 0: self.SyntaxError("unexpected indent", False)
            token, name = self.get_token('id')
            self.get_token('lparen')
            params = []
            token, param = self.get_token()
            if token != 'rparen':
                while True:
                    if token != 'param':
                        self.SyntaxError("expected $param, got %s" % token)
                    params.append(param)
                    token, ignore = self.get_token()
                    if token == 'rparen': break
                    if token != 'comma':
                        self.SyntaxError("expected comma or rparen, got %s" %
                                         token)
                    token, param = self.get_token()
            format = self.get_block_string(stop='---', ending_newlines=True)
            self.readline()     # ---
            token, answer_param = self.get_token('param')
            self.get_token('equal')
            token, cls = self.get_token('id')
            user_q = getattr(user_question, cls)(format)
            user_q.parse(self)
            yield question_base.question(name, tuple(params), answer_param,
                                         user_q)
            if self.column >= len(self.line): self.readline()

def parse_kqb(filename):
    name = os.path.basename(filename)[:-4]
    with open(filename, 'rU') as f:
        base = question_base.question_base(name)
        parser = kqb_parser(f)
        for question in parser.parse_questions():
            base.add_question(question)
    return base
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.