0001: /*
0002: * Copyright 2003-2005 Sun Microsystems, Inc. All Rights Reserved.
0003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0004: *
0005: * This code is free software; you can redistribute it and/or modify it
0006: * under the terms of the GNU General Public License version 2 only, as
0007: * published by the Free Software Foundation. Sun designates this
0008: * particular file as subject to the "Classpath" exception as provided
0009: * by Sun in the LICENSE file that accompanied this code.
0010: *
0011: * This code is distributed in the hope that it will be useful, but WITHOUT
0012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
0014: * version 2 for more details (a copy is included in the LICENSE file that
0015: * accompanied this code).
0016: *
0017: * You should have received a copy of the GNU General Public License version
0018: * 2 along with this work; if not, write to the Free Software Foundation,
0019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0020: *
0021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
0022: * CA 95054 USA or visit www.sun.com if you need additional information or
0023: * have any questions.
0024: */
0025:
0026: package com.sun.java_cup.internal.runtime;
0027:
0028: import java.util.Stack;
0029:
0030: /** This class implements a skeleton table driven LR parser. In general,
0031: * LR parsers are a form of bottom up shift-reduce parsers. Shift-reduce
0032: * parsers act by shifting input onto a parse stack until the Symbols
0033: * matching the right hand side of a production appear on the top of the
0034: * stack. Once this occurs, a reduce is performed. This involves removing
0035: * the Symbols corresponding to the right hand side of the production
0036: * (the so called "handle") and replacing them with the non-terminal from
0037: * the left hand side of the production. <p>
0038: *
0039: * To control the decision of whether to shift or reduce at any given point,
0040: * the parser uses a state machine (the "viable prefix recognition machine"
0041: * built by the parser generator). The current state of the machine is placed
0042: * on top of the parse stack (stored as part of a Symbol object representing
0043: * a terminal or non terminal). The parse action table is consulted
0044: * (using the current state and the current lookahead Symbol as indexes) to
0045: * determine whether to shift or to reduce. When the parser shifts, it
0046: * changes to a new state by pushing a new Symbol (containing a new state)
0047: * onto the stack. When the parser reduces, it pops the handle (right hand
0048: * side of a production) off the stack. This leaves the parser in the state
0049: * it was in before any of those Symbols were matched. Next the reduce-goto
0050: * table is consulted (using the new state and current lookahead Symbol as
0051: * indexes) to determine a new state to go to. The parser then shifts to
0052: * this goto state by pushing the left hand side Symbol of the production
0053: * (also containing the new state) onto the stack.<p>
0054: *
0055: * This class actually provides four LR parsers. The methods parse() and
0056: * debug_parse() provide two versions of the main parser (the only difference
0057: * being that debug_parse() emits debugging trace messages as it parses).
0058: * In addition to these main parsers, the error recovery mechanism uses two
0059: * more. One of these is used to simulate "parsing ahead" in the input
0060: * without carrying out actions (to verify that a potential error recovery
0061: * has worked), and the other is used to parse through buffered "parse ahead"
0062: * input in order to execute all actions and re-synchronize the actual parser
0063: * configuration.<p>
0064: *
0065: * This is an abstract class which is normally filled out by a subclass
0066: * generated by the JavaCup parser generator. In addition to supplying
0067: * the actual parse tables, generated code also supplies methods which
0068: * invoke various pieces of user supplied code, provide access to certain
0069: * special Symbols (e.g., EOF and error), etc. Specifically, the following
0070: * abstract methods are normally supplied by generated code:
0071: * <dl compact>
0072: * <dt> short[][] production_table()
0073: * <dd> Provides a reference to the production table (indicating the index of
0074: * the left hand side non terminal and the length of the right hand side
0075: * for each production in the grammar).
0076: * <dt> short[][] action_table()
0077: * <dd> Provides a reference to the parse action table.
0078: * <dt> short[][] reduce_table()
0079: * <dd> Provides a reference to the reduce-goto table.
0080: * <dt> int start_state()
0081: * <dd> Indicates the index of the start state.
0082: * <dt> int start_production()
0083: * <dd> Indicates the index of the starting production.
0084: * <dt> int EOF_sym()
0085: * <dd> Indicates the index of the EOF Symbol.
0086: * <dt> int error_sym()
0087: * <dd> Indicates the index of the error Symbol.
0088: * <dt> Symbol do_action()
0089: * <dd> Executes a piece of user supplied action code. This always comes at
0090: * the point of a reduce in the parse, so this code also allocates and
0091: * fills in the left hand side non terminal Symbol object that is to be
0092: * pushed onto the stack for the reduce.
0093: * <dt> void init_actions()
0094: * <dd> Code to initialize a special object that encapsulates user supplied
0095: * actions (this object is used by do_action() to actually carry out the
0096: * actions).
0097: * </dl>
0098: *
0099: * In addition to these routines that <i>must</i> be supplied by the
0100: * generated subclass there are also a series of routines that <i>may</i>
0101: * be supplied. These include:
0102: * <dl>
0103: * <dt> Symbol scan()
0104: * <dd> Used to get the next input Symbol from the scanner.
0105: * <dt> Scanner getScanner()
0106: * <dd> Used to provide a scanner for the default implementation of
0107: * scan().
0108: * <dt> int error_sync_size()
0109: * <dd> This determines how many Symbols past the point of an error
0110: * must be parsed without error in order to consider a recovery to
0111: * be valid. This defaults to 3. Values less than 2 are not
0112: * recommended.
0113: * <dt> void report_error(String message, Object info)
0114: * <dd> This method is called to report an error. The default implementation
0115: * simply prints a message to System.err and where the error occurred.
0116: * This method is often replaced in order to provide a more sophisticated
0117: * error reporting mechanism.
0118: * <dt> void report_fatal_error(String message, Object info)
0119: * <dd> This method is called when a fatal error that cannot be recovered from
0120: * is encountered. In the default implementation, it calls
0121: * report_error() to emit a message, then throws an exception.
0122: * <dt> void syntax_error(Symbol cur_token)
0123: * <dd> This method is called as soon as syntax error is detected (but
0124: * before recovery is attempted). In the default implementation it
0125: * invokes: report_error("Syntax error", null);
0126: * <dt> void unrecovered_syntax_error(Symbol cur_token)
0127: * <dd> This method is called if syntax error recovery fails. In the default
0128: * implementation it invokes:<br>
0129: * report_fatal_error("Couldn't repair and continue parse", null);
0130: * </dl>
0131: *
0132: * @see com.sun.java_cup.internal.runtime.Symbol
0133: * @see com.sun.java_cup.internal.runtime.Symbol
0134: * @see com.sun.java_cup.internal.runtime.virtual_parse_stack
0135: * @version last updated: 7/3/96
0136: * @author Frank Flannery
0137: */
0138:
0139: public abstract class lr_parser {
0140:
0141: /*-----------------------------------------------------------*/
0142: /*--- Constructor(s) ----------------------------------------*/
0143: /*-----------------------------------------------------------*/
0144:
0145: /** Simple constructor. */
0146: public lr_parser() {
0147: /* nothing to do here */
0148: }
0149:
0150: /** Constructor that sets the default scanner. [CSA/davidm] */
0151: public lr_parser(Scanner s) {
0152: this (); /* in case default constructor someday does something */
0153: setScanner(s);
0154: }
0155:
0156: /*-----------------------------------------------------------*/
0157: /*--- (Access to) Static (Class) Variables ------------------*/
0158: /*-----------------------------------------------------------*/
0159:
0160: /** The default number of Symbols after an error we much match to consider
0161: * it recovered from.
0162: */
0163: protected final static int _error_sync_size = 3;
0164:
0165: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0166:
0167: /** The number of Symbols after an error we much match to consider it
0168: * recovered from.
0169: */
0170: protected int error_sync_size() {
0171: return _error_sync_size;
0172: }
0173:
0174: /*-----------------------------------------------------------*/
0175: /*--- (Access to) Instance Variables ------------------------*/
0176: /*-----------------------------------------------------------*/
0177:
0178: /** Table of production information (supplied by generated subclass).
0179: * This table contains one entry per production and is indexed by
0180: * the negative-encoded values (reduce actions) in the action_table.
0181: * Each entry has two parts, the index of the non-terminal on the
0182: * left hand side of the production, and the number of Symbols
0183: * on the right hand side.
0184: */
0185: public abstract short[][] production_table();
0186:
0187: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0188:
0189: /** The action table (supplied by generated subclass). This table is
0190: * indexed by state and terminal number indicating what action is to
0191: * be taken when the parser is in the given state (i.e., the given state
0192: * is on top of the stack) and the given terminal is next on the input.
0193: * States are indexed using the first dimension, however, the entries for
0194: * a given state are compacted and stored in adjacent index, value pairs
0195: * which are searched for rather than accessed directly (see get_action()).
0196: * The actions stored in the table will be either shifts, reduces, or
0197: * errors. Shifts are encoded as positive values (one greater than the
0198: * state shifted to). Reduces are encoded as negative values (one less
0199: * than the production reduced by). Error entries are denoted by zero.
0200: *
0201: * @see com.sun.java_cup.internal.runtime.lr_parser#get_action
0202: */
0203: public abstract short[][] action_table();
0204:
0205: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0206:
0207: /** The reduce-goto table (supplied by generated subclass). This
0208: * table is indexed by state and non-terminal number and contains
0209: * state numbers. States are indexed using the first dimension, however,
0210: * the entries for a given state are compacted and stored in adjacent
0211: * index, value pairs which are searched for rather than accessed
0212: * directly (see get_reduce()). When a reduce occurs, the handle
0213: * (corresponding to the RHS of the matched production) is popped off
0214: * the stack. The new top of stack indicates a state. This table is
0215: * then indexed by that state and the LHS of the reducing production to
0216: * indicate where to "shift" to.
0217: *
0218: * @see com.sun.java_cup.internal.runtime.lr_parser#get_reduce
0219: */
0220: public abstract short[][] reduce_table();
0221:
0222: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0223:
0224: /** The index of the start state (supplied by generated subclass). */
0225: public abstract int start_state();
0226:
0227: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0228:
0229: /** The index of the start production (supplied by generated subclass). */
0230: public abstract int start_production();
0231:
0232: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0233:
0234: /** The index of the end of file terminal Symbol (supplied by generated
0235: * subclass).
0236: */
0237: public abstract int EOF_sym();
0238:
0239: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0240:
0241: /** The index of the special error Symbol (supplied by generated subclass). */
0242: public abstract int error_sym();
0243:
0244: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0245:
0246: /** Internal flag to indicate when parser should quit. */
0247: protected boolean _done_parsing = false;
0248:
0249: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0250:
0251: /** This method is called to indicate that the parser should quit. This is
0252: * normally called by an accept action, but can be used to cancel parsing
0253: * early in other circumstances if desired.
0254: */
0255: public void done_parsing() {
0256: _done_parsing = true;
0257: }
0258:
0259: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0260: /* Global parse state shared by parse(), error recovery, and
0261: * debugging routines */
0262: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0263:
0264: /** Indication of the index for top of stack (for use by actions). */
0265: protected int tos;
0266:
0267: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0268:
0269: /** The current lookahead Symbol. */
0270: protected Symbol cur_token;
0271:
0272: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0273:
0274: /** The parse stack itself. */
0275: protected Stack stack = new Stack();
0276:
0277: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0278:
0279: /** Direct reference to the production table. */
0280: protected short[][] production_tab;
0281:
0282: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0283:
0284: /** Direct reference to the action table. */
0285: protected short[][] action_tab;
0286:
0287: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0288:
0289: /** Direct reference to the reduce-goto table. */
0290: protected short[][] reduce_tab;
0291:
0292: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0293:
0294: /** This is the scanner object used by the default implementation
0295: * of scan() to get Symbols. To avoid name conflicts with existing
0296: * code, this field is private. [CSA/davidm] */
0297: private Scanner _scanner;
0298:
0299: /**
0300: * Simple accessor method to set the default scanner.
0301: */
0302: public void setScanner(Scanner s) {
0303: _scanner = s;
0304: }
0305:
0306: /**
0307: * Simple accessor method to get the default scanner.
0308: */
0309: public Scanner getScanner() {
0310: return _scanner;
0311: }
0312:
0313: /*-----------------------------------------------------------*/
0314: /*--- General Methods ---------------------------------------*/
0315: /*-----------------------------------------------------------*/
0316:
0317: /** Perform a bit of user supplied action code (supplied by generated
0318: * subclass). Actions are indexed by an internal action number assigned
0319: * at parser generation time.
0320: *
0321: * @param act_num the internal index of the action to be performed.
0322: * @param parser the parser object we are acting for.
0323: * @param stack the parse stack of that object.
0324: * @param top the index of the top element of the parse stack.
0325: */
0326: public abstract Symbol do_action(int act_num, lr_parser parser,
0327: Stack stack, int top) throws java.lang.Exception;
0328:
0329: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0330:
0331: /** User code for initialization inside the parser. Typically this
0332: * initializes the scanner. This is called before the parser requests
0333: * the first Symbol. Here this is just a placeholder for subclasses that
0334: * might need this and we perform no action. This method is normally
0335: * overridden by the generated code using this contents of the "init with"
0336: * clause as its body.
0337: */
0338: public void user_init() throws java.lang.Exception {
0339: }
0340:
0341: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0342:
0343: /** Initialize the action object. This is called before the parser does
0344: * any parse actions. This is filled in by generated code to create
0345: * an object that encapsulates all action code.
0346: */
0347: protected abstract void init_actions() throws java.lang.Exception;
0348:
0349: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0350:
0351: /** Get the next Symbol from the input (supplied by generated subclass).
0352: * Once end of file has been reached, all subsequent calls to scan
0353: * should return an EOF Symbol (which is Symbol number 0). By default
0354: * this method returns getScanner().next_token(); this implementation
0355: * can be overriden by the generated parser using the code declared in
0356: * the "scan with" clause. Do not recycle objects; every call to
0357: * scan() should return a fresh object.
0358: */
0359: public Symbol scan() throws java.lang.Exception {
0360: return getScanner().next_token();
0361: }
0362:
0363: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0364:
0365: /** Report a fatal error. This method takes a message string and an
0366: * additional object (to be used by specializations implemented in
0367: * subclasses). Here in the base class a very simple implementation
0368: * is provided which reports the error then throws an exception.
0369: *
0370: * @param message an error message.
0371: * @param info an extra object reserved for use by specialized subclasses.
0372: */
0373: public void report_fatal_error(String message, Object info)
0374: throws java.lang.Exception {
0375: /* stop parsing (not really necessary since we throw an exception, but) */
0376: done_parsing();
0377:
0378: /* use the normal error message reporting to put out the message */
0379: report_error(message, info);
0380:
0381: /* throw an exception */
0382: throw new Exception("Can't recover from previous error(s)");
0383: }
0384:
0385: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0386:
0387: /** Report a non fatal error (or warning). This method takes a message
0388: * string and an additional object (to be used by specializations
0389: * implemented in subclasses). Here in the base class a very simple
0390: * implementation is provided which simply prints the message to
0391: * System.err.
0392: *
0393: * @param message an error message.
0394: * @param info an extra object reserved for use by specialized subclasses.
0395: */
0396: public void report_error(String message, Object info) {
0397: System.err.print(message);
0398: if (info instanceof Symbol)
0399: if (((Symbol) info).left != -1)
0400: System.err.println(" at character "
0401: + ((Symbol) info).left + " of input");
0402: else
0403: System.err.println("");
0404: else
0405: System.err.println("");
0406: }
0407:
0408: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0409:
0410: /** This method is called when a syntax error has been detected and recovery
0411: * is about to be invoked. Here in the base class we just emit a
0412: * "Syntax error" error message.
0413: *
0414: * @param cur_token the current lookahead Symbol.
0415: */
0416: public void syntax_error(Symbol cur_token) {
0417: report_error("Syntax error", cur_token);
0418: }
0419:
0420: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0421:
0422: /** This method is called if it is determined that syntax error recovery
0423: * has been unsuccessful. Here in the base class we report a fatal error.
0424: *
0425: * @param cur_token the current lookahead Symbol.
0426: */
0427: public void unrecovered_syntax_error(Symbol cur_token)
0428: throws java.lang.Exception {
0429: report_fatal_error("Couldn't repair and continue parse",
0430: cur_token);
0431: }
0432:
0433: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0434:
0435: /** Fetch an action from the action table. The table is broken up into
0436: * rows, one per state (rows are indexed directly by state number).
0437: * Within each row, a list of index, value pairs are given (as sequential
0438: * entries in the table), and the list is terminated by a default entry
0439: * (denoted with a Symbol index of -1). To find the proper entry in a row
0440: * we do a linear or binary search (depending on the size of the row).
0441: *
0442: * @param state the state index of the action being accessed.
0443: * @param sym the Symbol index of the action being accessed.
0444: */
0445: protected final short get_action(int state, int sym) {
0446: short tag;
0447: int first, last, probe;
0448: short[] row = action_tab[state];
0449:
0450: /* linear search if we are < 10 entries */
0451: if (row.length < 20)
0452: for (probe = 0; probe < row.length; probe++) {
0453: /* is this entry labeled with our Symbol or the default? */
0454: tag = row[probe++];
0455: if (tag == sym || tag == -1) {
0456: /* return the next entry */
0457: return row[probe];
0458: }
0459: }
0460: /* otherwise binary search */
0461: else {
0462: first = 0;
0463: last = (row.length - 1) / 2 - 1; /* leave out trailing default entry */
0464: while (first <= last) {
0465: probe = (first + last) / 2;
0466: if (sym == row[probe * 2])
0467: return row[probe * 2 + 1];
0468: else if (sym > row[probe * 2])
0469: first = probe + 1;
0470: else
0471: last = probe - 1;
0472: }
0473:
0474: /* not found, use the default at the end */
0475: return row[row.length - 1];
0476: }
0477:
0478: /* shouldn't happened, but if we run off the end we return the
0479: default (error == 0) */
0480: return 0;
0481: }
0482:
0483: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0484:
0485: /** Fetch a state from the reduce-goto table. The table is broken up into
0486: * rows, one per state (rows are indexed directly by state number).
0487: * Within each row, a list of index, value pairs are given (as sequential
0488: * entries in the table), and the list is terminated by a default entry
0489: * (denoted with a Symbol index of -1). To find the proper entry in a row
0490: * we do a linear search.
0491: *
0492: * @param state the state index of the entry being accessed.
0493: * @param sym the Symbol index of the entry being accessed.
0494: */
0495: protected final short get_reduce(int state, int sym) {
0496: short tag;
0497: short[] row = reduce_tab[state];
0498:
0499: /* if we have a null row we go with the default */
0500: if (row == null)
0501: return -1;
0502:
0503: for (int probe = 0; probe < row.length; probe++) {
0504: /* is this entry labeled with our Symbol or the default? */
0505: tag = row[probe++];
0506: if (tag == sym || tag == -1) {
0507: /* return the next entry */
0508: return row[probe];
0509: }
0510: }
0511: /* if we run off the end we return the default (error == -1) */
0512: return -1;
0513: }
0514:
0515: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0516:
0517: /** This method provides the main parsing routine. It returns only when
0518: * done_parsing() has been called (typically because the parser has
0519: * accepted, or a fatal error has been reported). See the header
0520: * documentation for the class regarding how shift/reduce parsers operate
0521: * and how the various tables are used.
0522: */
0523: public Symbol parse() throws java.lang.Exception {
0524: /* the current action code */
0525: int act;
0526:
0527: /* the Symbol/stack element returned by a reduce */
0528: Symbol lhs_sym = null;
0529:
0530: /* information about production being reduced with */
0531: short handle_size, lhs_sym_num;
0532:
0533: /* set up direct reference to tables to drive the parser */
0534:
0535: production_tab = production_table();
0536: action_tab = action_table();
0537: reduce_tab = reduce_table();
0538:
0539: /* initialize the action encapsulation object */
0540: init_actions();
0541:
0542: /* do user initialization */
0543: user_init();
0544:
0545: /* get the first token */
0546: cur_token = scan();
0547:
0548: /* push dummy Symbol with start state to get us underway */
0549: stack.removeAllElements();
0550: stack.push(new Symbol(0, start_state()));
0551: tos = 0;
0552:
0553: /* continue until we are told to stop */
0554: for (_done_parsing = false; !_done_parsing;) {
0555: /* Check current token for freshness. */
0556: if (cur_token.used_by_parser)
0557: throw new Error(
0558: "Symbol recycling detected (fix your scanner).");
0559:
0560: /* current state is always on the top of the stack */
0561:
0562: /* look up action out of the current state with the current input */
0563: act = get_action(((Symbol) stack.peek()).parse_state,
0564: cur_token.sym);
0565:
0566: /* decode the action -- > 0 encodes shift */
0567: if (act > 0) {
0568: /* shift to the encoded state by pushing it on the stack */
0569: cur_token.parse_state = act - 1;
0570: cur_token.used_by_parser = true;
0571: stack.push(cur_token);
0572: tos++;
0573:
0574: /* advance to the next Symbol */
0575: cur_token = scan();
0576: }
0577: /* if its less than zero, then it encodes a reduce action */
0578: else if (act < 0) {
0579: /* perform the action for the reduce */
0580: lhs_sym = do_action((-act) - 1, this , stack, tos);
0581:
0582: /* look up information about the production */
0583: lhs_sym_num = production_tab[(-act) - 1][0];
0584: handle_size = production_tab[(-act) - 1][1];
0585:
0586: /* pop the handle off the stack */
0587: for (int i = 0; i < handle_size; i++) {
0588: stack.pop();
0589: tos--;
0590: }
0591:
0592: /* look up the state to go to from the one popped back to */
0593: act = get_reduce(((Symbol) stack.peek()).parse_state,
0594: lhs_sym_num);
0595:
0596: /* shift to that state */
0597: lhs_sym.parse_state = act;
0598: lhs_sym.used_by_parser = true;
0599: stack.push(lhs_sym);
0600: tos++;
0601: }
0602: /* finally if the entry is zero, we have an error */
0603: else if (act == 0) {
0604: /* call user syntax error reporting routine */
0605: syntax_error(cur_token);
0606:
0607: /* try to error recover */
0608: if (!error_recovery(false)) {
0609: /* if that fails give up with a fatal syntax error */
0610: unrecovered_syntax_error(cur_token);
0611:
0612: /* just in case that wasn't fatal enough, end parse */
0613: done_parsing();
0614: } else {
0615: lhs_sym = (Symbol) stack.peek();
0616: }
0617: }
0618: }
0619: return lhs_sym;
0620: }
0621:
0622: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0623:
0624: /** Write a debugging message to System.err for the debugging version
0625: * of the parser.
0626: *
0627: * @param mess the text of the debugging message.
0628: */
0629: public void debug_message(String mess) {
0630: System.err.println(mess);
0631: }
0632:
0633: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0634:
0635: /** Dump the parse stack for debugging purposes. */
0636: public void dump_stack() {
0637: if (stack == null) {
0638: debug_message("# Stack dump requested, but stack is null");
0639: return;
0640: }
0641:
0642: debug_message("============ Parse Stack Dump ============");
0643:
0644: /* dump the stack */
0645: for (int i = 0; i < stack.size(); i++) {
0646: debug_message("Symbol: "
0647: + ((Symbol) stack.elementAt(i)).sym + " State: "
0648: + ((Symbol) stack.elementAt(i)).parse_state);
0649: }
0650: debug_message("==========================================");
0651: }
0652:
0653: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0654:
0655: /** Do debug output for a reduce.
0656: *
0657: * @param prod_num the production we are reducing with.
0658: * @param nt_num the index of the LHS non terminal.
0659: * @param rhs_size the size of the RHS.
0660: */
0661: public void debug_reduce(int prod_num, int nt_num, int rhs_size) {
0662: debug_message("# Reduce with prod #" + prod_num + " [NT="
0663: + nt_num + ", " + "SZ=" + rhs_size + "]");
0664: }
0665:
0666: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0667:
0668: /** Do debug output for shift.
0669: *
0670: * @param shift_tkn the Symbol being shifted onto the stack.
0671: */
0672: public void debug_shift(Symbol shift_tkn) {
0673: debug_message("# Shift under term #" + shift_tkn.sym
0674: + " to state #" + shift_tkn.parse_state);
0675: }
0676:
0677: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0678:
0679: /** Do debug output for stack state. [CSA]
0680: */
0681: public void debug_stack() {
0682: StringBuffer sb = new StringBuffer("## STACK:");
0683: for (int i = 0; i < stack.size(); i++) {
0684: Symbol s = (Symbol) stack.elementAt(i);
0685: sb.append(" <state " + s.parse_state + ", sym " + s.sym
0686: + ">");
0687: if ((i % 3) == 2 || (i == (stack.size() - 1))) {
0688: debug_message(sb.toString());
0689: sb = new StringBuffer(" ");
0690: }
0691: }
0692: }
0693:
0694: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0695:
0696: /** Perform a parse with debugging output. This does exactly the
0697: * same things as parse(), except that it calls debug_shift() and
0698: * debug_reduce() when shift and reduce moves are taken by the parser
0699: * and produces various other debugging messages.
0700: */
0701: public Symbol debug_parse() throws java.lang.Exception {
0702: /* the current action code */
0703: int act;
0704:
0705: /* the Symbol/stack element returned by a reduce */
0706: Symbol lhs_sym = null;
0707:
0708: /* information about production being reduced with */
0709: short handle_size, lhs_sym_num;
0710:
0711: /* set up direct reference to tables to drive the parser */
0712: production_tab = production_table();
0713: action_tab = action_table();
0714: reduce_tab = reduce_table();
0715:
0716: debug_message("# Initializing parser");
0717:
0718: /* initialize the action encapsulation object */
0719: init_actions();
0720:
0721: /* do user initialization */
0722: user_init();
0723:
0724: /* the current Symbol */
0725: cur_token = scan();
0726:
0727: debug_message("# Current Symbol is #" + cur_token.sym);
0728:
0729: /* push dummy Symbol with start state to get us underway */
0730: stack.removeAllElements();
0731: stack.push(new Symbol(0, start_state()));
0732: tos = 0;
0733:
0734: /* continue until we are told to stop */
0735: for (_done_parsing = false; !_done_parsing;) {
0736: /* Check current token for freshness. */
0737: if (cur_token.used_by_parser)
0738: throw new Error(
0739: "Symbol recycling detected (fix your scanner).");
0740:
0741: /* current state is always on the top of the stack */
0742: //debug_stack();
0743: /* look up action out of the current state with the current input */
0744: act = get_action(((Symbol) stack.peek()).parse_state,
0745: cur_token.sym);
0746:
0747: /* decode the action -- > 0 encodes shift */
0748: if (act > 0) {
0749: /* shift to the encoded state by pushing it on the stack */
0750: cur_token.parse_state = act - 1;
0751: cur_token.used_by_parser = true;
0752: debug_shift(cur_token);
0753: stack.push(cur_token);
0754: tos++;
0755:
0756: /* advance to the next Symbol */
0757: cur_token = scan();
0758: debug_message("# Current token is " + cur_token);
0759: }
0760: /* if its less than zero, then it encodes a reduce action */
0761: else if (act < 0) {
0762: /* perform the action for the reduce */
0763: lhs_sym = do_action((-act) - 1, this , stack, tos);
0764:
0765: /* look up information about the production */
0766: lhs_sym_num = production_tab[(-act) - 1][0];
0767: handle_size = production_tab[(-act) - 1][1];
0768:
0769: debug_reduce((-act) - 1, lhs_sym_num, handle_size);
0770:
0771: /* pop the handle off the stack */
0772: for (int i = 0; i < handle_size; i++) {
0773: stack.pop();
0774: tos--;
0775: }
0776:
0777: /* look up the state to go to from the one popped back to */
0778: act = get_reduce(((Symbol) stack.peek()).parse_state,
0779: lhs_sym_num);
0780: debug_message("# Reduce rule: top state "
0781: + ((Symbol) stack.peek()).parse_state
0782: + ", lhs sym " + lhs_sym_num + " -> state "
0783: + act);
0784:
0785: /* shift to that state */
0786: lhs_sym.parse_state = act;
0787: lhs_sym.used_by_parser = true;
0788: stack.push(lhs_sym);
0789: tos++;
0790:
0791: debug_message("# Goto state #" + act);
0792: }
0793: /* finally if the entry is zero, we have an error */
0794: else if (act == 0) {
0795: /* call user syntax error reporting routine */
0796: syntax_error(cur_token);
0797:
0798: /* try to error recover */
0799: if (!error_recovery(true)) {
0800: /* if that fails give up with a fatal syntax error */
0801: unrecovered_syntax_error(cur_token);
0802:
0803: /* just in case that wasn't fatal enough, end parse */
0804: done_parsing();
0805: } else {
0806: lhs_sym = (Symbol) stack.peek();
0807: }
0808: }
0809: }
0810: return lhs_sym;
0811: }
0812:
0813: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0814: /* Error recovery code */
0815: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0816:
0817: /** Attempt to recover from a syntax error. This returns false if recovery
0818: * fails, true if it succeeds. Recovery happens in 4 steps. First we
0819: * pop the parse stack down to a point at which we have a shift out
0820: * of the top-most state on the error Symbol. This represents the
0821: * initial error recovery configuration. If no such configuration is
0822: * found, then we fail. Next a small number of "lookahead" or "parse
0823: * ahead" Symbols are read into a buffer. The size of this buffer is
0824: * determined by error_sync_size() and determines how many Symbols beyond
0825: * the error must be matched to consider the recovery a success. Next,
0826: * we begin to discard Symbols in attempt to get past the point of error
0827: * to a point where we can continue parsing. After each Symbol, we attempt
0828: * to "parse ahead" though the buffered lookahead Symbols. The "parse ahead"
0829: * process simulates that actual parse, but does not modify the real
0830: * parser's configuration, nor execute any actions. If we can parse all
0831: * the stored Symbols without error, then the recovery is considered a
0832: * success. Once a successful recovery point is determined, we do an
0833: * actual parse over the stored input -- modifying the real parse
0834: * configuration and executing all actions. Finally, we return the the
0835: * normal parser to continue with the overall parse.
0836: *
0837: * @param debug should we produce debugging messages as we parse.
0838: */
0839: protected boolean error_recovery(boolean debug)
0840: throws java.lang.Exception {
0841: if (debug)
0842: debug_message("# Attempting error recovery");
0843:
0844: /* first pop the stack back into a state that can shift on error and
0845: do that shift (if that fails, we fail) */
0846: if (!find_recovery_config(debug)) {
0847: if (debug)
0848: debug_message("# Error recovery fails");
0849: return false;
0850: }
0851:
0852: /* read ahead to create lookahead we can parse multiple times */
0853: read_lookahead();
0854:
0855: /* repeatedly try to parse forward until we make it the required dist */
0856: for (;;) {
0857: /* try to parse forward, if it makes it, bail out of loop */
0858: if (debug)
0859: debug_message("# Trying to parse ahead");
0860: if (try_parse_ahead(debug)) {
0861: break;
0862: }
0863:
0864: /* if we are now at EOF, we have failed */
0865: if (lookahead[0].sym == EOF_sym()) {
0866: if (debug)
0867: debug_message("# Error recovery fails at EOF");
0868: return false;
0869: }
0870:
0871: /* otherwise, we consume another Symbol and try again */
0872: if (debug)
0873: debug_message("# Consuming Symbol #"
0874: + cur_err_token().sym);
0875: restart_lookahead();
0876: }
0877:
0878: /* we have consumed to a point where we can parse forward */
0879: if (debug)
0880: debug_message("# Parse-ahead ok, going back to normal parse");
0881:
0882: /* do the real parse (including actions) across the lookahead */
0883: parse_lookahead(debug);
0884:
0885: /* we have success */
0886: return true;
0887: }
0888:
0889: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0890:
0891: /** Determine if we can shift under the special error Symbol out of the
0892: * state currently on the top of the (real) parse stack.
0893: */
0894: protected boolean shift_under_error() {
0895: /* is there a shift under error Symbol */
0896: return get_action(((Symbol) stack.peek()).parse_state,
0897: error_sym()) > 0;
0898: }
0899:
0900: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0901:
0902: /** Put the (real) parse stack into error recovery configuration by
0903: * popping the stack down to a state that can shift on the special
0904: * error Symbol, then doing the shift. If no suitable state exists on
0905: * the stack we return false
0906: *
0907: * @param debug should we produce debugging messages as we parse.
0908: */
0909: protected boolean find_recovery_config(boolean debug) {
0910: Symbol error_token;
0911: int act;
0912:
0913: if (debug)
0914: debug_message("# Finding recovery state on stack");
0915:
0916: /* Remember the right-position of the top symbol on the stack */
0917: int right_pos = ((Symbol) stack.peek()).right;
0918: int left_pos = ((Symbol) stack.peek()).left;
0919:
0920: /* pop down until we can shift under error Symbol */
0921: while (!shift_under_error()) {
0922: /* pop the stack */
0923: if (debug)
0924: debug_message("# Pop stack by one, state was # "
0925: + ((Symbol) stack.peek()).parse_state);
0926: left_pos = ((Symbol) stack.pop()).left;
0927: tos--;
0928:
0929: /* if we have hit bottom, we fail */
0930: if (stack.empty()) {
0931: if (debug)
0932: debug_message("# No recovery state found on stack");
0933: return false;
0934: }
0935: }
0936:
0937: /* state on top of the stack can shift under error, find the shift */
0938: act = get_action(((Symbol) stack.peek()).parse_state,
0939: error_sym());
0940: if (debug) {
0941: debug_message("# Recover state found (#"
0942: + ((Symbol) stack.peek()).parse_state + ")");
0943: debug_message("# Shifting on error to state #" + (act - 1));
0944: }
0945:
0946: /* build and shift a special error Symbol */
0947: error_token = new Symbol(error_sym(), left_pos, right_pos);
0948: error_token.parse_state = act - 1;
0949: error_token.used_by_parser = true;
0950: stack.push(error_token);
0951: tos++;
0952:
0953: return true;
0954: }
0955:
0956: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0957:
0958: /** Lookahead Symbols used for attempting error recovery "parse aheads". */
0959: protected Symbol lookahead[];
0960:
0961: /** Position in lookahead input buffer used for "parse ahead". */
0962: protected int lookahead_pos;
0963:
0964: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0965:
0966: /** Read from input to establish our buffer of "parse ahead" lookahead
0967: * Symbols.
0968: */
0969: protected void read_lookahead() throws java.lang.Exception {
0970: /* create the lookahead array */
0971: lookahead = new Symbol[error_sync_size()];
0972:
0973: /* fill in the array */
0974: for (int i = 0; i < error_sync_size(); i++) {
0975: lookahead[i] = cur_token;
0976: cur_token = scan();
0977: }
0978:
0979: /* start at the beginning */
0980: lookahead_pos = 0;
0981: }
0982:
0983: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0984:
0985: /** Return the current lookahead in our error "parse ahead" buffer. */
0986: protected Symbol cur_err_token() {
0987: return lookahead[lookahead_pos];
0988: }
0989:
0990: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
0991:
0992: /** Advance to next "parse ahead" input Symbol. Return true if we have
0993: * input to advance to, false otherwise.
0994: */
0995: protected boolean advance_lookahead() {
0996: /* advance the input location */
0997: lookahead_pos++;
0998:
0999: /* return true if we didn't go off the end */
1000: return lookahead_pos < error_sync_size();
1001: }
1002:
1003: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
1004:
1005: /** Reset the parse ahead input to one Symbol past where we started error
1006: * recovery (this consumes one new Symbol from the real input).
1007: */
1008: protected void restart_lookahead() throws java.lang.Exception {
1009: /* move all the existing input over */
1010: for (int i = 1; i < error_sync_size(); i++)
1011: lookahead[i - 1] = lookahead[i];
1012:
1013: /* read a new Symbol into the last spot */
1014: cur_token = scan();
1015: lookahead[error_sync_size() - 1] = cur_token;
1016:
1017: /* reset our internal position marker */
1018: lookahead_pos = 0;
1019: }
1020:
1021: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
1022:
1023: /** Do a simulated parse forward (a "parse ahead") from the current
1024: * stack configuration using stored lookahead input and a virtual parse
1025: * stack. Return true if we make it all the way through the stored
1026: * lookahead input without error. This basically simulates the action of
1027: * parse() using only our saved "parse ahead" input, and not executing any
1028: * actions.
1029: *
1030: * @param debug should we produce debugging messages as we parse.
1031: */
1032: protected boolean try_parse_ahead(boolean debug)
1033: throws java.lang.Exception {
1034: int act;
1035: short lhs, rhs_size;
1036:
1037: /* create a virtual stack from the real parse stack */
1038: virtual_parse_stack vstack = new virtual_parse_stack(stack);
1039:
1040: /* parse until we fail or get past the lookahead input */
1041: for (;;) {
1042: /* look up the action from the current state (on top of stack) */
1043: act = get_action(vstack.top(), cur_err_token().sym);
1044:
1045: /* if its an error, we fail */
1046: if (act == 0)
1047: return false;
1048:
1049: /* > 0 encodes a shift */
1050: if (act > 0) {
1051: /* push the new state on the stack */
1052: vstack.push(act - 1);
1053:
1054: if (debug)
1055: debug_message("# Parse-ahead shifts Symbol #"
1056: + cur_err_token().sym + " into state #"
1057: + (act - 1));
1058:
1059: /* advance simulated input, if we run off the end, we are done */
1060: if (!advance_lookahead())
1061: return true;
1062: }
1063: /* < 0 encodes a reduce */
1064: else {
1065: /* if this is a reduce with the start production we are done */
1066: if ((-act) - 1 == start_production()) {
1067: if (debug)
1068: debug_message("# Parse-ahead accepts");
1069: return true;
1070: }
1071:
1072: /* get the lhs Symbol and the rhs size */
1073: lhs = production_tab[(-act) - 1][0];
1074: rhs_size = production_tab[(-act) - 1][1];
1075:
1076: /* pop handle off the stack */
1077: for (int i = 0; i < rhs_size; i++)
1078: vstack.pop();
1079:
1080: if (debug)
1081: debug_message("# Parse-ahead reduces: handle size = "
1082: + rhs_size
1083: + " lhs = #"
1084: + lhs
1085: + " from state #" + vstack.top());
1086:
1087: /* look up goto and push it onto the stack */
1088: vstack.push(get_reduce(vstack.top(), lhs));
1089: if (debug)
1090: debug_message("# Goto state #" + vstack.top());
1091: }
1092: }
1093: }
1094:
1095: /*. . . . . . . . . . . . . . . . . . . . . . . . . . . . . .*/
1096:
1097: /** Parse forward using stored lookahead Symbols. In this case we have
1098: * already verified that parsing will make it through the stored lookahead
1099: * Symbols and we are now getting back to the point at which we can hand
1100: * control back to the normal parser. Consequently, this version of the
1101: * parser performs all actions and modifies the real parse configuration.
1102: * This returns once we have consumed all the stored input or we accept.
1103: *
1104: * @param debug should we produce debugging messages as we parse.
1105: */
1106: protected void parse_lookahead(boolean debug)
1107: throws java.lang.Exception {
1108: /* the current action code */
1109: int act;
1110:
1111: /* the Symbol/stack element returned by a reduce */
1112: Symbol lhs_sym = null;
1113:
1114: /* information about production being reduced with */
1115: short handle_size, lhs_sym_num;
1116:
1117: /* restart the saved input at the beginning */
1118: lookahead_pos = 0;
1119:
1120: if (debug) {
1121: debug_message("# Reparsing saved input with actions");
1122: debug_message("# Current Symbol is #" + cur_err_token().sym);
1123: debug_message("# Current state is #"
1124: + ((Symbol) stack.peek()).parse_state);
1125: }
1126:
1127: /* continue until we accept or have read all lookahead input */
1128: while (!_done_parsing) {
1129: /* current state is always on the top of the stack */
1130:
1131: /* look up action out of the current state with the current input */
1132: act = get_action(((Symbol) stack.peek()).parse_state,
1133: cur_err_token().sym);
1134:
1135: /* decode the action -- > 0 encodes shift */
1136: if (act > 0) {
1137: /* shift to the encoded state by pushing it on the stack */
1138: cur_err_token().parse_state = act - 1;
1139: cur_err_token().used_by_parser = true;
1140: if (debug)
1141: debug_shift(cur_err_token());
1142: stack.push(cur_err_token());
1143: tos++;
1144:
1145: /* advance to the next Symbol, if there is none, we are done */
1146: if (!advance_lookahead()) {
1147: if (debug)
1148: debug_message("# Completed reparse");
1149:
1150: /* scan next Symbol so we can continue parse */
1151: // BUGFIX by Chris Harris <ckharris@ucsd.edu>:
1152: // correct a one-off error by commenting out
1153: // this next line.
1154: /*cur_token = scan();*/
1155:
1156: /* go back to normal parser */
1157: return;
1158: }
1159:
1160: if (debug)
1161: debug_message("# Current Symbol is #"
1162: + cur_err_token().sym);
1163: }
1164: /* if its less than zero, then it encodes a reduce action */
1165: else if (act < 0) {
1166: /* perform the action for the reduce */
1167: lhs_sym = do_action((-act) - 1, this , stack, tos);
1168:
1169: /* look up information about the production */
1170: lhs_sym_num = production_tab[(-act) - 1][0];
1171: handle_size = production_tab[(-act) - 1][1];
1172:
1173: if (debug)
1174: debug_reduce((-act) - 1, lhs_sym_num, handle_size);
1175:
1176: /* pop the handle off the stack */
1177: for (int i = 0; i < handle_size; i++) {
1178: stack.pop();
1179: tos--;
1180: }
1181:
1182: /* look up the state to go to from the one popped back to */
1183: act = get_reduce(((Symbol) stack.peek()).parse_state,
1184: lhs_sym_num);
1185:
1186: /* shift to that state */
1187: lhs_sym.parse_state = act;
1188: lhs_sym.used_by_parser = true;
1189: stack.push(lhs_sym);
1190: tos++;
1191:
1192: if (debug)
1193: debug_message("# Goto state #" + act);
1194:
1195: }
1196: /* finally if the entry is zero, we have an error
1197: (shouldn't happen here, but...)*/
1198: else if (act == 0) {
1199: report_fatal_error("Syntax error", lhs_sym);
1200: return;
1201: }
1202: }
1203:
1204: }
1205:
1206: /*-----------------------------------------------------------*/
1207:
1208: /** Utility function: unpacks parse tables from strings */
1209: protected static short[][] unpackFromStrings(String[] sa) {
1210: // Concatanate initialization strings.
1211: StringBuffer sb = new StringBuffer(sa[0]);
1212: for (int i = 1; i < sa.length; i++)
1213: sb.append(sa[i]);
1214: int n = 0; // location in initialization string
1215: int size1 = (((int) sb.charAt(n)) << 16)
1216: | ((int) sb.charAt(n + 1));
1217: n += 2;
1218: short[][] result = new short[size1][];
1219: for (int i = 0; i < size1; i++) {
1220: int size2 = (((int) sb.charAt(n)) << 16)
1221: | ((int) sb.charAt(n + 1));
1222: n += 2;
1223: result[i] = new short[size2];
1224: for (int j = 0; j < size2; j++)
1225: result[i][j] = (short) (sb.charAt(n++) - 2);
1226: }
1227: return result;
1228: }
1229: }
|