001: /*
002: * Page.java February 2006
003: *
004: * Copyright (C) 2006, Niall Gallagher <niallg@users.sf.net>
005: *
006: * This library is free software; you can redistribute it and/or
007: * modify it under the terms of the GNU Lesser General Public
008: * License as published by the Free Software Foundation.
009: *
010: * This library is distributed in the hope that it will be useful,
011: * but WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013: * GNU Lesser General Public License for more details.
014: *
015: * You should have received a copy of the GNU Lesser General
016: * Public License along with this library; if not, write to the
017: * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
018: * Boston, MA 02111-1307 USA
019: */
020:
021: package simple.page.translate;
022:
023: import java.util.ArrayList;
024: import java.util.List;
025:
026: /**
027: * The <code>Page</code> object is used to parse the page token. The
028: * page token is the most complex token parsed from the JSP source.
029: * It defines the properties of the genrated page, such as the imports
030: * that should be used, and the content type of the page. For example.
031: * <pre>
032: *
033: * <%@ page import="a,b,c,d"
034: * contentType="text/html; charset=UTF-8"
035: * extend="blah"
036: * language="java" %>
037: *
038: * </pre>
039: * The above token is an example page directive. It includes information
040: * regarding the imports that are to be used, as well as the content
041: * type of the generated page. It also includes the runtime language.
042: *
043: * @author Niall Gallagher
044: */
045: class Page extends Token {
046:
047: /**
048: * This is used to store the tokens taken for any imports used.
049: */
050: private TokenBuffer include;
051:
052: /**
053: * This is used to extract the token used to define the extends.
054: */
055: private TokenBuffer extend;
056:
057: /**
058: * This is used to store the charset token if has been defined.
059: */
060: private TokenBuffer charset;
061:
062: /**
063: * This is used to store the content type of the page.
064: */
065: private TokenBuffer type;
066:
067: /**
068: * This is used to store the runtime language of the page.
069: */
070: private TokenBuffer runtime;
071:
072: /**
073: * This is used to collect the imports extracted from the page.
074: */
075: private List list;
076:
077: /**
078: * Constructor for the <code>Page</code> token. This will create
079: * several buffers used to store the attributes for the page
080: * directive. These buffers are used to accumulate characters.
081: */
082: public Page() {
083: this .runtime = new TokenBuffer();
084: this .extend = new TokenBuffer();
085: this .include = new TokenBuffer();
086: this .charset = new TokenBuffer();
087: this .type = new TokenBuffer();
088: this .list = new ArrayList();
089: }
090:
091: /**
092: * Constructor for the <code>Page</code> token. This will create
093: * several buffers used to store the attributes for the page
094: * directive. These buffers are used to accumulate characters.
095: * The token provided must be a page directive token.
096: *
097: * @param token this is the page token that is to be parsed
098: */
099: public Page(String token) {
100: this ();
101: parse(token);
102: }
103:
104: /**
105: * This will add the imports, runtime language, charset, content
106: * type to the document definition. The tokens are added only if
107: * they have been specified as attributes of the page token.
108: *
109: * @param source this is the document definition to populate
110: * @param builder this is the builder driving the process
111: */
112: public void process(Definition source, Builder builder) {
113: for (int i = 0; i < list.size(); i++) {
114: String include = (String) list.get(i);
115: source.addImport(include);
116: }
117: if (runtime.length() > 0) {
118: source.setLanguage(runtime.text());
119: }
120: if (type.length() > 0) {
121: source.setType(type.text());
122: }
123: if (charset.length() > 0) {
124: source.setCharset(charset.text());
125: }
126: }
127:
128: /**
129: * This will clear the page tokens so that the parse can be reused
130: * by the builder. In practice this method just satisfies the
131: * contract of the token so that this object is not abstract.
132: */
133: protected void init() {
134: include.clear();
135: extend.clear();
136: charset.clear();
137: type.clear();
138: runtime.clear();
139: off = 0;
140: }
141:
142: /**
143: * This method verifies that the token is a directive token before
144: * packing it and extracting the page attributes. The packing is
145: * done to remove all whitespace from the token, this ensutes that
146: * the attributes can be extracted from the source easily.
147: */
148: protected void parse() {
149: if (skip("<%@")) {
150: pack();
151: page();
152: }
153: }
154:
155: /**
156: * So that the token can be parsed in a simple manner this is used
157: * to extract all white space from the token. The resulting text
158: * is much easier to parse, and all have known terminal characters.
159: */
160: private void pack() {
161: int pos = off;
162: int len = 0;
163:
164: while (pos < count) {
165: char ch = buf[pos++];
166:
167: if (!space(ch)) {
168: buf[len++] = ch;
169: }
170: }
171: count = len;
172: off = 0;
173: }
174:
175: /**
176: * This will attempt to extract all attributes from the page token.
177: * This ensures that there is no needed order to the attributes
178: * within the page token. Also, attributes are option in the token.
179: */
180: private void page() {
181: if (skip("page")) {
182: while (off < count) {
183: content();
184: imports();
185: extend();
186: runtime();
187: off++; /* ["] */
188: }
189: }
190: }
191:
192: /**
193: * This will attempt to extract the "contentType" attribute from
194: * the page token. This ensures that the charset and type text
195: * are accumulated into the required buffers so that they can
196: * be pushed into the document definition for generation.
197: */
198: private void content() {
199: if (skip("content")) {
200: type();
201: charset();
202: }
203: }
204:
205: /**
206: * This will extract the charset used for the page. Because this is
207: * an optional element it checks for the ";charset=" token before
208: * accumulating the bytes for the charset. The charser buffer will
209: * contain the charset data should it exist when this is finished.
210: */
211: private void charset() {
212: if (skip(";charset=")) {
213: while (off < count) {
214: char ch = buf[off++];
215:
216: if (terminal(ch)) { /* ["]*/
217: break;
218: }
219: charset.append(ch);
220: }
221: }
222: }
223:
224: /**
225: * This will extract the content type of the page. The content type
226: * is a MIME type such as "text/html" or "text/xhtml". The terminal
227: * for the content type is a quotation character or a semicolon.
228: */
229: private void type() {
230: if (skip("type=")) {
231: while (++off < count) { /* ["]*/
232: char ch = buf[off];
233:
234: if (terminal(ch)) {
235: break;
236: }
237: type.append(ch);
238: }
239: }
240: }
241:
242: /**
243: * This is used to extract the extends attribute. The extends token
244: * defines a class that the page class extends. This is a simple
245: * string terminated by either a quotation or a semicolon.
246: */
247: private void extend() {
248: if (skip("extends=")) {
249: while (++off < count) { /* ["]*/
250: char ch = buf[off];
251:
252: if (terminal(ch)) {
253: break;
254: }
255: extend.append(ch);
256: }
257: }
258: }
259:
260: /**
261: * This is used to extract the runtime language for the page. This
262: * defines how the document definition is used to generate the source
263: * and compile the resulting file. For instance Java or Groovy.
264: */
265: private void runtime() {
266: if (skip("language=")) {
267: while (++off < count) { /* ["]*/
268: char ch = buf[off];
269:
270: if (terminal(ch)) {
271: break;
272: }
273: runtime.append(ch);
274: }
275: }
276: }
277:
278: /**
279: * This is used to extract the imports that will be used by the
280: * page class. Imports appear in a comma seperated list, and is
281: * terminated by a quotation character. Once an import is taken
282: * it is added into a list of imports.
283: */
284: private void imports() {
285: if (skip("import=")) {
286: while (++off < count) { /* ["]*/
287: char ch = buf[off];
288:
289: if (quote(ch)) {
290: break;
291: } else if (skip("%>")) {
292: break;
293: }
294: include();
295: insert();
296: }
297: }
298: }
299:
300: /**
301: * This is used to extract a single import from the comma
302: * seperated list of tokens. Once a comma or a quotation has been
303: * encountered the import is terminated and the token is stored.
304: */
305: private void include() {
306: while (off < count) {
307: char ch = buf[off];
308:
309: if (stop(ch)) {
310: break;
311: } else if (quote(ch)) {
312: off--;
313: break;
314: } else {
315: off++;
316: }
317: include.append(ch);
318: }
319: }
320:
321: /**
322: * This is used to insert an import into the list of imports. This
323: * makes use of the token buffer for the import, if there is data
324: * within the import buffer it is added to the list and cleared.
325: */
326: private void insert() {
327: if (include.length() > 0) {
328: list.add(include.text());
329: }
330: include.clear();
331: }
332:
333: /**
334: * A terminal is considered to be either a quotation character,
335: * which is either a single quote or a double quote, it is also a
336: * comma or semicolon. The terminals are used to delimit all
337: * tokens extracted from the page token.
338: *
339: * @param ch this is the character to be evaluated
340: *
341: * @return this returns true if the character is a terminal
342: */
343: private boolean terminal(char ch) {
344: return quote(ch) || stop(ch);
345:
346: }
347:
348: /**
349: * This is used to determine when the start and end of a token
350: * has been encountered. The terminals are '"' and '"', which
351: * are legal quotations within the JSP syntax.
352: *
353: * @param ch this is the character to be evaluated
354: *
355: * @return this returns true if the character is a quote
356: */
357: private boolean quote(char ch) {
358: return ch == '"' || ch == '\'';
359: }
360:
361: /**
362: * This is used to determine when the start and end of a token
363: * has been encountered. The terminals are "," and ";", which
364: * are legal terminals within the JSP page directive.
365: *
366: * @param ch this is the character to be evaluated
367: *
368: * @return this returns true if the character is a stop
369: */
370: private boolean stop(char ch) {
371: return ch == ',' || ch == ';';
372: }
373: }
|