001: /*
002: * $Id: MarkupParser.java 5791 2006-05-20 00:32:57 +0000 (Sat, 20 May 2006)
003: * joco01 $ $Revision: 498919 $ $Date: 2006-05-20 00:32:57 +0000 (Sat, 20 May
004: * 2006) $
005: *
006: * ==============================================================================
007: * Licensed under the Apache License, Version 2.0 (the "License"); you may not
008: * use this file except in compliance with the License. You may obtain a copy of
009: * the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
015: * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
016: * License for the specific language governing permissions and limitations under
017: * the License.
018: */
019: package wicket.markup;
020:
021: import java.io.IOException;
022: import java.text.ParseException;
023: import java.util.regex.Pattern;
024:
025: import wicket.Application;
026: import wicket.Page;
027: import wicket.markup.parser.IMarkupFilter;
028: import wicket.markup.parser.IXmlPullParser;
029: import wicket.markup.parser.filter.BodyOnLoadHandler;
030: import wicket.markup.parser.filter.HeadForceTagIdHandler;
031: import wicket.markup.parser.filter.HtmlHandler;
032: import wicket.markup.parser.filter.HtmlHeaderSectionHandler;
033: import wicket.markup.parser.filter.TagTypeHandler;
034: import wicket.markup.parser.filter.WicketLinkTagHandler;
035: import wicket.markup.parser.filter.WicketMessageTagHandler;
036: import wicket.markup.parser.filter.WicketNamespaceHandler;
037: import wicket.markup.parser.filter.WicketRemoveTagHandler;
038: import wicket.markup.parser.filter.WicketTagIdentifier;
039: import wicket.settings.IMarkupSettings;
040: import wicket.util.resource.ResourceStreamNotFoundException;
041: import wicket.util.string.AppendingStringBuffer;
042:
043: /**
044: * This is a Wicket MarkupParser specifically for (X)HTML. It makes use of a
045: * streaming XML parser to read the markup and IMarkupFilters to remove
046: * comments, identify Wicket relevant tags, apply html specific treatments etc..
047: * <p>
048: * The result will be an Markup object, which is basically a list, containing
049: * Wicket relevant tags and RawMarkup.
050: *
051: * @see IMarkupFilter
052: * @see IMarkupParserFactory
053: * @see IMarkupSettings
054: * @see Markup
055: *
056: * @author Jonathan Locke
057: * @author Juergen Donnerstag
058: */
059: public class MarkupParser {
060: /** Conditional comment section, which is NOT treated as a comment section */
061: private static final Pattern CONDITIONAL_COMMENT = Pattern
062: .compile("\\[if .+\\]>(.|\n|\r)*<!\\[endif\\]");
063:
064: /** The XML parser to use */
065: private final IXmlPullParser xmlParser;
066:
067: /** The markup handler chain: each filter has a specific task */
068: private IMarkupFilter markupFilterChain;
069:
070: /** The markup created by reading the markup file */
071: private final Markup markup;
072:
073: /** Temporary variable: Application.get().getMarkupSettings() */
074: private final IMarkupSettings markupSettings;
075:
076: /**
077: * Constructor.
078: *
079: * @param xmlParser
080: * The streaming xml parser to read and parse the markup
081: */
082: public MarkupParser(final IXmlPullParser xmlParser) {
083: this .xmlParser = xmlParser;
084: this .markup = new Markup();
085: this .markupSettings = Application.get().getMarkupSettings();
086: }
087:
088: /**
089: * In case you want to analyze markup which BY DEFAULT does not use "wicket"
090: * to find relevant tags.
091: *
092: * @param namespace
093: */
094: public final void setWicketNamespace(final String namespace) {
095: this .markup.setWicketNamespace(namespace);
096: }
097:
098: /**
099: * Applications which subclass initFilterChain() might also wish to access
100: * the markup resource stream.
101: *
102: * @return The markup resource stream
103: */
104: protected MarkupResourceStream getMarkupResourceStream() {
105: return this .markup.getResource();
106: }
107:
108: /**
109: * Create a new markup filter chain and initialize with all default filters
110: * required.
111: */
112: private final void initializeMarkupFilters() {
113: // Chain together all the different markup filters and configure them
114: this .markupFilterChain = xmlParser;
115:
116: appendMarkupFilter(new WicketTagIdentifier(markup));
117: appendMarkupFilter(new TagTypeHandler());
118: appendMarkupFilter(new HtmlHandler());
119: appendMarkupFilter(new WicketRemoveTagHandler());
120: appendMarkupFilter(new WicketLinkTagHandler());
121: appendMarkupFilter(new WicketNamespaceHandler(markup));
122:
123: // Provided the wicket component requesting the markup is known ...
124: final MarkupResourceStream resource = markup.getResource();
125: if (resource != null) {
126: final ContainerInfo containerInfo = resource
127: .getContainerInfo();
128: if (containerInfo != null) {
129: if (WicketMessageTagHandler.enable) {
130: appendMarkupFilter(new WicketMessageTagHandler(
131: containerInfo));
132: }
133:
134: appendMarkupFilter(new BodyOnLoadHandler());
135:
136: // Pages require additional handlers
137: if (Page.class.isAssignableFrom(containerInfo
138: .getContainerClass())) {
139: appendMarkupFilter(new HtmlHeaderSectionHandler(
140: this .markup));
141: }
142:
143: appendMarkupFilter(new HeadForceTagIdHandler(
144: containerInfo.getContainerClass()));
145: }
146: }
147: }
148:
149: /**
150: * By default don't do anything. Subclasses may append additional markup
151: * filters if required.
152: *
153: * @see #appendMarkupFilter(IMarkupFilter)
154: */
155: protected void initFilterChain() {
156: }
157:
158: /**
159: * Append a new filter to the list of already pre-configured markup filters.
160: * To be used by subclasses which implement {@link #initFilterChain()}.
161: *
162: * @param filter
163: * The filter to be appended
164: */
165: public final void appendMarkupFilter(final IMarkupFilter filter) {
166: filter.setParent(this .markupFilterChain);
167: this .markupFilterChain = filter;
168: }
169:
170: /**
171: * Reads and parses markup from a file.
172: *
173: * @param resource
174: * The file
175: * @return The markup
176: * @throws IOException
177: * @throws ResourceStreamNotFoundException
178: */
179: public final Markup readAndParse(final MarkupResourceStream resource)
180: throws IOException, ResourceStreamNotFoundException {
181: // Remove all existing markup elements
182: this .markup.reset();
183:
184: // For diagnostic purposes
185: this .markup.setResource(resource);
186:
187: // Initialize the xml parser
188: this .xmlParser.parse(resource, this .markupSettings
189: .getDefaultMarkupEncoding());
190:
191: // parse the xml markup and tokenize it into wicket relevant markup
192: // elements
193: parseMarkup();
194:
195: this .markup.setEncoding(xmlParser.getEncoding());
196: this .markup.setXmlDeclaration(xmlParser.getXmlDeclaration());
197:
198: return this .markup;
199: }
200:
201: /**
202: * Parse the markup.
203: *
204: * @param string
205: * The markup
206: * @return The markup
207: * @throws IOException
208: * @throws ResourceStreamNotFoundException
209: */
210: public final Markup parse(final String string) throws IOException,
211: ResourceStreamNotFoundException {
212: // Remove all existing markup elements
213: this .markup.reset();
214:
215: // Initialize the xml parser
216: this .xmlParser.parse(string);
217:
218: // parse the xml markup and tokenize it into wicket relevant markup
219: // elements
220: parseMarkup();
221:
222: this .markup.setEncoding(xmlParser.getEncoding());
223: this .markup.setXmlDeclaration(xmlParser.getXmlDeclaration());
224:
225: return this .markup;
226: }
227:
228: /**
229: * Scans the given markup and extracts balancing tags.
230: *
231: */
232: private void parseMarkup() {
233: // Initialize the markup filter chain
234: initializeMarkupFilters();
235:
236: // Allow subclasses to extend the filter chain
237: initFilterChain();
238:
239: // Get relevant settings from the Application
240: final boolean stripComments = this .markupSettings
241: .getStripComments();
242: final boolean compressWhitespace = this .markupSettings
243: .getCompressWhitespace();
244:
245: try {
246: // allways remember the latest index (size)
247: int size = this .markup.size();
248:
249: // Loop through tags
250: for (ComponentTag tag; null != (tag = (ComponentTag) markupFilterChain
251: .nextTag());) {
252: boolean add = (tag.getId() != null);
253: if (!add && tag.getXmlTag().isClose()) {
254: add = ((tag.getOpenTag() != null) && (tag
255: .getOpenTag().getId() != null));
256: }
257:
258: // Add tag to list?
259: if (add || tag.isModified()) {
260: final CharSequence text = xmlParser
261: .getInputFromPositionMarker(tag.getPos());
262:
263: // Add text from last position to tag position
264: if (text.length() > 0) {
265: String rawMarkup = text.toString();
266:
267: if (stripComments) {
268: rawMarkup = removeComment(rawMarkup);
269: }
270:
271: if (compressWhitespace) {
272: rawMarkup = compressWhitespace(rawMarkup);
273: }
274:
275: // Make sure you add it at the correct location.
276: // IMarkupFilters might have added elements as well.
277: this .markup.addMarkupElement(size,
278: new RawMarkup(rawMarkup));
279: }
280:
281: if (add) {
282: // Add to list unless preview component tag remover
283: // flagged
284: // as removed
285: if (!WicketRemoveTagHandler.IGNORE.equals(tag
286: .getId())) {
287: this .markup.addMarkupElement(tag);
288: }
289: } else if (tag.isModified()) {
290: this .markup.addMarkupElement(new RawMarkup(tag
291: .toCharSequence()));
292: }
293:
294: xmlParser.setPositionMarker();
295: }
296:
297: // allways remember the latest index (size)
298: size = this .markup.size();
299: }
300: } catch (final ParseException ex) {
301: // Add remaining input string
302: final CharSequence text = xmlParser
303: .getInputFromPositionMarker(-1);
304: if (text.length() > 0) {
305: this .markup.addMarkupElement(new RawMarkup(text));
306: }
307:
308: this .markup.setEncoding(xmlParser.getEncoding());
309: this .markup
310: .setXmlDeclaration(xmlParser.getXmlDeclaration());
311:
312: final MarkupStream markupStream = new MarkupStream(markup);
313: markupStream.setCurrentIndex(this .markup.size() - 1);
314: throw new MarkupException(markupStream, ex.getMessage(), ex);
315: }
316:
317: // Add tail?
318: final CharSequence text = xmlParser
319: .getInputFromPositionMarker(-1);
320: if (text.length() > 0) {
321: String rawMarkup = text.toString();
322:
323: if (stripComments) {
324: rawMarkup = removeComment(rawMarkup);
325: }
326:
327: if (compressWhitespace) {
328: rawMarkup = compressWhitespace(rawMarkup);
329: }
330:
331: // Make sure you add it at the correct location.
332: // IMarkupFilters might have added elements as well.
333: this .markup.addMarkupElement(new RawMarkup(rawMarkup));
334: }
335:
336: // Make all tags immutable and the list of elements unmodifable
337: this .markup.makeImmutable();
338: }
339:
340: /**
341: * Remove whitespaces from the raw markup
342: *
343: * @param rawMarkup
344: * @return rawMarkup
345: */
346: protected String compressWhitespace(String rawMarkup) {
347: rawMarkup = rawMarkup.replaceAll("[ \\t]+", " ");
348: rawMarkup = rawMarkup.replaceAll("( ?[\\r\\n] ?)+", "\n");
349: return rawMarkup;
350: }
351:
352: /**
353: * Remove all comment sections (<!-- .. -->) from the raw markup. For
354: * reasons I don't understand, the following regex
355: * <code>"<!--(.|\n|\r)*?-->"<code>
356: * causes a stack overflow in some circumstances (jdk 1.5)
357: *
358: * @param rawMarkup
359: * @return raw markup
360: */
361: private String removeComment(String rawMarkup) {
362: int pos1 = rawMarkup.indexOf("<!--");
363: while (pos1 >= 0) {
364: final int pos2 = rawMarkup.indexOf("-->", pos1 + 4);
365:
366: final AppendingStringBuffer buf = new AppendingStringBuffer(
367: rawMarkup.length());
368: if ((pos2 >= 0) && (pos1 > 0)) {
369: final String comment = rawMarkup.substring(pos1 + 4,
370: pos2);
371: if (CONDITIONAL_COMMENT.matcher(comment).matches() == false) {
372: buf.append(rawMarkup.substring(0, pos1 - 1));
373: if (rawMarkup.length() >= pos2 + 4) {
374: buf.append(rawMarkup.substring(pos2 + 4));
375: }
376: rawMarkup = buf.toString();
377: }
378: }
379: pos1 = rawMarkup.length() <= pos1 + 2 ? -1 : rawMarkup
380: .indexOf("<!--", pos1 + 4);
381: }
382: return rawMarkup;
383: }
384: }
|