001: /*
002: * SaxParser.java
003: *
004: * Created on May 20, 2001, 12:26 AM
005: */
006:
007: package hotsax.html.sax;
008:
009: import java.io.*;
010: import java.net.URL;
011: import java.net.MalformedURLException;
012: import java.util.*;
013: import org.xml.sax.*;
014: import org.xml.sax.helpers.*;
015: import org.apache.xerces.utils.*;
016: import org.apache.xerces.readers.*;
017:
018: /**
019: * SaxParser - lite SAX parser. Based only on
020: * @author edh
021: * @version
022: */
023: public class SaxParser implements org.xml.sax.XMLReader {
024:
025: private EntityResolver entityResolver;
026: private DTDHandler dtdHandler;
027: private ContentHandler contentHandler;
028: private ErrorHandler errorHandler;
029:
030: private org.xml.sax.helpers.AttributesImpl attrList; // collect attributes in a list
031:
032: //private EntityHandler entityHandler;
033:
034: /** properties are set but ignored */
035: private HashMap properties;
036:
037: /** features are set but ignored */
038: private HashMap features;
039:
040: /** lexer interface */
041: private HtmlLexer lexer;
042:
043: private Reader reader;
044:
045: /** Creates new SaxParser */
046: public SaxParser() {
047: properties = new HashMap();
048: features = new HashMap();
049: //entityHandler = new DefaultEntityHandler(new StringPool(), null);
050: //entityHandler.setReaderFactory(new DefaultReaderFactory());
051:
052: entityResolver = new DefaultHandler();
053: dtdHandler = new DefaultHandler();
054: contentHandler = new DefaultHandler();
055: errorHandler = new DefaultHandler();
056:
057: attrList = new org.xml.sax.helpers.AttributesImpl();
058: }
059:
060: /**
061: * create a Reader to be used by the lexer based on the InputSource
062: */
063: protected Reader createReader(InputSource source)
064: throws IOException, MalformedURLException {
065: // create reader from source's character stream
066: if (source.getCharacterStream() != null) {
067: return source.getCharacterStream();
068: }
069:
070: // create reader from source's byte stream
071: if (source.getEncoding() != null
072: && source.getByteStream() != null) {
073: java.io.Reader reader = new InputStreamReader(source
074: .getByteStream(), source.getEncoding());
075: return reader;
076: }
077:
078: // create new input stream
079: InputStream is = source.getByteStream();
080: if (is == null) {
081:
082: // create url and open the stream
083: URL url = new URL(source.getSystemId());
084: is = url.openStream();
085: }
086:
087: return new InputStreamReader(is);
088:
089: }
090:
091: public org.xml.sax.ContentHandler getContentHandler() {
092: return contentHandler;
093: }
094:
095: public Object getProperty(String p1)
096: throws org.xml.sax.SAXNotRecognizedException,
097: org.xml.sax.SAXNotSupportedException {
098: return properties.get(p1);
099: }
100:
101: public void setFeature(String p1, boolean p2)
102: throws org.xml.sax.SAXNotRecognizedException,
103: org.xml.sax.SAXNotSupportedException {
104: Boolean bool = new Boolean(p2);
105: features.put(p1, bool);
106: }
107:
108: public void setEntityResolver(org.xml.sax.EntityResolver p1) {
109: entityResolver = p1;
110: }
111:
112: public void setContentHandler(org.xml.sax.ContentHandler p1) {
113: contentHandler = p1;
114: }
115:
116: public void setDTDHandler(org.xml.sax.DTDHandler p1) {
117: dtdHandler = p1;
118: }
119:
120: public org.xml.sax.ErrorHandler getErrorHandler() {
121: return errorHandler;
122: }
123:
124: public org.xml.sax.EntityResolver getEntityResolver() {
125: return entityResolver;
126: }
127:
128: public void setErrorHandler(org.xml.sax.ErrorHandler p1) {
129: errorHandler = p1;
130: }
131:
132: public org.xml.sax.DTDHandler getDTDHandler() {
133: return dtdHandler;
134: }
135:
136: public void setProperty(String p1, Object p2)
137: throws org.xml.sax.SAXNotRecognizedException,
138: org.xml.sax.SAXNotSupportedException {
139: properties.put(p1, p2);
140: }
141:
142: public boolean getFeature(String p1)
143: throws org.xml.sax.SAXNotRecognizedException,
144: org.xml.sax.SAXNotSupportedException {
145: Boolean bool = (Boolean) features.get(p1);
146:
147: return bool.booleanValue();
148: }
149:
150: /**
151: * Parser setup code. Initialize a new reader based on type of URI
152: * Call into actual parser below with newly created InputSource
153: */
154:
155: public void parse(String p1) throws IOException,
156: org.xml.sax.SAXException {
157: InputSource source = entityResolver.resolveEntity(p1, p1);
158: try {
159: source = new InputSource(p1);
160:
161: source.setCharacterStream(createReader(source));
162:
163: parse(source);
164: } catch (Exception ex) {
165: System.err.println("caught exception parsing "
166: + ex.getClass().getName() + " " + ex.getMessage());
167: ex.printStackTrace();
168: } finally {
169: // NOTE: Changed code to attempt to close the stream
170: // even after parsing failure. -Ac
171: try {
172: Reader reader = source.getCharacterStream();
173: if (reader != null) {
174: reader.close();
175: } else {
176: InputStream is = source.getByteStream();
177: if (is != null) {
178: is.close();
179: }
180: }
181: } catch (IOException e) {
182: // ignore
183: }
184: }
185:
186: }
187:
188: /**
189: * Parse the input document using the current InputSource's reader
190: */
191:
192: public void parse(org.xml.sax.InputSource p1)
193: throws java.io.IOException, org.xml.sax.SAXException {
194: Boolean debugWrapper = (Boolean) properties.get("debug");
195: boolean debug = (debugWrapper == null ? false : debugWrapper
196: .booleanValue());
197: HtmlParser yyparser;
198:
199: Reader reader = p1.getCharacterStream();
200:
201: yyparser = new HtmlParser(reader);
202:
203: ParserDelegate delegate = yyparser.getDelegate();
204: delegate.setSaxParser(this );
205:
206: yyparser.yyparse(); // fires off Content/Lexical handler events vis ParserDelegate
207:
208: }
209:
210: public void startDocument() throws SAXException {
211: if (contentHandler != null)
212: contentHandler.startDocument();
213: }
214:
215: public void endDocument() throws SAXException {
216: if (contentHandler != null)
217: contentHandler.endDocument();
218: }
219:
220: /**
221: * collect attributes into a list and call ContentHandler.startElement
222: **/
223: public void startElement(String name) throws SAXException {
224: if (contentHandler != null)
225: contentHandler.startElement("", name, "",
226: (Attributes) attrList);
227: }
228:
229: public void endElement(String name) throws SAXException {
230: if (contentHandler != null)
231: contentHandler.endElement("", name, "");
232: }
233:
234: }
|