001: package net.sf.jmoney.reconciliation.parser;
002:
003: /*
004: * @(#)SimpleDOMParser.java
005: */
006:
007: import java.io.IOException;
008: import java.io.Reader;
009: import java.util.Stack;
010:
011: /**
012: * <code>SimpleDOMParser</code> is a highly-simplified XML DOM
013: * parser.
014: */
015: public class SimpleDOMParser {
016: private static final int[] cdata_start = { '<', '!', '[', 'C', 'D',
017: 'A', 'T', 'A', '[' };
018: private static final int[] cdata_end = { ']', ']', '>' };
019:
020: private Reader reader;
021: private Stack<SimpleElement> elements;
022: private SimpleElement currentElement;
023:
024: public SimpleDOMParser() {
025: elements = new Stack<SimpleElement>();
026: currentElement = null;
027: }
028:
029: public SimpleElement parse(Reader reader) throws IOException {
030: this .reader = reader;
031:
032: // skip xml declaration or DocTypes
033: skipPrologs();
034:
035: while (true) {
036: int index;
037: String tagName;
038:
039: // remove the prepend or trailing white spaces
040: String currentTag = readTag().trim();
041: if (currentTag.startsWith("</")) {
042: // close tag
043: tagName = currentTag.substring(2,
044: currentTag.length() - 1);
045:
046: // no open tag
047: if (currentElement == null) {
048: throw new IOException("Got close tag '" + tagName
049: + "' without open tag.");
050: }
051:
052: // close tag does not match with open tag
053: if (!tagName.equals(currentElement.getTagName())) {
054: throw new IOException("Expected close tag for '"
055: + currentElement.getTagName()
056: + "' but got '" + tagName + "'.");
057: }
058:
059: if (elements.empty()) {
060: // document processing is over
061: return currentElement;
062: } else {
063: // pop up the previous open tag
064: currentElement = elements.pop();
065: }
066: } else {
067: // open tag or tag with both open and close tags
068: index = currentTag.indexOf(" ");
069: if (index < 0) {
070: // tag with no attributes
071: if (currentTag.endsWith("/>")) {
072: // close tag as well
073: tagName = currentTag.substring(1, currentTag
074: .length() - 2);
075: currentTag = "/>";
076: } else {
077: // open tag
078: tagName = currentTag.substring(1, currentTag
079: .length() - 1);
080: currentTag = "";
081: }
082: } else {
083: // tag with attributes
084: tagName = currentTag.substring(1, index);
085: currentTag = currentTag.substring(index + 1);
086: }
087:
088: // create new element
089: SimpleElement element = new SimpleElement(tagName);
090:
091: // parse the attributes
092: boolean isTagClosed = false;
093: while (currentTag.length() > 0) {
094: // remove the prepend or trailing white spaces
095: currentTag = currentTag.trim();
096:
097: if (currentTag.equals("/>")) {
098: // close tag
099: isTagClosed = true;
100: break;
101: } else if (currentTag.equals(">")) {
102: // open tag
103: break;
104: }
105:
106: index = currentTag.indexOf("=");
107: if (index < 0) {
108: throw new IOException(
109: "Invalid attribute for tag '" + tagName
110: + "'.");
111: }
112:
113: // get attribute name
114: String attributeName = currentTag.substring(0,
115: index);
116: currentTag = currentTag.substring(index + 1);
117:
118: // get attribute value
119: String attributeValue;
120: boolean isQuoted = true;
121: if (currentTag.startsWith("\"")) {
122: index = currentTag.indexOf('"', 1);
123: } else if (currentTag.startsWith("'")) {
124: index = currentTag.indexOf('\'', 1);
125: } else {
126: isQuoted = false;
127: index = currentTag.indexOf(' ');
128: if (index < 0) {
129: index = currentTag.indexOf('>');
130: if (index < 0) {
131: index = currentTag.indexOf('/');
132: }
133: }
134: }
135:
136: if (index < 0) {
137: throw new IOException(
138: "Invalid attribute for tag '" + tagName
139: + "'.");
140: }
141:
142: if (isQuoted) {
143: attributeValue = currentTag.substring(1, index);
144: } else {
145: attributeValue = currentTag.substring(0, index);
146: }
147:
148: // add attribute to the new element
149: element.setAttribute(attributeName, attributeValue);
150:
151: currentTag = currentTag.substring(index + 1);
152: }
153:
154: // read the text between the open and close tag
155: if (!isTagClosed) {
156: element.setText(readText());
157: if (!element.isEmptyText())
158: isTagClosed = true;
159: }
160:
161: // add new element as a child element of
162: // the current element
163: if (currentElement != null) {
164: currentElement.addChildElement(element);
165: }
166:
167: if (!isTagClosed) {
168: if (currentElement != null) {
169: elements.push(currentElement);
170: }
171:
172: currentElement = element;
173: } else if (currentElement == null) {
174: // only has one tag in the document
175: return element;
176: }
177: }
178: }
179: }
180:
181: private int peek() throws IOException {
182: reader.mark(1);
183: int result = reader.read();
184: reader.reset();
185:
186: return result;
187: }
188:
189: private void peek(int[] buffer) throws IOException {
190: reader.mark(buffer.length);
191: for (int i = 0; i < buffer.length; i++) {
192: buffer[i] = reader.read();
193: }
194: reader.reset();
195: }
196:
197: private void skipWhitespace() throws IOException {
198: char peek = (char) peek();
199: while (Character.isWhitespace(peek) || peek != '<') {
200: reader.read();
201: peek = (char) peek();
202: }
203: }
204:
205: private void skipProlog() throws IOException {
206: // skip "<?" or "<!"
207: reader.skip(2);
208:
209: while (true) {
210: int next = peek();
211:
212: if (next == '>') {
213: reader.read();
214: break;
215: } else if (next == '<') {
216: // nesting prolog
217: skipProlog();
218: } else {
219: reader.read();
220: }
221: }
222: }
223:
224: private void skipPrologs() throws IOException {
225: while (true) {
226: skipWhitespace();
227:
228: int[] next = new int[2];
229: peek(next);
230:
231: if (next[0] != '<') {
232: throw new IOException("Expected '<' but got '"
233: + (char) next[0] + "'.");
234: }
235:
236: if ((next[1] == '?') || (next[1] == '!')) {
237: skipProlog();
238: } else {
239: break;
240: }
241: }
242: }
243:
244: private String readTag() throws IOException {
245: skipWhitespace();
246:
247: StringBuffer sb = new StringBuffer();
248:
249: int next = peek();
250: if (next != '<') {
251: throw new IOException("Expected < but got " + (char) next);
252: }
253:
254: sb.append((char) reader.read());
255: while (peek() != '>') {
256: sb.append((char) reader.read());
257: }
258: sb.append((char) reader.read());
259:
260: return sb.toString();
261: }
262:
263: private String readText() throws IOException {
264: StringBuffer sb = new StringBuffer();
265:
266: int[] next = new int[cdata_start.length];
267: peek(next);
268: if (compareIntArrays(next, cdata_start) == true) {
269: // CDATA
270: reader.skip(next.length);
271:
272: int[] buffer = new int[cdata_end.length];
273: while (true) {
274: peek(buffer);
275:
276: if (compareIntArrays(buffer, cdata_end) == true) {
277: reader.skip(buffer.length);
278: break;
279: } else {
280: sb.append((char) reader.read());
281: }
282: }
283: } else {
284: while (peek() != '<') {
285: sb.append((char) reader.read());
286: }
287: }
288: return sb.toString();
289: }
290:
291: private boolean compareIntArrays(int[] a1, int[] a2) {
292: if (a1.length != a2.length) {
293: return false;
294: }
295:
296: for (int i = 0; i < a1.length; i++) {
297: if (a1[i] != a2[i]) {
298: return false;
299: }
300: }
301:
302: return true;
303: }
304: }
|