001: /*
002: * Copyright 2007 Outerthought bvba and Schaubroeck nv
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: package org.outerj.daisy.diff.tag;
017:
018: /**
019: * Takes a String and generates tokens/atoms that can be used by LCS. This
020: * comparator is used specifically for HTML documents.
021: */
022: import java.io.BufferedReader;
023: import java.io.IOException;
024: import java.util.ArrayList;
025: import java.util.List;
026:
027: import org.eclipse.compare.rangedifferencer.IRangeComparator;
028:
029: public class TagComparator implements IAtomSplitter {
030:
031: private List<Atom> atoms = new ArrayList<Atom>(50);
032:
033: public TagComparator(String s) {
034: generateAtoms(s);
035: }
036:
037: public TagComparator(StringBuilder s) {
038: generateAtoms(s.toString());
039: }
040:
041: public TagComparator(BufferedReader in) throws IOException {
042: StringBuilder sb = new StringBuilder();
043:
044: boolean allRead = false;
045: while (!allRead) {
046: int result = in.read();
047: if (result >= 0) {
048: sb.append((char) result);
049: } else {
050: generateAtoms(sb.toString());
051: allRead = true;
052: }
053: }
054: }
055:
056: public List<Atom> getAtoms() {
057: return new ArrayList<Atom>(atoms);
058: }
059:
060: private void generateAtoms(String s) {
061: if (atoms.size() > 0)
062: throw new IllegalStateException(
063: "Atoms can only be generated once");
064:
065: StringBuilder currentWord = new StringBuilder(100);
066:
067: for (int i = 0; i < s.length(); i++) {
068: char c = s.charAt(i);
069:
070: if (c == '<'
071: && TagAtom.isValidTag(s.substring(i, s.indexOf('>',
072: i) + 1))) {
073: // a tag
074: if (currentWord.length() > 0) {
075: atoms.add(new TextAtom(currentWord.toString()));
076: currentWord.setLength(0);
077: }
078:
079: int end = s.indexOf('>', i);
080: atoms.add(new TagAtom(s.substring(i, end + 1)));
081: i = end;
082: } else if (DelimiterAtom.isValidDelimiter("" + c)) {
083: // a delimiter
084: if (currentWord.length() > 0) {
085: atoms.add(new TextAtom(currentWord.toString()));
086: currentWord.setLength(0);
087: }
088:
089: atoms.add(new DelimiterAtom(c));
090: } else {
091: // something else
092: currentWord.append(c);
093: }
094: }
095: if (currentWord.length() > 0) {
096: atoms.add(new TextAtom(currentWord.toString()));
097: currentWord.setLength(0);
098: }
099: }
100:
101: public String substring(int startAtom, int endAtom) {
102: if (startAtom == endAtom)
103: return "";
104: else {
105: StringBuilder result = new StringBuilder();
106: for (int i = startAtom; i < endAtom; i++) {
107: result.append(atoms.get(i).getFullText());
108: }
109: return result.toString();
110: }
111: }
112:
113: public String substring(int startAtom) {
114: return substring(startAtom, atoms.size());
115: }
116:
117: public Atom getAtom(int i) {
118: if (i < 0 || i >= atoms.size())
119: throw new IndexOutOfBoundsException(
120: "There is no Atom with index " + i);
121: return atoms.get(i);
122: }
123:
124: public int getRangeCount() {
125: return atoms.size();
126: }
127:
128: public boolean rangesEqual(int this Index, IRangeComparator other,
129: int otherIndex) {
130: TagComparator tc2;
131: try {
132: tc2 = (TagComparator) other;
133: } catch (ClassCastException e) {
134: return false;
135: }
136: return tc2.getAtom(otherIndex).equalsIdentifier(
137: getAtom(this Index));
138: }
139:
140: public boolean skipRangeComparison(int length, int maxLength,
141: IRangeComparator other) {
142: return false;
143: }
144:
145: }
|