001: /*
002: * Copyright 2007 Outerthought bvba and Schaubroeck nv
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016: package org.outerj.daisy.diff.tag;
017:
018: import java.util.LinkedList;
019: import java.util.List;
020:
021: import org.eclipse.compare.rangedifferencer.RangeDifference;
022: import org.eclipse.compare.rangedifferencer.RangeDifferencer;
023:
024: /**
025: * Takes 2 AtomSplitters and computes the difference between them. Output is
026: * sent to a given <code>HTMLSaxDiffOutput</code> and tags are diffed
027: * internally on a second iteration. The results are processed as to combine
028: * small subsequent changes in to larger changes.
029: */
030: public class TagDiffer {
031:
032: private TagSaxDiffOutput output;
033:
034: public TagDiffer(TagSaxDiffOutput output) {
035: this .output = output;
036: }
037:
038: public void diff(IAtomSplitter leftComparator,
039: IAtomSplitter rightComparator) throws Exception {
040:
041: RangeDifference[] differences = RangeDifferencer
042: .findDifferences(leftComparator, rightComparator);
043:
044: List<RangeDifference> pdifferences = preProcess(differences,
045: leftComparator);
046:
047: int rightAtom = 0;
048: int leftAtom = 0;
049:
050: for (int i = 0; i < pdifferences.size(); i++) {
051:
052: parseNoChange(leftAtom, pdifferences.get(i).leftStart(),
053: rightAtom, pdifferences.get(i).rightStart(),
054: leftComparator, rightComparator);
055:
056: String leftString = leftComparator.substring(pdifferences
057: .get(i).leftStart(), pdifferences.get(i).leftEnd());
058: String rightString = rightComparator.substring(pdifferences
059: .get(i).rightStart(), pdifferences.get(i)
060: .rightEnd());
061:
062: if (pdifferences.get(i).leftLength() > 0)
063: output.addRemovedPart(leftString);
064:
065: if (pdifferences.get(i).rightLength() > 0)
066: output.addAddedPart(rightString);
067:
068: rightAtom = pdifferences.get(i).rightEnd();
069: leftAtom = pdifferences.get(i).leftEnd();
070:
071: }
072: if (rightAtom < rightComparator.getRangeCount())
073: parseNoChange(leftAtom, leftComparator.getRangeCount(),
074: rightAtom, rightComparator.getRangeCount(),
075: leftComparator, rightComparator);
076:
077: }
078:
079: private void parseNoChange(int beginLeft, int endLeft,
080: int beginRight, int endRight, IAtomSplitter leftComparator,
081: IAtomSplitter rightComparator) throws Exception {
082:
083: StringBuilder sb = new StringBuilder();
084:
085: /*
086: * We can assume that the LCS is correct and that there are exacly as
087: * many atoms left and right
088: */
089: while (beginLeft < endLeft) {
090:
091: while (beginLeft < endLeft
092: && !rightComparator.getAtom(beginRight)
093: .hasInternalIdentifiers()
094: && !leftComparator.getAtom(beginLeft)
095: .hasInternalIdentifiers()) {
096: sb.append(rightComparator.getAtom(beginRight)
097: .getFullText());
098: beginRight++;
099: beginLeft++;
100: }
101:
102: if (sb.length() > 0) {
103: output.addClearPart(sb.toString());
104: sb.setLength(0);
105: }
106:
107: if (beginLeft < endLeft) {
108:
109: IAtomSplitter leftComparator2 = new ArgumentComparator(
110: leftComparator.getAtom(beginLeft).getFullText());
111: IAtomSplitter rightComparator2 = new ArgumentComparator(
112: rightComparator.getAtom(beginRight)
113: .getFullText());
114:
115: RangeDifference[] differences2 = RangeDifferencer
116: .findDifferences(leftComparator2,
117: rightComparator2);
118: List<RangeDifference> pdifferences2 = preProcess(
119: differences2, 2);
120:
121: int rightAtom2 = 0;
122: for (int j = 0; j < pdifferences2.size(); j++) {
123: if (rightAtom2 < pdifferences2.get(j).rightStart()) {
124: output.addClearPart(rightComparator2.substring(
125: rightAtom2, pdifferences2.get(j)
126: .rightStart()));
127: }
128: if (pdifferences2.get(j).leftLength() > 0) {
129: output.addRemovedPart(leftComparator2
130: .substring(pdifferences2.get(j)
131: .leftStart(), pdifferences2
132: .get(j).leftEnd()));
133: }
134: if (pdifferences2.get(j).rightLength() > 0) {
135: output.addAddedPart(rightComparator2.substring(
136: pdifferences2.get(j).rightStart(),
137: pdifferences2.get(j).rightEnd()));
138: }
139:
140: rightAtom2 = pdifferences2.get(j).rightEnd();
141:
142: }
143: if (rightAtom2 < rightComparator2.getRangeCount())
144: output.addClearPart(rightComparator2
145: .substring(rightAtom2));
146: beginLeft++;
147: beginRight++;
148: }
149:
150: }
151:
152: }
153:
154: private List<RangeDifference> preProcess(
155: RangeDifference[] differences, IAtomSplitter leftComparator) {
156:
157: List<RangeDifference> newRanges = new LinkedList<RangeDifference>();
158:
159: for (int i = 0; i < differences.length; i++) {
160:
161: int leftStart = differences[i].leftStart();
162: int leftEnd = differences[i].leftEnd();
163: int rightStart = differences[i].rightStart();
164: int rightEnd = differences[i].rightEnd();
165: int kind = differences[i].kind();
166: int temp = leftEnd;
167: boolean connecting = true;
168:
169: while (connecting && i + 1 < differences.length
170: && differences[i + 1].kind() == kind) {
171:
172: int bridgelength = 0;
173:
174: int nbtokens = Math.max((leftEnd - leftStart),
175: (rightEnd - rightStart));
176: if (nbtokens > 5) {
177: if (nbtokens > 10) {
178: bridgelength = 3;
179: } else
180: bridgelength = 2;
181: }
182:
183: while ((leftComparator.getAtom(temp) instanceof DelimiterAtom || (bridgelength-- > 0))
184: && temp < differences[i + 1].leftStart()) {
185:
186: temp++;
187: }
188: if (temp == differences[i + 1].leftStart()) {
189: leftEnd = differences[i + 1].leftEnd();
190: rightEnd = differences[i + 1].rightEnd();
191: temp = leftEnd;
192: i++;
193: } else {
194: connecting = false;
195: if (!(leftComparator.getAtom(temp) instanceof DelimiterAtom)) {
196: if (leftComparator.getAtom(temp).getFullText()
197: .equals(" "))
198: throw new IllegalStateException(
199: "space found aiaiai");
200: }
201: }
202: }
203: newRanges.add(new RangeDifference(kind, rightStart,
204: rightEnd - rightStart, leftStart, leftEnd
205: - leftStart));
206: }
207:
208: return newRanges;
209: }
210:
211: private List<RangeDifference> preProcess(
212: RangeDifference[] differences, int span) {
213:
214: List<RangeDifference> newRanges = new LinkedList<RangeDifference>();
215:
216: for (int i = 0; i < differences.length; i++) {
217:
218: int leftStart = differences[i].leftStart();
219: int leftEnd = differences[i].leftEnd();
220: int rightStart = differences[i].rightStart();
221: int rightEnd = differences[i].rightEnd();
222: int kind = differences[i].kind();
223:
224: while (i + 1 < differences.length
225: && differences[i + 1].kind() == kind
226: && differences[i + 1].leftStart() <= leftEnd + span
227: && differences[i + 1].rightStart() <= rightEnd
228: + span) {
229: leftEnd = differences[i + 1].leftEnd();
230: rightEnd = differences[i + 1].rightEnd();
231: i++;
232: }
233:
234: newRanges.add(new RangeDifference(kind, rightStart,
235: rightEnd - rightStart, leftStart, leftEnd
236: - leftStart));
237: }
238:
239: return newRanges;
240: }
241:
242: }
|