Source Code Cross Referenced for Levenshtein.java in  » IDE-Eclipse » ui-workbench » org » eclipse » ui » internal » texteditor » quickdiff » compare » rangedifferencer » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Sevlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » IDE Eclipse » ui workbench » org.eclipse.ui.internal.texteditor.quickdiff.compare.rangedifferencer 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*******************************************************************************
002:         * Copyright (c) 2000, 2006 IBM Corporation and others.
003:         * All rights reserved. This program and the accompanying materials
004:         * are made available under the terms of the Eclipse Public License v1.0
005:         * which accompanies this distribution, and is available at
006:         * http://www.eclipse.org/legal/epl-v10.html
007:         *
008:         * Contributors:
009:         *     IBM Corporation - initial API and implementation
010:         *******************************************************************************/package org.eclipse.ui.internal.texteditor.quickdiff.compare.rangedifferencer;
011:
012:        import java.util.LinkedList;
013:        import java.util.List;
014:
015:        import org.eclipse.core.runtime.Assert;
016:        import org.eclipse.core.runtime.IProgressMonitor;
017:        import org.eclipse.core.runtime.NullProgressMonitor;
018:
019:        /**
020:         * Levenshtein distance and edit script computation using a dynamic programming algorithm.
021:         * The algorithm is O(n*m) in time where n and m are the number of elements in
022:         * the two ranges to compare. It does not implement the greedy Ukkonen algorithm.
023:         *
024:         * @since 3.1
025:         */
026:        public final class Levenshtein {
027:            /* debug output */
028:            private static final boolean DEBUG = false;
029:            private static final boolean MATRIX = false;
030:
031:            /*
032:             * asserts - enable conditional compilation by not making this a trace option.
033:             * @since 3.2
034:             */
035:            private static final boolean ASSERT = false;
036:
037:            /* edit cost constants */
038:            private static final int COST_DELETE = 1;
039:            private static final int COST_INSERT = 1;
040:            private static final int COST_CHANGE = 1;
041:            private static final int SKIP = Integer.MAX_VALUE;
042:
043:            private static final RangeDifference[] EMPTY_DIFFERENCES = new RangeDifference[0];
044:            private static final int NO_DISTANCE = 0;
045:
046:            private interface CellComputer {
047:                int computeCell(int row, int col);
048:            }
049:
050:            private final class DefaultCellComputer implements  CellComputer {
051:                public int computeCell(int row, int column) {
052:                    if (row == fRowStart)
053:                        return computeNullRow(column);
054:                    else if (column == fColStart)
055:                        return computeNullColumn(row);
056:                    else
057:                        return computeInnerCell(row, column);
058:                }
059:
060:                private int computeNullRow(int column) {
061:                    // initialize first row, [0,i] = i if it is valid
062:                    return Math.abs(column - fColStart);
063:                }
064:
065:                private int computeNullColumn(int row) {
066:                    // initialize first column
067:                    return Math.abs(row - fRowStart);
068:                }
069:
070:                private int computeInnerCell(int row, int col) {
071:                    int fromAbove = sum(getAt(row - fStep, col), COST_INSERT);
072:                    int fromLeft = sum(getAt(row, col - fStep), COST_DELETE);
073:                    int minDiag = getAt(row - fStep, col - fStep);
074:
075:                    int minCellValue = Math.min(Math.min(fromAbove, fromLeft),
076:                            minDiag);
077:                    int minCost = minCost(row, col, minCellValue);
078:
079:                    if (minCellValue == fromAbove || minCellValue == fromLeft)
080:                        return minCellValue;
081:
082:                    if (ASSERT)
083:                        Assert.isTrue(minCellValue == minDiag
084:                                && fromAbove >= minDiag && fromLeft >= minDiag);
085:
086:                    int nextCharCost = rangesEqual(row, col) ? 0 : COST_CHANGE;
087:                    minCost = sum(minCost, nextCharCost);
088:                    int cost = minDiag + nextCharCost;
089:                    return cost;
090:                }
091:            }
092:
093:            /**
094:             * Reduces the needed comparisons - can not be used for hirschberg as we
095:             * don't have global values there.
096:             */
097:            private final class OptimizedCellComputer implements  CellComputer {
098:                public int computeCell(int row, int column) {
099:                    if (row == fRowStart)
100:                        return computeNullRow(column);
101:                    else if (column == fColStart)
102:                        return computeNullColumn(row);
103:                    else
104:                        return computeInnerCell(row, column);
105:                }
106:
107:                private int computeNullRow(int column) {
108:                    // initialize first row, [0,i] = i if it is valid
109:                    if (minCost(fRowStart, column, Math.abs(column - fColStart)) > fMaxCost)
110:                        return Levenshtein.SKIP;
111:                    return Math.abs(column - fColStart);
112:                }
113:
114:                private int computeNullColumn(int row) {
115:                    // initialize first column
116:                    if (minCost(row, fColStart, Math.abs(row - fRowStart)) > fMaxCost)
117:                        return Levenshtein.SKIP;
118:                    return Math.abs(row - fRowStart);
119:                }
120:
121:                private int computeInnerCell(int row, int col) {
122:                    int fromAbove = sum(getAt(row - fStep, col),
123:                            Levenshtein.COST_INSERT);
124:                    int fromLeft = sum(getAt(row, col - fStep),
125:                            Levenshtein.COST_DELETE);
126:                    int minDiag = getAt(row - fStep, col - fStep);
127:
128:                    int minCellValue = Math.min(Math.min(fromAbove, fromLeft),
129:                            minDiag);
130:                    int minCost = minCost(row, col, minCellValue);
131:
132:                    if (minCost > fMaxCost) {
133:                        return Levenshtein.SKIP;
134:                    } else if (minCellValue == fromAbove
135:                            || minCellValue == fromLeft) {
136:                        return minCellValue;
137:                    } else {
138:                        if (ASSERT)
139:                            Assert.isTrue(minCellValue == minDiag
140:                                    && fromAbove >= minDiag
141:                                    && fromLeft >= minDiag);
142:
143:                        int nextCharCost = rangesEqual(row, col) ? 0
144:                                : Levenshtein.COST_CHANGE;
145:                        minCost = Levenshtein.sum(minCost, nextCharCost);
146:                        if (minCost > fMaxCost)
147:                            return Levenshtein.SKIP;
148:                        int cost = minDiag + nextCharCost;
149:                        fMaxCost = Math.min(fMaxCost, maxCost(row, col, cost));
150:                        return cost;
151:                    }
152:                }
153:            }
154:
155:            /* the domain ranges we compare */
156:            private IRangeComparator fLeft;
157:            private IRangeComparator fRight;
158:            private IProgressMonitor fProgressMonitor;
159:
160:            /* algorithmic variables - may or may not be used by a method, always
161:             * nulled out by clear().
162:             */
163:            /* package visible for testing */
164:
165:            /** The matrix for full blown N * M edit distance and script computation. */
166:            int[][] fMatrix;
167:            /** The two columns for dynamic algorithms - the last row. */
168:            int[] fPreviousRow;
169:            /** The two columns for dynamic algorithms - the current row. */
170:            int[] fCurrentRow;
171:            /** Used by hirschberg's algorithm to store the result of one run. */
172:            private int[] fResultRow;
173:            /** Direction of the matrix calculation - either <code>1</code> or <code>-1</code>. */
174:            private int fStep;
175:            /** The current row of the dynamic matrix calculation. */
176:            private int fRow;
177:            /** The first row of the matrix to calculate. */
178:            private int fRowStart;
179:            /** The last (inclusive) row of the matrix. */
180:            private int fRowEnd;
181:            /** The first column of the matrix to calculate. */
182:            private int fColStart;
183:            /** The last (inclusive) column of the matrix. */
184:            private int fColEnd;
185:            /**
186:             * Maximum cost of the remaining computation given the current state of the
187:             * computation. For edit distance calculation, this may be used to prune
188:             * impossible cells.
189:             */
190:            private int fMaxCost;
191:
192:            /* statistics collection */
193:            /** Keeps track of the number of needed comparisons. */
194:            private long fComparisons;
195:
196:            /**
197:             * For each row, Hirschberg stores the column of the optimal alignment
198:             * in the next row of the matrix.
199:             */
200:            private int[] fOptimalSplitColumn;
201:            /**
202:             * For each row, this stores whether the domain elements at the [row,column]
203:             * returned equality, where column is the value in <code>fOptimalSplitColumn</code>.
204:             */
205:            private boolean[] fOptimalSplitValues;
206:            /** List of differences computed by the walkback methods. */
207:            private List fDiffs;
208:
209:            /** Normal matrix cell computer. */
210:            final CellComputer fStandardCC = new DefaultCellComputer();
211:            /** Optimized cell computer for that prunes impossible cells. */
212:            final CellComputer fOptimizedCC = new OptimizedCellComputer();
213:            /** The current cell computer. */
214:            CellComputer fCellComputer = fStandardCC;
215:
216:            /**
217:             * Convenience method to compute the edit script between two range
218:             * comparators, see <code>RangeDifferencer</code>.
219:             *
220:             * @param left the left hand side domain range
221:             * @param right the right hand side domain range
222:             * @return the edit script from left to right
223:             */
224:            public static RangeDifference[] findDifferences(
225:                    IRangeComparator left, IRangeComparator right) {
226:                Levenshtein levenshtein = new Levenshtein(left, right);
227:                return levenshtein.editScriptHirschberg();
228:            }
229:
230:            /**
231:             * Convenience method to compute the edit script between two range
232:             * comparators, see <code>RangeDifferencer</code>.
233:             *
234:             * @param pm a progress monitor, or <code>null</code> if no progress should be reported
235:             * @param left the left hand side domain range
236:             * @param right the right hand side domain range
237:             * @return the edit script from left to right
238:             */
239:            public static RangeDifference[] findDifferences(
240:                    IProgressMonitor pm, IRangeComparator left,
241:                    IRangeComparator right) {
242:                Levenshtein levenshtein = new Levenshtein(pm, left, right);
243:                return levenshtein.editScriptHirschberg();
244:            }
245:
246:            /**
247:             * Create a new differ that operates on the two domain range comparators
248:             * given.
249:             *
250:             * @param left the left domain range
251:             * @param right the right domain range
252:             */
253:            public Levenshtein(IRangeComparator left, IRangeComparator right) {
254:                this (null, left, right);
255:            }
256:
257:            /**
258:             * Create a new differ that operates on the two domain range comparators
259:             * given.
260:             *
261:             * @param pm a progress monitor, or <code>null</code> if no progress should be reported
262:             * @param left the left domain range
263:             * @param right the right domain range
264:             */
265:            public Levenshtein(IProgressMonitor pm, IRangeComparator left,
266:                    IRangeComparator right) {
267:                if (left == null || right == null)
268:                    throw new NullPointerException();
269:                fLeft = left;
270:                fRight = right;
271:                if (pm != null)
272:                    fProgressMonitor = pm;
273:                else
274:                    fProgressMonitor = new NullProgressMonitor();
275:            }
276:
277:            /**
278:             * Computes the edit distance.
279:             *
280:             * @return the edit distance of the two range comparators
281:             */
282:            public int editDistance() {
283:                try {
284:                    fCellComputer = fOptimizedCC;
285:                    initRows();
286:
287:                    if (MATRIX)
288:                        printHeader(fLeft, fRight);
289:
290:                    internalEditDistance(1, fRight.getRangeCount(), 1, fLeft
291:                            .getRangeCount());
292:
293:                    if (fProgressMonitor.isCanceled())
294:                        return NO_DISTANCE;
295:
296:                    if (DEBUG)
297:                        System.out.println("" + fComparisons + " comparisons"); //$NON-NLS-1$//$NON-NLS-2$
298:
299:                    return getAt(fRowEnd, fColEnd);
300:                } finally {
301:                    clear();
302:                }
303:            }
304:
305:            /**
306:             * Computes the edit script. This is quadratic in space and time.
307:             *
308:             * @return the shortest edit script between the two range comparators
309:             */
310:            public RangeDifference[] editScript() {
311:                try {
312:                    fCellComputer = fOptimizedCC;
313:                    initMatrix();
314:
315:                    if (MATRIX)
316:                        printHeader(fLeft, fRight);
317:
318:                    // build the matrix
319:                    internalEditDistance(1, fRight.getRangeCount(), 1, fLeft
320:                            .getRangeCount());
321:
322:                    if (fProgressMonitor.isCanceled())
323:                        return EMPTY_DIFFERENCES;
324:
325:                    if (DEBUG)
326:                        System.out.println("" + fComparisons + " comparisons"); //$NON-NLS-1$//$NON-NLS-2$
327:
328:                    RangeDifference[] script = walkback();
329:
330:                    return script;
331:
332:                } finally {
333:                    clear();
334:                }
335:            }
336:
337:            /**
338:             * Computes the edit script. This is quadratic in time but linear in space;
339:             * it is about twice as slow as the <code>editScript</code> method.
340:             *
341:             * @return the shortest edit script between the two range comparators
342:             */
343:            public RangeDifference[] editScriptHirschberg() {
344:                try {
345:                    fCellComputer = fStandardCC;
346:
347:                    initRows();
348:                    fResultRow = new int[fCurrentRow.length];
349:                    fOptimalSplitColumn = new int[fRight.getRangeCount() + 1];
350:                    fOptimalSplitValues = new boolean[fRight.getRangeCount() + 1];
351:
352:                    hirschberg(1, fRight.getRangeCount(), 1, fLeft
353:                            .getRangeCount());
354:
355:                    if (fProgressMonitor.isCanceled())
356:                        return EMPTY_DIFFERENCES;
357:
358:                    if (DEBUG)
359:                        System.out.println("" + fComparisons + " comparisons"); //$NON-NLS-1$//$NON-NLS-2$
360:
361:                    RangeDifference[] script = buildDifferencesHirschberg();
362:
363:                    return script;
364:
365:                } finally {
366:                    clear();
367:                }
368:            }
369:
370:            public int editDistanceHirschberg() {
371:                try {
372:                    fCellComputer = fStandardCC;
373:
374:                    initRows();
375:                    fResultRow = new int[fLeft.getRangeCount() + 1];
376:                    fOptimalSplitColumn = new int[fRight.getRangeCount() + 1];
377:                    fOptimalSplitValues = new boolean[fRight.getRangeCount() + 1];
378:
379:                    int dist = hirschberg(1, fRight.getRangeCount(), 1, fLeft
380:                            .getRangeCount());
381:
382:                    if (fProgressMonitor.isCanceled())
383:                        return NO_DISTANCE;
384:
385:                    if (DEBUG)
386:                        System.out.println("" + fComparisons + " comparisons"); //$NON-NLS-1$//$NON-NLS-2$
387:
388:                    return dist;
389:
390:                } finally {
391:                    clear();
392:                }
393:            }
394:
395:            void initMatrix() {
396:                initMatrix(fRight.getRangeCount() + 1,
397:                        fLeft.getRangeCount() + 1);
398:            }
399:
400:            void initMatrix(int rows, int columns) {
401:                if (fMatrix == null || fMatrix.length < rows
402:                        || fMatrix[0].length < columns)
403:                    fMatrix = new int[rows][columns];
404:            }
405:
406:            void initRows() {
407:                initRows(fLeft.getRangeCount() + 1);
408:            }
409:
410:            void initRows(int columns) {
411:                if (fCurrentRow == null || fCurrentRow.length < columns)
412:                    fCurrentRow = new int[columns];
413:                if (fPreviousRow == null || fPreviousRow.length < columns)
414:                    fPreviousRow = new int[columns];
415:            }
416:
417:            /*
418:             * Fill the matrix, but do not allocate it.
419:             */
420:            void internalEditDistance(int rStart, int rEnd, int lStart, int lEnd) {
421:
422:                if (ASSERT)
423:                    Assert.isTrue(rStart <= rEnd + 1);
424:                if (ASSERT)
425:                    Assert.isTrue(lStart <= lEnd + 1);
426:
427:                // build the matrix
428:                fStep = 1;
429:                fRowStart = rStart - fStep;
430:                fRowEnd = rEnd;
431:
432:                fColStart = lStart - fStep;
433:                fColEnd = lEnd;
434:
435:                fMaxCost = maxCost(fRowStart, fColStart, 0);
436:
437:                for (fRow = fRowStart; fRow <= fRowEnd; fRow += fStep) { // for every row
438:
439:                    if (fProgressMonitor.isCanceled())
440:                        return;
441:
442:                    fProgressMonitor.worked(1);
443:
444:                    for (int col = fColStart; col <= fColEnd; col += fStep) { // for every column
445:
446:                        setAt(fRow, col, fCellComputer.computeCell(fRow, col));
447:                    }
448:
449:                    if (MATRIX)
450:                        printRow();
451:
452:                    swapRows();
453:                }
454:            }
455:
456:            /*
457:             * Fill the matrix, but do not allocate it.
458:             */
459:            void internalReverseEditDistance(int rStart, int rEnd, int lStart,
460:                    int lEnd) {
461:
462:                if (ASSERT)
463:                    Assert.isTrue(rStart <= rEnd + 1);
464:                if (ASSERT)
465:                    Assert.isTrue(lStart <= lEnd + 1);
466:
467:                // build the matrix
468:                fStep = -1;
469:                fRowStart = rEnd - fStep;
470:                fRowEnd = rStart;
471:
472:                fColStart = lEnd - fStep;
473:                fColEnd = lStart;
474:
475:                fMaxCost = maxCost(fRowStart, fColStart, 0);
476:
477:                for (fRow = fRowStart; fRow >= fRowEnd; fRow += fStep) { // for every row
478:
479:                    if (fProgressMonitor.isCanceled())
480:                        return;
481:
482:                    fProgressMonitor.worked(1);
483:
484:                    for (int col = fColStart; col >= fColEnd; col += fStep) { // for every column
485:
486:                        setAt(fRow, col, fCellComputer.computeCell(fRow, col));
487:                    }
488:
489:                    if (MATRIX)
490:                        printRow();
491:
492:                    swapRows();
493:                }
494:            }
495:
496:            private void swapRows() {
497:                int[] tmp;
498:                tmp = fPreviousRow;
499:                fPreviousRow = fCurrentRow;
500:                fCurrentRow = tmp;
501:            }
502:
503:            private void clear() {
504:                fPreviousRow = null;
505:                fCurrentRow = null;
506:                fMatrix = null;
507:                fDiffs = null;
508:                fResultRow = null;
509:                fOptimalSplitColumn = null;
510:                fOptimalSplitValues = null;
511:            }
512:
513:            /* access methods for the compare algorithm */
514:
515:            /**
516:             * Returns the matrix value for [row, column]. Note that not the entire
517:             * matrix may be available at all times.
518:             *
519:             * @param row the row (right domain index)
520:             * @param column (left domain index)
521:             * @return the matrix value for the given row and column
522:             */
523:            int getAt(int row, int column) {
524:
525:                // shift reverse iteration towards left by one
526:                if (fStep < 0)
527:                    column--;
528:
529:                if (fMatrix != null)
530:                    return fMatrix[row][column];
531:
532:                if (row == fRow)
533:                    return fCurrentRow[column];
534:
535:                if (ASSERT
536:                        && !(row == fRow - fStep && ((fStep > 0
537:                                && row >= fRowStart && row <= fRowEnd) || fStep < 0
538:                                && row <= fRowStart && row >= fRowEnd))) {
539:                    Assert.isTrue(false, "random access to matrix not allowed"); //$NON-NLS-1$
540:                    return SKIP; // dummy
541:                }
542:
543:                return fPreviousRow[column];
544:            }
545:
546:            /**
547:             * Sets the matrix value at [row, column]. Note that not the entire
548:             * matrix may be available at all times.
549:             *
550:             * @param row the row (right domain index)
551:             * @param column (left domain index)
552:             * @param value the value to set
553:             */
554:            private void setAt(int row, int column, int value) {
555:
556:                // shift reverse iteration towards left by one
557:                if (fStep < 0)
558:                    column--;
559:
560:                if (fMatrix != null) {
561:                    fMatrix[row][column] = value;
562:                } else {
563:                    if (row == fRow)
564:                        fCurrentRow[column] = value;
565:                    else if (ASSERT
566:                            && !(row == fRow - fStep && ((fStep > 0
567:                                    && row >= fRowStart && row <= fRowEnd) || fStep < 0
568:                                    && row <= fRowStart && row >= fRowEnd)))
569:                        Assert.isTrue(false,
570:                                "random access to matrix not allowed"); //$NON-NLS-1$
571:                    else
572:                        fPreviousRow[column] = value;
573:                }
574:            }
575:
576:            /*
577:             * Compares the two domain element ranges corresponding to the cell at
578:             * [r,l], that is the (zero-based) elements at r - 1 and l - 1.
579:             */
580:            boolean rangesEqual(int r, int l) {
581:                if (DEBUG)
582:                    fComparisons++;
583:                return fLeft.rangesEqual(l - 1, fRight, r - 1);
584:            }
585:
586:            /*
587:             * Adds two cell cost values, never exceeding SKIP.
588:             */
589:            private static int sum(int c1, int c2) {
590:                int sum = c1 + c2;
591:                if (sum < 0)
592:                    return SKIP;
593:                return sum;
594:            }
595:
596:            /*
597:             * Computes the best possible edit distance from cell [r,l] if getting
598:             * there has cost cCur.
599:             */
600:            int minCost(int r, int l, int cCur) {
601:                // minimal cost from cell [r,l] to [rCount, lCount] if cell cost == cost
602:                // Assume that the minimum of the remaining columns / rows are equal, and just
603:                // the rest of the ranges has to be inserted / deleted
604:                if (cCur == SKIP)
605:                    return SKIP;
606:                return cCur + Math.abs((fRowEnd - r) - (fColEnd - l))
607:                        * COST_INSERT; // can be either insert or delete
608:            }
609:
610:            /*
611:             * Computes the worst possible edit distance from cell [r,l] if getting
612:             * there has cost cCur.
613:             */
614:            int maxCost(int r, int l, int cCur) {
615:                // maximal cost from cell [r,l] to [rCount, lCount] if cell cost == cost
616:                // maximal additional cost is the maximum remaining columns / rows
617:                if (cCur == SKIP)
618:                    return SKIP;
619:                return cCur
620:                        + Math
621:                                .max(Math.abs(fRowEnd - r), Math.abs(fColEnd
622:                                        - l)) * COST_CHANGE;
623:            }
624:
625:            /* classic implementation */
626:
627:            private RangeDifference[] walkback() {
628:                fDiffs = new LinkedList();
629:
630:                int row = fRowEnd, col = fColEnd;
631:                RangeDifference difference = null;
632:
633:                int cell = fMatrix[row][col]; // edit distance
634:
635:                while (row > 0 || col > 0) {
636:                    int diag, above, left;
637:
638:                    if (row == 0) {
639:                        // slide deletes along row 0
640:                        diag = SKIP;
641:                        above = SKIP;
642:                        left = col - 1;
643:                    } else if (col == 0) {
644:                        // slide inserts along column 0
645:                        diag = SKIP;
646:                        above = row - 1;
647:                        left = SKIP;
648:                    } else {
649:                        // inner cells
650:                        diag = fMatrix[row - 1][col - 1];
651:                        above = fMatrix[row - 1][col];
652:                        left = fMatrix[row][col - 1];
653:                    }
654:
655:                    if (left == cell - 1 && left <= diag && left <= above) {
656:                        // delete
657:                        col--;
658:                        difference = getChange(difference);
659:                        difference.fLeftStart = col;
660:                        difference.fLeftLength++;
661:                        difference.fRightStart = row;
662:
663:                        cell = left;
664:                    } else if (above == cell - 1 && above <= diag) {
665:                        // insert
666:                        row--;
667:                        difference = getChange(difference);
668:                        difference.fLeftStart = col;
669:                        difference.fRightStart = row;
670:                        difference.fRightLength++;
671:
672:                        cell = above;
673:                    } else {
674:                        col--;
675:                        row--;
676:                        if (cell == diag) {
677:                            // match
678:                            // alternatively, create NOCHANGE ranges for findRanges
679:                            difference = null;
680:                        } else if (cell == diag + 1) {
681:                            // change
682:                            difference = getChange(difference);
683:                            difference.fLeftStart = col;
684:                            difference.fLeftLength++;
685:                            difference.fRightStart = row;
686:                            difference.fRightLength++;
687:                        } else {
688:                            if (ASSERT)
689:                                Assert.isTrue(false, "illegal matrix"); //$NON-NLS-1$
690:                        }
691:
692:                        cell = diag;
693:                    }
694:
695:                }
696:
697:                return (RangeDifference[]) fDiffs
698:                        .toArray(new RangeDifference[fDiffs.size()]);
699:            }
700:
701:            private RangeDifference getChange(RangeDifference difference) {
702:                if (difference != null)
703:                    return difference;
704:
705:                difference = new RangeDifference(RangeDifference.CHANGE);
706:                fDiffs.add(0, difference);
707:                return difference;
708:            }
709:
710:            /* hirschberg's algorithm */
711:
712:            private int hirschberg(int rStart, int rEnd, int lStart, int lEnd) {
713:
714:                /* trivial cases */
715:
716:                if (rEnd < rStart) {
717:                    // right range is empty
718:                    return lEnd - lStart + 1;
719:                } else if (rStart == rEnd) {
720:                    // right has length 1: look for a match and split
721:                    internalEditDistance(rStart, rEnd, lStart, lEnd);
722:                    int distance = SKIP;
723:                    for (int col = lStart - 1; col <= lEnd; col++) {
724:                        distance = fPreviousRow[col];
725:                        if (distance == 0) {
726:                            fOptimalSplitColumn[rStart] = col;
727:                            fOptimalSplitValues[rStart] = true;
728:                            return 0;
729:                        }
730:                    }
731:                    fOptimalSplitColumn[rStart] = lEnd;
732:                    fOptimalSplitValues[rStart] = false;
733:                    if (distance == SKIP)
734:                        return 1;
735:                    return distance;
736:                }
737:                //		else if (lEnd < lStart) {
738:                //			// left is empty // perhaps not necessary
739:                //			Arrays.fill(fOptimalSplitColumn, rStart, rEnd + 1, lEnd);
740:                //			Arrays.fill(fOptimalSplitValues, rStart, rEnd + 1, false);
741:                //			return rEnd - rStart + 1;
742:                //		}
743:
744:                /* divide & conquer */
745:
746:                // split rows at half
747:                int rowSplit = (rStart + rEnd + 1) / 2 - 1;
748:
749:                // compute edit distance of (r1,left) in linear space into fPreviousRow
750:                internalEditDistance(rStart, rowSplit, lStart, lEnd);
751:                int[] tmp = fPreviousRow;
752:                fPreviousRow = fResultRow;
753:                fResultRow = tmp;
754:                // compute backwards edit distance of (r2,left) in linear space into fPreviousRow
755:                internalReverseEditDistance(rowSplit + 1, rEnd, lStart, lEnd);
756:
757:                // find optimal alignment - the column in which to split the
758:                // left hand side
759:                int columnSplit = SKIP, distance = SKIP;
760:                for (int col = lStart - 1; col <= lEnd; col++) {
761:                    int sum = sum(fResultRow[col], fPreviousRow[col]);
762:                    if (sum < distance) {
763:                        distance = sum;
764:                        columnSplit = col;
765:                    }
766:                }
767:
768:                if (fProgressMonitor.isCanceled())
769:                    return NO_DISTANCE;
770:
771:                if (ASSERT)
772:                    Assert.isTrue(distance != SKIP);
773:                if (ASSERT)
774:                    Assert.isTrue(columnSplit != SKIP);
775:
776:                if (distance == 0) {
777:                    // optimize for large unchanged parts
778:                    // no further partitioning needed, this part is equal
779:                    if (ASSERT)
780:                        Assert.isTrue(rEnd - rStart == lEnd - lStart);
781:                    int col = lStart;
782:                    int row = rStart;
783:                    while (row <= rEnd) {
784:                        fOptimalSplitColumn[row] = col;
785:                        fOptimalSplitValues[row] = true;
786:                        col++;
787:                        row++;
788:                    }
789:                    return distance;
790:                }
791:
792:                // store alignment: from [rowSplit, ?] connect to [rowSplit + 1, columnSplit]
793:                fOptimalSplitColumn[rowSplit] = columnSplit;
794:                fOptimalSplitValues[rowSplit] = false;
795:
796:                // divide at column & conquer
797:                // TODO guard against stack overflow
798:                hirschberg(rStart, rowSplit, lStart, columnSplit);
799:                hirschberg(rowSplit + 1, rEnd, columnSplit + 1, lEnd);
800:
801:                return distance;
802:            }
803:
804:            private RangeDifference[] buildDifferencesHirschberg() {
805:                fDiffs = new LinkedList();
806:
807:                RangeDifference difference = null;
808:                int previousColumn = 0;
809:
810:                for (int row = 1; row < fOptimalSplitColumn.length; row++) {
811:                    int previousRow = row - 1;
812:                    int column = fOptimalSplitColumn[row]; // from (row-1), jump to column (column) in row (row)
813:
814:                    if (column == previousColumn + 1) {
815:                        // diagonal
816:                        if (fOptimalSplitValues[row]) {
817:                            // match
818:                            // alternatively, create NOCHANGE ranges for findRanges
819:                            difference = null;
820:                        } else {
821:                            // change
822:                            difference = getChange(difference, previousRow,
823:                                    previousColumn);
824:                            difference.fLeftLength++;
825:                            difference.fRightLength++;
826:                        }
827:                    } else if (column == previousColumn) {
828:                        // downwards / insert
829:                        difference = getChange(difference, previousRow,
830:                                previousColumn);
831:                        difference.fRightLength++;
832:
833:                    } else if (column > previousColumn) {
834:                        // rightward / deletes
835:                        difference = getChange(difference, previousRow,
836:                                previousColumn);
837:                        difference.fLeftLength += column - previousColumn - 1;
838:                    } else {
839:                        if (ASSERT)
840:                            Assert.isTrue(false, "Illegal edit description"); //$NON-NLS-1$
841:                    }
842:
843:                    previousColumn = column;
844:                }
845:
846:                if (previousColumn < fLeft.getRangeCount()) {
847:
848:                    // trailing deletions
849:                    difference = getChange(difference,
850:                            fOptimalSplitColumn.length - 1, previousColumn);
851:                    difference.fLeftLength += fLeft.getRangeCount()
852:                            - previousColumn;
853:                }
854:
855:                return (RangeDifference[]) fDiffs
856:                        .toArray(new RangeDifference[fDiffs.size()]);
857:            }
858:
859:            private RangeDifference getChange(RangeDifference difference,
860:                    int row, int column) {
861:                if (difference != null)
862:                    return difference;
863:
864:                difference = new RangeDifference(RangeDifference.CHANGE, row,
865:                        0, column, 0);
866:                fDiffs.add(difference);
867:                return difference;
868:            }
869:
870:            /* pretty printing for debug output */
871:
872:            private void printRow() {
873:                if (fMatrix != null)
874:                    print(fMatrix[fRow]);
875:                else
876:                    print(fCurrentRow);
877:            }
878:
879:            private static void printHeader(IRangeComparator left,
880:                    IRangeComparator right) {
881:                System.out.println("============================="); //$NON-NLS-1$
882:                System.out.println("= s1: " + left.toString()); //$NON-NLS-1$
883:                System.out.println("= s2: " + right.toString()); //$NON-NLS-1$
884:                System.out.println();
885:            }
886:
887:            private static void print(int[] row) {
888:                for (int i = 0; i < row.length; i++) {
889:                    System.out
890:                            .print("\t" + (row[i] == Integer.MAX_VALUE ? "-" : "" + row[i])); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
891:                }
892:                System.out.println();
893:            }
894:
895:        }
w__ww_._j_a__v_a2_s___.__c_o___m | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.