001: /*
002: * Copyright 2006 Sun Microsystems, Inc. All Rights Reserved.
003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004: *
005: * This code is free software; you can redistribute it and/or modify it
006: * under the terms of the GNU General Public License version 2 only, as
007: * published by the Free Software Foundation. Sun designates this
008: * particular file as subject to the "Classpath" exception as provided
009: * by Sun in the LICENSE file that accompanied this code.
010: *
011: * This code is distributed in the hope that it will be useful, but WITHOUT
012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014: * version 2 for more details (a copy is included in the LICENSE file that
015: * accompanied this code).
016: *
017: * You should have received a copy of the GNU General Public License version
018: * 2 along with this work; if not, write to the Free Software Foundation,
019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022: * CA 95054 USA or visit www.sun.com if you need additional information or
023: * have any questions.
024: */
025:
026: /*
027: * @(#)$Id: EditDistance.java,v 1.2 2005/09/10 19:09:03 kohsuke Exp $
028: */
029:
030: package com.sun.xml.internal.bind.v2.util;
031:
032: /**
033: * Computes the string edit distance.
034: *
035: * <p>
036: * Refer to a computer science text book for the definition
037: * of the "string edit distance".
038: *
039: * @author
040: * Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com)
041: */
042: public class EditDistance {
043:
044: /**
045: * Computes the edit distance between two strings.
046: *
047: * <p>
048: * The complexity is O(nm) where n=a.length() and m=b.length().
049: */
050: public static int editDistance(String a, String b) {
051: return new EditDistance(a, b).calc();
052: }
053:
054: /**
055: * Finds the string in the <code>group</code> closest to
056: * <code>key</code> and returns it.
057: *
058: * @return null if group.length==0.
059: */
060: public static String findNearest(String key, String[] group) {
061: int c = Integer.MAX_VALUE;
062: String r = null;
063:
064: for (int i = 0; i < group.length; i++) {
065: int ed = editDistance(key, group[i]);
066: if (c > ed) {
067: c = ed;
068: r = group[i];
069: }
070: }
071: return r;
072: }
073:
074: /** cost vector. */
075: private int[] cost;
076: /** back buffer. */
077: private int[] back;
078:
079: /** Two strings to be compared. */
080: private final String a, b;
081:
082: private EditDistance(String a, String b) {
083: this .a = a;
084: this .b = b;
085: cost = new int[a.length() + 1];
086: back = new int[a.length() + 1]; // back buffer
087:
088: for (int i = 0; i <= a.length(); i++)
089: cost[i] = i;
090: }
091:
092: /**
093: * Swaps two buffers.
094: */
095: private void flip() {
096: int[] t = cost;
097: cost = back;
098: back = t;
099: }
100:
101: private int min(int a, int b, int c) {
102: return Math.min(a, Math.min(b, c));
103: }
104:
105: private int calc() {
106: for (int j = 0; j < b.length(); j++) {
107: flip();
108: cost[0] = j + 1;
109: for (int i = 0; i < a.length(); i++) {
110: int match = (a.charAt(i) == b.charAt(j)) ? 0 : 1;
111: cost[i + 1] = min(back[i] + match, cost[i] + 1,
112: back[i + 1] + 1);
113: }
114: }
115: return cost[a.length()];
116: }
117: }
|