aoqi@0: /* aoqi@0: * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. aoqi@0: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. aoqi@0: * aoqi@0: * This code is free software; you can redistribute it and/or modify it aoqi@0: * under the terms of the GNU General Public License version 2 only, as aoqi@0: * published by the Free Software Foundation. Oracle designates this aoqi@0: * particular file as subject to the "Classpath" exception as provided aoqi@0: * by Oracle in the LICENSE file that accompanied this code. aoqi@0: * aoqi@0: * This code is distributed in the hope that it will be useful, but WITHOUT aoqi@0: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or aoqi@0: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License aoqi@0: * version 2 for more details (a copy is included in the LICENSE file that aoqi@0: * accompanied this code). aoqi@0: * aoqi@0: * You should have received a copy of the GNU General Public License version aoqi@0: * 2 along with this work; if not, write to the Free Software Foundation, aoqi@0: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. aoqi@0: * aoqi@0: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA aoqi@0: * or visit www.oracle.com if you need additional information or have any aoqi@0: * questions. aoqi@0: */ aoqi@0: aoqi@0: package com.sun.xml.internal.bind.v2.util; aoqi@0: aoqi@0: import java.util.AbstractMap; aoqi@0: import java.util.Arrays; aoqi@0: import java.util.Collection; aoqi@0: import java.util.WeakHashMap; aoqi@0: aoqi@0: /** aoqi@0: * Computes the string edit distance. aoqi@0: * aoqi@0: *

aoqi@0: * Refer to a computer science text book for the definition aoqi@0: * of the "string edit distance". aoqi@0: * aoqi@0: * @author aoqi@0: * Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com) aoqi@0: */ aoqi@0: public class EditDistance { aoqi@0: aoqi@0: /** aoqi@0: * Weak results cache to avoid additional computations. aoqi@0: * Because of high complexity caching is required. aoqi@0: */ aoqi@0: private static final WeakHashMap, Integer> CACHE = new WeakHashMap, Integer>(); aoqi@0: aoqi@0: /** aoqi@0: * Computes the edit distance between two strings. aoqi@0: * aoqi@0: *

aoqi@0: * The complexity is O(nm) where n=a.length() and m=b.length(). aoqi@0: */ aoqi@0: public static int editDistance( String a, String b ) { aoqi@0: // let's check cache aoqi@0: AbstractMap.SimpleEntry entry = new AbstractMap.SimpleEntry(a, b); // using this class to avoid creation of my own which will handle PAIR of values aoqi@0: Integer result = null; aoqi@0: if (CACHE.containsKey(entry)) aoqi@0: result = CACHE.get(entry); // looks like we have it aoqi@0: aoqi@0: if (result == null) { aoqi@0: result = new EditDistance(a, b).calc(); aoqi@0: CACHE.put(entry, result); // cache the result aoqi@0: } aoqi@0: return result; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Finds the string in the group closest to aoqi@0: * key and returns it. aoqi@0: * aoqi@0: * @return null if group.length==0. aoqi@0: */ aoqi@0: public static String findNearest( String key, String[] group ) { aoqi@0: return findNearest(key, Arrays.asList(group)); aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Finds the string in the group closest to aoqi@0: * key and returns it. aoqi@0: * aoqi@0: * @return null if group.length==0. aoqi@0: */ aoqi@0: public static String findNearest( String key, Collection group ) { aoqi@0: int c = Integer.MAX_VALUE; aoqi@0: String r = null; aoqi@0: aoqi@0: for (String s : group) { aoqi@0: int ed = editDistance(key,s); aoqi@0: if( c>ed ) { aoqi@0: c = ed; aoqi@0: r = s; aoqi@0: } aoqi@0: } aoqi@0: return r; aoqi@0: } aoqi@0: aoqi@0: /** cost vector. */ aoqi@0: private int[] cost; aoqi@0: /** back buffer. */ aoqi@0: private int[] back; aoqi@0: aoqi@0: /** Two strings to be compared. */ aoqi@0: private final String a,b; aoqi@0: aoqi@0: private EditDistance( String a, String b ) { aoqi@0: this.a=a; aoqi@0: this.b=b; aoqi@0: cost = new int[a.length()+1]; aoqi@0: back = new int[a.length()+1]; // back buffer aoqi@0: aoqi@0: for( int i=0; i<=a.length(); i++ ) aoqi@0: cost[i] = i; aoqi@0: } aoqi@0: aoqi@0: /** aoqi@0: * Swaps two buffers. aoqi@0: */ aoqi@0: private void flip() { aoqi@0: int[] t = cost; aoqi@0: cost = back; aoqi@0: back = t; aoqi@0: } aoqi@0: aoqi@0: private int min(int a,int b,int c) { aoqi@0: return Math.min(a,Math.min(b,c)); aoqi@0: } aoqi@0: aoqi@0: private int calc() { aoqi@0: for( int j=0; j