ohair@286: /* ohair@286: * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ohair@286: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ohair@286: * ohair@286: * This code is free software; you can redistribute it and/or modify it ohair@286: * under the terms of the GNU General Public License version 2 only, as ohair@286: * published by the Free Software Foundation. Oracle designates this ohair@286: * particular file as subject to the "Classpath" exception as provided ohair@286: * by Oracle in the LICENSE file that accompanied this code. ohair@286: * ohair@286: * This code is distributed in the hope that it will be useful, but WITHOUT ohair@286: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ohair@286: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ohair@286: * version 2 for more details (a copy is included in the LICENSE file that ohair@286: * accompanied this code). ohair@286: * ohair@286: * You should have received a copy of the GNU General Public License version ohair@286: * 2 along with this work; if not, write to the Free Software Foundation, ohair@286: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ohair@286: * ohair@286: * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ohair@286: * or visit www.oracle.com if you need additional information or have any ohair@286: * questions. ohair@286: */ ohair@286: ohair@286: package com.sun.xml.internal.bind.v2.util; ohair@286: mkos@397: import java.util.AbstractMap; mkos@397: import java.util.Arrays; ohair@286: import java.util.Collection; mkos@397: import java.util.WeakHashMap; ohair@286: ohair@286: /** ohair@286: * Computes the string edit distance. ohair@286: * ohair@286: *

ohair@286: * Refer to a computer science text book for the definition ohair@286: * of the "string edit distance". ohair@286: * ohair@286: * @author ohair@286: * Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com) ohair@286: */ ohair@286: public class EditDistance { ohair@286: ohair@286: /** mkos@397: * Weak results cache to avoid additional computations. mkos@397: * Because of high complexity caching is required. mkos@397: */ mkos@397: private static final WeakHashMap, Integer> CACHE = new WeakHashMap, Integer>(); mkos@397: mkos@397: /** ohair@286: * Computes the edit distance between two strings. ohair@286: * ohair@286: *

ohair@286: * The complexity is O(nm) where n=a.length() and m=b.length(). ohair@286: */ ohair@286: public static int editDistance( String a, String b ) { mkos@397: // let's check cache mkos@397: AbstractMap.SimpleEntry entry = new AbstractMap.SimpleEntry(a, b); // using this class to avoid creation of my own which will handle PAIR of values mkos@397: Integer result = null; mkos@397: if (CACHE.containsKey(entry)) mkos@397: result = CACHE.get(entry); // looks like we have it mkos@397: mkos@397: if (result == null) { mkos@397: result = new EditDistance(a, b).calc(); mkos@397: CACHE.put(entry, result); // cache the result mkos@397: } mkos@397: return result; ohair@286: } ohair@286: ohair@286: /** ohair@286: * Finds the string in the group closest to ohair@286: * key and returns it. ohair@286: * ohair@286: * @return null if group.length==0. ohair@286: */ ohair@286: public static String findNearest( String key, String[] group ) { ohair@286: return findNearest(key, Arrays.asList(group)); ohair@286: } ohair@286: ohair@286: /** ohair@286: * Finds the string in the group closest to ohair@286: * key and returns it. ohair@286: * ohair@286: * @return null if group.length==0. ohair@286: */ ohair@286: public static String findNearest( String key, Collection group ) { ohair@286: int c = Integer.MAX_VALUE; ohair@286: String r = null; ohair@286: ohair@286: for (String s : group) { ohair@286: int ed = editDistance(key,s); ohair@286: if( c>ed ) { ohair@286: c = ed; ohair@286: r = s; ohair@286: } ohair@286: } ohair@286: return r; ohair@286: } ohair@286: ohair@286: /** cost vector. */ ohair@286: private int[] cost; ohair@286: /** back buffer. */ ohair@286: private int[] back; ohair@286: ohair@286: /** Two strings to be compared. */ ohair@286: private final String a,b; ohair@286: ohair@286: private EditDistance( String a, String b ) { ohair@286: this.a=a; ohair@286: this.b=b; ohair@286: cost = new int[a.length()+1]; ohair@286: back = new int[a.length()+1]; // back buffer ohair@286: ohair@286: for( int i=0; i<=a.length(); i++ ) ohair@286: cost[i] = i; ohair@286: } ohair@286: ohair@286: /** ohair@286: * Swaps two buffers. ohair@286: */ ohair@286: private void flip() { ohair@286: int[] t = cost; ohair@286: cost = back; ohair@286: back = t; ohair@286: } ohair@286: ohair@286: private int min(int a,int b,int c) { ohair@286: return Math.min(a,Math.min(b,c)); ohair@286: } ohair@286: ohair@286: private int calc() { ohair@286: for( int j=0; j