Thu, 31 Aug 2017 15:18:52 +0800
merge
aoqi@0 | 1 | /* |
aoqi@0 | 2 | * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. |
aoqi@0 | 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
aoqi@0 | 4 | * |
aoqi@0 | 5 | * This code is free software; you can redistribute it and/or modify it |
aoqi@0 | 6 | * under the terms of the GNU General Public License version 2 only, as |
aoqi@0 | 7 | * published by the Free Software Foundation. Oracle designates this |
aoqi@0 | 8 | * particular file as subject to the "Classpath" exception as provided |
aoqi@0 | 9 | * by Oracle in the LICENSE file that accompanied this code. |
aoqi@0 | 10 | * |
aoqi@0 | 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
aoqi@0 | 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
aoqi@0 | 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
aoqi@0 | 14 | * version 2 for more details (a copy is included in the LICENSE file that |
aoqi@0 | 15 | * accompanied this code). |
aoqi@0 | 16 | * |
aoqi@0 | 17 | * You should have received a copy of the GNU General Public License version |
aoqi@0 | 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
aoqi@0 | 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
aoqi@0 | 20 | * |
aoqi@0 | 21 | * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
aoqi@0 | 22 | * or visit www.oracle.com if you need additional information or have any |
aoqi@0 | 23 | * questions. |
aoqi@0 | 24 | */ |
aoqi@0 | 25 | |
aoqi@0 | 26 | package com.sun.xml.internal.bind.v2.util; |
aoqi@0 | 27 | |
aoqi@0 | 28 | import java.util.AbstractMap; |
aoqi@0 | 29 | import java.util.Arrays; |
aoqi@0 | 30 | import java.util.Collection; |
aoqi@0 | 31 | import java.util.WeakHashMap; |
aoqi@0 | 32 | |
aoqi@0 | 33 | /** |
aoqi@0 | 34 | * Computes the string edit distance. |
aoqi@0 | 35 | * |
aoqi@0 | 36 | * <p> |
aoqi@0 | 37 | * Refer to a computer science text book for the definition |
aoqi@0 | 38 | * of the "string edit distance". |
aoqi@0 | 39 | * |
aoqi@0 | 40 | * @author |
aoqi@0 | 41 | * Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com) |
aoqi@0 | 42 | */ |
aoqi@0 | 43 | public class EditDistance { |
aoqi@0 | 44 | |
aoqi@0 | 45 | /** |
aoqi@0 | 46 | * Weak results cache to avoid additional computations. |
aoqi@0 | 47 | * Because of high complexity caching is required. |
aoqi@0 | 48 | */ |
aoqi@0 | 49 | private static final WeakHashMap<AbstractMap.SimpleEntry<String,String>, Integer> CACHE = new WeakHashMap<AbstractMap.SimpleEntry<String, String>, Integer>(); |
aoqi@0 | 50 | |
aoqi@0 | 51 | /** |
aoqi@0 | 52 | * Computes the edit distance between two strings. |
aoqi@0 | 53 | * |
aoqi@0 | 54 | * <p> |
aoqi@0 | 55 | * The complexity is O(nm) where n=a.length() and m=b.length(). |
aoqi@0 | 56 | */ |
aoqi@0 | 57 | public static int editDistance( String a, String b ) { |
aoqi@0 | 58 | // let's check cache |
aoqi@0 | 59 | AbstractMap.SimpleEntry<String,String> entry = new AbstractMap.SimpleEntry<String, String>(a, b); // using this class to avoid creation of my own which will handle PAIR of values |
aoqi@0 | 60 | Integer result = null; |
aoqi@0 | 61 | if (CACHE.containsKey(entry)) |
aoqi@0 | 62 | result = CACHE.get(entry); // looks like we have it |
aoqi@0 | 63 | |
aoqi@0 | 64 | if (result == null) { |
aoqi@0 | 65 | result = new EditDistance(a, b).calc(); |
aoqi@0 | 66 | CACHE.put(entry, result); // cache the result |
aoqi@0 | 67 | } |
aoqi@0 | 68 | return result; |
aoqi@0 | 69 | } |
aoqi@0 | 70 | |
aoqi@0 | 71 | /** |
aoqi@0 | 72 | * Finds the string in the <code>group</code> closest to |
aoqi@0 | 73 | * <code>key</code> and returns it. |
aoqi@0 | 74 | * |
aoqi@0 | 75 | * @return null if group.length==0. |
aoqi@0 | 76 | */ |
aoqi@0 | 77 | public static String findNearest( String key, String[] group ) { |
aoqi@0 | 78 | return findNearest(key, Arrays.asList(group)); |
aoqi@0 | 79 | } |
aoqi@0 | 80 | |
aoqi@0 | 81 | /** |
aoqi@0 | 82 | * Finds the string in the <code>group</code> closest to |
aoqi@0 | 83 | * <code>key</code> and returns it. |
aoqi@0 | 84 | * |
aoqi@0 | 85 | * @return null if group.length==0. |
aoqi@0 | 86 | */ |
aoqi@0 | 87 | public static String findNearest( String key, Collection<String> group ) { |
aoqi@0 | 88 | int c = Integer.MAX_VALUE; |
aoqi@0 | 89 | String r = null; |
aoqi@0 | 90 | |
aoqi@0 | 91 | for (String s : group) { |
aoqi@0 | 92 | int ed = editDistance(key,s); |
aoqi@0 | 93 | if( c>ed ) { |
aoqi@0 | 94 | c = ed; |
aoqi@0 | 95 | r = s; |
aoqi@0 | 96 | } |
aoqi@0 | 97 | } |
aoqi@0 | 98 | return r; |
aoqi@0 | 99 | } |
aoqi@0 | 100 | |
aoqi@0 | 101 | /** cost vector. */ |
aoqi@0 | 102 | private int[] cost; |
aoqi@0 | 103 | /** back buffer. */ |
aoqi@0 | 104 | private int[] back; |
aoqi@0 | 105 | |
aoqi@0 | 106 | /** Two strings to be compared. */ |
aoqi@0 | 107 | private final String a,b; |
aoqi@0 | 108 | |
aoqi@0 | 109 | private EditDistance( String a, String b ) { |
aoqi@0 | 110 | this.a=a; |
aoqi@0 | 111 | this.b=b; |
aoqi@0 | 112 | cost = new int[a.length()+1]; |
aoqi@0 | 113 | back = new int[a.length()+1]; // back buffer |
aoqi@0 | 114 | |
aoqi@0 | 115 | for( int i=0; i<=a.length(); i++ ) |
aoqi@0 | 116 | cost[i] = i; |
aoqi@0 | 117 | } |
aoqi@0 | 118 | |
aoqi@0 | 119 | /** |
aoqi@0 | 120 | * Swaps two buffers. |
aoqi@0 | 121 | */ |
aoqi@0 | 122 | private void flip() { |
aoqi@0 | 123 | int[] t = cost; |
aoqi@0 | 124 | cost = back; |
aoqi@0 | 125 | back = t; |
aoqi@0 | 126 | } |
aoqi@0 | 127 | |
aoqi@0 | 128 | private int min(int a,int b,int c) { |
aoqi@0 | 129 | return Math.min(a,Math.min(b,c)); |
aoqi@0 | 130 | } |
aoqi@0 | 131 | |
aoqi@0 | 132 | private int calc() { |
aoqi@0 | 133 | for( int j=0; j<b.length(); j++ ) { |
aoqi@0 | 134 | flip(); |
aoqi@0 | 135 | cost[0] = j+1; |
aoqi@0 | 136 | for( int i=0; i<a.length(); i++ ) { |
aoqi@0 | 137 | int match = (a.charAt(i)==b.charAt(j))?0:1; |
aoqi@0 | 138 | cost[i+1] = min( back[i]+match, cost[i]+1, back[i+1]+1 ); |
aoqi@0 | 139 | } |
aoqi@0 | 140 | } |
aoqi@0 | 141 | return cost[a.length()]; |
aoqi@0 | 142 | } |
aoqi@0 | 143 | } |