|
1 /* |
|
2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. Oracle designates this |
|
8 * particular file as subject to the "Classpath" exception as provided |
|
9 * by Oracle in the LICENSE file that accompanied this code. |
|
10 * |
|
11 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 * version 2 for more details (a copy is included in the LICENSE file that |
|
15 * accompanied this code). |
|
16 * |
|
17 * You should have received a copy of the GNU General Public License version |
|
18 * 2 along with this work; if not, write to the Free Software Foundation, |
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 * |
|
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
22 * or visit www.oracle.com if you need additional information or have any |
|
23 * questions. |
|
24 */ |
|
25 |
|
26 package com.sun.xml.internal.bind.v2.util; |
|
27 |
|
28 import java.util.AbstractMap; |
|
29 import java.util.Arrays; |
|
30 import java.util.Collection; |
|
31 import java.util.WeakHashMap; |
|
32 |
|
33 /** |
|
34 * Computes the string edit distance. |
|
35 * |
|
36 * <p> |
|
37 * Refer to a computer science text book for the definition |
|
38 * of the "string edit distance". |
|
39 * |
|
40 * @author |
|
41 * Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com) |
|
42 */ |
|
43 public class EditDistance { |
|
44 |
|
45 /** |
|
46 * Weak results cache to avoid additional computations. |
|
47 * Because of high complexity caching is required. |
|
48 */ |
|
49 private static final WeakHashMap<AbstractMap.SimpleEntry<String,String>, Integer> CACHE = new WeakHashMap<AbstractMap.SimpleEntry<String, String>, Integer>(); |
|
50 |
|
51 /** |
|
52 * Computes the edit distance between two strings. |
|
53 * |
|
54 * <p> |
|
55 * The complexity is O(nm) where n=a.length() and m=b.length(). |
|
56 */ |
|
57 public static int editDistance( String a, String b ) { |
|
58 // let's check cache |
|
59 AbstractMap.SimpleEntry<String,String> entry = new AbstractMap.SimpleEntry<String, String>(a, b); // using this class to avoid creation of my own which will handle PAIR of values |
|
60 Integer result = null; |
|
61 if (CACHE.containsKey(entry)) |
|
62 result = CACHE.get(entry); // looks like we have it |
|
63 |
|
64 if (result == null) { |
|
65 result = new EditDistance(a, b).calc(); |
|
66 CACHE.put(entry, result); // cache the result |
|
67 } |
|
68 return result; |
|
69 } |
|
70 |
|
71 /** |
|
72 * Finds the string in the <code>group</code> closest to |
|
73 * <code>key</code> and returns it. |
|
74 * |
|
75 * @return null if group.length==0. |
|
76 */ |
|
77 public static String findNearest( String key, String[] group ) { |
|
78 return findNearest(key, Arrays.asList(group)); |
|
79 } |
|
80 |
|
81 /** |
|
82 * Finds the string in the <code>group</code> closest to |
|
83 * <code>key</code> and returns it. |
|
84 * |
|
85 * @return null if group.length==0. |
|
86 */ |
|
87 public static String findNearest( String key, Collection<String> group ) { |
|
88 int c = Integer.MAX_VALUE; |
|
89 String r = null; |
|
90 |
|
91 for (String s : group) { |
|
92 int ed = editDistance(key,s); |
|
93 if( c>ed ) { |
|
94 c = ed; |
|
95 r = s; |
|
96 } |
|
97 } |
|
98 return r; |
|
99 } |
|
100 |
|
101 /** cost vector. */ |
|
102 private int[] cost; |
|
103 /** back buffer. */ |
|
104 private int[] back; |
|
105 |
|
106 /** Two strings to be compared. */ |
|
107 private final String a,b; |
|
108 |
|
109 private EditDistance( String a, String b ) { |
|
110 this.a=a; |
|
111 this.b=b; |
|
112 cost = new int[a.length()+1]; |
|
113 back = new int[a.length()+1]; // back buffer |
|
114 |
|
115 for( int i=0; i<=a.length(); i++ ) |
|
116 cost[i] = i; |
|
117 } |
|
118 |
|
119 /** |
|
120 * Swaps two buffers. |
|
121 */ |
|
122 private void flip() { |
|
123 int[] t = cost; |
|
124 cost = back; |
|
125 back = t; |
|
126 } |
|
127 |
|
128 private int min(int a,int b,int c) { |
|
129 return Math.min(a,Math.min(b,c)); |
|
130 } |
|
131 |
|
132 private int calc() { |
|
133 for( int j=0; j<b.length(); j++ ) { |
|
134 flip(); |
|
135 cost[0] = j+1; |
|
136 for( int i=0; i<a.length(); i++ ) { |
|
137 int match = (a.charAt(i)==b.charAt(j))?0:1; |
|
138 cost[i+1] = min( back[i]+match, cost[i]+1, back[i+1]+1 ); |
|
139 } |
|
140 } |
|
141 return cost[a.length()]; |
|
142 } |
|
143 } |