|
1 /* |
|
2 * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. Oracle designates this |
|
8 * particular file as subject to the "Classpath" exception as provided |
|
9 * by Oracle in the LICENSE file that accompanied this code. |
|
10 * |
|
11 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 * version 2 for more details (a copy is included in the LICENSE file that |
|
15 * accompanied this code). |
|
16 * |
|
17 * You should have received a copy of the GNU General Public License version |
|
18 * 2 along with this work; if not, write to the Free Software Foundation, |
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 * |
|
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
22 * or visit www.oracle.com if you need additional information or have any |
|
23 * questions. |
|
24 */ |
|
25 |
|
26 package com.sun.xml.internal.bind.v2.util; |
|
27 |
|
28 import java.util.Collection; |
|
29 import java.util.Arrays; |
|
30 |
|
31 /** |
|
32 * Computes the string edit distance. |
|
33 * |
|
34 * <p> |
|
35 * Refer to a computer science text book for the definition |
|
36 * of the "string edit distance". |
|
37 * |
|
38 * @author |
|
39 * Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com) |
|
40 */ |
|
41 public class EditDistance { |
|
42 |
|
43 /** |
|
44 * Computes the edit distance between two strings. |
|
45 * |
|
46 * <p> |
|
47 * The complexity is O(nm) where n=a.length() and m=b.length(). |
|
48 */ |
|
49 public static int editDistance( String a, String b ) { |
|
50 return new EditDistance(a,b).calc(); |
|
51 } |
|
52 |
|
53 /** |
|
54 * Finds the string in the <code>group</code> closest to |
|
55 * <code>key</code> and returns it. |
|
56 * |
|
57 * @return null if group.length==0. |
|
58 */ |
|
59 public static String findNearest( String key, String[] group ) { |
|
60 return findNearest(key, Arrays.asList(group)); |
|
61 } |
|
62 |
|
63 /** |
|
64 * Finds the string in the <code>group</code> closest to |
|
65 * <code>key</code> and returns it. |
|
66 * |
|
67 * @return null if group.length==0. |
|
68 */ |
|
69 public static String findNearest( String key, Collection<String> group ) { |
|
70 int c = Integer.MAX_VALUE; |
|
71 String r = null; |
|
72 |
|
73 for (String s : group) { |
|
74 int ed = editDistance(key,s); |
|
75 if( c>ed ) { |
|
76 c = ed; |
|
77 r = s; |
|
78 } |
|
79 } |
|
80 return r; |
|
81 } |
|
82 |
|
83 /** cost vector. */ |
|
84 private int[] cost; |
|
85 /** back buffer. */ |
|
86 private int[] back; |
|
87 |
|
88 /** Two strings to be compared. */ |
|
89 private final String a,b; |
|
90 |
|
91 private EditDistance( String a, String b ) { |
|
92 this.a=a; |
|
93 this.b=b; |
|
94 cost = new int[a.length()+1]; |
|
95 back = new int[a.length()+1]; // back buffer |
|
96 |
|
97 for( int i=0; i<=a.length(); i++ ) |
|
98 cost[i] = i; |
|
99 } |
|
100 |
|
101 /** |
|
102 * Swaps two buffers. |
|
103 */ |
|
104 private void flip() { |
|
105 int[] t = cost; |
|
106 cost = back; |
|
107 back = t; |
|
108 } |
|
109 |
|
110 private int min(int a,int b,int c) { |
|
111 return Math.min(a,Math.min(b,c)); |
|
112 } |
|
113 |
|
114 private int calc() { |
|
115 for( int j=0; j<b.length(); j++ ) { |
|
116 flip(); |
|
117 cost[0] = j+1; |
|
118 for( int i=0; i<a.length(); i++ ) { |
|
119 int match = (a.charAt(i)==b.charAt(j))?0:1; |
|
120 cost[i+1] = min( back[i]+match, cost[i]+1, back[i+1]+1 ); |
|
121 } |
|
122 } |
|
123 return cost[a.length()]; |
|
124 } |
|
125 } |