请帮助我在 Java 中实现字符串相似度比较!我使用的是 org.apache.commons.codec.language.Soundex 类:
Soundex soundex = new Soundex();
String phoneticValue = soundex.encode("YourString");
String phoneticValue2 = soundex.encode("YourStrink");
if(phoneticValue.equals(phoneticValue2)){
}这似乎不起作用:两个字符串明明很相似,encode 方法却给出了不同的结果。如何使用这个库来比较这两个相似的字符串?
期待尽快收到您的回音!;)
发布于 2021-09-11 10:26:44
/**
 * Computes four-character American Soundex codes (one letter followed by
 * three digits) so that similar-sounding words can be compared by code.
 *
 * <p>Rules applied by {@link #getSoundexCode(String)}:
 * <ul>
 *   <li>The first character of the input is kept (upper-cased).</li>
 *   <li>Consonants are mapped to the digits 1-6; adjacent consonants with the
 *       same digit are coded once, including the one following the first
 *       letter.</li>
 *   <li>{@code H} and {@code W} are transparent: consonants with the same
 *       digit separated only by them are still coded once.</li>
 *   <li>Vowels and {@code Y} are not coded but do separate consonants, so the
 *       same digit may appear again after them.</li>
 *   <li>Characters outside {@code A-Z} (after upper-casing) are ignored.</li>
 *   <li>The result is truncated or zero-padded to four characters.</li>
 * </ul>
 */
class Soundex{
    /** Length of a complete Soundex code: one letter plus three digits. */
    private static final int CODE_LENGTH = 4;

    /**
     * Maps an upper-case letter to its Soundex digit.
     *
     * @param ch the character to classify; expected to be upper-case
     * @return a digit from 1 to 6 for a coded consonant, or 0 for every other
     *         character (vowels, H, W, Y, lower-case letters, non-letters)
     */
    private static int getConsonantCode( char ch ){
        switch (ch) {
            case 'B': case 'F': case 'P': case 'V':
                return 1;
            case 'C': case 'G': case 'J': case 'K':
            case 'Q': case 'S': case 'X': case 'Z':
                return 2;
            case 'D': case 'T':
                return 3;
            case 'L':
                return 4;
            case 'M': case 'N':
                return 5;
            case 'R':
                return 6;
            default:
                return 0;
        }
    }

    /**
     * Tells whether a character must be skipped without affecting the
     * duplicate-suppression state: H, W, and anything that is not A-Z.
     *
     * @param ch an upper-cased character of the input
     * @return {@code true} if the character is transparent to the encoder
     */
    private static boolean isTransparent( char ch ){
        return ch == 'H' || ch == 'W' || ch < 'A' || ch > 'Z';
    }

    /**
     * Returns the Soundex code of the given word.
     *
     * @param str the word to encode; must not be {@code null}
     * @return the four-character code, or an empty string when {@code str}
     *         is empty
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String getSoundexCode( String str ){
        if (str.isEmpty()) {
            return "";
        }
        // Locale.ROOT keeps the casing independent of the default locale.
        String upper = str.toUpperCase(java.util.Locale.ROOT);

        StringBuilder soundexCode = new StringBuilder(CODE_LENGTH);
        soundexCode.append(upper.charAt(0));

        // Seeding with the first letter's digit suppresses an immediately
        // following consonant that shares it (e.g. the F in "Pfister").
        int previousCode = getConsonantCode(upper.charAt(0));

        for (int i = 1; i < upper.length() && soundexCode.length() < CODE_LENGTH; i++) {
            char curr = upper.charAt(i);
            if (isTransparent(curr)) {
                // Leave previousCode untouched so S-H-C collapses to one digit.
                continue;
            }
            int code = getConsonantCode(curr);
            if (code != 0 && code != previousCode) {
                soundexCode.append(code);
            }
            // A vowel resets previousCode to 0, letting the same digit repeat.
            previousCode = code;
        }

        while (soundexCode.length() < CODE_LENGTH) {
            soundexCode.append('0');
        }
        return soundexCode.toString();
    }
}
// Posted on 2021-09-11 11:32:55
/**
 * Soundex encoder producing four-character codes (one upper-case letter
 * followed by three digits) for comparing similar-sounding words.
 *
 * <p>This variant treats {@code y}, {@code h} and {@code w} as transparent:
 * consonants with the same digit separated only by those letters are coded
 * once. The vowels {@code a, e, i, o, u} are not coded but separate
 * consonants, so the same digit may repeat after them.
 */
public class Soundex {
    /** Length of a complete Soundex code: one letter plus three digits. */
    private static final int CODE_LENGTH = 4;

    /**
     * Returns the Soundex code of the given word.
     *
     * <p>The first character is kept and upper-cased, so the result does not
     * depend on the input's case. Characters outside {@code a-z} (after
     * lower-casing) are ignored. The result is zero-padded to four characters.
     *
     * @param word the word to encode; must not be {@code null}
     * @return the four-character code, or an empty string when {@code word}
     *         is empty
     * @throws NullPointerException if {@code word} is {@code null}
     */
    public static String soundexOut(String word) {
        if (word.isEmpty()) {
            return "";
        }
        // Locale.ROOT keeps the casing independent of the default locale.
        String lower = word.toLowerCase(java.util.Locale.ROOT);

        StringBuilder soundex = new StringBuilder(CODE_LENGTH);
        soundex.append(Character.toUpperCase(lower.charAt(0)));

        // Seeding with the first letter's digit suppresses an immediately
        // following consonant that shares it (e.g. the f in "Pfister").
        int previous = soundexCode(lower.charAt(0));

        for (int i = 1; i < lower.length() && soundex.length() < CODE_LENGTH; i++) {
            char c = lower.charAt(i);
            boolean transparent = c < 'a' || c > 'z' || c == 'y' || c == 'h' || c == 'w';
            if (transparent) {
                // Keep `previous` so same-digit consonants around y/h/w collapse.
                continue;
            }
            int code = soundexCode(c);
            if (code != 0 && code != previous) {
                soundex.append(code);
            }
            // A vowel resets `previous` to 0, letting the same digit repeat.
            previous = code;
        }

        while (soundex.length() < CODE_LENGTH) {
            soundex.append('0');
        }
        return soundex.toString();
    }

    /**
     * Maps a letter to its Soundex digit, ignoring case.
     *
     * @param c the character to classify
     * @return a digit from 1 to 6 for a coded consonant, or 0 for every other
     *         character (vowels, y, h, w, punctuation, whitespace, digits)
     */
    public static int soundexCode(char c) {
        switch (Character.toLowerCase(c)) {
            case 'b': case 'f': case 'p': case 'v':
                return 1;
            case 'c': case 'g': case 'j': case 'k':
            case 'q': case 's': case 'x': case 'z':
                return 2;
            case 'd': case 't':
                return 3;
            case 'l':
                return 4;
            case 'm': case 'n':
                return 5;
            case 'r':
                return 6;
            default:
                return 0;
        }
    }

    /**
     * Returns the word with the vowels {@code a, e, i, o, u} removed from
     * every position after the first.
     *
     * <p>The first character is copied unchanged; the remaining characters
     * are lower-cased. {@link #soundexOut(String)} no longer relies on this
     * helper, because removing vowels up front loses the information needed
     * to tell vowel-separated consonants from adjacent ones; it is kept for
     * existing callers.
     *
     * @param word the word to strip; must not be {@code null}
     * @return the stripped word, or an empty string when {@code word} is empty
     * @throws NullPointerException if {@code word} is {@code null}
     */
    public static String dropedWord(String word) {
        if (word.isEmpty()) {
            return "";
        }
        StringBuilder drop = new StringBuilder(word.length());
        drop.append(word.charAt(0));
        String lower = word.toLowerCase();
        for (int i = 1; i < lower.length(); i++) {
            char c = lower.charAt(i);
            if ("aeiou".indexOf(c) < 0) {
                drop.append(c);
            }
        }
        return drop.toString();
    }
}
// https://stackoverflow.com/questions/43275987
复制相似问题