Removing accents (and diacritics) in any language from Java
import java.text.Normalizer;
import java.text.Normalizer.Form;
// ...
public static String removeAccents(String text) {
return text == null ? null
: Normalizer.normalize(text, Form.NFD)
.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}
or
public static String deAccent(String str) {
String nfdNormalizedString = Normalizer.normalize(str, Normalizer.Form.NFD);
Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
return pattern.matcher(nfdNormalizedString).replaceAll("");
}
No comments:
Post a Comment