3/4/14

Removing accents (and diacritics) in any language from Java

Removing accents (and diacritics) in any language from Java


import java.text.Normalizer;
import java.text.Normalizer.Form;
import java.util.regex.Pattern;

// ...

/**
 * Strips accents and other combining diacritical marks from a string.
 *
 * <p>The text is first normalized to canonical decomposition (NFD), which
 * splits precomposed characters into a base character followed by combining
 * marks; every run of characters in the Unicode "Combining Diacritical Marks"
 * block is then removed. Characters without a canonical decomposition are
 * left untouched.
 *
 * @param text the string to process; may be {@code null}
 * @return the text without combining diacritical marks, or {@code null} if
 *         {@code text} is {@code null}
 */
public static String removeAccents(String text) {
    if (text == null) {
        return null;
    }
    // NFD separates each base letter from its accent so the accent can be dropped on its own.
    String decomposed = Normalizer.normalize(text, Form.NFD);
    return decomposed.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
}

Or, equivalently, using an explicitly compiled regex Pattern:

 public static String deAccent(String str) {
     String nfdNormalizedString = Normalizer.normalize(str, Normalizer.Form.NFD); 
     Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
     return pattern.matcher(nfdNormalizedString).replaceAll(""); 
        } 

No comments: