O plus do negócio é que está com as peculiaridades do português, e você pode adicionar palavras ao dicionário a hora que quiser.
Como arquivo de dicionário base, eu peguei o .dic do LibreOffice e só limpei algum lixo;
deu 308.295 palavras (tá bom pra começar, né?).
Sem mais delongas, aí vai o código.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Dictionary-based spelling corrector for Portuguese words.
 *
 * <p>The dictionary is built from a text file: every run of letters found in
 * the file (lowercased) becomes a known word, with a count of how many times
 * it occurred. {@link #correct(String)} suggests known words that are one
 * edit away from the input (deletion, adjacent transposition, replacement or
 * insertion of a letter from {@code alfabeto}), or two edits away when no
 * single edit yields a known word.
 */
public class Spelling {
    /**
     * A word is a run of lowercase ASCII letters and accented Portuguese
     * letters. The character class must not contain commas: inside
     * {@code [...]} a comma is a literal, which would glue "bola,gato" into
     * a single bogus word.
     */
    private static final Pattern WORD =
            Pattern.compile("[a-záíóúéäïöüëàìòùèãõâîôûêç]+");

    /** Known words mapped to the number of times each occurred in the dictionary file. */
    private final HashMap<String, Integer> nWords = new HashMap<String, Integer>();

    /** Letters tried for replacements and insertions ('w' included so words like "show" are reachable). */
    private final char[] alfabeto = {
        'a', 'á', 'ä', 'à', 'ã', 'â', 'b', 'c', 'ç', 'd', 'e', 'é', 'ë', 'è', 'ê',
        'f', 'g', 'h', 'i', 'í', 'ï', 'ì', 'î', 'j', 'k', 'l', 'm', 'n',
        'o', 'ó', 'ö', 'ò', 'õ', 'ô', 'p', 'q', 'r', 's', 't',
        'u', 'ú', 'ü', 'ù', 'û', 'v', 'w', 'x', 'y', 'z'
    };

    /**
     * Builds the dictionary from a text file.
     *
     * <p>The file is read with {@link FileReader}, i.e. with the platform
     * default charset.
     *
     * @param file path of the dictionary/corpus file
     * @throws IOException if the file cannot be opened or read
     */
    public Spelling(String file) throws IOException {
        BufferedReader in = new BufferedReader(new FileReader(file));
        try {
            for (String line = in.readLine(); line != null; line = in.readLine()) {
                Matcher m = WORD.matcher(line.toLowerCase());
                while (m.find()) {
                    String token = m.group();
                    Integer seen = nWords.get(token);
                    nWords.put(token, seen == null ? 1 : seen + 1);
                }
            }
        } finally {
            // Close the reader even when reading fails part-way through.
            in.close();
        }
    }

    /**
     * Generates every string that is exactly one edit away from {@code word}.
     * The result may contain duplicates and strings that are not real words.
     *
     * @param word the word to mutate
     * @return all deletions, adjacent transpositions, replacements and insertions
     */
    private ArrayList<String> edits(String word) {
        ArrayList<String> result = new ArrayList<String>();
        int length = word.length();
        // Deletions: drop the character at position i.
        for (int i = 0; i < length; ++i) {
            result.add(word.substring(0, i) + word.substring(i + 1));
        }
        // Transpositions: swap the characters at positions i and i + 1.
        for (int i = 0; i < length - 1; ++i) {
            result.add(word.substring(0, i) + word.substring(i + 1, i + 2)
                    + word.substring(i, i + 1) + word.substring(i + 2));
        }
        // Replacements: substitute the character at position i.
        for (int i = 0; i < length; ++i) {
            for (char c : alfabeto) {
                result.add(word.substring(0, i) + c + word.substring(i + 1));
            }
        }
        // Insertions: add a character before position i (i == length appends).
        for (int i = 0; i <= length; ++i) {
            for (char c : alfabeto) {
                result.add(word.substring(0, i) + c + word.substring(i));
            }
        }
        return result;
    }

    /**
     * Suggests corrections for {@code word}.
     *
     * @param word the word to check, compared as-is against the lowercased dictionary
     * @return {@code null} when the word is in the dictionary, or when no known
     *         word lies within two edits of it; otherwise the distinct known
     *         words one edit away (or two edits away if none is one edit
     *         away), most frequent first and alphabetical among equal counts
     */
    public final String[] correct(String word) {
        if (nWords.containsKey(word)) {
            return null;
        }
        ArrayList<String> oneEdit = edits(word);
        // Keyed by word, not by count: keying by count silently dropped every
        // candidate that shared its frequency with another one.
        HashMap<String, Integer> candidates = new HashMap<String, Integer>();
        collectKnown(oneEdit, candidates);
        if (candidates.isEmpty()) {
            for (String s : oneEdit) {
                collectKnown(edits(s), candidates);
            }
        }
        return candidates.isEmpty() ? null : rank(candidates);
    }

    /** Copies every dictionary word found in {@code words}, with its count, into {@code into}. */
    private void collectKnown(ArrayList<String> words, HashMap<String, Integer> into) {
        for (String w : words) {
            Integer count = nWords.get(w);
            if (count != null) {
                into.put(w, count);
            }
        }
    }

    /** Orders the candidate words by descending count, then alphabetically. */
    private static String[] rank(final HashMap<String, Integer> candidates) {
        ArrayList<String> ordered = new ArrayList<String>(candidates.keySet());
        Collections.sort(ordered, new Comparator<String>() {
            @Override
            public int compare(String a, String b) {
                int byCount = candidates.get(b).compareTo(candidates.get(a));
                return byCount != 0 ? byCount : a.compareTo(b);
            }
        });
        return ordered.toArray(new String[0]);
    }
}
para testar:
/**
 * Interactive manual test: asks for a dictionary file, then repeatedly asks
 * for a word and shows either a "word is correct" message or the suggestions
 * returned by {@code Spelling.correct}. The loop ends when the input dialog
 * returns {@code null}.
 *
 * @param args unused
 * @throws IOException if the chosen dictionary file cannot be read
 */
public static void main(String[] args) throws IOException {
    JFileChooser jFileChooser = new JFileChooser();
    // When the chooser is cancelled or closed there is no selected file;
    // stop here instead of failing with a NullPointerException below.
    if (jFileChooser.showOpenDialog(null) != JFileChooser.APPROVE_OPTION
            || jFileChooser.getSelectedFile() == null) {
        return;
    }
    String caminhoDicionario = jFileChooser.getSelectedFile().getAbsolutePath();
    Spelling spelling = new Spelling(caminhoDicionario);
    String word;
    while ((word = JOptionPane.showInputDialog("Digite uma palavra")) != null) {
        String[] correct = spelling.correct(word);
        if (correct == null) {
            // A null result is presented to the user as "the word is correct".
            JOptionPane.showMessageDialog(null, "Palavra esta correta");
        } else {
            JOptionPane.showMessageDialog(null, correct, "Você quis dizer...", JOptionPane.QUESTION_MESSAGE);
        }
    }
}
Melhorei um pouco a bagaça. Versão 2.0, então:
private final SortedSet{String} nWords = new TreeSet{String}();
private final char[] alfabeto = {'a','á','ä','à','ã','â','b','c','ç','d','e','é','ë','è','ê','f','g','h','i','í','ï','ì','î','j','k','l','m','n','o','ó','ö','ò','õ','ô','p','q','r','s','t','u','ú','ü','ù','û','v','x','y','z'};
/**
 * Loads the dictionary: each non-empty line of the file is stored verbatim
 * as one known word.
 *
 * <p>The file is read with {@link FileReader}, i.e. with the platform
 * default charset.
 *
 * @param file path of the dictionary file
 * @throws IOException if the file cannot be opened or read
 */
public Spelling(String file) throws IOException {
    BufferedReader in = new BufferedReader(new FileReader(file));
    try {
        // Start from the first real line: seeding the loop with "" registered
        // the empty string as a valid word.
        for (String line = in.readLine(); line != null; line = in.readLine()) {
            if (line.length() > 0) {
                nWords.add(line);
            }
        }
    } finally {
        // Close the reader even when reading fails part-way through.
        in.close();
    }
}
private ArrayList{String} edits(String word) {
ArrayList{String} result = new ArrayList{String}();
for (int i = 0; i < word.length(); ++i) {
result.add(word.substring(0, i) + word.substring(i + 1));
}
for (int i = 0; i < word.length() - 1; ++i) {
result.add(word.substring(0, i) + word.substring(i + 1, i + 2) + word.substring(i, i + 1) + word.substring(i + 2));
}
for (int i = 0; i < word.length(); ++i) {
for (int z=0; z char c = alfabeto[z];
result.add(word.substring(0, i) + String.valueOf(c) + word.substring(i + 1));
}
}
for (int i = 0; i <= word.length(); ++i) {
for (int z=0; z char c = alfabeto[z];
result.add(word.substring(0, i) + String.valueOf(c) + word.substring(i));
}
}
return result;
}
public final String[] correct(String word) {
if (nWords.contains(word)) {
return null;
}
ArrayList{String} list = edits(word);
SortedSet{String} candidates = new TreeSet{String}();
for (String s : list) {
if (nWords.contains(s)) {
candidates.add(s);
}
}
if (candidates.size() > 0) {
return candidates.toArray(new String[0]);
}
for (String s : list) {
for (String w : edits(s)) {
if (nWords.contains(w)) {
candidates.add(w);
}
}
}
return candidates.size() > 0 ? candidates.toArray(new String[0]) : null;
}
Amigo, acredito que haja uma inconsistência nesse trecho do código:
"for (int z=0; z char c = alfabeto[z]; "
Opa, na hora de publicar cortou. O certo é:
for (int z=0; z<alfabeto.length; z++) {
char c = alfabeto[z];
Se exibir o código-fonte da página, aparece certo.
Olá Michael parabéns pelo o pôster, eu fiz um pôster no meu blog aqui também http://isjavado.wordpress.com/ e citei o seu blz? VALEU.
ResponderExcluir