T9
T9 Trie Word Completion Algorithm
|
00001 00009 package t9; 00010 00011 import java.io.BufferedReader; 00012 import java.io.File; 00013 import java.io.FileNotFoundException; 00014 import java.io.FileReader; 00015 import java.io.IOException; 00016 import java.net.URL; 00017 import java.util.LinkedHashSet; 00018 00019 import t9.trie.Pair; 00020 import t9.trie.TrieBuilder; 00021 import t9.trie.TrieNode; 00022 00029 public class Dictionary { 00030 private final static String wordsFile = "5000_common_words.csv"; 00031 00032 private static Dictionary instance = null; 00033 private static TrieNode[] mDictionary; 00034 00035 private String getPathForResource( String filename ) 00036 { 00037 ClassLoader loader = this.getClass().getClassLoader(); 00038 URL url = loader.getResource( filename ); 00039 String path = null; 00040 if (url != null) { 00041 path = url.getFile(); 00042 } 00043 return path; 00044 } // getPathForResource 00045 00054 protected Dictionary() throws FileNotFoundException, IOException { 00055 int numTries = ((int)'z' - (int)'a') + 1; 00056 mDictionary = new TrieNode[ numTries ]; 00057 String wordPath = getPathForResource( wordsFile ); 00058 File wordFile = new File( wordPath ); 00059 if (wordFile.exists() && wordFile.canRead()) { 00060 FileReader reader = new FileReader( wordFile ); 00061 BufferedReader bufReader = null; 00062 try { 00063 bufReader = new BufferedReader( reader ); 00064 String line = null; 00065 char startChar = 'a'; 00066 LinkedHashSet<Pair> set = new LinkedHashSet<Pair>(); 00067 TrieBuilder builder = new TrieBuilder(); 00068 int lineCnt = 0; 00069 while ((line = bufReader.readLine()) != null) { 00070 lineCnt++; 00071 String[] parts = line.split(","); 00072 if (parts.length == 2) { 00073 String word = parts[0].toLowerCase(); 00074 int freq = Integer.parseInt( parts[1] ); 00075 Pair pair = new Pair(word, freq ); 00076 if (word.charAt(0) == startChar) { 00077 set.add(pair); 00078 } 00079 else { 00080 TrieNode root = builder.buildTrie( set ); 00081 int ix = (int)startChar - (int)'a'; 00082 mDictionary[ix] = root; 00083 set.clear(); 00084 } 00085 } 00086 else { 00087 throw new IOException("Bad format, in file " + wordsFile + " line " + lineCnt ); 00088 } 00089 } // while 00090 } 00091 finally { 00092 if (bufReader != null) { bufReader.close(); } 00093 } 00094 } 00095 else { 00096 final String msg = "Could not read " + wordsFile; 00097 throw new FileNotFoundException( msg ); 00098 } 00099 } // Dictionary constructor 00100 00108 public static Dictionary getDictionary() throws FileNotFoundException, IOException 00109 { 00110 if (instance == null) { 00111 instance = new Dictionary(); 00112 } 00113 return instance; 00114 } 00115 00122 public TrieNode getPrefixTrie(final char letter) { 00123 char l = Character.toLowerCase(letter); 00124 TrieNode root = null; 00125 if (Character.isLetter(l)) { 00126 int ix = (int)l - (int)'z'; 00127 root = mDictionary[ ix ]; 00128 } 00129 return root; 00130 } 00131 }