Trie or Prefix Tree is an interesting tree based data structure for storing set of words efficiently and for
- Finding the words stored
- Finding the words with common prefix
Its common use-cases are
- Auto-complete
- Spell checker
- Longest Prefix Matching
Trie Node Structure
Trie Node structure is simple with an array of Trie Node links. The size of this array depends on the data we want to insert. We want to store only english words, we use size of 26 here representing each alphabet in English.
We also store a boolean variable to mark a word's end. The Trie Node structure in Java represented as below.
class TrieNode { | |
private TrieNode[] links; | |
private boolean isEnd; | |
public TrieNode() { | |
links = new TrieNode[26]; | |
} | |
public TrieNode getNode(char ch) { | |
return links[ch - 'a']; | |
} | |
public void setNode(char ch, TrieNode node) { | |
links[ch - 'a'] = node; | |
} | |
public boolean containsKey(char ch) { | |
return links[ch - 'a'] != null; | |
} | |
public void setEnd() { | |
isEnd = true; | |
} | |
public boolean isEnd() { | |
return isEnd; | |
} | |
} |
Trie Implementation
In this implementation, we are going to implement three methods
- Inserting the word into trie
- Searching the whole word if exists in trie or not
- Searching the prefix if exists or not
class Trie { | |
private TrieNode root; | |
public Trie() { | |
root = new TrieNode(); | |
} | |
public void insert(String word) { | |
TrieNode node = root; | |
for (int i = 0; i < word.length(); i++) { | |
char ch = word.charAt(i); | |
if (!node.containsKey(ch)) { | |
node.setNode(ch, new TrieNode()); | |
} | |
node = node.getNode(ch); | |
} | |
node.setEnd(); | |
} | |
private TrieNode searchPrefix(String word) { | |
TrieNode node = root; | |
for (int i = 0; i < word.length(); i++) { | |
char ch = word.charAt(i); | |
if (node.containsKey(ch)) { | |
node = node.getNode(ch); | |
} else { | |
return null; | |
} | |
} | |
return node; | |
} | |
public boolean search(String word) { | |
TrieNode node = searchPrefix(word); | |
return node != null && node.isEnd(); | |
} | |
public boolean startsWith(String prefix) { | |
TrieNode node = searchPrefix(prefix); | |
return node != null; | |
} | |
} |
The search implementation is similar to insert except here we do not insert, we check whether each word exists in the trie from the root. There are two cases here
- If the word exists or prefix exists, then it should return the last node of the word or prefix.
- If it doesn't exist, it will return null.
Trie Implementation ii
In this extension of the above problem, we see how to add erase functionality and the count of search words and prefixes inserted in the trie.
For Example, if we insert "pranay" string two times into the trie. When we search for the count, we should get 2 as answer.
For this purpose, we are going to our existing TrieNode Entity, Here we added two variables end and prefix to keep track of the count and corresponding methods for the encapsulation purpose. Code would be cleaner this way.
class TrieNode { | |
private final TrieNode[] links; | |
private boolean isEnd; | |
private int end = 0; | |
private int prefix = 0; | |
private static final int count = 26; | |
public TrieNode() { | |
links = new TrieNode[count]; | |
} | |
public TrieNode getNode(char ch) { | |
return links[ch - 'a']; | |
} | |
public void setNode(char ch, TrieNode node) { | |
links[ch - 'a'] = node; | |
} | |
public boolean containsKey(char ch) { | |
return links[ch - 'a'] != null; | |
} | |
public void setEnd(boolean value) { | |
isEnd = value; | |
} | |
public boolean isEnd() { | |
return isEnd; | |
} | |
public void incrementEnd() { | |
end++; | |
} | |
public int getEnd() { | |
return end; | |
} | |
public void incrementPrefix() { | |
prefix++; | |
} | |
public int getPrefix() { | |
return prefix; | |
} | |
public void decrementPrefix() { | |
prefix--; | |
} | |
public void decrementEnd() { | |
end--; | |
} | |
} |
Updated Implementation
class TrieWithErase { | |
private TrieNode root; | |
public TrieWithErase() { | |
root = new TrieNode(); | |
} | |
public void insert(String word) { | |
TrieNode node = root; | |
for (int i = 0; i < word.length(); i++) { | |
char ch = word.charAt(i); | |
if (!node.containsKey(ch)) { | |
node.setNode(ch, new TrieNode()); | |
} | |
node = node.getNode(ch); | |
node.incrementPrefix(); | |
} | |
node.incrementEnd(); | |
node.setEnd(true); | |
} | |
private TrieNode searchPrefix(String word) { | |
TrieNode node = root; | |
for (int i = 0; i < word.length(); i++) { | |
char ch = word.charAt(i); | |
if (node.containsKey(ch)) { | |
node = node.getNode(ch); | |
} else { | |
return null; | |
} | |
} | |
return node; | |
} | |
public boolean search(String word) { | |
TrieNode node = searchPrefix(word); | |
return node != null && node.isEnd(); | |
} | |
public int getEndsWithCount(String word) { | |
TrieNode node = searchPrefix(word); | |
return (node != null && node.isEnd()) ? node.getEnd() : 0; | |
} | |
public boolean startsWith(String prefix) { | |
TrieNode node = searchPrefix(prefix); | |
return node != null; | |
} | |
public int getStartsWithCount(String word) { | |
TrieNode node = searchPrefix(word); | |
return (node != null) ? node.getPrefix() : 0; | |
} | |
// Assuming erase will be called only if the word exists | |
public void eraseWord(String word) { | |
TrieNode node = root; | |
for (int i = 0; i < word.length(); i++) { | |
char ch = word.charAt(i); | |
if (node.containsKey(ch)) { | |
node = node.getNode(ch); | |
node.decrementPrefix(); | |
} | |
} | |
node.decrementEnd(); | |
if (node.getEnd() == 0) { | |
node.setEnd(false); | |
} | |
} | |
} |
Post a Comment