Added tests for gram-based indexes and hashed tokens; cursor hasNext() methods and their callers now also declare IndexException.

git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_inverted_index_updates_new@1868 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/api/IIndexCursor.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/api/IIndexCursor.java
index 5a23fc6..838be6f 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/api/IIndexCursor.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/api/IIndexCursor.java
@@ -22,7 +22,7 @@
     public void open(ICursorInitialState initialState, ISearchPredicate searchPred) throws IndexException,
             HyracksDataException;
 
-    public boolean hasNext() throws HyracksDataException;
+    public boolean hasNext() throws HyracksDataException, IndexException;
 
     public void next() throws HyracksDataException;
 
diff --git a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeRangeSearchCursor.java b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeRangeSearchCursor.java
index a734ea4..3916ac7 100644
--- a/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeRangeSearchCursor.java
+++ b/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeRangeSearchCursor.java
@@ -47,7 +47,7 @@
     }
 
     @Override
-    public boolean hasNext() throws HyracksDataException {
+    public boolean hasNext() throws HyracksDataException, IndexException {
         checkPriorityQueue();
         PriorityQueueElement pqHead = outputPriorityQueue.peek();
         if (pqHead == null) {
diff --git a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMTreeSearchCursor.java b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMTreeSearchCursor.java
index 722d1b5..8625b13 100644
--- a/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMTreeSearchCursor.java
+++ b/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMTreeSearchCursor.java
@@ -23,6 +23,7 @@
 import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
 import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
 import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
 import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
 import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMTreeTupleReference;
 import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
@@ -44,7 +45,7 @@
         needPush = false;
     }
 
-    public void initPriorityQueue() throws HyracksDataException {
+    public void initPriorityQueue() throws HyracksDataException, IndexException {
         int pqInitSize = (rangeCursors.length > 0) ? rangeCursors.length : 1;
         outputPriorityQueue = new PriorityQueue<PriorityQueueElement>(pqInitSize, pqCmp);
         for (int i = 0; i < rangeCursors.length; i++) {
@@ -79,7 +80,7 @@
     }
 
     @Override
-    public boolean hasNext() throws HyracksDataException {
+    public boolean hasNext() throws HyracksDataException, IndexException {
         checkPriorityQueue();
         return !outputPriorityQueue.isEmpty();
     }
@@ -127,7 +128,7 @@
         return (ITupleReference) outputElement.getTuple();
     }
 
-    protected boolean pushIntoPriorityQueue(PriorityQueueElement e) throws HyracksDataException {
+    protected boolean pushIntoPriorityQueue(PriorityQueueElement e) throws HyracksDataException, IndexException {
         int cursorIndex = e.getCursorIndex();
         if (rangeCursors[cursorIndex].hasNext()) {
             rangeCursors[cursorIndex].next();
@@ -143,7 +144,7 @@
         return ((ILSMTreeTupleReference) checkElement.getTuple()).isAntimatter();
     }
     
-    protected void checkPriorityQueue() throws HyracksDataException {
+    protected void checkPriorityQueue() throws HyracksDataException, IndexException {
         while (!outputPriorityQueue.isEmpty() || needPush == true) {
             if (!outputPriorityQueue.isEmpty()) {
                 PriorityQueueElement checkElement = outputPriorityQueue.peek();
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedListCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedListCursor.java
index 321eab0..489495c 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedListCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedListCursor.java
@@ -18,6 +18,7 @@
 import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
 import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
 import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
 import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
 
 public interface IInvertedListCursor extends Comparable<IInvertedListCursor> {
@@ -27,7 +28,7 @@
 
     public void unpinPages() throws HyracksDataException;
 
-    public boolean hasNext() throws HyracksDataException;
+    public boolean hasNext() throws HyracksDataException, IndexException;
 
     public void next() throws HyracksDataException;
 
@@ -42,11 +43,11 @@
 
     public int getStartOff();
 
-    public boolean containsKey(ITupleReference searchTuple, MultiComparator invListCmp) throws HyracksDataException;
+    public boolean containsKey(ITupleReference searchTuple, MultiComparator invListCmp) throws HyracksDataException, IndexException;
 
     // for debugging
     @SuppressWarnings("rawtypes")
-    public String printInvList(ISerializerDeserializer[] serdes) throws HyracksDataException;
+    public String printInvList(ISerializerDeserializer[] serdes) throws HyracksDataException, IndexException;
 
     @SuppressWarnings("rawtypes")
     public String printCurrentElement(ISerializerDeserializer[] serdes) throws HyracksDataException;
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
index 6c2de44..aafe950 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
@@ -27,6 +27,7 @@
 import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
 import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
 import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMHarness;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.exceptions.OccurrenceThresholdPanicException;
 
 /**
  * Searches the components one-by-one, completely consuming a cursor before moving on to the next one.
@@ -86,7 +87,7 @@
     }
     
     // Move to the next tuple that has not been deleted.
-    private boolean nextValidTuple() throws HyracksDataException {
+    private boolean nextValidTuple() throws HyracksDataException, IndexException {
         while (currentCursor.hasNext()) {
             currentCursor.next();
             if (!isDeleted(currentCursor.getTuple())) { 
@@ -98,7 +99,7 @@
     }
     
     @Override
-    public boolean hasNext() throws HyracksDataException {
+    public boolean hasNext() throws HyracksDataException, IndexException {
         if (!tupleConsumed) {
             return true;
         }
@@ -115,6 +116,8 @@
             currentCursor = currentAccessor.createSearchCursor();
             try {
                 currentAccessor.search(currentCursor, searchPred);
+            } catch (OccurrenceThresholdPanicException e) {
+                throw e;
             } catch (IndexException e) {
                 throw new HyracksDataException(e);
             }
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java
index c182c87..68de656 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java
@@ -108,7 +108,7 @@
     }
 
     @Override
-    public boolean hasNext() throws HyracksDataException {
+    public boolean hasNext() throws HyracksDataException, IndexException {
         return btreeCursor.hasNext();
     }
 
@@ -170,7 +170,7 @@
     }
 
     @Override
-    public boolean containsKey(ITupleReference searchTuple, MultiComparator invListCmp) throws HyracksDataException {
+    public boolean containsKey(ITupleReference searchTuple, MultiComparator invListCmp) throws HyracksDataException, IndexException {
         btreeSearchTuple.addTuple(searchTuple);
         btreePred.setLowKeyComparator(btreeCmp);
         btreePred.setHighKeyComparator(btreeCmp);
@@ -195,7 +195,7 @@
 
     @SuppressWarnings("rawtypes")
     @Override
-    public String printInvList(ISerializerDeserializer[] serdes) throws HyracksDataException {
+    public String printInvList(ISerializerDeserializer[] serdes) throws HyracksDataException, IndexException {
         StringBuilder strBuilder = new StringBuilder();
         try {
             while (btreeCursor.hasNext()) {
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexRangeSearchCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexRangeSearchCursor.java
index 875421b..540fb9f 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexRangeSearchCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexRangeSearchCursor.java
@@ -78,7 +78,7 @@
     }
 
     @Override
-    public boolean hasNext() throws HyracksDataException {
+    public boolean hasNext() throws HyracksDataException, IndexException {
         if (invListCursor.hasNext()) {
             return true;
         }
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcher.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcher.java
index 1613a0b..2caa740 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcher.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcher.java
@@ -186,7 +186,7 @@
         resultCursor.open(null, searchPred);
     }
 
-    protected int mergePrefixLists(int numPrefixTokens, int numQueryTokens) throws HyracksDataException {
+    protected int mergePrefixLists(int numPrefixTokens, int numQueryTokens) throws HyracksDataException, IndexException {
         int maxPrevBufIdx = 0;
         for (int i = 0; i < numPrefixTokens; i++) {
             swap = prevResultBuffers;
@@ -202,7 +202,7 @@
     }
 
     protected int mergeSuffixLists(int numPrefixTokens, int numQueryTokens, int maxPrevBufIdx)
-            throws HyracksDataException {
+            throws HyracksDataException, IndexException {
         for (int i = numPrefixTokens; i < numQueryTokens; i++) {
             swap = prevResultBuffers;
             prevResultBuffers = newResultBuffers;
@@ -224,7 +224,7 @@
     }
 
     protected int mergeSuffixListProbe(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers,
-            int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws HyracksDataException {
+            int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws HyracksDataException, IndexException {
 
         int newBufIdx = 0;
         ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
@@ -270,7 +270,7 @@
 
     protected int mergeSuffixListScan(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers,
             int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens)
-            throws HyracksDataException {
+            throws HyracksDataException, IndexException {
         
         int newBufIdx = 0;
         ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
@@ -365,7 +365,7 @@
     }
 
     protected int mergePrefixList(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers,
-            int maxPrevBufIdx, List<ByteBuffer> newResultBuffers) throws HyracksDataException {
+            int maxPrevBufIdx, List<ByteBuffer> newResultBuffers) throws HyracksDataException, IndexException {
         
         int newBufIdx = 0;
         ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcherSuffixProbeOnly.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcherSuffixProbeOnly.java
index d4ec2cf..630b810 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcherSuffixProbeOnly.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcherSuffixProbeOnly.java
@@ -21,6 +21,7 @@
 import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
 import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
 import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
 import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndex;
@@ -34,7 +35,7 @@
         this.invListCmp = MultiComparator.create(invIndex.getInvListCmpFactories());
     }
 
-    protected int mergeSuffixLists(int numPrefixTokens, int numQueryTokens, int maxPrevBufIdx) throws HyracksDataException {
+    protected int mergeSuffixLists(int numPrefixTokens, int numQueryTokens, int maxPrevBufIdx) throws HyracksDataException, IndexException {
         for (int i = numPrefixTokens; i < numQueryTokens; i++) {
             swap = prevResultBuffers;
             prevResultBuffers = newResultBuffers;
@@ -50,7 +51,7 @@
     }
 
     protected int mergeSuffixListProbe(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers,
-            int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws HyracksDataException {
+            int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws HyracksDataException, IndexException {
 
         int newBufIdx = 0;
         ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcherSuffixScanOnly.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcherSuffixScanOnly.java
index 8c6f2c7..3640511 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcherSuffixScanOnly.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcherSuffixScanOnly.java
@@ -22,6 +22,7 @@
 import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
 import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
 import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
 import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndex;
@@ -35,7 +36,7 @@
         this.invListCmp = MultiComparator.create(invIndex.getInvListCmpFactories());
     }
 
-    protected int mergeSuffixLists(int numPrefixTokens, int numQueryTokens, int maxPrevBufIdx) throws HyracksDataException {
+    protected int mergeSuffixLists(int numPrefixTokens, int numQueryTokens, int maxPrevBufIdx) throws HyracksDataException, IndexException {
         for (int i = numPrefixTokens; i < numQueryTokens; i++) {
             swap = prevResultBuffers;
             prevResultBuffers = newResultBuffers;
@@ -51,7 +52,7 @@
     }
 
     protected int mergeSuffixListScan(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers,
-            int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws HyracksDataException {
+            int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws HyracksDataException, IndexException {
 
         int newBufIdx = 0;
         ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizerFactory.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizerFactory.java
index 9813154..986d938 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizerFactory.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizerFactory.java
@@ -15,25 +15,22 @@
 
 package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers;
 
+public class DelimitedUTF8StringBinaryTokenizerFactory implements IBinaryTokenizerFactory {
 
-public class DelimitedUTF8StringBinaryTokenizerFactory implements
-		IBinaryTokenizerFactory {
+    private static final long serialVersionUID = 1L;
+    private final boolean ignoreTokenCount;
+    private final boolean sourceHasTypeTag;
+    private final ITokenFactory tokenFactory;
 
-	private static final long serialVersionUID = 1L;
-	private final boolean ignoreTokenCount;
-	private final boolean sourceHasTypeTag;
-	private final ITokenFactory tokenFactory;
+    public DelimitedUTF8StringBinaryTokenizerFactory(boolean ignoreTokenCount, boolean sourceHasTypeTag,
+            ITokenFactory tokenFactory) {
+        this.ignoreTokenCount = ignoreTokenCount;
+        this.sourceHasTypeTag = sourceHasTypeTag;
+        this.tokenFactory = tokenFactory;
+    }
 
-	public DelimitedUTF8StringBinaryTokenizerFactory(boolean ignoreTokenCount,
-			boolean sourceHasTypeTag, ITokenFactory tokenFactory) {
-		this.ignoreTokenCount = ignoreTokenCount;
-		this.sourceHasTypeTag = sourceHasTypeTag;
-		this.tokenFactory = tokenFactory;
-	}
-
-	@Override
-	public IBinaryTokenizer createTokenizer() {
-		return new DelimitedUTF8StringBinaryTokenizer(ignoreTokenCount,
-				sourceHasTypeTag, tokenFactory);
-	}
+    @Override
+    public IBinaryTokenizer createTokenizer() {
+        return new DelimitedUTF8StringBinaryTokenizer(ignoreTokenCount, sourceHasTypeTag, tokenFactory);
+    }
 }
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8NGramTokenFactory.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8NGramTokenFactory.java
index 866b17c..22efc92 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8NGramTokenFactory.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8NGramTokenFactory.java
@@ -15,7 +15,6 @@
 
 package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers;
 
-
 public class HashedUTF8NGramTokenFactory extends AbstractUTF8TokenFactory {
 
 	private static final long serialVersionUID = 1L;
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizerFactory.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizerFactory.java
new file mode 100644
index 0000000..da3d411
--- /dev/null
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizerFactory.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers;
+/** Creates {@link NGramUTF8StringBinaryTokenizer} instances that all share one fixed configuration. */
+public class NGramUTF8StringBinaryTokenizerFactory implements IBinaryTokenizerFactory {
+
+    private static final long serialVersionUID = 1L;
+    private final int gramLength;
+    private final boolean usePrePost;
+    private final boolean ignoreTokenCount;
+    private final boolean sourceHasTypeTag;
+    private final ITokenFactory tokenFactory;
+    /** Stores the given settings unchanged; each is later passed to the tokenizer constructor. */
+    public NGramUTF8StringBinaryTokenizerFactory(int gramLength, boolean usePrePost, boolean ignoreTokenCount,
+            boolean sourceHasTypeTag, ITokenFactory tokenFactory) {
+        this.gramLength = gramLength;
+        this.usePrePost = usePrePost;
+        this.ignoreTokenCount = ignoreTokenCount;
+        this.sourceHasTypeTag = sourceHasTypeTag;
+        this.tokenFactory = tokenFactory;
+    }
+    /** Returns a new tokenizer built from the settings captured at construction time. */
+    @Override
+    public IBinaryTokenizer createTokenizer() {
+        return new NGramUTF8StringBinaryTokenizer(gramLength, usePrePost, ignoreTokenCount, sourceHasTypeTag,
+                tokenFactory);
+    }
+
+}
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java
index 65d52d5..b39d337 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java
@@ -33,16 +33,16 @@
 import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
 import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
 import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
 import edu.uci.ics.hyracks.storage.am.common.api.IndexType;
-import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
 import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
 import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOp;
 import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponentFinalizer;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexFileManager;
 import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMFlushController;
 import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
 import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndex;
 import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexFileManager;
 import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
 import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTracker;
 import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.InMemoryBufferCache;
@@ -270,7 +270,7 @@
 
     @Override
     public boolean insertUpdateOrDelete(ITupleReference tuple, IIndexOpContext ictx) throws HyracksDataException,
-            TreeIndexException {
+            IndexException {
         LSMRTreeOpContext ctx = (LSMRTreeOpContext) ictx;
         if (ctx.getIndexOp() == IndexOp.PHYSICALDELETE) {
             throw new UnsupportedOperationException("Physical delete not yet supported in LSM R-tree");
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesSearchCursor.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesSearchCursor.java
index 63ad633..60b0370 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesSearchCursor.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesSearchCursor.java
@@ -43,7 +43,7 @@
     private int[] comparatorFields;
     private MultiComparator btreeCmp;
 
-    public void initPriorityQueue() throws HyracksDataException {
+    public void initPriorityQueue() throws HyracksDataException, IndexException {
         int pqInitSize = (rangeCursors.length > 0) ? rangeCursors.length : 1;
         outputPriorityQueue = new PriorityQueue<PriorityQueueElement>(pqInitSize, pqCmp);
         for (int i = 0; i < rangeCursors.length; i++) {
@@ -80,7 +80,7 @@
     }
 
     @Override
-    public boolean hasNext() throws HyracksDataException {
+    public boolean hasNext() throws HyracksDataException, IndexException {
         if (includeMemComponent) {
             if (foundNext) {
                 return true;
diff --git a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/common/AbstractTreeIndexTestWorker.java b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/common/AbstractTreeIndexTestWorker.java
index ca162ed..2108260 100644
--- a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/common/AbstractTreeIndexTestWorker.java
+++ b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/common/AbstractTreeIndexTestWorker.java
@@ -23,6 +23,7 @@
 import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
 import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
 import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
 import edu.uci.ics.hyracks.storage.am.common.datagen.DataGenThread;
 import edu.uci.ics.hyracks.storage.am.common.datagen.TupleBatch;
 
@@ -60,7 +61,7 @@
         }
     }
 
-    protected void consumeCursorTuples(IIndexCursor cursor) throws HyracksDataException {
+    protected void consumeCursorTuples(IIndexCursor cursor) throws HyracksDataException, IndexException {
         try {
             while (cursor.hasNext()) {
                 cursor.next();
diff --git a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java
index d8ebb1d..a70a0fe 100644
--- a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java
+++ b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java
@@ -74,7 +74,7 @@
     public static final int LSM_INVINDEX_MAX_OPEN_FILES = 10;
     public static final int LSM_INVINDEX_HYRACKS_FRAME_SIZE = 32768;
     
-    public static final int LSM_INVINDEX_NUM_DOCS_TO_INSERT = 10000;
+    public static final int LSM_INVINDEX_NUM_DOCS_TO_INSERT = 1000;
     public static final int LSM_INVINDEX_NUM_BULKLOAD_ROUNDS = 5;
     public static final int LSM_INVINDEX_MAX_TREES_TO_MERGE = 5;
     public static final int LSM_INVINDEX_NUM_INSERT_ROUNDS = 3;
diff --git a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/multithread/BTreeTestWorker.java b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/multithread/BTreeTestWorker.java
index 9be5a75..7fd4f5f 100644
--- a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/multithread/BTreeTestWorker.java
+++ b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/multithread/BTreeTestWorker.java
@@ -29,7 +29,7 @@
 import edu.uci.ics.hyracks.storage.am.common.TestOperationSelector.TestOperation;
 import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
 import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
-import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
 import edu.uci.ics.hyracks.storage.am.common.datagen.DataGenThread;
 import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
 
@@ -48,7 +48,7 @@
     }
 
     @Override
-    public void performOp(ITupleReference tuple, TestOperation op) throws HyracksDataException, TreeIndexException {
+    public void performOp(ITupleReference tuple, TestOperation op) throws HyracksDataException, IndexException {
         BTree.BTreeAccessor accessor = (BTree.BTreeAccessor) indexAccessor;
         ITreeIndexCursor searchCursor = accessor.createSearchCursor();
         ITreeIndexCursor diskOrderScanCursor = accessor.createDiskOrderScanCursor();
@@ -121,7 +121,7 @@
         }
     }
 
-    private void consumeCursorTuples(ITreeIndexCursor cursor) throws HyracksDataException {
+    private void consumeCursorTuples(ITreeIndexCursor cursor) throws HyracksDataException, IndexException {
         try {
             while (cursor.hasNext()) {
                 cursor.next();
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
index ad47653..db2bc8e 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
@@ -74,4 +74,11 @@
         TupleGenerator tupleGen = InvertedIndexTestUtils.createStringDocumentTupleGen(harness.getRandom());
         runTest(testCtx, tupleGen);
     }
+    
+    @Test
+    public void hashedWordTokensInvIndexTest() throws IOException, IndexException {
+        // Builds the hashed-word test context first, then hands it to runTest with a string-document generator.
+        InvertedIndexTestContext ctx = InvertedIndexTestUtils.createHashedWordInvIndexTestContext(harness, invIndexType);
+        runTest(ctx, InvertedIndexTestUtils.createStringDocumentTupleGen(harness.getRandom()));
+    }
 }
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexLoadTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexLoadTest.java
index bb8c851..0c3d53e 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexLoadTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexLoadTest.java
@@ -63,4 +63,11 @@
         TupleGenerator tupleGen = InvertedIndexTestUtils.createStringDocumentTupleGen(harness.getRandom());
         runTest(testCtx, tupleGen);
     }
+    
+    @Test
+    public void hashedWordTokensInvIndexTest() throws IOException, IndexException {
+        // Builds the hashed-word test context first, then hands it to runTest with a string-document generator.
+        InvertedIndexTestContext ctx = InvertedIndexTestUtils.createHashedWordInvIndexTestContext(harness, invIndexType);
+        runTest(ctx, InvertedIndexTestUtils.createStringDocumentTupleGen(harness.getRandom()));
+    }
 }
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
index d777b85..ce6e829 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
@@ -16,6 +16,10 @@
 package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Level;
+import java.util.logging.Logger;
 
 import org.junit.Test;
 
@@ -24,15 +28,18 @@
 import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.ConjunctiveSearchModifier;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.EditDistanceSearchModifier;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.JaccardSearchModifier;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext.InvertedIndexType;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestUtils;
 
 public abstract class AbstractInvertedIndexSearchTest extends AbstractInvertedIndexTest {
 
+    protected static final Logger LOGGER = Logger.getLogger(AbstractInvertedIndexSearchTest.class.getName());
+
     protected int NUM_DOC_QUERIES = 8000;
     protected int NUM_RANDOM_QUERIES = 2000;
-    protected int[] scanCountArray = new int[NUM_DOCS_TO_INSERT];
     protected final boolean bulkLoad;
 
     public AbstractInvertedIndexSearchTest(InvertedIndexType invIndexType, boolean bulkLoad) {
@@ -41,7 +48,7 @@
     }
 
     protected void runTest(InvertedIndexTestContext testCtx, TupleGenerator tupleGen,
-            IInvertedIndexSearchModifier searchModifier) throws IOException, IndexException {
+            List<IInvertedIndexSearchModifier> searchModifiers) throws IOException, IndexException {
         IIndex invIndex = testCtx.getIndex();
         invIndex.create();
         invIndex.activate();
@@ -53,19 +60,70 @@
         }
         invIndex.validate();
 
-        InvertedIndexTestUtils.testIndexSearch(testCtx, tupleGen, harness.getRandom(), NUM_DOC_QUERIES,
-                NUM_RANDOM_QUERIES, searchModifier, scanCountArray);
-        
+        for (IInvertedIndexSearchModifier searchModifier : searchModifiers) {
+            if (LOGGER.isLoggable(Level.INFO)) {
+                LOGGER.info("Running searches with: " + searchModifier.toString());
+            }
+            InvertedIndexTestUtils.testIndexSearch(testCtx, tupleGen, harness.getRandom(), NUM_DOC_QUERIES,
+                    NUM_RANDOM_QUERIES, searchModifier, SCAN_COUNT_ARRAY);
+        }
+
         invIndex.deactivate();
         invIndex.destroy();
     }
 
+    private void testWordInvIndexIndex(InvertedIndexTestContext testCtx) throws IOException, IndexException {
+        // Runs the search test on string documents with one conjunctive modifier plus a series of Jaccard modifiers.
+        TupleGenerator docGen = InvertedIndexTestUtils.createStringDocumentTupleGen(harness.getRandom());
+        List<IInvertedIndexSearchModifier> modifiers = new ArrayList<IInvertedIndexSearchModifier>();
+        modifiers.add(new ConjunctiveSearchModifier());
+        // One JaccardSearchModifier per constructor argument, from 1.0 down to 0.5 in steps of 0.1.
+        float[] jaccardArgs = { 1.0f, 0.9f, 0.8f, 0.7f, 0.6f, 0.5f };
+        for (float jaccardArg : jaccardArgs) {
+            modifiers.add(new JaccardSearchModifier(jaccardArg));
+        }
+        runTest(testCtx, docGen, modifiers);
+    }
+    
+    private void testNGramInvIndexIndex(InvertedIndexTestContext testCtx) throws IOException, IndexException {
+        // Runs the search test on person names with conjunctive, Jaccard and edit-distance modifiers, in that order.
+        TupleGenerator nameGen = InvertedIndexTestUtils.createPersonNamesTupleGen(harness.getRandom());
+        List<IInvertedIndexSearchModifier> modifiers = new ArrayList<IInvertedIndexSearchModifier>();
+        modifiers.add(new ConjunctiveSearchModifier());
+        float[] jaccardArgs = { 1.0f, 0.9f, 0.8f, 0.7f, 0.6f, 0.5f };
+        for (float jaccardArg : jaccardArgs) {
+            modifiers.add(new JaccardSearchModifier(jaccardArg));
+        }
+        // Each EditDistanceSearchModifier receives TEST_GRAM_LENGTH plus a second argument running from 0 to 3
+        // (presumably the edit-distance threshold; confirm against the constructor).
+        for (int edArg = 0; edArg <= 3; edArg++) {
+            modifiers.add(new EditDistanceSearchModifier(InvertedIndexTestUtils.TEST_GRAM_LENGTH, edArg));
+        }
+        runTest(testCtx, nameGen, modifiers);
+    }
+    
     @Test
     public void wordTokensInvIndexTest() throws IOException, IndexException {
         InvertedIndexTestContext testCtx = InvertedIndexTestUtils.createWordInvIndexTestContext(harness, invIndexType);
-        TupleGenerator tupleGen = InvertedIndexTestUtils.createStringDocumentTupleGen(harness.getRandom());
-        IInvertedIndexSearchModifier searchModifier = new ConjunctiveSearchModifier();
-        runTest(testCtx, tupleGen, searchModifier);
+        testWordInvIndexIndex(testCtx);
+    }
+    
+    @Test
+    public void hashedWordTokensInvIndexTest() throws IOException, IndexException {
+        // Word-index search test, run against the context from createHashedWordInvIndexTestContext.
+        testWordInvIndexIndex(InvertedIndexTestUtils.createHashedWordInvIndexTestContext(harness, invIndexType));
+    }
+    
+    @Test
+    public void ngramTokensInvIndexTest() throws IOException, IndexException {
+        // N-gram search test, run against the context from createNGramInvIndexTestContext.
+        testNGramInvIndexIndex(InvertedIndexTestUtils.createNGramInvIndexTestContext(harness, invIndexType));
+    }
+    
+    @Test
+    public void hashedNGramTokensInvIndexTest() throws IOException, IndexException {
+        // N-gram search test, run against the context from createHashedNGramInvIndexTestContext.
+        testNGramInvIndexIndex(InvertedIndexTestUtils.createHashedNGramInvIndexTestContext(harness, invIndexType));
     }
 
 }
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
index bcbd0fa..9551921 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
@@ -49,6 +49,7 @@
 import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
 import edu.uci.ics.hyracks.storage.am.common.datagen.DocumentStringFieldValueGenerator;
 import edu.uci.ics.hyracks.storage.am.common.datagen.IFieldValueGenerator;
+import edu.uci.ics.hyracks.storage.am.common.datagen.PersonNameFieldValueGenerator;
 import edu.uci.ics.hyracks.storage.am.common.datagen.SortedIntegerFieldValueGenerator;
 import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
 import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
@@ -62,19 +63,22 @@
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.exceptions.OccurrenceThresholdPanicException;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.InvertedIndexSearchPredicate;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.DelimitedUTF8StringBinaryTokenizerFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.HashedUTF8NGramTokenFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.HashedUTF8WordTokenFactory;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IToken;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.ITokenFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizerFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.UTF8NGramTokenFactory;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.UTF8WordTokenFactory;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext.InvertedIndexType;
 
 @SuppressWarnings("rawtypes")
 public class InvertedIndexTestUtils {
 
-    // Probability that a randomly generated query is used, instead of a document from the corpus.
-    protected static final float RQNDOM_QUERY_PROB = 0.9f;
-    
+    public static final int TEST_GRAM_LENGTH = 3;
+
     public static TupleGenerator createStringDocumentTupleGen(Random rnd) throws IOException {
         IFieldValueGenerator[] fieldGens = new IFieldValueGenerator[2];
         fieldGens[0] = new DocumentStringFieldValueGenerator(2, 10, 10000, rnd);
@@ -85,6 +89,16 @@
         return tupleGen;
     }
 
+    /** Creates a two-field TupleGenerator fed by PersonNameFieldValueGenerator and SortedIntegerFieldValueGenerator. */
+    public static TupleGenerator createPersonNamesTupleGen(Random rnd) throws IOException {
+        IFieldValueGenerator[] generators = { new PersonNameFieldValueGenerator(rnd, 0.5f),
+                new SortedIntegerFieldValueGenerator(0) };
+        // Serdes are listed in the same order as the generators: a UTF-8 string serde, then an integer serde.
+        ISerializerDeserializer[] serdes = { UTF8StringSerializerDeserializer.INSTANCE,
+                IntegerSerializerDeserializer.INSTANCE };
+        return new TupleGenerator(generators, serdes, 0);
+    }
+
     public static InvertedIndexTestContext createWordInvIndexTestContext(LSMInvertedIndexTestHarness harness,
             InvertedIndexType invIndexType) throws IOException, IndexException {
         ISerializerDeserializer[] fieldSerdes = new ISerializerDeserializer[] {
@@ -97,6 +111,42 @@
         return testCtx;
     }
 
+    /** Creates a test context using a DelimitedUTF8StringBinaryTokenizerFactory over HashedUTF8WordTokenFactory. */
+    public static InvertedIndexTestContext createHashedWordInvIndexTestContext(LSMInvertedIndexTestHarness harness,
+            InvertedIndexType invIndexType) throws IOException, IndexException {
+        // Integer serdes for both fields.
+        ISerializerDeserializer[] serdes = { IntegerSerializerDeserializer.INSTANCE,
+                IntegerSerializerDeserializer.INSTANCE };
+        IBinaryTokenizerFactory hashedWordTokenizer = new DelimitedUTF8StringBinaryTokenizerFactory(true, false,
+                new HashedUTF8WordTokenFactory());
+        // The literal 1 is forwarded as the third argument of InvertedIndexTestContext.create.
+        return InvertedIndexTestContext.create(harness, serdes, 1, hashedWordTokenizer, invIndexType);
+    }
+
+    /** Creates a test context using an NGramUTF8StringBinaryTokenizerFactory over UTF8NGramTokenFactory. */
+    public static InvertedIndexTestContext createNGramInvIndexTestContext(LSMInvertedIndexTestHarness harness,
+            InvertedIndexType invIndexType) throws IOException, IndexException {
+        // A UTF-8 string serde followed by an integer serde.
+        ISerializerDeserializer[] serdes = { UTF8StringSerializerDeserializer.INSTANCE,
+                IntegerSerializerDeserializer.INSTANCE };
+        // TEST_GRAM_LENGTH is the first tokenizer-factory argument (presumably the gram length; confirm).
+        IBinaryTokenizerFactory ngramTokenizer = new NGramUTF8StringBinaryTokenizerFactory(TEST_GRAM_LENGTH, true,
+                true, false, new UTF8NGramTokenFactory());
+        return InvertedIndexTestContext.create(harness, serdes, 1, ngramTokenizer, invIndexType);
+    }
+
+    /** Creates a test context using an NGramUTF8StringBinaryTokenizerFactory over HashedUTF8NGramTokenFactory. */
+    public static InvertedIndexTestContext createHashedNGramInvIndexTestContext(LSMInvertedIndexTestHarness harness,
+            InvertedIndexType invIndexType) throws IOException, IndexException {
+        // Integer serdes for both fields.
+        ISerializerDeserializer[] serdes = { IntegerSerializerDeserializer.INSTANCE,
+                IntegerSerializerDeserializer.INSTANCE };
+        // TEST_GRAM_LENGTH is the first tokenizer-factory argument (presumably the gram length; confirm).
+        IBinaryTokenizerFactory hashedNGramTokenizer = new NGramUTF8StringBinaryTokenizerFactory(TEST_GRAM_LENGTH,
+                true, true, false, new HashedUTF8NGramTokenFactory());
+        return InvertedIndexTestContext.create(harness, serdes, 1, hashedNGramTokenizer, invIndexType);
+    }
+
     public static void bulkLoadInvIndex(InvertedIndexTestContext testCtx, TupleGenerator tupleGen, int numDocs)
             throws IndexException, IOException {
         SortedSet<CheckTuple> tmpMemIndex = new TreeSet<CheckTuple>();;
@@ -147,7 +197,7 @@
             documentCorpus.remove(size - 1);
         }
     }
-    
+
     /**
      * Compares actual and expected indexes using the rangeSearch() method of the inverted-index accessor.
      */
@@ -241,7 +291,7 @@
             checkLowKey.appendField(token);
             CheckTuple checkHighKey = new CheckTuple(tokenFieldCount, tokenFieldCount);
             checkHighKey.appendField(token);
-            
+
             SortedSet<CheckTuple> expectedInvList = OrderedIndexTestUtils.getPrefixExpectedSubset(
                     testCtx.getCheckTuples(), checkLowKey, checkHighKey);
             Iterator<CheckTuple> expectedInvListIter = expectedInvList.iterator();
@@ -329,8 +379,8 @@
     }
 
     public static void testIndexSearch(InvertedIndexTestContext testCtx, TupleGenerator tupleGen, Random rnd,
-            int numDocQueries, int numRandomQueries, IInvertedIndexSearchModifier searchModifier, int[] scanCountArray) throws IOException,
-            IndexException {
+            int numDocQueries, int numRandomQueries, IInvertedIndexSearchModifier searchModifier, int[] scanCountArray)
+            throws IOException, IndexException {
         IInvertedIndex invIndex = testCtx.invIndex;
         IInvertedIndexAccessor accessor = (IInvertedIndexAccessor) invIndex.createAccessor(
                 NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
@@ -364,7 +414,7 @@
             try {
                 accessor.search(resultCursor, searchPred);
             } catch (OccurrenceThresholdPanicException e) {
-                // ignore panic queries
+                // ignore panic queries.
                 panic = true;
             }
 
@@ -372,12 +422,17 @@
                 if (!panic) {
                     // Consume cursor and deserialize results so we can sort them. Some search cursors may not deliver the result sorted (e.g., LSM search cursor).
                     ArrayList<Integer> actualResults = new ArrayList<Integer>();
-                    while (resultCursor.hasNext()) {
-                        resultCursor.next();
-                        ITupleReference resultTuple = resultCursor.getTuple();
-                        int actual = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
-                                resultTuple.getFieldStart(0));
-                        actualResults.add(Integer.valueOf(actual));
+                    try {
+                        while (resultCursor.hasNext()) {
+                            resultCursor.next();
+                            ITupleReference resultTuple = resultCursor.getTuple();
+                            int actual = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
+                                    resultTuple.getFieldStart(0));
+                            actualResults.add(Integer.valueOf(actual));
+                        }
+                    } catch (OccurrenceThresholdPanicException e) {
+                        // Ignore panic queries.
+                        continue;
                     }
                     Collections.sort(actualResults);
 
@@ -407,43 +462,4 @@
             }
         }
     }
-    
-    
-    
-    /*
-    public static OnDiskInvertedIndex createTestInvertedIndex(LSMInvertedIndexTestHarness harness, IBinaryTokenizer tokenizer)
-            throws HyracksDataException {
-        ITreeIndexMetaDataFrameFactory metaFrameFactory = new LIFOMetaDataFrameFactory();
-        ITypeTraits[] btreeTypeTraits = new ITypeTraits[] { UTF8StringPointable.TYPE_TRAITS,
-                IntegerPointable.TYPE_TRAITS, IntegerPointable.TYPE_TRAITS, IntegerPointable.TYPE_TRAITS,
-                IntegerPointable.TYPE_TRAITS };
-        ITreeIndexTupleWriterFactory tupleWriterFactory = new TypeAwareTupleWriterFactory(btreeTypeTraits);
-        ITreeIndexFrameFactory leafFrameFactory = new BTreeNSMLeafFrameFactory(tupleWriterFactory);
-        ITreeIndexFrameFactory interiorFrameFactory = new BTreeNSMInteriorFrameFactory(tupleWriterFactory);
-        IFreePageManager freePageManager = new LinkedListFreePageManager(harness.getDiskBufferCache(), 0,
-                metaFrameFactory);
-        IBinaryComparatorFactory[] btreeCmpFactories = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
-                .of(UTF8StringPointable.FACTORY) };
-        return InvertedIndexUtils.createInvertedIndex(harness.getDiskBufferCache(), 
-                harness.getInvListTypeTraits(), harness.getInvListCmpFactories(), tokenizer);
-    }
-
-    public static InMemoryInvertedIndex createInMemoryInvertedIndex(LSMInvertedIndexTestHarness harness,
-            IBinaryTokenizer tokenizer) {
-        return InvertedIndexUtils.createInMemoryBTreeInvertedindex(harness.getMemBufferCache(),
-                harness.getMemFreePageManager(), harness.getTokenTypeTraits(), harness.getInvListTypeTraits(),
-                harness.getTokenCmpFactories(), harness.getInvListCmpFactories(),
-                tokenizer);
-    }
-
-    public static LSMInvertedIndex createLSMInvertedIndex(LSMInvertedIndexTestHarness harness,
-            IBinaryTokenizer tokenizer) {
-        return InvertedIndexUtils.createLSMInvertedIndex(harness.getMemBufferCache(),
-                harness.getMemFreePageManager(), harness.getTokenTypeTraits(), harness.getInvListTypeTraits(),
-                harness.getTokenCmpFactories(), harness.getInvListCmpFactories(),
-                tokenizer, harness.getDiskBufferCache(),
-                new LinkedListFreePageManagerFactory(harness.getDiskBufferCache(), new LIFOMetaDataFrameFactory()),
-                harness.getIOManager(), harness.getOnDiskDir(), harness.getDiskFileMapProvider());
-    }
-    */
 }
diff --git a/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/multithread/AbstractLSMRTreeTestWorker.java b/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/multithread/AbstractLSMRTreeTestWorker.java
index a89dca1..c54c948 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/multithread/AbstractLSMRTreeTestWorker.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/multithread/AbstractLSMRTreeTestWorker.java
@@ -23,6 +23,7 @@
 import edu.uci.ics.hyracks.storage.am.common.TestOperationSelector;
 import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
 import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
 import edu.uci.ics.hyracks.storage.am.common.datagen.DataGenThread;
 import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
 
@@ -72,7 +73,7 @@
         rearrangedTuple.reset(rearrangedTb.getFieldEndOffsets(), rearrangedTb.getByteArray());
     }
 
-    protected void consumeCursorTuples(ITreeIndexCursor cursor) throws HyracksDataException {
+    protected void consumeCursorTuples(ITreeIndexCursor cursor) throws HyracksDataException, IndexException {
         try {
             while (cursor.hasNext()) {
                 cursor.next();
diff --git a/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/multithread/LSMRTreeTestWorker.java b/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/multithread/LSMRTreeTestWorker.java
index 59a7728..3a3bfe6 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/multithread/LSMRTreeTestWorker.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/multithread/LSMRTreeTestWorker.java
@@ -122,7 +122,7 @@
         rearrangedTuple.reset(rearrangedTb.getFieldEndOffsets(), rearrangedTb.getByteArray());
     }
 
-    private void consumeCursorTuples(ITreeIndexCursor cursor) throws HyracksDataException {
+    private void consumeCursorTuples(ITreeIndexCursor cursor) throws HyracksDataException, IndexException {
         try {
             while (cursor.hasNext()) {
                 cursor.next();