Fixed deletion protocol in the lsm-inverted-index. The in-memory deleted-keys BTree now only contains keys referring to on-disk components. Deletions that refer to documents in the in-memory inverted index are physically removed from there, and no entry to the deleted-keys BTree is made. This behavior seems necessary to avoid a pathological case of 'lost deletes' to on-disk components (more details in comments of the code).

git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_inverted_index_updates_new@1864 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java
index 6a5c762..b12bb7d 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DocumentStringFieldValueGenerator.java
@@ -92,4 +92,8 @@
     public List<String> getTokenDictionary() {
         return tokenDict;
     }
+
+    @Override
+    public void reset() {
+    }
 }
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DoubleFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DoubleFieldValueGenerator.java
index d535404..c98c249 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DoubleFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/DoubleFieldValueGenerator.java
@@ -28,4 +28,8 @@
     public Double next() {
         return rnd.nextDouble();
     }
+
+    @Override
+    public void reset() {
+    }
 }
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/FloatFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/FloatFieldValueGenerator.java
index 53450f6..7c3ff81 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/FloatFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/FloatFieldValueGenerator.java
@@ -28,4 +28,8 @@
     public Float next() {
         return rnd.nextFloat();
     }
+
+    @Override
+    public void reset() {
+    }
 }
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/IFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/IFieldValueGenerator.java
index 6f743ac..dfeead6 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/IFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/IFieldValueGenerator.java
@@ -17,4 +17,5 @@
 
 public interface IFieldValueGenerator<T> {
     public T next();
+    public void reset();
 }
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/IntegerFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/IntegerFieldValueGenerator.java
index 88f7bab..cd6e2a6 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/IntegerFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/IntegerFieldValueGenerator.java
@@ -28,4 +28,8 @@
     public Integer next() {
         return rnd.nextInt();
     }
+
+    @Override
+    public void reset() {
+    }
 }
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/PersonNameFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/PersonNameFieldValueGenerator.java
index 7c94f18..6b86278 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/PersonNameFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/PersonNameFieldValueGenerator.java
@@ -90,4 +90,8 @@
         
         return strBuilder.toString();
     }
+
+    @Override
+    public void reset() {
+    }
 }
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedDoubleFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedDoubleFieldValueGenerator.java
index 228d12e..e93b8de 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedDoubleFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedDoubleFieldValueGenerator.java
@@ -16,17 +16,26 @@
 package edu.uci.ics.hyracks.storage.am.common.datagen;
 
 public class SortedDoubleFieldValueGenerator implements IFieldValueGenerator<Double> {
-    private double val = 0.0d;
-
+    private double val;
+    private final double startVal;
+    
     public SortedDoubleFieldValueGenerator() {
+        startVal = 0.0d;
+        reset();
     }
     
     public SortedDoubleFieldValueGenerator(double startVal) {
-        val = startVal;
+        this.startVal = startVal;
+        reset();
     }
     
     @Override
     public Double next() {
         return val++;
     }
+
+    @Override
+    public void reset() {
+        val = startVal;        
+    }
 }
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedFloatFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedFloatFieldValueGenerator.java
index 13fd47b..fb163e1 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedFloatFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedFloatFieldValueGenerator.java
@@ -17,16 +17,25 @@
 
 public class SortedFloatFieldValueGenerator implements IFieldValueGenerator<Float> {
     private float val = 0.0f;
-
+    private final float startVal;
+    
     public SortedFloatFieldValueGenerator() {
+        startVal = 0.0f;
+        reset();
     }
     
     public SortedFloatFieldValueGenerator(float startVal) {
-        val = startVal;
+        this.startVal = startVal;
+        reset();
     }
     
     @Override
     public Float next() {
         return val++;
     }
+
+    @Override
+    public void reset() {
+        val = startVal;
+    }
 }
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedIntegerFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedIntegerFieldValueGenerator.java
index b4a13cb..a036772 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedIntegerFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/SortedIntegerFieldValueGenerator.java
@@ -17,16 +17,25 @@
 
 public class SortedIntegerFieldValueGenerator implements IFieldValueGenerator<Integer> {
     private int val = 0;
+    private final int startVal;
 
     public SortedIntegerFieldValueGenerator() {
+        startVal = 0;
+        reset();
     }
     
     public SortedIntegerFieldValueGenerator(int startVal) {
-        val = startVal;
+        this.startVal = startVal;
+        reset();
     }
     
     @Override
     public Integer next() {
         return val++;
     }
+
+    @Override
+    public void reset() {
+        val = startVal;
+    }
 }
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/StringFieldValueGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/StringFieldValueGenerator.java
index ef395ef..6bf01a4 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/StringFieldValueGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/StringFieldValueGenerator.java
@@ -39,4 +39,8 @@
         }
         return strBuilder.toString();
     }
+
+    @Override
+    public void reset() {
+    }
 }
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/TupleGenerator.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/TupleGenerator.java
index bfc56b6..1099af2 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/TupleGenerator.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/datagen/TupleGenerator.java
@@ -63,4 +63,10 @@
     public ITupleReference get() {
         return tuple;
     }
+    
+    public void reset() {
+        for (IFieldValueGenerator fieldGen : fieldGens) {
+            fieldGen.reset();
+        }
+    }
 }
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
index 3d7de71..4965e45 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
@@ -26,7 +26,6 @@
 import edu.uci.ics.hyracks.api.io.FileReference;
 import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
 import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeDuplicateKeyException;
-import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeNonExistentKeyException;
 import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeLeafFrameType;
 import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
 import edu.uci.ics.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
@@ -341,8 +340,14 @@
     }
         
     /**
+     * The keys in the in-memory deleted-keys BTree only refers to on-disk components.
+     * We delete documents from the in-memory inverted index by deleting its entries directly,
+     * and do NOT add the deleted key to the deleted-keys BTree.
+     * Otherwise, inserts would have to remove keys from the in-memory deleted-keys BTree which 
+     * may cause incorrect behavior in the following pathological case:
+     * Insert doc 1, flush, delete doc 1, insert doc 1
+     * After the sequence above doc 1 will now appear twice because the delete of the on-disk doc 1 has been lost.
      * Insert:
-     * - Delete key from deleted-keys BTree (ignore if key does not exist).
      * - Insert document into in-memory inverted index.
      * Delete:
      * - Delete document from in-memory inverted index (ignore if it does not exist).
@@ -354,13 +359,6 @@
         LSMInvertedIndexOpContext ctx = (LSMInvertedIndexOpContext) ictx;
         switch (ctx.getIndexOp()) {
             case INSERT: {
-                // First remove the possibly deleted key from the deleted-keys BTree.
-                ctx.keysOnlyTuple.reset(tuple);
-                try {
-                    ctx.deletedKeysBTreeAccessor.delete(ctx.keysOnlyTuple);
-                } catch (BTreeNonExistentKeyException e) {
-                    // The key did not exist in the deleted-keys BTree.
-                }
                 // Insert into the in-memory inverted index.
                 ctx.memInvIndexAccessor.insert(tuple);
                 break;
@@ -452,7 +450,6 @@
     }
     
     @Override
-    // TODO: Deal with deletions properly.
     public Object merge(List<Object> mergedComponents, ILSMIOOperation operation) throws HyracksDataException,
             IndexException {
         LSMInvertedIndexMergeOperation mergeOp = (LSMInvertedIndexMergeOperation) operation;
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
index 353626b..3eee4b7 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
@@ -72,8 +72,8 @@
     protected boolean isDeleted(PriorityQueueElement checkElement) throws HyracksDataException {
         keysOnlyTuple.reset(checkElement.getTuple());
         int end = checkElement.getCursorIndex();
-        for (int i = 0; i <= end; i++) {
-            deletedKeysBTreeCursor.reset();            
+        for (int i = 0; i < end; i++) {
+            deletedKeysBTreeCursor.reset();       
             try {
                 deletedKeysBTreeAccessors.get(i).search(deletedKeysBTreeCursor, keySearchPred);
                 if (deletedKeysBTreeCursor.hasNext()) {
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
index 983c0bc..6c2de44 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
@@ -69,7 +69,7 @@
     protected boolean isDeleted(ITupleReference key) throws HyracksDataException {
         keySearchPred.setLowKey(key, true);
         keySearchPred.setHighKey(key, true);
-        for (int i = 0; i <= accessorIndex; i++) {
+        for (int i = 0; i < accessorIndex; i++) {
             deletedKeysBTreeCursor.reset();
             try {
                 deletedKeysBTreeAccessors.get(i).search(deletedKeysBTreeCursor, keySearchPred);
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
index 737fad5..348b69b 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
@@ -26,6 +26,8 @@
 import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
 import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
 import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.ConjunctiveSearchModifier;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext.InvertedIndexType;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestUtils;
@@ -33,9 +35,12 @@
 public abstract class AbstractInvertedIndexDeleteTest extends AbstractInvertedIndexTest {
 
     protected final int numInsertRounds = AccessMethodTestsConfig.LSM_INVINDEX_NUM_INSERT_ROUNDS;
-    protected final int numDeleteRounds = AccessMethodTestsConfig.LSM_INVINDEX_NUM_INSERT_ROUNDS;
+    protected final int numDeleteRounds = AccessMethodTestsConfig.LSM_INVINDEX_NUM_DELETE_ROUNDS;
     protected final boolean bulkLoad;
 
+    protected int NUM_QUERIES = 10000;
+    protected int[] scanCountArray = new int[NUM_DOCS_TO_INSERT];
+    
     public AbstractInvertedIndexDeleteTest(InvertedIndexType invIndexType, boolean bulkLoad) {
         super(invIndexType);
         this.bulkLoad = bulkLoad;
@@ -48,6 +53,9 @@
         invIndex.activate();
 
         for (int i = 0; i < numInsertRounds; i++) {
+            // Start generating documents ids from 0 again.
+            tupleGen.reset();
+            
             if (bulkLoad) {
                 InvertedIndexTestUtils.bulkLoadInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
             } else {
@@ -62,8 +70,18 @@
             int numTuplesPerDeleteRound = (int) Math.ceil((float) testCtx.getDocumentCorpus().size()
                     / (float) numDeleteRounds);
             for (int j = 0; j < numDeleteRounds; j++) {
+                System.out.println("DELETE ROUND: " + i + " " + j);
+                
                 InvertedIndexTestUtils.deleteFromInvIndex(testCtx, harness.getRandom(), numTuplesPerDeleteRound);
                 validateAndCheckIndex(testCtx);
+                
+                System.out.println("TESTING SEARCHES");
+                
+                IInvertedIndexSearchModifier searchModifier = new ConjunctiveSearchModifier();
+                InvertedIndexTestUtils.testIndexSearch(testCtx, tupleGen, harness.getRandom(), NUM_QUERIES, searchModifier,
+                        scanCountArray);
+                
+                System.out.println("DONE WITH TESTING SEARCHES");
             }
         }
 
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
index 1c5fcf6..0f750c6 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
@@ -15,31 +15,15 @@
 
 package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common;
 
-import static org.junit.Assert.fail;
-
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Random;
 
 import org.junit.Test;
 
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
-import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
 import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
 import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
 import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
-import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
-import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.exceptions.OccurrenceThresholdPanicException;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.ConjunctiveSearchModifier;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.InvertedIndexSearchPredicate;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext.InvertedIndexType;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestUtils;
@@ -50,9 +34,6 @@
     protected int[] scanCountArray = new int[NUM_DOCS_TO_INSERT];
     protected final boolean bulkLoad;
 
-    // Probability that a randomly generated query is used, instead of a document from the corpus.
-    protected final float randomQueryProb = 0.9f;
-
     public AbstractInvertedIndexSearchTest(InvertedIndexType invIndexType, boolean bulkLoad) {
         super(invIndexType);
         this.bulkLoad = bulkLoad;
@@ -71,89 +52,9 @@
         }
         invIndex.validate();
 
-        IInvertedIndexAccessor accessor = (IInvertedIndexAccessor) invIndex.createAccessor(
-                NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
-        IBinaryTokenizer tokenizer = testCtx.getTokenizerFactory().createTokenizer();
-        InvertedIndexSearchPredicate searchPred = new InvertedIndexSearchPredicate(tokenizer, searchModifier);
-        Random rnd = harness.getRandom();
-        List<ITupleReference> documentCorpus = testCtx.getDocumentCorpus();
-        // Project away the primary-key field.
-        int[] fieldPermutation = new int[] { 0 };
-        PermutingTupleReference searchDocument = new PermutingTupleReference(fieldPermutation);
-
-        IIndexCursor resultCursor = accessor.createSearchCursor();
-        for (int i = 0; i < NUM_QUERIES; i++) {
-            if (rnd.nextFloat() <= randomQueryProb) {
-                // Generate a random query.
-                ITupleReference randomQuery = tupleGen.next();
-                searchDocument.reset(randomQuery);
-            } else {
-                // Pick a random document from the corpus to use as the search query.
-                int queryIndex = Math.abs(rnd.nextInt() % documentCorpus.size());
-                searchDocument.reset(documentCorpus.get(queryIndex));
-            }
-
-            // DEBUG
-            /*
-            StringBuilder builder = new StringBuilder();
-            UTF8StringPointable.toString(builder, searchDocument.getFieldData(0), searchDocument.getFieldStart(0));
-            String query = builder.toString();
-            
-            System.out.println("QUERY: " + i + " " + query + " " + isRandom);
-            if (query.equals("Patricia Mary")) {
-                System.out.println("HERE WE GO, DEBUG IT!");
-            }
-            */
-
-            // Set query tuple in search predicate.
-            searchPred.setQueryTuple(searchDocument);
-            searchPred.setQueryFieldIndex(0);
-
-            resultCursor.reset();
-            boolean panic = false;
-            try {
-                accessor.search(resultCursor, searchPred);
-            } catch (OccurrenceThresholdPanicException e) {
-                // ignore panic queries
-                panic = true;
-            }
-
-            if (!panic) {
-                // Consume cursor and deserialize results so we can sort them. Some search cursors may not deliver the result sorted (e.g., LSM search cursor).
-                ArrayList<Integer> actualResults = new ArrayList<Integer>();
-                while (resultCursor.hasNext()) {
-                    resultCursor.next();
-                    ITupleReference resultTuple = resultCursor.getTuple();
-                    int actual = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
-                            resultTuple.getFieldStart(0));
-                    actualResults.add(Integer.valueOf(actual));
-                }
-                Collections.sort(actualResults);
-
-                // Get expected results.
-                List<Integer> expectedResults = new ArrayList<Integer>();
-                InvertedIndexTestUtils.getExpectedResults(scanCountArray, testCtx.getCheckTuples(),
-                        searchDocument, tokenizer, testCtx.getFieldSerdes()[0],
-                        searchModifier, expectedResults);
-
-                Iterator<Integer> expectedIter = expectedResults.iterator();
-                Iterator<Integer> actualIter = actualResults.iterator();
-                while (expectedIter.hasNext() && actualIter.hasNext()) {
-                    int expected = expectedIter.next();
-                    int actual = actualIter.next();
-                    if (actual != expected) {
-                        fail("Query results do not match. Encountered: " + actual + ". Expected: " + expected + "");
-                    }
-                }
-                if (expectedIter.hasNext()) {
-                    fail("Query results do not match. Actual results missing.");
-                }
-                if (actualIter.hasNext()) {
-                    fail("Query results do not match. Actual contains too many results.");
-                }
-            }
-        }
-
+        InvertedIndexTestUtils.testIndexSearch(testCtx, tupleGen, harness.getRandom(), NUM_QUERIES, searchModifier,
+                scanCountArray);
+        
         invIndex.deactivate();
         invIndex.destroy();
     }
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
index 1c8809d..aaf0564 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
@@ -23,7 +23,9 @@
 import java.io.DataOutput;
 import java.io.DataOutputStream;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Random;
@@ -57,6 +59,8 @@
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.LSMInvertedIndexTestHarness;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.exceptions.OccurrenceThresholdPanicException;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.InvertedIndexSearchPredicate;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.DelimitedUTF8StringBinaryTokenizerFactory;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory;
@@ -68,6 +72,9 @@
 @SuppressWarnings("rawtypes")
 public class InvertedIndexTestUtils {
 
+    // Probability that a randomly generated query is used, instead of a document from the corpus.
+    protected static final float RQNDOM_QUERY_PROB = 0.9f;
+    
     public static TupleGenerator createStringDocumentTupleGen(Random rnd) throws IOException {
         IFieldValueGenerator[] fieldGens = new IFieldValueGenerator[2];
         fieldGens[0] = new DocumentStringFieldValueGenerator(2, 10, 10000, rnd);
@@ -172,7 +179,6 @@
         Iterator<CheckTuple> expectedIter = testCtx.getCheckTuples().iterator();
 
         // Compare index elements.
-        int count = 0;
         try {
             while (invIndexCursor.hasNext() && expectedIter.hasNext()) {
                 invIndexCursor.next();
@@ -180,9 +186,14 @@
                 CheckTuple expected = expectedIter.next();
                 OrderedIndexTestUtils.createTupleFromCheckTuple(expected, expectedBuilder, expectedTuple, fieldSerdes);
                 if (tupleCmp.compare(actualTuple, expectedTuple) != 0) {
-                    fail("Index elements differ for token '" + expected.getField(0) + "' at position " + count + ".");
+                    fail("Index entries differ for token '" + expected.getField(0) + "'.");
                 }
-                count++;
+            }
+            if (expectedIter.hasNext()) {
+                fail("Indexes do not match. Actual index is missing entries.");
+            }
+            if (invIndexCursor.hasNext()) {
+                fail("Indexes do not match. Actual index contains too many entries.");
             }
         } finally {
             invIndexCursor.close();
@@ -317,6 +328,86 @@
         }
     }
 
+    public static void testIndexSearch(InvertedIndexTestContext testCtx, TupleGenerator tupleGen, Random rnd,
+            int numQueries, IInvertedIndexSearchModifier searchModifier, int[] scanCountArray) throws IOException,
+            IndexException {
+        IInvertedIndex invIndex = testCtx.invIndex;
+        IInvertedIndexAccessor accessor = (IInvertedIndexAccessor) invIndex.createAccessor(
+                NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+        IBinaryTokenizer tokenizer = testCtx.getTokenizerFactory().createTokenizer();
+        InvertedIndexSearchPredicate searchPred = new InvertedIndexSearchPredicate(tokenizer, searchModifier);
+        List<ITupleReference> documentCorpus = testCtx.getDocumentCorpus();
+        // Project away the primary-key field.
+        int[] fieldPermutation = new int[] { 0 };
+        PermutingTupleReference searchDocument = new PermutingTupleReference(fieldPermutation);
+
+        IIndexCursor resultCursor = accessor.createSearchCursor();
+        for (int i = 0; i < numQueries; i++) {
+            if (rnd.nextFloat() <= RQNDOM_QUERY_PROB || documentCorpus.isEmpty()) {
+                // Generate a random query.
+                ITupleReference randomQuery = tupleGen.next();
+                searchDocument.reset(randomQuery);
+            } else {
+                // Pick a random document from the corpus to use as the search query.
+                int queryIndex = Math.abs(rnd.nextInt() % documentCorpus.size());
+                searchDocument.reset(documentCorpus.get(queryIndex));
+            }
+
+            // Set query tuple in search predicate.
+            searchPred.setQueryTuple(searchDocument);
+            searchPred.setQueryFieldIndex(0);
+
+            resultCursor.reset();
+            boolean panic = false;
+            try {
+                accessor.search(resultCursor, searchPred);
+            } catch (OccurrenceThresholdPanicException e) {
+                // ignore panic queries
+                panic = true;
+            }
+
+            try {
+                if (!panic) {
+                    // Consume cursor and deserialize results so we can sort them. Some search cursors may not deliver the result sorted (e.g., LSM search cursor).
+                    ArrayList<Integer> actualResults = new ArrayList<Integer>();
+                    while (resultCursor.hasNext()) {
+                        resultCursor.next();
+                        ITupleReference resultTuple = resultCursor.getTuple();
+                        int actual = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
+                                resultTuple.getFieldStart(0));
+                        actualResults.add(Integer.valueOf(actual));
+                    }
+                    Collections.sort(actualResults);
+
+                    // Get expected results.
+                    List<Integer> expectedResults = new ArrayList<Integer>();
+                    InvertedIndexTestUtils.getExpectedResults(scanCountArray, testCtx.getCheckTuples(), searchDocument,
+                            tokenizer, testCtx.getFieldSerdes()[0], searchModifier, expectedResults);
+
+                    Iterator<Integer> expectedIter = expectedResults.iterator();
+                    Iterator<Integer> actualIter = actualResults.iterator();
+                    while (expectedIter.hasNext() && actualIter.hasNext()) {
+                        int expected = expectedIter.next();
+                        int actual = actualIter.next();
+                        if (actual != expected) {
+                            fail("Query results do not match. Encountered: " + actual + ". Expected: " + expected + "");
+                        }
+                    }
+                    if (expectedIter.hasNext()) {
+                        fail("Query results do not match. Actual results missing.");
+                    }
+                    if (actualIter.hasNext()) {
+                        fail("Query results do not match. Actual contains too many results.");
+                    }
+                }
+            } finally {
+                resultCursor.close();
+            }
+        }
+    }
+    
+    
+    
     /*
     public static OnDiskInvertedIndex createTestInvertedIndex(LSMInvertedIndexTestHarness harness, IBinaryTokenizer tokenizer)
             throws HyracksDataException {