Added basic lsm-inverted-index delete test that validates the index using a range search cursor (sort-merges multiple components and removes deleted entries). Still need to remove deleted entries during regular inverted index searches.

git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_inverted_index_updates_new@1853 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
index 41cfe74..c284149 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
@@ -135,7 +135,7 @@
         InMemoryInvertedIndex memInvIndex = InvertedIndexUtils.createInMemoryBTreeInvertedindex(memBufferCache,
                 memFreePageManager, invListTypeTraits, invListCmpFactories, tokenTypeTraits, tokenCmpFactories,
                 tokenizerFactory);
-        BTree deleteKeysBTree = BTreeUtils.createBTree(memBufferCache,
+        BTree deleteKeysBTree = BTreeUtils.createBTree(memBufferCache, memFreePageManager,
                 ((InMemoryBufferCache) memBufferCache).getFileMapProvider(), invListTypeTraits, invListCmpFactories,
                 BTreeLeafFrameType.REGULAR_NSM, memDeleteKeysBTreeFile);
         memComponent = new LSMInvertedIndexComponent(memInvIndex, deleteKeysBTree);
@@ -356,7 +356,7 @@
                 // First remove the possibly deleted key from the deleted-keys BTree.
                 ctx.keysOnlyTuple.reset(tuple);
                 try {
-                    ctx.deletedKeysBTreeAccessor.delete(tuple);
+                    ctx.deletedKeysBTreeAccessor.delete(ctx.keysOnlyTuple);
                 } catch (BTreeNonExistentKeyException e) {
                     // The key did not exist in the deleted-keys BTree.
                 }
@@ -370,7 +370,7 @@
                 // Insert key into the deleted-keys BTree.
                 ctx.keysOnlyTuple.reset(tuple);
                 try {
-                    ctx.deletedKeysBTreeAccessor.insert(tuple);
+                    ctx.deletedKeysBTreeAccessor.insert(ctx.keysOnlyTuple);
                 } catch (BTreeDuplicateKeyException e) {
                     // Key has already been deleted.
                 }
@@ -394,23 +394,32 @@
             boolean includeMemComponent, AtomicInteger searcherRefCount) throws HyracksDataException, IndexException {
         int numComponents = (includeMemComponent) ? diskComponents.size() : diskComponents.size() + 1;
         ArrayList<IIndexAccessor> indexAccessors = new ArrayList<IIndexAccessor>(numComponents);
+        ArrayList<IIndexAccessor> deletedKeysBTreeAccessors = new ArrayList<IIndexAccessor>(numComponents);
         if (includeMemComponent) {
-            IIndexAccessor accessor = memComponent.getInvIndex().createAccessor(NoOpOperationCallback.INSTANCE,
+            IIndexAccessor invIndexAccessor = memComponent.getInvIndex().createAccessor(NoOpOperationCallback.INSTANCE,
                     NoOpOperationCallback.INSTANCE);
-            indexAccessors.add(accessor);
+            indexAccessors.add(invIndexAccessor);
+            IIndexAccessor deletedKeysAccessor = memComponent.getDeletedKeysBTree().createAccessor(
+                    NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+            deletedKeysBTreeAccessors.add(deletedKeysAccessor);
         }
         for (int i = 0; i < diskComponents.size(); i++) {
             LSMInvertedIndexComponent component = (LSMInvertedIndexComponent) diskComponents.get(i);
-            IIndexAccessor accessor = component.getInvIndex().createAccessor(NoOpOperationCallback.INSTANCE,
+            IIndexAccessor invIndexAccessor = component.getInvIndex().createAccessor(NoOpOperationCallback.INSTANCE,
                     NoOpOperationCallback.INSTANCE);
-            indexAccessors.add(accessor);
+            indexAccessors.add(invIndexAccessor);
+            IIndexAccessor deletedKeysAccessor = component.getDeletedKeysBTree().createAccessor(
+                    NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+            deletedKeysBTreeAccessors.add(deletedKeysAccessor);
         }
-        ICursorInitialState initState = createCursorInitialState(pred, ictx, includeMemComponent, searcherRefCount, indexAccessors);
+        ICursorInitialState initState = createCursorInitialState(pred, ictx, includeMemComponent, searcherRefCount,
+                indexAccessors, deletedKeysBTreeAccessors);
         cursor.open(initState, pred);
     }
 
     private ICursorInitialState createCursorInitialState(ISearchPredicate pred, IIndexOpContext ictx,
-            boolean includeMemComponent, AtomicInteger searcherRefCount, ArrayList<IIndexAccessor> indexAccessors) {
+            boolean includeMemComponent, AtomicInteger searcherRefCount, ArrayList<IIndexAccessor> indexAccessors,
+            ArrayList<IIndexAccessor> deletedKeysBTreeAccessors) {
         // TODO: This check is not pretty, but it does the job. Come up with something more OO in the future.
         ICursorInitialState initState = null;
         // Distinguish between regular searches and range searches (mostly used in merges).
@@ -419,9 +428,10 @@
                     searcherRefCount, lsmHarness);
         } else {
             InMemoryInvertedIndex memInvIndex = (InMemoryInvertedIndex) memComponent.getInvIndex();
-            MultiComparator cmp = MultiComparator.create(memInvIndex.getBTree().getComparatorFactories());
-            initState = new LSMInvertedIndexRangeSearchCursorInitialState(cmp, searcherRefCount, lsmHarness,
-                    indexAccessors, pred);
+            MultiComparator tokensAndKeyCmp = MultiComparator.create(memInvIndex.getBTree().getComparatorFactories());
+            MultiComparator keyCmp = MultiComparator.create(invListCmpFactories);
+            initState = new LSMInvertedIndexRangeSearchCursorInitialState(tokensAndKeyCmp, keyCmp, includeMemComponent, searcherRefCount,
+                    lsmHarness, indexAccessors, deletedKeysBTreeAccessors, pred);
         }
         return initState;
     }
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
index 4f13138..d83726e 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
@@ -15,16 +15,28 @@
 
 package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls;
 
+import java.util.ArrayList;
+
 import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
 import edu.uci.ics.hyracks.storage.am.common.api.ICursorInitialState;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
 import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
 import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
 import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
+import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
 import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMTreeSearchCursor;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
 
 public class LSMInvertedIndexRangeSearchCursor extends LSMTreeSearchCursor {
 
+    // Assuming the cursor for all deleted-keys indexes are of the same type.
+    protected IIndexCursor deletedKeysBTreeCursor;
+    protected ArrayList<IIndexAccessor> deletedKeysBTreeAccessors;
+    protected PermutingTupleReference keysOnlyTuple;
+    protected RangePredicate keySearchPred;
+    
     @Override
     public void open(ICursorInitialState initState, ISearchPredicate searchPred) throws IndexException,
             HyracksDataException {
@@ -38,20 +50,63 @@
             invIndexAccessor.rangeSearch(rangeCursors[i], lsmInitState.getSearchPredicate());
 
         }
+        
+        // For searching the deleted-keys BTrees.
+        deletedKeysBTreeAccessors = lsmInitState.getDeletedKeysBTreeAccessors();
+        deletedKeysBTreeCursor = deletedKeysBTreeAccessors.get(0).createSearchCursor();        
+        MultiComparator keyCmp = lsmInitState.getKeyComparator();
+        // Project away token fields.
+        int[] keyFieldPermutation = new int[keyCmp.getKeyFieldCount()];
+        int numTokenFields = cmp.getKeyFieldCount() - keyCmp.getKeyFieldCount();
+        for (int i = 0; i < keyCmp.getKeyFieldCount(); i++) {
+            keyFieldPermutation[i] = numTokenFields + i;
+        }
+        keysOnlyTuple = new PermutingTupleReference(keyFieldPermutation);
+        keySearchPred = new RangePredicate(keysOnlyTuple, keysOnlyTuple, true, true, keyCmp, keyCmp);
+        
         searcherRefCount = lsmInitState.getSearcherRefCount();
         lsmHarness = lsmInitState.getLSMHarness();
+        includeMemComponent = lsmInitState.getIncludeMemComponent();
         setPriorityQueueComparator();
         initPriorityQueue();
     }
     
+    // Check deleted-keys BTrees whether they contain the key in the checkElement's tuple.
+    protected boolean isDeleted(PriorityQueueElement checkElement) throws HyracksDataException {
+        int end = checkElement.getCursorIndex();
+        for (int i = 0; i <= end; i++) {
+            deletedKeysBTreeCursor.reset();
+            keysOnlyTuple.reset(checkElement.getTuple());
+            try {
+                deletedKeysBTreeAccessors.get(i).search(deletedKeysBTreeCursor, keySearchPred);
+                if (deletedKeysBTreeCursor.hasNext()) {
+                    return true;
+                }
+            } catch (IndexException e) {
+                throw new HyracksDataException(e);
+            } finally {
+                deletedKeysBTreeCursor.close();
+            }
+        }
+        return false;
+    }
+    
     protected void checkPriorityQueue() throws HyracksDataException {
         while (!outputPriorityQueue.isEmpty() || needPush == true) {
             if (!outputPriorityQueue.isEmpty()) {
                 PriorityQueueElement checkElement = outputPriorityQueue.peek();
                 // If there is no previous tuple or the previous tuple can be ignored
                 if (outputElement == null) {
-                    // TODO: This is the place where we need to check for deleted keys. Have a look at the super class to find out more.
-                    break;
+                    // Test the tuple is a delete tuple or not
+                    if (isDeleted(checkElement)) {
+                        // If the key has been deleted then pop it and set needPush to true.
+                        // We cannot push immediately because the tuple may be
+                        // modified if hasNext() is called
+                        outputElement = outputPriorityQueue.poll();
+                        needPush = true;
+                    } else {
+                        break;
+                    }
                 } else {
                     // Compare the previous tuple and the head tuple in the PQ
                     if (compare(cmp, outputElement.getTuple(), checkElement.getTuple()) == 0) {
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursorInitialState.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursorInitialState.java
index 645658e..c4f83a0 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursorInitialState.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursorInitialState.java
@@ -28,20 +28,29 @@
 
 public class LSMInvertedIndexRangeSearchCursorInitialState implements ICursorInitialState {
 
-    private final MultiComparator cmp;
+    private final MultiComparator tokensAndKeyCmp;
+    private final MultiComparator keyCmp;
     private final AtomicInteger searcherRefCount;
     private final LSMHarness lsmHarness;
 
     private final ArrayList<IIndexAccessor> indexAccessors;
+    private final ArrayList<IIndexAccessor> deletedKeysBTreeAccessors;
     private final ISearchPredicate predicate;
+    
+    private final boolean includeMemComponent;
 
-    public LSMInvertedIndexRangeSearchCursorInitialState(MultiComparator cmp, AtomicInteger searcherRefCount,
-            LSMHarness lsmHarness, ArrayList<IIndexAccessor> indexAccessors, ISearchPredicate predicate) {
-        this.cmp = cmp;
+    public LSMInvertedIndexRangeSearchCursorInitialState(MultiComparator tokensAndKeyCmp, MultiComparator keyCmp,
+            boolean includeMemComponent, AtomicInteger searcherRefCount, LSMHarness lsmHarness,
+            ArrayList<IIndexAccessor> indexAccessors, ArrayList<IIndexAccessor> deletedKeysBTreeAccessors,
+            ISearchPredicate predicate) {
+        this.tokensAndKeyCmp = tokensAndKeyCmp;
+        this.keyCmp = keyCmp;
         this.searcherRefCount = searcherRefCount;
         this.lsmHarness = lsmHarness;
         this.indexAccessors = indexAccessors;
+        this.deletedKeysBTreeAccessors = deletedKeysBTreeAccessors;
         this.predicate = predicate;
+        this.includeMemComponent = includeMemComponent;
     }
 
     public int getNumComponents() {
@@ -79,17 +88,29 @@
         return indexAccessors;
     }
 
+    public ArrayList<IIndexAccessor> getDeletedKeysBTreeAccessors() {
+        return deletedKeysBTreeAccessors;
+    }
+    
     public ISearchPredicate getSearchPredicate() {
         return predicate;
     }
 
+    public MultiComparator getKeyComparator() {
+        return keyCmp;
+    }
+    
     @Override
     public MultiComparator getOriginalKeyComparator() {
-        return cmp;
+        return tokensAndKeyCmp;
     }
 
     @Override
     public void setOriginialKeyComparator(MultiComparator originalCmp) {
         // Do nothing.
     }
+    
+    public boolean getIncludeMemComponent() {
+        return includeMemComponent;
+    }
 }
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexDeleteTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexDeleteTest.java
new file mode 100644
index 0000000..ce6d882
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexDeleteTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexDeleteTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext.InvertedIndexType;
+
+public class LSMInvertedIndexDeleteTest extends AbstractInvertedIndexDeleteTest {
+
+    public LSMInvertedIndexDeleteTest() {
+        super(InvertedIndexType.LSM, false);
+    }
+}
\ No newline at end of file
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
index 83b2558..737fad5 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common;
 
 import java.io.IOException;
@@ -12,15 +27,15 @@
 import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
 import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestUtils;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext.InvertedIndexType;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestUtils;
 
 public abstract class AbstractInvertedIndexDeleteTest extends AbstractInvertedIndexTest {
-    
+
     protected final int numInsertRounds = AccessMethodTestsConfig.LSM_INVINDEX_NUM_INSERT_ROUNDS;
     protected final int numDeleteRounds = AccessMethodTestsConfig.LSM_INVINDEX_NUM_INSERT_ROUNDS;
     protected final boolean bulkLoad;
-    
+
     public AbstractInvertedIndexDeleteTest(InvertedIndexType invIndexType, boolean bulkLoad) {
         super(invIndexType);
         this.bulkLoad = bulkLoad;
@@ -28,10 +43,10 @@
 
     protected void runTest(InvertedIndexTestContext testCtx, TupleGenerator tupleGen) throws IOException,
             IndexException {
-        IIndex invIndex = testCtx.getIndex();        
+        IIndex invIndex = testCtx.getIndex();
         invIndex.create();
         invIndex.activate();
-        
+
         for (int i = 0; i < numInsertRounds; i++) {
             if (bulkLoad) {
                 InvertedIndexTestUtils.bulkLoadInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
@@ -40,21 +55,22 @@
             }
             validateAndCheckIndex(testCtx);
 
-            List<ITupleReference> documentCorpus = new ArrayList<ITupleReference>(); 
+            List<ITupleReference> documentCorpus = new ArrayList<ITupleReference>();
             documentCorpus.addAll(testCtx.getDocumentCorpus());
-            
+
             // Delete all documents in a couple of rounds.
-            int numTuplesPerDeleteRound = (int) Math.ceil((float) testCtx.getDocumentCorpus().size() / (float) numDeleteRounds);
+            int numTuplesPerDeleteRound = (int) Math.ceil((float) testCtx.getDocumentCorpus().size()
+                    / (float) numDeleteRounds);
             for (int j = 0; j < numDeleteRounds; j++) {
                 InvertedIndexTestUtils.deleteFromInvIndex(testCtx, harness.getRandom(), numTuplesPerDeleteRound);
                 validateAndCheckIndex(testCtx);
-            }                        
+            }
         }
-        
+
         invIndex.deactivate();
         invIndex.destroy();
     }
-    
+
     @Test
     public void wordTokensInvIndexTest() throws IOException, IndexException {
         InvertedIndexTestContext testCtx = InvertedIndexTestUtils.createWordInvIndexTestContext(harness, invIndexType);
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/LSMInvertedIndexTestHarness.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/LSMInvertedIndexTestHarness.java
index 5547980..16e1426 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/LSMInvertedIndexTestHarness.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/LSMInvertedIndexTestHarness.java
@@ -51,7 +51,7 @@
     private static final long RANDOM_SEED = 50;
     private static final int DEFAULT_DISK_PAGE_SIZE = 256;
     private static final int DEFAULT_DISK_NUM_PAGES = 10000;
-    private static final int DEFAULT_DISK_MAX_OPEN_FILES = 200;
+    private static final int DEFAULT_DISK_MAX_OPEN_FILES = 1000;
     private static final int DEFAULT_MEM_PAGE_SIZE = 256;
     private static final int DEFAULT_MEM_NUM_PAGES = 200;
     private static final int DEFAULT_HYRACKS_FRAME_SIZE = 512;