Added basic lsm-inverted-index delete test that validates the index using a range search cursor (sort-merges multiple components and removes deleted entries). Still need to remove deleted entries during regular inverted index searches.
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_inverted_index_updates_new@1853 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
index 41cfe74..c284149 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
@@ -135,7 +135,7 @@
InMemoryInvertedIndex memInvIndex = InvertedIndexUtils.createInMemoryBTreeInvertedindex(memBufferCache,
memFreePageManager, invListTypeTraits, invListCmpFactories, tokenTypeTraits, tokenCmpFactories,
tokenizerFactory);
- BTree deleteKeysBTree = BTreeUtils.createBTree(memBufferCache,
+ BTree deleteKeysBTree = BTreeUtils.createBTree(memBufferCache, memFreePageManager,
((InMemoryBufferCache) memBufferCache).getFileMapProvider(), invListTypeTraits, invListCmpFactories,
BTreeLeafFrameType.REGULAR_NSM, memDeleteKeysBTreeFile);
memComponent = new LSMInvertedIndexComponent(memInvIndex, deleteKeysBTree);
@@ -356,7 +356,7 @@
// First remove the possibly deleted key from the deleted-keys BTree.
ctx.keysOnlyTuple.reset(tuple);
try {
- ctx.deletedKeysBTreeAccessor.delete(tuple);
+ ctx.deletedKeysBTreeAccessor.delete(ctx.keysOnlyTuple);
} catch (BTreeNonExistentKeyException e) {
// The key did not exist in the deleted-keys BTree.
}
@@ -370,7 +370,7 @@
// Insert key into the deleted-keys BTree.
ctx.keysOnlyTuple.reset(tuple);
try {
- ctx.deletedKeysBTreeAccessor.insert(tuple);
+ ctx.deletedKeysBTreeAccessor.insert(ctx.keysOnlyTuple);
} catch (BTreeDuplicateKeyException e) {
// Key has already been deleted.
}
@@ -394,23 +394,32 @@
boolean includeMemComponent, AtomicInteger searcherRefCount) throws HyracksDataException, IndexException {
int numComponents = (includeMemComponent) ? diskComponents.size() : diskComponents.size() + 1;
ArrayList<IIndexAccessor> indexAccessors = new ArrayList<IIndexAccessor>(numComponents);
+ ArrayList<IIndexAccessor> deletedKeysBTreeAccessors = new ArrayList<IIndexAccessor>(numComponents);
if (includeMemComponent) {
- IIndexAccessor accessor = memComponent.getInvIndex().createAccessor(NoOpOperationCallback.INSTANCE,
+ IIndexAccessor invIndexAccessor = memComponent.getInvIndex().createAccessor(NoOpOperationCallback.INSTANCE,
NoOpOperationCallback.INSTANCE);
- indexAccessors.add(accessor);
+ indexAccessors.add(invIndexAccessor);
+ IIndexAccessor deletedKeysAccessor = memComponent.getDeletedKeysBTree().createAccessor(
+ NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+ deletedKeysBTreeAccessors.add(deletedKeysAccessor);
}
for (int i = 0; i < diskComponents.size(); i++) {
LSMInvertedIndexComponent component = (LSMInvertedIndexComponent) diskComponents.get(i);
- IIndexAccessor accessor = component.getInvIndex().createAccessor(NoOpOperationCallback.INSTANCE,
+ IIndexAccessor invIndexAccessor = component.getInvIndex().createAccessor(NoOpOperationCallback.INSTANCE,
NoOpOperationCallback.INSTANCE);
- indexAccessors.add(accessor);
+ indexAccessors.add(invIndexAccessor);
+ IIndexAccessor deletedKeysAccessor = component.getDeletedKeysBTree().createAccessor(
+ NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+ deletedKeysBTreeAccessors.add(deletedKeysAccessor);
}
- ICursorInitialState initState = createCursorInitialState(pred, ictx, includeMemComponent, searcherRefCount, indexAccessors);
+ ICursorInitialState initState = createCursorInitialState(pred, ictx, includeMemComponent, searcherRefCount,
+ indexAccessors, deletedKeysBTreeAccessors);
cursor.open(initState, pred);
}
private ICursorInitialState createCursorInitialState(ISearchPredicate pred, IIndexOpContext ictx,
- boolean includeMemComponent, AtomicInteger searcherRefCount, ArrayList<IIndexAccessor> indexAccessors) {
+ boolean includeMemComponent, AtomicInteger searcherRefCount, ArrayList<IIndexAccessor> indexAccessors,
+ ArrayList<IIndexAccessor> deletedKeysBTreeAccessors) {
// TODO: This check is not pretty, but it does the job. Come up with something more OO in the future.
ICursorInitialState initState = null;
// Distinguish between regular searches and range searches (mostly used in merges).
@@ -419,9 +428,10 @@
searcherRefCount, lsmHarness);
} else {
InMemoryInvertedIndex memInvIndex = (InMemoryInvertedIndex) memComponent.getInvIndex();
- MultiComparator cmp = MultiComparator.create(memInvIndex.getBTree().getComparatorFactories());
- initState = new LSMInvertedIndexRangeSearchCursorInitialState(cmp, searcherRefCount, lsmHarness,
- indexAccessors, pred);
+ MultiComparator tokensAndKeyCmp = MultiComparator.create(memInvIndex.getBTree().getComparatorFactories());
+ MultiComparator keyCmp = MultiComparator.create(invListCmpFactories);
+ initState = new LSMInvertedIndexRangeSearchCursorInitialState(tokensAndKeyCmp, keyCmp, includeMemComponent, searcherRefCount,
+ lsmHarness, indexAccessors, deletedKeysBTreeAccessors, pred);
}
return initState;
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
index 4f13138..d83726e 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
@@ -15,16 +15,28 @@
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls;
+import java.util.ArrayList;
+
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
import edu.uci.ics.hyracks.storage.am.common.api.ICursorInitialState;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
+import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMTreeSearchCursor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
public class LSMInvertedIndexRangeSearchCursor extends LSMTreeSearchCursor {
+ // Assuming the cursor for all deleted-keys indexes are of the same type.
+ protected IIndexCursor deletedKeysBTreeCursor;
+ protected ArrayList<IIndexAccessor> deletedKeysBTreeAccessors;
+ protected PermutingTupleReference keysOnlyTuple;
+ protected RangePredicate keySearchPred;
+
@Override
public void open(ICursorInitialState initState, ISearchPredicate searchPred) throws IndexException,
HyracksDataException {
@@ -38,20 +50,63 @@
invIndexAccessor.rangeSearch(rangeCursors[i], lsmInitState.getSearchPredicate());
}
+
+ // For searching the deleted-keys BTrees.
+ deletedKeysBTreeAccessors = lsmInitState.getDeletedKeysBTreeAccessors();
+ deletedKeysBTreeCursor = deletedKeysBTreeAccessors.get(0).createSearchCursor();
+ MultiComparator keyCmp = lsmInitState.getKeyComparator();
+ // Project away token fields.
+ int[] keyFieldPermutation = new int[keyCmp.getKeyFieldCount()];
+ int numTokenFields = cmp.getKeyFieldCount() - keyCmp.getKeyFieldCount();
+ for (int i = 0; i < keyCmp.getKeyFieldCount(); i++) {
+ keyFieldPermutation[i] = numTokenFields + i;
+ }
+ keysOnlyTuple = new PermutingTupleReference(keyFieldPermutation);
+ keySearchPred = new RangePredicate(keysOnlyTuple, keysOnlyTuple, true, true, keyCmp, keyCmp);
+
searcherRefCount = lsmInitState.getSearcherRefCount();
lsmHarness = lsmInitState.getLSMHarness();
+ includeMemComponent = lsmInitState.getIncludeMemComponent();
setPriorityQueueComparator();
initPriorityQueue();
}
+ // Check deleted-keys BTrees whether they contain the key in the checkElement's tuple.
+ protected boolean isDeleted(PriorityQueueElement checkElement) throws HyracksDataException {
+ int end = checkElement.getCursorIndex();
+ for (int i = 0; i <= end; i++) {
+ deletedKeysBTreeCursor.reset();
+ keysOnlyTuple.reset(checkElement.getTuple());
+ try {
+ deletedKeysBTreeAccessors.get(i).search(deletedKeysBTreeCursor, keySearchPred);
+ if (deletedKeysBTreeCursor.hasNext()) {
+ return true;
+ }
+ } catch (IndexException e) {
+ throw new HyracksDataException(e);
+ } finally {
+ deletedKeysBTreeCursor.close();
+ }
+ }
+ return false;
+ }
+
protected void checkPriorityQueue() throws HyracksDataException {
while (!outputPriorityQueue.isEmpty() || needPush == true) {
if (!outputPriorityQueue.isEmpty()) {
PriorityQueueElement checkElement = outputPriorityQueue.peek();
// If there is no previous tuple or the previous tuple can be ignored
if (outputElement == null) {
- // TODO: This is the place where we need to check for deleted keys. Have a look at the super class to find out more.
- break;
+ // Test the tuple is a delete tuple or not
+ if (isDeleted(checkElement)) {
+ // If the key has been deleted then pop it and set needPush to true.
+ // We cannot push immediately because the tuple may be
+ // modified if hasNext() is called
+ outputElement = outputPriorityQueue.poll();
+ needPush = true;
+ } else {
+ break;
+ }
} else {
// Compare the previous tuple and the head tuple in the PQ
if (compare(cmp, outputElement.getTuple(), checkElement.getTuple()) == 0) {
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursorInitialState.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursorInitialState.java
index 645658e..c4f83a0 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursorInitialState.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursorInitialState.java
@@ -28,20 +28,29 @@
public class LSMInvertedIndexRangeSearchCursorInitialState implements ICursorInitialState {
- private final MultiComparator cmp;
+ private final MultiComparator tokensAndKeyCmp;
+ private final MultiComparator keyCmp;
private final AtomicInteger searcherRefCount;
private final LSMHarness lsmHarness;
private final ArrayList<IIndexAccessor> indexAccessors;
+ private final ArrayList<IIndexAccessor> deletedKeysBTreeAccessors;
private final ISearchPredicate predicate;
+
+ private final boolean includeMemComponent;
- public LSMInvertedIndexRangeSearchCursorInitialState(MultiComparator cmp, AtomicInteger searcherRefCount,
- LSMHarness lsmHarness, ArrayList<IIndexAccessor> indexAccessors, ISearchPredicate predicate) {
- this.cmp = cmp;
+ public LSMInvertedIndexRangeSearchCursorInitialState(MultiComparator tokensAndKeyCmp, MultiComparator keyCmp,
+ boolean includeMemComponent, AtomicInteger searcherRefCount, LSMHarness lsmHarness,
+ ArrayList<IIndexAccessor> indexAccessors, ArrayList<IIndexAccessor> deletedKeysBTreeAccessors,
+ ISearchPredicate predicate) {
+ this.tokensAndKeyCmp = tokensAndKeyCmp;
+ this.keyCmp = keyCmp;
this.searcherRefCount = searcherRefCount;
this.lsmHarness = lsmHarness;
this.indexAccessors = indexAccessors;
+ this.deletedKeysBTreeAccessors = deletedKeysBTreeAccessors;
this.predicate = predicate;
+ this.includeMemComponent = includeMemComponent;
}
public int getNumComponents() {
@@ -79,17 +88,29 @@
return indexAccessors;
}
+ public ArrayList<IIndexAccessor> getDeletedKeysBTreeAccessors() {
+ return deletedKeysBTreeAccessors;
+ }
+
public ISearchPredicate getSearchPredicate() {
return predicate;
}
+ public MultiComparator getKeyComparator() {
+ return keyCmp;
+ }
+
@Override
public MultiComparator getOriginalKeyComparator() {
- return cmp;
+ return tokensAndKeyCmp;
}
@Override
public void setOriginialKeyComparator(MultiComparator originalCmp) {
// Do nothing.
}
+
+ public boolean getIncludeMemComponent() {
+ return includeMemComponent;
+ }
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexDeleteTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexDeleteTest.java
new file mode 100644
index 0000000..ce6d882
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexDeleteTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexDeleteTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext.InvertedIndexType;
+
+public class LSMInvertedIndexDeleteTest extends AbstractInvertedIndexDeleteTest {
+
+ public LSMInvertedIndexDeleteTest() {
+ super(InvertedIndexType.LSM, false);
+ }
+}
\ No newline at end of file
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
index 83b2558..737fad5 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common;
import java.io.IOException;
@@ -12,15 +27,15 @@
import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestUtils;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext.InvertedIndexType;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestUtils;
public abstract class AbstractInvertedIndexDeleteTest extends AbstractInvertedIndexTest {
-
+
protected final int numInsertRounds = AccessMethodTestsConfig.LSM_INVINDEX_NUM_INSERT_ROUNDS;
protected final int numDeleteRounds = AccessMethodTestsConfig.LSM_INVINDEX_NUM_INSERT_ROUNDS;
protected final boolean bulkLoad;
-
+
public AbstractInvertedIndexDeleteTest(InvertedIndexType invIndexType, boolean bulkLoad) {
super(invIndexType);
this.bulkLoad = bulkLoad;
@@ -28,10 +43,10 @@
protected void runTest(InvertedIndexTestContext testCtx, TupleGenerator tupleGen) throws IOException,
IndexException {
- IIndex invIndex = testCtx.getIndex();
+ IIndex invIndex = testCtx.getIndex();
invIndex.create();
invIndex.activate();
-
+
for (int i = 0; i < numInsertRounds; i++) {
if (bulkLoad) {
InvertedIndexTestUtils.bulkLoadInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
@@ -40,21 +55,22 @@
}
validateAndCheckIndex(testCtx);
- List<ITupleReference> documentCorpus = new ArrayList<ITupleReference>();
+ List<ITupleReference> documentCorpus = new ArrayList<ITupleReference>();
documentCorpus.addAll(testCtx.getDocumentCorpus());
-
+
// Delete all documents in a couple of rounds.
- int numTuplesPerDeleteRound = (int) Math.ceil((float) testCtx.getDocumentCorpus().size() / (float) numDeleteRounds);
+ int numTuplesPerDeleteRound = (int) Math.ceil((float) testCtx.getDocumentCorpus().size()
+ / (float) numDeleteRounds);
for (int j = 0; j < numDeleteRounds; j++) {
InvertedIndexTestUtils.deleteFromInvIndex(testCtx, harness.getRandom(), numTuplesPerDeleteRound);
validateAndCheckIndex(testCtx);
- }
+ }
}
-
+
invIndex.deactivate();
invIndex.destroy();
}
-
+
@Test
public void wordTokensInvIndexTest() throws IOException, IndexException {
InvertedIndexTestContext testCtx = InvertedIndexTestUtils.createWordInvIndexTestContext(harness, invIndexType);
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/LSMInvertedIndexTestHarness.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/LSMInvertedIndexTestHarness.java
index 5547980..16e1426 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/LSMInvertedIndexTestHarness.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/LSMInvertedIndexTestHarness.java
@@ -51,7 +51,7 @@
private static final long RANDOM_SEED = 50;
private static final int DEFAULT_DISK_PAGE_SIZE = 256;
private static final int DEFAULT_DISK_NUM_PAGES = 10000;
- private static final int DEFAULT_DISK_MAX_OPEN_FILES = 200;
+ private static final int DEFAULT_DISK_MAX_OPEN_FILES = 1000;
private static final int DEFAULT_MEM_PAGE_SIZE = 256;
private static final int DEFAULT_MEM_NUM_PAGES = 200;
private static final int DEFAULT_HYRACKS_FRAME_SIZE = 512;