Added delete test for in-memory inverted index (only adding key to LSM buddy BTree can lead to false positives, practically impossible to test). Fixed a bug in the counting BTree cursor when dealing with empty pages due to deletes.
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_inverted_index_updates_new@1852 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java
index 5429007..4f667b2 100644
--- a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java
+++ b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeCountingSearchCursor.java
@@ -106,7 +106,7 @@
}
tupleIndex = getLowKeyIndex();
- stopTupleIndex = getHighKeyIndex();
+ stopTupleIndex = getHighKeyIndex();
}
private void fetchNextLeafPage(int nextLeafPage) throws HyracksDataException {
@@ -165,7 +165,7 @@
if (count < 0) {
count = 0;
- while (stopTupleIndex >= 0) {
+ while (stopTupleIndex >= 0 || frame.getTupleCount() == 0) {
count += (stopTupleIndex - tupleIndex + 1);
int nextLeafPage = frame.getNextLeaf();
@@ -207,6 +207,7 @@
tupleIndex = 0;
page = null;
pred = null;
+ count = -1;
}
@Override
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
index 7e9e451..41cfe74 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
@@ -340,17 +340,12 @@
}
/**
- * The way we deal with inserts/deletes allows for the rare possibility of false-positive answers.
- * We assume that the user of this index performs a post-processing step to removes such false positives
- * (e.g., by not removing the original optimizable predicate).
- * For example, consider inserting a document with key '1' and tokens 'A' and 'B', then deleting that document.
- * The state of the index is that 'A,1' 'B,1' are in the in-memory inverted index, and '1' is in the deleted-keys BTree.
- * Now if we insert a document with key '1' and tokens 'C' and 'D', then we have 'A,1', 'B,1', 'C,1' and 'D,1' in
- * the in-memory inverted index with the key '1' not in the deleted-keys BTree.
- * So, the index will incorrectly return document '1' for a query 'A and B'.
- * The post-processing assumption allows for faster processing of deletes because only a single insert into a BTree
- * is needed as opposed to physically deleting all token,key pairs from the in-memory inverted index.
- * Further, the post-processing is often needed anyway, for example, to deal with collisions when using hashed tokens.
+ * Insert:
+ * - Delete key from deleted-keys BTree (ignore if key does not exist).
+ * - Insert document into in-memory inverted index.
+ * Delete:
+ * - Delete document from in-memory inverted index (ignore if it does not exist).
+ * - Insert key into deleted-keys BTree.
*/
@Override
public boolean insertUpdateOrDelete(ITupleReference tuple, IIndexOpContext ictx) throws HyracksDataException,
@@ -370,6 +365,8 @@
break;
}
case DELETE: {
+ // First remove all entries in the in-memory inverted index (if any).
+ ctx.memInvIndexAccessor.delete(tuple);
// Insert key into the deleted-keys BTree.
ctx.keysOnlyTuple.reset(tuple);
try {
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
index 1508708..02978bb 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
@@ -23,6 +23,7 @@
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeDuplicateKeyException;
import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeException;
+import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeNonExistentKeyException;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeLeafFrameType;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
@@ -135,7 +136,11 @@
while (ctx.tupleIter.hasNext()) {
ctx.tupleIter.next();
ITupleReference deleteTuple = ctx.tupleIter.getTuple();
- btreeAccessor.delete(deleteTuple);
+ try {
+ btreeAccessor.delete(deleteTuple);
+ } catch (BTreeNonExistentKeyException e) {
+ // Ignore this exception, since a document may have duplicate tokens.
+ }
}
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
index 202550d..7d16cb7 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
@@ -60,7 +60,7 @@
@Override
public void delete(ITupleReference tuple) throws HyracksDataException, IndexException {
- opCtx.reset(IndexOp.INSERT);
+ opCtx.reset(IndexOp.DELETE);
index.delete(tuple, btreeAccessor, opCtx);
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java
index 0401741..c182c87 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedListCursor.java
@@ -87,6 +87,8 @@
btreeSearchTuple.addTuple(tokenTuple);
btreePred.setLowKey(tokenTuple, true);
btreePred.setHighKey(tokenTuple, true);
+ btreeCursor.reset();
+ countingCursor.reset();
btreeAccessor.search(btreeCursor, btreePred);
}
@@ -144,7 +146,6 @@
} finally {
try {
countingCursor.close();
- countingCursor.reset();
} catch (HyracksDataException e) {
e.printStackTrace();
}
diff --git a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java
index da0cd9a..59485b2 100644
--- a/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java
+++ b/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java
@@ -76,6 +76,8 @@
public static final int LSM_INVINDEX_NUM_BULKLOAD_ROUNDS = 5;
public static final int LSM_INVINDEX_MAX_TREES_TO_MERGE = 5;
+ public static final int LSM_INVINDEX_NUM_INSERT_ROUNDS = 3;
+ public static final int LSM_INVINDEX_NUM_DELETE_ROUNDS = 3;
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexMergeTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexMergeTest.java
index 8bd2d4b..6d5c4e4 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexMergeTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexMergeTest.java
@@ -59,14 +59,7 @@
if (ioop != null) {
invIndexAccessor.merge(ioop);
}
-
- // Validate index and compare against expected index.
- invIndex.validate();
- if (invIndexType == InvertedIndexType.INMEMORY || invIndexType == InvertedIndexType.ONDISK) {
- // This comparison method exercises different features of these types of inverted indexes.
- InvertedIndexTestUtils.compareActualAndExpectedIndexes(testCtx);
- }
- InvertedIndexTestUtils.compareActualAndExpectedIndexesRangeSearch(testCtx);
+ validateAndCheckIndex(testCtx);
}
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
new file mode 100644
index 0000000..83b2558
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
@@ -0,0 +1,64 @@
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
+import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
+import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestUtils;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext.InvertedIndexType;
+
+public abstract class AbstractInvertedIndexDeleteTest extends AbstractInvertedIndexTest {
+
+ protected final int numInsertRounds = AccessMethodTestsConfig.LSM_INVINDEX_NUM_INSERT_ROUNDS;
+ protected final int numDeleteRounds = AccessMethodTestsConfig.LSM_INVINDEX_NUM_INSERT_ROUNDS;
+ protected final boolean bulkLoad;
+
+ public AbstractInvertedIndexDeleteTest(InvertedIndexType invIndexType, boolean bulkLoad) {
+ super(invIndexType);
+ this.bulkLoad = bulkLoad;
+ }
+
+ protected void runTest(InvertedIndexTestContext testCtx, TupleGenerator tupleGen) throws IOException,
+ IndexException {
+ IIndex invIndex = testCtx.getIndex();
+ invIndex.create();
+ invIndex.activate();
+
+ for (int i = 0; i < numInsertRounds; i++) {
+ if (bulkLoad) {
+ InvertedIndexTestUtils.bulkLoadInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
+ } else {
+ InvertedIndexTestUtils.insertIntoInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
+ }
+ validateAndCheckIndex(testCtx);
+
+ List<ITupleReference> documentCorpus = new ArrayList<ITupleReference>();
+ documentCorpus.addAll(testCtx.getDocumentCorpus());
+
+ // Delete all documents in a couple of rounds.
+ int numTuplesPerDeleteRound = (int) Math.ceil((float) testCtx.getDocumentCorpus().size() / (float) numDeleteRounds);
+ for (int j = 0; j < numDeleteRounds; j++) {
+ InvertedIndexTestUtils.deleteFromInvIndex(testCtx, harness.getRandom(), numTuplesPerDeleteRound);
+ validateAndCheckIndex(testCtx);
+ }
+ }
+
+ invIndex.deactivate();
+ invIndex.destroy();
+ }
+
+ @Test
+ public void wordTokensInvIndexTest() throws IOException, IndexException {
+ InvertedIndexTestContext testCtx = InvertedIndexTestUtils.createWordInvIndexTestContext(harness, invIndexType);
+ TupleGenerator tupleGen = InvertedIndexTestUtils.createStringDocumentTupleGen(harness.getRandom());
+ runTest(testCtx, tupleGen);
+ }
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexLoadTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexLoadTest.java
index 39e438f..b8f9ac5 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexLoadTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexLoadTest.java
@@ -49,20 +49,13 @@
} else {
InvertedIndexTestUtils.insertIntoInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
}
-
- // Validate index and compare against expected index.
- invIndex.validate();
- if (invIndexType == InvertedIndexType.INMEMORY || invIndexType == InvertedIndexType.ONDISK) {
- // This comparison method exercises different features of these types of inverted indexes.
- InvertedIndexTestUtils.compareActualAndExpectedIndexes(testCtx);
- }
- InvertedIndexTestUtils.compareActualAndExpectedIndexesRangeSearch(testCtx);
+ validateAndCheckIndex(testCtx);
}
invIndex.deactivate();
invIndex.destroy();
}
-
+
@Test
public void wordTokensInvIndexTest() throws IOException, IndexException {
InvertedIndexTestContext testCtx = InvertedIndexTestUtils.createWordInvIndexTestContext(harness, invIndexType);
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
index b2692c3..1c5fcf6 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
@@ -133,7 +133,7 @@
// Get expected results.
List<Integer> expectedResults = new ArrayList<Integer>();
InvertedIndexTestUtils.getExpectedResults(scanCountArray, testCtx.getCheckTuples(),
- testCtx.getDeletedKeys(), searchDocument, tokenizer, testCtx.getFieldSerdes()[0],
+ searchDocument, tokenizer, testCtx.getFieldSerdes()[0],
searchModifier, expectedResults);
Iterator<Integer> expectedIter = expectedResults.iterator();
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexTest.java
index 5f28813..6636d4c 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexTest.java
@@ -20,7 +20,11 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext.InvertedIndexType;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestUtils;
public abstract class AbstractInvertedIndexTest {
protected final LSMInvertedIndexTestHarness harness = new LSMInvertedIndexTestHarness();
@@ -42,4 +46,15 @@
public void tearDown() throws HyracksDataException {
harness.tearDown();
}
+
+ protected void validateAndCheckIndex(InvertedIndexTestContext testCtx) throws HyracksDataException, IndexException {
+ IIndex invIndex = testCtx.getIndex();
+ // Validate index and compare against expected index.
+ invIndex.validate();
+ if (invIndexType == InvertedIndexType.INMEMORY || invIndexType == InvertedIndexType.ONDISK) {
+ // This comparison method exercises different features of these types of inverted indexes.
+ InvertedIndexTestUtils.compareActualAndExpectedIndexes(testCtx);
+ }
+ InvertedIndexTestUtils.compareActualAndExpectedIndexesRangeSearch(testCtx);
+ }
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexDeleteTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexDeleteTest.java
new file mode 100644
index 0000000..ef2937c
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexDeleteTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexDeleteTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTestContext.InvertedIndexType;
+
+public class InMemoryInvertedIndexDeleteTest extends AbstractInvertedIndexDeleteTest {
+
+ public InMemoryInvertedIndexDeleteTest() {
+ super(InvertedIndexType.INMEMORY, false);
+ }
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestContext.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestContext.java
index c3f5ac4..7152e61 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestContext.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestContext.java
@@ -28,7 +28,6 @@
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
import edu.uci.ics.hyracks.dataflow.common.util.SerdeUtils;
import edu.uci.ics.hyracks.dataflow.common.util.TupleUtils;
import edu.uci.ics.hyracks.storage.am.btree.OrderedIndexTestContext;
@@ -52,18 +51,16 @@
protected IInvertedIndex invIndex;
protected IBinaryComparatorFactory[] allCmpFactories;
protected IBinaryTokenizerFactory tokenizerFactory;
- protected InvertedIndexTokenizingTupleIterator indexInsertIter;
+ protected InvertedIndexTokenizingTupleIterator indexTupleIter;
protected HashSet<Comparable> allTokens = new HashSet<Comparable>();
protected List<ITupleReference> documentCorpus = new ArrayList<ITupleReference>();
- // Set containing the keys of deleted documents.
- protected HashSet<Integer> deletedKeys = new HashSet<Integer>();
public InvertedIndexTestContext(ISerializerDeserializer[] fieldSerdes, IIndex index,
IBinaryTokenizerFactory tokenizerFactory) {
super(fieldSerdes, index);
this.tokenizerFactory = tokenizerFactory;
invIndex = (IInvertedIndex) index;
- indexInsertIter = new InvertedIndexTokenizingTupleIterator(invIndex.getTokenTypeTraits().length,
+ indexTupleIter = new InvertedIndexTokenizingTupleIterator(invIndex.getTokenTypeTraits().length,
invIndex.getInvListTypeTraits().length, tokenizerFactory.createTokenizer());
}
@@ -146,27 +143,25 @@
public void insertCheckTuples(ITupleReference tuple, Collection<CheckTuple> checkTuples)
throws HyracksDataException {
documentCorpus.add(TupleUtils.copyTuple(tuple));
- indexInsertIter.reset(tuple);
- while (indexInsertIter.hasNext()) {
- indexInsertIter.next();
- ITupleReference insertTuple = indexInsertIter.getTuple();
+ indexTupleIter.reset(tuple);
+ while (indexTupleIter.hasNext()) {
+ indexTupleIter.next();
+ ITupleReference insertTuple = indexTupleIter.getTuple();
CheckTuple checkTuple = createCheckTuple(insertTuple);
insertCheckTuple(checkTuple, getCheckTuples());
allTokens.add(checkTuple.getField(0));
}
- // Remove key from the deleted-keys set.
- int numTokenFields = invIndex.getTokenTypeTraits().length;
- int key = IntegerSerializerDeserializer.getInt(tuple.getFieldData(numTokenFields),
- tuple.getFieldStart(numTokenFields));
- deletedKeys.remove(Integer.valueOf(key));
}
- public void deleteTuple(ITupleReference tuple) throws HyracksDataException {
- // Add key to the deleted-keys set.
- int numTokenFields = invIndex.getTokenTypeTraits().length;
- int key = IntegerSerializerDeserializer.getInt(tuple.getFieldData(numTokenFields),
- tuple.getFieldStart(numTokenFields));
- deletedKeys.add(Integer.valueOf(key));
+ public void deleteCheckTuples(ITupleReference tuple, Collection<CheckTuple> checkTuples)
+ throws HyracksDataException {
+ indexTupleIter.reset(tuple);
+ while (indexTupleIter.hasNext()) {
+ indexTupleIter.next();
+ ITupleReference insertTuple = indexTupleIter.getTuple();
+ CheckTuple checkTuple = createCheckTuple(insertTuple);
+ deleteCheckTuple(checkTuple, getCheckTuples());
+ }
}
public HashSet<Comparable> getAllTokens() {
@@ -197,8 +192,4 @@
public List<ITupleReference> getDocumentCorpus() {
return documentCorpus;
}
-
- public HashSet<Integer> getDeletedKeys() {
- return deletedKeys;
- }
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
index e35e973..1c8809d 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
@@ -24,7 +24,6 @@
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;
-import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
@@ -127,6 +126,21 @@
}
}
+ public static void deleteFromInvIndex(InvertedIndexTestContext testCtx, Random rnd, int numDocsToDelete)
+ throws HyracksDataException, IndexException {
+ List<ITupleReference> documentCorpus = testCtx.getDocumentCorpus();
+ for (int i = 0; i < numDocsToDelete && !documentCorpus.isEmpty(); i++) {
+ int size = documentCorpus.size();
+ int tupleIndex = Math.abs(rnd.nextInt()) % size;
+ ITupleReference deleteTuple = documentCorpus.get(tupleIndex);
+ testCtx.getIndexAccessor().delete(deleteTuple);
+ testCtx.deleteCheckTuples(deleteTuple, testCtx.getCheckTuples());
+ // Swap tupleIndex with last element.
+ documentCorpus.set(tupleIndex, documentCorpus.get(size - 1));
+ documentCorpus.remove(size - 1);
+ }
+ }
+
/**
* Compares actual and expected indexes using the rangeSearch() method of the inverted-index accessor.
*/
@@ -216,6 +230,7 @@
checkLowKey.appendField(token);
CheckTuple checkHighKey = new CheckTuple(tokenFieldCount, tokenFieldCount);
checkHighKey.appendField(token);
+
SortedSet<CheckTuple> expectedInvList = OrderedIndexTestUtils.getPrefixExpectedSubset(
testCtx.getCheckTuples(), checkLowKey, checkHighKey);
Iterator<CheckTuple> expectedInvListIter = expectedInvList.iterator();
@@ -255,7 +270,7 @@
* Determine the expected results with the simple ScanCount algorithm.
*/
@SuppressWarnings("unchecked")
- public static void getExpectedResults(int[] scanCountArray, TreeSet<CheckTuple> checkTuples, HashSet<Integer> deletedKeys,
+ public static void getExpectedResults(int[] scanCountArray, TreeSet<CheckTuple> checkTuples,
ITupleReference searchDocument, IBinaryTokenizer tokenizer, ISerializerDeserializer tokenSerde,
IInvertedIndexSearchModifier searchModifier, List<Integer> expectedResults) throws IOException {
// Reset scan count array.
@@ -296,7 +311,7 @@
// Run through scan count array, and see whether elements satisfy the given occurrence threshold.
expectedResults.clear();
for (int i = 0; i < scanCountArray.length; i++) {
- if (scanCountArray[i] >= occurrenceThreshold && !deletedKeys.contains(Integer.valueOf(i))) {
+ if (scanCountArray[i] >= occurrenceThreshold) {
expectedResults.add(i);
}
}
diff --git a/pom.xml b/pom.xml
index a08341e..78f9589 100644
--- a/pom.xml
+++ b/pom.xml
@@ -11,21 +11,6 @@
<jvm.extraargs></jvm.extraargs>
</properties>
- <profiles>
- <profile>
- <id>macosx</id>
- <activation>
- <os>
- <name>mac os x</name>
- </os>
- <jdk>1.7</jdk>
- </activation>
- <properties>
- <jvm.extraargs>-Djava.nio.channels.spi.SelectorProvider=sun.nio.ch.KQueueSelectorProvider -Xms1024m -Xmx1024m</jvm.extraargs>
- </properties>
- </profile>
- </profiles>
-
<build>
<plugins>
<plugin>