First steps to getting delete working.
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_inverted_index_updates_new@1851 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
index 7156c8b..7e9e451 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
@@ -25,6 +25,8 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeDuplicateKeyException;
+import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeNonExistentKeyException;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeLeafFrameType;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
@@ -336,11 +338,56 @@
public IIndexBulkLoader createBulkLoader(float fillFactor, boolean verifyInput) throws IndexException {
return new LSMInvertedIndexBulkLoader(fillFactor, verifyInput);
}
-
+
+ /**
+ * The way we deal with inserts/deletes allows for the rare possibility of false-positive answers.
+ * We assume that the user of this index performs a post-processing step to removes such false positives
+ * (e.g., by not removing the original optimizable predicate).
+ * For example, consider inserting a document with key '1' and tokens 'A' and 'B', then deleting that document.
+ * The state of the index is that 'A,1' 'B,1' are in the in-memory inverted index, and '1' is in the deleted-keys BTree.
+ * Now if we insert a document with key '1' and tokens 'C' and 'D', then we have 'A,1', 'B,1', 'C,1' and 'D,1' in
+ * the in-memory inverted index with the key '1' not in the deleted-keys BTree.
+ * So, the index will incorrectly return document '1' for a query 'A and B'.
+ * The post-processing assumption allows for faster processing of deletes because only a single insert into a BTree
+ * is needed as opposed to physically deleting all token,key pairs from the in-memory inverted index.
+ * Further, the post-processing is often needed anyway, for example, to deal with collisions when using hashed tokens.
+ */
+ @Override
public boolean insertUpdateOrDelete(ITupleReference tuple, IIndexOpContext ictx) throws HyracksDataException,
IndexException {
LSMInvertedIndexOpContext ctx = (LSMInvertedIndexOpContext) ictx;
- ctx.insertAccessor.insert(tuple);
+ switch (ctx.getIndexOp()) {
+ case INSERT: {
+ // First remove the possibly deleted key from the deleted-keys BTree.
+ ctx.keysOnlyTuple.reset(tuple);
+ try {
+ ctx.deletedKeysBTreeAccessor.delete(tuple);
+ } catch (BTreeNonExistentKeyException e) {
+ // The key did not exist in the deleted-keys BTree.
+ }
+ // Insert into the in-memory inverted index.
+ ctx.memInvIndexAccessor.insert(tuple);
+ break;
+ }
+ case DELETE: {
+ // Insert key into the deleted-keys BTree.
+ ctx.keysOnlyTuple.reset(tuple);
+ try {
+ ctx.deletedKeysBTreeAccessor.insert(tuple);
+ } catch (BTreeDuplicateKeyException e) {
+ // Key has already been deleted.
+ }
+ break;
+ }
+ case PHYSICALDELETE: {
+ // TODO: Think about whether we actually need this, since this is a secondary index.
+ ctx.memInvIndexAccessor.delete(tuple);
+ break;
+ }
+ default: {
+ throw new UnsupportedOperationException("Operation " + ctx.getIndexOp() + " not supported.");
+ }
+ }
return true;
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java
index c6d30d1..b478b98 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java
@@ -46,21 +46,28 @@
this.invIndex = invIndex;
}
+ @Override
public void insert(ITupleReference tuple) throws HyracksDataException, IndexException {
ctx.reset(IndexOp.INSERT);
lsmHarness.insertUpdateOrDelete(tuple, ctx);
}
+ @Override
+ public void delete(ITupleReference tuple) throws HyracksDataException, IndexException {
+ ctx.reset(IndexOp.DELETE);
+ lsmHarness.insertUpdateOrDelete(tuple, ctx);
+ }
+
+ @Override
+ public void physicalDelete(ITupleReference tuple) throws HyracksDataException, IndexException {
+ ctx.reset(IndexOp.PHYSICALDELETE);
+ lsmHarness.insertUpdateOrDelete(tuple, ctx);
+ }
+
public void search(IIndexCursor cursor, ISearchPredicate searchPred) throws HyracksDataException, IndexException {
ctx.reset(IndexOp.SEARCH);
lsmHarness.search(cursor, searchPred, ctx, true);
}
-
- @Override
- public void delete(ITupleReference tuple) throws HyracksDataException, IndexException {
- // TODO Auto-generated method stub
-
- }
public IIndexCursor createSearchCursor() {
return new LSMInvertedIndexSearchCursor();
@@ -106,11 +113,6 @@
}
@Override
- public void physicalDelete(ITupleReference tuple) throws HyracksDataException, IndexException {
- // TODO: Do we need this?
- }
-
- @Override
public void update(ITupleReference tuple) throws HyracksDataException, IndexException {
// TODO Auto-generated method stub
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexOpContext.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexOpContext.java
index 8873653..2ff02cc 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexOpContext.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexOpContext.java
@@ -20,6 +20,7 @@
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOp;
+import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
@@ -29,10 +30,13 @@
private final IInvertedIndex memInvIndex;
private final IIndex memDeletedKeysBTree;
+ // Tuple that only has the inverted-index elements (aka keys), projecting away the document fields.
+ public PermutingTupleReference keysOnlyTuple;
+
// Accessor to the in-memory inverted index.
- public IInvertedIndexAccessor insertAccessor;
+ public IInvertedIndexAccessor memInvIndexAccessor;
// Accessor to the deleted-keys BTree.
- public IIndexAccessor deleteAccessor;
+ public IIndexAccessor deletedKeysBTreeAccessor;
public LSMInvertedIndexOpContext(IInvertedIndex memInvIndex, IIndex memDeletedKeysBTree) {
this.memInvIndex = memInvIndex;
@@ -47,25 +51,30 @@
// TODO: Ignore opcallback for now.
public void reset(IndexOp newOp) {
switch (newOp) {
- case INSERT: {
- if (insertAccessor == null) {
- insertAccessor = (IInvertedIndexAccessor) memInvIndex.createAccessor(
+ case INSERT:
+ case DELETE:
+ case PHYSICALDELETE: {
+ if (deletedKeysBTreeAccessor == null) {
+ memInvIndexAccessor = (IInvertedIndexAccessor) memInvIndex.createAccessor(
NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
- }
- break;
- }
- case DELETE: {
- if (deleteAccessor == null) {
- deleteAccessor = memDeletedKeysBTree.createAccessor(NoOpOperationCallback.INSTANCE,
+ deletedKeysBTreeAccessor = memDeletedKeysBTree.createAccessor(NoOpOperationCallback.INSTANCE,
NoOpOperationCallback.INSTANCE);
+ // Project away the document fields, leaving only the key fields.
+ int numTokenFields = memInvIndex.getTokenTypeTraits().length;
+ int numKeyFields = memInvIndex.getInvListTypeTraits().length;
+ int[] keyFieldPermutation = new int[memInvIndex.getInvListTypeTraits().length];
+ for (int i = 0; i < numKeyFields; i++) {
+ keyFieldPermutation[i] = numTokenFields + i;
+ }
+ keysOnlyTuple = new PermutingTupleReference(keyFieldPermutation);
}
break;
}
}
op = newOp;
}
-
- public IndexOp getOp() {
+
+ public IndexOp getIndexOp() {
return op;
}
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
index f9caad9..1508708 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
@@ -111,13 +111,13 @@
btree.validate();
}
- public boolean insert(ITupleReference tuple, BTreeAccessor btreeAccessor, IIndexOpContext ictx)
+ public void insert(ITupleReference tuple, BTreeAccessor btreeAccessor, IIndexOpContext ictx)
throws HyracksDataException, IndexException {
InMemoryInvertedIndexOpContext ctx = (InMemoryInvertedIndexOpContext) ictx;
- ctx.insertTupleIter.reset(tuple);
- while (ctx.insertTupleIter.hasNext()) {
- ctx.insertTupleIter.next();
- ITupleReference insertTuple = ctx.insertTupleIter.getTuple();
+ ctx.tupleIter.reset(tuple);
+ while (ctx.tupleIter.hasNext()) {
+ ctx.tupleIter.next();
+ ITupleReference insertTuple = ctx.tupleIter.getTuple();
try {
btreeAccessor.insert(insertTuple);
} catch (BTreeDuplicateKeyException e) {
@@ -126,7 +126,17 @@
// we can safely ignore this exception.
}
}
- return true;
+ }
+
+ public void delete(ITupleReference tuple, BTreeAccessor btreeAccessor, IIndexOpContext ictx)
+ throws HyracksDataException, IndexException {
+ InMemoryInvertedIndexOpContext ctx = (InMemoryInvertedIndexOpContext) ictx;
+ ctx.tupleIter.reset(tuple);
+ while (ctx.tupleIter.hasNext()) {
+ ctx.tupleIter.next();
+ ITupleReference deleteTuple = ctx.tupleIter.getTuple();
+ btreeAccessor.delete(deleteTuple);
+ }
}
@Override
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
index 7cc33e5..202550d 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
@@ -52,12 +52,19 @@
NoOpOperationCallback.INSTANCE);
}
+ @Override
public void insert(ITupleReference tuple) throws HyracksDataException, IndexException {
opCtx.reset(IndexOp.INSERT);
index.insert(tuple, btreeAccessor, opCtx);
}
@Override
+ public void delete(ITupleReference tuple) throws HyracksDataException, IndexException {
+ opCtx.reset(IndexOp.INSERT);
+ index.delete(tuple, btreeAccessor, opCtx);
+ }
+
+ @Override
public IIndexCursor createSearchCursor() {
return new OnDiskInvertedIndexSearchCursor(searcher);
}
@@ -93,11 +100,6 @@
public BTreeAccessor getBTreeAccessor() {
return btreeAccessor;
}
-
- @Override
- public void delete(ITupleReference tuple) throws HyracksDataException, IndexException {
- throw new UnsupportedOperationException("Delete not supported by in-memory inverted index.");
- }
@Override
public void update(ITupleReference tuple) throws HyracksDataException, IndexException {
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexOpContext.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexOpContext.java
index 5009c73..bcdb12d 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexOpContext.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexOpContext.java
@@ -25,7 +25,7 @@
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexInsertTupleIterator;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.InvertedIndexTokenizingTupleIterator;
public class InMemoryInvertedIndexOpContext implements IIndexOpContext {
public IndexOp op;
@@ -40,7 +40,7 @@
// To generate in-memory BTree tuples for insertions.
private final IBinaryTokenizerFactory tokenizerFactory;
- public InvertedIndexInsertTupleIterator insertTupleIter;
+ public InvertedIndexTokenizingTupleIterator tupleIter;
public InMemoryInvertedIndexOpContext(BTree btree, IBinaryComparatorFactory[] tokenCmpFactories,
IBinaryTokenizerFactory tokenizerFactory) {
@@ -52,10 +52,13 @@
@Override
public void reset(IndexOp newOp) {
switch (newOp) {
- case INSERT: {
- IBinaryTokenizer tokenizer = tokenizerFactory.createTokenizer();
- insertTupleIter = new InvertedIndexInsertTupleIterator(tokenCmpFactories.length, btree.getFieldCount()
- - tokenCmpFactories.length, tokenizer);
+ case INSERT:
+ case DELETE: {
+ if (tupleIter == null) {
+ IBinaryTokenizer tokenizer = tokenizerFactory.createTokenizer();
+ tupleIter = new InvertedIndexTokenizingTupleIterator(tokenCmpFactories.length,
+ btree.getFieldCount() - tokenCmpFactories.length, tokenizer);
+ }
break;
}
case SEARCH: {
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexInsertTupleIterator.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTokenizingTupleIterator.java
similarity index 94%
rename from hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexInsertTupleIterator.java
rename to hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTokenizingTupleIterator.java
index 44ef3c1..633213a 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexInsertTupleIterator.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTokenizingTupleIterator.java
@@ -25,7 +25,7 @@
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IToken;
// TODO: We can possibly avoid copying the data into a new tuple here.
-public class InvertedIndexInsertTupleIterator {
+public class InvertedIndexTokenizingTupleIterator {
// Field that is expected to be tokenized.
private final int DOC_FIELD_INDEX = 0;
@@ -35,7 +35,7 @@
private final IBinaryTokenizer tokenizer;
private ITupleReference inputTuple;
- public InvertedIndexInsertTupleIterator(int tokensFieldCount, int invListFieldCount, IBinaryTokenizer tokenizer) {
+ public InvertedIndexTokenizingTupleIterator(int tokensFieldCount, int invListFieldCount, IBinaryTokenizer tokenizer) {
this.invListFieldCount = invListFieldCount;
this.tupleBuilder = new ArrayTupleBuilder(tokensFieldCount + invListFieldCount);
this.tupleReference = new ArrayTupleReference();
diff --git a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java
index 95abab4..65d52d5 100644
--- a/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java
+++ b/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java
@@ -268,6 +268,7 @@
return memComponent.getRTree().getFileId();
}
+ @Override
public boolean insertUpdateOrDelete(ITupleReference tuple, IIndexOpContext ictx) throws HyracksDataException,
TreeIndexException {
LSMRTreeOpContext ctx = (LSMRTreeOpContext) ictx;
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
index 7042d01..b2692c3 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
@@ -132,8 +132,9 @@
// Get expected results.
List<Integer> expectedResults = new ArrayList<Integer>();
- InvertedIndexTestUtils.getExpectedResults(scanCountArray, testCtx.getCheckTuples(), searchDocument,
- tokenizer, testCtx.getFieldSerdes()[0], searchModifier, expectedResults);
+ InvertedIndexTestUtils.getExpectedResults(scanCountArray, testCtx.getCheckTuples(),
+ testCtx.getDeletedKeys(), searchDocument, tokenizer, testCtx.getFieldSerdes()[0],
+ searchModifier, expectedResults);
Iterator<Integer> expectedIter = expectedResults.iterator();
Iterator<Integer> actualIter = actualResults.iterator();
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestContext.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestContext.java
index 54cdc8c..c3f5ac4 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestContext.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestContext.java
@@ -28,6 +28,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
import edu.uci.ics.hyracks.dataflow.common.util.SerdeUtils;
import edu.uci.ics.hyracks.dataflow.common.util.TupleUtils;
import edu.uci.ics.hyracks.storage.am.btree.OrderedIndexTestContext;
@@ -48,18 +49,21 @@
LSM
};
+ protected IInvertedIndex invIndex;
protected IBinaryComparatorFactory[] allCmpFactories;
protected IBinaryTokenizerFactory tokenizerFactory;
- protected InvertedIndexInsertTupleIterator indexInsertIter;
- protected HashSet<Comparable> allTokens = new HashSet<Comparable>();
+ protected InvertedIndexTokenizingTupleIterator indexInsertIter;
+ protected HashSet<Comparable> allTokens = new HashSet<Comparable>();
protected List<ITupleReference> documentCorpus = new ArrayList<ITupleReference>();
+ // Set containing the keys of deleted documents.
+ protected HashSet<Integer> deletedKeys = new HashSet<Integer>();
public InvertedIndexTestContext(ISerializerDeserializer[] fieldSerdes, IIndex index,
IBinaryTokenizerFactory tokenizerFactory) {
super(fieldSerdes, index);
this.tokenizerFactory = tokenizerFactory;
- IInvertedIndex invIndex = (IInvertedIndex) index;
- indexInsertIter = new InvertedIndexInsertTupleIterator(invIndex.getTokenTypeTraits().length,
+ invIndex = (IInvertedIndex) index;
+ indexInsertIter = new InvertedIndexTokenizingTupleIterator(invIndex.getTokenTypeTraits().length,
invIndex.getInvListTypeTraits().length, tokenizerFactory.createTokenizer());
}
@@ -139,9 +143,10 @@
return testCtx;
}
- public void insertCheckTuples(ITupleReference tuple, Collection<CheckTuple> checkTuples) throws HyracksDataException {
+ public void insertCheckTuples(ITupleReference tuple, Collection<CheckTuple> checkTuples)
+ throws HyracksDataException {
documentCorpus.add(TupleUtils.copyTuple(tuple));
- indexInsertIter.reset(tuple);
+ indexInsertIter.reset(tuple);
while (indexInsertIter.hasNext()) {
indexInsertIter.next();
ITupleReference insertTuple = indexInsertIter.getTuple();
@@ -149,16 +154,19 @@
insertCheckTuple(checkTuple, getCheckTuples());
allTokens.add(checkTuple.getField(0));
}
+ // Remove key from the deleted-keys set.
+ int numTokenFields = invIndex.getTokenTypeTraits().length;
+ int key = IntegerSerializerDeserializer.getInt(tuple.getFieldData(numTokenFields),
+ tuple.getFieldStart(numTokenFields));
+ deletedKeys.remove(Integer.valueOf(key));
}
- public void deleteCheckTuples(ITupleReference tuple, Collection<CheckTuple> checkTuples) throws HyracksDataException {
- indexInsertIter.reset(tuple);
- while (indexInsertIter.hasNext()) {
- indexInsertIter.next();
- ITupleReference deteleTuple = indexInsertIter.getTuple();
- CheckTuple checkTuple = createCheckTuple(deteleTuple);
- deleteCheckTuple(checkTuple, getCheckTuples());
- }
+ public void deleteTuple(ITupleReference tuple) throws HyracksDataException {
+ // Add key to the deleted-keys set.
+ int numTokenFields = invIndex.getTokenTypeTraits().length;
+ int key = IntegerSerializerDeserializer.getInt(tuple.getFieldData(numTokenFields),
+ tuple.getFieldStart(numTokenFields));
+ deletedKeys.add(Integer.valueOf(key));
}
public HashSet<Comparable> getAllTokens() {
@@ -189,4 +197,8 @@
public List<ITupleReference> getDocumentCorpus() {
return documentCorpus;
}
+
+ public HashSet<Integer> getDeletedKeys() {
+ return deletedKeys;
+ }
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
index 4fc43f1..e35e973 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
@@ -24,6 +24,7 @@
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
@@ -254,7 +255,7 @@
* Determine the expected results with the simple ScanCount algorithm.
*/
@SuppressWarnings("unchecked")
- public static void getExpectedResults(int[] scanCountArray, TreeSet<CheckTuple> checkTuples,
+ public static void getExpectedResults(int[] scanCountArray, TreeSet<CheckTuple> checkTuples, HashSet<Integer> deletedKeys,
ITupleReference searchDocument, IBinaryTokenizer tokenizer, ISerializerDeserializer tokenSerde,
IInvertedIndexSearchModifier searchModifier, List<Integer> expectedResults) throws IOException {
// Reset scan count array.
@@ -295,7 +296,7 @@
// Run through scan count array, and see whether elements satisfy the given occurrence threshold.
expectedResults.clear();
for (int i = 0; i < scanCountArray.length; i++) {
- if (scanCountArray[i] >= occurrenceThreshold) {
+ if (scanCountArray[i] >= occurrenceThreshold && !deletedKeys.contains(Integer.valueOf(i))) {
expectedResults.add(i);
}
}