1. Fixed bugs in TOccurrenceSearcher related to low occurrence thresholds.
2. Added search test for inverted index.
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_indexes@455 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexSearcher.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexSearcher.java
index db031e5..9a591d0 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexSearcher.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedIndexSearcher.java
@@ -26,5 +26,5 @@
public IFrameTupleAccessor createResultFrameTupleAccessor();
public ITupleReference createResultTupleReference();
public List<ByteBuffer> getResultBuffers();
- public int getNumValidResultBuffers();
+ public int getNumValidResultBuffers();
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java
index d29ebf0..9309772 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java
@@ -45,6 +45,10 @@
this.fileId = fileId;
}
+ public void close() { // counterpart to the fileId assignment just above: discards the stored file id
+ this.fileId = -1; // presumably -1 is a "no file" sentinel; confirm that callers treat it as such
+ }
+
public BulkLoadContext beginBulkLoad(IInvertedListBuilder invListBuilder, int hyracksFrameSize) throws HyracksDataException {
BulkLoadContext ctx = new BulkLoadContext(invListBuilder, hyracksFrameSize);
ctx.init(rootPageId, fileId);
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndexException.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndexException.java
new file mode 100644
index 0000000..5ba852a
--- /dev/null
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndexException.java
@@ -0,0 +1,8 @@
+package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
+
+public class InvertedIndexException extends Exception { // checked exception for inverted-index failures; superclass of OccurrenceThresholdPanicException
+ private static final long serialVersionUID = 1L; // explicit serial version, since Exception is Serializable
+ public InvertedIndexException(String msg) { // msg: description of the failure, forwarded to Exception(String)
+ super(msg);
+ }
+}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/OccurrenceThresholdPanicException.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/OccurrenceThresholdPanicException.java
new file mode 100644
index 0000000..4711fb6
--- /dev/null
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/OccurrenceThresholdPanicException.java
@@ -0,0 +1,9 @@
+package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
+
+public class OccurrenceThresholdPanicException extends InvertedIndexException { // thrown by TOccurrenceSearcher.search when the occurrence threshold is <= 0
+ private static final long serialVersionUID = 1L; // explicit serial version, since exceptions are Serializable
+
+ public OccurrenceThresholdPanicException(String msg) { // msg: description of the failure, forwarded to the superclass
+ super(msg);
+ }
+}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/SearchResultCursor.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/SearchResultCursor.java
index f7236bd..f1fe841 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/SearchResultCursor.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/SearchResultCursor.java
@@ -30,13 +30,13 @@
private FixedSizeTupleReference resultTuple;
private int numResultBuffers;
private int currentBufferIndex = 0;
- private int tupleIndex = 0;
-
+ private int tupleIndex = 0;
+
public SearchResultCursor(IFrameTupleAccessor fta, ITupleReference resultTuple) {
this.fta = fta;
this.resultTuple = (FixedSizeTupleReference)resultTuple;
}
-
+
@Override
public boolean hasNext() {
if (currentBufferIndex < numResultBuffers && tupleIndex < fta.getTupleCount())
@@ -46,17 +46,16 @@
}
@Override
- public void next() {
- resultTuple.reset(fta.getBuffer().array(), fta.getTupleStartOffset(tupleIndex));
+ public void next() { // points resultTuple at the tuple at tupleIndex, then advances the position
+ resultTuple.reset(fta.getBuffer().array(), fta.getTupleStartOffset(tupleIndex)); // resultTuple is a view into the current frame's backing array
tupleIndex++;
- if(tupleIndex >= fta.getTupleCount()) {
- if(currentBufferIndex + 1 < numResultBuffers) {
+ if(tupleIndex >= fta.getTupleCount()) { // the current frame has no more tuples
+ if(currentBufferIndex + 1 < numResultBuffers) { // another valid result buffer follows
currentBufferIndex++;
fta.reset(resultBuffers.get(currentBufferIndex));
- resultTuple.reset(fta.getBuffer().array(), fta.getTupleStartOffset(0));
- tupleIndex = 1;
+ tupleIndex = 0; // only reposition; resultTuple keeps the tuple just returned and the next call reads tuple 0 of the new frame
}
- }
+ }
}
@Override
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcher.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcher.java
index 1b03a5a..b2f289a 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcher.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcher.java
@@ -52,282 +52,268 @@
public class TOccurrenceSearcher implements IInvertedIndexSearcher {
- protected final IHyracksStageletContext ctx;
- protected final FixedSizeFrameTupleAppender resultFrameTupleApp;
- protected final FixedSizeFrameTupleAccessor resultFrameTupleAcc;
- protected final FixedSizeTupleReference resultTuple;
- protected final int invListKeyLength;
- protected int currentNumResults;
-
- protected List<ByteBuffer> newResultBuffers = new ArrayList<ByteBuffer>();
- protected List<ByteBuffer> prevResultBuffers = new ArrayList<ByteBuffer>();
- protected List<ByteBuffer> swap = null;
- protected int maxResultBufIdx = 0;
+ protected final IHyracksStageletContext ctx;
+ protected final FixedSizeFrameTupleAppender resultFrameTupleApp;
+ protected final FixedSizeFrameTupleAccessor resultFrameTupleAcc;
+ protected final FixedSizeTupleReference resultTuple;
+ protected final int invListKeyLength;
+ protected int currentNumResults;
- protected final IBTreeLeafFrame leafFrame;
- protected final IBTreeInteriorFrame interiorFrame;
- protected final IBTreeCursor btreeCursor;
- protected final FrameTupleReference searchKey = new FrameTupleReference();
- protected final RangePredicate btreePred = new RangePredicate(true, null, null, true, true, null, null);
- protected final BTreeOpContext btreeOpCtx;
-
- protected RecordDescriptor queryTokenRecDesc = new RecordDescriptor(
- new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
- protected ArrayTupleBuilder queryTokenBuilder = new ArrayTupleBuilder(queryTokenRecDesc.getFields().length);
- protected DataOutput queryTokenDos = queryTokenBuilder.getDataOutput();
- protected FrameTupleAppender queryTokenAppender;
- protected ByteBuffer queryTokenFrame;
-
- protected final InvertedIndex invIndex;
- protected final IBinaryTokenizer queryTokenizer;
- protected final ITypeTrait[] invListFieldsWithCount;
- protected int occurrenceThreshold;
-
- protected final int cursorCacheSize = 10;
- protected List<IInvertedListCursor> invListCursorCache = new ArrayList<IInvertedListCursor>(cursorCacheSize);
- protected List<IInvertedListCursor> invListCursors = new ArrayList<IInvertedListCursor>(cursorCacheSize);
-
- public TOccurrenceSearcher(IHyracksStageletContext ctx, InvertedIndex invIndex, IBinaryTokenizer queryTokenizer) {
- this.ctx = ctx;
- this.invIndex = invIndex;
- this.queryTokenizer = queryTokenizer;
+ protected List<ByteBuffer> newResultBuffers = new ArrayList<ByteBuffer>();
+ protected List<ByteBuffer> prevResultBuffers = new ArrayList<ByteBuffer>();
+ protected List<ByteBuffer> swap = null;
+ protected int maxResultBufIdx = 0;
- leafFrame = invIndex.getBTree().getLeafFrameFactory().getFrame();
- interiorFrame = invIndex.getBTree().getInteriorFrameFactory().getFrame();
+ protected final IBTreeLeafFrame leafFrame;
+ protected final IBTreeInteriorFrame interiorFrame;
+ protected final IBTreeCursor btreeCursor;
+ protected final FrameTupleReference searchKey = new FrameTupleReference();
+ protected final RangePredicate btreePred = new RangePredicate(true, null, null, true, true, null, null);
+ protected final BTreeOpContext btreeOpCtx;
- btreeCursor = new RangeSearchCursor(leafFrame);
- ITypeTrait[] invListFields = invIndex.getInvListElementCmp().getTypeTraits();
- invListFieldsWithCount = new TypeTrait[invListFields.length + 1];
- int tmp = 0;
- for(int i = 0; i < invListFields.length; i++) {
- invListFieldsWithCount[i] = invListFields[i];
- tmp += invListFields[i].getStaticallyKnownDataLength();
- }
- // using an integer for counting occurrences
- invListFieldsWithCount[invListFields.length] = new TypeTrait(4);
- invListKeyLength = tmp;
-
- btreeOpCtx = invIndex.getBTree().createOpContext(TreeIndexOp.TI_SEARCH, leafFrame,
- interiorFrame, null);
-
- resultFrameTupleApp = new FixedSizeFrameTupleAppender(ctx.getFrameSize(), invListFieldsWithCount);
- resultFrameTupleAcc = new FixedSizeFrameTupleAccessor(ctx.getFrameSize(), invListFieldsWithCount);
- resultTuple = new FixedSizeTupleReference(invListFieldsWithCount);
- newResultBuffers.add(ctx.allocateFrame());
- prevResultBuffers.add(ctx.allocateFrame());
+ protected RecordDescriptor queryTokenRecDesc = new RecordDescriptor(
+ new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+ protected ArrayTupleBuilder queryTokenBuilder = new ArrayTupleBuilder(queryTokenRecDesc.getFields().length);
+ protected DataOutput queryTokenDos = queryTokenBuilder.getDataOutput();
+ protected FrameTupleAppender queryTokenAppender;
+ protected ByteBuffer queryTokenFrame;
- MultiComparator searchCmp = invIndex.getBTree().getMultiComparator();
- btreePred.setLowKeyComparator(searchCmp);
- btreePred.setHighKeyComparator(searchCmp);
- btreePred.setLowKey(searchKey, true);
- btreePred.setHighKey(searchKey, true);
+ protected final InvertedIndex invIndex;
+ protected final IBinaryTokenizer queryTokenizer;
+ protected final ITypeTrait[] invListFieldsWithCount;
+ protected int occurrenceThreshold;
- // pre-create cursor objects
- for (int i = 0; i < cursorCacheSize; i++) {
- invListCursorCache.add(new FixedSizeElementInvertedListCursor(invIndex.getBufferCache(), invIndex
- .getInvListsFileId(), invIndex.getInvListElementCmp().getTypeTraits()));
- }
-
- queryTokenAppender = new FrameTupleAppender(ctx.getFrameSize());
- queryTokenFrame = ctx.allocateFrame();
-
- currentNumResults = 0;
- }
+ protected final int cursorCacheSize = 10;
+ protected List<IInvertedListCursor> invListCursorCache = new ArrayList<IInvertedListCursor>(cursorCacheSize);
+ protected List<IInvertedListCursor> invListCursors = new ArrayList<IInvertedListCursor>(cursorCacheSize);
- public void reset() {
- for(ByteBuffer b : newResultBuffers) {
- resultFrameTupleApp.reset(b, true);
- }
- for(ByteBuffer b : prevResultBuffers) {
- resultFrameTupleApp.reset(b, true);
- }
- currentNumResults = 0;
- }
-
- public void search(IInvertedIndexResultCursor resultCursor, ITupleReference queryTuple, int queryFieldIndex, IInvertedIndexSearchModifier searchModifier) throws Exception {
-
- queryTokenAppender.reset(queryTokenFrame, true);
- queryTokenizer.reset(queryTuple.getFieldData(queryFieldIndex), queryTuple.getFieldStart(queryFieldIndex),
- queryTuple.getFieldLength(queryFieldIndex));
- while (queryTokenizer.hasNext()) {
- queryTokenizer.next();
+ public TOccurrenceSearcher(IHyracksStageletContext ctx, InvertedIndex invIndex, IBinaryTokenizer queryTokenizer) {
+ this.ctx = ctx;
+ this.invIndex = invIndex;
+ this.queryTokenizer = queryTokenizer;
- queryTokenBuilder.reset();
- try {
- IToken token = queryTokenizer.getToken();
- token.serializeToken(queryTokenDos);
- queryTokenBuilder.addFieldEndOffset();
- // WARNING: assuming one frame is big enough to hold all tokens
- queryTokenAppender.append(queryTokenBuilder.getFieldEndOffsets(), queryTokenBuilder.getByteArray(), 0,
- queryTokenBuilder.getSize());
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- }
+ leafFrame = invIndex.getBTree().getLeafFrameFactory().getFrame();
+ interiorFrame = invIndex.getBTree().getInteriorFrameFactory().getFrame();
- FrameTupleAccessor queryTokenAccessor = new FrameTupleAccessor(ctx.getFrameSize(), queryTokenRecDesc);
- queryTokenAccessor.reset(queryTokenFrame);
- int numQueryTokens = queryTokenAccessor.getTupleCount();
-
- // expand cursor cache if necessary
- if (numQueryTokens > invListCursorCache.size()) {
- int diff = numQueryTokens - invListCursorCache.size();
- for (int i = 0; i < diff; i++) {
- invListCursorCache.add(new FixedSizeElementInvertedListCursor(invIndex.getBufferCache(), invIndex
- .getInvListsFileId(), invIndex.getInvListElementCmp().getTypeTraits()));
- }
- }
-
- invListCursors.clear();
- for (int i = 0; i < numQueryTokens; i++) {
- searchKey.reset(queryTokenAccessor, i);
- invIndex.openCursor(btreeCursor, btreePred, btreeOpCtx, invListCursorCache.get(i));
- invListCursors.add(invListCursorCache.get(i));
- }
-
- /*
- for(int i = 0; i < numQueryTokens; i++) {
- System.out.println("SIZE: " + i + " " + invListCursors.get(i).getNumElements());
- }
- */
-
- occurrenceThreshold = searchModifier.getOccurrenceThreshold(invListCursors);
-
- int numPrefixLists = searchModifier.getPrefixLists(invListCursors);
- maxResultBufIdx = mergePrefixLists(numPrefixLists, numQueryTokens);
- maxResultBufIdx = mergeSuffixLists(numPrefixLists, numQueryTokens, maxResultBufIdx);
-
- resultCursor.reset(this);
-
- //System.out.println("NUMBER RESULTS: " + currentNumResults);
-
- /*
- StringBuffer strBuffer = new StringBuffer();
- for(int i = 0; i <= maxResultBufIdx; i++) {
- ByteBuffer testBuf = newResultBuffers.get(i);
- resultFrameTupleAcc.reset(testBuf);
- for(int j = 0; j < resultFrameTupleAcc.getTupleCount(); j++) {
- strBuffer.append(IntegerSerializerDeserializer.getInt(resultFrameTupleAcc.getBuffer().array(), resultFrameTupleAcc.getFieldStartOffset(j, 0)) + ",");
- strBuffer.append(IntegerSerializerDeserializer.getInt(resultFrameTupleAcc.getBuffer().array(), resultFrameTupleAcc.getFieldStartOffset(j, 1)) + " ");
- }
- }
- System.out.println(strBuffer.toString());
- */
-
- }
-
- protected int mergePrefixLists(int numPrefixTokens, int numQueryTokens) throws IOException {
- int maxPrevBufIdx = 0;
- for(int i = 0; i < numPrefixTokens; i++) {
- swap = prevResultBuffers;
- prevResultBuffers = newResultBuffers;
- newResultBuffers = swap;
- currentNumResults = 0;
-
- invListCursors.get(i).pinPagesSync();
- maxPrevBufIdx = mergePrefixList(invListCursors.get(i), prevResultBuffers, maxPrevBufIdx, newResultBuffers);
- invListCursors.get(i).unpinPages();
- }
-
- return maxPrevBufIdx;
- }
-
- protected int mergeSuffixLists(int numPrefixTokens, int numQueryTokens, int maxPrevBufIdx) throws IOException {
- for(int i = numPrefixTokens; i < numQueryTokens; i++) {
- swap = prevResultBuffers;
- prevResultBuffers = newResultBuffers;
- newResultBuffers = swap;
-
- invListCursors.get(i).pinPagesSync();
- int numInvListElements = invListCursors.get(i).getNumElements();
- // should we binary search the next list or should we sort-merge it?
- if(currentNumResults * Math.log(numInvListElements) < currentNumResults + numInvListElements) {
- //System.out.println("PROBING LIST: " + i);
- maxPrevBufIdx = mergeSuffixListProbe(invListCursors.get(i), prevResultBuffers, maxPrevBufIdx, newResultBuffers, i, numQueryTokens);
- }
- else {
- //System.out.println("SCANNING LIST: " + i);
- maxPrevBufIdx = mergeSuffixListScan(invListCursors.get(i), prevResultBuffers, maxPrevBufIdx, newResultBuffers, i, numQueryTokens);
- }
- invListCursors.get(i).unpinPages();
- }
- return maxPrevBufIdx;
- }
-
- protected int mergeSuffixListProbe(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers, int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws IOException {
-
- int newBufIdx = 0;
+ btreeCursor = new RangeSearchCursor(leafFrame);
+ ITypeTrait[] invListFields = invIndex.getInvListElementCmp().getTypeTraits();
+ invListFieldsWithCount = new TypeTrait[invListFields.length + 1];
+ int tmp = 0;
+ for(int i = 0; i < invListFields.length; i++) {
+ invListFieldsWithCount[i] = invListFields[i];
+ tmp += invListFields[i].getStaticallyKnownDataLength();
+ }
+ // using an integer for counting occurrences
+ invListFieldsWithCount[invListFields.length] = new TypeTrait(4);
+ invListKeyLength = tmp;
+
+ btreeOpCtx = invIndex.getBTree().createOpContext(TreeIndexOp.TI_SEARCH, leafFrame,
+ interiorFrame, null);
+
+ resultFrameTupleApp = new FixedSizeFrameTupleAppender(ctx.getFrameSize(), invListFieldsWithCount);
+ resultFrameTupleAcc = new FixedSizeFrameTupleAccessor(ctx.getFrameSize(), invListFieldsWithCount);
+ resultTuple = new FixedSizeTupleReference(invListFieldsWithCount);
+ newResultBuffers.add(ctx.allocateFrame());
+ prevResultBuffers.add(ctx.allocateFrame());
+
+ MultiComparator searchCmp = invIndex.getBTree().getMultiComparator();
+ btreePred.setLowKeyComparator(searchCmp);
+ btreePred.setHighKeyComparator(searchCmp);
+ btreePred.setLowKey(searchKey, true);
+ btreePred.setHighKey(searchKey, true);
+
+ // pre-create cursor objects
+ for (int i = 0; i < cursorCacheSize; i++) {
+ invListCursorCache.add(new FixedSizeElementInvertedListCursor(invIndex.getBufferCache(), invIndex
+ .getInvListsFileId(), invIndex.getInvListElementCmp().getTypeTraits()));
+ }
+
+ queryTokenAppender = new FrameTupleAppender(ctx.getFrameSize());
+ queryTokenFrame = ctx.allocateFrame();
+
+ currentNumResults = 0;
+ }
+
+ public void reset() { // resets the appender over every frame in both result-buffer lists and zeroes the result counter
+ for(ByteBuffer b : newResultBuffers) {
+ resultFrameTupleApp.reset(b, true); // presumably the 'true' flag clears the frame; confirm against FixedSizeFrameTupleAppender
+ }
+ for(ByteBuffer b : prevResultBuffers) { // same treatment for the other list; the two lists are swapped between merge rounds
+ resultFrameTupleApp.reset(b, true);
+ }
+ currentNumResults = 0;
+ }
+
+ public void search(IInvertedIndexResultCursor resultCursor, ITupleReference queryTuple, int queryFieldIndex, IInvertedIndexSearchModifier searchModifier) throws Exception { // tokenizes field queryFieldIndex of queryTuple, opens one inverted-list cursor per token, merges the lists, then resets resultCursor on this searcher
+
+ queryTokenAppender.reset(queryTokenFrame, true); // reuse the single token frame allocated in the constructor
+ queryTokenizer.reset(queryTuple.getFieldData(queryFieldIndex), queryTuple.getFieldStart(queryFieldIndex),
+ queryTuple.getFieldLength(queryFieldIndex));
+ while (queryTokenizer.hasNext()) { // serialize each token as a one-field tuple into queryTokenFrame
+ queryTokenizer.next();
+
+ queryTokenBuilder.reset();
+ try {
+ IToken token = queryTokenizer.getToken();
+ token.serializeToken(queryTokenDos); // queryTokenDos is the builder's DataOutput, so this writes into queryTokenBuilder
+ queryTokenBuilder.addFieldEndOffset();
+ // WARNING: assuming one frame is big enough to hold all tokens
+ queryTokenAppender.append(queryTokenBuilder.getFieldEndOffsets(), queryTokenBuilder.getByteArray(), 0,
+ queryTokenBuilder.getSize()); // NOTE(review): the result of append(), if any, is not checked; see the WARNING above
+ } catch (IOException e) {
+ throw new HyracksDataException(e); // rewrap, keeping the IOException as the cause
+ }
+ }
+
+ FrameTupleAccessor queryTokenAccessor = new FrameTupleAccessor(ctx.getFrameSize(), queryTokenRecDesc);
+ queryTokenAccessor.reset(queryTokenFrame);
+ int numQueryTokens = queryTokenAccessor.getTupleCount(); // one tuple was appended per token in the loop above
+
+ // expand cursor cache if necessary
+ if (numQueryTokens > invListCursorCache.size()) {
+ int diff = numQueryTokens - invListCursorCache.size();
+ for (int i = 0; i < diff; i++) { // create cursors the same way the constructor pre-creates them
+ invListCursorCache.add(new FixedSizeElementInvertedListCursor(invIndex.getBufferCache(), invIndex
+ .getInvListsFileId(), invIndex.getInvListElementCmp().getTypeTraits()));
+ }
+ }
+
+ invListCursors.clear();
+ for (int i = 0; i < numQueryTokens; i++) { // open cursor i on the inverted list of token i
+ searchKey.reset(queryTokenAccessor, i); // the constructor installs searchKey as both the low and the high key of btreePred
+ invIndex.openCursor(btreeCursor, btreePred, btreeOpCtx, invListCursorCache.get(i));
+ invListCursors.add(invListCursorCache.get(i));
+ }
+
+ occurrenceThreshold = searchModifier.getOccurrenceThreshold(invListCursors);
+
+ // TODO: deal with panic cases properly
+ if(occurrenceThreshold <= 0) { // a non-positive threshold aborts the search instead of being merged
+ throw new OccurrenceThresholdPanicException("Merge Threshold is <= 0. Failing Search.");
+ }
+
+ int numPrefixLists = searchModifier.getPrefixLists(invListCursors);
+ maxResultBufIdx = mergePrefixLists(numPrefixLists, numQueryTokens); // lists [0, numPrefixLists) are merged first
+ maxResultBufIdx = mergeSuffixLists(numPrefixLists, numQueryTokens, maxResultBufIdx); // then lists [numPrefixLists, numQueryTokens)
+
+ resultCursor.reset(this);
+
+ //printNewResults(maxResultBufIdx);
+ }
+
+ protected int mergePrefixLists(int numPrefixTokens, int numQueryTokens) throws IOException { // merges inverted lists [0, numPrefixTokens) one at a time; returns the buffer index from the last mergePrefixList call (0 if none ran); numQueryTokens is not used here
+ int maxPrevBufIdx = 0;
+ for(int i = 0; i < numPrefixTokens; i++) {
+ swap = prevResultBuffers; // swap roles: the previous round's output becomes this round's input
+ prevResultBuffers = newResultBuffers;
+ newResultBuffers = swap;
+ currentNumResults = 0; // restart the result count for this round
+
+ invListCursors.get(i).pinPagesSync();
+ maxPrevBufIdx = mergePrefixList(invListCursors.get(i), prevResultBuffers, maxPrevBufIdx, newResultBuffers);
+ invListCursors.get(i).unpinPages(); // NOTE(review): not in a finally block, so this is skipped if the merge throws
+
+ //printNewResults(maxPrevBufIdx);
+ }
+
+ return maxPrevBufIdx;
+ }
+
+ protected int mergeSuffixLists(int numPrefixTokens, int numQueryTokens, int maxPrevBufIdx) throws IOException { // merges inverted lists [numPrefixTokens, numQueryTokens) into the running results; returns the buffer index from the last merge, or maxPrevBufIdx unchanged if none ran
+ for(int i = numPrefixTokens; i < numQueryTokens; i++) {
+ swap = prevResultBuffers; // swap roles: the previous round's output becomes this round's input
+ prevResultBuffers = newResultBuffers;
+ newResultBuffers = swap;
+
+ invListCursors.get(i).pinPagesSync();
+ int numInvListElements = invListCursors.get(i).getNumElements();
+ // should we binary search the next list or should we sort-merge it?
+ if(currentNumResults * Math.log(numInvListElements) < currentNumResults + numInvListElements) { // probe estimate (results * ln(list size)) vs scan estimate (results + list size); NOTE(review): currentNumResults is the previous round's count, and the scan path in this patch drops its reset of that counter; confirm it stays accurate
+ //System.out.println("PROBING LIST: " + i);
+ maxPrevBufIdx = mergeSuffixListProbe(invListCursors.get(i), prevResultBuffers, maxPrevBufIdx, newResultBuffers, i, numQueryTokens);
+ }
+ else {
+ //System.out.println("SCANNING LIST: " + i);
+ maxPrevBufIdx = mergeSuffixListScan(invListCursors.get(i), prevResultBuffers, maxPrevBufIdx, newResultBuffers, i, numQueryTokens);
+ }
+ invListCursors.get(i).unpinPages(); // NOTE(review): not in a finally block, so this is skipped if a merge throws
+ }
+ return maxPrevBufIdx;
+ }
+
+ protected int mergeSuffixListProbe(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers, int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws IOException { // walks every previous result tuple and probes invListCursor for its key; returns the index of the last new-result buffer written
+
+ int newBufIdx = 0;
+ ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
+
+ int prevBufIdx = 0;
+ ByteBuffer prevCurrentBuffer = prevResultBuffers.get(0);
+
+ int resultTidx = 0; // tuple index within the current previous-result frame
+
+ currentNumResults = 0; // restart the result count for this round
+
+ MultiComparator invListCmp = invIndex.getInvListElementCmp();
+
+ resultFrameTupleAcc.reset(prevCurrentBuffer); // read side: first frame of the previous results
+ resultFrameTupleApp.reset(newCurrentBuffer, true); // write side: first frame of the new results
+
+ while(resultTidx < resultFrameTupleAcc.getTupleCount()) {
+
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+ int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1)); // the occurrence count is the last field of a result tuple
+
+ if(invListCursor.containsKey(resultTuple, invListCmp)) { // key is in this list: carry it forward with count + 1
+ count++;
+ newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
+ }
+ else {
+ if(count + numQueryTokens - invListIx > occurrenceThreshold) { // keep a miss only if matching all (numQueryTokens - invListIx - 1) remaining lists could still reach the threshold
+ newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
+ }
+ }
+
+ resultTidx++;
+ if (resultTidx >= resultFrameTupleAcc.getTupleCount()) { // current input frame exhausted
+ prevBufIdx++;
+ if (prevBufIdx <= maxPrevBufIdx) { // more valid input frames: continue with the next one
+ prevCurrentBuffer = prevResultBuffers.get(prevBufIdx);
+ resultFrameTupleAcc.reset(prevCurrentBuffer);
+ resultTidx = 0;
+ }
+ }
+ }
+
+ return newBufIdx;
+ }
+
+ protected int mergeSuffixListScan(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers, int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws IOException {
+ int newBufIdx = 0;
ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
int prevBufIdx = 0;
ByteBuffer prevCurrentBuffer = prevResultBuffers.get(0);
-
- int resultTidx = 0;
-
- currentNumResults = 0;
-
- MultiComparator invListCmp = invIndex.getInvListElementCmp();
-
- resultFrameTupleAcc.reset(prevCurrentBuffer);
- resultFrameTupleApp.reset(newCurrentBuffer, true);
-
- while(resultTidx < resultFrameTupleAcc.getTupleCount()) {
-
- resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
- int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1));
- if(invListCursor.containsKey(resultTuple, invListCmp)) {
- count++;
- newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
- }
- else {
- if(count + numQueryTokens - invListIx > occurrenceThreshold) {
- //System.out.println("C: " + count);
- newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
- }
- }
-
- resultTidx++;
- if (resultTidx >= resultFrameTupleAcc.getTupleCount()) {
- prevBufIdx++;
- if (prevBufIdx <= maxPrevBufIdx) {
- prevCurrentBuffer = prevResultBuffers.get(prevBufIdx);
- resultFrameTupleAcc.reset(prevCurrentBuffer);
- resultTidx = 0;
- }
- }
- }
-
- return newBufIdx;
- }
-
- protected int mergeSuffixListScan(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers, int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws IOException {
-
- int newBufIdx = 0;
- ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
-
- int prevBufIdx = 0;
- ByteBuffer prevCurrentBuffer = prevResultBuffers.get(0);
-
boolean advanceCursor = true;
boolean advancePrevResult = false;
int resultTidx = 0;
-
- currentNumResults = 0;
-
+
MultiComparator invListCmp = invIndex.getInvListElementCmp();
-
+
resultFrameTupleAcc.reset(prevCurrentBuffer);
resultFrameTupleApp.reset(newCurrentBuffer, true);
-
- while(invListCursor.hasNext() && resultTidx < resultFrameTupleAcc.getTupleCount()) {
-
- if(advanceCursor) invListCursor.next();
-
+
+ int invListTidx = 0;
+ int invListNumTuples = invListCursor.getNumElements();
+
+ if(invListCursor.hasNext()) invListCursor.next();
+
+ while(invListTidx < invListNumTuples && resultTidx < resultFrameTupleAcc.getTupleCount()) {
+
ITupleReference invListTuple = invListCursor.getTuple();
-
- resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
-
+
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+
int cmp = invListCmp.compare(invListTuple, resultTuple);
if (cmp == 0) {
int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1)) + 1;
@@ -339,10 +325,10 @@
advanceCursor = true;
advancePrevResult = false;
} else {
- int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1));
- if(count + numQueryTokens - invListIx > occurrenceThreshold) {
- newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
- }
+ int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1));
+ if(count + numQueryTokens - invListIx > occurrenceThreshold) {
+ newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
+ }
advanceCursor = false;
advancePrevResult = true;
}
@@ -358,16 +344,24 @@
resultTidx = 0;
}
}
- }
+ }
+
+ if(advanceCursor) {
+ invListTidx++;
+ invListCursor.next();
+ }
}
-
+
// append remaining elements from previous result set
- //if(resultTidx < resultFrameTupleAcc.getTupleCount()) System.out.println("APPENDING FROM RESULTS");
while(resultTidx < resultFrameTupleAcc.getTupleCount()) {
-
- int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1));
- newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
-
+
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+
+ int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1));
+ if(count + numQueryTokens - invListIx > occurrenceThreshold) {
+ newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
+ }
+
resultTidx++;
if (resultTidx >= resultFrameTupleAcc.getTupleCount()) {
prevBufIdx++;
@@ -377,35 +371,38 @@
resultTidx = 0;
}
}
- }
-
+ }
+
return newBufIdx;
- }
-
+ }
+
protected int mergePrefixList(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers, int maxPrevBufIdx, List<ByteBuffer> newResultBuffers) throws IOException {
int newBufIdx = 0;
ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
int prevBufIdx = 0;
ByteBuffer prevCurrentBuffer = prevResultBuffers.get(0);
-
+
boolean advanceCursor = true;
boolean advancePrevResult = false;
int resultTidx = 0;
-
+
MultiComparator invListCmp = invIndex.getInvListElementCmp();
-
+
resultFrameTupleAcc.reset(prevCurrentBuffer);
resultFrameTupleApp.reset(newCurrentBuffer, true);
-
- while(invListCursor.hasNext() && resultTidx < resultFrameTupleAcc.getTupleCount()) {
-
- if(advanceCursor) invListCursor.next();
-
+
+ int invListTidx = 0;
+ int invListNumTuples = invListCursor.getNumElements();
+
+ if(invListCursor.hasNext()) invListCursor.next();
+
+ while(invListTidx < invListNumTuples && resultTidx < resultFrameTupleAcc.getTupleCount()) {
+
ITupleReference invListTuple = invListCursor.getTuple();
-
- resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
-
+
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+
int cmp = invListCmp.compare(invListTuple, resultTuple);
if (cmp == 0) {
int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1)) + 1;
@@ -436,24 +433,30 @@
resultTidx = 0;
}
}
- }
+ }
+
+ if(advanceCursor) {
+ invListTidx++;
+ invListCursor.next();
+ }
}
-
- // append remaining new elements from inverted list
- //if(invListCursor.hasNext()) System.out.println("APPENDING FROM INV LIST");
- while(invListCursor.hasNext()) {
- invListCursor.next();
+
+ // append remaining new elements from inverted list
+ while(invListTidx < invListNumTuples) {
ITupleReference invListTuple = invListCursor.getTuple();
newBufIdx = appendTupleToNewResults(invListTuple, 1, newBufIdx);
+ invListTidx++;
+ invListCursor.next();
}
-
+
// append remaining elements from previous result set
- //if(resultTidx < resultFrameTupleAcc.getTupleCount()) System.out.println("APPENDING FROM RESULTS");
while(resultTidx < resultFrameTupleAcc.getTupleCount()) {
-
+
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+
int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1));
newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
-
+
resultTidx++;
if (resultTidx >= resultFrameTupleAcc.getTupleCount()) {
prevBufIdx++;
@@ -464,54 +467,237 @@
}
}
}
-
+
return newBufIdx;
- }
-
- protected int appendTupleToNewResults(ITupleReference tuple, int newCount, int newBufIdx) throws IOException {
- ByteBuffer newCurrentBuffer = newResultBuffers.get(newBufIdx);
-
- if (!resultFrameTupleApp.hasSpace()) {
- newBufIdx++;
- if (newBufIdx >= newResultBuffers.size()) {
- newResultBuffers.add(ctx.allocateFrame());
- }
- newCurrentBuffer = newResultBuffers.get(newBufIdx);
- resultFrameTupleApp.reset(newCurrentBuffer, true);
- }
-
- // append key
- if (!resultFrameTupleApp.append(tuple.getFieldData(0), tuple.getFieldStart(0), invListKeyLength) ) {
- throw new IllegalStateException();
- }
-
- // append new count
- if (!resultFrameTupleApp.append(newCount) ) {
- throw new IllegalStateException();
- }
-
- resultFrameTupleApp.incrementTupleCount(1);
-
- currentNumResults++;
-
- return newBufIdx;
- }
-
- public IFrameTupleAccessor createResultFrameTupleAccessor() {
- return new FixedSizeFrameTupleAccessor(ctx.getFrameSize(), invListFieldsWithCount);
- }
-
- public ITupleReference createResultTupleReference() {
- return new FixedSizeTupleReference(invListFieldsWithCount);
}
- @Override
- public List<ByteBuffer> getResultBuffers() {
- return newResultBuffers;
- }
+ protected int appendTupleToNewResults(ITupleReference tuple, int newCount, int newBufIdx) throws IOException { // appends (key of tuple, newCount) to the new result buffers; returns the possibly advanced buffer index
+ ByteBuffer newCurrentBuffer = newResultBuffers.get(newBufIdx);
- @Override
- public int getNumValidResultBuffers() {
- return maxResultBufIdx + 1;
- }
+ if (!resultFrameTupleApp.hasSpace()) { // current frame is full: continue in the next result buffer
+ newBufIdx++;
+ if (newBufIdx >= newResultBuffers.size()) { // allocate a frame only when no existing buffer is left to reuse
+ newResultBuffers.add(ctx.allocateFrame());
+ }
+ newCurrentBuffer = newResultBuffers.get(newBufIdx);
+ resultFrameTupleApp.reset(newCurrentBuffer, true);
+ }
+
+ // append key
+ if (!resultFrameTupleApp.append(tuple.getFieldData(0), tuple.getFieldStart(0), invListKeyLength) ) { // copies invListKeyLength bytes starting at field 0
+ throw new IllegalStateException();
+ }
+
+ // append new count
+ if (!resultFrameTupleApp.append(newCount) ) {
+ throw new IllegalStateException();
+ }
+
+ resultFrameTupleApp.incrementTupleCount(1);
+
+ currentNumResults++; // running count of tuples appended to the new result set
+
+ return newBufIdx;
+ }
+
+ /**
+  * Builds a new fixed-size frame tuple accessor sized to the context's frame size
+  * and laid out according to {@code invListFieldsWithCount}.
+  *
+  * @return a fresh {@link FixedSizeFrameTupleAccessor}
+  */
+ public IFrameTupleAccessor createResultFrameTupleAccessor() {
+     final int frameSize = ctx.getFrameSize();
+     return new FixedSizeFrameTupleAccessor(frameSize, invListFieldsWithCount);
+ }
+
+ /**
+  * Builds a new fixed-size tuple reference laid out according to {@code invListFieldsWithCount}.
+  *
+  * @return a fresh {@link FixedSizeTupleReference}
+  */
+ public ITupleReference createResultTupleReference() {
+     final ITupleReference resultRef = new FixedSizeTupleReference(invListFieldsWithCount);
+     return resultRef;
+ }
+
+ @Override
+ public List<ByteBuffer> getResultBuffers() { // hands out the internal newResultBuffers list itself, not a copy
+ return newResultBuffers;
+ }
+
+ /**
+  * Returns the number of valid result buffers.
+  *
+  * @return {@code maxResultBufIdx + 1}
+  */
+ @Override
+ public int getNumValidResultBuffers() {
+     final int numValidBuffers = maxResultBufIdx + 1;
+     return numValidBuffers;
+ }
+
+ public int getOccurrenceThreshold() { // plain accessor for the occurrenceThreshold field
+ return occurrenceThreshold;
+ }
+
+ /**
+  * Debug helper that prints, on one line of standard output, the first two integer
+  * fields of every tuple in {@code newResultBuffers[0..maxResultBufIdx]}.
+  * Each tuple is rendered as {@code "field0,field1 "}.
+  * <p>
+  * Side effect: the shared {@code resultFrameTupleAcc} is reset onto each printed buffer.
+  *
+  * @param maxResultBufIdx index of the last buffer to print (inclusive)
+  */
+ public void printNewResults(int maxResultBufIdx) {
+     StringBuilder line = new StringBuilder();
+     int bufIdx = 0;
+     while (bufIdx <= maxResultBufIdx) {
+         resultFrameTupleAcc.reset(newResultBuffers.get(bufIdx));
+         for (int tupleIdx = 0; tupleIdx < resultFrameTupleAcc.getTupleCount(); tupleIdx++) {
+             int first = IntegerSerializerDeserializer.getInt(resultFrameTupleAcc.getBuffer().array(),
+                     resultFrameTupleAcc.getFieldStartOffset(tupleIdx, 0));
+             line.append(first).append(",");
+             int second = IntegerSerializerDeserializer.getInt(resultFrameTupleAcc.getBuffer().array(),
+                     resultFrameTupleAcc.getFieldStartOffset(tupleIdx, 1));
+             line.append(second).append(" ");
+         }
+         bufIdx++;
+     }
+     System.out.println(line.toString());
+ }
+
+
+
+ // Older, slower merge implementations, kept commented out for reference.
+ /*
+ protected int mergeSuffixListScan(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers, int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) throws IOException {
+
+ int newBufIdx = 0;
+ ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
+
+ int prevBufIdx = 0;
+ ByteBuffer prevCurrentBuffer = prevResultBuffers.get(0);
+
+ boolean advanceCursor = true;
+ boolean advancePrevResult = false;
+ int resultTidx = 0;
+
+ MultiComparator invListCmp = invIndex.getInvListElementCmp();
+
+ resultFrameTupleAcc.reset(prevCurrentBuffer);
+ resultFrameTupleApp.reset(newCurrentBuffer, true);
+
+ ITupleReference invListTuple = null;
+
+ int invListTidx = 0;
+ int invListNumTuples = invListCursor.getNumElements();
+
+ if(invListCursor.hasNext()) invListCursor.next();
+
+ while(invListTidx < invListNumTuples || resultTidx < resultFrameTupleAcc.getTupleCount()) {
+
+ invListTuple = null;
+
+ int cmp = 0;
+ if(invListTidx >= invListNumTuples) {
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+ cmp = 1;
+ } else if(resultTidx >= resultFrameTupleAcc.getTupleCount()) {
+ invListTuple = invListCursor.getTuple();
+ cmp = -1;
+ } else {
+ invListTuple = invListCursor.getTuple();
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+ cmp = invListCmp.compare(invListTuple, resultTuple);
+ }
+
+ if (cmp == 0) {
+ int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1)) + 1;
+ newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
+ advanceCursor = true;
+ advancePrevResult = true;
+ } else {
+ if (cmp < 0) {
+ advanceCursor = true;
+ advancePrevResult = false;
+ } else {
+ int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1));
+ if(count + numQueryTokens - invListIx > occurrenceThreshold) {
+ newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
+ }
+ advanceCursor = false;
+ advancePrevResult = true;
+ }
+ }
+
+ if (advancePrevResult) {
+ resultTidx++;
+ if (resultTidx >= resultFrameTupleAcc.getTupleCount()) {
+ prevBufIdx++;
+ if (prevBufIdx <= maxPrevBufIdx) {
+ prevCurrentBuffer = prevResultBuffers.get(prevBufIdx);
+ resultFrameTupleAcc.reset(prevCurrentBuffer);
+ resultTidx = 0;
+ }
+ }
+ }
+
+ if(advanceCursor) {
+ invListTidx++;
+ invListCursor.next();
+ }
+ }
+
+ return newBufIdx;
+ }
+
+ protected int mergePrefixList(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers, int maxPrevBufIdx, List<ByteBuffer> newResultBuffers) throws IOException {
+ int newBufIdx = 0;
+ ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
+
+ int prevBufIdx = 0;
+ ByteBuffer prevCurrentBuffer = prevResultBuffers.get(0);
+
+ boolean advanceCursor = true;
+ boolean advancePrevResult = false;
+ int resultTidx = 0;
+
+ MultiComparator invListCmp = invIndex.getInvListElementCmp();
+
+ resultFrameTupleAcc.reset(prevCurrentBuffer);
+ resultFrameTupleApp.reset(newCurrentBuffer, true);
+
+ ITupleReference invListTuple = null;
+
+ int invListTidx = 0;
+ int invListNumTuples = invListCursor.getNumElements();
+
+ if(invListCursor.hasNext()) invListCursor.next();
+
+ while(invListTidx < invListNumTuples || resultTidx < resultFrameTupleAcc.getTupleCount()) {
+
+ invListTuple = null;
+
+ int cmp = 0;
+ if(invListTidx >= invListNumTuples) {
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+ cmp = 1;
+ } else if(resultTidx >= resultFrameTupleAcc.getTupleCount()) {
+ invListTuple = invListCursor.getTuple();
+ cmp = -1;
+ } else {
+ invListTuple = invListCursor.getTuple();
+ resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
+ cmp = invListCmp.compare(invListTuple, resultTuple);
+ }
+
+ if (cmp == 0) {
+ int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1)) + 1;
+ newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
+ advanceCursor = true;
+ advancePrevResult = true;
+ } else {
+ if (cmp < 0) {
+ int count = 1;
+ newBufIdx = appendTupleToNewResults(invListTuple, count, newBufIdx);
+ advanceCursor = true;
+ advancePrevResult = false;
+ } else {
+ int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(resultTuple.getFieldCount()-1));
+ newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
+ advanceCursor = false;
+ advancePrevResult = true;
+ }
+ }
+
+ if (advancePrevResult) {
+ resultTidx++;
+ if (resultTidx >= resultFrameTupleAcc.getTupleCount()) {
+ prevBufIdx++;
+ if (prevBufIdx <= maxPrevBufIdx) {
+ prevCurrentBuffer = prevResultBuffers.get(prevBufIdx);
+ resultFrameTupleAcc.reset(prevCurrentBuffer);
+ resultTidx = 0;
+ }
+ }
+ }
+
+ if(advanceCursor) {
+ invListTidx++;
+ invListCursor.next();
+ }
+ }
+
+ return newBufIdx;
+ }
+*/
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/JaccardSearchModifier.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/JaccardSearchModifier.java
index dbd2196..72298e3 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/JaccardSearchModifier.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchmodifiers/JaccardSearchModifier.java
@@ -16,7 +16,7 @@
@Override
public int getOccurrenceThreshold(List<IInvertedListCursor> invListCursors) {
- return (int) Math.floor((float) invListCursors.size() * jaccThresh);
+ return (int) Math.floor((float) invListCursors.size() * jaccThresh);
}
@Override
@@ -25,7 +25,7 @@
if (invListCursors.size() == 0) {
return 0;
}
- return invListCursors.size() - (int) Math.ceil(jaccThresh * invListCursors.size()) + 1;
+ return invListCursors.size() - getOccurrenceThreshold(invListCursors) + 1;
}
public float getJaccThresh() {
diff --git a/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/AbstractInvIndexTest.java b/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/AbstractInvIndexTest.java
index 0ea886d..88fabba 100644
--- a/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/AbstractInvIndexTest.java
+++ b/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/AbstractInvIndexTest.java
@@ -4,8 +4,6 @@
import java.text.SimpleDateFormat;
import java.util.Date;
-import org.junit.AfterClass;
-
public abstract class AbstractInvIndexTest {
protected final static SimpleDateFormat simpleDateFormat = new SimpleDateFormat("ddMMyy-hhmmssSS");
@@ -14,16 +12,11 @@
protected final static String baseFileName = tmpDir + sep + simpleDateFormat.format(new Date());
protected final static String btreeFileName = baseFileName + "btree";
protected final static String invListsFileName = baseFileName + "invlists";
-
- protected void print(String str) {
- System.out.print(str);
- }
-
- @AfterClass
- public static void cleanup() throws Exception {
- File btreeFile = new File(btreeFileName);
+
+ public static void tearDown() {
+ File btreeFile = new File(btreeFileName);
btreeFile.deleteOnExit();
File invListsFile = new File(invListsFileName);
invListsFile.deleteOnExit();
- }
+ }
}
diff --git a/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/BulkLoadTest.java b/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/BulkLoadTest.java
index 62a0879..df96900 100644
--- a/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/BulkLoadTest.java
+++ b/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/BulkLoadTest.java
@@ -9,6 +9,7 @@
import junit.framework.Assert;
+import org.junit.AfterClass;
import org.junit.Test;
import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
@@ -121,7 +122,7 @@
int invListFields = 1;
ITypeTrait[] invListTypeTraits = new ITypeTrait[invListFields];
- invListTypeTraits[0] = new TypeTrait(4);
+ invListTypeTraits[0] = new TypeTrait(4);
int invListKeys = 1;
IBinaryComparator[] invListBinCmps = new IBinaryComparator[invListKeys];
@@ -278,4 +279,9 @@
bufferCache.closeFile(invListsFileId);
bufferCache.close();
}
+
+ @AfterClass
+ public static void deinit() { // class-level cleanup hook
+ AbstractInvIndexTest.tearDown(); // registers the btree and inverted-list files for deletion at JVM exit
+ }
}
diff --git a/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/SearchTest.java b/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/SearchTest.java
index 18c938f..0d6c7ca 100644
--- a/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/SearchTest.java
+++ b/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/SearchTest.java
@@ -6,8 +6,10 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
-import java.util.TreeSet;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
import org.junit.Test;
import edu.uci.ics.fuzzyjoin.tokenizer.DelimitedUTF8StringBinaryTokenizer;
@@ -21,6 +23,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTrait;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
import edu.uci.ics.hyracks.api.dataflow.value.TypeTrait;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
@@ -49,9 +52,10 @@
import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedListBuilder;
import edu.uci.ics.hyracks.storage.am.invertedindex.impls.FixedSizeElementInvertedListBuilder;
import edu.uci.ics.hyracks.storage.am.invertedindex.impls.InvertedIndex;
+import edu.uci.ics.hyracks.storage.am.invertedindex.impls.OccurrenceThresholdPanicException;
import edu.uci.ics.hyracks.storage.am.invertedindex.impls.SearchResultCursor;
import edu.uci.ics.hyracks.storage.am.invertedindex.impls.TOccurrenceSearcher;
-import edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers.ConjunctiveSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers.JaccardSearchModifier;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder;
@@ -66,11 +70,357 @@
private static final int PAGE_SIZE = 32768;
private static final int NUM_PAGES = 100;
private static final int HYRACKS_FRAME_SIZE = 32768;
- private IHyracksStageletContext stageletCtx = TestUtils.create(HYRACKS_FRAME_SIZE);
+ private static IHyracksStageletContext stageletCtx = TestUtils.create(HYRACKS_FRAME_SIZE);
+
+ private static IBufferCache bufferCache;
+ private static IFileMapProvider fmp;
+
+ // --- BTREE ---
+
+ // create file refs
+ private static FileReference btreeFile = new FileReference(new File(btreeFileName));
+ private static int btreeFileId;
+
+ // declare btree fields
+ private static int fieldCount = 5;
+ private static ITypeTrait[] typeTraits = new ITypeTrait[fieldCount]; // entries are assigned later, in start()
+
+ // declare btree keys
+ private static int btreeKeyFieldCount = 1;
+ private static IBinaryComparator[] btreeBinCmps = new IBinaryComparator[btreeKeyFieldCount]; // entry 0 is assigned later, in start()
+ private static MultiComparator btreeCmp = new MultiComparator(typeTraits, btreeBinCmps); // NOTE(review): built while both arrays are still empty; presumably MultiComparator keeps references to them - confirm
+
+ // btree frame factories
+ private static TypeAwareTupleWriterFactory tupleWriterFactory = new TypeAwareTupleWriterFactory(typeTraits);
+ private static IBTreeLeafFrameFactory leafFrameFactory = new NSMLeafFrameFactory(tupleWriterFactory);
+ private static IBTreeInteriorFrameFactory interiorFrameFactory = new NSMInteriorFrameFactory(tupleWriterFactory);
+ private static ITreeIndexMetaDataFrameFactory metaFrameFactory = new LIFOMetaDataFrameFactory();
+
+ // btree frames
+ private static IBTreeLeafFrame leafFrame = leafFrameFactory.getFrame();
+ private static ITreeIndexMetaDataFrame metaFrame = metaFrameFactory.getFrame();
+
+ private static IFreePageManager freePageManager;
+
+ private static BTree btree;
+
+
+ // --- INVERTED INDEX ---
+
+ private static FileReference invListsFile = new FileReference(new File(invListsFileName));
+ private static int invListsFileId;
+
+ private static int invListFields = 1;
+ private static ITypeTrait[] invListTypeTraits = new ITypeTrait[invListFields]; // entry 0 is assigned later, in start()
+
+ private static int invListKeys = 1;
+ private static IBinaryComparator[] invListBinCmps = new IBinaryComparator[invListKeys]; // entry 0 is assigned later, in start()
+ private static MultiComparator invListCmp = new MultiComparator(invListTypeTraits, invListBinCmps); // NOTE(review): built while both arrays are still empty; presumably MultiComparator keeps references to them - confirm
+
+ private static InvertedIndex invIndex;
+
+ private static Random rnd = new Random();
+
+ private static ByteBuffer frame = stageletCtx.allocateFrame();
+ private static FrameTupleAppender appender = new FrameTupleAppender(stageletCtx.getFrameSize());
+ private static ArrayTupleBuilder tb = new ArrayTupleBuilder(2);
+ private static DataOutput dos = tb.getDataOutput();
+ private static ISerializerDeserializer[] insertSerde = { UTF8StringSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE };
+ private static RecordDescriptor insertRecDesc = new RecordDescriptor(insertSerde);
+ private static IFrameTupleAccessor accessor = new FrameTupleAccessor(stageletCtx.getFrameSize(), insertRecDesc);
+
+ private static FrameTupleReference tuple = new FrameTupleReference();
+
+
+ private static List<String> tokens = new ArrayList<String>();
+ private static ArrayList<ArrayList<Integer>> checkInvLists = new ArrayList<ArrayList<Integer>>();
+
+ private static int maxId = 1000000; // exclusive upper bound of the ids generated by loadData()
+ //private static int maxId = 1000;
+ private static int[] scanCountArray = new int[maxId]; // one occurrence counter per id, used by fillExpectedResults()
+ private static ArrayList<Integer> expectedResults = new ArrayList<Integer>(); // ids reaching the threshold; rebuilt by fillExpectedResults()
+
+ private static ISerializerDeserializer[] querySerde = { UTF8StringSerializerDeserializer.INSTANCE };
+ private static RecordDescriptor queryRecDesc = new RecordDescriptor(querySerde);
+
+ private static FrameTupleAppender queryAppender = new FrameTupleAppender(stageletCtx.getFrameSize());
+ private static ArrayTupleBuilder queryTb = new ArrayTupleBuilder(querySerde.length);
+ private static DataOutput queryDos = queryTb.getDataOutput();
+
+ private static IFrameTupleAccessor queryAccessor = new FrameTupleAccessor(stageletCtx.getFrameSize(), queryRecDesc);
+ private static FrameTupleReference queryTuple = new FrameTupleReference();
+
+ private static ITokenFactory tokenFactory = new UTF8WordTokenFactory();
+ private static IBinaryTokenizer queryTokenizer = new DelimitedUTF8StringBinaryTokenizer(true, false, tokenFactory);
+
+ private static TOccurrenceSearcher searcher;
+ private static IInvertedIndexResultCursor resultCursor;
+
+ @BeforeClass
+ public static void start() throws Exception { // one-time setup: creates the btree and inverted index, the searcher, and loads the test data
+ TestStorageManagerComponentHolder.init(PAGE_SIZE, NUM_PAGES);
+ bufferCache = TestStorageManagerComponentHolder.getBufferCache(stageletCtx);
+ fmp = TestStorageManagerComponentHolder.getFileMapProvider(stageletCtx);
+
+ // --- BTREE ---
+
+ bufferCache.createFile(btreeFile);
+ btreeFileId = fmp.lookupFileId(btreeFile);
+ bufferCache.openFile(btreeFileId);
+
+ // token (key)
+ typeTraits[0] = new TypeTrait(ITypeTrait.VARIABLE_LENGTH);
+ // startPageId
+ typeTraits[1] = new TypeTrait(4);
+ // endPageId
+ typeTraits[2] = new TypeTrait(4);
+ // startOff
+ typeTraits[3] = new TypeTrait(4);
+ // numElements
+ typeTraits[4] = new TypeTrait(4);
+
+ btreeBinCmps[0] = UTF8StringBinaryComparatorFactory.INSTANCE.createBinaryComparator(); // fills the array that btreeCmp was constructed with
+
+ freePageManager = new LinkedListFreePageManager(bufferCache, btreeFileId, 0, metaFrameFactory);
+
+ btree = new BTree(bufferCache, freePageManager, interiorFrameFactory, leafFrameFactory, btreeCmp);
+ btree.create(btreeFileId, leafFrame, metaFrame);
+ btree.open(btreeFileId);
+
+
+ // --- INVERTED INDEX ---
+
+ bufferCache.createFile(invListsFile);
+ invListsFileId = fmp.lookupFileId(invListsFile);
+ bufferCache.openFile(invListsFileId);
+
+ invListTypeTraits[0] = new TypeTrait(4);
+ invListBinCmps[0] = IntegerBinaryComparatorFactory.INSTANCE.createBinaryComparator(); // fills the array that invListCmp was constructed with
+
+ invIndex = new InvertedIndex(bufferCache, btree, invListCmp);
+ invIndex.open(invListsFileId);
+
+ searcher = new TOccurrenceSearcher(stageletCtx, invIndex, queryTokenizer);
+ resultCursor = new SearchResultCursor(searcher.createResultFrameTupleAccessor(), searcher.createResultTupleReference());
+
+ rnd.setSeed(50); // fixed seed so loadData() generates the same lists on every run
+
+ accessor.reset(frame); // the insert and query accessors both read the same shared frame
+ queryAccessor.reset(frame);
+
+ loadData(); // must run last: it bulk-loads through invIndex
+ }
+
+ /**
+  * Fills {@code tokens} and bulk-loads one inverted list per token into {@code invIndex}.
+  * <p>
+  * For token {@code i}, each id in {@code [0, maxId)} is added when a random draw is
+  * divisible by {@code addProb}. Since {@code addProb} grows with {@code i}, later tokens
+  * get shorter lists. Every added id is mirrored into {@code checkInvLists} so that
+  * expected results can be computed independently of the index.
+  *
+  * @throws HyracksDataException if serializing a tuple or the bulk load itself fails
+  * @throws IllegalStateException if a tuple cannot be added to the bulk load
+  */
+ private static void loadData() throws HyracksDataException {
+     // NOTE(review): tokens are listed in ascending order; presumably the bulk load expects sorted input - confirm.
+     tokens.add("compilers");
+     tokens.add("computer");
+     tokens.add("databases");
+     tokens.add("fast");
+     tokens.add("hyracks");
+     tokens.add("major");
+     tokens.add("science");
+     tokens.add("systems");
+     tokens.add("university");
+
+     for (int i = 0; i < tokens.size(); i++) {
+         checkInvLists.add(new ArrayList<Integer>());
+     }
+
+     // for generating length-skewed inverted lists
+     int addProb = 0;
+     int addProbStep = 10;
+
+     IInvertedListBuilder invListBuilder = new FixedSizeElementInvertedListBuilder(invListTypeTraits);
+     InvertedIndex.BulkLoadContext ctx = invIndex.beginBulkLoad(invListBuilder, HYRACKS_FRAME_SIZE);
+
+     for (int i = 0; i < tokens.size(); i++) {
+
+         addProb += addProbStep * (i + 1);
+         for (int j = 0; j < maxId; j++) {
+             // keep this exact draw expression: the generated data depends on the random sequence
+             if ((Math.abs(rnd.nextInt()) % addProb) == 0) {
+
+                 // build the (token, id) tuple
+                 tb.reset();
+                 UTF8StringSerializerDeserializer.INSTANCE.serialize(tokens.get(i), dos);
+                 tb.addFieldEndOffset();
+                 IntegerSerializerDeserializer.INSTANCE.serialize(j, dos);
+                 tb.addFieldEndOffset();
+
+                 checkInvLists.get(i).add(j);
+
+                 appender.reset(frame, true);
+                 appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
+
+                 tuple.reset(accessor, 0);
+
+                 try {
+                     invIndex.bulkLoadAddTuple(ctx, tuple);
+                 } catch (Exception e) {
+                     // Fail fast: an id recorded in checkInvLists but missing from the index
+                     // would otherwise only show up later as a confusing result mismatch.
+                     throw new IllegalStateException(
+                             "bulk load failed for token \"" + tokens.get(i) + "\", id " + j, e);
+                 }
+             }
+         }
+     }
+     invIndex.endBulkLoad(ctx);
+ }
+
+ /**
+  * Computes the expected answer of a query by brute force and stores it in {@code expectedResults}.
+  * <p>
+  * Every id gets one count per query token whose check list contains it (a token that
+  * appears twice in the query is counted twice). Ids whose count reaches
+  * {@code occurrenceThreshold} are collected in ascending order.
+  *
+  * @param queryTokenIndexes indexes into {@code checkInvLists}; only the first {@code numQueryTokens} are read
+  * @param numQueryTokens number of valid entries in {@code queryTokenIndexes}
+  * @param occurrenceThreshold minimum count an id needs to be expected
+  */
+ private void fillExpectedResults(int[] queryTokenIndexes, int numQueryTokens, int occurrenceThreshold) {
+     // clear the counters left over from the previous query
+     int slot = 0;
+     while (slot < maxId) {
+         scanCountArray[slot] = 0;
+         slot++;
+     }
+
+     // tally one occurrence per (query token, id) pair
+     for (int q = 0; q < numQueryTokens; q++) {
+         for (Integer docId : checkInvLists.get(queryTokenIndexes[q])) {
+             scanCountArray[docId]++;
+         }
+     }
+
+     // keep the ids that reach the threshold
+     expectedResults.clear();
+     for (int candidate = 0; candidate < maxId; candidate++) {
+         if (scanCountArray[candidate] < occurrenceThreshold) {
+             continue;
+         }
+         expectedResults.add(candidate);
+     }
+ }
+
+ /**
+  * Runs 50 randomly generated keyword queries through {@code searcher} and checks every
+  * answer against the brute-force result of {@code fillExpectedResults}.
+  * <p>
+  * The random generator is re-seeded first, so each call issues the same queries.
+  * A query for which the searcher throws {@link OccurrenceThresholdPanicException} is
+  * timed and printed but not verified.
+  *
+  * @param searchModifier modifier passed to every search
+  * @throws Exception if a search fails with anything other than a threshold panic
+  */
+ private void runQueries(IInvertedIndexSearchModifier searchModifier) throws Exception {
+
+     rnd.setSeed(50);
+
+     // generate random queries
+     int queries = 50;
+     int[] queryTokenIndexes = new int[tokens.size()];
+     for (int i = 0; i < queries; i++) {
+
+         int numQueryTokens = Math.abs(rnd.nextInt() % tokens.size()) + 1;
+         for (int j = 0; j < numQueryTokens; j++) {
+             queryTokenIndexes[j] = Math.abs(rnd.nextInt() % tokens.size());
+         }
+
+         StringBuilder strBuilder = new StringBuilder();
+         for (int j = 0; j < numQueryTokens; j++) {
+             strBuilder.append(tokens.get(queryTokenIndexes[j]));
+             if (j + 1 != numQueryTokens) {
+                 strBuilder.append(" ");
+             }
+         }
+
+         String queryString = strBuilder.toString();
+
+         // serialize the query string into the shared frame
+         queryTb.reset();
+         UTF8StringSerializerDeserializer.INSTANCE.serialize(queryString, queryDos);
+         queryTb.addFieldEndOffset();
+
+         queryAppender.reset(frame, true);
+         queryAppender.append(queryTb.getFieldEndOffsets(), queryTb.getByteArray(), 0, queryTb.getSize());
+         queryTuple.reset(queryAccessor, 0);
+
+         boolean panic = false;
+
+         int repeats = 1;
+         double totalTime = 0;
+         for (int j = 0; j < repeats; j++) {
+             long timeStart = System.currentTimeMillis();
+             try {
+                 searcher.reset();
+                 searcher.search(resultCursor, queryTuple, 0, searchModifier);
+             } catch (OccurrenceThresholdPanicException e) {
+                 panic = true;
+             }
+             long timeEnd = System.currentTimeMillis();
+             totalTime += timeEnd - timeStart;
+         }
+         double avgTime = totalTime / (double) repeats;
+         System.out.println(i + ": " + "\"" + queryString + "\": " + avgTime + "ms");
+
+         if (!panic) {
+
+             fillExpectedResults(queryTokenIndexes, numQueryTokens, searcher.getOccurrenceThreshold());
+
+             // verify results
+             int checkIndex = 0;
+             while (resultCursor.hasNext()) {
+                 resultCursor.next();
+                 ITupleReference resultTuple = resultCursor.getTuple();
+                 int id = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(0));
+                 // Report surplus results as a test failure rather than an IndexOutOfBoundsException.
+                 Assert.assertTrue("query \"" + queryString + "\" returned more results than the "
+                         + expectedResults.size() + " expected", checkIndex < expectedResults.size());
+                 Assert.assertEquals(expectedResults.get(checkIndex).intValue(), id);
+                 checkIndex++;
+             }
+
+             // too few results: dump the expected ids before the assertion below fails
+             if (expectedResults.size() != checkIndex) {
+                 System.out.println("CHECKING");
+                 for (Integer x : expectedResults) {
+                     System.out.print(x + " ");
+                 }
+                 System.out.println();
+             }
+
+             Assert.assertEquals(expectedResults.size(), checkIndex);
+         }
+     }
+ }
+
+ /*
@Test
- public void conjunctiveSearchTest() throws Exception {
-
+ public void conjunctiveKeywordQueryTest() throws Exception {
+ IInvertedIndexSearchModifier searchModifier = new ConjunctiveSearchModifier();
+ runQueries(searchModifier);
+ }
+ */
+
+
+ /**
+  * Runs the random keyword-query workload once per Jaccard threshold, from 0.9 down to 0.5.
+  * The modifier is created with threshold 1.0, but that value is never queried
+  * (the 1.0 run is disabled).
+  */
+ @Test
+ public void jaccardKeywordQueryTest() throws Exception {
+     JaccardSearchModifier searchModifier = new JaccardSearchModifier(1.0f);
+
+     final float[] jaccThresholds = { 0.9f, 0.8f, 0.7f, 0.6f, 0.5f };
+     for (float jaccThresh : jaccThresholds) {
+         System.out.println("JACCARD: " + jaccThresh);
+         searchModifier.setJaccThresh(jaccThresh);
+         runQueries(searchModifier);
+     }
+ }
+
+ @AfterClass
+ public static void deinit() throws HyracksDataException { // class-level teardown for what start() opened
+ AbstractInvIndexTest.tearDown(); // calls deleteOnExit() on both index files, so the actual deletion is deferred to JVM exit
+ btree.close();
+ invIndex.close();
+ bufferCache.closeFile(btreeFileId); // files are closed before the cache itself
+ bufferCache.closeFile(invListsFileId);
+ bufferCache.close();
+ }
+
+ /*
+ @Test
+ public void jaccardKeywordQueryTest() throws Exception {
+
TestStorageManagerComponentHolder.init(PAGE_SIZE, NUM_PAGES);
IBufferCache bufferCache = TestStorageManagerComponentHolder.getBufferCache(stageletCtx);
IFileMapProvider fmp = TestStorageManagerComponentHolder.getFileMapProvider(stageletCtx);
@@ -92,7 +442,7 @@
int fieldCount = 5;
ITypeTrait[] typeTraits = new ITypeTrait[fieldCount];
// token (key)
- typeTraits[0] = new TypeTrait(ITypeTrait.VARIABLE_LENGTH);
+ typeTraits[0] = new TypeTrait(4);
// startPageId
typeTraits[1] = new TypeTrait(4);
// endPageId
@@ -105,14 +455,12 @@
// declare btree keys
int keyFieldCount = 1;
IBinaryComparator[] cmps = new IBinaryComparator[keyFieldCount];
- cmps[0] = UTF8StringBinaryComparatorFactory.INSTANCE.createBinaryComparator();
+ cmps[0] = IntegerBinaryComparatorFactory.INSTANCE.createBinaryComparator();
MultiComparator cmp = new MultiComparator(typeTraits, cmps);
TypeAwareTupleWriterFactory tupleWriterFactory = new TypeAwareTupleWriterFactory(typeTraits);
IBTreeLeafFrameFactory leafFrameFactory = new NSMLeafFrameFactory(tupleWriterFactory);
- // IBTreeLeafFrameFactory leafFrameFactory = new
- // FieldPrefixNSMLeafFrameFactory(tupleWriterFactory);
IBTreeInteriorFrameFactory interiorFrameFactory = new NSMInteriorFrameFactory(tupleWriterFactory);
ITreeIndexMetaDataFrameFactory metaFrameFactory = new LIFOMetaDataFrameFactory();
@@ -127,7 +475,7 @@
int invListFields = 1;
ITypeTrait[] invListTypeTraits = new ITypeTrait[invListFields];
- invListTypeTraits[0] = new TypeTrait(4);
+ invListTypeTraits[0] = new TypeTrait(4);
int invListKeys = 1;
IBinaryComparator[] invListBinCmps = new IBinaryComparator[invListKeys];
@@ -171,7 +519,7 @@
int maxId = 1000000;
int addProb = 0;
- int addProbStep = 10;
+ int addProbStep = 10;
IInvertedListBuilder invListBuilder = new FixedSizeElementInvertedListBuilder(invListTypeTraits);
InvertedIndex.BulkLoadContext ctx = invIndex.beginBulkLoad(invListBuilder, HYRACKS_FRAME_SIZE);
@@ -234,6 +582,7 @@
int queries = 100;
int[] queryTokenIndexes = new int[tokens.size()];
for(int i = 0; i < queries; i++) {
+
int numQueryTokens = Math.abs(rnd.nextInt() % tokens.size()) + 1;
for(int j = 0; j < numQueryTokens; j++) {
queryTokenIndexes[j] = Math.abs(rnd.nextInt() % tokens.size());
@@ -246,7 +595,6 @@
}
String queryString = strBuilder.toString();
- //String queryString = "major";
queryTb.reset();
UTF8StringSerializerDeserializer.INSTANCE.serialize(queryString, queryDos);
@@ -256,7 +604,7 @@
queryAppender.append(queryTb.getFieldEndOffsets(), queryTb.getByteArray(), 0, queryTb.getSize());
queryTuple.reset(queryAccessor, 0);
- int repeats = 1;
+ int repeats = 10;
double totalTime = 0;
for(int j = 0; j < repeats; j++) {
long timeStart = System.currentTimeMillis();
@@ -266,27 +614,27 @@
totalTime += timeEnd - timeStart;
}
double avgTime = totalTime / (double)repeats;
- System.out.println("\"" + queryString + "\": " + avgTime + "ms");
+ System.out.println(i + ": " + "\"" + queryString + "\": " + avgTime + "ms");
+ // TODO:
// generate intersection for verification
TreeSet<Integer> checkResults = new TreeSet<Integer>(checkSets.get(queryTokenIndexes[0]));
for(int j = 1; j < numQueryTokens; j++) {
checkResults.retainAll(checkSets.get(queryTokenIndexes[j]));
}
Integer[] check = new Integer[checkResults.size()];
- check = checkResults.toArray(check);
-
+ check = checkResults.toArray(check);
+
// verify results
int checkIndex = 0;
while(resultCursor.hasNext()) {
resultCursor.next();
ITupleReference resultTuple = resultCursor.getTuple();
int id = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(0));
- //Assert.assertEquals(id, check[checkIndex].intValue());
+ Assert.assertEquals(id, check[checkIndex].intValue());
checkIndex++;
- }
-
- //System.out.println("RESULTS: " + check.length + " " + checkIndex);
+ }
+ Assert.assertEquals(check.length, checkIndex);
}
btree.close();
@@ -294,4 +642,5 @@
bufferCache.closeFile(invListsFileId);
bufferCache.close();
}
+ */
}