First steps in preparing the inverted-index testing framework to deal with length partitioning.
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_lsm_length_filter@2454 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndexSearchModifier.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndexSearchModifier.java
index 619937f..315b29e 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndexSearchModifier.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndexSearchModifier.java
@@ -20,4 +20,8 @@
public int getOccurrenceThreshold(int numQueryTokens);
public int getNumPrefixLists(int numQueryTokens);
+
+ public int getNumTokensLowerBound(int numQueryTokens);
+
+ public int getNumTokensUpperBound(int numQueryTokens);
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/ConjunctiveSearchModifier.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/ConjunctiveSearchModifier.java
index 1d260f0..14d8414 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/ConjunctiveSearchModifier.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/ConjunctiveSearchModifier.java
@@ -33,4 +33,14 @@
public String toString() {
return "Conjunctive Search Modifier";
}
+
+ @Override
+ public int getNumTokensLowerBound(int numQueryTokens) {
+ return -1;
+ }
+
+ @Override
+ public int getNumTokensUpperBound(int numQueryTokens) {
+ return -1;
+ }
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/EditDistanceSearchModifier.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/EditDistanceSearchModifier.java
index 0580319..a9f4471 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/EditDistanceSearchModifier.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/EditDistanceSearchModifier.java
@@ -37,6 +37,16 @@
return numQueryTokens - getOccurrenceThreshold(numQueryTokens) + 1;
}
+ @Override
+ public int getNumTokensLowerBound(int numQueryTokens) {
+ return numQueryTokens - edThresh;
+ }
+
+ @Override
+ public int getNumTokensUpperBound(int numQueryTokens) {
+ return numQueryTokens + edThresh;
+ }
+
public int getGramLength() {
return gramLength;
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/JaccardSearchModifier.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/JaccardSearchModifier.java
index 4cf7e40..d1facc4 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/JaccardSearchModifier.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/JaccardSearchModifier.java
@@ -38,6 +38,16 @@
return numQueryTokens - getOccurrenceThreshold(numQueryTokens) + 1;
}
+ @Override
+ public int getNumTokensLowerBound(int numQueryTokens) {
+ return (int) Math.floor(numQueryTokens * jaccThresh);
+ }
+
+ @Override
+ public int getNumTokensUpperBound(int numQueryTokens) {
+ return (int) Math.ceil(numQueryTokens / jaccThresh);
+ }
+
public float getJaccThresh() {
return jaccThresh;
}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTokenizingNumTokensTupleIterator.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTokenizingNumTokensTupleIterator.java
new file mode 100644
index 0000000..347ea73
--- /dev/null
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTokenizingNumTokensTupleIterator.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util;
+
+import java.io.IOException;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IToken;
+
+// TODO: We can possibly avoid copying the data into a new tuple here.
+public class InvertedIndexTokenizingNumTokensTupleIterator extends InvertedIndexTokenizingTupleIterator {
+
+ protected int numTokens = 0;
+
+ public InvertedIndexTokenizingNumTokensTupleIterator(int tokensFieldCount, int invListFieldCount,
+ IBinaryTokenizer tokenizer) {
+ super(tokensFieldCount, invListFieldCount, tokenizer);
+ }
+
+ public void reset(ITupleReference inputTuple) {
+ super.reset(inputTuple);
+ // Run through the tokenizer once to get the total number of tokens.
+ numTokens = 0;
+ while (tokenizer.hasNext()) {
+ tokenizer.next();
+ numTokens++;
+ }
+ super.reset(inputTuple);
+ }
+
+ public void next() throws HyracksDataException {
+ tokenizer.next();
+ IToken token = tokenizer.getToken();
+ tupleBuilder.reset();
+ try {
+ // Add token field.
+ token.serializeToken(tupleBuilder.getDataOutput());
+ tupleBuilder.addFieldEndOffset();
+ // Add field with number of tokens.
+ tupleBuilder.getDataOutput().writeInt(numTokens);
+ tupleBuilder.addFieldEndOffset();
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
+ }
+ // Add inverted-list element fields.
+ for (int i = 0; i < invListFieldCount; i++) {
+ tupleBuilder.addField(inputTuple.getFieldData(i + 1), inputTuple.getFieldStart(i + 1),
+ inputTuple.getFieldLength(i + 1));
+ }
+ // Reset tuple reference for insert operation.
+ tupleReference.reset(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray());
+ }
+}
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTokenizingTupleIterator.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTokenizingTupleIterator.java
index 633213a..4f8f635 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTokenizingTupleIterator.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTokenizingTupleIterator.java
@@ -27,13 +27,13 @@
// TODO: We can possibly avoid copying the data into a new tuple here.
public class InvertedIndexTokenizingTupleIterator {
// Field that is expected to be tokenized.
- private final int DOC_FIELD_INDEX = 0;
+ protected final int DOC_FIELD_INDEX = 0;
- private final int invListFieldCount;
- private final ArrayTupleBuilder tupleBuilder;
- private final ArrayTupleReference tupleReference;
- private final IBinaryTokenizer tokenizer;
- private ITupleReference inputTuple;
+ protected final int invListFieldCount;
+ protected final ArrayTupleBuilder tupleBuilder;
+ protected final ArrayTupleReference tupleReference;
+ protected final IBinaryTokenizer tokenizer;
+ protected ITupleReference inputTuple;
public InvertedIndexTokenizingTupleIterator(int tokensFieldCount, int invListFieldCount, IBinaryTokenizer tokenizer) {
this.invListFieldCount = invListFieldCount;
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestContext.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestContext.java
index 34b11c7..ce87bd7 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestContext.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestContext.java
@@ -45,23 +45,28 @@
public static enum InvertedIndexType {
INMEMORY,
ONDISK,
- LSM
+ LSM,
+ PARTITIONED_INMEMORY,
+ PARTITIONED_ONDISK,
+ PARTITIONED_LSM
};
protected IInvertedIndex invIndex;
protected IBinaryComparatorFactory[] allCmpFactories;
protected IBinaryTokenizerFactory tokenizerFactory;
+ protected InvertedIndexType invIndexType;
protected InvertedIndexTokenizingTupleIterator indexTupleIter;
protected HashSet<Comparable> allTokens = new HashSet<Comparable>();
protected List<ITupleReference> documentCorpus = new ArrayList<ITupleReference>();
public LSMInvertedIndexTestContext(ISerializerDeserializer[] fieldSerdes, IIndex index,
- IBinaryTokenizerFactory tokenizerFactory) {
+ IBinaryTokenizerFactory tokenizerFactory, InvertedIndexType invIndexType,
+ InvertedIndexTokenizingTupleIterator indexTupleIter) {
super(fieldSerdes, index);
- this.tokenizerFactory = tokenizerFactory;
invIndex = (IInvertedIndex) index;
- indexTupleIter = new InvertedIndexTokenizingTupleIterator(invIndex.getTokenTypeTraits().length,
- invIndex.getInvListTypeTraits().length, tokenizerFactory.createTokenizer());
+ this.tokenizerFactory = tokenizerFactory;
+ this.invIndexType = invIndexType;
+ this.indexTupleIter = indexTupleIter;
}
@Override
@@ -88,8 +93,9 @@
return allCmpFactories;
}
- public static LSMInvertedIndexTestContext create(LSMInvertedIndexTestHarness harness, ISerializerDeserializer[] fieldSerdes,
- int tokenFieldCount, IBinaryTokenizerFactory tokenizerFactory, InvertedIndexType invIndexType) throws IndexException {
+ public static LSMInvertedIndexTestContext create(LSMInvertedIndexTestHarness harness,
+ ISerializerDeserializer[] fieldSerdes, int tokenFieldCount, IBinaryTokenizerFactory tokenizerFactory,
+ InvertedIndexType invIndexType) throws IndexException {
ITypeTraits[] allTypeTraits = SerdeUtils.serdesToTypeTraits(fieldSerdes);
IBinaryComparatorFactory[] allCmpFactories = SerdeUtils.serdesToComparatorFactories(fieldSerdes,
fieldSerdes.length);
@@ -108,22 +114,25 @@
invListTypeTraits[i] = allTypeTraits[i + tokenFieldCount];
invListCmpFactories[i] = allCmpFactories[i + tokenFieldCount];
}
- // Create index and test context.
+ // Create index and test context.
IInvertedIndex invIndex;
switch (invIndexType) {
- case INMEMORY: {
+ case INMEMORY:
+ case PARTITIONED_INMEMORY: {
invIndex = InvertedIndexUtils.createInMemoryBTreeInvertedindex(harness.getMemBufferCache(),
harness.getMemFreePageManager(), invListTypeTraits, invListCmpFactories, tokenTypeTraits,
tokenCmpFactories, tokenizerFactory);
break;
}
- case ONDISK: {
+ case ONDISK:
+ case PARTITIONED_ONDISK: {
invIndex = InvertedIndexUtils.createOnDiskInvertedIndex(harness.getDiskBufferCache(),
harness.getDiskFileMapProvider(), invListTypeTraits, invListCmpFactories, tokenTypeTraits,
tokenCmpFactories, harness.getInvListsFileRef());
break;
}
- case LSM: {
+ case LSM:
+ case PARTITIONED_LSM: {
invIndex = InvertedIndexUtils.createLSMInvertedIndex(harness.getMemBufferCache(),
harness.getMemFreePageManager(), harness.getDiskFileMapProvider(), invListTypeTraits,
invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory,
@@ -136,7 +145,29 @@
throw new InvertedIndexException("Unknow inverted-index type '" + invIndexType + "'.");
}
}
- LSMInvertedIndexTestContext testCtx = new LSMInvertedIndexTestContext(fieldSerdes, invIndex, tokenizerFactory);
+ InvertedIndexTokenizingTupleIterator indexTupleIter = null;
+ switch (invIndexType) {
+ case INMEMORY:
+ case ONDISK:
+ case LSM: {
+ indexTupleIter = new InvertedIndexTokenizingTupleIterator(invIndex.getTokenTypeTraits().length,
+ invIndex.getInvListTypeTraits().length, tokenizerFactory.createTokenizer());
+ break;
+ }
+ case PARTITIONED_INMEMORY:
+ case PARTITIONED_ONDISK:
+ case PARTITIONED_LSM: {
+ indexTupleIter = new InvertedIndexTokenizingNumTokensTupleIterator(
+ invIndex.getTokenTypeTraits().length, invIndex.getInvListTypeTraits().length,
+ tokenizerFactory.createTokenizer());
+ break;
+ }
+ default: {
+ throw new InvertedIndexException("Unknow inverted-index type '" + invIndexType + "'.");
+ }
+ }
+ LSMInvertedIndexTestContext testCtx = new LSMInvertedIndexTestContext(fieldSerdes, invIndex, tokenizerFactory,
+ invIndexType, indexTupleIter);
return testCtx;
}
@@ -192,4 +223,8 @@
public List<ITupleReference> getDocumentCorpus() {
return documentCorpus;
}
+
+ public InvertedIndexType getInvertedIndexType() {
+ return invIndexType;
+ }
}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
index 68824f5..8044821 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
@@ -101,16 +101,38 @@
public static LSMInvertedIndexTestContext createWordInvIndexTestContext(LSMInvertedIndexTestHarness harness,
InvertedIndexType invIndexType) throws IOException, IndexException {
- ISerializerDeserializer[] fieldSerdes = new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = null;
+ int tokenFieldCount = 0;
+ switch (invIndexType) {
+ case INMEMORY:
+ case ONDISK:
+ case LSM: {
+ fieldSerdes = new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE };
+ tokenFieldCount = 1;
+ break;
+ }
+ case PARTITIONED_INMEMORY:
+ case PARTITIONED_ONDISK:
+ case PARTITIONED_LSM: {
+ // Such indexes also include the set-size for partitioning.
+ fieldSerdes = new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
+ tokenFieldCount = 2;
+ break;
+ }
+ default: {
+ throw new IndexException("Unhandled inverted index type '" + invIndexType + "'.");
+ }
+ }
ITokenFactory tokenFactory = new UTF8WordTokenFactory();
IBinaryTokenizerFactory tokenizerFactory = new DelimitedUTF8StringBinaryTokenizerFactory(true, false,
tokenFactory);
- LSMInvertedIndexTestContext testCtx = LSMInvertedIndexTestContext.create(harness, fieldSerdes, 1, tokenizerFactory,
- invIndexType);
+ LSMInvertedIndexTestContext testCtx = LSMInvertedIndexTestContext.create(harness, fieldSerdes, tokenFieldCount,
+ tokenizerFactory, invIndexType);
return testCtx;
}
-
+
public static LSMInvertedIndexTestContext createHashedWordInvIndexTestContext(LSMInvertedIndexTestHarness harness,
InvertedIndexType invIndexType) throws IOException, IndexException {
ISerializerDeserializer[] fieldSerdes = new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE,
@@ -149,7 +171,7 @@
public static void bulkLoadInvIndex(LSMInvertedIndexTestContext testCtx, TupleGenerator tupleGen, int numDocs)
throws IndexException, IOException {
- SortedSet<CheckTuple> tmpMemIndex = new TreeSet<CheckTuple>();;
+ SortedSet<CheckTuple> tmpMemIndex = new TreeSet<CheckTuple>();
// First generate the expected index by inserting the documents one-by-one.
for (int i = 0; i < numDocs; i++) {
ITupleReference tuple = tupleGen.next();
@@ -330,10 +352,33 @@
/**
* Determine the expected results with the simple ScanCount algorithm.
*/
+ public static void getExpectedResults(int[] scanCountArray, TreeSet<CheckTuple> checkTuples,
+ ITupleReference searchDocument, IBinaryTokenizer tokenizer, ISerializerDeserializer tokenSerde,
+ IInvertedIndexSearchModifier searchModifier, List<Integer> expectedResults, InvertedIndexType invIndexType)
+ throws IOException {
+ boolean isPartitioned = false;
+ switch (invIndexType) {
+ case INMEMORY:
+ case ONDISK:
+ case LSM: {
+ isPartitioned = false;
+ break;
+ }
+ case PARTITIONED_INMEMORY:
+ case PARTITIONED_ONDISK:
+ case PARTITIONED_LSM: {
+ isPartitioned = true;
+ break;
+ }
+ }
+ getExpectedResults(scanCountArray, checkTuples, searchDocument, tokenizer, tokenSerde,
+ searchModifier, expectedResults, isPartitioned);
+ }
+
@SuppressWarnings("unchecked")
public static void getExpectedResults(int[] scanCountArray, TreeSet<CheckTuple> checkTuples,
ITupleReference searchDocument, IBinaryTokenizer tokenizer, ISerializerDeserializer tokenSerde,
- IInvertedIndexSearchModifier searchModifier, List<Integer> expectedResults) throws IOException {
+ IInvertedIndexSearchModifier searchModifier, List<Integer> expectedResults, boolean isPartitioned) throws IOException {
// Reset scan count array.
Arrays.fill(scanCountArray, 0);
expectedResults.clear();
@@ -341,9 +386,25 @@
ByteArrayAccessibleOutputStream baaos = new ByteArrayAccessibleOutputStream();
tokenizer.reset(searchDocument.getFieldData(0), searchDocument.getFieldStart(0),
searchDocument.getFieldLength(0));
+ // Run though tokenizer to get number of tokens.
int numQueryTokens = 0;
while (tokenizer.hasNext()) {
tokenizer.next();
+ numQueryTokens++;
+ }
+ int numTokensLowerBound = -1;
+ int numTokensUpperBound = -1;
+ int invListElementField = 1;
+ if (isPartitioned) {
+ numTokensLowerBound = searchModifier.getNumTokensLowerBound(numQueryTokens);
+ numTokensUpperBound = searchModifier.getNumTokensUpperBound(numQueryTokens);
+ invListElementField = 2;
+ }
+ int occurrenceThreshold = searchModifier.getOccurrenceThreshold(numQueryTokens);
+ tokenizer.reset(searchDocument.getFieldData(0), searchDocument.getFieldStart(0),
+ searchDocument.getFieldLength(0));
+ while (tokenizer.hasNext()) {
+ tokenizer.next();
IToken token = tokenizer.getToken();
baaos.reset();
DataOutput out = new DataOutputStream(baaos);
@@ -351,10 +412,24 @@
ByteArrayInputStream inStream = new ByteArrayInputStream(baaos.getByteArray(), 0, baaos.size());
DataInput dataIn = new DataInputStream(inStream);
Comparable tokenObj = (Comparable) tokenSerde.deserialize(dataIn);
- CheckTuple lowKey = new CheckTuple(1, 1);
- lowKey.appendField(tokenObj);
- CheckTuple highKey = new CheckTuple(1, 1);
- highKey.appendField(tokenObj);
+ CheckTuple lowKey;
+ if (numTokensLowerBound < 0) {
+ lowKey = new CheckTuple(1, 1);
+ lowKey.appendField(tokenObj);
+ } else {
+ lowKey = new CheckTuple(2, 2);
+ lowKey.appendField(tokenObj);
+ lowKey.appendField(Integer.valueOf(numTokensLowerBound));
+ }
+ CheckTuple highKey;
+ if (numTokensUpperBound < 0) {
+ highKey = new CheckTuple(1, 1);
+ highKey.appendField(tokenObj);
+ } else {
+ highKey = new CheckTuple(2, 2);
+ highKey.appendField(tokenObj);
+ highKey.appendField(Integer.valueOf(numTokensUpperBound));
+ }
// Get view over check tuples containing inverted-list corresponding to token.
SortedSet<CheckTuple> invList = OrderedIndexTestUtils.getPrefixExpectedSubset(checkTuples, lowKey, highKey);
@@ -362,13 +437,12 @@
// Iterate over inverted list and update scan count array.
while (invListIter.hasNext()) {
CheckTuple checkTuple = invListIter.next();
- Integer element = (Integer) checkTuple.getField(1);
+ Integer element = (Integer) checkTuple.getField(invListElementField);
scanCountArray[element]++;
}
- numQueryTokens++;
+
}
-
- int occurrenceThreshold = searchModifier.getOccurrenceThreshold(numQueryTokens);
+
// Run through scan count array, and see whether elements satisfy the given occurrence threshold.
expectedResults.clear();
for (int i = 0; i < scanCountArray.length; i++) {
@@ -394,7 +468,7 @@
IIndexCursor resultCursor = accessor.createSearchCursor();
int numQueries = numDocQueries + numRandomQueries;
for (int i = 0; i < numQueries; i++) {
- // If number of documents in the corpus io less than numDocQueries, then replace the remaining ones with random queries.
+ // If number of documents in the corpus is less than numDocQueries, then replace the remaining ones with random queries.
if (i >= numDocQueries || i >= documentCorpus.size()) {
// Generate a random query.
ITupleReference randomQuery = tupleGen.next();
@@ -438,8 +512,9 @@
// Get expected results.
List<Integer> expectedResults = new ArrayList<Integer>();
- LSMInvertedIndexTestUtils.getExpectedResults(scanCountArray, testCtx.getCheckTuples(), searchDocument,
- tokenizer, testCtx.getFieldSerdes()[0], searchModifier, expectedResults);
+ LSMInvertedIndexTestUtils.getExpectedResults(scanCountArray, testCtx.getCheckTuples(),
+ searchDocument, tokenizer, testCtx.getFieldSerdes()[0], searchModifier, expectedResults,
+ testCtx.getInvertedIndexType());
Iterator<Integer> expectedIter = expectedResults.iterator();
Iterator<Integer> actualIter = actualResults.iterator();