First initial copy from hyracks_inverted_index_updates.
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_inverted_index_updates_new@1802 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
new file mode 100644
index 0000000..8541a92
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
@@ -0,0 +1,50 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>hyracks-storage-am-lsm-invertedindex-test</artifactId>
+
+ <parent>
+ <artifactId>hyracks-tests</artifactId>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <version>0.2.1-SNAPSHOT</version>
+ <relativePath>..</relativePath>
+ </parent>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.6</source>
+ <target>1.6</target>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ <dependencies>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
+ <version>0.2.1-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-test-support</artifactId>
+ <version>0.2.1-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-data-std</artifactId>
+ <version>0.2.1-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+</project>
\ No newline at end of file
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/utils/InvertedIndexTestUtils.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/utils/InvertedIndexTestUtils.java
new file mode 100644
index 0000000..7152a9a
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/utils/InvertedIndexTestUtils.java
@@ -0,0 +1,65 @@
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.utils;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
+import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMInteriorFrameFactory;
+import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMLeafFrameFactory;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
+import edu.uci.ics.hyracks.storage.am.common.api.IFreePageManager;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexMetaDataFrameFactory;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexTupleWriterFactory;
+import edu.uci.ics.hyracks.storage.am.common.frames.LIFOMetaDataFrameFactory;
+import edu.uci.ics.hyracks.storage.am.common.freepage.LinkedListFreePageManager;
+import edu.uci.ics.hyracks.storage.am.common.freepage.LinkedListFreePageManagerFactory;
+import edu.uci.ics.hyracks.storage.am.common.tuples.TypeAwareTupleWriterFactory;
+import edu.uci.ics.hyracks.storage.am.invertedindex.impls.InvertedIndex;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IBinaryTokenizer;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls.InMemoryBtreeInvertedIndex;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls.LSMInvertedIndex;
+import edu.uci.ics.hyracks.storage.am.lsm.inverteredindex.LSMInvertedIndexTestHarness;
+
+public class InvertedIndexTestUtils {
+ public static InvertedIndex createTestInvertedIndex(LSMInvertedIndexTestHarness harness, IBinaryTokenizer tokenizer)
+ throws HyracksDataException {
+ ITreeIndexMetaDataFrameFactory metaFrameFactory = new LIFOMetaDataFrameFactory();
+ ITypeTraits[] btreeTypeTraits = new ITypeTraits[] { UTF8StringPointable.TYPE_TRAITS,
+ IntegerPointable.TYPE_TRAITS, IntegerPointable.TYPE_TRAITS, IntegerPointable.TYPE_TRAITS,
+ IntegerPointable.TYPE_TRAITS };
+ ITreeIndexTupleWriterFactory tupleWriterFactory = new TypeAwareTupleWriterFactory(btreeTypeTraits);
+ ITreeIndexFrameFactory leafFrameFactory = new BTreeNSMLeafFrameFactory(tupleWriterFactory);
+ ITreeIndexFrameFactory interiorFrameFactory = new BTreeNSMInteriorFrameFactory(tupleWriterFactory);
+ IFreePageManager freePageManager = new LinkedListFreePageManager(harness.getDiskBufferCache(), 0,
+ metaFrameFactory);
+ IBinaryComparatorFactory[] btreeCmpFactories = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
+ .of(UTF8StringPointable.FACTORY) };
+ BTree btree = new BTree(harness.getDiskBufferCache(), 5, btreeCmpFactories, freePageManager,
+ interiorFrameFactory, leafFrameFactory);
+ btree.create(harness.getDiskBtreeFileId());
+ btree.open(harness.getDiskBtreeFileId());
+ return LSMInvertedIndexUtils.createInvertedIndex(harness.getDiskBufferCache(), btree,
+ harness.getInvertedListTypeTraits(), harness.getInvertedListBinaryComparatorFactories(), tokenizer);
+ }
+
+ public static InMemoryBtreeInvertedIndex createTestInMemoryBTreeInvertedIndex(LSMInvertedIndexTestHarness harness,
+ IBinaryTokenizer tokenizer) {
+ return LSMInvertedIndexUtils.createInMemoryBTreeInvertedindex(harness.getMemBufferCache(),
+ harness.getMemFreePageManager(), harness.getTokenTypeTraits(), harness.getInvertedListTypeTraits(),
+ harness.getTokenBinaryComparatorFactories(), harness.getInvertedListBinaryComparatorFactories(),
+ tokenizer);
+ }
+
+ public static LSMInvertedIndex createTestLSMInvertedIndex(LSMInvertedIndexTestHarness harness,
+ IBinaryTokenizer tokenizer) {
+ return LSMInvertedIndexUtils.createLSMInvertedIndex(harness.getMemBufferCache(),
+ harness.getMemFreePageManager(), harness.getTokenTypeTraits(), harness.getInvertedListTypeTraits(),
+ harness.getTokenBinaryComparatorFactories(), harness.getInvertedListBinaryComparatorFactories(),
+ tokenizer, harness.getDiskBufferCache(),
+ new LinkedListFreePageManagerFactory(harness.getDiskBufferCache(), new LIFOMetaDataFrameFactory()),
+ harness.getIOManager(), harness.getOnDiskDir(), harness.getDiskFileMapProvider());
+ }
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/AbstractInvertedIndexBulkloadTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/AbstractInvertedIndexBulkloadTest.java
new file mode 100644
index 0000000..ba52116
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/AbstractInvertedIndexBulkloadTest.java
@@ -0,0 +1,17 @@
+package edu.uci.ics.hyracks.storage.am.lsm.inverteredindex;
+
+import java.io.IOException;
+
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+
+public abstract class AbstractInvertedIndexBulkloadTest extends AbstractInvertedIndexTest {
+
+ @Test
+ public void bulkLoadTest() throws IndexException, IOException {
+ bulkLoadDocuments();
+ buildBaselineIndex();
+ verifyAgainstBaseline();
+ }
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/AbstractInvertedIndexInsertTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/AbstractInvertedIndexInsertTest.java
new file mode 100644
index 0000000..ed2a39c
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/AbstractInvertedIndexInsertTest.java
@@ -0,0 +1,23 @@
+package edu.uci.ics.hyracks.storage.am.lsm.inverteredindex;
+
+import java.io.IOException;
+
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+
+public abstract class AbstractInvertedIndexInsertTest extends AbstractInvertedIndexTest {
+
+ /**
+ * This test inserts documents into the inverted index, builds a baseline inverted index, and
+ * verifies that each token and its associated inverted list is matching in both the baseline and the
+ * inverted index to be tested.
+ */
+ @Test
+ public void insertionTest() throws IndexException, IOException {
+ insertDocuments();
+ buildBaselineIndex();
+ verifyAgainstBaseline();
+ }
+
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/AbstractInvertedIndexTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/AbstractInvertedIndexTest.java
new file mode 100644
index 0000000..8fdf67e
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/AbstractInvertedIndexTest.java
@@ -0,0 +1,402 @@
+package edu.uci.ics.hyracks.storage.am.lsm.inverteredindex;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.junit.After;
+import org.junit.Before;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleReference;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ByteArrayAccessibleOutputStream;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.util.TupleUtils;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoadContext;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndex;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.impls.InvertedIndexSearchPredicate;
+import edu.uci.ics.hyracks.storage.am.invertedindex.impls.OccurrenceThresholdPanicException;
+import edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers.ConjunctiveSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IBinaryTokenizer;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IToken;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls.LSMInvertedIndexAccessor;
+
+public abstract class AbstractInvertedIndexTest {
+ protected Logger LOGGER;
+ protected LSMInvertedIndexTestHarness harness = new LSMInvertedIndexTestHarness();
+
+ protected IInvertedIndex invertedIndex;
+ protected IIndexAccessor invertedIndexAccessor;
+ protected IBinaryComparator[] tokenComparators;
+ protected IBinaryTokenizer tokenizer;
+
+ // This number will only be used in generating random documents.
+ // If predefined data is generated, then the number of documents is fixed.
+ protected int numDocuments = 1000;
+ protected int maxDocumentLength = 200;
+ protected List<String> documents = new ArrayList<String>();
+ protected Map<String, SortedSet<Integer>> baselineInvertedIndex = new HashMap<String, SortedSet<Integer>>();
+
+ // Generate random data is false by default (generate predefined data is true!)
+ protected boolean random = false;
+
+ // Subclasses must implement these methods by initializing the proper class members
+ protected abstract void setTokenizer();
+
+ protected abstract void setInvertedIndex() throws HyracksDataException;
+
+ protected abstract void setLogger();
+
+ protected abstract void setRandom();
+
+ @Before
+ public void setUp() throws HyracksException {
+ harness.setUp();
+ setTokenizer();
+ setInvertedIndex();
+ setLogger();
+ setRandom();
+ generateData();
+ invertedIndexAccessor = invertedIndex.createAccessor();
+
+ IBinaryComparatorFactory[] tokenCmpFactories = harness.getTokenBinaryComparatorFactories();
+ tokenComparators = new IBinaryComparator[tokenCmpFactories.length];
+ for (int i = 0; i < tokenCmpFactories.length; i++) {
+ tokenComparators[i] = tokenCmpFactories[i].createBinaryComparator();
+ }
+ }
+
+ @After
+ public void tearDown() throws HyracksDataException {
+ invertedIndex.close();
+ harness.tearDown();
+ }
+
+ protected void generateData() {
+ if (random) {
+ generateRandomDocumentData();
+ } else {
+ generatePredefinedDocumentData();
+ }
+ }
+
+ protected void generateRandomDocumentData() {
+ int documentLength;
+ String validCharacters = "abcdefghijklmnopqrstuvwxyz ";
+ StringBuilder builder = new StringBuilder();
+ Random rng = harness.getRandom();
+
+ // Generate numDocuments random documents (strings)
+ documents.clear();
+ for (int i = 0; i < numDocuments; i++) {
+
+ // Generate a random string of size [0, maxDocumentLength] with
+ // characters chosen from the set of valid characters defined above
+ documentLength = rng.nextInt(maxDocumentLength + 1);
+ for (int j = 0; j < documentLength; j++) {
+ builder.append(validCharacters.charAt(rng.nextInt(validCharacters.length())));
+ }
+
+ // Ensure that numDocuments is honored by regenerating the document
+ // if it is a duplicate.
+ if (!documents.add(builder.toString())) {
+ i--;
+ }
+
+ builder.setLength(0);
+ }
+ }
+
+ protected void generatePredefinedDocumentData() {
+ List<String> firstNames = new ArrayList<String>();
+ List<String> lastNames = new ArrayList<String>();
+
+ // Generate first names
+ firstNames.add("Kathrin");
+ firstNames.add("Cathrin");
+ firstNames.add("Kathryn");
+ firstNames.add("Cathryn");
+ firstNames.add("Kathrine");
+ firstNames.add("Cathrine");
+ firstNames.add("Kathryne");
+ firstNames.add("Cathryne");
+ firstNames.add("Katherin");
+ firstNames.add("Catherin");
+ firstNames.add("Katheryn");
+ firstNames.add("Catheryn");
+ firstNames.add("Katherine");
+ firstNames.add("Catherine");
+ firstNames.add("Katheryne");
+ firstNames.add("Catheryne");
+ firstNames.add("John");
+ firstNames.add("Jack");
+ firstNames.add("Jonathan");
+ firstNames.add("Nathan");
+
+ // Generate last names
+ lastNames.add("Miller");
+ lastNames.add("Myller");
+ lastNames.add("Keller");
+ lastNames.add("Ketler");
+ lastNames.add("Muller");
+ lastNames.add("Fuller");
+ lastNames.add("Smith");
+ lastNames.add("Smyth");
+ lastNames.add("Smithe");
+ lastNames.add("Smythe");
+
+ // Generate all 'firstName lastName' combinations
+ documents.clear();
+ for (String first : firstNames) {
+ for (String last : lastNames) {
+ documents.add(first + " " + last);
+ }
+ }
+
+ // The number of documents is fixed since the data is predefined
+ numDocuments = documents.size();
+ }
+
+ protected class TokenIdPair implements Comparable<TokenIdPair> {
+ public ByteArrayAccessibleOutputStream baaos = new ByteArrayAccessibleOutputStream();
+ public DataOutputStream dos = new DataOutputStream(baaos);
+ public int id;
+
+ TokenIdPair(IToken token, int id) throws IOException {
+ token.serializeToken(dos);
+ this.id = id;
+ }
+
+ @Override
+ public int compareTo(TokenIdPair o) {
+ int cmp = tokenComparators[0].compare(baaos.getByteArray(), 0, baaos.getByteArray().length,
+ o.baaos.getByteArray(), 0, o.baaos.getByteArray().length);
+ if (cmp == 0) {
+ return id - o.id;
+ } else {
+ return cmp;
+ }
+ }
+ }
+
+ protected void buildBaselineIndex() throws IOException {
+ ITupleReference tuple;
+ IToken token;
+ SortedSet<Integer> baselineInvertedList = null;
+ ByteArrayAccessibleOutputStream baos = new ByteArrayAccessibleOutputStream();
+ DataOutputStream dos = new DataOutputStream(baos);
+ ISerializerDeserializer[] fieldSerDes = new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
+
+ int docId = 0;
+ for (String document : documents) {
+ tuple = TupleUtils.createTuple(fieldSerDes, document, docId);
+
+ // Insert into the baseline
+ tokenizer.reset(tuple.getFieldData(0), tuple.getFieldStart(0), tuple.getFieldLength(0));
+ while (tokenizer.hasNext()) {
+ baos.reset();
+ tokenizer.next();
+ token = tokenizer.getToken();
+ token.serializeToken(dos);
+ String tokenStr = (String) fieldSerDes[0].deserialize(new DataInputStream(new ByteArrayInputStream(baos
+ .getByteArray())));
+ baselineInvertedList = baselineInvertedIndex.get(tokenStr);
+ if (baselineInvertedList == null) {
+ baselineInvertedList = new TreeSet<Integer>();
+ baselineInvertedIndex.put(tokenStr, baselineInvertedList);
+ }
+ baselineInvertedList.add(docId);
+ }
+ docId++;
+ }
+ }
+
+ protected void insertDocuments() throws HyracksDataException, IndexException {
+ ITupleReference tuple;
+ ISerializerDeserializer[] fieldSerDes = new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
+
+ // Insert the documents into the index while building the baseline
+ int docId = 0;
+ for (String document : documents) {
+ // Insert into the index to be tested
+ tuple = TupleUtils.createTuple(fieldSerDes, document, docId);
+ invertedIndexAccessor.insert(tuple);
+ docId++;
+ }
+ }
+
+ protected void verifyAgainstBaseline() throws HyracksDataException, IndexException {
+ ITupleReference tuple;
+ int docId;
+// int count = 0;
+ SortedSet<Integer> baselineInvertedList = null;
+ SortedSet<Integer> testInvertedList = new TreeSet<Integer>();
+
+ // Query all tokens in the baseline
+
+ ConjunctiveSearchModifier searchModifier = new ConjunctiveSearchModifier();
+ InvertedIndexSearchPredicate searchPred = new InvertedIndexSearchPredicate(searchModifier);
+ IIndexCursor resultCursor = invertedIndexAccessor.createSearchCursor();
+ for (String tokenStr : baselineInvertedIndex.keySet()) {
+ tuple = TupleUtils.createTuple(new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE },
+ tokenStr);
+ searchPred.setQueryTuple(tuple);
+ searchPred.setQueryFieldIndex(0);
+
+ try {
+ resultCursor.reset();
+ invertedIndexAccessor.search(resultCursor, searchPred);
+
+ baselineInvertedList = baselineInvertedIndex.get(tokenStr);
+ // Check the matches
+ testInvertedList.clear();
+ while (resultCursor.hasNext()) {
+ resultCursor.next();
+ tuple = resultCursor.getTuple();
+ docId = IntegerSerializerDeserializer.getInt(tuple.getFieldData(0), tuple.getFieldStart(0));
+ testInvertedList.add(docId);
+ }
+ } finally {
+ resultCursor.close();
+ }
+// count++;
+
+// if (count % 6500 == 0) {
+// System.out.println("################# count: " + count);
+// ((LSMInvertedIndexAccessor) invertedIndexAccessor).merge();
+// }
+
+ if (LOGGER != null && LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("\nQuery:\t\t\"" + tokenStr + "\"\n" + "Baseline:\t" + baselineInvertedList.toString()
+ + "\n" + "Test:\t\t" + testInvertedList.toString() + "\n");
+ }
+ assertTrue(baselineInvertedList.equals(testInvertedList));
+ }
+ }
+
+ protected void bulkLoadDocuments() throws IndexException, IOException {
+ List<TokenIdPair> pairs = new ArrayList<TokenIdPair>();
+ ArrayTupleBuilder tb = new ArrayTupleBuilder(2);
+ ArrayTupleReference tuple = new ArrayTupleReference();
+
+ // Generate pairs for sorting and bulk-loading
+ int docId = 0;
+ for (String s : documents) {
+ ByteArrayAccessibleOutputStream baaos = new ByteArrayAccessibleOutputStream();
+ DataOutputStream dos = new DataOutputStream(baaos);
+ baaos.reset();
+ UTF8StringSerializerDeserializer.INSTANCE.serialize(s, dos);
+ tokenizer.reset(baaos.getByteArray(), 0, baaos.size());
+ while (tokenizer.hasNext()) {
+ tokenizer.next();
+ IToken token = tokenizer.getToken();
+ pairs.add(new TokenIdPair(token, docId));
+ }
+ docId++;
+ }
+
+ Collections.sort(pairs);
+
+ IIndexBulkLoadContext bulkLoadCtx = invertedIndex.beginBulkLoad(1.0f);
+ for (TokenIdPair t : pairs) {
+ tb.reset();
+ tb.addField(t.baaos.getByteArray(), 0, t.baaos.getByteArray().length);
+ IntegerSerializerDeserializer.INSTANCE.serialize(t.id, tb.getDataOutput());
+ tb.addFieldEndOffset();
+ tuple.reset(tb.getFieldEndOffsets(), tb.getByteArray());
+ invertedIndex.bulkLoadAddTuple(tuple, bulkLoadCtx);
+ }
+ invertedIndex.endBulkLoad(bulkLoadCtx);
+ }
+
+ /**
+ * Runs a specified number of randomly picked strings from dataStrings as
+ * queries. We run each query, measure it's time, and print it's results.
+ */
+ protected void runQueries(IInvertedIndexSearchModifier searchModifier, int numQueries) throws Exception {
+ ISerializerDeserializer[] querySerde = { UTF8StringSerializerDeserializer.INSTANCE };
+ ArrayTupleBuilder queryTb = new ArrayTupleBuilder(querySerde.length);
+ ArrayTupleReference queryTuple = new ArrayTupleReference();
+ IIndexCursor resultCursor;
+
+ Random rnd = harness.getRandom();
+ rnd.setSeed(50);
+
+ InvertedIndexSearchPredicate searchPred = new InvertedIndexSearchPredicate(searchModifier);
+
+ for (int i = 0; i < numQueries; i++) {
+
+ int queryIndex = Math.abs(rnd.nextInt() % documents.size());
+ String queryString = documents.get(queryIndex);
+
+ // Serialize query.
+ queryTb.reset();
+ UTF8StringSerializerDeserializer.INSTANCE.serialize(queryString, queryTb.getDataOutput());
+ queryTb.addFieldEndOffset();
+ queryTuple.reset(queryTb.getFieldEndOffsets(), queryTb.getByteArray());
+
+ // Set query tuple in search predicate.
+ searchPred.setQueryTuple(queryTuple);
+ searchPred.setQueryFieldIndex(0);
+
+ resultCursor = invertedIndexAccessor.createSearchCursor();
+
+ int repeats = 1;
+ double totalTime = 0;
+ for (int j = 0; j < repeats; j++) {
+ long timeStart = System.currentTimeMillis();
+ try {
+ resultCursor.reset();
+ invertedIndexAccessor.search(resultCursor, searchPred);
+ } catch (OccurrenceThresholdPanicException e) {
+ // ignore panic queries
+ }
+ long timeEnd = System.currentTimeMillis();
+ totalTime += timeEnd - timeStart;
+ }
+ double avgTime = totalTime / (double) repeats;
+ StringBuilder strBuilder = new StringBuilder();
+ strBuilder.append(i + ": " + "\"" + queryString + "\" " + queryIndex + ": " + avgTime + "ms" + "\n");
+ strBuilder.append("CANDIDATE RESULTS:\n");
+ while (resultCursor.hasNext()) {
+ resultCursor.next();
+ ITupleReference resultTuple = resultCursor.getTuple();
+ int id = IntegerSerializerDeserializer
+ .getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(0));
+ strBuilder.append(id + " " + documents.get(id));
+ strBuilder.append('\n');
+ }
+ // remove trailing newline
+ strBuilder.deleteCharAt(strBuilder.length() - 1);
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info(strBuilder.toString());
+ }
+ }
+ }
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InMemoryBTreeInvertedIndexInsertTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InMemoryBTreeInvertedIndexInsertTest.java
new file mode 100644
index 0000000..1a1a7cc
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InMemoryBTreeInvertedIndexInsertTest.java
@@ -0,0 +1,38 @@
+package edu.uci.ics.hyracks.storage.am.lsm.inverteredindex;
+
+import java.util.logging.Logger;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.ITokenFactory;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizer;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.UTF8NGramTokenFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.utils.InvertedIndexTestUtils;
+
+public class InMemoryBTreeInvertedIndexInsertTest extends AbstractInvertedIndexInsertTest {
+
+ @Override
+ protected void setTokenizer() {
+ ITokenFactory tokenFactory = new UTF8NGramTokenFactory();
+ tokenizer = new NGramUTF8StringBinaryTokenizer(3, false, true, false, tokenFactory);
+// ITokenFactory tokenFactory = new UTF8WordTokenFactory();
+// tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, false, tokenFactory);
+ }
+
+ @Override
+ protected void setInvertedIndex() throws HyracksDataException {
+ invertedIndex = InvertedIndexTestUtils.createTestInMemoryBTreeInvertedIndex(harness, tokenizer);
+ invertedIndex.create(harness.getFileId());
+ invertedIndex.open(harness.getFileId());
+ }
+
+ @Override
+ protected void setLogger() {
+ LOGGER = Logger.getLogger(InMemoryBTreeInvertedIndexInsertTest.class.getName());
+ }
+
+ @Override
+ protected void setRandom() {
+ random = true;
+ }
+
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InMemoryBTreeInvertedIndexSearchTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InMemoryBTreeInvertedIndexSearchTest.java
new file mode 100644
index 0000000..99dc654
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InMemoryBTreeInvertedIndexSearchTest.java
@@ -0,0 +1,110 @@
+package edu.uci.ics.hyracks.storage.am.lsm.inverteredindex;
+
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers.ConjunctiveSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers.EditDistanceSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers.JaccardSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.ITokenFactory;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizer;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.UTF8NGramTokenFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.utils.InvertedIndexTestUtils;
+
+public class InMemoryBTreeInvertedIndexSearchTest extends AbstractInvertedIndexTest {
+
+ /**
+ * Runs 5 random conjunctive search queries to test the
+ * ConjunctiveSearchModifier.
+ */
+ @Test
+ public void conjunctiveQueryTest() throws Exception {
+ insertDocuments();
+ IInvertedIndexSearchModifier searchModifier = new ConjunctiveSearchModifier();
+ runQueries(searchModifier, 5);
+ }
+
+ /**
+ * Runs 5 random jaccard-based search queries with thresholds 0.9, 0.8, 0.7.
+ * Tests the JaccardSearchModifier.
+ */
+ @Test
+ public void jaccardQueryTest() throws Exception {
+ insertDocuments();
+ JaccardSearchModifier searchModifier = new JaccardSearchModifier(1.0f);
+
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("JACCARD: " + 0.9f);
+ }
+ searchModifier.setJaccThresh(0.9f);
+ runQueries(searchModifier, 5);
+
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("JACCARD: " + 0.8f);
+ }
+ searchModifier.setJaccThresh(0.8f);
+ runQueries(searchModifier, 5);
+
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("JACCARD: " + 0.7f);
+ }
+ searchModifier.setJaccThresh(0.7f);
+ runQueries(searchModifier, 5);
+ }
+
+ /**
+ * Runs 5 random edit-distance based search queries with thresholds 1, 2, 3.
+ * Tests the EditDistanceSearchModifier.
+ */
+ @Test
+ public void editDistanceQueryTest() throws Exception {
+ insertDocuments();
+ EditDistanceSearchModifier searchModifier = new EditDistanceSearchModifier(3, 0);
+
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("EDIT DISTANCE: " + 1);
+ }
+ searchModifier.setEdThresh(1);
+ runQueries(searchModifier, 5);
+
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("EDIT DISTANCE: " + 2);
+ }
+ searchModifier.setEdThresh(2);
+ runQueries(searchModifier, 5);
+
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("EDIT DISTANCE: " + 3);
+ }
+ searchModifier.setEdThresh(3);
+ runQueries(searchModifier, 5);
+ }
+
+ @Override
+ protected void setTokenizer() {
+ ITokenFactory tokenFactory = new UTF8NGramTokenFactory();
+ tokenizer = new NGramUTF8StringBinaryTokenizer(3, false, true, false, tokenFactory);
+ }
+
+ @Override
+ protected void setInvertedIndex() throws HyracksDataException {
+ invertedIndex = InvertedIndexTestUtils.createTestInMemoryBTreeInvertedIndex(harness, tokenizer);
+ invertedIndex.create(harness.getFileId());
+ invertedIndex.open(harness.getFileId());
+ }
+
+ @Override
+ protected void setLogger() {
+ LOGGER = Logger.getLogger(InMemoryBTreeInvertedIndexSearchTest.class.getName());
+ }
+
+ @Override
+ protected void setRandom() {
+ random = false;
+ }
+
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InvertedIndexBulkLoadTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InvertedIndexBulkLoadTest.java
new file mode 100644
index 0000000..fbe408a
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InvertedIndexBulkLoadTest.java
@@ -0,0 +1,37 @@
+package edu.uci.ics.hyracks.storage.am.lsm.inverteredindex;
+
+import java.util.logging.Logger;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.ITokenFactory;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizer;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.UTF8NGramTokenFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.utils.InvertedIndexTestUtils;
+
+public class InvertedIndexBulkLoadTest extends AbstractInvertedIndexBulkloadTest {
+
+ @Override
+ protected void setTokenizer() {
+ ITokenFactory tokenFactory = new UTF8NGramTokenFactory();
+ tokenizer = new NGramUTF8StringBinaryTokenizer(3, false, true, false, tokenFactory);
+ }
+
+ @Override
+ protected void setInvertedIndex() throws HyracksDataException {
+ invertedIndex = InvertedIndexTestUtils.createTestInvertedIndex(harness, tokenizer);
+ invertedIndex.create(harness.getDiskInvertedIndexFileId());
+ invertedIndex.open(harness.getDiskInvertedIndexFileId());
+ }
+
+ @Override
+ protected void setLogger() {
+ LOGGER = Logger.getLogger(InvertedIndexBulkLoadTest.class.getName());
+
+ }
+
+ @Override
+ protected void setRandom() {
+ random = true;
+ }
+
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InvertedIndexSearchTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InvertedIndexSearchTest.java
new file mode 100644
index 0000000..79686ae
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InvertedIndexSearchTest.java
@@ -0,0 +1,109 @@
+package edu.uci.ics.hyracks.storage.am.lsm.inverteredindex;
+
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers.ConjunctiveSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers.EditDistanceSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers.JaccardSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.ITokenFactory;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizer;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.UTF8NGramTokenFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.utils.InvertedIndexTestUtils;
+
+public class InvertedIndexSearchTest extends AbstractInvertedIndexTest {
+ /**
+ * Runs 5 random conjunctive search queries to test the
+ * ConjunctiveSearchModifier.
+ */
+ @Test
+ public void conjunctiveQueryTest() throws Exception {
+ bulkLoadDocuments();
+ IInvertedIndexSearchModifier searchModifier = new ConjunctiveSearchModifier();
+ runQueries(searchModifier, 5);
+ }
+
+ /**
+ * Runs 5 random jaccard-based search queries with thresholds 0.9, 0.8, 0.7.
+ * Tests the JaccardSearchModifier.
+ */
+ @Test
+ public void jaccardQueryTest() throws Exception {
+ bulkLoadDocuments();
+ JaccardSearchModifier searchModifier = new JaccardSearchModifier(1.0f);
+
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("JACCARD: " + 0.9f);
+ }
+ searchModifier.setJaccThresh(0.9f);
+ runQueries(searchModifier, 5);
+
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("JACCARD: " + 0.8f);
+ }
+ searchModifier.setJaccThresh(0.8f);
+ runQueries(searchModifier, 5);
+
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("JACCARD: " + 0.7f);
+ }
+ searchModifier.setJaccThresh(0.7f);
+ runQueries(searchModifier, 5);
+ }
+
+ /**
+ * Runs 5 random edit-distance based search queries with thresholds 1, 2, 3.
+ * Tests the EditDistanceSearchModifier.
+ */
+ @Test
+ public void editDistanceQueryTest() throws Exception {
+ bulkLoadDocuments();
+ EditDistanceSearchModifier searchModifier = new EditDistanceSearchModifier(3, 0);
+
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("EDIT DISTANCE: " + 1);
+ }
+ searchModifier.setEdThresh(1);
+ runQueries(searchModifier, 5);
+
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("EDIT DISTANCE: " + 2);
+ }
+ searchModifier.setEdThresh(2);
+ runQueries(searchModifier, 5);
+
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("EDIT DISTANCE: " + 3);
+ }
+ searchModifier.setEdThresh(3);
+ runQueries(searchModifier, 5);
+ }
+
+ @Override
+ protected void setTokenizer() {
+ ITokenFactory tokenFactory = new UTF8NGramTokenFactory();
+ tokenizer = new NGramUTF8StringBinaryTokenizer(3, false, true, false, tokenFactory);
+ }
+
+ @Override
+ protected void setInvertedIndex() throws HyracksDataException {
+ invertedIndex = InvertedIndexTestUtils.createTestInvertedIndex(harness, tokenizer);
+ invertedIndex.create(harness.getDiskInvertedIndexFileId());
+ invertedIndex.open(harness.getDiskInvertedIndexFileId());
+ }
+
+ @Override
+ protected void setLogger() {
+ LOGGER = Logger.getLogger(InvertedIndexSearchTest.class.getName());
+ }
+
+ @Override
+ protected void setRandom() {
+ random = false;
+ }
+
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/LSMInvertedIndexInsertTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/LSMInvertedIndexInsertTest.java
new file mode 100644
index 0000000..23834c2
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/LSMInvertedIndexInsertTest.java
@@ -0,0 +1,36 @@
+package edu.uci.ics.hyracks.storage.am.lsm.inverteredindex;
+
+import java.util.logging.Logger;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.ITokenFactory;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizer;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.UTF8NGramTokenFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.utils.InvertedIndexTestUtils;
+
+public class LSMInvertedIndexInsertTest extends AbstractInvertedIndexInsertTest {
+
+ @Override
+ protected void setTokenizer() {
+ ITokenFactory tokenFactory = new UTF8NGramTokenFactory();
+ tokenizer = new NGramUTF8StringBinaryTokenizer(3, false, true, false, tokenFactory);
+ }
+
+ @Override
+ protected void setInvertedIndex() throws HyracksDataException {
+ invertedIndex = InvertedIndexTestUtils.createTestLSMInvertedIndex(harness, tokenizer);
+ invertedIndex.create(harness.getFileId());
+ invertedIndex.open(harness.getFileId());
+ }
+
+ @Override
+ protected void setLogger() {
+ LOGGER = Logger.getLogger(LSMInvertedIndexInsertTest.class.getName());
+ }
+
+ @Override
+ protected void setRandom() {
+ random = true;
+ }
+
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/LSMInvertedIndexTestHarness.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/LSMInvertedIndexTestHarness.java
new file mode 100644
index 0000000..60d5683
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/LSMInvertedIndexTestHarness.java
@@ -0,0 +1,222 @@
+package edu.uci.ics.hyracks.storage.am.lsm.inverteredindex;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Random;
+
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.api.io.IODeviceHandle;
+import edu.uci.ics.hyracks.control.nc.io.IOManager;
+import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
+import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+import edu.uci.ics.hyracks.storage.am.common.frames.LIFOMetaDataFrameFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.InMemoryBufferCache;
+import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.InMemoryFreePageManager;
+import edu.uci.ics.hyracks.storage.common.buffercache.HeapBufferAllocator;
+import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
+import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
+import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder;
+import edu.uci.ics.hyracks.test.support.TestUtils;
+
+public class LSMInvertedIndexTestHarness {
+
+ private static final long RANDOM_SEED = 50;
+ private static final int DEFAULT_DISK_PAGE_SIZE = 256;
+ private static final int DEFAULT_DISK_NUM_PAGES = 1000;
+ private static final int DEFAULT_DISK_MAX_OPEN_FILES = 200;
+ private static final int DEFAULT_MEM_PAGE_SIZE = 4096;
+ private static final int DEFAULT_MEM_NUM_PAGES = 200;
+ private static final int DEFAULT_HYRACKS_FRAME_SIZE = 128;
+ private static final int DUMMY_FILE_ID = -1;
+
+ protected final int diskPageSize;
+ protected final int diskNumPages;
+ protected final int diskMaxOpenFiles;
+ protected final int memPageSize;
+ protected final int memNumPages;
+ protected final int hyracksFrameSize;
+
+ protected IOManager ioManager;
+ protected IBufferCache diskBufferCache;
+ protected IFileMapProvider diskFileMapProvider;
+ protected InMemoryBufferCache memBufferCache;
+ protected InMemoryFreePageManager memFreePageManager;
+ protected IHyracksTaskContext ctx;
+
+ protected final Random rnd = new Random();
+ protected final static SimpleDateFormat simpleDateFormat = new SimpleDateFormat("ddMMyy-hhmmssSS");
+ protected final static String sep = System.getProperty("file.separator");
+ protected String onDiskDir;
+ protected String btreeFileName = "btree_vocab";
+ protected String invIndexFileName = "inv_index";
+ protected FileReference btreeFileRef;
+ protected FileReference invIndexFileRef;
+
+ // Token information
+ protected ITypeTraits[] tokenTypeTraits = new ITypeTraits[] { UTF8StringPointable.TYPE_TRAITS };
+ protected IBinaryComparatorFactory[] tokenCmpFactories = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
+ .of(UTF8StringPointable.FACTORY) };
+
+ // Inverted list information
+ protected ITypeTraits[] invListTypeTraits = new ITypeTraits[] { IntegerPointable.TYPE_TRAITS };
+ protected IBinaryComparatorFactory[] invListCmpFactories = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
+ .of(IntegerPointable.FACTORY) };
+
+ public LSMInvertedIndexTestHarness() {
+ this.diskPageSize = DEFAULT_DISK_PAGE_SIZE;
+ this.diskNumPages = DEFAULT_DISK_NUM_PAGES;
+ this.diskMaxOpenFiles = DEFAULT_DISK_MAX_OPEN_FILES;
+ this.memPageSize = DEFAULT_MEM_PAGE_SIZE;
+ this.memNumPages = DEFAULT_MEM_NUM_PAGES;
+ this.hyracksFrameSize = DEFAULT_HYRACKS_FRAME_SIZE;
+ }
+
+ public LSMInvertedIndexTestHarness(int diskPageSize, int diskNumPages, int diskMaxOpenFiles, int memPageSize,
+ int memNumPages, int hyracksFrameSize) {
+ this.diskPageSize = diskPageSize;
+ this.diskNumPages = diskNumPages;
+ this.diskMaxOpenFiles = diskMaxOpenFiles;
+ this.memPageSize = memPageSize;
+ this.memNumPages = memNumPages;
+ this.hyracksFrameSize = hyracksFrameSize;
+ }
+
+ public void setUp() throws HyracksException {
+ onDiskDir = "lsm_invertedindex_" + simpleDateFormat.format(new Date()) + sep;
+ ctx = TestUtils.create(getHyracksFrameSize());
+ TestStorageManagerComponentHolder.init(diskPageSize, diskNumPages, diskMaxOpenFiles);
+ diskBufferCache = TestStorageManagerComponentHolder.getBufferCache(ctx);
+ diskFileMapProvider = TestStorageManagerComponentHolder.getFileMapProvider(ctx);
+ memBufferCache = new InMemoryBufferCache(new HeapBufferAllocator(), memPageSize, memNumPages);
+ memFreePageManager = new InMemoryFreePageManager(memNumPages, new LIFOMetaDataFrameFactory());
+ ioManager = TestStorageManagerComponentHolder.getIOManager();
+ rnd.setSeed(RANDOM_SEED);
+
+ File btreeFile = new File(onDiskDir + btreeFileName);
+ btreeFile.deleteOnExit();
+ File invIndexFile = new File(onDiskDir + invIndexFileName);
+ invIndexFile.deleteOnExit();
+ btreeFileRef = new FileReference(btreeFile);
+ invIndexFileRef = new FileReference(invIndexFile);
+ diskBufferCache.createFile(btreeFileRef);
+ diskBufferCache.openFile(diskFileMapProvider.lookupFileId(btreeFileRef));
+ diskBufferCache.createFile(invIndexFileRef);
+ diskBufferCache.openFile(diskFileMapProvider.lookupFileId(invIndexFileRef));
+ }
+
+ public void tearDown() throws HyracksDataException {
+ diskBufferCache.closeFile(diskFileMapProvider.lookupFileId(btreeFileRef));
+ diskBufferCache.deleteFile(diskFileMapProvider.lookupFileId(btreeFileRef), false);
+ diskBufferCache.closeFile(diskFileMapProvider.lookupFileId(invIndexFileRef));
+ diskBufferCache.deleteFile(diskFileMapProvider.lookupFileId(invIndexFileRef), false);
+ diskBufferCache.close();
+ for (IODeviceHandle dev : ioManager.getIODevices()) {
+ File dir = new File(dev.getPath(), onDiskDir);
+ FilenameFilter filter = new FilenameFilter() {
+ public boolean accept(File dir, String name) {
+ return !name.startsWith(".");
+ }
+ };
+ String[] files = dir.list(filter);
+ if (files != null) {
+ for (String fileName : files) {
+ File file = new File(dir.getPath() + File.separator + fileName);
+ file.delete();
+ }
+ }
+ dir.delete();
+ }
+ }
+
+ public int getDiskInvertedIndexFileId() throws HyracksDataException {
+ return diskFileMapProvider.lookupFileId(invIndexFileRef);
+ }
+
+ public int getDiskBtreeFileId() throws HyracksDataException {
+ return diskFileMapProvider.lookupFileId(btreeFileRef);
+ }
+
+ public int getDiskPageSize() {
+ return diskPageSize;
+ }
+
+ public int getDiskNumPages() {
+ return diskNumPages;
+ }
+
+ public int getDiskMaxOpenFiles() {
+ return diskMaxOpenFiles;
+ }
+
+ public int getMemPageSize() {
+ return memPageSize;
+ }
+
+ public int getMemNumPages() {
+ return memNumPages;
+ }
+
+ public int getHyracksFrameSize() {
+ return hyracksFrameSize;
+ }
+
+ public int getFileId() {
+ return DUMMY_FILE_ID;
+ }
+
+ public IOManager getIOManager() {
+ return ioManager;
+ }
+
+ public IBufferCache getDiskBufferCache() {
+ return diskBufferCache;
+ }
+
+ public IFileMapProvider getDiskFileMapProvider() {
+ return diskFileMapProvider;
+ }
+
+ public InMemoryBufferCache getMemBufferCache() {
+ return memBufferCache;
+ }
+
+ public InMemoryFreePageManager getMemFreePageManager() {
+ return memFreePageManager;
+ }
+
+ public IHyracksTaskContext getHyracksTastContext() {
+ return ctx;
+ }
+
+ public String getOnDiskDir() {
+ return onDiskDir;
+ }
+
+ public Random getRandom() {
+ return rnd;
+ }
+
+ public ITypeTraits[] getTokenTypeTraits() {
+ return tokenTypeTraits;
+ }
+
+ public ITypeTraits[] getInvertedListTypeTraits() {
+ return invListTypeTraits;
+ }
+
+ public IBinaryComparatorFactory[] getTokenBinaryComparatorFactories() {
+ return tokenCmpFactories;
+ }
+
+ public IBinaryComparatorFactory[] getInvertedListBinaryComparatorFactories() {
+ return invListCmpFactories;
+ }
+}
diff --git a/hyracks-tests/pom.xml b/hyracks-tests/pom.xml
index 8950730..8c4c67a 100644
--- a/hyracks-tests/pom.xml
+++ b/hyracks-tests/pom.xml
@@ -19,5 +19,6 @@
<module>hyracks-storage-am-lsm-common-test</module>
<module>hyracks-storage-am-lsm-btree-test</module>
<module>hyracks-storage-am-lsm-rtree-test</module>
+ <module>hyracks-storage-am-lsm-invertedindex-test</module>
</modules>
</project>
diff --git a/pom.xml b/pom.xml
index fbef96f..c292ecb 100644
--- a/pom.xml
+++ b/pom.xml
@@ -96,6 +96,7 @@
<module>hyracks-storage-am-lsm-common</module>
<module>hyracks-storage-am-lsm-btree</module>
<module>hyracks-storage-am-lsm-rtree</module>
+ <module>hyracks-storage-am-lsm-invertedindex</module>
<module>hyracks-storage-am-rtree</module>
<module>hyracks-test-support</module>
<module>hyracks-tests</module>