First initial copy from hyracks_inverted_index_updates.

git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_inverted_index_updates_new@1802 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
new file mode 100644
index 0000000..8541a92
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
@@ -0,0 +1,50 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+	<artifactId>hyracks-storage-am-lsm-invertedindex-test</artifactId>
+
+	<parent>
+		<artifactId>hyracks-tests</artifactId>
+		<groupId>edu.uci.ics.hyracks</groupId>
+		<version>0.2.1-SNAPSHOT</version>
+		<relativePath>..</relativePath>
+	</parent>
+
+	<build>
+		<plugins>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-compiler-plugin</artifactId>
+				<version>2.0.2</version>
+				<configuration>
+					<source>1.6</source>
+					<target>1.6</target>
+				</configuration>
+			</plugin>
+		</plugins>
+	</build>
+	<dependencies>
+		<dependency>
+			<groupId>edu.uci.ics.hyracks</groupId>
+			<artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
+			<version>0.2.1-SNAPSHOT</version>
+			<type>jar</type>
+			<scope>compile</scope>
+		</dependency>
+		<dependency>
+			<groupId>edu.uci.ics.hyracks</groupId>
+			<artifactId>hyracks-test-support</artifactId>
+			<version>0.2.1-SNAPSHOT</version>
+			<type>jar</type>
+			<scope>test</scope>
+		</dependency>
+		<dependency>
+			<groupId>edu.uci.ics.hyracks</groupId>
+			<artifactId>hyracks-data-std</artifactId>
+			<version>0.2.1-SNAPSHOT</version>
+			<type>jar</type>
+			<scope>test</scope>
+		</dependency>
+	</dependencies>
+
+</project>
\ No newline at end of file
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/utils/InvertedIndexTestUtils.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/utils/InvertedIndexTestUtils.java
new file mode 100644
index 0000000..7152a9a
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/utils/InvertedIndexTestUtils.java
@@ -0,0 +1,65 @@
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.utils;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
+import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMInteriorFrameFactory;
+import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMLeafFrameFactory;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
+import edu.uci.ics.hyracks.storage.am.common.api.IFreePageManager;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexMetaDataFrameFactory;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexTupleWriterFactory;
+import edu.uci.ics.hyracks.storage.am.common.frames.LIFOMetaDataFrameFactory;
+import edu.uci.ics.hyracks.storage.am.common.freepage.LinkedListFreePageManager;
+import edu.uci.ics.hyracks.storage.am.common.freepage.LinkedListFreePageManagerFactory;
+import edu.uci.ics.hyracks.storage.am.common.tuples.TypeAwareTupleWriterFactory;
+import edu.uci.ics.hyracks.storage.am.invertedindex.impls.InvertedIndex;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IBinaryTokenizer;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls.InMemoryBtreeInvertedIndex;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls.LSMInvertedIndex;
+import edu.uci.ics.hyracks.storage.am.lsm.inverteredindex.LSMInvertedIndexTestHarness;
+
+public class InvertedIndexTestUtils {
+    public static InvertedIndex createTestInvertedIndex(LSMInvertedIndexTestHarness harness, IBinaryTokenizer tokenizer)
+            throws HyracksDataException {
+        ITreeIndexMetaDataFrameFactory metaFrameFactory = new LIFOMetaDataFrameFactory();
+        ITypeTraits[] btreeTypeTraits = new ITypeTraits[] { UTF8StringPointable.TYPE_TRAITS,
+                IntegerPointable.TYPE_TRAITS, IntegerPointable.TYPE_TRAITS, IntegerPointable.TYPE_TRAITS,
+                IntegerPointable.TYPE_TRAITS };
+        ITreeIndexTupleWriterFactory tupleWriterFactory = new TypeAwareTupleWriterFactory(btreeTypeTraits);
+        ITreeIndexFrameFactory leafFrameFactory = new BTreeNSMLeafFrameFactory(tupleWriterFactory);
+        ITreeIndexFrameFactory interiorFrameFactory = new BTreeNSMInteriorFrameFactory(tupleWriterFactory);
+        IFreePageManager freePageManager = new LinkedListFreePageManager(harness.getDiskBufferCache(), 0,
+                metaFrameFactory);
+        IBinaryComparatorFactory[] btreeCmpFactories = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
+                .of(UTF8StringPointable.FACTORY) };
+        BTree btree = new BTree(harness.getDiskBufferCache(), 5, btreeCmpFactories, freePageManager,
+                interiorFrameFactory, leafFrameFactory);
+        btree.create(harness.getDiskBtreeFileId());
+        btree.open(harness.getDiskBtreeFileId());
+        return LSMInvertedIndexUtils.createInvertedIndex(harness.getDiskBufferCache(), btree,
+                harness.getInvertedListTypeTraits(), harness.getInvertedListBinaryComparatorFactories(), tokenizer);
+    }
+
+    public static InMemoryBtreeInvertedIndex createTestInMemoryBTreeInvertedIndex(LSMInvertedIndexTestHarness harness,
+            IBinaryTokenizer tokenizer) {
+        return LSMInvertedIndexUtils.createInMemoryBTreeInvertedindex(harness.getMemBufferCache(),
+                harness.getMemFreePageManager(), harness.getTokenTypeTraits(), harness.getInvertedListTypeTraits(),
+                harness.getTokenBinaryComparatorFactories(), harness.getInvertedListBinaryComparatorFactories(),
+                tokenizer);
+    }
+
+    public static LSMInvertedIndex createTestLSMInvertedIndex(LSMInvertedIndexTestHarness harness,
+            IBinaryTokenizer tokenizer) {
+        return LSMInvertedIndexUtils.createLSMInvertedIndex(harness.getMemBufferCache(),
+                harness.getMemFreePageManager(), harness.getTokenTypeTraits(), harness.getInvertedListTypeTraits(),
+                harness.getTokenBinaryComparatorFactories(), harness.getInvertedListBinaryComparatorFactories(),
+                tokenizer, harness.getDiskBufferCache(),
+                new LinkedListFreePageManagerFactory(harness.getDiskBufferCache(), new LIFOMetaDataFrameFactory()),
+                harness.getIOManager(), harness.getOnDiskDir(), harness.getDiskFileMapProvider());
+    }
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/AbstractInvertedIndexBulkloadTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/AbstractInvertedIndexBulkloadTest.java
new file mode 100644
index 0000000..ba52116
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/AbstractInvertedIndexBulkloadTest.java
@@ -0,0 +1,17 @@
+package edu.uci.ics.hyracks.storage.am.lsm.inverteredindex;
+
+import java.io.IOException;
+
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+
+public abstract class AbstractInvertedIndexBulkloadTest extends AbstractInvertedIndexTest {
+
+    @Test
+    public void bulkLoadTest() throws IndexException, IOException {
+        bulkLoadDocuments();
+        buildBaselineIndex();
+        verifyAgainstBaseline();
+    }
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/AbstractInvertedIndexInsertTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/AbstractInvertedIndexInsertTest.java
new file mode 100644
index 0000000..ed2a39c
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/AbstractInvertedIndexInsertTest.java
@@ -0,0 +1,23 @@
+package edu.uci.ics.hyracks.storage.am.lsm.inverteredindex;
+
+import java.io.IOException;
+
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+
+public abstract class AbstractInvertedIndexInsertTest extends AbstractInvertedIndexTest {
+
+    /**
+     * This test inserts documents into the inverted index, builds a baseline inverted index, and
+     * verifies that each token and its associated inverted list is matching in both the baseline and the
+     * inverted index to be tested.
+     */
+    @Test
+    public void insertionTest() throws IndexException, IOException {
+        insertDocuments();
+        buildBaselineIndex();
+        verifyAgainstBaseline();
+    }
+
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/AbstractInvertedIndexTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/AbstractInvertedIndexTest.java
new file mode 100644
index 0000000..8fdf67e
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/AbstractInvertedIndexTest.java
@@ -0,0 +1,402 @@
+package edu.uci.ics.hyracks.storage.am.lsm.inverteredindex;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.junit.After;
+import org.junit.Before;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleReference;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ByteArrayAccessibleOutputStream;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.util.TupleUtils;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoadContext;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndex;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.impls.InvertedIndexSearchPredicate;
+import edu.uci.ics.hyracks.storage.am.invertedindex.impls.OccurrenceThresholdPanicException;
+import edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers.ConjunctiveSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IBinaryTokenizer;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IToken;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls.LSMInvertedIndexAccessor;
+
+public abstract class AbstractInvertedIndexTest {
+    protected Logger LOGGER;
+    protected LSMInvertedIndexTestHarness harness = new LSMInvertedIndexTestHarness();
+
+    protected IInvertedIndex invertedIndex;
+    protected IIndexAccessor invertedIndexAccessor;
+    protected IBinaryComparator[] tokenComparators;
+    protected IBinaryTokenizer tokenizer;
+
+    // This number will only be used in generating random documents.
+    // If predefined data is generated, then the number of documents is fixed.
+    protected int numDocuments = 1000;
+    protected int maxDocumentLength = 200;
+    protected List<String> documents = new ArrayList<String>();
+    protected Map<String, SortedSet<Integer>> baselineInvertedIndex = new HashMap<String, SortedSet<Integer>>();
+
+    // Generate random data is false by default (generate predefined data is true!)
+    protected boolean random = false;
+
+    // Subclasses must implement these methods by initializing the proper class members
+    protected abstract void setTokenizer();
+
+    protected abstract void setInvertedIndex() throws HyracksDataException;
+
+    protected abstract void setLogger();
+
+    protected abstract void setRandom();
+
+    @Before
+    public void setUp() throws HyracksException {
+        harness.setUp();
+        setTokenizer();
+        setInvertedIndex();
+        setLogger();
+        setRandom();
+        generateData();
+        invertedIndexAccessor = invertedIndex.createAccessor();
+
+        IBinaryComparatorFactory[] tokenCmpFactories = harness.getTokenBinaryComparatorFactories();
+        tokenComparators = new IBinaryComparator[tokenCmpFactories.length];
+        for (int i = 0; i < tokenCmpFactories.length; i++) {
+            tokenComparators[i] = tokenCmpFactories[i].createBinaryComparator();
+        }
+    }
+
+    @After
+    public void tearDown() throws HyracksDataException {
+        invertedIndex.close();
+        harness.tearDown();
+    }
+
+    protected void generateData() {
+        if (random) {
+            generateRandomDocumentData();
+        } else {
+            generatePredefinedDocumentData();
+        }
+    }
+
+    protected void generateRandomDocumentData() {
+        int documentLength;
+        String validCharacters = "abcdefghijklmnopqrstuvwxyz ";
+        StringBuilder builder = new StringBuilder();
+        Random rng = harness.getRandom();
+
+        // Generate numDocuments random documents (strings)
+        documents.clear();
+        for (int i = 0; i < numDocuments; i++) {
+
+            // Generate a random string of size [0, maxDocumentLength] with 
+            // characters chosen from the set of valid characters defined above
+            documentLength = rng.nextInt(maxDocumentLength + 1);
+            for (int j = 0; j < documentLength; j++) {
+                builder.append(validCharacters.charAt(rng.nextInt(validCharacters.length())));
+            }
+
+            // Ensure that numDocuments is honored by regenerating the document 
+            // if it is a duplicate.
+            if (!documents.add(builder.toString())) {
+                i--;
+            }
+
+            builder.setLength(0);
+        }
+    }
+
+    protected void generatePredefinedDocumentData() {
+        List<String> firstNames = new ArrayList<String>();
+        List<String> lastNames = new ArrayList<String>();
+
+        // Generate first names
+        firstNames.add("Kathrin");
+        firstNames.add("Cathrin");
+        firstNames.add("Kathryn");
+        firstNames.add("Cathryn");
+        firstNames.add("Kathrine");
+        firstNames.add("Cathrine");
+        firstNames.add("Kathryne");
+        firstNames.add("Cathryne");
+        firstNames.add("Katherin");
+        firstNames.add("Catherin");
+        firstNames.add("Katheryn");
+        firstNames.add("Catheryn");
+        firstNames.add("Katherine");
+        firstNames.add("Catherine");
+        firstNames.add("Katheryne");
+        firstNames.add("Catheryne");
+        firstNames.add("John");
+        firstNames.add("Jack");
+        firstNames.add("Jonathan");
+        firstNames.add("Nathan");
+
+        // Generate last names
+        lastNames.add("Miller");
+        lastNames.add("Myller");
+        lastNames.add("Keller");
+        lastNames.add("Ketler");
+        lastNames.add("Muller");
+        lastNames.add("Fuller");
+        lastNames.add("Smith");
+        lastNames.add("Smyth");
+        lastNames.add("Smithe");
+        lastNames.add("Smythe");
+
+        // Generate all 'firstName lastName' combinations
+        documents.clear();
+        for (String first : firstNames) {
+            for (String last : lastNames) {
+                documents.add(first + " " + last);
+            }
+        }
+
+        // The number of documents is fixed since the data is predefined
+        numDocuments = documents.size();
+    }
+
+    protected class TokenIdPair implements Comparable<TokenIdPair> {
+        public ByteArrayAccessibleOutputStream baaos = new ByteArrayAccessibleOutputStream();
+        public DataOutputStream dos = new DataOutputStream(baaos);
+        public int id;
+
+        TokenIdPair(IToken token, int id) throws IOException {
+            token.serializeToken(dos);
+            this.id = id;
+        }
+
+        @Override
+        public int compareTo(TokenIdPair o) {
+            int cmp = tokenComparators[0].compare(baaos.getByteArray(), 0, baaos.getByteArray().length,
+                    o.baaos.getByteArray(), 0, o.baaos.getByteArray().length);
+            if (cmp == 0) {
+                return id - o.id;
+            } else {
+                return cmp;
+            }
+        }
+    }
+
+    protected void buildBaselineIndex() throws IOException {
+        ITupleReference tuple;
+        IToken token;
+        SortedSet<Integer> baselineInvertedList = null;
+        ByteArrayAccessibleOutputStream baos = new ByteArrayAccessibleOutputStream();
+        DataOutputStream dos = new DataOutputStream(baos);
+        ISerializerDeserializer[] fieldSerDes = new ISerializerDeserializer[] {
+                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
+
+        int docId = 0;
+        for (String document : documents) {
+            tuple = TupleUtils.createTuple(fieldSerDes, document, docId);
+
+            // Insert into the baseline
+            tokenizer.reset(tuple.getFieldData(0), tuple.getFieldStart(0), tuple.getFieldLength(0));
+            while (tokenizer.hasNext()) {
+                baos.reset();
+                tokenizer.next();
+                token = tokenizer.getToken();
+                token.serializeToken(dos);
+                String tokenStr = (String) fieldSerDes[0].deserialize(new DataInputStream(new ByteArrayInputStream(baos
+                        .getByteArray())));
+                baselineInvertedList = baselineInvertedIndex.get(tokenStr);
+                if (baselineInvertedList == null) {
+                    baselineInvertedList = new TreeSet<Integer>();
+                    baselineInvertedIndex.put(tokenStr, baselineInvertedList);
+                }
+                baselineInvertedList.add(docId);
+            }
+            docId++;
+        }
+    }
+
+    protected void insertDocuments() throws HyracksDataException, IndexException {
+        ITupleReference tuple;
+        ISerializerDeserializer[] fieldSerDes = new ISerializerDeserializer[] {
+                UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
+
+        // Insert the documents into the index while building the baseline
+        int docId = 0;
+        for (String document : documents) {
+            // Insert into the index to be tested
+            tuple = TupleUtils.createTuple(fieldSerDes, document, docId);
+            invertedIndexAccessor.insert(tuple);
+            docId++;
+        }
+    }
+
+    protected void verifyAgainstBaseline() throws HyracksDataException, IndexException {
+        ITupleReference tuple;
+        int docId;
+//        int count = 0;
+        SortedSet<Integer> baselineInvertedList = null;
+        SortedSet<Integer> testInvertedList = new TreeSet<Integer>();
+
+        // Query all tokens in the baseline
+
+        ConjunctiveSearchModifier searchModifier = new ConjunctiveSearchModifier();
+        InvertedIndexSearchPredicate searchPred = new InvertedIndexSearchPredicate(searchModifier);
+        IIndexCursor resultCursor = invertedIndexAccessor.createSearchCursor();
+        for (String tokenStr : baselineInvertedIndex.keySet()) {
+            tuple = TupleUtils.createTuple(new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE },
+                    tokenStr);
+            searchPred.setQueryTuple(tuple);
+            searchPred.setQueryFieldIndex(0);
+
+            try {
+                resultCursor.reset();
+                invertedIndexAccessor.search(resultCursor, searchPred);
+
+                baselineInvertedList = baselineInvertedIndex.get(tokenStr);
+                // Check the matches
+                testInvertedList.clear();
+                while (resultCursor.hasNext()) {
+                    resultCursor.next();
+                    tuple = resultCursor.getTuple();
+                    docId = IntegerSerializerDeserializer.getInt(tuple.getFieldData(0), tuple.getFieldStart(0));
+                    testInvertedList.add(docId);
+                }
+            } finally {
+                resultCursor.close();
+            }
+//            count++;
+
+//            if (count % 6500 == 0) {
+//                System.out.println("################# count: " + count);
+//                ((LSMInvertedIndexAccessor) invertedIndexAccessor).merge();
+//            }
+
+            if (LOGGER != null && LOGGER.isLoggable(Level.INFO)) {
+                LOGGER.info("\nQuery:\t\t\"" + tokenStr + "\"\n" + "Baseline:\t" + baselineInvertedList.toString()
+                        + "\n" + "Test:\t\t" + testInvertedList.toString() + "\n");
+            }
+            assertTrue(baselineInvertedList.equals(testInvertedList));
+        }
+    }
+
+    protected void bulkLoadDocuments() throws IndexException, IOException {
+        List<TokenIdPair> pairs = new ArrayList<TokenIdPair>();
+        ArrayTupleBuilder tb = new ArrayTupleBuilder(2);
+        ArrayTupleReference tuple = new ArrayTupleReference();
+
+        // Generate pairs for sorting and bulk-loading
+        int docId = 0;
+        for (String s : documents) {
+            ByteArrayAccessibleOutputStream baaos = new ByteArrayAccessibleOutputStream();
+            DataOutputStream dos = new DataOutputStream(baaos);
+            baaos.reset();
+            UTF8StringSerializerDeserializer.INSTANCE.serialize(s, dos);
+            tokenizer.reset(baaos.getByteArray(), 0, baaos.size());
+            while (tokenizer.hasNext()) {
+                tokenizer.next();
+                IToken token = tokenizer.getToken();
+                pairs.add(new TokenIdPair(token, docId));
+            }
+            docId++;
+        }
+
+        Collections.sort(pairs);
+
+        IIndexBulkLoadContext bulkLoadCtx = invertedIndex.beginBulkLoad(1.0f);
+        for (TokenIdPair t : pairs) {
+            tb.reset();
+            tb.addField(t.baaos.getByteArray(), 0, t.baaos.getByteArray().length);
+            IntegerSerializerDeserializer.INSTANCE.serialize(t.id, tb.getDataOutput());
+            tb.addFieldEndOffset();
+            tuple.reset(tb.getFieldEndOffsets(), tb.getByteArray());
+            invertedIndex.bulkLoadAddTuple(tuple, bulkLoadCtx);
+        }
+        invertedIndex.endBulkLoad(bulkLoadCtx);
+    }
+
+    /**
+     * Runs a specified number of randomly picked strings from dataStrings as
+     * queries. We run each query, measure it's time, and print it's results.
+     */
+    protected void runQueries(IInvertedIndexSearchModifier searchModifier, int numQueries) throws Exception {
+        ISerializerDeserializer[] querySerde = { UTF8StringSerializerDeserializer.INSTANCE };
+        ArrayTupleBuilder queryTb = new ArrayTupleBuilder(querySerde.length);
+        ArrayTupleReference queryTuple = new ArrayTupleReference();
+        IIndexCursor resultCursor;
+
+        Random rnd = harness.getRandom();
+        rnd.setSeed(50);
+
+        InvertedIndexSearchPredicate searchPred = new InvertedIndexSearchPredicate(searchModifier);
+
+        for (int i = 0; i < numQueries; i++) {
+
+            int queryIndex = Math.abs(rnd.nextInt() % documents.size());
+            String queryString = documents.get(queryIndex);
+
+            // Serialize query.
+            queryTb.reset();
+            UTF8StringSerializerDeserializer.INSTANCE.serialize(queryString, queryTb.getDataOutput());
+            queryTb.addFieldEndOffset();
+            queryTuple.reset(queryTb.getFieldEndOffsets(), queryTb.getByteArray());
+
+            // Set query tuple in search predicate.
+            searchPred.setQueryTuple(queryTuple);
+            searchPred.setQueryFieldIndex(0);
+
+            resultCursor = invertedIndexAccessor.createSearchCursor();
+
+            int repeats = 1;
+            double totalTime = 0;
+            for (int j = 0; j < repeats; j++) {
+                long timeStart = System.currentTimeMillis();
+                try {
+                    resultCursor.reset();
+                    invertedIndexAccessor.search(resultCursor, searchPred);
+                } catch (OccurrenceThresholdPanicException e) {
+                    // ignore panic queries
+                }
+                long timeEnd = System.currentTimeMillis();
+                totalTime += timeEnd - timeStart;
+            }
+            double avgTime = totalTime / (double) repeats;
+            StringBuilder strBuilder = new StringBuilder();
+            strBuilder.append(i + ": " + "\"" + queryString + "\" " + queryIndex + ": " + avgTime + "ms" + "\n");
+            strBuilder.append("CANDIDATE RESULTS:\n");
+            while (resultCursor.hasNext()) {
+                resultCursor.next();
+                ITupleReference resultTuple = resultCursor.getTuple();
+                int id = IntegerSerializerDeserializer
+                        .getInt(resultTuple.getFieldData(0), resultTuple.getFieldStart(0));
+                strBuilder.append(id + " " + documents.get(id));
+                strBuilder.append('\n');
+            }
+            // remove trailing newline
+            strBuilder.deleteCharAt(strBuilder.length() - 1);
+            if (LOGGER.isLoggable(Level.INFO)) {
+                LOGGER.info(strBuilder.toString());
+            }
+        }
+    }
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InMemoryBTreeInvertedIndexInsertTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InMemoryBTreeInvertedIndexInsertTest.java
new file mode 100644
index 0000000..1a1a7cc
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InMemoryBTreeInvertedIndexInsertTest.java
@@ -0,0 +1,38 @@
+package edu.uci.ics.hyracks.storage.am.lsm.inverteredindex;
+
+import java.util.logging.Logger;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.ITokenFactory;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizer;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.UTF8NGramTokenFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.utils.InvertedIndexTestUtils;
+
+public class InMemoryBTreeInvertedIndexInsertTest extends AbstractInvertedIndexInsertTest {
+
+    @Override
+    protected void setTokenizer() {
+        ITokenFactory tokenFactory = new UTF8NGramTokenFactory();
+        tokenizer = new NGramUTF8StringBinaryTokenizer(3, false, true, false, tokenFactory);
+//        ITokenFactory tokenFactory = new UTF8WordTokenFactory();
+//        tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, false, tokenFactory);
+    }
+
+    @Override
+    protected void setInvertedIndex() throws HyracksDataException {
+        invertedIndex = InvertedIndexTestUtils.createTestInMemoryBTreeInvertedIndex(harness, tokenizer);
+        invertedIndex.create(harness.getFileId());
+        invertedIndex.open(harness.getFileId());
+    }
+
+    @Override
+    protected void setLogger() {
+        LOGGER = Logger.getLogger(InMemoryBTreeInvertedIndexInsertTest.class.getName());
+    }
+
+    @Override
+    protected void setRandom() {
+        random = true;
+    }
+
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InMemoryBTreeInvertedIndexSearchTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InMemoryBTreeInvertedIndexSearchTest.java
new file mode 100644
index 0000000..99dc654
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InMemoryBTreeInvertedIndexSearchTest.java
@@ -0,0 +1,110 @@
+package edu.uci.ics.hyracks.storage.am.lsm.inverteredindex;
+
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers.ConjunctiveSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers.EditDistanceSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers.JaccardSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.ITokenFactory;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizer;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.UTF8NGramTokenFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.utils.InvertedIndexTestUtils;
+
+public class InMemoryBTreeInvertedIndexSearchTest extends AbstractInvertedIndexTest {
+
+    /**
+     * Runs 5 random conjunctive search queries to test the
+     * ConjunctiveSearchModifier.
+     */
+    @Test
+    public void conjunctiveQueryTest() throws Exception {
+        insertDocuments();
+        IInvertedIndexSearchModifier searchModifier = new ConjunctiveSearchModifier();
+        runQueries(searchModifier, 5);
+    }
+
+    /**
+     * Runs 5 random jaccard-based search queries with thresholds 0.9, 0.8, 0.7.
+     * Tests the JaccardSearchModifier.
+     */
+    @Test
+    public void jaccardQueryTest() throws Exception {
+        insertDocuments();
+        JaccardSearchModifier searchModifier = new JaccardSearchModifier(1.0f);
+
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("JACCARD: " + 0.9f);
+        }
+        searchModifier.setJaccThresh(0.9f);
+        runQueries(searchModifier, 5);
+
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("JACCARD: " + 0.8f);
+        }
+        searchModifier.setJaccThresh(0.8f);
+        runQueries(searchModifier, 5);
+
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("JACCARD: " + 0.7f);
+        }
+        searchModifier.setJaccThresh(0.7f);
+        runQueries(searchModifier, 5);
+    }
+
+    /**
+     * Runs 5 random edit-distance based search queries with thresholds 1, 2, 3.
+     * Tests the EditDistanceSearchModifier.
+     */
+    @Test
+    public void editDistanceQueryTest() throws Exception {
+        insertDocuments();
+        EditDistanceSearchModifier searchModifier = new EditDistanceSearchModifier(3, 0);
+
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("EDIT DISTANCE: " + 1);
+        }
+        searchModifier.setEdThresh(1);
+        runQueries(searchModifier, 5);
+
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("EDIT DISTANCE: " + 2);
+        }
+        searchModifier.setEdThresh(2);
+        runQueries(searchModifier, 5);
+
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("EDIT DISTANCE: " + 3);
+        }
+        searchModifier.setEdThresh(3);
+        runQueries(searchModifier, 5);
+    }
+    
+    @Override
+    protected void setTokenizer() {
+        ITokenFactory tokenFactory = new UTF8NGramTokenFactory();
+        tokenizer = new NGramUTF8StringBinaryTokenizer(3, false, true, false, tokenFactory);
+    }
+
+    @Override
+    protected void setInvertedIndex() throws HyracksDataException {
+        invertedIndex = InvertedIndexTestUtils.createTestInMemoryBTreeInvertedIndex(harness, tokenizer);
+        invertedIndex.create(harness.getFileId());
+        invertedIndex.open(harness.getFileId());
+    }
+
+    @Override
+    protected void setLogger() {
+        LOGGER = Logger.getLogger(InMemoryBTreeInvertedIndexSearchTest.class.getName());
+    }
+
+    @Override
+    protected void setRandom() {
+        random = false;
+    }
+
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InvertedIndexBulkLoadTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InvertedIndexBulkLoadTest.java
new file mode 100644
index 0000000..fbe408a
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InvertedIndexBulkLoadTest.java
@@ -0,0 +1,37 @@
+package edu.uci.ics.hyracks.storage.am.lsm.inverteredindex;
+
+import java.util.logging.Logger;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.ITokenFactory;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizer;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.UTF8NGramTokenFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.utils.InvertedIndexTestUtils;
+
+public class InvertedIndexBulkLoadTest extends AbstractInvertedIndexBulkloadTest {
+
+    @Override
+    protected void setTokenizer() {
+        ITokenFactory tokenFactory = new UTF8NGramTokenFactory();
+        tokenizer = new NGramUTF8StringBinaryTokenizer(3, false, true, false, tokenFactory);
+    }
+
+    @Override
+    protected void setInvertedIndex() throws HyracksDataException {
+        invertedIndex = InvertedIndexTestUtils.createTestInvertedIndex(harness, tokenizer);
+        invertedIndex.create(harness.getDiskInvertedIndexFileId());
+        invertedIndex.open(harness.getDiskInvertedIndexFileId());
+    }
+
+    @Override
+    protected void setLogger() {
+        LOGGER = Logger.getLogger(InvertedIndexBulkLoadTest.class.getName());
+
+    }
+
+    @Override
+    protected void setRandom() {
+        random = true;
+    }
+
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InvertedIndexSearchTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InvertedIndexSearchTest.java
new file mode 100644
index 0000000..79686ae
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/InvertedIndexSearchTest.java
@@ -0,0 +1,109 @@
+package edu.uci.ics.hyracks.storage.am.lsm.inverteredindex;
+
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers.ConjunctiveSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers.EditDistanceSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.searchmodifiers.JaccardSearchModifier;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.ITokenFactory;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizer;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.UTF8NGramTokenFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.utils.InvertedIndexTestUtils;
+
+public class InvertedIndexSearchTest extends AbstractInvertedIndexTest {
+    /**
+     * Runs 5 random conjunctive search queries to test the
+     * ConjunctiveSearchModifier.
+     */
+    @Test
+    public void conjunctiveQueryTest() throws Exception {
+        bulkLoadDocuments();
+        IInvertedIndexSearchModifier searchModifier = new ConjunctiveSearchModifier();
+        runQueries(searchModifier, 5);
+    }
+
+    /**
+     * Runs 5 random jaccard-based search queries with thresholds 0.9, 0.8, 0.7.
+     * Tests the JaccardSearchModifier.
+     */
+    @Test
+    public void jaccardQueryTest() throws Exception {
+        bulkLoadDocuments();
+        JaccardSearchModifier searchModifier = new JaccardSearchModifier(1.0f);
+
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("JACCARD: " + 0.9f);
+        }
+        searchModifier.setJaccThresh(0.9f);
+        runQueries(searchModifier, 5);
+
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("JACCARD: " + 0.8f);
+        }
+        searchModifier.setJaccThresh(0.8f);
+        runQueries(searchModifier, 5);
+
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("JACCARD: " + 0.7f);
+        }
+        searchModifier.setJaccThresh(0.7f);
+        runQueries(searchModifier, 5);
+    }
+
+    /**
+     * Runs 5 random edit-distance based search queries with thresholds 1, 2, 3.
+     * Tests the EditDistanceSearchModifier.
+     */
+    @Test
+    public void editDistanceQueryTest() throws Exception {
+        bulkLoadDocuments();
+        EditDistanceSearchModifier searchModifier = new EditDistanceSearchModifier(3, 0);
+
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("EDIT DISTANCE: " + 1);
+        }
+        searchModifier.setEdThresh(1);
+        runQueries(searchModifier, 5);
+
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("EDIT DISTANCE: " + 2);
+        }
+        searchModifier.setEdThresh(2);
+        runQueries(searchModifier, 5);
+
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("EDIT DISTANCE: " + 3);
+        }
+        searchModifier.setEdThresh(3);
+        runQueries(searchModifier, 5);
+    }
+    
+    @Override
+    protected void setTokenizer() {
+        ITokenFactory tokenFactory = new UTF8NGramTokenFactory();
+        tokenizer = new NGramUTF8StringBinaryTokenizer(3, false, true, false, tokenFactory);
+    }
+
+    @Override
+    protected void setInvertedIndex() throws HyracksDataException {
+        invertedIndex = InvertedIndexTestUtils.createTestInvertedIndex(harness, tokenizer);
+        invertedIndex.create(harness.getDiskInvertedIndexFileId());
+        invertedIndex.open(harness.getDiskInvertedIndexFileId());
+    }
+
+    @Override
+    protected void setLogger() {
+        LOGGER = Logger.getLogger(InvertedIndexSearchTest.class.getName());
+    }
+
+    @Override
+    protected void setRandom() {
+        random = false;
+    }
+
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/LSMInvertedIndexInsertTest.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/LSMInvertedIndexInsertTest.java
new file mode 100644
index 0000000..23834c2
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/LSMInvertedIndexInsertTest.java
@@ -0,0 +1,36 @@
+package edu.uci.ics.hyracks.storage.am.lsm.inverteredindex;
+
+import java.util.logging.Logger;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.ITokenFactory;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizer;
+import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.UTF8NGramTokenFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.utils.InvertedIndexTestUtils;
+
+public class LSMInvertedIndexInsertTest extends AbstractInvertedIndexInsertTest {
+
+    @Override
+    protected void setTokenizer() {
+        ITokenFactory tokenFactory = new UTF8NGramTokenFactory();
+        tokenizer = new NGramUTF8StringBinaryTokenizer(3, false, true, false, tokenFactory);
+    }
+
+    @Override
+    protected void setInvertedIndex() throws HyracksDataException {
+        invertedIndex = InvertedIndexTestUtils.createTestLSMInvertedIndex(harness, tokenizer);
+        invertedIndex.create(harness.getFileId());
+        invertedIndex.open(harness.getFileId());
+    }
+
+    @Override
+    protected void setLogger() {
+        LOGGER = Logger.getLogger(LSMInvertedIndexInsertTest.class.getName());
+    }
+
+    @Override
+    protected void setRandom() {
+        random = true;
+    }
+
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/LSMInvertedIndexTestHarness.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/LSMInvertedIndexTestHarness.java
new file mode 100644
index 0000000..60d5683
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/inverteredindex/LSMInvertedIndexTestHarness.java
@@ -0,0 +1,222 @@
+package edu.uci.ics.hyracks.storage.am.lsm.inverteredindex;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Random;
+
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.api.io.IODeviceHandle;
+import edu.uci.ics.hyracks.control.nc.io.IOManager;
+import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
+import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+import edu.uci.ics.hyracks.storage.am.common.frames.LIFOMetaDataFrameFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.InMemoryBufferCache;
+import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.InMemoryFreePageManager;
+import edu.uci.ics.hyracks.storage.common.buffercache.HeapBufferAllocator;
+import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
+import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
+import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder;
+import edu.uci.ics.hyracks.test.support.TestUtils;
+
+public class LSMInvertedIndexTestHarness {
+
+    private static final long RANDOM_SEED = 50;
+    private static final int DEFAULT_DISK_PAGE_SIZE = 256;
+    private static final int DEFAULT_DISK_NUM_PAGES = 1000;
+    private static final int DEFAULT_DISK_MAX_OPEN_FILES = 200;
+    private static final int DEFAULT_MEM_PAGE_SIZE = 4096;
+    private static final int DEFAULT_MEM_NUM_PAGES = 200;
+    private static final int DEFAULT_HYRACKS_FRAME_SIZE = 128;
+    private static final int DUMMY_FILE_ID = -1;
+
+    protected final int diskPageSize;
+    protected final int diskNumPages;
+    protected final int diskMaxOpenFiles;
+    protected final int memPageSize;
+    protected final int memNumPages;
+    protected final int hyracksFrameSize;
+
+    protected IOManager ioManager;
+    protected IBufferCache diskBufferCache;
+    protected IFileMapProvider diskFileMapProvider;
+    protected InMemoryBufferCache memBufferCache;
+    protected InMemoryFreePageManager memFreePageManager;
+    protected IHyracksTaskContext ctx;
+
+    protected final Random rnd = new Random();
+    protected final static SimpleDateFormat simpleDateFormat = new SimpleDateFormat("ddMMyy-hhmmssSS");
+    protected final static String sep = System.getProperty("file.separator");
+    protected String onDiskDir;
+    protected String btreeFileName = "btree_vocab";
+    protected String invIndexFileName = "inv_index";
+    protected FileReference btreeFileRef;
+    protected FileReference invIndexFileRef;
+
+    // Token information
+    protected ITypeTraits[] tokenTypeTraits = new ITypeTraits[] { UTF8StringPointable.TYPE_TRAITS };
+    protected IBinaryComparatorFactory[] tokenCmpFactories = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
+            .of(UTF8StringPointable.FACTORY) };
+
+    // Inverted list information
+    protected ITypeTraits[] invListTypeTraits = new ITypeTraits[] { IntegerPointable.TYPE_TRAITS };
+    protected IBinaryComparatorFactory[] invListCmpFactories = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
+            .of(IntegerPointable.FACTORY) };
+
+    public LSMInvertedIndexTestHarness() {
+        this.diskPageSize = DEFAULT_DISK_PAGE_SIZE;
+        this.diskNumPages = DEFAULT_DISK_NUM_PAGES;
+        this.diskMaxOpenFiles = DEFAULT_DISK_MAX_OPEN_FILES;
+        this.memPageSize = DEFAULT_MEM_PAGE_SIZE;
+        this.memNumPages = DEFAULT_MEM_NUM_PAGES;
+        this.hyracksFrameSize = DEFAULT_HYRACKS_FRAME_SIZE;
+    }
+
+    public LSMInvertedIndexTestHarness(int diskPageSize, int diskNumPages, int diskMaxOpenFiles, int memPageSize,
+            int memNumPages, int hyracksFrameSize) {
+        this.diskPageSize = diskPageSize;
+        this.diskNumPages = diskNumPages;
+        this.diskMaxOpenFiles = diskMaxOpenFiles;
+        this.memPageSize = memPageSize;
+        this.memNumPages = memNumPages;
+        this.hyracksFrameSize = hyracksFrameSize;
+    }
+
+    public void setUp() throws HyracksException {
+        onDiskDir = "lsm_invertedindex_" + simpleDateFormat.format(new Date()) + sep;
+        ctx = TestUtils.create(getHyracksFrameSize());
+        TestStorageManagerComponentHolder.init(diskPageSize, diskNumPages, diskMaxOpenFiles);
+        diskBufferCache = TestStorageManagerComponentHolder.getBufferCache(ctx);
+        diskFileMapProvider = TestStorageManagerComponentHolder.getFileMapProvider(ctx);
+        memBufferCache = new InMemoryBufferCache(new HeapBufferAllocator(), memPageSize, memNumPages);
+        memFreePageManager = new InMemoryFreePageManager(memNumPages, new LIFOMetaDataFrameFactory());
+        ioManager = TestStorageManagerComponentHolder.getIOManager();
+        rnd.setSeed(RANDOM_SEED);
+        
+        File btreeFile = new File(onDiskDir + btreeFileName);
+        btreeFile.deleteOnExit();
+        File invIndexFile = new File(onDiskDir + invIndexFileName);
+        invIndexFile.deleteOnExit();
+        btreeFileRef = new FileReference(btreeFile);
+        invIndexFileRef = new FileReference(invIndexFile);
+        diskBufferCache.createFile(btreeFileRef);
+        diskBufferCache.openFile(diskFileMapProvider.lookupFileId(btreeFileRef));
+        diskBufferCache.createFile(invIndexFileRef);
+        diskBufferCache.openFile(diskFileMapProvider.lookupFileId(invIndexFileRef));
+    }
+
+    public void tearDown() throws HyracksDataException {
+        diskBufferCache.closeFile(diskFileMapProvider.lookupFileId(btreeFileRef));
+        diskBufferCache.deleteFile(diskFileMapProvider.lookupFileId(btreeFileRef), false);
+        diskBufferCache.closeFile(diskFileMapProvider.lookupFileId(invIndexFileRef));
+        diskBufferCache.deleteFile(diskFileMapProvider.lookupFileId(invIndexFileRef), false);
+        diskBufferCache.close();
+        for (IODeviceHandle dev : ioManager.getIODevices()) {
+            File dir = new File(dev.getPath(), onDiskDir);
+            FilenameFilter filter = new FilenameFilter() {
+                public boolean accept(File dir, String name) {
+                    return !name.startsWith(".");
+                }
+            };
+            String[] files = dir.list(filter);
+            if (files != null) {
+                for (String fileName : files) {
+                    File file = new File(dir.getPath() + File.separator + fileName);
+                    file.delete();
+                }
+            }
+            dir.delete();
+        }
+    }
+    
+    public int getDiskInvertedIndexFileId() throws HyracksDataException {
+        return diskFileMapProvider.lookupFileId(invIndexFileRef);
+    }
+    
+    public int getDiskBtreeFileId() throws HyracksDataException {
+        return diskFileMapProvider.lookupFileId(btreeFileRef);
+    }
+
+    public int getDiskPageSize() {
+        return diskPageSize;
+    }
+
+    public int getDiskNumPages() {
+        return diskNumPages;
+    }
+
+    public int getDiskMaxOpenFiles() {
+        return diskMaxOpenFiles;
+    }
+
+    public int getMemPageSize() {
+        return memPageSize;
+    }
+
+    public int getMemNumPages() {
+        return memNumPages;
+    }
+
+    public int getHyracksFrameSize() {
+        return hyracksFrameSize;
+    }
+
+    public int getFileId() {
+        return DUMMY_FILE_ID;
+    }
+
+    public IOManager getIOManager() {
+        return ioManager;
+    }
+
+    public IBufferCache getDiskBufferCache() {
+        return diskBufferCache;
+    }
+
+    public IFileMapProvider getDiskFileMapProvider() {
+        return diskFileMapProvider;
+    }
+
+    public InMemoryBufferCache getMemBufferCache() {
+        return memBufferCache;
+    }
+
+    public InMemoryFreePageManager getMemFreePageManager() {
+        return memFreePageManager;
+    }
+
+    public IHyracksTaskContext getHyracksTastContext() {
+        return ctx;
+    }
+
+    public String getOnDiskDir() {
+        return onDiskDir;
+    }
+
+    public Random getRandom() {
+        return rnd;
+    }
+    
+    public ITypeTraits[] getTokenTypeTraits() {
+        return tokenTypeTraits;
+    }
+    
+    public ITypeTraits[] getInvertedListTypeTraits() {
+        return invListTypeTraits;
+    }
+    
+    public IBinaryComparatorFactory[] getTokenBinaryComparatorFactories() {
+        return tokenCmpFactories;
+    }
+    
+    public IBinaryComparatorFactory[] getInvertedListBinaryComparatorFactories() {
+        return invListCmpFactories;
+    }
+}
diff --git a/hyracks-tests/pom.xml b/hyracks-tests/pom.xml
index 8950730..8c4c67a 100644
--- a/hyracks-tests/pom.xml
+++ b/hyracks-tests/pom.xml
@@ -19,5 +19,6 @@
     <module>hyracks-storage-am-lsm-common-test</module>
     <module>hyracks-storage-am-lsm-btree-test</module>
     <module>hyracks-storage-am-lsm-rtree-test</module>
+    <module>hyracks-storage-am-lsm-invertedindex-test</module>
   </modules>
 </project>
diff --git a/pom.xml b/pom.xml
index fbef96f..c292ecb 100644
--- a/pom.xml
+++ b/pom.xml
@@ -96,6 +96,7 @@
     <module>hyracks-storage-am-lsm-common</module>
     <module>hyracks-storage-am-lsm-btree</module>
     <module>hyracks-storage-am-lsm-rtree</module>
+    <module>hyracks-storage-am-lsm-invertedindex</module>
     <module>hyracks-storage-am-rtree</module>
     <module>hyracks-test-support</module>
     <module>hyracks-tests</module>