Added range search cursor for on-disk inverted index in preparation for implementing merge.

git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_inverted_index_updates_new@1842 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/tuples/ConcatenatingTupleReference.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/tuples/ConcatenatingTupleReference.java
index ca1fa1e..d82004c 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/tuples/ConcatenatingTupleReference.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/tuples/ConcatenatingTupleReference.java
@@ -17,6 +17,8 @@
 
 import java.util.Arrays;
 
+import javax.swing.plaf.basic.BasicInternalFrameTitlePane.MaximizeAction;
+
 import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
 
 /**
@@ -58,6 +60,14 @@
         }
     }
     
+    public int getNumTuples() {
+        return numTuples;
+    }
+    
+    public boolean hasMaxTuples() {
+        return numTuples == tuples.length;
+    }
+    
     @Override
     public int getFieldCount() {
         return totalFieldCount;
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndexAccessor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndexAccessor.java
index d5266ff..3fe5b57 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndexAccessor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/api/IInvertedIndexAccessor.java
@@ -18,6 +18,8 @@
 import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
 import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
 import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
+import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
 import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
 
 public interface IInvertedIndexAccessor extends IIndexAccessor {
@@ -25,4 +27,9 @@
 
     public void openInvertedListCursor(IInvertedListCursor listCursor, ITupleReference searchKey)
             throws HyracksDataException, IndexException;
+
+    public IIndexCursor createRangeSearchCursor();
+
+    public void rangeSearch(IIndexCursor cursor, ISearchPredicate searchPred) throws IndexException,
+            HyracksDataException;
 }
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java
index c76799f..c7d6075 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java
@@ -94,6 +94,19 @@
     }
 
     @Override
+    public IIndexCursor createRangeSearchCursor() {
+        return new LSMInvertedIndexRangeSearchCursor();
+    }
+    
+    @Override
+    public void rangeSearch(IIndexCursor cursor, ISearchPredicate searchPred) throws IndexException,
+            HyracksDataException {
+        // TODO: Figure out what the initial state is here.
+        //LSMInvertedIndexRangeSearchCursor rangeSearchCursor = (LSMInvertedIndexRangeSearchCursor) cursor;
+        //rangeSearchCursor.open(initialState, searchPred);
+    }
+    
+    @Override
     public void physicalDelete(ITupleReference tuple) throws HyracksDataException, IndexException {
         // TODO: Do we need this?
     }
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
index 8dac1d0..a3e7bf1 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2009-2010 by The Regents of the University of California
+ * Copyright 2009-2012 by The Regents of the University of California
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * you may obtain a copy of the License from
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
index c0acf81..7cc33e5 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexAccessor.java
@@ -18,7 +18,9 @@
 import edu.uci.ics.hyracks.api.context.IHyracksCommonContext;
 import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
 import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame;
 import edu.uci.ics.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeRangeSearchCursor;
 import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
 import edu.uci.ics.hyracks.storage.am.common.api.IIndexOpContext;
 import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
@@ -76,6 +78,18 @@
         index.openInvertedListCursor(listCursor, searchKey, opCtx);
     }
 
+    @Override
+    public IIndexCursor createRangeSearchCursor() {
+        IBTreeLeafFrame leafFrame = (IBTreeLeafFrame) index.getBTree().getLeafFrameFactory().createFrame();
+        return new BTreeRangeSearchCursor(leafFrame, false);
+    }
+    
+    @Override
+    public void rangeSearch(IIndexCursor cursor, ISearchPredicate searchPred) throws IndexException,
+            HyracksDataException {
+        btreeAccessor.search(cursor, searchPred);
+    }
+    
     public BTreeAccessor getBTreeAccessor() {
         return btreeAccessor;
     }
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java
index bd88c8d..eecb0cd 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java
@@ -463,6 +463,17 @@
         }
 
         @Override
+        public IIndexCursor createRangeSearchCursor() {
+            return new OnDiskInvertedIndexRangeSearchCursor(index, opCtx);
+        }
+        
+        @Override
+        public void rangeSearch(IIndexCursor cursor, ISearchPredicate searchPred) throws HyracksDataException {
+            OnDiskInvertedIndexRangeSearchCursor rangeSearchCursor = (OnDiskInvertedIndexRangeSearchCursor) cursor;
+            rangeSearchCursor.open(null, searchPred);
+        }
+        
+        @Override
         public void insert(ITupleReference tuple) throws HyracksDataException, IndexException {
             throw new UnsupportedOperationException("Insert not supported by inverted index.");
         }
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexRangeSearchCursor.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexRangeSearchCursor.java
new file mode 100644
index 0000000..e4c9110
--- /dev/null
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexRangeSearchCursor.java
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
+import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
+import edu.uci.ics.hyracks.storage.am.common.api.ICursorInitialState;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexOpContext;
+import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
+import edu.uci.ics.hyracks.storage.am.common.tuples.ConcatenatingTupleReference;
+import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
+
+/**
+ * Scans a range of tokens, returning tuples containing a token and an inverted-list element.
+ */
+public class OnDiskInvertedIndexRangeSearchCursor implements IIndexCursor {
+
+    private final BTree btree;
+    private final IIndexAccessor btreeAccessor;
+    private final IInvertedIndex invIndex;
+    private final IIndexOpContext opCtx;
+    private final IInvertedListCursor invListCursor;
+    private boolean unpinNeeded;
+    
+    private final IIndexCursor btreeCursor;
+    private RangePredicate btreePred;
+
+    private final PermutingTupleReference tokenTuple;
+    private ConcatenatingTupleReference concatTuple;
+
+    public OnDiskInvertedIndexRangeSearchCursor(IInvertedIndex invIndex, IIndexOpContext opCtx) {
+        this.btree = ((OnDiskInvertedIndex) invIndex).getBTree();
+        this.btreeAccessor = btree.createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+        this.invIndex = invIndex;
+        this.opCtx = opCtx;
+        // Project away non-token fields of the BTree tuples.
+        int[] fieldPermutation = new int[invIndex.getTokenTypeTraits().length];
+        for (int i = 0; i < invIndex.getTokenTypeTraits().length; i++) {
+            fieldPermutation[i] = i;
+        }
+        tokenTuple = new PermutingTupleReference(fieldPermutation);
+        btreeCursor = btreeAccessor.createSearchCursor();
+        concatTuple = new ConcatenatingTupleReference(2);
+        invListCursor = invIndex.createInvertedListCursor();
+        unpinNeeded = false;
+    }
+
+    @Override
+    public void open(ICursorInitialState initialState, ISearchPredicate searchPred) throws HyracksDataException {
+        this.btreePred = (RangePredicate) searchPred;
+        try {
+            btreeAccessor.search(btreeCursor, btreePred);
+        } catch (IndexException e) {
+            throw new HyracksDataException(e);
+        }        
+        invListCursor.pinPages();
+    }
+
+    @Override
+    public boolean hasNext() throws HyracksDataException {
+        if (invListCursor.hasNext()) {
+            return true;
+        }
+        invListCursor.unpinPages();
+        unpinNeeded = false;
+        if (!btreeCursor.hasNext()) {
+            return false;
+        }
+        btreeCursor.next();
+        tokenTuple.reset(btreeCursor.getTuple());
+        try {
+            invIndex.openInvertedListCursor(invListCursor, tokenTuple, opCtx);
+        } catch (IndexException e) {
+            throw new HyracksDataException(e);
+        }
+        invListCursor.pinPages();
+        unpinNeeded = true;
+        concatTuple.reset();
+        concatTuple.addTuple(tokenTuple);
+        return true;
+    }
+
+    @Override
+    public void next() throws HyracksDataException {
+        invListCursor.next();
+        if (concatTuple.hasMaxTuples()) {
+            concatTuple.removeLastTuple();
+        }
+        concatTuple.addTuple(invListCursor.getTuple());
+    }
+
+    @Override
+    public void close() throws HyracksDataException {
+        if (unpinNeeded) {
+            invListCursor.unpinPages();
+        }
+        btreeCursor.close();
+    }
+
+    @Override
+    public void reset() throws HyracksDataException {
+        if (unpinNeeded) {
+            invListCursor.unpinPages();
+        }
+        open(null, btreePred);
+    }
+
+    @Override
+    public ITupleReference getTuple() {
+        return concatTuple;
+    }
+}
diff --git a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
index a39ec46..d5696c5 100644
--- a/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
+++ b/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexTestUtils.java
@@ -30,6 +30,7 @@
 import java.util.SortedSet;
 import java.util.TreeSet;
 
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
 import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
 import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
 import edu.uci.ics.hyracks.data.std.util.ByteArrayAccessibleOutputStream;
@@ -39,19 +40,20 @@
 import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
 import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
 import edu.uci.ics.hyracks.storage.am.btree.OrderedIndexTestUtils;
+import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
 import edu.uci.ics.hyracks.storage.am.common.CheckTuple;
 import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoader;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
 import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
 import edu.uci.ics.hyracks.storage.am.common.datagen.DocumentStringFieldValueGenerator;
 import edu.uci.ics.hyracks.storage.am.common.datagen.IFieldValueGenerator;
 import edu.uci.ics.hyracks.storage.am.common.datagen.SortedIntegerFieldValueGenerator;
 import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
+import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
 import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
-import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.LSMInvertedIndexTestHarness;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.DelimitedUTF8StringBinaryTokenizerFactory;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer;
@@ -122,6 +124,52 @@
         }
     }
 
+    public static void compareActualAndExpectedIndexes(InvertedIndexTestContext testCtx) throws HyracksDataException,
+            IndexException {
+        IInvertedIndex invIndex = (IInvertedIndex) testCtx.getIndex();
+        int tokenFieldCount = invIndex.getTokenTypeTraits().length;
+        int invListFieldCount = invIndex.getInvListTypeTraits().length;
+        IInvertedIndexAccessor invIndexAccessor = (IInvertedIndexAccessor) invIndex.createAccessor(
+                NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+        IIndexCursor invIndexCursor = invIndexAccessor.createRangeSearchCursor();
+        MultiComparator tokenCmp = MultiComparator.create(invIndex.getTokenCmpFactories());
+        IBinaryComparatorFactory[] tupleCmpFactories = new IBinaryComparatorFactory[tokenFieldCount + invListFieldCount];
+        for (int i = 0; i < tokenFieldCount; i++) {
+            tupleCmpFactories[i] = invIndex.getTokenCmpFactories()[i];
+        }
+        for (int i = 0; i < invListFieldCount; i++) {
+            tupleCmpFactories[tokenFieldCount + i] = invIndex.getInvListCmpFactories()[i];
+        }
+        MultiComparator tupleCmp = MultiComparator.create(tupleCmpFactories);
+        RangePredicate nullPred = new RangePredicate(null, null, true, true, tokenCmp, tokenCmp);
+        invIndexAccessor.rangeSearch(invIndexCursor, nullPred);
+
+        // Helpers for generating a serialized inverted-list element from a CheckTuple from the expected index.
+        ISerializerDeserializer[] fieldSerdes = testCtx.getFieldSerdes();
+        ArrayTupleBuilder expectedBuilder = new ArrayTupleBuilder(fieldSerdes.length);
+        ArrayTupleReference expectedTuple = new ArrayTupleReference();
+
+        Iterator<CheckTuple> expectedIter = testCtx.getCheckTuples().iterator();
+
+        // Compare index elements.
+        int count = 0;
+        try {
+            while (invIndexCursor.hasNext() && expectedIter.hasNext()) {
+                invIndexCursor.next();
+                ITupleReference actualTuple = invIndexCursor.getTuple();
+                CheckTuple expected = expectedIter.next();
+                OrderedIndexTestUtils.createTupleFromCheckTuple(expected, expectedBuilder, expectedTuple, fieldSerdes);
+                if (tupleCmp.compare(actualTuple, expectedTuple) != 0) {
+                    fail("Index elements differ for token '" + expected.getField(0) + "' at position " + count + ".");
+                }
+                count++;
+            }
+        } finally {
+            invIndexCursor.close();
+        }
+    }
+
+    /*
     @SuppressWarnings("unchecked")
     public static void compareActualAndExpectedIndexes(InvertedIndexTestContext testCtx) throws HyracksDataException,
             IndexException {
@@ -194,6 +242,7 @@
             }
         }
     }
+    */
 
     /**
      * Determine the expected results with the simple ScanCount algorithm.
@@ -245,7 +294,7 @@
             }
         }
     }
-    
+
     /*
     public static OnDiskInvertedIndex createTestInvertedIndex(LSMInvertedIndexTestHarness harness, IBinaryTokenizer tokenizer)
             throws HyracksDataException {