Some generalizations to support length filtering.
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_lsm_length_filter@2453 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java
index 69e57be..2fd5d63 100644
--- a/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java
+++ b/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndex.java
@@ -15,6 +15,8 @@
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk;
+import java.io.DataOutput;
+import java.io.IOException;
import java.nio.ByteBuffer;
import edu.uci.ics.hyracks.api.context.IHyracksCommonContext;
@@ -69,13 +71,12 @@
public class OnDiskInvertedIndex implements IInvertedIndex {
private final IHyracksCommonContext ctx = new DefaultHyracksCommonContext();
- // Schema of BTree tuples.
- public final int TOKEN_FIELD = 0;
- public final int INVLIST_START_PAGE_ID_FIELD = 1;
- public final int INVLIST_END_PAGE_ID_FIELD = 2;
- public final int INVLIST_START_OFF_FIELD = 3;
- public final int INVLIST_NUM_ELEMENTS_FIELD = 4;
-
+ // Schema of BTree tuples, set in constructor.
+ public final int invListStartPageIdField;
+ public final int invListEndPageIdField;
+ public final int invListStartOffField;
+ public final int invListNumElementsField;
+
// Type traits to be appended to the token type trait which finally form the BTree field type traits.
private static final ITypeTraits[] btreeValueTypeTraits = new ITypeTraits[4];
static {
@@ -104,7 +105,6 @@
private final FileReference invListsFile;
// Last page id of inverted-lists file (inclusive). Set during bulk load.
private int invListsMaxPageId = -1;
-
private boolean isOpen = false;
public OnDiskInvertedIndex(IBufferCache bufferCache, IFileMapProvider fileMapProvider,
@@ -124,6 +124,10 @@
this.numTokenFields = btree.getComparatorFactories().length;
this.numInvListKeys = invListCmpFactories.length;
this.invListsFile = invListsFile;
+ this.invListStartPageIdField = numTokenFields;
+ this.invListEndPageIdField = numTokenFields + 1;
+ this.invListStartOffField = numTokenFields + 2;
+ this.invListNumElementsField = numTokenFields + 3;
}
@Override
@@ -260,16 +264,16 @@
ctx.btreeCursor.next();
ITupleReference frameTuple = ctx.btreeCursor.getTuple();
int startPageId = IntegerSerializerDeserializer.getInt(
- frameTuple.getFieldData(INVLIST_START_PAGE_ID_FIELD),
- frameTuple.getFieldStart(INVLIST_START_PAGE_ID_FIELD));
+ frameTuple.getFieldData(invListStartPageIdField),
+ frameTuple.getFieldStart(invListStartPageIdField));
int endPageId = IntegerSerializerDeserializer.getInt(
- frameTuple.getFieldData(INVLIST_END_PAGE_ID_FIELD),
- frameTuple.getFieldStart(INVLIST_END_PAGE_ID_FIELD));
- int startOff = IntegerSerializerDeserializer.getInt(frameTuple.getFieldData(INVLIST_START_OFF_FIELD),
- frameTuple.getFieldStart(INVLIST_START_OFF_FIELD));
+ frameTuple.getFieldData(invListEndPageIdField),
+ frameTuple.getFieldStart(invListEndPageIdField));
+ int startOff = IntegerSerializerDeserializer.getInt(frameTuple.getFieldData(invListStartOffField),
+ frameTuple.getFieldStart(invListStartOffField));
int numElements = IntegerSerializerDeserializer.getInt(
- frameTuple.getFieldData(INVLIST_NUM_ELEMENTS_FIELD),
- frameTuple.getFieldStart(INVLIST_NUM_ELEMENTS_FIELD));
+ frameTuple.getFieldData(invListNumElementsField),
+ frameTuple.getFieldStart(invListNumElementsField));
listCursor.reset(startPageId, endPageId, startOff, numElements);
} else {
listCursor.reset(0, 0, 0, 0);
@@ -330,14 +334,26 @@
private void createAndInsertBTreeTuple() throws IndexException, HyracksDataException {
// Build tuple.
btreeTupleBuilder.reset();
- btreeTupleBuilder.addField(lastTuple.getFieldData(0), lastTuple.getFieldStart(0),
- lastTuple.getFieldLength(0));
- // TODO: Boxing integers here. Fix it.
- btreeTupleBuilder.addField(IntegerSerializerDeserializer.INSTANCE, currentInvListStartPageId);
- btreeTupleBuilder.addField(IntegerSerializerDeserializer.INSTANCE, currentPageId);
- btreeTupleBuilder.addField(IntegerSerializerDeserializer.INSTANCE, currentInvListStartOffset);
- btreeTupleBuilder.addField(IntegerSerializerDeserializer.INSTANCE, invListBuilder.getListSize());
- // Reset tuple reference and add it.
+ DataOutput output = btreeTupleBuilder.getDataOutput();
+ // Add key fields.
+ for (int i = 0; i < numTokenFields; i++) {
+ btreeTupleBuilder.addField(lastTuple.getFieldData(i), lastTuple.getFieldStart(i),
+ lastTuple.getFieldLength(i));
+ }
+ // Add inverted-list 'pointer' value fields.
+ try {
+ output.writeInt(currentInvListStartPageId);
+ btreeTupleBuilder.addFieldEndOffset();
+ output.writeInt(currentPageId);
+ btreeTupleBuilder.addFieldEndOffset();
+ output.writeInt(currentInvListStartOffset);
+ btreeTupleBuilder.addFieldEndOffset();
+ output.writeInt(invListBuilder.getListSize());
+ btreeTupleBuilder.addFieldEndOffset();
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
+ }
+ // Reset tuple reference and add it into the BTree load.
btreeTupleReference.reset(btreeTupleBuilder.getFieldEndOffsets(), btreeTupleBuilder.getByteArray());
btreeBulkloader.add(btreeTupleReference);
}