1. Inverted lists are now more generic. An inverted list is a sorted list of fixed-size tuples.
2. Finished bulk loading of inverted index using one btree file as token dictionary and one file for the inverted lists.
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_indexes@428 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/dataflow/BTreeOpHelper.java b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/dataflow/BTreeOpHelper.java
index cf4be8f..770bbba 100644
--- a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/dataflow/BTreeOpHelper.java
+++ b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/dataflow/BTreeOpHelper.java
@@ -24,7 +24,9 @@
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.common.api.IFreePageManager;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexMetaDataFrame;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexMetaDataFrameFactory;
import edu.uci.ics.hyracks.storage.am.common.frames.LIFOMetaDataFrame;
+import edu.uci.ics.hyracks.storage.am.common.frames.LIFOMetaDataFrameFactory;
import edu.uci.ics.hyracks.storage.am.common.freepage.LinkedListFreePageManager;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
@@ -125,11 +127,12 @@
.getTypeTraits(), comparators);
// TODO: abstract away in some kind of factory
- IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, btreeFileId, 0);
+ ITreeIndexMetaDataFrameFactory metaDataFrameFactory = new LIFOMetaDataFrameFactory();
+ IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, btreeFileId, 0, metaDataFrameFactory);
btree = new BTree(bufferCache, freePageManager, opDesc.getInteriorFactory(),
opDesc.getLeafFactory(), cmp);
if (mode == BTreeMode.CREATE_BTREE) {
- ITreeIndexMetaDataFrame metaFrame = new LIFOMetaDataFrame();
+ ITreeIndexMetaDataFrame metaFrame = btree.getFreePageManager().getMetaDataFrameFactory().getFrame();
try {
btree.create(btreeFileId, leafFrame, metaFrame);
} catch (Exception e) {
diff --git a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTree.java b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTree.java
index 14559f5..920b3b3 100644
--- a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTree.java
+++ b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTree.java
@@ -42,6 +42,8 @@
public class BTree {
+ public static final float DEFAULT_FILL_FACTOR = 0.7f;
+
private final static int RESTART_OP = Integer.MIN_VALUE;
private final static int MAX_RESTARTS = 10;
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/api/IFreePageManager.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/api/IFreePageManager.java
index 0c2bcfa..d8989d6 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/api/IFreePageManager.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/api/IFreePageManager.java
@@ -7,4 +7,5 @@
public void addFreePage(ITreeIndexMetaDataFrame metaFrame, int freePage) throws HyracksDataException;
public int getMaxPage(ITreeIndexMetaDataFrame metaFrame) throws HyracksDataException;
public void init(ITreeIndexMetaDataFrame metaFrame, int currentMaxPage) throws HyracksDataException;
+ public ITreeIndexMetaDataFrameFactory getMetaDataFrameFactory();
}
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/freepage/LinkedListFreePageManager.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/freepage/LinkedListFreePageManager.java
index 286acda..c09199a 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/freepage/LinkedListFreePageManager.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/freepage/LinkedListFreePageManager.java
@@ -3,6 +3,7 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.storage.am.common.api.IFreePageManager;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexMetaDataFrame;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexMetaDataFrameFactory;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
import edu.uci.ics.hyracks.storage.common.buffercache.ICachedPage;
import edu.uci.ics.hyracks.storage.common.file.BufferedFileHandle;
@@ -12,12 +13,13 @@
private final IBufferCache bufferCache;
private final int fileId;
private final int headPage;
+ private final ITreeIndexMetaDataFrameFactory metaDataFrameFactory;
- public LinkedListFreePageManager(IBufferCache bufferCache, int fileId,
- int headPage) {
+ public LinkedListFreePageManager(IBufferCache bufferCache, int fileId, int headPage, ITreeIndexMetaDataFrameFactory metaDataFrameFactory) {
this.bufferCache = bufferCache;
this.fileId = fileId;
this.headPage = headPage;
+ this.metaDataFrameFactory = metaDataFrameFactory;
}
@Override
@@ -168,4 +170,8 @@
}
}
+ @Override
+ public ITreeIndexMetaDataFrameFactory getMetaDataFrameFactory() {
+ return metaDataFrameFactory;
+ }
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedListBuilder.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedListBuilder.java
index 7cdd9ac..02b57d2 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedListBuilder.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedListBuilder.java
@@ -3,11 +3,11 @@
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
public interface IInvertedListBuilder {
- public boolean startNewList(ITupleReference tuple, int tokenField);
+ public boolean startNewList(ITupleReference tuple, int numTokenFields);
// returns true if successfully appended
// returns false if not enough space in targetBuf
- public boolean appendElement(ITupleReference tuple, int[] elementFields);
+ public boolean appendElement(ITupleReference tuple, int numTokenFields, int numElementFields);
public void setTargetBuffer(byte[] targetBuf, int startPos);
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedListCursor.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedListCursor.java
new file mode 100644
index 0000000..73ca734
--- /dev/null
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/api/IInvertedListCursor.java
@@ -0,0 +1,33 @@
+package edu.uci.ics.hyracks.storage.am.invertedindex.api;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+
+public interface IInvertedListCursor extends Comparable<IInvertedListCursor> {
+ void reset(int startPageId, int endPageId, int startOff, int numElements);
+
+ void pinPagesSync() throws HyracksDataException;
+ void pinPagesAsync() throws HyracksDataException;
+ void unpinPages() throws HyracksDataException;
+
+ boolean hasNext();
+ void next();
+
+ ITupleReference getTuple();
+
+ // getters
+ int getNumElements();
+ int getStartPageId();
+ int getEndPageId();
+ int getStartOff();
+
+ // jump to a specific element
+ void positionCursor(int elementIx);
+ boolean containsKey(byte[] searchKey, int keyStartOff, int keyLength, IBinaryComparator cmp);
+
+ // for debugging
+ String printInvList(ISerializerDeserializer[] serdes) throws HyracksDataException;
+ String printCurrentElement(ISerializerDeserializer[] serdes) throws HyracksDataException;
+}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListBuilder.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListBuilder.java
index b733103..d141db0 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListBuilder.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListBuilder.java
@@ -1,5 +1,6 @@
package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTrait;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedListBuilder;
@@ -10,13 +11,17 @@
private byte[] targetBuf;
private int pos;
- public FixedSizeElementInvertedListBuilder(int listElementSize) {
- this.listElementSize = listElementSize;
+ public FixedSizeElementInvertedListBuilder(ITypeTrait[] invListFields) {
+ int tmp = 0;
+ for(int i = 0; i < invListFields.length; i++) {
+ tmp += invListFields[i].getStaticallyKnownDataLength();
+ }
+ listElementSize = tmp;
}
@Override
public boolean startNewList(ITupleReference tuple, int tokenField) {
- if(pos + listElementSize >= targetBuf.length) return false;
+ if(pos + listElementSize > targetBuf.length) return false;
else {
listSize = 0;
return true;
@@ -24,23 +29,24 @@
}
@Override
- public boolean appendElement(ITupleReference tuple, int[] elementFields) {
- if(pos + listElementSize >= targetBuf.length) return false;
+ public boolean appendElement(ITupleReference tuple, int numTokenFields, int numElementFields) {
+ if(pos + listElementSize > targetBuf.length) return false;
- for(int i = 0; i < elementFields.length; i++) {
- int field = elementFields[i];
+ for(int i = 0; i < numElementFields; i++) {
+ int field = numTokenFields + i;
System.arraycopy(tuple.getFieldData(field), tuple.getFieldStart(field), targetBuf, pos, tuple.getFieldLength(field));
}
listSize++;
+ pos += listElementSize;
return true;
}
@Override
public void setTargetBuffer(byte[] targetBuf, int startPos) {
- this.pos = startPos;
- this.targetBuf = targetBuf;
+ this.targetBuf = targetBuf;
+ this.pos = startPos;
}
@Override
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListCursor.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListCursor.java
new file mode 100644
index 0000000..3d28415
--- /dev/null
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeElementInvertedListCursor.java
@@ -0,0 +1,278 @@
+package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTrait;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedListCursor;
+import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
+import edu.uci.ics.hyracks.storage.common.buffercache.ICachedPage;
+import edu.uci.ics.hyracks.storage.common.file.BufferedFileHandle;
+
+public class FixedSizeElementInvertedListCursor implements IInvertedListCursor {
+
+ private final IBufferCache bufferCache;
+ private final int fileId;
+ private final int elementSize;
+ private int currentElementIx;
+ private int currentOff;
+ private int currentPageIx;
+
+ private int startPageId;
+ private int endPageId;
+ private int startOff;
+ private int numElements;
+
+ private final FixedSizeTupleReference tuple;
+ private ICachedPage[] pages = new ICachedPage[10];
+ private int[] elementIndexes = new int[10];
+
+ public FixedSizeElementInvertedListCursor(IBufferCache bufferCache, int fileId, ITypeTrait[] invListFields) {
+ this.bufferCache = bufferCache;
+ this.fileId = fileId;
+ this.currentElementIx = 0;
+ this.currentPageIx = 0;
+
+ int tmp = 0;
+ for(int i = 0; i < invListFields.length; i++) {
+ tmp += invListFields[i].getStaticallyKnownDataLength();
+ }
+ elementSize = tmp;
+ this.currentOff = -elementSize;
+ this.tuple = new FixedSizeTupleReference(invListFields);
+ }
+
+ @Override
+ public boolean hasNext() {
+ if(currentElementIx < numElements) return true;
+ else return false;
+ }
+
+ @Override
+ public void next() {
+ //System.out.println("NEXT: " + currentOff + " " + elementSize + " " + bufferCache.getPageSize());
+ if(currentOff + elementSize >= bufferCache.getPageSize()) {
+ currentPageIx++;
+ currentOff = 0;
+ }
+ else {
+ currentOff += elementSize;
+ }
+
+ currentElementIx++;
+ tuple.reset(pages[currentPageIx].getBuffer().array(), currentOff);
+ }
+
+ @Override
+ public void pinPagesAsync() {
+
+
+ }
+
+ @Override
+ public void pinPagesSync() throws HyracksDataException {
+ int pix = 0;
+ for(int i = startPageId; i <= endPageId; i++) {
+ pages[pix] = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, i), false);
+ pages[pix].acquireReadLatch();
+ pix++;
+ }
+ }
+
+ @Override
+ public void unpinPages() throws HyracksDataException {
+ int numPages = endPageId - startPageId + 1;
+ for(int i = 0; i < numPages; i++) {
+ pages[i].releaseReadLatch();
+ bufferCache.unpin(pages[i]);
+ }
+ }
+
+ @Override
+ public void positionCursor(int elementIx) {
+ int numPages = endPageId - startPageId + 1;
+
+ currentPageIx = binarySearch(elementIndexes, 0, numPages, elementIx);
+ if(currentPageIx < 0) {
+ throw new IndexOutOfBoundsException("Requested index: " + elementIx + " from array with numElements: " + numElements);
+ }
+
+ if(currentPageIx == 0) {
+ currentOff = startOff + elementIx * elementSize;
+ }
+ else {
+ int relativeElementIx = elementIx - elementIndexes[currentPageIx-1] - 1;
+ currentOff = relativeElementIx * elementSize;
+ }
+
+ currentElementIx = elementIx;
+ tuple.reset(pages[currentPageIx].getBuffer().array(), currentOff);
+ }
+
+ @Override
+ public boolean containsKey(byte[] searchKey, int keyStartOff, int keyLength, IBinaryComparator comparator) {
+ int mid;
+ int begin = 0;
+ int end = numElements - 1;
+
+ while (begin <= end) {
+ mid = (begin + end) / 2;
+ positionCursor(mid);
+ int cmp = comparator.compare(searchKey, keyStartOff, keyLength, getPage().getBuffer().array(), getOffset(), elementSize);
+ if (cmp < 0) {
+ end = mid - 1;
+ } else if (cmp > 0) {
+ begin = mid + 1;
+ } else {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ @Override
+ public void reset(int startPageId, int endPageId, int startOff, int numElements) {
+ this.startPageId = startPageId;
+ this.endPageId = endPageId;
+ this.startOff = startOff;
+ this.numElements = numElements;
+ this.currentElementIx = 0;
+ this.currentPageIx = 0;
+ this.currentOff = startOff - elementSize;
+
+ int numPages = endPageId - startPageId + 1;
+ if(numPages > pages.length) {
+ pages = new ICachedPage[endPageId - startPageId + 1];
+ elementIndexes = new int[endPageId - startPageId + 1];
+ }
+
+ // fill elementIndexes
+ // first page
+ int cumulElements = (bufferCache.getPageSize() - startOff) / elementSize;
+ elementIndexes[0] = cumulElements-1;
+
+ // middle, full pages
+ for(int i = 1; i < numPages-1; i++) {
+ elementIndexes[i] = elementIndexes[i-1] + (bufferCache.getPageSize() / elementSize);
+ }
+
+ // last page
+ elementIndexes[numPages-1] = numElements-1;
+ }
+
+ @Override
+ public String printInvList(ISerializerDeserializer[] serdes) throws HyracksDataException {
+ int oldCurrentOff = currentOff;
+ int oldCurrentPageId = currentPageIx;
+ int oldCurrentElementIx = currentElementIx;
+
+ currentOff = startOff - elementSize;
+ currentPageIx = 0;
+ currentElementIx = 0;
+
+ StringBuilder strBuilder = new StringBuilder();
+
+ int count = 0;
+ while(hasNext()) {
+ next();
+ count++;
+
+ // System.out.println(offset + " " + currentElementIx);
+
+ for(int i = 0; i < tuple.getFieldCount(); i++) {
+ ByteArrayInputStream inStream = new ByteArrayInputStream(tuple.getFieldData(i), tuple.getFieldStart(i), tuple.getFieldLength(i));
+ DataInput dataIn = new DataInputStream(inStream);
+ Object o = serdes[i].deserialize(dataIn);
+ strBuilder.append(o.toString());
+ if(i+1 < tuple.getFieldCount()) strBuilder.append(",");
+ }
+ strBuilder.append(" ");
+ }
+ //System.out.println("PRINT COUNT: " + count);
+
+ // reset previous state
+ currentOff = oldCurrentOff;
+ currentPageIx = oldCurrentPageId;
+ currentElementIx = oldCurrentElementIx;
+
+ return strBuilder.toString();
+ }
+
+ public String printCurrentElement(ISerializerDeserializer[] serdes) throws HyracksDataException {
+ StringBuilder strBuilder = new StringBuilder();
+ for(int i = 0; i < tuple.getFieldCount(); i++) {
+ ByteArrayInputStream inStream = new ByteArrayInputStream(tuple.getFieldData(i), tuple.getFieldStart(i), tuple.getFieldLength(i));
+ DataInput dataIn = new DataInputStream(inStream);
+ Object o = serdes[i].deserialize(dataIn);
+ strBuilder.append(o.toString());
+ if(i+1 < tuple.getFieldCount()) strBuilder.append(",");
+ }
+ return strBuilder.toString();
+ }
+
+ private int binarySearch(int[] arr, int arrStart, int arrLength, int key) {
+ int mid;
+ int begin = arrStart;
+ int end = arrStart + arrLength - 1;
+
+ while (begin <= end) {
+ mid = (begin + end) / 2;
+ int cmp = (key - arr[mid]);
+ if (cmp < 0) {
+ end = mid - 1;
+ } else if (cmp > 0) {
+ begin = mid + 1;
+ } else {
+ return mid;
+ }
+ }
+
+ if(begin > arr.length - 1) return -1;
+ if(key < arr[begin]) return begin;
+ else return -1;
+ }
+
+ @Override
+ public int compareTo(IInvertedListCursor invListCursor) {
+ return numElements - invListCursor.getNumElements();
+ }
+
+ @Override
+ public int getEndPageId() {
+ return endPageId;
+ }
+
+ @Override
+ public int getNumElements() {
+ return numElements;
+ }
+
+ @Override
+ public int getStartOff() {
+ return startOff;
+ }
+
+ @Override
+ public int getStartPageId() {
+ return startPageId;
+ }
+
+ public int getOffset() {
+ return currentOff;
+ }
+
+ public ICachedPage getPage() {
+ return pages[currentPageIx];
+ }
+
+ @Override
+ public ITupleReference getTuple() {
+ return tuple;
+ }
+}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeTupleReference.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeTupleReference.java
new file mode 100644
index 0000000..0ddde07
--- /dev/null
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/FixedSizeTupleReference.java
@@ -0,0 +1,46 @@
+package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
+
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTrait;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+
+public class FixedSizeTupleReference implements ITupleReference {
+
+ private final ITypeTrait[] typeTraits;
+ private final int[] fieldStartOffsets;
+ private byte[] data;
+ private int startOff;
+
+ public FixedSizeTupleReference(ITypeTrait[] typeTraits) {
+ this.typeTraits = typeTraits;
+ this.fieldStartOffsets = new int[typeTraits.length];
+ this.fieldStartOffsets[0] = 0;
+ for(int i = 1; i < typeTraits.length; i++) {
+ fieldStartOffsets[i] = fieldStartOffsets[i-1] + typeTraits[i-1].getStaticallyKnownDataLength();
+ }
+ }
+
+ public void reset(byte[] data, int startOff) {
+ this.data = data;
+ this.startOff = startOff;
+ }
+
+ @Override
+ public int getFieldCount() {
+ return typeTraits.length;
+ }
+
+ @Override
+ public byte[] getFieldData(int fIdx) {
+ return data;
+ }
+
+ @Override
+ public int getFieldLength(int fIdx) {
+ return typeTraits[fIdx].getStaticallyKnownDataLength();
+ }
+
+ @Override
+ public int getFieldStart(int fIdx) {
+ return startOff + fieldStartOffsets[fIdx];
+ }
+}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java
index 34a2825..779cfbf 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java
@@ -1,146 +1,271 @@
package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.nio.ByteBuffer;
+
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import edu.uci.ics.hyracks.dataflow.common.comm.io.ByteArrayAccessibleOutputStream;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.FrameTupleReference;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
+import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeCursor;
+import edu.uci.ics.hyracks.storage.am.btree.dataflow.PermutingFrameTupleReference;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeOpContext;
+import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
+import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedListBuilder;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedListCursor;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
import edu.uci.ics.hyracks.storage.common.buffercache.ICachedPage;
import edu.uci.ics.hyracks.storage.common.file.BufferedFileHandle;
public class InvertedIndex {
- private int rootPageId = 0;
- private IBufferCache bufferCache;
+
+ private BTree btree;
+ private int rootPageId = 0;
+ private IBufferCache bufferCache;
private int fileId;
-
- public BulkLoadContext beginBulkLoad(IInvertedListBuilder invListBuilder, IBinaryComparator tokenCmp) throws HyracksDataException {
- BulkLoadContext ctx = new BulkLoadContext(invListBuilder, tokenCmp);
- ctx.init(rootPageId, fileId);
- return ctx;
- }
-
- public void bulkLoadAddTuple(BulkLoadContext ctx, ITupleReference tuple, int tokenField, int[] listElementFields) throws HyracksDataException {
-
- // first inverted list, copy token to baaos and start new list
- if(ctx.currentInvListTokenBaaos.size() == 0) {
- ctx.currentInvListStartPageId = ctx.currentPageId;
- ctx.currentInvListStartOffset = ctx.invListBuilder.getPos();
-
- ctx.currentInvListTokenBaaos.reset();
- ctx.currentInvListTokenBaaos.write(tuple.getFieldData(tokenField), tuple.getFieldStart(tokenField), tuple.getFieldLength(tokenField));
-
- if(!ctx.invListBuilder.startNewList(tuple, tokenField)) {
- ctx.pinNextPage();
- ctx.invListBuilder.setTargetBuffer(ctx.currentPage.getBuffer().array(), 0);
- if(!ctx.invListBuilder.startNewList(tuple, tokenField)) {
- throw new IllegalStateException("Failed to create first inverted list.");
- }
- }
- }
-
- // create new inverted list?
- if(ctx.tokenCmp.compare(tuple.getFieldData(tokenField),
- tuple.getFieldStart(tokenField),
- tuple.getFieldLength(tokenField),
- ctx.currentInvListTokenBaaos.getByteArray(),
- 0,
- ctx.currentInvListTokenBaaos.size()) != 0) {
+ private final MultiComparator invListCmp;
+ private final int numTokenFields;
+ private final int numInvListKeys;
+
+ public InvertedIndex(IBufferCache bufferCache, BTree btree, MultiComparator invListCmp) {
+ this.bufferCache = bufferCache;
+ this.btree = btree;
+ this.invListCmp = invListCmp;
+ this.numTokenFields = btree.getMultiComparator().getKeyFieldCount();
+ this.numInvListKeys = invListCmp.getKeyFieldCount();
+ }
- ctx.lastInvListStartPageId = ctx.currentInvListStartPageId;
- ctx.lastInvListStartOffset = ctx.currentInvListStartOffset;
-
- ctx.lastInvListTokenBaaos.reset();
- ctx.lastInvListTokenBaaos.write(ctx.currentInvListTokenBaaos.getByteArray(), 0, ctx.currentInvListTokenBaaos.size());
-
- ctx.currentInvListTokenBaaos.reset();
- ctx.currentInvListTokenBaaos.write(tuple.getFieldData(tokenField), tuple.getFieldStart(tokenField), tuple.getFieldLength(tokenField));
+ public void open(int fileId) {
+ this.fileId = fileId;
+ }
+
+ public BulkLoadContext beginBulkLoad(IInvertedListBuilder invListBuilder, int hyracksFrameSize) throws HyracksDataException {
+ BulkLoadContext ctx = new BulkLoadContext(invListBuilder, hyracksFrameSize);
+ ctx.init(rootPageId, fileId);
+ return ctx;
+ }
+
+ // ASSUMPTIONS:
+ // the first btree.getMultiComparator().getKeyFieldCount() fields in tuple are btree keys (e.g., a string token)
+ // the next invListCmp.getKeyFieldCount() fields in tuple are keys of the inverted list (e.g., primary key)
+ // key fields of inverted list are fixed size
+ public void bulkLoadAddTuple(BulkLoadContext ctx, ITupleReference tuple)
+ throws HyracksDataException {
+
+ // debug
+ //UTF8StringSerializerDeserializer serde = UTF8StringSerializerDeserializer.INSTANCE;
+ //ByteArrayInputStream inStream = new ByteArrayInputStream(tuple.getFieldData(tokenField), tuple.getFieldStart(tokenField), tuple
+ // .getFieldLength(tokenField));
+ //DataInput dataIn = new DataInputStream(inStream);
+ //Object o = serde.deserialize(dataIn);
+ //System.out.println(o.toString());
+
+ // first inverted list, copy token to baaos and start new list
+ if (ctx.currentInvListTokenBaaos.size() == 0) {
+ ctx.currentInvListStartPageId = ctx.currentPageId;
+ ctx.currentInvListStartOffset = ctx.invListBuilder.getPos();
- ctx.lastInvListSize = ctx.invListBuilder.getListSize();
- if(!ctx.invListBuilder.startNewList(tuple, tokenField)) {
- ctx.pinNextPage();
- ctx.invListBuilder.setTargetBuffer(ctx.currentPage.getBuffer().array(), 0);
- if(!ctx.invListBuilder.startNewList(tuple, tokenField)) {
- throw new IllegalStateException("Failed to start new inverted list after switching to a new page.");
- }
- }
-
- ctx.currentInvListStartPageId = ctx.currentPageId;
- ctx.currentInvListStartOffset = ctx.invListBuilder.getPos();
- }
+ // remember current token
+ ctx.currentInvListTokenBaaos.reset();
+ for (int i = 0; i < numTokenFields; i++) {
+ ctx.currentInvListTokenBaaos.write(tuple.getFieldData(i), tuple.getFieldStart(i), tuple
+ .getFieldLength(i));
+ }
- // append to current inverted list
- if(!ctx.invListBuilder.appendElement(tuple, listElementFields)) {
- ctx.pinNextPage();
- ctx.invListBuilder.setTargetBuffer(ctx.currentPage.getBuffer().array(), 0);
- if(!ctx.invListBuilder.appendElement(tuple, listElementFields)) {
- throw new IllegalStateException("Failed to append element to inverted list after switching to a new page.");
- }
- }
- }
-
- // returns size of last inverted list
- public int endBulkLoad(BulkLoadContext ctx) throws HyracksDataException {
- ctx.lastInvListStartPageId = ctx.currentInvListStartPageId;
- ctx.lastInvListStartOffset = ctx.currentInvListStartOffset;
-
- ctx.lastInvListTokenBaaos.reset();
- ctx.lastInvListTokenBaaos.write(ctx.currentInvListTokenBaaos.getByteArray(), 0, ctx.currentInvListTokenBaaos.size());
-
- ctx.deinit();
- return ctx.invListBuilder.getListSize();
- }
-
- public final class BulkLoadContext {
- private int lastInvListSize;
- private int lastInvListStartPageId;
- private int lastInvListStartOffset;
- private final ByteArrayAccessibleOutputStream lastInvListTokenBaaos = new ByteArrayAccessibleOutputStream();
-
- private int currentInvListStartPageId;
- private int currentInvListStartOffset;
- private final ByteArrayAccessibleOutputStream currentInvListTokenBaaos = new ByteArrayAccessibleOutputStream();
-
- private int currentPageId;
- private ICachedPage currentPage;
- private final IInvertedListBuilder invListBuilder;
- private final IBinaryComparator tokenCmp;
-
- public BulkLoadContext(IInvertedListBuilder invListBuilder, IBinaryComparator tokenCmp) {
- this.invListBuilder = invListBuilder;
- this.tokenCmp = tokenCmp;
- }
-
- public void init(int startPageId, int fileId) throws HyracksDataException {
- currentPageId = startPageId;
- currentPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), true);
- invListBuilder.setTargetBuffer(currentPage.getBuffer().array(), 0);
- }
-
- public void deinit() throws HyracksDataException {
- if(currentPage != null) bufferCache.unpin(currentPage);
- }
-
- public void pinNextPage() throws HyracksDataException {
- bufferCache.unpin(currentPage);
- currentPageId++;
- currentPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), true);
- }
-
- public ByteArrayAccessibleOutputStream getLastInvListTokenBaaos() {
- return lastInvListTokenBaaos;
- }
-
- public int getLastInvListStartPageId() {
- return lastInvListStartPageId;
- }
-
- public int getLastInvListStartOffset() {
- return lastInvListStartOffset;
- }
-
- public int getLastInvListSize() {
- return lastInvListSize;
- }
- };
+ if (!ctx.invListBuilder.startNewList(tuple, numTokenFields)) {
+ ctx.pinNextPage();
+ ctx.invListBuilder.setTargetBuffer(ctx.currentPage.getBuffer().array(), 0);
+ if (!ctx.invListBuilder.startNewList(tuple, numTokenFields)) {
+ throw new IllegalStateException("Failed to create first inverted list.");
+ }
+ }
+ }
+
+ // create new inverted list?
+ ctx.currentInvListToken.reset(ctx.currentInvListTokenBaaos.getByteArray(), 0);
+ if (ctx.tokenCmp.compare(tuple, ctx.currentInvListToken) != 0) {
+
+ // create entry in btree for last inverted list
+ createAndInsertBTreeTuple(ctx);
+
+ // remember new token
+ ctx.currentInvListTokenBaaos.reset();
+ for (int i = 0; i < numTokenFields; i++) {
+ ctx.currentInvListTokenBaaos.write(tuple.getFieldData(i), tuple.getFieldStart(i), tuple
+ .getFieldLength(i));
+ }
+
+ // start new list
+ if (!ctx.invListBuilder.startNewList(tuple, numTokenFields)) {
+ ctx.pinNextPage();
+ ctx.invListBuilder.setTargetBuffer(ctx.currentPage.getBuffer().array(), 0);
+ if (!ctx.invListBuilder.startNewList(tuple, numTokenFields)) {
+ throw new IllegalStateException("Failed to start new inverted list after switching to a new page.");
+ }
+ }
+
+ ctx.currentInvListStartPageId = ctx.currentPageId;
+ ctx.currentInvListStartOffset = ctx.invListBuilder.getPos();
+ }
+
+ // append to current inverted list
+ if (!ctx.invListBuilder.appendElement(tuple, numTokenFields, numInvListKeys)) {
+ ctx.pinNextPage();
+ ctx.invListBuilder.setTargetBuffer(ctx.currentPage.getBuffer().array(), 0);
+ if (!ctx.invListBuilder.appendElement(tuple, numTokenFields, numInvListKeys)) {
+ throw new IllegalStateException(
+ "Failed to append element to inverted list after switching to a new page.");
+ }
+ }
+ }
+
+ public boolean openCursor(IBTreeCursor btreeCursor, RangePredicate btreePred, BTreeOpContext btreeOpCtx, IInvertedListCursor invListCursor) throws Exception {
+ btree.search(btreeCursor, btreePred, btreeOpCtx);
+
+ UTF8StringSerializerDeserializer serde = UTF8StringSerializerDeserializer.INSTANCE;
+ ByteArrayInputStream inStream = new ByteArrayInputStream(btreePred.getHighKey().getFieldData(0), btreePred.getHighKey().getFieldStart(0), btreePred.getHighKey().getFieldLength(0));
+ DataInput dataIn = new DataInputStream(inStream);
+ Object o = serde.deserialize(dataIn);
+ System.out.println(o.toString());
+
+ boolean ret = false;
+ if(btreeCursor.hasNext()) {
+
+ btreeCursor.next();
+ ITupleReference frameTuple = btreeCursor.getTuple();
+
+ // TODO: hardcoded mapping of btree fields
+ int startPageId = IntegerSerializerDeserializer.getInt(frameTuple.getFieldData(1), frameTuple.getFieldStart(1));
+ int endPageId = IntegerSerializerDeserializer.getInt(frameTuple.getFieldData(2), frameTuple.getFieldStart(2));
+ int startOff = IntegerSerializerDeserializer.getInt(frameTuple.getFieldData(3), frameTuple.getFieldStart(3));
+ int numElements = IntegerSerializerDeserializer.getInt(frameTuple.getFieldData(4), frameTuple.getFieldStart(4));
+
+ invListCursor.reset(startPageId, endPageId, startOff, numElements);
+ ret = true;
+ }
+ else {
+ invListCursor.reset(0, 0, 0, 0);
+ }
+
+ btreeCursor.close();
+ btreeCursor.reset();
+
+ return ret;
+ }
+
+ public void createAndInsertBTreeTuple(BulkLoadContext ctx) throws HyracksDataException {
+ // build tuple
+ ctx.btreeTupleBuilder.reset();
+ ctx.btreeTupleBuilder.addField(ctx.currentInvListTokenBaaos.getByteArray(), 0, ctx.currentInvListTokenBaaos
+ .size());
+ ctx.btreeTupleBuilder.addField(IntegerSerializerDeserializer.INSTANCE, ctx.currentInvListStartPageId);
+ ctx.btreeTupleBuilder.addField(IntegerSerializerDeserializer.INSTANCE, ctx.currentPageId);
+ ctx.btreeTupleBuilder.addField(IntegerSerializerDeserializer.INSTANCE, ctx.currentInvListStartOffset);
+ ctx.btreeTupleBuilder.addField(IntegerSerializerDeserializer.INSTANCE, ctx.invListBuilder.getListSize());
+
+ // append to buffer
+ ctx.btreeTupleAppender.reset(ctx.btreeTupleBuffer, true);
+ ctx.btreeTupleAppender.append(ctx.btreeTupleBuilder.getFieldEndOffsets(), ctx.btreeTupleBuilder.getByteArray(),
+ 0, ctx.btreeTupleBuilder.getSize());
+
+ // reset tuple reference
+ ctx.btreeFrameTupleReference.reset(ctx.btreeFrameTupleAccessor, 0);
+
+ btree.bulkLoadAddTuple(ctx.btreeBulkLoadCtx, ctx.btreeFrameTupleReference);
+ }
+
+ public void endBulkLoad(BulkLoadContext ctx) throws HyracksDataException {
+ // create entry in btree for last inverted list
+ createAndInsertBTreeTuple(ctx);
+ btree.endBulkLoad(ctx.btreeBulkLoadCtx);
+ ctx.deinit();
+ }
+
+ public IBufferCache getBufferCache() {
+ return bufferCache;
+ }
+
+ public int getInvListsFileId() {
+ return fileId;
+ }
+
+ public MultiComparator getInvListElementCmp() {
+ return invListCmp;
+ }
+
+ public BTree getBTree() {
+ return btree;
+ }
+
+ public final class BulkLoadContext {
+ private final ByteBuffer btreeTupleBuffer;
+ private final ArrayTupleBuilder btreeTupleBuilder;
+ private final FrameTupleAppender btreeTupleAppender;
+ private final FrameTupleAccessor btreeFrameTupleAccessor;
+ private final FrameTupleReference btreeFrameTupleReference = new FrameTupleReference();
+ private BTree.BulkLoadContext btreeBulkLoadCtx;
+
+ private int currentInvListStartPageId;
+ private int currentInvListStartOffset;
+ private final ByteArrayAccessibleOutputStream currentInvListTokenBaaos = new ByteArrayAccessibleOutputStream();
+ private final FixedSizeTupleReference currentInvListToken = new FixedSizeTupleReference(invListCmp.getTypeTraits());
+
+ private int currentPageId;
+ private ICachedPage currentPage;
+ private final IInvertedListBuilder invListBuilder;
+ private final MultiComparator tokenCmp;
+
+ public BulkLoadContext(IInvertedListBuilder invListBuilder, int hyracksFrameSize) {
+ this.invListBuilder = invListBuilder;
+ this.tokenCmp = btree.getMultiComparator();
+ this.btreeTupleBuffer = ByteBuffer.allocate(hyracksFrameSize);
+ this.btreeTupleBuilder = new ArrayTupleBuilder(tokenCmp.getFieldCount());
+ this.btreeTupleAppender = new FrameTupleAppender(hyracksFrameSize);
+ // TODO: dummy record descriptor, serde never used anyway, only need
+ // correct number of fields
+ // tuple contains (token, start page, end page, start offset, num elements)
+ RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] {
+ IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
+ this.btreeFrameTupleAccessor = new FrameTupleAccessor(hyracksFrameSize, recDesc);
+ }
+
+ public void init(int startPageId, int fileId) throws HyracksDataException {
+ btreeBulkLoadCtx = btree.beginBulkLoad(BTree.DEFAULT_FILL_FACTOR, btree.getLeafFrameFactory().getFrame(),
+ btree.getInteriorFrameFactory().getFrame(), btree.getFreePageManager().getMetaDataFrameFactory()
+ .getFrame());
+ currentPageId = startPageId;
+ currentPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), true);
+ currentPage.acquireWriteLatch();
+ invListBuilder.setTargetBuffer(currentPage.getBuffer().array(), 0);
+ btreeFrameTupleAccessor.reset(btreeTupleBuffer);
+ }
+
+ public void deinit() throws HyracksDataException {
+ if (currentPage != null) {
+ currentPage.releaseWriteLatch();
+ bufferCache.unpin(currentPage);
+ }
+ }
+
+ public void pinNextPage() throws HyracksDataException {
+ currentPage.releaseWriteLatch();
+ bufferCache.unpin(currentPage);
+ currentPageId++;
+ currentPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), true);
+ currentPage.acquireWriteLatch();
+ }
+ };
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/SimpleConjunctiveSearcher.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/SimpleConjunctiveSearcher.java
index 47b4310..1692ee7 100644
--- a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/SimpleConjunctiveSearcher.java
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/SimpleConjunctiveSearcher.java
@@ -143,7 +143,7 @@
throw new HyracksDataException(e);
}
- // WARNING: assuming one frame is enough to hold all tokens
+ // WARNING: assuming one frame is big enough to hold all tokens
queryTokenAppender.append(queryTokenBuilder.getFieldEndOffsets(), queryTokenBuilder.getByteArray(), 0,
queryTokenBuilder.getSize());
}
diff --git a/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcher.java b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcher.java
new file mode 100644
index 0000000..1998a7c
--- /dev/null
+++ b/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcher.java
@@ -0,0 +1,343 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import edu.uci.ics.fuzzyjoin.tokenizer.IBinaryTokenizer;
+import edu.uci.ics.fuzzyjoin.tokenizer.IToken;
+import edu.uci.ics.hyracks.api.context.IHyracksStageletContext;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.FrameTupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
+import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeCursor;
+import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeInteriorFrame;
+import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeOpContext;
+import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
+import edu.uci.ics.hyracks.storage.am.btree.impls.RangeSearchCursor;
+import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
+import edu.uci.ics.hyracks.storage.am.common.ophelpers.TreeIndexOp;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedListCursor;
+
+public class TOccurrenceSearcher {
+
+ private final IHyracksStageletContext ctx;
+ private final ArrayTupleBuilder resultTupleBuilder;
+ private final FrameTupleAppender resultTupleAppender;
+ private final FrameTupleAccessor resultFrameAccessor;
+
+ private List<ByteBuffer> newResultBuffers = new ArrayList<ByteBuffer>();
+ private List<ByteBuffer> prevResultBuffers = new ArrayList<ByteBuffer>();
+ private List<ByteBuffer> swap = null;
+ private final ListResultCursor resultCursor = new ListResultCursor();
+ private int maxResultBufIdx = 0;
+
+ private final IBTreeLeafFrame leafFrame;
+ private final IBTreeInteriorFrame interiorFrame;
+ private final IBTreeCursor btreeCursor;
+ private final FrameTupleReference searchKey = new FrameTupleReference();
+ private final RangePredicate btreePred = new RangePredicate(true, null, null, true, true, null, null);
+
+ private final InvertedIndex invIndex;
+ private final IBinaryTokenizer queryTokenizer;
+ private final int occurrenceThreshold;
+
+ private final int cursorCacheSize = 10;
+ private ArrayList<IInvertedListCursor> invListCursorCache = new ArrayList<IInvertedListCursor>(cursorCacheSize);
+ private ArrayList<IInvertedListCursor> invListCursors = new ArrayList<IInvertedListCursor>(cursorCacheSize);
+
+ public TOccurrenceSearcher(IHyracksStageletContext ctx, InvertedIndex invIndex, IBinaryTokenizer queryTokenizer, int occurrenceThreshold) {
+ this.ctx = ctx;
+ this.invIndex = invIndex;
+ this.queryTokenizer = queryTokenizer;
+ this.occurrenceThreshold = occurrenceThreshold;
+
+ leafFrame = invIndex.getBTree().getLeafFrameFactory().getFrame();
+ interiorFrame = invIndex.getBTree().getInteriorFrameFactory().getFrame();
+
+ btreeCursor = new RangeSearchCursor(leafFrame);
+ resultTupleAppender = new FrameTupleAppender(ctx.getFrameSize());
+ resultTupleBuilder = new ArrayTupleBuilder(1); // TODO: fix hardcoded
+ newResultBuffers.add(ctx.allocateFrame());
+ prevResultBuffers.add(ctx.allocateFrame());
+
+ MultiComparator searchCmp = invIndex.getBTree().getMultiComparator();
+ btreePred.setLowKeyComparator(searchCmp);
+ btreePred.setHighKeyComparator(searchCmp);
+ btreePred.setLowKey(searchKey, true);
+ btreePred.setHighKey(searchKey, true);
+
+ ISerializerDeserializer[] valueSerde = { IntegerSerializerDeserializer.INSTANCE };
+ RecordDescriptor valueRecDesc = new RecordDescriptor(valueSerde);
+ resultFrameAccessor = new FrameTupleAccessor(ctx.getFrameSize(), valueRecDesc);
+
+ // pre-create cursor objects
+ for (int i = 0; i < cursorCacheSize; i++) {
+ invListCursorCache.add(new FixedSizeElementInvertedListCursor(invIndex.getBufferCache(), invIndex
+ .getInvListsFileId(), invIndex.getInvListElementCmp().getTypeTraits()));
+ }
+ }
+
+ public void search(ITupleReference queryTuple, int queryFieldIndex) throws Exception {
+
+ // parse query, TODO: this parsing is too simple
+ RecordDescriptor queryTokenRecDesc = new RecordDescriptor(
+ new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+
+ ArrayTupleBuilder queryTokenBuilder = new ArrayTupleBuilder(queryTokenRecDesc.getFields().length);
+ DataOutput queryTokenDos = queryTokenBuilder.getDataOutput();
+ FrameTupleAppender queryTokenAppender = new FrameTupleAppender(ctx.getFrameSize());
+ ByteBuffer queryTokenFrame = ctx.allocateFrame();
+ queryTokenAppender.reset(queryTokenFrame, true);
+
+ queryTokenizer.reset(queryTuple.getFieldData(queryFieldIndex), queryTuple.getFieldStart(queryFieldIndex),
+ queryTuple.getFieldLength(queryFieldIndex));
+ while (queryTokenizer.hasNext()) {
+ queryTokenizer.next();
+
+ queryTokenBuilder.reset();
+ try {
+ IToken token = queryTokenizer.getToken();
+ token.serializeToken(queryTokenDos);
+ queryTokenBuilder.addFieldEndOffset();
+ // WARNING: assuming one frame is big enough to hold all tokens
+ queryTokenAppender.append(queryTokenBuilder.getFieldEndOffsets(), queryTokenBuilder.getByteArray(), 0,
+ queryTokenBuilder.getSize());
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ FrameTupleAccessor queryTokenAccessor = new FrameTupleAccessor(ctx.getFrameSize(), queryTokenRecDesc);
+ queryTokenAccessor.reset(queryTokenFrame);
+ int numQueryTokens = queryTokenAccessor.getTupleCount();
+
+ maxResultBufIdx = 0;
+
+ // expand cursor cache if necessary
+ if (numQueryTokens > invListCursorCache.size()) {
+ int diff = numQueryTokens - invListCursorCache.size();
+ for (int i = 0; i < diff; i++) {
+ invListCursorCache.add(new FixedSizeElementInvertedListCursor(invIndex.getBufferCache(), invIndex
+ .getInvListsFileId(), invIndex.getInvListElementCmp().getTypeTraits()));
+ }
+ }
+
+ BTreeOpContext btreeOpCtx = invIndex.getBTree().createOpContext(TreeIndexOp.TI_SEARCH, leafFrame,
+ interiorFrame, null);
+ invListCursors.clear();
+ System.out.println("NUM QUERY TOKENS: " + numQueryTokens);
+ for (int i = 0; i < numQueryTokens; i++) {
+ searchKey.reset(queryTokenAccessor, i);
+ invIndex.openCursor(btreeCursor, btreePred, btreeOpCtx, invListCursorCache.get(i));
+ invListCursors.add(invListCursorCache.get(i));
+ }
+ Collections.sort(invListCursors);
+
+ ISerializerDeserializer[] invListSerdes = new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE };
+ for (int i = 0; i < numQueryTokens; i++) {
+ System.out.println("LISTSIZE: " + invListCursors.get(i).getNumElements());
+
+ invListCursors.get(i).pinPagesSync();
+ String s = invListCursors.get(i).printInvList(invListSerdes);
+ System.out.println(s);
+ invListCursors.get(i).unpinPages();
+ }
+
+ int numPrefixTokens = numQueryTokens - occurrenceThreshold + 1;
+
+ resultTupleAppender.reset(newResultBuffers.get(0), true);
+
+ /*
+ try {
+ // append first inverted list to temporary results
+ searchKey.reset(queryTokenAccessor, 0);
+ btree.search(btreeCursor, pred, opCtx);
+ while (btreeCursor.hasNext()) {
+ btreeCursor.next();
+ maxResultBufIdx = appendTupleToNewResults(btreeCursor, maxResultBufIdx);
+ }
+ btreeCursor.close();
+ btreeCursor.reset();
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ */
+
+
+ resultTupleAppender.reset(newResultBuffers.get(0), true);
+
+ for(int i = 0; i < numPrefixTokens; i++) {
+
+ }
+
+ }
+
+
+ /*
+ private int appendTupleToNewResults(IBTreeCursor btreeCursor, int newBufIdx) throws IOException {
+ ByteBuffer newCurrentBuffer = newResultBuffers.get(newBufIdx);
+
+ ITupleReference tuple = btreeCursor.getTuple();
+ resultTupleBuilder.reset();
+ DataOutput dos = resultTupleBuilder.getDataOutput();
+ for (int i = 0; i < numValueFields; i++) {
+ int fIdx = numKeyFields + i;
+ dos.write(tuple.getFieldData(fIdx), tuple.getFieldStart(fIdx), tuple.getFieldLength(fIdx));
+ resultTupleBuilder.addFieldEndOffset();
+ }
+
+ if (!resultTupleAppender.append(resultTupleBuilder.getFieldEndOffsets(), resultTupleBuilder.getByteArray(), 0,
+ resultTupleBuilder.getSize())) {
+ newBufIdx++;
+ if (newBufIdx >= newResultBuffers.size()) {
+ newResultBuffers.add(ctx.allocateFrame());
+ }
+ newCurrentBuffer = newResultBuffers.get(newBufIdx);
+ resultTupleAppender.reset(newCurrentBuffer, true);
+ if (!resultTupleAppender.append(resultTupleBuilder.getFieldEndOffsets(), resultTupleBuilder.getByteArray(),
+ 0, resultTupleBuilder.getSize())) {
+ throw new IllegalStateException();
+ }
+ }
+
+ return newBufIdx;
+ }
+ */
+
+
+ /*
+ private int mergeInvList(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers, int maxPrevBufIdx, List<ByteBuffer> newResultBuffers) throws IOException, Exception {
+
+ int newBufIdx = 0;
+ ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
+
+ int prevBufIdx = 0;
+ ByteBuffer prevCurrentBuffer = prevResultBuffers.get(0);
+
+ resultTupleBuilder.reset();
+ resultTupleAppender.reset(newCurrentBuffer, true);
+ resultFrameAccessor.reset(prevCurrentBuffer);
+
+ // WARNING: not very efficient but good enough for the first cut
+ boolean advanceCursor = true;
+ boolean advancePrevResult = false;
+ int resultTidx = 0;
+
+ while ((!advanceCursor || invListCursor.hasNext()) && prevBufIdx <= maxPrevBufIdx
+ && resultTidx < resultFrameAccessor.getTupleCount()) {
+
+ if (advanceCursor)
+ invListCursor.next();
+
+ ICachedPage invListPage = invListCursor.getPage();
+ int invListOff = invListCursor.getOffset();
+
+ int cmp = 0;
+ int valueFields = 1;
+ for (int i = 0; i < valueFields; i++) {
+
+ int tupleFidx = numKeyFields + i;
+ cmp = valueCmps[i].compare(tuple.getFieldData(tupleFidx), tuple.getFieldStart(tupleFidx),
+ tuple.getFieldLength(tupleFidx), resultFrameAccessor.getBuffer().array(),
+ resultFrameAccessor.getTupleStartOffset(resultTidx) + resultFrameAccessor.getFieldSlotsLength()
+ + resultFrameAccessor.getFieldStartOffset(resultTidx, i),
+ resultFrameAccessor.getFieldLength(resultTidx, i));
+ if (cmp != 0)
+ break;
+ }
+
+ // match found
+ if (cmp == 0) {
+ newBufIdx = appendTupleToNewResults(btreeCursor, newBufIdx);
+
+ advanceCursor = true;
+ advancePrevResult = true;
+ } else {
+ if (cmp < 0) {
+ advanceCursor = true;
+ advancePrevResult = false;
+ } else {
+ advanceCursor = false;
+ advancePrevResult = true;
+ }
+ }
+
+ if (advancePrevResult) {
+ resultTidx++;
+ if (resultTidx >= resultFrameAccessor.getTupleCount()) {
+ prevBufIdx++;
+ if (prevBufIdx <= maxPrevBufIdx) {
+ prevCurrentBuffer = prevResultBuffers.get(prevBufIdx);
+ resultFrameAccessor.reset(prevCurrentBuffer);
+ resultTidx = 0;
+ }
+ }
+ }
+ }
+
+ return newBufIdx;
+ }
+
+ private int appendTupleToNewResults(IBTreeCursor btreeCursor, int newBufIdx) throws IOException {
+ ByteBuffer newCurrentBuffer = newResultBuffers.get(newBufIdx);
+
+ ITupleReference tuple = btreeCursor.getTuple();
+ resultTupleBuilder.reset();
+ DataOutput dos = resultTupleBuilder.getDataOutput();
+ for (int i = 0; i < numValueFields; i++) {
+ int fIdx = numKeyFields + i;
+ dos.write(tuple.getFieldData(fIdx), tuple.getFieldStart(fIdx), tuple.getFieldLength(fIdx));
+ resultTupleBuilder.addFieldEndOffset();
+ }
+
+ if (!resultTupleAppender.append(resultTupleBuilder.getFieldEndOffsets(), resultTupleBuilder.getByteArray(), 0,
+ resultTupleBuilder.getSize())) {
+ newBufIdx++;
+ if (newBufIdx >= newResultBuffers.size()) {
+ newResultBuffers.add(ctx.allocateFrame());
+ }
+ newCurrentBuffer = newResultBuffers.get(newBufIdx);
+ resultTupleAppender.reset(newCurrentBuffer, true);
+ if (!resultTupleAppender.append(resultTupleBuilder.getFieldEndOffsets(), resultTupleBuilder.getByteArray(),
+ 0, resultTupleBuilder.getSize())) {
+ throw new IllegalStateException();
+ }
+ }
+
+ return newBufIdx;
+ }
+ */
+
+ /*
+ * @Override public IInvertedIndexResultCursor getResultCursor() {
+ * resultCursor.setResults(newResultBuffers, maxResultBufIdx + 1); return
+ * resultCursor; }
+ */
+}
diff --git a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeTest.java b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeTest.java
index 19b8fff..505521c 100644
--- a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeTest.java
+++ b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeTest.java
@@ -133,7 +133,7 @@
IBTreeInteriorFrame interiorFrame = interiorFrameFactory.getFrame();
ITreeIndexMetaDataFrame metaFrame = metaFrameFactory.getFrame();
- IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0);
+ IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0, metaFrameFactory);
BTree btree = new BTree(bufferCache, freePageManager, interiorFrameFactory,
leafFrameFactory, cmp);
@@ -371,7 +371,7 @@
IBTreeInteriorFrame interiorFrame = interiorFrameFactory.getFrame();
ITreeIndexMetaDataFrame metaFrame = metaFrameFactory.getFrame();
- IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0);
+ IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0, metaFrameFactory);
BTree btree = new BTree(bufferCache, freePageManager, interiorFrameFactory,
leafFrameFactory, cmp);
@@ -578,7 +578,7 @@
IBTreeInteriorFrame interiorFrame = interiorFrameFactory.getFrame();
ITreeIndexMetaDataFrame metaFrame = metaFrameFactory.getFrame();
- IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0);
+ IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0, metaFrameFactory);
BTree btree = new BTree(bufferCache, freePageManager, interiorFrameFactory,
leafFrameFactory, cmp);
@@ -778,7 +778,7 @@
IBTreeInteriorFrame interiorFrame = interiorFrameFactory.getFrame();
ITreeIndexMetaDataFrame metaFrame = metaFrameFactory.getFrame();
- IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0);
+ IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0, metaFrameFactory);
BTree btree = new BTree(bufferCache, freePageManager, interiorFrameFactory,
leafFrameFactory, cmp);
@@ -965,7 +965,7 @@
IBTreeInteriorFrame interiorFrame = interiorFrameFactory.getFrame();
ITreeIndexMetaDataFrame metaFrame = metaFrameFactory.getFrame();
- IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0);
+ IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0, metaFrameFactory);
BTree btree = new BTree(bufferCache, freePageManager, interiorFrameFactory,
leafFrameFactory, cmp);
@@ -1141,7 +1141,7 @@
IBTreeInteriorFrame interiorFrame = interiorFrameFactory.getFrame();
ITreeIndexMetaDataFrame metaFrame = metaFrameFactory.getFrame();
- IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0);
+ IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0, metaFrameFactory);
BTree btree = new BTree(bufferCache, freePageManager, interiorFrameFactory,
leafFrameFactory, cmp);
diff --git a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/RangeSearchCursorTest.java b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/RangeSearchCursorTest.java
index 2baa4bb..320fa1d 100644
--- a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/RangeSearchCursorTest.java
+++ b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/RangeSearchCursorTest.java
@@ -148,7 +148,7 @@
MultiComparator cmp = new MultiComparator(typeTraits, cmps);
- IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0);
+ IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0, metaFrameFactory);
BTree btree = new BTree(bufferCache, freePageManager, interiorFrameFactory,
leafFrameFactory, cmp);
@@ -255,7 +255,7 @@
MultiComparator cmp = new MultiComparator(typeTraits, cmps);
- IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0);
+ IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0, metaFrameFactory);
BTree btree = new BTree(bufferCache, freePageManager, interiorFrameFactory,
leafFrameFactory, cmp);
@@ -364,7 +364,7 @@
MultiComparator cmp = new MultiComparator(typeTraits, cmps);
- IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0);
+ IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0, metaFrameFactory);
BTree btree = new BTree(bufferCache, freePageManager, interiorFrameFactory,
leafFrameFactory, cmp);
diff --git a/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/AbstractInvIndexTest.java b/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/AbstractInvIndexTest.java
new file mode 100644
index 0000000..0ea886d
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/AbstractInvIndexTest.java
@@ -0,0 +1,29 @@
+package edu.uci.ics.hyracks.storage.am.invertedindex;
+
+import java.io.File;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+
+import org.junit.AfterClass;
+
+public abstract class AbstractInvIndexTest {
+
+ protected final static SimpleDateFormat simpleDateFormat = new SimpleDateFormat("ddMMyy-hhmmssSS");
+ protected final static String tmpDir = System.getProperty("java.io.tmpdir");
+ protected final static String sep = System.getProperty("file.separator");
+ protected final static String baseFileName = tmpDir + sep + simpleDateFormat.format(new Date());
+ protected final static String btreeFileName = baseFileName + "btree";
+ protected final static String invListsFileName = baseFileName + "invlists";
+
+ protected void print(String str) {
+ System.out.print(str);
+ }
+
+ @AfterClass
+ public static void cleanup() throws Exception {
+ File btreeFile = new File(btreeFileName);
+ btreeFile.deleteOnExit();
+ File invListsFile = new File(invListsFileName);
+ invListsFile.deleteOnExit();
+ }
+}
diff --git a/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/BulkLoadTest.java b/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/BulkLoadTest.java
new file mode 100644
index 0000000..17f45e4
--- /dev/null
+++ b/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/BulkLoadTest.java
@@ -0,0 +1,320 @@
+package edu.uci.ics.hyracks.storage.am.invertedindex;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.File;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.junit.Test;
+
+import edu.uci.ics.fuzzyjoin.tokenizer.DelimitedUTF8StringBinaryTokenizer;
+import edu.uci.ics.fuzzyjoin.tokenizer.IBinaryTokenizer;
+import edu.uci.ics.fuzzyjoin.tokenizer.ITokenFactory;
+import edu.uci.ics.fuzzyjoin.tokenizer.UTF8WordTokenFactory;
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.context.IHyracksStageletContext;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTrait;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.value.TypeTrait;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.FrameTupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.comparators.IntegerBinaryComparatorFactory;
+import edu.uci.ics.hyracks.dataflow.common.data.comparators.UTF8StringBinaryComparatorFactory;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
+import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeInteriorFrame;
+import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeInteriorFrameFactory;
+import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame;
+import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrameFactory;
+import edu.uci.ics.hyracks.storage.am.btree.frames.NSMInteriorFrameFactory;
+import edu.uci.ics.hyracks.storage.am.btree.frames.NSMLeafFrameFactory;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
+import edu.uci.ics.hyracks.storage.am.common.api.IFreePageManager;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexMetaDataFrame;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexMetaDataFrameFactory;
+import edu.uci.ics.hyracks.storage.am.common.frames.LIFOMetaDataFrameFactory;
+import edu.uci.ics.hyracks.storage.am.common.freepage.LinkedListFreePageManager;
+import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
+import edu.uci.ics.hyracks.storage.am.common.tuples.TypeAwareTupleWriterFactory;
+import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedListBuilder;
+import edu.uci.ics.hyracks.storage.am.invertedindex.impls.FixedSizeElementInvertedListBuilder;
+import edu.uci.ics.hyracks.storage.am.invertedindex.impls.InvertedIndex;
+import edu.uci.ics.hyracks.storage.am.invertedindex.impls.TOccurrenceSearcher;
+import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
+import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
+import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder;
+import edu.uci.ics.hyracks.test.support.TestUtils;
+
+public class BulkLoadTest extends AbstractInvIndexTest {
+ // testing params
+ private static final int PAGE_SIZE = 256;
+ private static final int NUM_PAGES = 10;
+ private static final int HYRACKS_FRAME_SIZE = 256;
+
+ // realistic params
+ // private static final int PAGE_SIZE = 65536;
+ //private static final int PAGE_SIZE = 32768;
+ //private static final int NUM_PAGES = 10;
+ //private static final int HYRACKS_FRAME_SIZE = 32768;
+ private IHyracksStageletContext stageletCtx = TestUtils.create(HYRACKS_FRAME_SIZE);
+
+ @Test
+ public void test01() throws Exception {
+
+ TestStorageManagerComponentHolder.init(PAGE_SIZE, NUM_PAGES);
+ IBufferCache bufferCache = TestStorageManagerComponentHolder.getBufferCache(stageletCtx);
+ IFileMapProvider fmp = TestStorageManagerComponentHolder.getFileMapProvider(stageletCtx);
+
+ // create file refs
+ System.out.println(btreeFileName);
+ FileReference btreeFile = new FileReference(new File(btreeFileName));
+ bufferCache.createFile(btreeFile);
+ int btreeFileId = fmp.lookupFileId(btreeFile);
+ bufferCache.openFile(btreeFileId);
+
+ System.out.println(invListsFileName);
+ FileReference invListsFile = new FileReference(new File(invListsFileName));
+ bufferCache.createFile(invListsFile);
+ int invListsFileId = fmp.lookupFileId(invListsFile);
+ bufferCache.openFile(invListsFileId);
+
+ // declare btree fields
+ int fieldCount = 5;
+ ITypeTrait[] typeTraits = new ITypeTrait[fieldCount];
+ typeTraits[0] = new TypeTrait(ITypeTrait.VARIABLE_LENGTH);
+ typeTraits[1] = new TypeTrait(4);
+ typeTraits[2] = new TypeTrait(4);
+ typeTraits[3] = new TypeTrait(4);
+ typeTraits[4] = new TypeTrait(4);
+
+ // declare btree keys
+ int keyFieldCount = 1;
+ IBinaryComparator[] cmps = new IBinaryComparator[keyFieldCount];
+ cmps[0] = UTF8StringBinaryComparatorFactory.INSTANCE.createBinaryComparator();
+
+ MultiComparator cmp = new MultiComparator(typeTraits, cmps);
+
+ TypeAwareTupleWriterFactory tupleWriterFactory = new TypeAwareTupleWriterFactory(typeTraits);
+ IBTreeLeafFrameFactory leafFrameFactory = new NSMLeafFrameFactory(tupleWriterFactory);
+ // IBTreeLeafFrameFactory leafFrameFactory = new
+ // FieldPrefixNSMLeafFrameFactory(tupleWriterFactory);
+ IBTreeInteriorFrameFactory interiorFrameFactory = new NSMInteriorFrameFactory(tupleWriterFactory);
+ ITreeIndexMetaDataFrameFactory metaFrameFactory = new LIFOMetaDataFrameFactory();
+
+ IBTreeLeafFrame leafFrame = leafFrameFactory.getFrame();
+ IBTreeInteriorFrame interiorFrame = interiorFrameFactory.getFrame();
+ ITreeIndexMetaDataFrame metaFrame = metaFrameFactory.getFrame();
+
+ IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, btreeFileId, 0, metaFrameFactory);
+
+ BTree btree = new BTree(bufferCache, freePageManager, interiorFrameFactory, leafFrameFactory, cmp);
+ btree.create(btreeFileId, leafFrame, metaFrame);
+ btree.open(btreeFileId);
+
+ int invListFields = 1;
+ ITypeTrait[] invListTypeTraits = new ITypeTrait[invListFields];
+ invListTypeTraits[0] = new TypeTrait(4);
+
+ int invListKeys = 1;
+ IBinaryComparator[] invListBinCmps = new IBinaryComparator[invListKeys];
+ invListBinCmps[0] = IntegerBinaryComparatorFactory.INSTANCE.createBinaryComparator();
+
+ MultiComparator invListCmp = new MultiComparator(invListTypeTraits, invListBinCmps);
+
+ InvertedIndex invIndex = new InvertedIndex(bufferCache, btree, invListCmp);
+ invIndex.open(invListsFileId);
+
+ Random rnd = new Random();
+ rnd.setSeed(50);
+
+ ByteBuffer frame = stageletCtx.allocateFrame();
+ FrameTupleAppender appender = new FrameTupleAppender(stageletCtx.getFrameSize());
+ ArrayTupleBuilder tb = new ArrayTupleBuilder(2);
+ DataOutput dos = tb.getDataOutput();
+
+ ISerializerDeserializer[] insertSerde = { UTF8StringSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE };
+ RecordDescriptor insertRecDesc = new RecordDescriptor(insertSerde);
+ IFrameTupleAccessor accessor = new FrameTupleAccessor(stageletCtx.getFrameSize(), insertRecDesc);
+ accessor.reset(frame);
+ FrameTupleReference tuple = new FrameTupleReference();
+
+ List<String> tokens = new ArrayList<String>();
+ tokens.add("compilers");
+ tokens.add("computer");
+ tokens.add("databases");
+ tokens.add("fast");
+ tokens.add("hyracks");
+ tokens.add("major");
+ tokens.add("science");
+ tokens.add("systems");
+ tokens.add("university");
+
+ int maxId = 1000;
+ int addProb = 0;
+ int addProbStep = 2;
+
+ IInvertedListBuilder invListBuilder = new FixedSizeElementInvertedListBuilder(invListTypeTraits);
+ InvertedIndex.BulkLoadContext ctx = invIndex.beginBulkLoad(invListBuilder, HYRACKS_FRAME_SIZE);
+
+ int totalElements = 0;
+ int tokenField = 0;
+ int[] elementFields = { 1 };
+ for (int i = 0; i < tokens.size(); i++) {
+
+ addProb += addProbStep;
+ StringBuilder strBuilder = new StringBuilder();
+ for (int j = 0; j < maxId; j++) {
+ if ((Math.abs(rnd.nextInt()) % addProb) == 0) {
+
+ totalElements++;
+
+ tb.reset();
+ UTF8StringSerializerDeserializer.INSTANCE.serialize(tokens.get(i), dos);
+ tb.addFieldEndOffset();
+ IntegerSerializerDeserializer.INSTANCE.serialize(j, dos);
+ tb.addFieldEndOffset();
+
+ //strBuilder.append(j + " ");
+
+ appender.reset(frame, true);
+ appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize());
+
+ tuple.reset(accessor, 0);
+
+ try {
+ invIndex.bulkLoadAddTuple(ctx, tuple);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+ }
+ System.out.println(tokens.get(i));
+ System.out.println(strBuilder.toString());
+ }
+ invIndex.endBulkLoad(ctx);
+
+ int numPages = btree.getFreePageManager().getMaxPage(metaFrame);
+ System.out.println("NUMPAGES: " + numPages);
+ System.out.println("TOTAL ELEMENTS: " + totalElements);
+
+ // --------------------------- TEST A
+ // build query as tuple reference
+ ISerializerDeserializer[] querySerde = { UTF8StringSerializerDeserializer.INSTANCE };
+ RecordDescriptor queryRecDesc = new RecordDescriptor(querySerde);
+
+ FrameTupleAppender queryAppender = new FrameTupleAppender(stageletCtx.getFrameSize());
+ ArrayTupleBuilder queryTb = new ArrayTupleBuilder(querySerde.length);
+ DataOutput queryDos = queryTb.getDataOutput();
+
+ IFrameTupleAccessor queryAccessor = new FrameTupleAccessor(stageletCtx.getFrameSize(), queryRecDesc);
+ queryAccessor.reset(frame);
+ FrameTupleReference queryTuple = new FrameTupleReference();
+
+ String query = "computer hyracks fast blubb";
+
+ ITokenFactory tokenFactory = new UTF8WordTokenFactory();
+ IBinaryTokenizer queryTokenizer = new DelimitedUTF8StringBinaryTokenizer(true, false, tokenFactory);
+
+ queryTb.reset();
+ UTF8StringSerializerDeserializer.INSTANCE.serialize(query, queryDos);
+ queryTb.addFieldEndOffset();
+
+ queryAppender.reset(frame, true);
+ queryAppender.append(queryTb.getFieldEndOffsets(), queryTb.getByteArray(), 0, queryTb.getSize());
+ queryTuple.reset(queryAccessor, 0);
+
+ UTF8StringSerializerDeserializer serde = UTF8StringSerializerDeserializer.INSTANCE;
+ ByteArrayInputStream inStream = new ByteArrayInputStream(queryTuple.getFieldData(0), queryTuple.getFieldStart(0), queryTuple.getFieldLength(0));
+ DataInput dataIn = new DataInputStream(inStream);
+ Object o = serde.deserialize(dataIn);
+ System.out.println(o.toString());
+
+
+ TOccurrenceSearcher searcher = new TOccurrenceSearcher(stageletCtx, invIndex, queryTokenizer, 1);
+
+ long timeStart = System.currentTimeMillis();
+ searcher.search(queryTuple, 0);
+ long timeEnd = System.currentTimeMillis();
+ System.out.println("SEARCH TIME: " + (timeEnd - timeStart) + "ms");
+
+
+
+
+ /*
+ // ------------------------- TEST B
+ IInvertedListCursor invListCursor = new FixedSizeElementInvertedListCursor(bufferCache, invListsFileId, invListTypeTraits);
+
+ ISerializerDeserializer[] btreeSerde = { UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] invListSerdes = { IntegerSerializerDeserializer.INSTANCE };
+
+ System.out.println("ORDERED SCAN:");
+ IBTreeCursor scanCursor = new RangeSearchCursor(leafFrame);
+ RangePredicate nullPred = new RangePredicate(true, null, null, true,
+ true, null, null);
+ BTreeOpContext searchOpCtx = btree.createOpContext(TreeIndexOp.TI_SEARCH,
+ leafFrame, interiorFrame, null);
+ btree.search(scanCursor, nullPred, searchOpCtx);
+ try {
+ while (scanCursor.hasNext()) {
+ scanCursor.next();
+ ITupleReference frameTuple = scanCursor.getTuple();
+ String rec = cmp.printTuple(frameTuple, btreeSerde);
+ System.out.println(rec);
+
+ int startPageId = IntegerSerializerDeserializer.getInt(frameTuple.getFieldData(1), frameTuple.getFieldStart(1));
+ int endPageId = IntegerSerializerDeserializer.getInt(frameTuple.getFieldData(2), frameTuple.getFieldStart(2));
+ int startOff = IntegerSerializerDeserializer.getInt(frameTuple.getFieldData(3), frameTuple.getFieldStart(3));
+ int numElements = IntegerSerializerDeserializer.getInt(frameTuple.getFieldData(4), frameTuple.getFieldStart(4));
+
+ invListCursor.reset(startPageId, endPageId, startOff, numElements);
+ invListCursor.pinPagesSync();
+ try {
+ String invList = invListCursor.printInvList(invListSerdes);
+ System.out.println(invList);
+
+ for(int i = 0; i < numElements; i++) {
+ invListCursor.positionCursor(i);
+ String curr = invListCursor.printCurrentElement(invListSerdes);
+ System.out.print(curr + " ");
+ }
+ System.out.println();
+
+ ByteBuffer buf = ByteBuffer.allocate(4);
+ IBinaryComparator intCmp = IntegerBinaryComparatorFactory.INSTANCE.createBinaryComparator();
+ for(int i = 0; i < maxId; i++) {
+ buf.putInt(0, i);
+ invListCursor.reset(startPageId, endPageId, startOff, numElements);
+ if(invListCursor.containsKey(buf.array(), 0, 4, intCmp)) {
+ System.out.print(i + " ");
+ }
+ }
+ System.out.println();
+
+ } finally {
+ invListCursor.unpinPages();
+ }
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ } finally {
+ scanCursor.close();
+ }
+ */
+
+ btree.close();
+ bufferCache.closeFile(btreeFileId);
+ bufferCache.closeFile(invListsFileId);
+ bufferCache.close();
+ }
+}
diff --git a/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchers/SimpleConjunctiveSearcherTest.java b/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/SimpleConjunctiveSearcherTest.java
similarity index 94%
rename from hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchers/SimpleConjunctiveSearcherTest.java
rename to hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/SimpleConjunctiveSearcherTest.java
index 373e3b6..1ebb2f3 100644
--- a/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchers/SimpleConjunctiveSearcherTest.java
+++ b/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/SimpleConjunctiveSearcherTest.java
@@ -13,7 +13,7 @@
* limitations under the License.
*/
-package edu.uci.ics.hyracks.storage.am.invertedindex.searchers;
+package edu.uci.ics.hyracks.storage.am.invertedindex;
import java.io.ByteArrayInputStream;
import java.io.DataInput;
@@ -66,7 +66,6 @@
import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexResultCursor;
import edu.uci.ics.hyracks.storage.am.invertedindex.impls.SimpleConjunctiveSearcher;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
-import edu.uci.ics.hyracks.storage.common.buffercache.ICacheMemoryAllocator;
import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder;
import edu.uci.ics.hyracks.test.support.TestUtils;
@@ -83,18 +82,7 @@
private static final int PAGE_SIZE = 32768;
private static final int NUM_PAGES = 10;
private static final int HYRACKS_FRAME_SIZE = 32768;
- private IHyracksStageletContext ctx = TestUtils.create(HYRACKS_FRAME_SIZE);
-
- public class BufferAllocator implements ICacheMemoryAllocator {
- @Override
- public ByteBuffer[] allocate(int pageSize, int numPages) {
- ByteBuffer[] buffers = new ByteBuffer[numPages];
- for (int i = 0; i < numPages; ++i) {
- buffers[i] = ByteBuffer.allocate(pageSize);
- }
- return buffers;
- }
- }
+ private IHyracksStageletContext ctx = TestUtils.create(HYRACKS_FRAME_SIZE);
@Test
public void test01() throws Exception {
@@ -102,7 +90,7 @@
TestStorageManagerComponentHolder.init(PAGE_SIZE, NUM_PAGES);
IBufferCache bufferCache = TestStorageManagerComponentHolder.getBufferCache(ctx);
IFileMapProvider fmp = TestStorageManagerComponentHolder.getFileMapProvider(ctx);
- FileReference file = new FileReference(new File(fileName));
+ FileReference file = new FileReference(new File(btreeFileName));
bufferCache.createFile(file);
int fileId = fmp.lookupFileId(file);
bufferCache.openFile(fileId);
@@ -121,9 +109,7 @@
MultiComparator cmp = new MultiComparator(typeTraits, cmps);
- TypeAwareTupleWriterFactory tupleWriterFactory = new TypeAwareTupleWriterFactory(typeTraits);
- // SimpleTupleWriterFactory tupleWriterFactory = new
- // SimpleTupleWriterFactory();
+ TypeAwareTupleWriterFactory tupleWriterFactory = new TypeAwareTupleWriterFactory(typeTraits);
IBTreeLeafFrameFactory leafFrameFactory = new NSMLeafFrameFactory(tupleWriterFactory);
// IBTreeLeafFrameFactory leafFrameFactory = new
// FieldPrefixNSMLeafFrameFactory(tupleWriterFactory);
@@ -134,7 +120,7 @@
IBTreeInteriorFrame interiorFrame = interiorFrameFactory.getFrame();
ITreeIndexMetaDataFrame metaFrame = metaFrameFactory.getFrame();
- IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0);
+ IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0, metaFrameFactory);
BTree btree = new BTree(bufferCache, freePageManager, interiorFrameFactory, leafFrameFactory, cmp);
btree.create(fileId, leafFrame, metaFrame);
diff --git a/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchers/AbstractInvIndexTest.java b/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchers/AbstractInvIndexTest.java
deleted file mode 100644
index 5d2cfff..0000000
--- a/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/searchers/AbstractInvIndexTest.java
+++ /dev/null
@@ -1,25 +0,0 @@
-package edu.uci.ics.hyracks.storage.am.invertedindex.searchers;
-
-import java.io.File;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-
-import org.junit.AfterClass;
-
-public abstract class AbstractInvIndexTest {
-
- protected final static SimpleDateFormat simpleDateFormat = new SimpleDateFormat("ddMMyy-hhmmssSS");
- protected final static String tmpDir = System.getProperty("java.io.tmpdir");
- protected final static String sep = System.getProperty("file.separator");
- protected final static String fileName = tmpDir + sep + simpleDateFormat.format(new Date());
-
- protected void print(String str) {
- System.out.print(str);
- }
-
- @AfterClass
- public static void cleanup() throws Exception {
- File f = new File(fileName);
- f.deleteOnExit();
- }
-}
diff --git a/hyracks-tests/hyracks-storage-am-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/rtree/RTreeTest.java b/hyracks-tests/hyracks-storage-am-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/rtree/RTreeTest.java
index f6f9ccd..90ff219 100644
--- a/hyracks-tests/hyracks-storage-am-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/rtree/RTreeTest.java
+++ b/hyracks-tests/hyracks-storage-am-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/rtree/RTreeTest.java
@@ -104,7 +104,7 @@
IRTreeFrame interiorFrame = interiorFrameFactory.getFrame();
IRTreeFrame leafFrame = leafFrameFactory.getFrame();
- IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0);
+ IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0, metaFrameFactory);
int dim = 2;
RTree rtree = new RTree(bufferCache, freePageManager, interiorFrameFactory, leafFrameFactory, interiorCmp,
@@ -245,7 +245,7 @@
IRTreeFrame interiorFrame = interiorFrameFactory.getFrame();
IRTreeFrame leafFrame = leafFrameFactory.getFrame();
- IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0);
+ IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0, metaFrameFactory);
int dim = 2;
RTree rtree = new RTree(bufferCache, freePageManager, interiorFrameFactory, leafFrameFactory, interiorCmp,