1. Fixed BTree bug where the insertion of a duplicate key that would cause a split (if it wasn't a duplicate) leads to allocation of a new page that is never used. This bug did not affect correctness, only performance, i.e., the file size.
2. Added utility to gather stats for tree-based indexes (tree height, num tuples, fill factor, etc.).
3. Added utility to warm up buffer cache with pages at upper levels of any tree-based index.
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_indexes@447 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/FieldPrefixNSMLeafFrame.java b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/FieldPrefixNSMLeafFrame.java
index 05d7e2c..2c1f003 100644
--- a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/FieldPrefixNSMLeafFrame.java
+++ b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/FieldPrefixNSMLeafFrame.java
@@ -115,7 +115,7 @@
// 3. prefix tuple are sorted (last prefix tuple is at highest offset)
// this procedure will not move prefix tuples
@Override
- public void compact(MultiComparator cmp) {
+ public boolean compact(MultiComparator cmp) {
resetSpaceParams();
frameTuple.setFieldCount(cmp.getFieldCount());
@@ -168,11 +168,13 @@
slotManager.setSlot(sortedTupleOffs.get(i).slotOff, slotManager.encodeSlotFields(prefixSlotNum, freeSpace));
freeSpace += tupleLength;
}
-
+
buf.putInt(freeSpaceOff, freeSpace);
int totalFreeSpace = buf.capacity() - buf.getInt(freeSpaceOff)
- ((buf.getInt(tupleCountOff) + buf.getInt(prefixTupleCountOff)) * slotManager.getSlotSize());
buf.putInt(totalFreeSpaceOff, totalFreeSpace);
+
+ return false;
}
@Override
@@ -290,12 +292,14 @@
}
@Override
- public void insert(ITupleReference tuple, MultiComparator cmp) throws Exception {
- int slot = slotManager.findSlot(tuple, frameTuple, framePrefixTuple, cmp, FindTupleMode.FTM_INCLUSIVE,
- FindTupleNoExactMatchPolicy.FTP_HIGHER_KEY);
-
- slot = slotManager.insertSlot(slot, buf.getInt(freeSpaceOff));
-
+ public int findTupleIndex(ITupleReference tuple, MultiComparator cmp) throws Exception {
+ return slotManager.findSlot(tuple, frameTuple, framePrefixTuple, cmp, FindTupleMode.FTM_INCLUSIVE,
+ FindTupleNoExactMatchPolicy.FTP_HIGHER_KEY);
+ }
+
+ @Override
+ public void insert(ITupleReference tuple, MultiComparator cmp, int tupleIndex) throws Exception {
+ int slot = slotManager.insertSlot(tupleIndex, buf.getInt(freeSpaceOff));
int prefixSlotNum = slotManager.decodeFirstSlotField(slot);
int numPrefixFields = 0;
if (prefixSlotNum != FieldPrefixSlotManager.TUPLE_UNCOMPRESSED) {
@@ -457,18 +461,7 @@
FieldPrefixNSMLeafFrame rf = (FieldPrefixNSMLeafFrame) rightFrame;
frameTuple.setFieldCount(cmp.getFieldCount());
-
- // before doing anything check if key already exists
- int slot = slotManager.findSlot(tuple, frameTuple, framePrefixTuple, cmp, FindTupleMode.FTM_EXACT,
- FindTupleNoExactMatchPolicy.FTP_HIGHER_KEY);
- int tupleSlotNum = slotManager.decodeSecondSlotField(slot);
- if (tupleSlotNum != FieldPrefixSlotManager.GREATEST_SLOT) {
- frameTuple.resetByTupleIndex(this, tupleSlotNum);
- if (cmp.compare(tuple, frameTuple) == 0) {
- throw new BTreeException("Inserting duplicate key into unique index");
- }
- }
-
+
ByteBuffer right = rf.getBuffer();
int tupleCount = getTupleCount();
int prefixTupleCount = getPrefixTupleCount();
@@ -578,7 +571,8 @@
rightFrame.compact(cmp);
// insert last key
- targetFrame.insert(tuple, cmp);
+ int targetTupleIndex = targetFrame.findTupleIndex(tuple, cmp);
+ targetFrame.insert(tuple, cmp, targetTupleIndex);
// set split key to be highest value in left page
frameTuple.resetByTupleIndex(this, getTupleCount() - 1);
diff --git a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/NSMInteriorFrame.java b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/NSMInteriorFrame.java
index 8498958..12de948 100644
--- a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/NSMInteriorFrame.java
+++ b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/NSMInteriorFrame.java
@@ -77,9 +77,8 @@
return FrameOpSpaceStatus.INSUFFICIENT_SPACE;
}
- @Override
- public void insert(ITupleReference tuple, MultiComparator cmp) throws Exception {
- frameTuple.setFieldCount(cmp.getKeyFieldCount());
+ public int findTupleIndex(ITupleReference tuple, MultiComparator cmp) throws Exception {
+ frameTuple.setFieldCount(cmp.getKeyFieldCount());
int tupleIndex = slotManager.findTupleIndex(tuple, frameTuple, cmp, FindTupleMode.FTM_INCLUSIVE,
FindTupleNoExactMatchPolicy.FTP_HIGHER_KEY);
int slotOff = slotManager.getSlotOff(tupleIndex);
@@ -92,42 +91,45 @@
if (cmp.compare(tuple, frameTuple) != 0)
isDuplicate = false;
}
-
if (isDuplicate) {
throw new BTreeException("Trying to insert duplicate value into interior node.");
- } else {
- slotOff = slotManager.insertSlot(tupleIndex, buf.getInt(freeSpaceOff));
-
- int freeSpace = buf.getInt(freeSpaceOff);
- int bytesWritten = tupleWriter.writeTupleFields(tuple, 0, cmp.getKeyFieldCount(), buf, freeSpace);
- System.arraycopy(tuple.getFieldData(cmp.getKeyFieldCount() - 1), getLeftChildPageOff(tuple, cmp), buf
- .array(), freeSpace + bytesWritten, childPtrSize);
- int tupleSize = bytesWritten + childPtrSize;
-
- buf.putInt(tupleCountOff, buf.getInt(tupleCountOff) + 1);
- buf.putInt(freeSpaceOff, buf.getInt(freeSpaceOff) + tupleSize);
- buf.putInt(totalFreeSpaceOff, buf.getInt(totalFreeSpaceOff) - tupleSize - slotManager.getSlotSize());
-
- // did insert into the rightmost slot?
- if (slotOff == slotManager.getSlotEndOff()) {
- System.arraycopy(tuple.getFieldData(cmp.getKeyFieldCount() - 1), getLeftChildPageOff(tuple, cmp)
- + childPtrSize, buf.array(), rightLeafOff, childPtrSize);
- } else {
- // if slotOff has a right (slot-)neighbor then update its child
- // pointer
- // the only time when this is NOT the case, is when this is the
- // first tuple
- // (or when the splitkey goes into the rightmost slot but that
- // case was handled in the if above)
- if (buf.getInt(tupleCountOff) > 1) {
- int rightNeighborOff = slotOff - slotManager.getSlotSize();
- frameTuple.resetByTupleOffset(buf, slotManager.getTupleOff(rightNeighborOff));
- System.arraycopy(tuple.getFieldData(0), getLeftChildPageOff(tuple, cmp) + childPtrSize,
- buf.array(), getLeftChildPageOff(frameTuple, cmp), childPtrSize);
- }
- }
}
+ return tupleIndex;
}
+
+ @Override
+ public void insert(ITupleReference tuple, MultiComparator cmp, int tupleIndex) throws Exception {
+ int slotOff = slotManager.insertSlot(tupleIndex, buf.getInt(freeSpaceOff));
+ int freeSpace = buf.getInt(freeSpaceOff);
+ int bytesWritten = tupleWriter.writeTupleFields(tuple, 0, cmp.getKeyFieldCount(), buf, freeSpace);
+ System.arraycopy(tuple.getFieldData(cmp.getKeyFieldCount() - 1), getLeftChildPageOff(tuple, cmp), buf
+ .array(), freeSpace + bytesWritten, childPtrSize);
+ int tupleSize = bytesWritten + childPtrSize;
+
+ buf.putInt(tupleCountOff, buf.getInt(tupleCountOff) + 1);
+ buf.putInt(freeSpaceOff, buf.getInt(freeSpaceOff) + tupleSize);
+ buf.putInt(totalFreeSpaceOff, buf.getInt(totalFreeSpaceOff) - tupleSize - slotManager.getSlotSize());
+
+ // did insert into the rightmost slot?
+ if (slotOff == slotManager.getSlotEndOff()) {
+ System.arraycopy(tuple.getFieldData(cmp.getKeyFieldCount() - 1), getLeftChildPageOff(tuple, cmp)
+ + childPtrSize, buf.array(), rightLeafOff, childPtrSize);
+ } else {
+ // if slotOff has a right (slot-)neighbor then update its child
+ // pointer
+ // the only time when this is NOT the case, is when this is the
+ // first tuple
+ // (or when the splitkey goes into the rightmost slot but that
+ // case was handled in the if above)
+ if (buf.getInt(tupleCountOff) > 1) {
+ int rightNeighborOff = slotOff - slotManager.getSlotSize();
+ frameTuple.resetByTupleOffset(buf, slotManager.getTupleOff(rightNeighborOff));
+ System.arraycopy(tuple.getFieldData(0), getLeftChildPageOff(tuple, cmp) + childPtrSize,
+ buf.array(), getLeftChildPageOff(frameTuple, cmp), childPtrSize);
+ }
+ }
+ }
+
@Override
public void insertSorted(ITupleReference tuple, MultiComparator cmp) throws HyracksDataException {
@@ -149,16 +151,7 @@
throws Exception {
// before doing anything check if key already exists
frameTuple.setFieldCount(cmp.getKeyFieldCount());
- int tupleIndex = slotManager.findTupleIndex(tuple, frameTuple, cmp, FindTupleMode.FTM_EXACT,
- FindTupleNoExactMatchPolicy.FTP_HIGHER_KEY);
- int slotOff = slotManager.getSlotOff(tupleIndex);
- if (tupleIndex >= 0) {
- frameTuple.resetByTupleOffset(buf, slotManager.getTupleOff(slotOff));
- if (cmp.compare(tuple, frameTuple) == 0) {
- throw new BTreeException("Inserting duplicate key in interior node during split");
- }
- }
-
+
ByteBuffer right = rightFrame.getBuffer();
int tupleCount = buf.getInt(tupleCountOff);
@@ -210,13 +203,14 @@
compact(cmp);
// insert key
- targetFrame.insert(savedSplitKey.getTuple(), cmp);
-
+ int targetTupleIndex = targetFrame.findTupleIndex(savedSplitKey.getTuple(), cmp);
+ targetFrame.insert(savedSplitKey.getTuple(), cmp, targetTupleIndex);
+
return 0;
}
@Override
- public void compact(MultiComparator cmp) {
+ public boolean compact(MultiComparator cmp) {
resetSpaceParams();
frameTuple.setFieldCount(cmp.getKeyFieldCount());
@@ -248,6 +242,8 @@
buf.putInt(freeSpaceOff, freeSpace);
buf.putInt(totalFreeSpaceOff, buf.capacity() - freeSpace - tupleCount * slotManager.getSlotSize());
+
+ return false;
}
@Override
diff --git a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/NSMLeafFrame.java b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/NSMLeafFrame.java
index 2acaba6..6cafa07 100644
--- a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/NSMLeafFrame.java
+++ b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/NSMLeafFrame.java
@@ -66,8 +66,8 @@
}
@Override
- public void insert(ITupleReference tuple, MultiComparator cmp) throws Exception {
- frameTuple.setFieldCount(cmp.getFieldCount());
+ public int findTupleIndex(ITupleReference tuple, MultiComparator cmp) throws Exception {
+ frameTuple.setFieldCount(cmp.getFieldCount());
int tupleIndex = slotManager.findTupleIndex(tuple, frameTuple, cmp, FindTupleMode.FTM_INCLUSIVE,
FindTupleNoExactMatchPolicy.FTP_HIGHER_KEY);
int slotOff = slotManager.getSlotOff(tupleIndex);
@@ -80,19 +80,21 @@
if (cmp.compare(tuple, frameTuple) != 0)
isDuplicate = false;
}
-
if (isDuplicate) {
throw new BTreeException("Trying to insert duplicate value into leaf of unique index");
- } else {
- slotOff = slotManager.insertSlot(tupleIndex, buf.getInt(freeSpaceOff));
-
- int freeSpace = buf.getInt(freeSpaceOff);
- int bytesWritten = tupleWriter.writeTuple(tuple, buf, freeSpace);
-
- buf.putInt(tupleCountOff, buf.getInt(tupleCountOff) + 1);
- buf.putInt(freeSpaceOff, buf.getInt(freeSpaceOff) + bytesWritten);
- buf.putInt(totalFreeSpaceOff, buf.getInt(totalFreeSpaceOff) - bytesWritten - slotManager.getSlotSize());
}
+
+ return tupleIndex;
+ }
+
+ @Override
+ public void insert(ITupleReference tuple, MultiComparator cmp, int tupleIndex) throws Exception {
+ slotManager.insertSlot(tupleIndex, buf.getInt(freeSpaceOff));
+ int freeSpace = buf.getInt(freeSpaceOff);
+ int bytesWritten = tupleWriter.writeTuple(tuple, buf, freeSpace);
+ buf.putInt(tupleCountOff, buf.getInt(tupleCountOff) + 1);
+ buf.putInt(freeSpaceOff, buf.getInt(freeSpaceOff) + bytesWritten);
+ buf.putInt(totalFreeSpaceOff, buf.getInt(totalFreeSpaceOff) - bytesWritten - slotManager.getSlotSize());
}
@Override
@@ -110,17 +112,7 @@
throws Exception {
frameTuple.setFieldCount(cmp.getFieldCount());
-
- // before doing anything check if key already exists
- int tupleIndex = slotManager.findTupleIndex(tuple, frameTuple, cmp, FindTupleMode.FTM_EXACT,
- FindTupleNoExactMatchPolicy.FTP_HIGHER_KEY);
- if (tupleIndex >= 0) {
- frameTuple.resetByTupleIndex(this, tupleIndex);
- if (cmp.compare(tuple, frameTuple) == 0) {
- throw new BTreeException("Inserting duplicate key into unique index");
- }
- }
-
+
ByteBuffer right = rightFrame.getBuffer();
int tupleCount = getTupleCount();
@@ -157,7 +149,8 @@
compact(cmp);
// insert last key
- targetFrame.insert(tuple, cmp);
+ int targetTupleIndex = targetFrame.findTupleIndex(tuple, cmp);
+ targetFrame.insert(tuple, cmp, targetTupleIndex);
// set split key to be highest value in left page
tupleOff = slotManager.getTupleOff(slotManager.getSlotEndOff());
@@ -167,7 +160,7 @@
splitKey.initData(splitKeySize);
tupleWriter.writeTupleFields(frameTuple, 0, cmp.getKeyFieldCount(), splitKey.getBuffer(), 0);
splitKey.getTuple().resetByTupleOffset(splitKey.getBuffer(), 0);
-
+
return 0;
}
diff --git a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/OrderedSlotManager.java b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/OrderedSlotManager.java
index 86e287d..89a70e0 100644
--- a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/OrderedSlotManager.java
+++ b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/OrderedSlotManager.java
@@ -15,7 +15,12 @@
package edu.uci.ics.hyracks.storage.am.btree.frames;
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexTupleReference;
import edu.uci.ics.hyracks.storage.am.common.frames.AbstractSlotManager;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.FindTupleMode;
@@ -33,12 +38,12 @@
int mid;
int begin = 0;
int end = frame.getTupleCount() - 1;
-
+
while (begin <= end) {
mid = (begin + end) / 2;
- frameTuple.resetByTupleIndex(frame, mid);
-
- int cmp = multiCmp.compare(searchKey, frameTuple);
+ frameTuple.resetByTupleIndex(frame, mid);
+
+ int cmp = multiCmp.compare(searchKey, frameTuple);
if (cmp < 0) {
end = mid - 1;
} else if (cmp > 0) {
diff --git a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTree.java b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTree.java
index 38316d3..b65bd02 100644
--- a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTree.java
+++ b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTree.java
@@ -41,1226 +41,1291 @@
import edu.uci.ics.hyracks.storage.common.file.BufferedFileHandle;
public class BTree {
-
+
public static final float DEFAULT_FILL_FACTOR = 0.7f;
+
+ private final static int RESTART_OP = Integer.MIN_VALUE;
+ private final static int MAX_RESTARTS = 10;
+
+ // the root page never changes
+ private final int rootPage = 1;
+
+ private final IFreePageManager freePageManager;
+
+ private boolean created = false;
+ private boolean loaded = false;
+
+ private final IBufferCache bufferCache;
+ private int fileId;
+ private final IBTreeInteriorFrameFactory interiorFrameFactory;
+ private final IBTreeLeafFrameFactory leafFrameFactory;
+ private final MultiComparator cmp;
+ private final ReadWriteLock treeLatch;
+ private final RangePredicate diskOrderScanPredicate;
+
+ public int rootSplits = 0;
+ public int[] splitsByLevel = new int[500];
+ public long readLatchesAcquired = 0;
+ public long readLatchesReleased = 0;
+ public long writeLatchesAcquired = 0;
+ public long writeLatchesReleased = 0;
+ public long pins = 0;
+ public long unpins = 0;
+
+ public long treeLatchesAcquired = 0;
+ public long treeLatchesReleased = 0;
+
+ public byte currentLevel = 0;
+
+ public int usefulCompression = 0;
+ public int uselessCompression = 0;
+
+ public void treeLatchStatus() {
+ System.out.println(treeLatch.writeLock().toString());
+ }
+
+ public String printStats() {
+ StringBuilder strBuilder = new StringBuilder();
+ strBuilder.append("\n");
+ strBuilder.append("ROOTSPLITS: " + rootSplits + "\n");
+ strBuilder.append("SPLITS BY LEVEL\n");
+ for (int i = 0; i < currentLevel; i++) {
+ strBuilder.append(String.format("%3d ", i)
+ + String.format("%8d ", splitsByLevel[i]) + "\n");
+ }
+ strBuilder.append(String.format("READ LATCHES: %10d %10d\n",
+ readLatchesAcquired, readLatchesReleased));
+ strBuilder.append(String.format("WRITE LATCHES: %10d %10d\n",
+ writeLatchesAcquired, writeLatchesReleased));
+ strBuilder.append(String.format("PINS: %10d %10d\n", pins,
+ unpins));
+ return strBuilder.toString();
+ }
+
+ public BTree(IBufferCache bufferCache, IFreePageManager freePageManager,
+ IBTreeInteriorFrameFactory interiorFrameFactory,
+ IBTreeLeafFrameFactory leafFrameFactory, MultiComparator cmp) {
+ this.bufferCache = bufferCache;
+ this.interiorFrameFactory = interiorFrameFactory;
+ this.leafFrameFactory = leafFrameFactory;
+ this.cmp = cmp;
+ this.freePageManager = freePageManager;
+ this.treeLatch = new ReentrantReadWriteLock(true);
+ this.diskOrderScanPredicate = new RangePredicate(true, null, null,
+ true, true, cmp, cmp);
+ }
+
+ public void create(int fileId, IBTreeLeafFrame leafFrame,
+ ITreeIndexMetaDataFrame metaFrame) throws Exception {
+
+ if (created)
+ return;
+
+ treeLatch.writeLock().lock();
+ try {
+
+ // check if another thread beat us to it
+ if (created)
+ return;
+
+ freePageManager.init(metaFrame, rootPage);
+
+ // initialize root page
+ ICachedPage rootNode = bufferCache.pin(BufferedFileHandle
+ .getDiskPageId(fileId, rootPage), true);
+ pins++;
+
+ rootNode.acquireWriteLatch();
+ writeLatchesAcquired++;
+ try {
+ leafFrame.setPage(rootNode);
+ leafFrame.initBuffer((byte) 0);
+ } finally {
+ rootNode.releaseWriteLatch();
+ writeLatchesReleased++;
+ bufferCache.unpin(rootNode);
+ unpins++;
+ }
+ currentLevel = 0;
+
+ created = true;
+ } finally {
+ treeLatch.writeLock().unlock();
+ }
+ }
+
+ public void open(int fileId) {
+ this.fileId = fileId;
+ }
+
+ public void close() {
+ fileId = -1;
+ }
+
+ private void addFreePages(BTreeOpContext ctx) throws Exception {
+ for (int i = 0; i < ctx.freePages.size(); i++) {
+ // root page is special, don't add it to free pages
+ if (ctx.freePages.get(i) != rootPage) {
+ freePageManager
+ .addFreePage(ctx.metaFrame, ctx.freePages.get(i));
+ }
+ }
+ ctx.freePages.clear();
+ }
+
+ public void printTree(IBTreeLeafFrame leafFrame,
+ IBTreeInteriorFrame interiorFrame, ISerializerDeserializer[] fields)
+ throws Exception {
+ printTree(rootPage, null, false, leafFrame, interiorFrame, fields);
+ }
+
+ public void printTree(int pageId, ICachedPage parent, boolean unpin,
+ IBTreeLeafFrame leafFrame, IBTreeInteriorFrame interiorFrame,
+ ISerializerDeserializer[] fields) throws Exception {
+
+ ICachedPage node = bufferCache.pin(BufferedFileHandle.getDiskPageId(
+ fileId, pageId), false);
+ pins++;
+ node.acquireReadLatch();
+ readLatchesAcquired++;
+
+ try {
+ if (parent != null && unpin == true) {
+ parent.releaseReadLatch();
+ readLatchesReleased++;
+
+ bufferCache.unpin(parent);
+ unpins++;
+ }
+
+ interiorFrame.setPage(node);
+ int level = interiorFrame.getLevel();
+
+ System.out.format("%1d ", level);
+ System.out.format("%3d ", pageId);
+ for (int i = 0; i < currentLevel - level; i++)
+ System.out.format(" ");
+
+ String keyString;
+ if (interiorFrame.isLeaf()) {
+ leafFrame.setPage(node);
+ keyString = leafFrame.printKeys(cmp, fields);
+ } else {
+ keyString = interiorFrame.printKeys(cmp, fields);
+ }
+
+ System.out.format(keyString);
+ if (!interiorFrame.isLeaf()) {
+ ArrayList<Integer> children = ((NSMInteriorFrame) (interiorFrame))
+ .getChildren(cmp);
+
+ for (int i = 0; i < children.size(); i++) {
+ printTree(children.get(i), node, i == children.size() - 1,
+ leafFrame, interiorFrame, fields);
+ }
+ } else {
+ node.releaseReadLatch();
+ readLatchesReleased++;
+
+ bufferCache.unpin(node);
+ unpins++;
+ }
+ } catch (Exception e) {
+ node.releaseReadLatch();
+ readLatchesReleased++;
+
+ bufferCache.unpin(node);
+ unpins++;
+ e.printStackTrace();
+ }
+ }
+
+ public void diskOrderScan(DiskOrderScanCursor cursor,
+ IBTreeLeafFrame leafFrame, ITreeIndexMetaDataFrame metaFrame)
+ throws HyracksDataException {
+ int currentPageId = rootPage + 1;
+ int maxPageId = freePageManager.getMaxPage(metaFrame);
+
+ ICachedPage page = bufferCache.pin(BufferedFileHandle.getDiskPageId(
+ fileId, currentPageId), false);
+ page.acquireReadLatch();
+ cursor.setBufferCache(bufferCache);
+ cursor.setFileId(fileId);
+ cursor.setCurrentPageId(currentPageId);
+ cursor.setMaxPageId(maxPageId);
+ cursor.open(page, diskOrderScanPredicate);
+ }
+
+ public void search(IBTreeCursor cursor, RangePredicate pred,
+ BTreeOpContext ctx) throws Exception {
+ ctx.reset();
+ ctx.pred = pred;
+ ctx.cursor = cursor;
+ // simple index scan
+ if (ctx.pred.getLowKeyComparator() == null)
+ ctx.pred.setLowKeyComparator(cmp);
+ if (ctx.pred.getHighKeyComparator() == null)
+ ctx.pred.setHighKeyComparator(cmp);
+
+ boolean repeatOp = true;
+ // we use this loop to deal with possibly multiple operation restarts
+ // due to ongoing structure modifications during the descent
+ while (repeatOp && ctx.opRestarts < MAX_RESTARTS) {
+ performOp(rootPage, null, ctx);
+
+ // if we reach this stage then we need to restart from the (possibly
+ // new) root
+ if (!ctx.pageLsns.isEmpty() && ctx.pageLsns.getLast() == RESTART_OP) {
+ ctx.pageLsns.removeLast(); // pop the restart op indicator
+ continue;
+ }
+
+ repeatOp = false;
+ }
+
+ cursor.setBufferCache(bufferCache);
+ cursor.setFileId(fileId);
+ }
+
+ private void unsetSmPages(BTreeOpContext ctx) throws HyracksDataException {
+ ICachedPage originalPage = ctx.interiorFrame.getPage();
+ for (int i = 0; i < ctx.smPages.size(); i++) {
+ int pageId = ctx.smPages.get(i);
+ ICachedPage smPage = bufferCache.pin(BufferedFileHandle
+ .getDiskPageId(fileId, pageId), false);
+ pins++;
+ smPage.acquireWriteLatch(); // TODO: would like to set page dirty
+ // without latching
+ writeLatchesAcquired++;
+ try {
+ ctx.interiorFrame.setPage(smPage);
+ ctx.interiorFrame.setSmFlag(false);
+ } finally {
+ smPage.releaseWriteLatch();
+ writeLatchesReleased++;
+ bufferCache.unpin(smPage);
+ unpins++;
+ }
+ }
+ if (ctx.smPages.size() > 0) {
+ treeLatch.writeLock().unlock();
+ treeLatchesReleased++;
+ ctx.smPages.clear();
+ }
+ ctx.interiorFrame.setPage(originalPage);
+ }
+
+ private void createNewRoot(BTreeOpContext ctx) throws Exception {
+ rootSplits++; // debug
+ splitsByLevel[currentLevel]++;
+ currentLevel++;
+
+ // make sure the root is always at the same level
+ ICachedPage leftNode = bufferCache.pin(BufferedFileHandle
+ .getDiskPageId(fileId, ctx.splitKey.getLeftPage()), false);
+ pins++;
+ leftNode.acquireWriteLatch(); // TODO: think about whether latching is
+ // really required
+ writeLatchesAcquired++;
+ try {
+ ICachedPage rightNode = bufferCache.pin(BufferedFileHandle
+ .getDiskPageId(fileId, ctx.splitKey.getRightPage()), false);
+ pins++;
+ rightNode.acquireWriteLatch(); // TODO: think about whether latching
+ // is really required
+ writeLatchesAcquired++;
+ try {
+ int newLeftId = freePageManager.getFreePage(ctx.metaFrame);
+ ICachedPage newLeftNode = bufferCache.pin(BufferedFileHandle
+ .getDiskPageId(fileId, newLeftId), true);
+ pins++;
+ newLeftNode.acquireWriteLatch(); // TODO: think about whether
+ // latching is really
+ // required
+ writeLatchesAcquired++;
+ try {
+ // copy left child to new left child
+ System.arraycopy(leftNode.getBuffer().array(), 0,
+ newLeftNode.getBuffer().array(), 0, newLeftNode
+ .getBuffer().capacity());
+ ctx.interiorFrame.setPage(newLeftNode);
+ ctx.interiorFrame.setSmFlag(false);
+
+ // change sibling pointer if children are leaves
+ ctx.leafFrame.setPage(rightNode);
+ if (ctx.leafFrame.isLeaf()) {
+ ctx.leafFrame.setPrevLeaf(newLeftId);
+ }
+
+ // initialize new root (leftNode becomes new root)
+ ctx.interiorFrame.setPage(leftNode);
+ ctx.interiorFrame.initBuffer((byte) (ctx.leafFrame
+ .getLevel() + 1));
+ ctx.interiorFrame.setSmFlag(true); // will be cleared later
+ // in unsetSmPages
+ ctx.splitKey.setLeftPage(newLeftId);
+ int targetTupleIndex = ctx.interiorFrame.findTupleIndex(
+ ctx.splitKey.getTuple(), cmp);
+ ctx.interiorFrame.insert(ctx.splitKey.getTuple(), cmp,
+ targetTupleIndex);
+ } finally {
+ newLeftNode.releaseWriteLatch();
+ writeLatchesReleased++;
+ bufferCache.unpin(newLeftNode);
+ unpins++;
+ }
+ } finally {
+ rightNode.releaseWriteLatch();
+ writeLatchesReleased++;
+ bufferCache.unpin(rightNode);
+ unpins++;
+ }
+ } finally {
+ leftNode.releaseWriteLatch();
+ writeLatchesReleased++;
+ bufferCache.unpin(leftNode);
+ unpins++;
+ }
+ }
+
+ public void insert(ITupleReference tuple, BTreeOpContext ctx)
+ throws Exception {
+ ctx.reset();
+ ctx.pred.setLowKeyComparator(cmp);
+ ctx.pred.setHighKeyComparator(cmp);
+ ctx.pred.setLowKey(tuple, true);
+ ctx.pred.setHighKey(tuple, true);
+ ctx.splitKey.reset();
+ ctx.splitKey.getTuple().setFieldCount(cmp.getKeyFieldCount());
+
+ boolean repeatOp = true;
+ // we use this loop to deal with possibly multiple operation restarts
+ // due to ongoing structure modifications during the descent
+ while (repeatOp && ctx.opRestarts < MAX_RESTARTS) {
+ performOp(rootPage, null, ctx);
+
+ // if we reach this stage then we need to restart from the (possibly
+ // new) root
+ if (!ctx.pageLsns.isEmpty() && ctx.pageLsns.getLast() == RESTART_OP) {
+ ctx.pageLsns.removeLast(); // pop the restart op indicator
+ continue;
+ }
+
+ // we split the root, here is the key for a new root
+ if (ctx.splitKey.getBuffer() != null) {
+ createNewRoot(ctx);
+ }
+
+ unsetSmPages(ctx);
+
+ repeatOp = false;
+ }
+ }
+
+ public long uselessCompressionTime = 0;
+
+ private void insertLeaf(ICachedPage node, int pageId,
+ ITupleReference tuple, BTreeOpContext ctx) throws Exception {
+ ctx.leafFrame.setPage(node);
+ ctx.leafFrame.setPageTupleFieldCount(cmp.getFieldCount());
+
+ int targetTupleIndex = ctx.leafFrame.findTupleIndex(tuple, cmp);
+ FrameOpSpaceStatus spaceStatus = ctx.leafFrame.hasSpaceInsert(tuple,
+ cmp);
+ switch (spaceStatus) {
+
+ case SUFFICIENT_CONTIGUOUS_SPACE: {
+ // System.out.println("SUFFICIENT_CONTIGUOUS_SPACE");
+ ctx.leafFrame.insert(tuple, cmp, targetTupleIndex);
+ ctx.splitKey.reset();
+ }
+ break;
+
+ case SUFFICIENT_SPACE: {
+ // System.out.println("SUFFICIENT_SPACE");
+ boolean slotsChanged = ctx.leafFrame.compact(cmp);
+ if (slotsChanged)
+ targetTupleIndex = ctx.leafFrame.findTupleIndex(tuple, cmp);
+ ctx.leafFrame.insert(tuple, cmp, targetTupleIndex);
+ ctx.splitKey.reset();
+ }
+ break;
+
+ case INSUFFICIENT_SPACE: {
+ // System.out.println("INSUFFICIENT_SPACE");
+
+ // try compressing the page first and see if there is space
+ // available
+ long start = System.currentTimeMillis();
+ boolean reCompressed = ctx.leafFrame.compress(cmp);
+ long end = System.currentTimeMillis();
+ if (reCompressed)
+ spaceStatus = ctx.leafFrame.hasSpaceInsert(tuple, cmp);
+
+ if (spaceStatus == FrameOpSpaceStatus.SUFFICIENT_CONTIGUOUS_SPACE) {
+ ctx.leafFrame.insert(tuple, cmp, targetTupleIndex);
+ ctx.splitKey.reset();
+
+ usefulCompression++;
+ } else {
+
+ uselessCompressionTime += (end - start);
+ uselessCompression++;
+
+ // perform split
+ splitsByLevel[0]++; // debug
+ int rightSiblingPageId = ctx.leafFrame.getNextLeaf();
+ ICachedPage rightSibling = null;
+ if (rightSiblingPageId > 0) {
+ rightSibling = bufferCache.pin(BufferedFileHandle
+ .getDiskPageId(fileId, rightSiblingPageId), false);
+ pins++;
+ }
+
+ treeLatch.writeLock().lock(); // lock is released in
+ // unsetSmPages(), after sm has
+ // fully completed
+ treeLatchesAcquired++;
+ try {
+
+ int rightPageId = freePageManager
+ .getFreePage(ctx.metaFrame);
+ ICachedPage rightNode = bufferCache.pin(BufferedFileHandle
+ .getDiskPageId(fileId, rightPageId), true);
+ pins++;
+ rightNode.acquireWriteLatch();
+ writeLatchesAcquired++;
+ try {
+ IBTreeLeafFrame rightFrame = leafFrameFactory
+ .getFrame();
+ rightFrame.setPage(rightNode);
+ rightFrame.initBuffer((byte) 0);
+ rightFrame.setPageTupleFieldCount(cmp.getFieldCount());
+
+ int ret = ctx.leafFrame.split(rightFrame, tuple, cmp,
+ ctx.splitKey);
+
+ ctx.smPages.add(pageId);
+ ctx.smPages.add(rightPageId);
+ ctx.leafFrame.setSmFlag(true);
+ rightFrame.setSmFlag(true);
+
+ rightFrame.setNextLeaf(ctx.leafFrame.getNextLeaf());
+ rightFrame.setPrevLeaf(pageId);
+ ctx.leafFrame.setNextLeaf(rightPageId);
+
+ // TODO: we just use increasing numbers as pageLsn,
+ // we
+ // should tie this together with the LogManager and
+ // TransactionManager
+ rightFrame.setPageLsn(rightFrame.getPageLsn() + 1);
+ ctx.leafFrame
+ .setPageLsn(ctx.leafFrame.getPageLsn() + 1);
+
+ if (ret != 0) {
+ ctx.splitKey.reset();
+ } else {
+ // System.out.print("LEAF SPLITKEY: ");
+ // cmp.printKey(splitKey.getData(), 0);
+ // System.out.println("");
+
+ ctx.splitKey.setPages(pageId, rightPageId);
+ }
+ if (rightSibling != null) {
+ rightSibling.acquireWriteLatch();
+ writeLatchesAcquired++;
+ try {
+ rightFrame.setPage(rightSibling); // reuse
+ // rightFrame
+ // for
+ // modification
+ rightFrame.setPrevLeaf(rightPageId);
+ } finally {
+ rightSibling.releaseWriteLatch();
+ writeLatchesReleased++;
+ }
+ }
+ } finally {
+ rightNode.releaseWriteLatch();
+ writeLatchesReleased++;
+ bufferCache.unpin(rightNode);
+ unpins++;
+ }
+ } catch (Exception e) {
+ treeLatch.writeLock().unlock();
+ treeLatchesReleased++;
+ throw e;
+ } finally {
+ if (rightSibling != null) {
+ bufferCache.unpin(rightSibling);
+ unpins++;
+ }
+ }
+ }
+ }
+ break;
+
+ }
+
+ node.releaseWriteLatch();
+ writeLatchesReleased++;
+ bufferCache.unpin(node);
+ unpins++;
+ }
+
+ private void insertInterior(ICachedPage node, int pageId,
+ ITupleReference tuple, BTreeOpContext ctx) throws Exception {
+ ctx.interiorFrame.setPage(node);
+ ctx.interiorFrame.setPageTupleFieldCount(cmp.getKeyFieldCount());
+
+ int targetTupleIndex = ctx.interiorFrame.findTupleIndex(tuple, cmp);
+ FrameOpSpaceStatus spaceStatus = ctx.interiorFrame.hasSpaceInsert(
+ tuple, cmp);
+ switch (spaceStatus) {
+ case INSUFFICIENT_SPACE: {
+ splitsByLevel[ctx.interiorFrame.getLevel()]++; // debug
+ int rightPageId = freePageManager.getFreePage(ctx.metaFrame);
+ ICachedPage rightNode = bufferCache.pin(BufferedFileHandle
+ .getDiskPageId(fileId, rightPageId), true);
+ pins++;
+ rightNode.acquireWriteLatch();
+ writeLatchesAcquired++;
+ try {
+ ITreeIndexFrame rightFrame = interiorFrameFactory.getFrame();
+ rightFrame.setPage(rightNode);
+ rightFrame.initBuffer((byte) ctx.interiorFrame.getLevel());
+ rightFrame.setPageTupleFieldCount(cmp.getKeyFieldCount());
+ // instead of creating a new split key, use the existing
+ // splitKey
+ int ret = ctx.interiorFrame.split(rightFrame, ctx.splitKey
+ .getTuple(), cmp, ctx.splitKey);
+
+ ctx.smPages.add(pageId);
+ ctx.smPages.add(rightPageId);
+ ctx.interiorFrame.setSmFlag(true);
+ rightFrame.setSmFlag(true);
+
+ // TODO: we just use increasing numbers as pageLsn, we
+ // should tie this together with the LogManager and
+ // TransactionManager
+ rightFrame.setPageLsn(rightFrame.getPageLsn() + 1);
+ ctx.interiorFrame
+ .setPageLsn(ctx.interiorFrame.getPageLsn() + 1);
+
+ if (ret != 0) {
+ ctx.splitKey.reset();
+ } else {
+ // System.out.print("INTERIOR SPLITKEY: ");
+ // cmp.printKey(splitKey.getData(), 0);
+ // System.out.println("");
+
+ ctx.splitKey.setPages(pageId, rightPageId);
+ }
+ } finally {
+ rightNode.releaseWriteLatch();
+ writeLatchesReleased++;
+ bufferCache.unpin(rightNode);
+ unpins++;
+ }
+ }
+ break;
+
+ case SUFFICIENT_CONTIGUOUS_SPACE: {
+ // System.out.println("INSERT INTERIOR: " + pageId);
+ ctx.interiorFrame.insert(tuple, cmp, targetTupleIndex);
+ ctx.splitKey.reset();
+ }
+ break;
+
+ case SUFFICIENT_SPACE: {
+ boolean slotsChanged = ctx.interiorFrame.compact(cmp);
+ if (slotsChanged)
+ targetTupleIndex = ctx.interiorFrame.findTupleIndex(tuple, cmp);
+ ctx.interiorFrame.insert(tuple, cmp, targetTupleIndex);
+ ctx.splitKey.reset();
+ }
+ break;
+
+ }
+ }
+
+    // Deletes the given tuple from the tree. Builds a point predicate
+    // (low key == high key == tuple) and descends from the root via
+    // performOp(). The outer loop retries the full descent (bounded by
+    // MAX_RESTARTS) whenever the RESTART_OP sentinel is left on
+    // ctx.pageLsns, i.e., an ongoing structure modification was observed
+    // during the descent.
+    public void delete(ITupleReference tuple, BTreeOpContext ctx)
+            throws Exception {
+        ctx.reset();
+        ctx.pred.setLowKeyComparator(cmp);
+        ctx.pred.setHighKeyComparator(cmp);
+        ctx.pred.setLowKey(tuple, true);
+        ctx.pred.setHighKey(tuple, true);
+        ctx.splitKey.reset();
+        ctx.splitKey.getTuple().setFieldCount(cmp.getKeyFieldCount());
+
+        boolean repeatOp = true;
+        // we use this loop to deal with possibly multiple operation restarts
+        // due to ongoing structure modifications during the descent
+        while (repeatOp && ctx.opRestarts < MAX_RESTARTS) {
+            performOp(rootPage, null, ctx);
+
+            // if we reach this stage then we need to restart from the (possibly
+            // new) root
+            if (!ctx.pageLsns.isEmpty() && ctx.pageLsns.getLast() == RESTART_OP) {
+                ctx.pageLsns.removeLast(); // pop the restart op indicator
+                continue;
+            }
+
+            // tree is empty, reset level to zero
+            // (a non-null splitKey buffer after the descent signals that the
+            // deletion propagated all the way up and emptied the root)
+            if (ctx.splitKey.getBuffer() != null) {
+                ICachedPage rootNode = bufferCache.pin(BufferedFileHandle
+                        .getDiskPageId(fileId, rootPage), false);
+                pins++;
+                rootNode.acquireWriteLatch();
+                writeLatchesAcquired++;
+                try {
+                    ctx.leafFrame.setPage(rootNode);
+                    ctx.leafFrame.initBuffer((byte) 0);
+                    currentLevel = 0; // debug
+                } finally {
+                    rootNode.releaseWriteLatch();
+                    writeLatchesReleased++;
+                    bufferCache.unpin(rootNode);
+                    unpins++;
+                }
+            }
+
+            // clear SM flags set during the descent; also releases the tree
+            // write latch if any SM pages were recorded
+            unsetSmPages(ctx);
+
+            // return pages emptied by this deletion to the free page manager
+            addFreePages(ctx);
+
+            repeatOp = false;
+        }
+    }
+
+    // TODO: to avoid latch deadlock, must modify cursor to detect empty leaves
+    // Deletes a tuple from the leaf currently latched in 'node'. If the leaf
+    // holds exactly one tuple it will become empty: the page is unlinked from
+    // its sibling chain under the tree write latch, marked as a free page, and
+    // the deletion is propagated to the parent by making ctx.splitKey
+    // non-null. Otherwise the tuple is simply removed in place. In both paths
+    // this method releases the write latch on 'node' and unpins it.
+    private void deleteLeaf(ICachedPage node, int pageId,
+            ITupleReference tuple, BTreeOpContext ctx) throws Exception {
+        ctx.leafFrame.setPage(node);
+
+        // will this leaf become empty?
+        if (ctx.leafFrame.getTupleCount() == 1) {
+            IBTreeLeafFrame siblingFrame = leafFrameFactory.getFrame();
+
+            ICachedPage leftNode = null;
+            ICachedPage rightNode = null;
+            int nextLeaf = ctx.leafFrame.getNextLeaf();
+            int prevLeaf = ctx.leafFrame.getPrevLeaf();
+
+            // pin siblings first so their next/prev pointers can be patched
+            if (prevLeaf > 0)
+                leftNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(
+                        fileId, prevLeaf), false);
+
+            try {
+
+                if (nextLeaf > 0)
+                    rightNode = bufferCache.pin(BufferedFileHandle
+                            .getDiskPageId(fileId, nextLeaf), false);
+
+                try {
+                    // whole-tree latch serializes this structure modification
+                    treeLatch.writeLock().lock();
+                    treeLatchesAcquired++;
+
+                    try {
+                        ctx.leafFrame.delete(tuple, cmp, true);
+                        // to propagate the deletion we only need to make the
+                        // splitKey != null
+                        // we can reuse data to identify which key to delete in
+                        // the parent
+                        ctx.splitKey.initData(1);
+                    } catch (Exception e) {
+                        // don't propagate deletion upwards if deletion at this
+                        // level fails
+                        ctx.splitKey.reset();
+                        throw e;
+                    }
+
+                    // TODO: tie together with loggins
+                    ctx.leafFrame.setPageLsn(ctx.leafFrame.getPageLsn() + 1);
+                    // stamp the page with the free-page level indicator so a
+                    // concurrent reader can recognize it as logically deleted
+                    ctx.leafFrame.setLevel(freePageManager
+                            .getFreePageLevelIndicator());
+
+                    ctx.smPages.add(pageId);
+                    ctx.leafFrame.setSmFlag(true);
+
+                    node.releaseWriteLatch();
+                    writeLatchesReleased++;
+                    bufferCache.unpin(node);
+                    unpins++;
+
+                    // splice the left sibling past the deleted page
+                    if (leftNode != null) {
+                        leftNode.acquireWriteLatch();
+                        try {
+                            siblingFrame.setPage(leftNode);
+                            siblingFrame.setNextLeaf(nextLeaf);
+                            siblingFrame
+                                    .setPageLsn(siblingFrame.getPageLsn() + 1); // TODO:
+                            // tie
+                            // together
+                            // with
+                            // logging
+                        } finally {
+                            leftNode.releaseWriteLatch();
+                        }
+                    }
+
+                    // splice the right sibling past the deleted page
+                    if (rightNode != null) {
+                        rightNode.acquireWriteLatch();
+                        try {
+                            siblingFrame.setPage(rightNode);
+                            siblingFrame.setPrevLeaf(prevLeaf);
+                            siblingFrame
+                                    .setPageLsn(siblingFrame.getPageLsn() + 1); // TODO:
+                            // tie
+                            // together
+                            // with
+                            // logging
+                        } finally {
+                            rightNode.releaseWriteLatch();
+                        }
+                    }
+
+                    // register pageId as a free
+                    ctx.freePages.add(pageId);
+
+                } catch (Exception e) {
+                    // on failure, release the tree latch here; on success it
+                    // is released later in unsetSmPages()
+                    treeLatch.writeLock().unlock();
+                    treeLatchesReleased++;
+                    throw e;
+                } finally {
+                    if (rightNode != null) {
+                        bufferCache.unpin(rightNode);
+                    }
+                }
+            } finally {
+                if (leftNode != null) {
+                    bufferCache.unpin(leftNode);
+                }
+            }
+        } else { // leaf will not become empty
+            ctx.leafFrame.delete(tuple, cmp, true);
+            node.releaseWriteLatch();
+            writeLatchesReleased++;
+            bufferCache.unpin(node);
+            unpins++;
+        }
+    }
+
+    // Deletes a key from the interior page latched in 'node'. If the page has
+    // no keys left (only the rightmost child pointer), the page itself is
+    // emptied and freed, and the deletion keeps propagating upward (splitKey
+    // remains non-null). Otherwise the key is removed and propagation stops
+    // (splitKey is reset). Caller retains the latch/pin on 'node'.
+    private void deleteInterior(ICachedPage node, int pageId,
+            ITupleReference tuple, BTreeOpContext ctx) throws Exception {
+        ctx.interiorFrame.setPage(node);
+
+        // this means there is only a child pointer but no key, this case
+        // propagates the split
+        if (ctx.interiorFrame.getTupleCount() == 0) {
+            ctx.interiorFrame.setPageLsn(ctx.interiorFrame.getPageLsn() + 1); // TODO:
+            // tie
+            // together
+            // with
+            // logging
+            // mark the page as free-page-level so readers treat it as deleted
+            ctx.leafFrame.setLevel(freePageManager.getFreePageLevelIndicator());
+            ctx.smPages.add(pageId);
+            ctx.interiorFrame.setSmFlag(true);
+            ctx.interiorFrame.setRightmostChildPageId(-1); // this node is
+            // completely empty
+            // register this pageId as a free page
+            ctx.freePages.add(pageId);
+
+        } else {
+            ctx.interiorFrame.delete(tuple, cmp, false);
+            ctx.interiorFrame.setPageLsn(ctx.interiorFrame.getPageLsn() + 1); // TODO:
+            // tie
+            // together
+            // with
+            // logging
+            ctx.splitKey.reset(); // don't propagate deletion
+        }
+    }
+
+    // Latch-coupling policy: leaves are write-latched for INSERT/DELETE;
+    // everything else (interior pages, and leaves during SEARCH) takes a
+    // read latch. Must mirror releaseLatch() exactly.
+    private final void acquireLatch(ICachedPage node, TreeIndexOp op,
+            boolean isLeaf) {
+        if (isLeaf
+                && (op.equals(TreeIndexOp.TI_INSERT) || op
+                        .equals(TreeIndexOp.TI_DELETE))) {
+            node.acquireWriteLatch();
+            writeLatchesAcquired++;
+        } else {
+            node.acquireReadLatch();
+            readLatchesAcquired++;
+        }
+    }
+
+    // Counterpart of acquireLatch(): releases the write latch for leaf
+    // INSERT/DELETE, the read latch otherwise. The (op, isLeaf) pair must be
+    // the same one passed to acquireLatch() for this node.
+    private final void releaseLatch(ICachedPage node, TreeIndexOp op,
+            boolean isLeaf) {
+        if (isLeaf
+                && (op.equals(TreeIndexOp.TI_INSERT) || op
+                        .equals(TreeIndexOp.TI_DELETE))) {
+            node.releaseWriteLatch();
+            writeLatchesReleased++;
+        } else {
+            node.releaseReadLatch();
+            readLatchesReleased++;
+        }
+    }
+
+    // Returns true iff the page's current LSN equals the LSN this operation
+    // saw during its descent (top of ctx.pageLsns), i.e., the page has not
+    // been modified since. Used after a RESTART_OP bubbles up to decide
+    // whether the descent can resume from this page or must unwind further.
+    private boolean isConsistent(int pageId, BTreeOpContext ctx)
+            throws Exception {
+        ICachedPage node = bufferCache.pin(BufferedFileHandle.getDiskPageId(
+                fileId, pageId), false);
+        pins++;
+        node.acquireReadLatch();
+        readLatchesAcquired++;
+        ctx.interiorFrame.setPage(node);
+        boolean isConsistent = false;
+        try {
+            isConsistent = ctx.pageLsns.getLast() == ctx.interiorFrame
+                    .getPageLsn();
+        } finally {
+            node.releaseReadLatch();
+            readLatchesReleased++;
+            bufferCache.unpin(node);
+            unpins++;
+        }
+        return isConsistent;
+    }
+
+    // Core recursive descent for insert/delete/search. Pins and latches
+    // 'pageId' (latch mode chosen by acquireLatch from ctx.op and leafness),
+    // releases the parent's read latch (latch coupling), then either recurses
+    // into the correct child (interior page) or dispatches to the leaf
+    // operation. Records each visited page's LSN on ctx.pageLsns so that an
+    // ongoing structure modification (smFlag set) can unwind the recursion
+    // via the RESTART_OP sentinel and restart from a consistent ancestor.
+    // On the way back up, a non-null ctx.splitKey drives split/delete
+    // propagation into this (interior) page.
+    private void performOp(int pageId, ICachedPage parent, BTreeOpContext ctx)
+            throws Exception {
+        ICachedPage node = bufferCache.pin(BufferedFileHandle.getDiskPageId(
+                fileId, pageId), false);
+        pins++;
+
+        ctx.interiorFrame.setPage(node);
+        // this check performs an unprotected read in the page
+        // the following could happen: TODO fill out
+        boolean unsafeIsLeaf = ctx.interiorFrame.isLeaf();
+        acquireLatch(node, ctx.op, unsafeIsLeaf);
+        boolean smFlag = ctx.interiorFrame.getSmFlag();
+        // re-check leafness after latching
+        boolean isLeaf = ctx.interiorFrame.isLeaf();
+
+        // remember trail of pageLsns, to unwind recursion in case of an ongoing
+        // structure modification
+        ctx.pageLsns.add(ctx.interiorFrame.getPageLsn());
+
+        try {
+
+            // latch coupling, note: parent should never be write latched,
+            // otherwise something is wrong.
+            if (parent != null) {
+                parent.releaseReadLatch();
+                readLatchesReleased++;
+                bufferCache.unpin(parent);
+                unpins++;
+            }
+
+            if (!isLeaf || smFlag) {
+                if (!smFlag) {
+                    // we use this loop to deal with possibly multiple operation
+                    // restarts due to ongoing structure modifications during
+                    // the descent
+                    boolean repeatOp = true;
+                    while (repeatOp && ctx.opRestarts < MAX_RESTARTS) {
+                        int childPageId = ctx.interiorFrame.getChildPageId(
+                                ctx.pred, cmp);
+                        performOp(childPageId, node, ctx);
+
+                        if (!ctx.pageLsns.isEmpty()
+                                && ctx.pageLsns.getLast() == RESTART_OP) {
+                            ctx.pageLsns.removeLast(); // pop the restart op
+                            // indicator
+                            if (isConsistent(pageId, ctx)) {
+                                node = null; // to avoid unpinning and
+                                // unlatching node again in
+                                // recursive call
+                                continue; // descend the tree again
+                            } else {
+                                ctx.pageLsns.removeLast(); // pop pageLsn of
+                                // this page
+                                // (version seen by this op
+                                // during descent)
+                                ctx.pageLsns.add(RESTART_OP); // this node is
+                                // not
+                                // consistent,
+                                // set the
+                                // restart
+                                // indicator for
+                                // upper level
+                                break;
+                            }
+                        }
+
+                        switch (ctx.op) {
+
+                        case TI_INSERT: {
+                            // non-null splitKey => child split; insert the
+                            // separator key into this interior page
+                            if (ctx.splitKey.getBuffer() != null) {
+                                node = bufferCache.pin(BufferedFileHandle
+                                        .getDiskPageId(fileId, pageId), false);
+                                pins++;
+                                node.acquireWriteLatch();
+                                writeLatchesAcquired++;
+                                try {
+                                    insertInterior(node, pageId, ctx.splitKey
+                                            .getTuple(), ctx);
+                                } finally {
+                                    node.releaseWriteLatch();
+                                    writeLatchesReleased++;
+                                    bufferCache.unpin(node);
+                                    unpins++;
+                                }
+                            } else {
+                                unsetSmPages(ctx);
+                            }
+                        }
+                            break;
+
+                        case TI_DELETE: {
+                            // non-null splitKey => child emptied; remove its
+                            // key from this interior page
+                            if (ctx.splitKey.getBuffer() != null) {
+                                node = bufferCache.pin(BufferedFileHandle
+                                        .getDiskPageId(fileId, pageId), false);
+                                pins++;
+                                node.acquireWriteLatch();
+                                writeLatchesAcquired++;
+                                try {
+                                    deleteInterior(node, pageId, ctx.pred
+                                            .getLowKey(), ctx);
+                                } finally {
+                                    node.releaseWriteLatch();
+                                    writeLatchesReleased++;
+                                    bufferCache.unpin(node);
+                                    unpins++;
+                                }
+                            } else {
+                                unsetSmPages(ctx);
+                            }
+                        }
+                            break;
+
+                        case TI_SEARCH: {
+                            // do nothing
+                        }
+                            break;
+
+                        }
+
+                        repeatOp = false; // operation completed
+
+                    } // end while
+                } else { // smFlag
+                    ctx.opRestarts++;
+                    System.out.println("ONGOING SM ON PAGE " + pageId
+                            + " AT LEVEL " + ctx.interiorFrame.getLevel()
+                            + ", RESTARTS: " + ctx.opRestarts);
+                    releaseLatch(node, ctx.op, unsafeIsLeaf);
+                    bufferCache.unpin(node);
+                    unpins++;
+
+                    // TODO: this should be an instant duration lock, how to do
+                    // this in java?
+                    // instead we just immediately release the lock. this is
+                    // inefficient but still correct and will not cause
+                    // latch-deadlock
+                    treeLatch.readLock().lock();
+                    treeLatch.readLock().unlock();
+
+                    // unwind recursion and restart operation, find lowest page
+                    // with a pageLsn as seen by this operation during descent
+                    ctx.pageLsns.removeLast(); // pop current page lsn
+                    // put special value on the stack to inform caller of
+                    // restart
+                    ctx.pageLsns.add(RESTART_OP);
+                }
+            } else { // isLeaf and !smFlag
+                switch (ctx.op) {
+                case TI_INSERT: {
+                    insertLeaf(node, pageId, ctx.pred.getLowKey(), ctx);
+                }
+                    break;
+
+                case TI_DELETE: {
+                    deleteLeaf(node, pageId, ctx.pred.getLowKey(), ctx);
+                }
+                    break;
+
+                case TI_SEARCH: {
+                    ctx.cursor.open(node, ctx.pred);
+                }
+                    break;
+                }
+            }
+        } catch (TreeIndexException e) {
+            // System.out.println("BTREE EXCEPTION");
+            // System.out.println(e.getMessage());
+            // e.printStackTrace();
+            // 'handled' prevents double-release as the exception propagates
+            // up through the recursive calls
+            if (!e.getHandled()) {
+                releaseLatch(node, ctx.op, unsafeIsLeaf);
+                bufferCache.unpin(node);
+                unpins++;
+                e.setHandled(true);
+            }
+            throw e;
+        } catch (Exception e) { // this could be caused, e.g. by a
+            // failure to pin a new node during a split
+            System.out.println("ASTERIX EXCEPTION");
+            e.printStackTrace();
+            releaseLatch(node, ctx.op, unsafeIsLeaf);
+            bufferCache.unpin(node);
+            unpins++;
+            BTreeException propException = new BTreeException(e);
+            propException.setHandled(true); // propagate a BTreeException,
+            // indicating that the parent node
+            // must not be unlatched and
+            // unpinned
+            throw propException;
+        }
+    }
+
+    // Whether bulk-load pins request brand-new pages from the buffer cache
+    // (passed as the 'newPage' flag to bufferCache.pin during bulk load).
+    private boolean bulkNewPage = false;
+
+    // State carried across bulkLoadAddTuple() calls: one NodeFrontier (the
+    // current rightmost, write-latched page) per tree level, plus the split
+    // key being propagated upward and the per-frame byte budgets derived
+    // from the fill factor.
+    public final class BulkLoadContext {
+        public final int slotSize;
+        public final int leafMaxBytes;
+        public final int interiorMaxBytes;
+        public final BTreeSplitKey splitKey;
+        // we maintain a frontier of nodes for each level
+        private final ArrayList<NodeFrontier> nodeFrontiers = new ArrayList<NodeFrontier>();
+        private final IBTreeLeafFrame leafFrame;
+        private final IBTreeInteriorFrame interiorFrame;
+        private final ITreeIndexMetaDataFrame metaFrame;
+
+        private final ITreeIndexTupleWriter tupleWriter;
+
+        // Allocates and write-latches the first leaf frontier page (level 0)
+        // and computes the max bytes per leaf/interior page from fillFactor.
+        public BulkLoadContext(float fillFactor, IBTreeLeafFrame leafFrame,
+                IBTreeInteriorFrame interiorFrame,
+                ITreeIndexMetaDataFrame metaFrame) throws HyracksDataException {
+
+            splitKey = new BTreeSplitKey(leafFrame.getTupleWriter()
+                    .createTupleReference());
+            tupleWriter = leafFrame.getTupleWriter();
+
+            NodeFrontier leafFrontier = new NodeFrontier(leafFrame
+                    .createTupleReference());
+            leafFrontier.pageId = freePageManager.getFreePage(metaFrame);
+            leafFrontier.page = bufferCache.pin(BufferedFileHandle
+                    .getDiskPageId(fileId, leafFrontier.pageId), bulkNewPage);
+            leafFrontier.page.acquireWriteLatch();
+
+            interiorFrame.setPage(leafFrontier.page);
+            interiorFrame.initBuffer((byte) 0);
+            interiorMaxBytes = (int) ((float) interiorFrame.getBuffer()
+                    .capacity() * fillFactor);
+
+            leafFrame.setPage(leafFrontier.page);
+            leafFrame.initBuffer((byte) 0);
+            leafMaxBytes = (int) ((float) leafFrame.getBuffer().capacity() * fillFactor);
+
+            slotSize = leafFrame.getSlotSize();
+
+            this.leafFrame = leafFrame;
+            this.interiorFrame = interiorFrame;
+            this.metaFrame = metaFrame;
+
+            nodeFrontiers.add(leafFrontier);
+        }
+
+        // Adds a new frontier one level above the current top (its level is
+        // the current frontier count), pinned and write-latched.
+        private void addLevel() throws HyracksDataException {
+            NodeFrontier frontier = new NodeFrontier(tupleWriter
+                    .createTupleReference());
+            frontier.pageId = freePageManager.getFreePage(metaFrame);
+            frontier.page = bufferCache.pin(BufferedFileHandle.getDiskPageId(
+                    fileId, frontier.pageId), bulkNewPage);
+            frontier.page.acquireWriteLatch();
+            frontier.lastTuple.setFieldCount(cmp.getKeyFieldCount());
+            interiorFrame.setPage(frontier.page);
+            interiorFrame.initBuffer((byte) nodeFrontiers.size());
+            nodeFrontiers.add(frontier);
+        }
+    }
+
+    // Pushes ctx.splitKey into the interior frontier at 'level' during bulk
+    // load. If that frontier page would exceed its fill-factor budget, its
+    // greatest key is pulled out to become the new split key, the page is
+    // closed out, a fresh frontier page is started, and the split key is
+    // recursively propagated to level + 1. No-op when splitKey is null.
+    private void propagateBulk(BulkLoadContext ctx, int level)
+            throws HyracksDataException {
+
+        if (ctx.splitKey.getBuffer() == null)
+            return;
+
+        if (level >= ctx.nodeFrontiers.size())
+            ctx.addLevel();
+
+        NodeFrontier frontier = ctx.nodeFrontiers.get(level);
+        ctx.interiorFrame.setPage(frontier.page);
+
+        ITupleReference tuple = ctx.splitKey.getTuple();
+        // + 4 accounts for the child pointer stored with each interior key
+        int spaceNeeded = ctx.tupleWriter.bytesRequired(tuple, 0, cmp
+                .getKeyFieldCount())
+                + ctx.slotSize + 4;
+        int spaceUsed = ctx.interiorFrame.getBuffer().capacity()
+                - ctx.interiorFrame.getTotalFreeSpace();
+        if (spaceUsed + spaceNeeded > ctx.interiorMaxBytes) {
+
+            // copy the incoming key aside, since ctx.splitKey is about to be
+            // overwritten with this page's greatest key
+            BTreeSplitKey copyKey = ctx.splitKey.duplicate(ctx.leafFrame
+                    .getTupleWriter().createTupleReference());
+            tuple = copyKey.getTuple();
+
+            frontier.lastTuple.resetByTupleOffset(frontier.page.getBuffer(),
+                    ctx.interiorFrame.getTupleOffset(ctx.interiorFrame
+                            .getTupleCount() - 1));
+            int splitKeySize = ctx.tupleWriter.bytesRequired(
+                    frontier.lastTuple, 0, cmp.getKeyFieldCount());
+            ctx.splitKey.initData(splitKeySize);
+            ctx.tupleWriter.writeTupleFields(frontier.lastTuple, 0, cmp
+                    .getKeyFieldCount(), ctx.splitKey.getBuffer(), 0);
+            ctx.splitKey.getTuple().resetByTupleOffset(
+                    ctx.splitKey.getBuffer(), 0);
+            ctx.splitKey.setLeftPage(frontier.pageId);
+
+            // the greatest key moves up; its child pointer becomes this
+            // page's rightmost child
+            ctx.interiorFrame.deleteGreatest(cmp);
+
+            frontier.page.releaseWriteLatch();
+            bufferCache.unpin(frontier.page);
+            frontier.pageId = freePageManager.getFreePage(ctx.metaFrame);
+
+            ctx.splitKey.setRightPage(frontier.pageId);
+            propagateBulk(ctx, level + 1);
+
+            frontier.page = bufferCache.pin(BufferedFileHandle.getDiskPageId(
+                    fileId, frontier.pageId), bulkNewPage);
+            frontier.page.acquireWriteLatch();
+            ctx.interiorFrame.setPage(frontier.page);
+            ctx.interiorFrame.initBuffer((byte) level);
+        }
+        ctx.interiorFrame.insertSorted(tuple, cmp);
+
+        // debug print
+        // ISerializerDeserializer[] btreeSerde = {
+        // UTF8StringSerializerDeserializer.INSTANCE,
+        // IntegerSerializerDeserializer.INSTANCE };
+        // String s = ctx.interiorFrame.printKeys(cmp, btreeSerde);
+        // System.out.println(s);
+    }
+
+    // assumes btree has been created and opened
+    // Starts a bulk load: fails if the tree was already bulk-loaded, builds
+    // a BulkLoadContext (which latches the first leaf frontier page), and
+    // sets field counts on the frontier/split-key tuples. Feed tuples via
+    // bulkLoadAddTuple() and finish with endBulkLoad().
+    public BulkLoadContext beginBulkLoad(float fillFactor,
+            IBTreeLeafFrame leafFrame, IBTreeInteriorFrame interiorFrame,
+            ITreeIndexMetaDataFrame metaFrame) throws HyracksDataException {
+
+        if (loaded)
+            throw new HyracksDataException(
+                    "Trying to bulk-load BTree but has BTree already been loaded.");
+
+        BulkLoadContext ctx = new BulkLoadContext(fillFactor, leafFrame,
+                interiorFrame, metaFrame);
+        ctx.nodeFrontiers.get(0).lastTuple.setFieldCount(cmp.getFieldCount());
+        ctx.splitKey.getTuple().setFieldCount(cmp.getKeyFieldCount());
+        return ctx;
+    }
+
+    // Appends one tuple (callers must supply tuples in sorted order) to the
+    // current leaf frontier. When the leaf would exceed its fill-factor
+    // budget, compression is tried first; if still full, the leaf is closed
+    // out: its last tuple becomes the split key, a new leaf is started,
+    // next/prev leaf pointers are chained, and the split key is propagated
+    // up via propagateBulk().
+    public void bulkLoadAddTuple(BulkLoadContext ctx, ITupleReference tuple)
+            throws HyracksDataException {
+        NodeFrontier leafFrontier = ctx.nodeFrontiers.get(0);
+        IBTreeLeafFrame leafFrame = ctx.leafFrame;
+
+        int spaceNeeded = ctx.tupleWriter.bytesRequired(tuple) + ctx.slotSize;
+        int spaceUsed = leafFrame.getBuffer().capacity()
+                - leafFrame.getTotalFreeSpace();
+
+        // try to free space by compression
+        if (spaceUsed + spaceNeeded > ctx.leafMaxBytes) {
+            leafFrame.compress(cmp);
+            spaceUsed = leafFrame.getBuffer().capacity()
+                    - leafFrame.getTotalFreeSpace();
+        }
+
+        if (spaceUsed + spaceNeeded > ctx.leafMaxBytes) {
+            leafFrontier.lastTuple.resetByTupleIndex(leafFrame, leafFrame
+                    .getTupleCount() - 1);
+            int splitKeySize = ctx.tupleWriter.bytesRequired(
+                    leafFrontier.lastTuple, 0, cmp.getKeyFieldCount());
+            ctx.splitKey.initData(splitKeySize);
+            ctx.tupleWriter.writeTupleFields(leafFrontier.lastTuple, 0, cmp
+                    .getKeyFieldCount(), ctx.splitKey.getBuffer(), 0);
+            ctx.splitKey.getTuple().resetByTupleOffset(
+                    ctx.splitKey.getBuffer(), 0);
+            ctx.splitKey.setLeftPage(leafFrontier.pageId);
+            int prevPageId = leafFrontier.pageId;
+            leafFrontier.pageId = freePageManager.getFreePage(ctx.metaFrame);
+
+            // link old leaf -> new leaf before releasing it
+            leafFrame.setNextLeaf(leafFrontier.pageId);
+            leafFrontier.page.releaseWriteLatch();
+            bufferCache.unpin(leafFrontier.page);
+
+            ctx.splitKey.setRightPage(leafFrontier.pageId);
+            propagateBulk(ctx, 1);
+
+            leafFrontier.page = bufferCache.pin(BufferedFileHandle
+                    .getDiskPageId(fileId, leafFrontier.pageId), bulkNewPage);
+            leafFrontier.page.acquireWriteLatch();
+            leafFrame.setPage(leafFrontier.page);
+            leafFrame.initBuffer((byte) 0);
+            leafFrame.setPrevLeaf(prevPageId);
+        }
+
+        leafFrame.setPage(leafFrontier.page);
+        leafFrame.insertSorted(tuple, cmp);
+
+        // debug print
+        // ISerializerDeserializer[] btreeSerde = {
+        // UTF8StringSerializerDeserializer.INSTANCE,
+        // IntegerSerializerDeserializer.INSTANCE };
+        // String s = leafFrame.printKeys(cmp, btreeSerde);
+        // System.out.println(s);
+    }
+
+    // Finishes a bulk load: copies the topmost frontier page's contents into
+    // the fixed root page (the root id never changes), then releases and
+    // unpins every frontier page. Marks the tree as loaded.
+    public void endBulkLoad(BulkLoadContext ctx) throws HyracksDataException {
+        // copy root
+        ICachedPage rootNode = bufferCache.pin(BufferedFileHandle
+                .getDiskPageId(fileId, rootPage), bulkNewPage);
+        rootNode.acquireWriteLatch();
+        try {
+            ICachedPage toBeRoot = ctx.nodeFrontiers.get(ctx.nodeFrontiers
+                    .size() - 1).page;
+            System.arraycopy(toBeRoot.getBuffer().array(), 0, rootNode
+                    .getBuffer().array(), 0, toBeRoot.getBuffer().capacity());
+        } finally {
+            rootNode.releaseWriteLatch();
+            bufferCache.unpin(rootNode);
+
+            // cleanup
+            for (int i = 0; i < ctx.nodeFrontiers.size(); i++) {
+                ctx.nodeFrontiers.get(i).page.releaseWriteLatch();
+                bufferCache.unpin(ctx.nodeFrontiers.get(i).page);
+            }
+        }
+        // debug
+        currentLevel = (byte) ctx.nodeFrontiers.size();
+
+        loaded = true;
+    }
- private final static int RESTART_OP = Integer.MIN_VALUE;
- private final static int MAX_RESTARTS = 10;
-
- // the root page never changes
- private final int rootPage = 1;
+    // Creates a per-operation context for the given op and frames; the final
+    // argument (6) is a tree-height hint sizing the pageLsns trail.
+    public BTreeOpContext createOpContext(TreeIndexOp op,
+            IBTreeLeafFrame leafFrame, IBTreeInteriorFrame interiorFrame,
+            ITreeIndexMetaDataFrame metaFrame) {
+        // TODO: figure out better tree-height hint
+        return new BTreeOpContext(op, leafFrame, interiorFrame, metaFrame, 6);
+    }
- private final IFreePageManager freePageManager;
-
- private boolean created = false;
- private boolean loaded = false;
+    // Returns the factory used to create interior-frame accessors.
+    public IBTreeInteriorFrameFactory getInteriorFrameFactory() {
+        return interiorFrameFactory;
+    }
- private final IBufferCache bufferCache;
- private int fileId;
- private final IBTreeInteriorFrameFactory interiorFrameFactory;
- private final IBTreeLeafFrameFactory leafFrameFactory;
- private final MultiComparator cmp;
- private final ReadWriteLock treeLatch;
- private final RangePredicate diskOrderScanPredicate;
+    // Returns the factory used to create leaf-frame accessors.
+    public IBTreeLeafFrameFactory getLeafFrameFactory() {
+        return leafFrameFactory;
+    }
- public int rootSplits = 0;
- public int[] splitsByLevel = new int[500];
- public long readLatchesAcquired = 0;
- public long readLatchesReleased = 0;
- public long writeLatchesAcquired = 0;
- public long writeLatchesReleased = 0;
- public long pins = 0;
- public long unpins = 0;
+    // Returns the key comparator this tree was constructed with.
+    public MultiComparator getMultiComparator() {
+        return cmp;
+    }
- public long treeLatchesAcquired = 0;
- public long treeLatchesReleased = 0;
-
- public byte currentLevel = 0;
-
- public int usefulCompression = 0;
- public int uselessCompression = 0;
-
- public void treeLatchStatus() {
- System.out.println(treeLatch.writeLock().toString());
- }
-
- public String printStats() {
- StringBuilder strBuilder = new StringBuilder();
- strBuilder.append("\n");
- strBuilder.append("ROOTSPLITS: " + rootSplits + "\n");
- strBuilder.append("SPLITS BY LEVEL\n");
- for (int i = 0; i < currentLevel; i++) {
- strBuilder.append(String.format("%3d ", i) + String.format("%8d ", splitsByLevel[i]) + "\n");
- }
- strBuilder.append(String.format("READ LATCHES: %10d %10d\n", readLatchesAcquired, readLatchesReleased));
- strBuilder.append(String.format("WRITE LATCHES: %10d %10d\n", writeLatchesAcquired, writeLatchesReleased));
- strBuilder.append(String.format("PINS: %10d %10d\n", pins, unpins));
- return strBuilder.toString();
- }
-
- public BTree(IBufferCache bufferCache, IFreePageManager freePageManager, IBTreeInteriorFrameFactory interiorFrameFactory,
- IBTreeLeafFrameFactory leafFrameFactory, MultiComparator cmp) {
- this.bufferCache = bufferCache;
- this.interiorFrameFactory = interiorFrameFactory;
- this.leafFrameFactory = leafFrameFactory;
- this.cmp = cmp;
- this.freePageManager = freePageManager;
- this.treeLatch = new ReentrantReadWriteLock(true);
- this.diskOrderScanPredicate = new RangePredicate(true, null, null, true, true, cmp, cmp);
- }
-
- public void create(int fileId, IBTreeLeafFrame leafFrame, ITreeIndexMetaDataFrame metaFrame) throws Exception {
-
- if (created)
- return;
-
- treeLatch.writeLock().lock();
- try {
-
- // check if another thread beat us to it
- if (created)
- return;
-
- freePageManager.init(metaFrame, rootPage);
-
- // initialize root page
- ICachedPage rootNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), true);
- pins++;
-
- rootNode.acquireWriteLatch();
- writeLatchesAcquired++;
- try {
- leafFrame.setPage(rootNode);
- leafFrame.initBuffer((byte) 0);
- } finally {
- rootNode.releaseWriteLatch();
- writeLatchesReleased++;
- bufferCache.unpin(rootNode);
- unpins++;
- }
- currentLevel = 0;
-
- created = true;
- } finally {
- treeLatch.writeLock().unlock();
- }
- }
-
- public void open(int fileId) {
- this.fileId = fileId;
- }
-
- public void close() {
- fileId = -1;
- }
-
- private void addFreePages(BTreeOpContext ctx) throws Exception {
- for (int i = 0; i < ctx.freePages.size(); i++) {
- // root page is special, don't add it to free pages
- if(ctx.freePages.get(i) != rootPage) {
- freePageManager.addFreePage(ctx.metaFrame, ctx.freePages.get(i));
- }
- }
- ctx.freePages.clear();
- }
-
- public void printTree(IBTreeLeafFrame leafFrame, IBTreeInteriorFrame interiorFrame, ISerializerDeserializer[] fields)
- throws Exception {
- printTree(rootPage, null, false, leafFrame, interiorFrame, fields);
- }
-
- public void printTree(int pageId, ICachedPage parent, boolean unpin, IBTreeLeafFrame leafFrame,
- IBTreeInteriorFrame interiorFrame, ISerializerDeserializer[] fields) throws Exception {
-
- ICachedPage node = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, pageId), false);
- pins++;
- node.acquireReadLatch();
- readLatchesAcquired++;
-
- try {
- if (parent != null && unpin == true) {
- parent.releaseReadLatch();
- readLatchesReleased++;
-
- bufferCache.unpin(parent);
- unpins++;
- }
-
- interiorFrame.setPage(node);
- int level = interiorFrame.getLevel();
-
- System.out.format("%1d ", level);
- System.out.format("%3d ", pageId);
- for (int i = 0; i < currentLevel - level; i++)
- System.out.format(" ");
-
- String keyString;
- if (interiorFrame.isLeaf()) {
- leafFrame.setPage(node);
- keyString = leafFrame.printKeys(cmp, fields);
- } else {
- keyString = interiorFrame.printKeys(cmp, fields);
- }
-
- System.out.format(keyString);
- if (!interiorFrame.isLeaf()) {
- ArrayList<Integer> children = ((NSMInteriorFrame) (interiorFrame)).getChildren(cmp);
-
- for (int i = 0; i < children.size(); i++) {
- printTree(children.get(i), node, i == children.size() - 1, leafFrame, interiorFrame, fields);
- }
- } else {
- node.releaseReadLatch();
- readLatchesReleased++;
-
- bufferCache.unpin(node);
- unpins++;
- }
- } catch (Exception e) {
- node.releaseReadLatch();
- readLatchesReleased++;
-
- bufferCache.unpin(node);
- unpins++;
- e.printStackTrace();
- }
- }
-
- public void diskOrderScan(DiskOrderScanCursor cursor, IBTreeLeafFrame leafFrame, ITreeIndexMetaDataFrame metaFrame)
- throws HyracksDataException {
- int currentPageId = rootPage + 1;
- int maxPageId = freePageManager.getMaxPage(metaFrame);
-
- ICachedPage page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), false);
- page.acquireReadLatch();
- cursor.setBufferCache(bufferCache);
- cursor.setFileId(fileId);
- cursor.setCurrentPageId(currentPageId);
- cursor.setMaxPageId(maxPageId);
- cursor.open(page, diskOrderScanPredicate);
- }
-
- public void search(IBTreeCursor cursor, RangePredicate pred, BTreeOpContext ctx) throws Exception {
- ctx.reset();
- ctx.pred = pred;
- ctx.cursor = cursor;
- // simple index scan
- if (ctx.pred.getLowKeyComparator() == null)
- ctx.pred.setLowKeyComparator(cmp);
- if (ctx.pred.getHighKeyComparator() == null)
- ctx.pred.setHighKeyComparator(cmp);
-
- boolean repeatOp = true;
- // we use this loop to deal with possibly multiple operation restarts
- // due to ongoing structure modifications during the descent
- while (repeatOp && ctx.opRestarts < MAX_RESTARTS) {
- performOp(rootPage, null, ctx);
-
- // if we reach this stage then we need to restart from the (possibly
- // new) root
- if (!ctx.pageLsns.isEmpty() && ctx.pageLsns.getLast() == RESTART_OP) {
- ctx.pageLsns.removeLast(); // pop the restart op indicator
- continue;
- }
-
- repeatOp = false;
- }
-
- cursor.setBufferCache(bufferCache);
- cursor.setFileId(fileId);
- }
-
- private void unsetSmPages(BTreeOpContext ctx) throws HyracksDataException {
- ICachedPage originalPage = ctx.interiorFrame.getPage();
- for (int i = 0; i < ctx.smPages.size(); i++) {
- int pageId = ctx.smPages.get(i);
- ICachedPage smPage = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, pageId), false);
- pins++;
- smPage.acquireWriteLatch(); // TODO: would like to set page dirty
- // without latching
- writeLatchesAcquired++;
- try {
- ctx.interiorFrame.setPage(smPage);
- ctx.interiorFrame.setSmFlag(false);
- } finally {
- smPage.releaseWriteLatch();
- writeLatchesReleased++;
- bufferCache.unpin(smPage);
- unpins++;
- }
- }
- if (ctx.smPages.size() > 0) {
- treeLatch.writeLock().unlock();
- treeLatchesReleased++;
- ctx.smPages.clear();
- }
- ctx.interiorFrame.setPage(originalPage);
- }
-
- private void createNewRoot(BTreeOpContext ctx) throws Exception {
- rootSplits++; // debug
- splitsByLevel[currentLevel]++;
- currentLevel++;
-
- // make sure the root is always at the same level
- ICachedPage leftNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, ctx.splitKey.getLeftPage()), false);
- pins++;
- leftNode.acquireWriteLatch(); // TODO: think about whether latching is
- // really required
- writeLatchesAcquired++;
- try {
- ICachedPage rightNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, ctx.splitKey.getRightPage()),
- false);
- pins++;
- rightNode.acquireWriteLatch(); // TODO: think about whether latching
- // is really required
- writeLatchesAcquired++;
- try {
- int newLeftId = freePageManager.getFreePage(ctx.metaFrame);
- ICachedPage newLeftNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, newLeftId), true);
- pins++;
- newLeftNode.acquireWriteLatch(); // TODO: think about whether
- // latching is really
- // required
- writeLatchesAcquired++;
- try {
- // copy left child to new left child
- System.arraycopy(leftNode.getBuffer().array(), 0, newLeftNode.getBuffer().array(), 0, newLeftNode
- .getBuffer().capacity());
- ctx.interiorFrame.setPage(newLeftNode);
- ctx.interiorFrame.setSmFlag(false);
-
- // change sibling pointer if children are leaves
- ctx.leafFrame.setPage(rightNode);
- if (ctx.leafFrame.isLeaf()) {
- ctx.leafFrame.setPrevLeaf(newLeftId);
- }
-
- // initialize new root (leftNode becomes new root)
- ctx.interiorFrame.setPage(leftNode);
- ctx.interiorFrame.initBuffer((byte) (ctx.leafFrame.getLevel() + 1));
- ctx.interiorFrame.setSmFlag(true); // will be cleared later
- // in unsetSmPages
- ctx.splitKey.setLeftPage(newLeftId);
- ctx.interiorFrame.insert(ctx.splitKey.getTuple(), cmp);
- } finally {
- newLeftNode.releaseWriteLatch();
- writeLatchesReleased++;
- bufferCache.unpin(newLeftNode);
- unpins++;
- }
- } finally {
- rightNode.releaseWriteLatch();
- writeLatchesReleased++;
- bufferCache.unpin(rightNode);
- unpins++;
- }
- } finally {
- leftNode.releaseWriteLatch();
- writeLatchesReleased++;
- bufferCache.unpin(leftNode);
- unpins++;
- }
- }
-
- public void insert(ITupleReference tuple, BTreeOpContext ctx) throws Exception {
- ctx.reset();
- ctx.pred.setLowKeyComparator(cmp);
- ctx.pred.setHighKeyComparator(cmp);
- ctx.pred.setLowKey(tuple, true);
- ctx.pred.setHighKey(tuple, true);
- ctx.splitKey.reset();
- ctx.splitKey.getTuple().setFieldCount(cmp.getKeyFieldCount());
-
- boolean repeatOp = true;
- // we use this loop to deal with possibly multiple operation restarts
- // due to ongoing structure modifications during the descent
- while (repeatOp && ctx.opRestarts < MAX_RESTARTS) {
- performOp(rootPage, null, ctx);
-
- // if we reach this stage then we need to restart from the (possibly
- // new) root
- if (!ctx.pageLsns.isEmpty() && ctx.pageLsns.getLast() == RESTART_OP) {
- ctx.pageLsns.removeLast(); // pop the restart op indicator
- continue;
- }
-
- // we split the root, here is the key for a new root
- if (ctx.splitKey.getBuffer() != null) {
- createNewRoot(ctx);
- }
-
- unsetSmPages(ctx);
-
- repeatOp = false;
- }
- }
-
- public long uselessCompressionTime = 0;
-
- private void insertLeaf(ICachedPage node, int pageId, ITupleReference tuple, BTreeOpContext ctx) throws Exception {
- ctx.leafFrame.setPage(node);
- ctx.leafFrame.setPageTupleFieldCount(cmp.getFieldCount());
- FrameOpSpaceStatus spaceStatus = ctx.leafFrame.hasSpaceInsert(tuple, cmp);
-
- switch (spaceStatus) {
-
- case SUFFICIENT_CONTIGUOUS_SPACE: {
- // System.out.println("SUFFICIENT_CONTIGUOUS_SPACE");
- ctx.leafFrame.insert(tuple, cmp);
- ctx.splitKey.reset();
- }
- break;
-
- case SUFFICIENT_SPACE: {
- // System.out.println("SUFFICIENT_SPACE");
- ctx.leafFrame.compact(cmp);
- ctx.leafFrame.insert(tuple, cmp);
- ctx.splitKey.reset();
- }
- break;
-
- case INSUFFICIENT_SPACE: {
- // System.out.println("INSUFFICIENT_SPACE");
-
- // try compressing the page first and see if there is space
- // available
- long start = System.currentTimeMillis();
- boolean reCompressed = ctx.leafFrame.compress(cmp);
- long end = System.currentTimeMillis();
- if (reCompressed)
- spaceStatus = ctx.leafFrame.hasSpaceInsert(tuple, cmp);
-
- if (spaceStatus == FrameOpSpaceStatus.SUFFICIENT_CONTIGUOUS_SPACE) {
- ctx.leafFrame.insert(tuple, cmp);
- ctx.splitKey.reset();
-
- usefulCompression++;
- } else {
-
- uselessCompressionTime += (end - start);
- uselessCompression++;
-
- // perform split
- splitsByLevel[0]++; // debug
- int rightSiblingPageId = ctx.leafFrame.getNextLeaf();
- ICachedPage rightSibling = null;
- if (rightSiblingPageId > 0) {
- rightSibling = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rightSiblingPageId), false);
- pins++;
- }
-
- treeLatch.writeLock().lock(); // lock is released in
- // unsetSmPages(), after sm has
- // fully completed
- treeLatchesAcquired++;
- try {
-
- int rightPageId = freePageManager.getFreePage(ctx.metaFrame);
- ICachedPage rightNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rightPageId), true);
- pins++;
- rightNode.acquireWriteLatch();
- writeLatchesAcquired++;
- try {
- IBTreeLeafFrame rightFrame = leafFrameFactory.getFrame();
- rightFrame.setPage(rightNode);
- rightFrame.initBuffer((byte) 0);
- rightFrame.setPageTupleFieldCount(cmp.getFieldCount());
-
- int ret = ctx.leafFrame.split(rightFrame, tuple, cmp, ctx.splitKey);
-
- ctx.smPages.add(pageId);
- ctx.smPages.add(rightPageId);
- ctx.leafFrame.setSmFlag(true);
- rightFrame.setSmFlag(true);
-
- rightFrame.setNextLeaf(ctx.leafFrame.getNextLeaf());
- rightFrame.setPrevLeaf(pageId);
- ctx.leafFrame.setNextLeaf(rightPageId);
-
- // TODO: we just use increasing numbers as pageLsn,
- // we
- // should tie this together with the LogManager and
- // TransactionManager
- rightFrame.setPageLsn(rightFrame.getPageLsn() + 1);
- ctx.leafFrame.setPageLsn(ctx.leafFrame.getPageLsn() + 1);
-
- if (ret != 0) {
- ctx.splitKey.reset();
- } else {
- // System.out.print("LEAF SPLITKEY: ");
- // cmp.printKey(splitKey.getData(), 0);
- // System.out.println("");
-
- ctx.splitKey.setPages(pageId, rightPageId);
- }
- if (rightSibling != null) {
- rightSibling.acquireWriteLatch();
- writeLatchesAcquired++;
- try {
- rightFrame.setPage(rightSibling); // reuse
- // rightFrame
- // for
- // modification
- rightFrame.setPrevLeaf(rightPageId);
- } finally {
- rightSibling.releaseWriteLatch();
- writeLatchesReleased++;
- }
- }
- } finally {
- rightNode.releaseWriteLatch();
- writeLatchesReleased++;
- bufferCache.unpin(rightNode);
- unpins++;
- }
- } catch (Exception e) {
- treeLatch.writeLock().unlock();
- treeLatchesReleased++;
- throw e;
- } finally {
- if (rightSibling != null) {
- bufferCache.unpin(rightSibling);
- unpins++;
- }
- }
- }
- }
- break;
-
- }
-
- node.releaseWriteLatch();
- writeLatchesReleased++;
- bufferCache.unpin(node);
- unpins++;
- }
-
- private void insertInterior(ICachedPage node, int pageId, ITupleReference tuple, BTreeOpContext ctx)
- throws Exception {
- ctx.interiorFrame.setPage(node);
- ctx.interiorFrame.setPageTupleFieldCount(cmp.getKeyFieldCount());
- FrameOpSpaceStatus spaceStatus = ctx.interiorFrame.hasSpaceInsert(tuple, cmp);
- switch (spaceStatus) {
- case INSUFFICIENT_SPACE: {
- splitsByLevel[ctx.interiorFrame.getLevel()]++; // debug
- int rightPageId = freePageManager.getFreePage(ctx.metaFrame);
- ICachedPage rightNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rightPageId), true);
- pins++;
- rightNode.acquireWriteLatch();
- writeLatchesAcquired++;
- try {
- ITreeIndexFrame rightFrame = interiorFrameFactory.getFrame();
- rightFrame.setPage(rightNode);
- rightFrame.initBuffer((byte) ctx.interiorFrame.getLevel());
- rightFrame.setPageTupleFieldCount(cmp.getKeyFieldCount());
- // instead of creating a new split key, use the existing
- // splitKey
- int ret = ctx.interiorFrame.split(rightFrame, ctx.splitKey.getTuple(), cmp, ctx.splitKey);
-
- ctx.smPages.add(pageId);
- ctx.smPages.add(rightPageId);
- ctx.interiorFrame.setSmFlag(true);
- rightFrame.setSmFlag(true);
-
- // TODO: we just use increasing numbers as pageLsn, we
- // should tie this together with the LogManager and
- // TransactionManager
- rightFrame.setPageLsn(rightFrame.getPageLsn() + 1);
- ctx.interiorFrame.setPageLsn(ctx.interiorFrame.getPageLsn() + 1);
-
- if (ret != 0) {
- ctx.splitKey.reset();
- } else {
- // System.out.print("INTERIOR SPLITKEY: ");
- // cmp.printKey(splitKey.getData(), 0);
- // System.out.println("");
-
- ctx.splitKey.setPages(pageId, rightPageId);
- }
- } finally {
- rightNode.releaseWriteLatch();
- writeLatchesReleased++;
- bufferCache.unpin(rightNode);
- unpins++;
- }
- }
- break;
-
- case SUFFICIENT_CONTIGUOUS_SPACE: {
- // System.out.println("INSERT INTERIOR: " + pageId);
- ctx.interiorFrame.insert(tuple, cmp);
- ctx.splitKey.reset();
- }
- break;
-
- case SUFFICIENT_SPACE: {
- ctx.interiorFrame.compact(cmp);
- ctx.interiorFrame.insert(tuple, cmp);
- ctx.splitKey.reset();
- }
- break;
-
- }
- }
-
- public void delete(ITupleReference tuple, BTreeOpContext ctx) throws Exception {
- ctx.reset();
- ctx.pred.setLowKeyComparator(cmp);
- ctx.pred.setHighKeyComparator(cmp);
- ctx.pred.setLowKey(tuple, true);
- ctx.pred.setHighKey(tuple, true);
- ctx.splitKey.reset();
- ctx.splitKey.getTuple().setFieldCount(cmp.getKeyFieldCount());
-
- boolean repeatOp = true;
- // we use this loop to deal with possibly multiple operation restarts
- // due to ongoing structure modifications during the descent
- while (repeatOp && ctx.opRestarts < MAX_RESTARTS) {
- performOp(rootPage, null, ctx);
-
- // if we reach this stage then we need to restart from the (possibly
- // new) root
- if (!ctx.pageLsns.isEmpty() && ctx.pageLsns.getLast() == RESTART_OP) {
- ctx.pageLsns.removeLast(); // pop the restart op indicator
- continue;
- }
-
- // tree is empty, reset level to zero
- if (ctx.splitKey.getBuffer() != null) {
- ICachedPage rootNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), false);
- pins++;
- rootNode.acquireWriteLatch();
- writeLatchesAcquired++;
- try {
- ctx.leafFrame.setPage(rootNode);
- ctx.leafFrame.initBuffer((byte) 0);
- currentLevel = 0; // debug
- } finally {
- rootNode.releaseWriteLatch();
- writeLatchesReleased++;
- bufferCache.unpin(rootNode);
- unpins++;
- }
- }
-
- unsetSmPages(ctx);
-
- addFreePages(ctx);
-
- repeatOp = false;
- }
- }
-
- // TODO: to avoid latch deadlock, must modify cursor to detect empty leaves
- private void deleteLeaf(ICachedPage node, int pageId, ITupleReference tuple, BTreeOpContext ctx) throws Exception {
- ctx.leafFrame.setPage(node);
-
- // will this leaf become empty?
- if (ctx.leafFrame.getTupleCount() == 1) {
- IBTreeLeafFrame siblingFrame = leafFrameFactory.getFrame();
-
- ICachedPage leftNode = null;
- ICachedPage rightNode = null;
- int nextLeaf = ctx.leafFrame.getNextLeaf();
- int prevLeaf = ctx.leafFrame.getPrevLeaf();
-
- if (prevLeaf > 0)
- leftNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, prevLeaf), false);
-
- try {
-
- if (nextLeaf > 0)
- rightNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, nextLeaf), false);
-
- try {
- treeLatch.writeLock().lock();
- treeLatchesAcquired++;
-
- try {
- ctx.leafFrame.delete(tuple, cmp, true);
- // to propagate the deletion we only need to make the
- // splitKey != null
- // we can reuse data to identify which key to delete in
- // the parent
- ctx.splitKey.initData(1);
- } catch (Exception e) {
- // don't propagate deletion upwards if deletion at this
- // level fails
- ctx.splitKey.reset();
- throw e;
- }
-
- ctx.leafFrame.setPageLsn(ctx.leafFrame.getPageLsn() + 1); // TODO:
- // tie
- // together
- // with
- // logging
- ctx.leafFrame.setLevel(freePageManager.getFreePageLevelIndicator());
-
- ctx.smPages.add(pageId);
- ctx.leafFrame.setSmFlag(true);
-
- node.releaseWriteLatch();
- writeLatchesReleased++;
- bufferCache.unpin(node);
- unpins++;
-
- if (leftNode != null) {
- leftNode.acquireWriteLatch();
- try {
- siblingFrame.setPage(leftNode);
- siblingFrame.setNextLeaf(nextLeaf);
- siblingFrame.setPageLsn(siblingFrame.getPageLsn() + 1); // TODO:
- // tie
- // together
- // with
- // logging
- } finally {
- leftNode.releaseWriteLatch();
- }
- }
-
- if (rightNode != null) {
- rightNode.acquireWriteLatch();
- try {
- siblingFrame.setPage(rightNode);
- siblingFrame.setPrevLeaf(prevLeaf);
- siblingFrame.setPageLsn(siblingFrame.getPageLsn() + 1); // TODO:
- // tie
- // together
- // with
- // logging
- } finally {
- rightNode.releaseWriteLatch();
- }
- }
-
- // register pageId as a free
- ctx.freePages.add(pageId);
-
- } catch (Exception e) {
- treeLatch.writeLock().unlock();
- treeLatchesReleased++;
- throw e;
- } finally {
- if (rightNode != null) {
- bufferCache.unpin(rightNode);
- }
- }
- } finally {
- if (leftNode != null) {
- bufferCache.unpin(leftNode);
- }
- }
- } else { // leaf will not become empty
- ctx.leafFrame.delete(tuple, cmp, true);
- node.releaseWriteLatch();
- writeLatchesReleased++;
- bufferCache.unpin(node);
- unpins++;
- }
- }
-
- private void deleteInterior(ICachedPage node, int pageId, ITupleReference tuple, BTreeOpContext ctx)
- throws Exception {
- ctx.interiorFrame.setPage(node);
-
- // this means there is only a child pointer but no key, this case
- // propagates the split
- if (ctx.interiorFrame.getTupleCount() == 0) {
- ctx.interiorFrame.setPageLsn(ctx.interiorFrame.getPageLsn() + 1); // TODO:
- // tie
- // together
- // with
- // logging
- ctx.leafFrame.setLevel(freePageManager.getFreePageLevelIndicator());
- ctx.smPages.add(pageId);
- ctx.interiorFrame.setSmFlag(true);
- ctx.interiorFrame.setRightmostChildPageId(-1); // this node is
- // completely empty
- // register this pageId as a free page
- ctx.freePages.add(pageId);
-
- } else {
- ctx.interiorFrame.delete(tuple, cmp, false);
- ctx.interiorFrame.setPageLsn(ctx.interiorFrame.getPageLsn() + 1); // TODO:
- // tie
- // together
- // with
- // logging
- ctx.splitKey.reset(); // don't propagate deletion
- }
- }
-
- private final void acquireLatch(ICachedPage node, TreeIndexOp op, boolean isLeaf) {
- if (isLeaf && (op.equals(TreeIndexOp.TI_INSERT) || op.equals(TreeIndexOp.TI_DELETE))) {
- node.acquireWriteLatch();
- writeLatchesAcquired++;
- } else {
- node.acquireReadLatch();
- readLatchesAcquired++;
- }
- }
-
- private final void releaseLatch(ICachedPage node, TreeIndexOp op, boolean isLeaf) {
- if (isLeaf && (op.equals(TreeIndexOp.TI_INSERT) || op.equals(TreeIndexOp.TI_DELETE))) {
- node.releaseWriteLatch();
- writeLatchesReleased++;
- } else {
- node.releaseReadLatch();
- readLatchesReleased++;
- }
- }
-
- private boolean isConsistent(int pageId, BTreeOpContext ctx) throws Exception {
- ICachedPage node = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, pageId), false);
- pins++;
- node.acquireReadLatch();
- readLatchesAcquired++;
- ctx.interiorFrame.setPage(node);
- boolean isConsistent = false;
- try {
- isConsistent = ctx.pageLsns.getLast() == ctx.interiorFrame.getPageLsn();
- } finally {
- node.releaseReadLatch();
- readLatchesReleased++;
- bufferCache.unpin(node);
- unpins++;
- }
- return isConsistent;
- }
-
- private void performOp(int pageId, ICachedPage parent, BTreeOpContext ctx) throws Exception {
- ICachedPage node = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, pageId), false);
- pins++;
-
- ctx.interiorFrame.setPage(node);
- // this check performs an unprotected read in the page
- // the following could happen: TODO fill out
- boolean unsafeIsLeaf = ctx.interiorFrame.isLeaf();
- acquireLatch(node, ctx.op, unsafeIsLeaf);
- boolean smFlag = ctx.interiorFrame.getSmFlag();
- // re-check leafness after latching
- boolean isLeaf = ctx.interiorFrame.isLeaf();
-
- // remember trail of pageLsns, to unwind recursion in case of an ongoing
- // structure modification
- ctx.pageLsns.add(ctx.interiorFrame.getPageLsn());
-
- try {
-
- // latch coupling, note: parent should never be write latched,
- // otherwise something is wrong.
- if (parent != null) {
- parent.releaseReadLatch();
- readLatchesReleased++;
- bufferCache.unpin(parent);
- unpins++;
- }
-
- if (!isLeaf || smFlag) {
- if (!smFlag) {
- // we use this loop to deal with possibly multiple operation
- // restarts due to ongoing structure modifications during
- // the descent
- boolean repeatOp = true;
- while (repeatOp && ctx.opRestarts < MAX_RESTARTS) {
- int childPageId = ctx.interiorFrame.getChildPageId(ctx.pred, cmp);
- performOp(childPageId, node, ctx);
-
- if (!ctx.pageLsns.isEmpty() && ctx.pageLsns.getLast() == RESTART_OP) {
- ctx.pageLsns.removeLast(); // pop the restart op
- // indicator
- if (isConsistent(pageId, ctx)) {
- node = null; // to avoid unpinning and
- // unlatching node again in
- // recursive call
- continue; // descend the tree again
- } else {
- ctx.pageLsns.removeLast(); // pop pageLsn of
- // this page
- // (version seen by this op
- // during descent)
- ctx.pageLsns.add(RESTART_OP); // this node is
- // not
- // consistent,
- // set the
- // restart
- // indicator for
- // upper level
- break;
- }
- }
-
- switch (ctx.op) {
-
- case TI_INSERT: {
- if (ctx.splitKey.getBuffer() != null) {
- node = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, pageId), false);
- pins++;
- node.acquireWriteLatch();
- writeLatchesAcquired++;
- try {
- insertInterior(node, pageId, ctx.splitKey.getTuple(), ctx);
- } finally {
- node.releaseWriteLatch();
- writeLatchesReleased++;
- bufferCache.unpin(node);
- unpins++;
- }
- } else {
- unsetSmPages(ctx);
- }
- }
- break;
-
- case TI_DELETE: {
- if (ctx.splitKey.getBuffer() != null) {
- node = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, pageId), false);
- pins++;
- node.acquireWriteLatch();
- writeLatchesAcquired++;
- try {
- deleteInterior(node, pageId, ctx.pred.getLowKey(), ctx);
- } finally {
- node.releaseWriteLatch();
- writeLatchesReleased++;
- bufferCache.unpin(node);
- unpins++;
- }
- } else {
- unsetSmPages(ctx);
- }
- }
- break;
-
- case TI_SEARCH: {
- // do nothing
- }
- break;
-
- }
-
- repeatOp = false; // operation completed
-
- } // end while
- } else { // smFlag
- ctx.opRestarts++;
- System.out.println("ONGOING SM ON PAGE " + pageId + " AT LEVEL " + ctx.interiorFrame.getLevel()
- + ", RESTARTS: " + ctx.opRestarts);
- releaseLatch(node, ctx.op, unsafeIsLeaf);
- bufferCache.unpin(node);
- unpins++;
-
- // TODO: this should be an instant duration lock, how to do
- // this in java?
- // instead we just immediately release the lock. this is
- // inefficient but still correct and will not cause
- // latch-deadlock
- treeLatch.readLock().lock();
- treeLatch.readLock().unlock();
-
- // unwind recursion and restart operation, find lowest page
- // with a pageLsn as seen by this operation during descent
- ctx.pageLsns.removeLast(); // pop current page lsn
- // put special value on the stack to inform caller of
- // restart
- ctx.pageLsns.add(RESTART_OP);
- }
- } else { // isLeaf and !smFlag
- switch (ctx.op) {
- case TI_INSERT: {
- insertLeaf(node, pageId, ctx.pred.getLowKey(), ctx);
- }
- break;
-
- case TI_DELETE: {
- deleteLeaf(node, pageId, ctx.pred.getLowKey(), ctx);
- }
- break;
-
- case TI_SEARCH: {
- ctx.cursor.open(node, ctx.pred);
- }
- break;
- }
- }
- } catch (TreeIndexException e) {
- // System.out.println("BTREE EXCEPTION");
- // System.out.println(e.getMessage());
- // e.printStackTrace();
- if (!e.getHandled()) {
- releaseLatch(node, ctx.op, unsafeIsLeaf);
- bufferCache.unpin(node);
- unpins++;
- e.setHandled(true);
- }
- throw e;
- } catch (Exception e) { // this could be caused, e.g. by a
- // failure to pin a new node during a split
- System.out.println("ASTERIX EXCEPTION");
- e.printStackTrace();
- releaseLatch(node, ctx.op, unsafeIsLeaf);
- bufferCache.unpin(node);
- unpins++;
- BTreeException propException = new BTreeException(e);
- propException.setHandled(true); // propagate a BTreeException,
- // indicating that the parent node
- // must not be unlatched and
- // unpinned
- throw propException;
- }
- }
-
- private boolean bulkNewPage = false;
-
- public final class BulkLoadContext {
- public final int slotSize;
- public final int leafMaxBytes;
- public final int interiorMaxBytes;
- public final BTreeSplitKey splitKey;
- // we maintain a frontier of nodes for each level
- private final ArrayList<NodeFrontier> nodeFrontiers = new ArrayList<NodeFrontier>();
- private final IBTreeLeafFrame leafFrame;
- private final IBTreeInteriorFrame interiorFrame;
- private final ITreeIndexMetaDataFrame metaFrame;
-
- private final ITreeIndexTupleWriter tupleWriter;
-
- public BulkLoadContext(float fillFactor, IBTreeLeafFrame leafFrame, IBTreeInteriorFrame interiorFrame,
- ITreeIndexMetaDataFrame metaFrame) throws HyracksDataException {
-
- splitKey = new BTreeSplitKey(leafFrame.getTupleWriter().createTupleReference());
- tupleWriter = leafFrame.getTupleWriter();
-
- NodeFrontier leafFrontier = new NodeFrontier(leafFrame.createTupleReference());
- leafFrontier.pageId = freePageManager.getFreePage(metaFrame);
- leafFrontier.page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, leafFrontier.pageId), bulkNewPage);
- leafFrontier.page.acquireWriteLatch();
-
- interiorFrame.setPage(leafFrontier.page);
- interiorFrame.initBuffer((byte) 0);
- interiorMaxBytes = (int) ((float) interiorFrame.getBuffer().capacity() * fillFactor);
-
- leafFrame.setPage(leafFrontier.page);
- leafFrame.initBuffer((byte) 0);
- leafMaxBytes = (int) ((float) leafFrame.getBuffer().capacity() * fillFactor);
-
- slotSize = leafFrame.getSlotSize();
-
- this.leafFrame = leafFrame;
- this.interiorFrame = interiorFrame;
- this.metaFrame = metaFrame;
-
- nodeFrontiers.add(leafFrontier);
- }
-
- private void addLevel() throws HyracksDataException {
- NodeFrontier frontier = new NodeFrontier(tupleWriter.createTupleReference());
- frontier.pageId = freePageManager.getFreePage(metaFrame);
- frontier.page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, frontier.pageId), bulkNewPage);
- frontier.page.acquireWriteLatch();
- frontier.lastTuple.setFieldCount(cmp.getKeyFieldCount());
- interiorFrame.setPage(frontier.page);
- interiorFrame.initBuffer((byte) nodeFrontiers.size());
- nodeFrontiers.add(frontier);
- }
- }
-
- private void propagateBulk(BulkLoadContext ctx, int level) throws HyracksDataException {
-
- if (ctx.splitKey.getBuffer() == null)
- return;
-
- if (level >= ctx.nodeFrontiers.size())
- ctx.addLevel();
-
- NodeFrontier frontier = ctx.nodeFrontiers.get(level);
- ctx.interiorFrame.setPage(frontier.page);
-
- ITupleReference tuple = ctx.splitKey.getTuple();
- int spaceNeeded = ctx.tupleWriter.bytesRequired(tuple, 0, cmp.getKeyFieldCount()) + ctx.slotSize + 4;
- int spaceUsed = ctx.interiorFrame.getBuffer().capacity() - ctx.interiorFrame.getTotalFreeSpace();
- if (spaceUsed + spaceNeeded > ctx.interiorMaxBytes) {
-
- BTreeSplitKey copyKey = ctx.splitKey.duplicate(ctx.leafFrame.getTupleWriter().createTupleReference());
- tuple = copyKey.getTuple();
-
- frontier.lastTuple.resetByTupleOffset(frontier.page.getBuffer(), ctx.interiorFrame
- .getTupleOffset(ctx.interiorFrame.getTupleCount() - 1));
- int splitKeySize = ctx.tupleWriter.bytesRequired(frontier.lastTuple, 0, cmp.getKeyFieldCount());
- ctx.splitKey.initData(splitKeySize);
- ctx.tupleWriter
- .writeTupleFields(frontier.lastTuple, 0, cmp.getKeyFieldCount(), ctx.splitKey.getBuffer(), 0);
- ctx.splitKey.getTuple().resetByTupleOffset(ctx.splitKey.getBuffer(), 0);
- ctx.splitKey.setLeftPage(frontier.pageId);
-
- ctx.interiorFrame.deleteGreatest(cmp);
-
- frontier.page.releaseWriteLatch();
- bufferCache.unpin(frontier.page);
- frontier.pageId = freePageManager.getFreePage(ctx.metaFrame);
-
- ctx.splitKey.setRightPage(frontier.pageId);
- propagateBulk(ctx, level + 1);
-
- frontier.page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, frontier.pageId), bulkNewPage);
- frontier.page.acquireWriteLatch();
- ctx.interiorFrame.setPage(frontier.page);
- ctx.interiorFrame.initBuffer((byte) level);
- }
- ctx.interiorFrame.insertSorted(tuple, cmp);
-
- // debug print
- // ISerializerDeserializer[] btreeSerde = {
- // UTF8StringSerializerDeserializer.INSTANCE,
- // IntegerSerializerDeserializer.INSTANCE };
- // String s = ctx.interiorFrame.printKeys(cmp, btreeSerde);
- // System.out.println(s);
- }
-
- // assumes btree has been created and opened
- public BulkLoadContext beginBulkLoad(float fillFactor, IBTreeLeafFrame leafFrame,
- IBTreeInteriorFrame interiorFrame, ITreeIndexMetaDataFrame metaFrame) throws HyracksDataException {
-
- if (loaded)
- throw new HyracksDataException("Trying to bulk-load BTree but has BTree already been loaded.");
-
- BulkLoadContext ctx = new BulkLoadContext(fillFactor, leafFrame, interiorFrame, metaFrame);
- ctx.nodeFrontiers.get(0).lastTuple.setFieldCount(cmp.getFieldCount());
- ctx.splitKey.getTuple().setFieldCount(cmp.getKeyFieldCount());
- return ctx;
- }
-
- public void bulkLoadAddTuple(BulkLoadContext ctx, ITupleReference tuple) throws HyracksDataException {
- NodeFrontier leafFrontier = ctx.nodeFrontiers.get(0);
- IBTreeLeafFrame leafFrame = ctx.leafFrame;
-
- int spaceNeeded = ctx.tupleWriter.bytesRequired(tuple) + ctx.slotSize;
- int spaceUsed = leafFrame.getBuffer().capacity() - leafFrame.getTotalFreeSpace();
-
- // try to free space by compression
- if (spaceUsed + spaceNeeded > ctx.leafMaxBytes) {
- leafFrame.compress(cmp);
- spaceUsed = leafFrame.getBuffer().capacity() - leafFrame.getTotalFreeSpace();
- }
-
- if (spaceUsed + spaceNeeded > ctx.leafMaxBytes) {
- leafFrontier.lastTuple.resetByTupleIndex(leafFrame, leafFrame.getTupleCount() - 1);
- int splitKeySize = ctx.tupleWriter.bytesRequired(leafFrontier.lastTuple, 0, cmp.getKeyFieldCount());
- ctx.splitKey.initData(splitKeySize);
- ctx.tupleWriter.writeTupleFields(leafFrontier.lastTuple, 0, cmp.getKeyFieldCount(), ctx.splitKey
- .getBuffer(), 0);
- ctx.splitKey.getTuple().resetByTupleOffset(ctx.splitKey.getBuffer(), 0);
- ctx.splitKey.setLeftPage(leafFrontier.pageId);
- int prevPageId = leafFrontier.pageId;
- leafFrontier.pageId = freePageManager.getFreePage(ctx.metaFrame);
-
- leafFrame.setNextLeaf(leafFrontier.pageId);
- leafFrontier.page.releaseWriteLatch();
- bufferCache.unpin(leafFrontier.page);
-
- ctx.splitKey.setRightPage(leafFrontier.pageId);
- propagateBulk(ctx, 1);
-
- leafFrontier.page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, leafFrontier.pageId), bulkNewPage);
- leafFrontier.page.acquireWriteLatch();
- leafFrame.setPage(leafFrontier.page);
- leafFrame.initBuffer((byte) 0);
- leafFrame.setPrevLeaf(prevPageId);
- }
-
- leafFrame.setPage(leafFrontier.page);
- leafFrame.insertSorted(tuple, cmp);
-
- // debug print
- // ISerializerDeserializer[] btreeSerde = {
- // UTF8StringSerializerDeserializer.INSTANCE,
- // IntegerSerializerDeserializer.INSTANCE };
- // String s = leafFrame.printKeys(cmp, btreeSerde);
- // System.out.println(s);
- }
-
- public void endBulkLoad(BulkLoadContext ctx) throws HyracksDataException {
- // copy root
- ICachedPage rootNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), bulkNewPage);
- rootNode.acquireWriteLatch();
- try {
- ICachedPage toBeRoot = ctx.nodeFrontiers.get(ctx.nodeFrontiers.size() - 1).page;
- System.arraycopy(toBeRoot.getBuffer().array(), 0, rootNode.getBuffer().array(), 0, toBeRoot.getBuffer()
- .capacity());
- } finally {
- rootNode.releaseWriteLatch();
- bufferCache.unpin(rootNode);
-
- // cleanup
- for (int i = 0; i < ctx.nodeFrontiers.size(); i++) {
- ctx.nodeFrontiers.get(i).page.releaseWriteLatch();
- bufferCache.unpin(ctx.nodeFrontiers.get(i).page);
- }
- }
- // debug
- currentLevel = (byte) ctx.nodeFrontiers.size();
-
- loaded = true;
- }
-
- public void getBTreeStats(IBTreeLeafFrame leafFrame, IBTreeInteriorFrame interiorFrame,
- ITreeIndexMetaDataFrame metaFrame, BTreeStats btreeStats) throws HyracksDataException {
-
- btreeStats.begin();
-
- int maxPageId = freePageManager.getMaxPage(metaFrame);
- for(int pageId = 0; pageId < maxPageId; pageId++) {
- ICachedPage page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, pageId), false);
- page.acquireReadLatch();
- try {
- metaFrame.setPage(page);
- leafFrame.setPage(page);
- interiorFrame.setPage(page);
-
- if(pageId == rootPage) {
- btreeStats.addRoot(leafFrame, interiorFrame);
- } else if(leafFrame.isLeaf()) {
- double fillFactor = (double)(leafFrame.getBuffer().capacity() - leafFrame.getTotalFreeSpace()) / (double)leafFrame.getBuffer().capacity();
- System.out.println("FILLFACTOR: " + pageId + " " + fillFactor + " " + leafFrame.getTupleCount());
- btreeStats.add(leafFrame);
- } else if(interiorFrame.isInterior()) {
- btreeStats.add(interiorFrame);
- } else {
- System.out.println("META: " + metaFrame.getLevel());
- btreeStats.add(metaFrame, freePageManager);
- }
-
- } finally {
- page.releaseReadLatch();
- bufferCache.unpin(page);
- }
- }
-
- btreeStats.end();
- }
-
- public BTreeOpContext createOpContext(TreeIndexOp op, IBTreeLeafFrame leafFrame, IBTreeInteriorFrame interiorFrame,
- ITreeIndexMetaDataFrame metaFrame) {
- // TODO: figure out better tree-height hint
- return new BTreeOpContext(op, leafFrame, interiorFrame, metaFrame, 6);
- }
-
- public IBTreeInteriorFrameFactory getInteriorFrameFactory() {
- return interiorFrameFactory;
- }
-
- public IBTreeLeafFrameFactory getLeafFrameFactory() {
- return leafFrameFactory;
- }
-
- public MultiComparator getMultiComparator() {
- return cmp;
- }
-
- public IFreePageManager getFreePageManager() {
- return freePageManager;
- }
+ public IFreePageManager getFreePageManager() {
+ return freePageManager;
+ }
+
+ public int getRootPageId() {
+ return rootPage;
+ }
}
diff --git a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeOpContext.java b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeOpContext.java
index eb1f53d..7d38dac 100644
--- a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeOpContext.java
+++ b/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeOpContext.java
@@ -19,6 +19,7 @@
import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeInteriorFrame;
import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexMetaDataFrame;
+import edu.uci.ics.hyracks.storage.am.common.api.IntArrayList;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.TreeIndexOp;
public final class BTreeOpContext {
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/api/ITreeIndexFrame.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/api/ITreeIndexFrame.java
index 425821c..636a1f7 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/api/ITreeIndexFrame.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/api/ITreeIndexFrame.java
@@ -31,13 +31,15 @@
public ByteBuffer getBuffer();
- public void insert(ITupleReference tuple, MultiComparator cmp) throws Exception;
-
+ public int findTupleIndex(ITupleReference tuple, MultiComparator cmp) throws Exception;
+ public void insert(ITupleReference tuple, MultiComparator cmp, int tupleIndex) throws Exception;
+
public void update(int rid, ITupleReference tuple) throws Exception;
public void delete(ITupleReference tuple, MultiComparator cmp, boolean exactDelete) throws Exception;
- public void compact(MultiComparator cmp);
+ // returns true if slots were modified, false otherwise
+ public boolean compact(MultiComparator cmp);
public boolean compress(MultiComparator cmp) throws HyracksDataException;
diff --git a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/IntArrayList.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/api/IntArrayList.java
similarity index 96%
rename from hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/IntArrayList.java
rename to hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/api/IntArrayList.java
index 2d0b9df..c232ba8 100644
--- a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/IntArrayList.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/api/IntArrayList.java
@@ -13,7 +13,7 @@
* limitations under the License.
*/
-package edu.uci.ics.hyracks.storage.am.btree.impls;
+package edu.uci.ics.hyracks.storage.am.common.api;
public class IntArrayList {
private int[] data;
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/frames/LIFOMetaDataFrame.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/frames/LIFOMetaDataFrame.java
index 759efc3..b621b89 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/frames/LIFOMetaDataFrame.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/frames/LIFOMetaDataFrame.java
@@ -30,8 +30,9 @@
protected static final int tupleCountOff = 0;
protected static final int freeSpaceOff = tupleCountOff + 4;
protected static final int maxPageOff = freeSpaceOff + 4;
- protected static final byte levelOff = maxPageOff + 1;
- protected static final byte nextPageOff = maxPageOff + 8;
+ protected static final int dummyFieldOff = maxPageOff + 4;
+ protected static final byte levelOff = dummyFieldOff + 4;
+ protected static final byte nextPageOff = levelOff + 1;
protected ICachedPage page = null;
protected ByteBuffer buf = null;
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/frames/TreeIndexNSMFrame.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/frames/TreeIndexNSMFrame.java
index 1e02d61..0b24c95 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/frames/TreeIndexNSMFrame.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/frames/TreeIndexNSMFrame.java
@@ -129,7 +129,7 @@
}
@Override
- public void compact(MultiComparator cmp) {
+ public boolean compact(MultiComparator cmp) {
resetSpaceParams();
frameTuple.setFieldCount(cmp.getFieldCount());
@@ -160,11 +160,14 @@
buf.putInt(freeSpaceOff, freeSpace);
buf.putInt(totalFreeSpaceOff, buf.capacity() - freeSpace - tupleCount * slotManager.getSlotSize());
+
+ return false;
}
@Override
public void delete(ITupleReference tuple, MultiComparator cmp, boolean exactDelete) throws Exception {
- frameTuple.setFieldCount(cmp.getFieldCount());
+
+ frameTuple.setFieldCount(cmp.getFieldCount());
int tupleIndex = slotManager.findTupleIndex(tuple, frameTuple, cmp, FindTupleMode.FTM_EXACT,
FindTupleNoExactMatchPolicy.FTP_HIGHER_KEY);
int slotOff = slotManager.getSlotOff(tupleIndex);
@@ -224,13 +227,15 @@
}
@Override
- public void insert(ITupleReference tuple, MultiComparator cmp) throws Exception {
- frameTuple.setFieldCount(cmp.getFieldCount());
- int tupleIndex = slotManager.findTupleIndex(tuple, frameTuple, cmp, FindTupleMode.FTM_INCLUSIVE,
- FindTupleNoExactMatchPolicy.FTP_HIGHER_KEY);
+ public int findTupleIndex(ITupleReference tuple, MultiComparator cmp) throws Exception {
+ frameTuple.setFieldCount(cmp.getFieldCount());
+ return slotManager.findTupleIndex(tuple, frameTuple, cmp, FindTupleMode.FTM_INCLUSIVE, FindTupleNoExactMatchPolicy.FTP_HIGHER_KEY);
+ }
+
+ @Override
+ public void insert(ITupleReference tuple, MultiComparator cmp, int tupleIndex) throws Exception {
slotManager.insertSlot(tupleIndex, buf.getInt(freeSpaceOff));
int bytesWritten = tupleWriter.writeTuple(tuple, buf, buf.getInt(freeSpaceOff));
-
buf.putInt(tupleCountOff, buf.getInt(tupleCountOff) + 1);
buf.putInt(freeSpaceOff, buf.getInt(freeSpaceOff) + bytesWritten);
buf.putInt(totalFreeSpaceOff, buf.getInt(totalFreeSpaceOff) - bytesWritten - slotManager.getSlotSize());
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/freepage/LinkedListFreePageManager.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/freepage/LinkedListFreePageManager.java
index 15cc943..75470da 100644
--- a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/freepage/LinkedListFreePageManager.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/freepage/LinkedListFreePageManager.java
@@ -159,7 +159,7 @@
throws HyracksDataException {
// initialize meta data page
ICachedPage metaNode = bufferCache.pin(BufferedFileHandle
- .getDiskPageId(fileId, headPage), false);
+ .getDiskPageId(fileId, headPage), true);
metaNode.acquireWriteLatch();
try {
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/utility/TreeIndexBufferCacheWarmup.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/utility/TreeIndexBufferCacheWarmup.java
new file mode 100644
index 0000000..8fe9db4
--- /dev/null
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/utility/TreeIndexBufferCacheWarmup.java
@@ -0,0 +1,86 @@
+package edu.uci.ics.hyracks.storage.am.common.utility;
+
+import java.util.ArrayList;
+import java.util.Random;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.common.api.IFreePageManager;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrame;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexMetaDataFrame;
+import edu.uci.ics.hyracks.storage.am.common.api.IntArrayList;
+import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
+import edu.uci.ics.hyracks.storage.common.buffercache.ICachedPage;
+import edu.uci.ics.hyracks.storage.common.file.BufferedFileHandle;
+
+public class TreeIndexBufferCacheWarmup {
+ private final IBufferCache bufferCache;
+ private final IFreePageManager freePageManager;
+ private final int fileId;
+ private final ArrayList<IntArrayList> pagesByLevel = new ArrayList<IntArrayList>();
+ private final Random rnd = new Random();
+
+ public TreeIndexBufferCacheWarmup(IBufferCache bufferCache,
+ IFreePageManager freePageManager, int fileId) {
+ this.bufferCache = bufferCache;
+ this.freePageManager = freePageManager;
+ this.fileId = fileId;
+ }
+
+ public void warmup(ITreeIndexFrame frame, ITreeIndexMetaDataFrame metaFrame, int[] warmupTreeLevels, int[] warmupRepeats) throws HyracksDataException {
+ bufferCache.openFile(fileId);
+
+ // scan entire file to determine pages in each level
+ int maxPageId = freePageManager.getMaxPage(metaFrame);
+ for (int pageId = 0; pageId <= maxPageId; pageId++) {
+ ICachedPage page = bufferCache.pin(BufferedFileHandle
+ .getDiskPageId(fileId, pageId), false);
+ page.acquireReadLatch();
+ try {
+ frame.setPage(page);
+ byte level = frame.getLevel();
+ while(level >= pagesByLevel.size()) {
+ pagesByLevel.add(new IntArrayList(100, 100));
+ }
+ if(level >= 0) {
+ //System.out.println("ADDING: " + level + " " + pageId);
+ pagesByLevel.get(level).add(pageId);
+ }
+ } finally {
+ page.releaseReadLatch();
+ bufferCache.unpin(page);
+ }
+ }
+
+ // pin certain pages again to simulate frequent access
+ for(int i = 0; i < warmupTreeLevels.length; i++) {
+ if(warmupTreeLevels[i] < pagesByLevel.size()) {
+ int repeats = warmupRepeats[i];
+ IntArrayList pageIds = pagesByLevel.get(warmupTreeLevels[i]);
+ int[] remainingPageIds = new int[pageIds.size()];
+ for(int r = 0; r < repeats; r++) {
+ for(int j = 0; j < pageIds.size(); j++) {
+ remainingPageIds[j] = pageIds.get(j);
+ }
+
+ int remainingLength = pageIds.size();
+ for(int j = 0; j < pageIds.size(); j++) {
+ int index = Math.abs(rnd.nextInt()) % remainingLength;
+ int pageId = remainingPageIds[index];
+
+ // pin & latch then immediately unlatch & unpin
+ ICachedPage page = bufferCache.pin(BufferedFileHandle
+ .getDiskPageId(fileId, pageId), false);
+ page.acquireReadLatch();
+ page.releaseReadLatch();
+ bufferCache.unpin(page);
+
+ remainingPageIds[index] = remainingPageIds[remainingLength-1];
+ remainingLength--;
+ }
+ }
+ }
+ }
+
+ bufferCache.closeFile(fileId);
+ }
+}
diff --git a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeStats.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/utility/TreeIndexStats.java
similarity index 75%
rename from hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeStats.java
rename to hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/utility/TreeIndexStats.java
index 6f6ba52..861f43c 100644
--- a/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTreeStats.java
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/utility/TreeIndexStats.java
@@ -1,14 +1,12 @@
-package edu.uci.ics.hyracks.storage.am.btree.impls;
+package edu.uci.ics.hyracks.storage.am.common.utility;
import java.text.DecimalFormat;
-import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeInteriorFrame;
-import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame;
import edu.uci.ics.hyracks.storage.am.common.api.IFreePageManager;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrame;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexMetaDataFrame;
-public class BTreeStats {
+public class TreeIndexStats {
private TreeIndexNodeTypeStats rootStats = new TreeIndexNodeTypeStats();
private TreeIndexNodeTypeStats interiorStats = new TreeIndexNodeTypeStats();
@@ -27,24 +25,19 @@
treeLevels = 0;
}
- public void addRoot(IBTreeLeafFrame leafFrame, IBTreeInteriorFrame interiorFrame) {
- treeLevels = leafFrame.getLevel();
- if(leafFrame.isLeaf()) {
- rootStats.add(leafFrame);
- }
- else {
- rootStats.add(interiorFrame);
+ public void addRoot(ITreeIndexFrame frame) {
+ treeLevels = frame.getLevel() + 1;
+ rootStats.add(frame);
+ }
+
+ public void add(ITreeIndexFrame frame) {
+ if(frame.isLeaf()) {
+ leafStats.add(frame);
+ } else if(frame.isInterior()) {
+ interiorStats.add(frame);
}
}
- public void add(IBTreeLeafFrame leafFrame) {
- leafStats.add(leafFrame);
- }
-
- public void add(IBTreeInteriorFrame interiorFrame) {
- interiorStats.add(interiorFrame);
- }
-
public void add(ITreeIndexMetaDataFrame metaFrame, IFreePageManager freePageManager) {
if(freePageManager.isFreePage(metaFrame)) {
freePages++;
@@ -108,12 +101,8 @@
public void add(ITreeIndexFrame frame) {
numPages++;
- numTuples += frame.getTupleCount();
- sumFillFactors += (double)(frame.getBuffer().capacity() - frame.getTotalFreeSpace()) / (double)frame.getBuffer().capacity();
- double fillFactor = (double)(frame.getBuffer().capacity() - frame.getTotalFreeSpace()) / (double)frame.getBuffer().capacity();
- //System.out.println("BLUBBI: " + frame.getBuffer().capacity() + " " + frame.getTotalFreeSpace() + " " + frame.getBuffer().capacity());
- //System.out.println("INDIVIDUAL FILL FACTOR: " + fillFactor);
- //if(fillFactor < 0.5) System.out.println("LOW FILL FACTOR: " + frame.getTupleCount());
+ numTuples += frame.getTupleCount();
+ sumFillFactors += (double)(frame.getBuffer().capacity() - frame.getTotalFreeSpace()) / (double)frame.getBuffer().capacity();
}
public long getNumTuples() {
diff --git a/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/utility/TreeIndexStatsGatherer.java b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/utility/TreeIndexStatsGatherer.java
new file mode 100644
index 0000000..f05bc39
--- /dev/null
+++ b/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/utility/TreeIndexStatsGatherer.java
@@ -0,0 +1,75 @@
+package edu.uci.ics.hyracks.storage.am.common.utility;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.common.api.IFreePageManager;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrame;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexMetaDataFrame;
+import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
+import edu.uci.ics.hyracks.storage.common.buffercache.ICachedPage;
+import edu.uci.ics.hyracks.storage.common.file.BufferedFileHandle;
+
+public class TreeIndexStatsGatherer {
+
+ private final TreeIndexStats treeIndexStats = new TreeIndexStats();
+ private final IBufferCache bufferCache;
+ private final IFreePageManager freePageManager;
+ private final int fileId;
+ private final int rootPage;
+
+ public TreeIndexStatsGatherer(IBufferCache bufferCache,
+ IFreePageManager freePageManager, int fileId, int rootPage) {
+ this.bufferCache = bufferCache;
+ this.freePageManager = freePageManager;
+ this.fileId = fileId;
+ this.rootPage = rootPage;
+ }
+
+ public TreeIndexStats gatherStats(ITreeIndexFrame leafFrame,
+ ITreeIndexFrame interiorFrame, ITreeIndexMetaDataFrame metaFrame)
+ throws HyracksDataException {
+
+ bufferCache.openFile(fileId);
+
+ treeIndexStats.begin();
+
+ int maxPageId = freePageManager.getMaxPage(metaFrame);
+ for (int pageId = 0; pageId <= maxPageId; pageId++) {
+ ICachedPage page = bufferCache.pin(BufferedFileHandle
+ .getDiskPageId(fileId, pageId), false);
+ page.acquireReadLatch();
+ try {
+ metaFrame.setPage(page);
+ leafFrame.setPage(page);
+ interiorFrame.setPage(page);
+
+ if (leafFrame.isLeaf()) {
+ if (pageId == rootPage) {
+ treeIndexStats.addRoot(leafFrame);
+ }
+ else {
+ treeIndexStats.add(leafFrame);
+ }
+ } else if (interiorFrame.isInterior()) {
+ if(pageId == rootPage) {
+ treeIndexStats.addRoot(interiorFrame);
+ }
+ else {
+ treeIndexStats.add(interiorFrame);
+ }
+ } else {
+ treeIndexStats.add(metaFrame, freePageManager);
+ }
+
+ } finally {
+ page.releaseReadLatch();
+ bufferCache.unpin(page);
+ }
+ }
+
+ treeIndexStats.end();
+
+ bufferCache.closeFile(fileId);
+
+ return treeIndexStats;
+ }
+}
diff --git a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeFieldPrefixNSMTest.java b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeFieldPrefixNSMTest.java
index e9e5456..8ef1c40 100644
--- a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeFieldPrefixNSMTest.java
+++ b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeFieldPrefixNSMTest.java
@@ -184,7 +184,8 @@
ITupleReference tuple = createTuple(ctx, a, b, c, false);
try {
- frame.insert(tuple, cmp);
+ int targetTupleIndex = frame.findTupleIndex(tuple, cmp);
+ frame.insert(tuple, cmp, targetTupleIndex);
} catch (BTreeException e) {
e.printStackTrace();
} catch (Exception e) {
diff --git a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeStatsTest.java b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeStatsTest.java
index 90243f5..df8da57 100644
--- a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeStatsTest.java
+++ b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeStatsTest.java
@@ -19,10 +19,8 @@
import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.FrameTupleReference;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.dataflow.common.data.comparators.IntegerBinaryComparatorFactory;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
-import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeCursor;
import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeInteriorFrame;
import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeInteriorFrameFactory;
import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame;
@@ -31,9 +29,6 @@
import edu.uci.ics.hyracks.storage.am.btree.frames.NSMLeafFrameFactory;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeOpContext;
-import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeStats;
-import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
-import edu.uci.ics.hyracks.storage.am.btree.impls.RangeSearchCursor;
import edu.uci.ics.hyracks.storage.am.common.api.IFreePageManager;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexMetaDataFrame;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexMetaDataFrameFactory;
@@ -43,18 +38,24 @@
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.TreeIndexOp;
import edu.uci.ics.hyracks.storage.am.common.tuples.TypeAwareTupleWriterFactory;
+import edu.uci.ics.hyracks.storage.am.common.utility.TreeIndexBufferCacheWarmup;
+import edu.uci.ics.hyracks.storage.am.common.utility.TreeIndexStats;
+import edu.uci.ics.hyracks.storage.am.common.utility.TreeIndexStatsGatherer;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder;
import edu.uci.ics.hyracks.test.support.TestUtils;
public class BTreeStatsTest extends AbstractBTreeTest {
-
- private static final int PAGE_SIZE = 32768;
- private static final int NUM_PAGES = 1000;
+
+ // private static final int PAGE_SIZE = 256;
+ // private static final int NUM_PAGES = 10;
+ //private static final int PAGE_SIZE = 32768;
+ private static final int PAGE_SIZE = 4096;
+ private static final int NUM_PAGES = 1000;
private static final int HYRACKS_FRAME_SIZE = 128;
private IHyracksStageletContext ctx = TestUtils.create(HYRACKS_FRAME_SIZE);
-
+
// FIXED-LENGTH KEY TEST
// create a B-tree with one fixed-length "key" field and one fixed-length
// "value" field
@@ -90,7 +91,7 @@
MultiComparator cmp = new MultiComparator(typeTraits, cmps);
TypeAwareTupleWriterFactory tupleWriterFactory = new TypeAwareTupleWriterFactory(
- typeTraits);
+ typeTraits);
IBTreeLeafFrameFactory leafFrameFactory = new NSMLeafFrameFactory(
tupleWriterFactory);
IBTreeInteriorFrameFactory interiorFrameFactory = new NSMInteriorFrameFactory(
@@ -101,10 +102,11 @@
IBTreeInteriorFrame interiorFrame = interiorFrameFactory.getFrame();
ITreeIndexMetaDataFrame metaFrame = metaFrameFactory.getFrame();
- IFreePageManager freePageManager = new LinkedListFreePageManager(bufferCache, fileId, 0, metaFrameFactory);
-
- BTree btree = new BTree(bufferCache, freePageManager, interiorFrameFactory,
- leafFrameFactory, cmp);
+ IFreePageManager freePageManager = new LinkedListFreePageManager(
+ bufferCache, fileId, 0, metaFrameFactory);
+
+ BTree btree = new BTree(bufferCache, freePageManager,
+ interiorFrameFactory, leafFrameFactory, cmp);
btree.create(fileId, leafFrame, metaFrame);
btree.open(fileId);
@@ -129,13 +131,13 @@
accessor.reset(frame);
FrameTupleReference tuple = new FrameTupleReference();
- BTreeOpContext insertOpCtx = btree.createOpContext(TreeIndexOp.TI_INSERT,
- leafFrame, interiorFrame, metaFrame);
+ BTreeOpContext insertOpCtx = btree.createOpContext(
+ TreeIndexOp.TI_INSERT, leafFrame, interiorFrame, metaFrame);
// 10000
- for (int i = 0; i < 100000; i++) {
+ for (int i = 0; i < 1000000; i++) {
- int f0 = rnd.nextInt() % 100000;
+ int f0 = rnd.nextInt() % 1000000;
int f1 = 5;
tb.reset();
@@ -171,33 +173,20 @@
}
// btree.printTree(leafFrame, interiorFrame);
// System.out.println();
+
- print("ORDERED SCAN:\n");
- IBTreeCursor scanCursor = new RangeSearchCursor(leafFrame);
- RangePredicate nullPred = new RangePredicate(true, null, null, true,
- true, null, null);
- BTreeOpContext searchOpCtx = btree.createOpContext(TreeIndexOp.TI_SEARCH,
- leafFrame, interiorFrame, null);
- btree.search(scanCursor, nullPred, searchOpCtx);
- try {
- while (scanCursor.hasNext()) {
- scanCursor.next();
- }
- } catch (Exception e) {
- e.printStackTrace();
- } finally {
- scanCursor.close();
- }
-
- BTreeStats btreeStats = new BTreeStats();
- btree.getBTreeStats(leafFrame, interiorFrame, metaFrame, btreeStats);
- String s = btreeStats.toString();
+ TreeIndexStatsGatherer statsGatherer = new TreeIndexStatsGatherer(bufferCache, freePageManager, fileId, btree.getRootPageId());
+ TreeIndexStats stats = statsGatherer.gatherStats(leafFrame, interiorFrame, metaFrame);
+ String s = stats.toString();
System.out.println(s);
+
+ TreeIndexBufferCacheWarmup bufferCacheWarmup = new TreeIndexBufferCacheWarmup(bufferCache, freePageManager, fileId);
+ bufferCacheWarmup.warmup(leafFrame, metaFrame, new int[] {1, 2}, new int[] {2, 5});
btree.close();
bufferCache.closeFile(fileId);
bufferCache.close();
- print("\n");
+ print("\n");
}
}
diff --git a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeTest.java b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeTest.java
index 7a00914..76f42f9 100644
--- a/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeTest.java
+++ b/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/BTreeTest.java
@@ -36,7 +36,9 @@
import edu.uci.ics.hyracks.dataflow.common.data.accessors.FrameTupleReference;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.dataflow.common.data.comparators.IntegerBinaryComparatorFactory;
+import edu.uci.ics.hyracks.dataflow.common.data.comparators.UTF8StringBinaryComparatorFactory;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeCursor;
import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeInteriorFrame;
import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeInteriorFrameFactory;
@@ -57,6 +59,7 @@
import edu.uci.ics.hyracks.storage.am.common.freepage.LinkedListFreePageManager;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.TreeIndexOp;
+import edu.uci.ics.hyracks.storage.am.common.tuples.SimpleTupleWriterFactory;
import edu.uci.ics.hyracks.storage.am.common.tuples.TypeAwareTupleWriterFactory;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
import edu.uci.ics.hyracks.storage.common.buffercache.ICacheMemoryAllocator;
@@ -314,8 +317,6 @@
print("\n");
}
-
- /*
// COMPOSITE KEY TEST (NON-UNIQUE B-TREE)
// create a B-tree with one two fixed-length "key" fields and one
@@ -1347,5 +1348,4 @@
}
return strBuilder.toString();
}
- */
}
\ No newline at end of file