Fix the multiple-deletion issue that occurs when large numbers of deletions are performed
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/BTreeSearchFunctionUpdateOperatorNodePushable.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/BTreeSearchFunctionUpdateOperatorNodePushable.java
index ff95e52..1fb5aca 100644
--- a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/BTreeSearchFunctionUpdateOperatorNodePushable.java
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/BTreeSearchFunctionUpdateOperatorNodePushable.java
@@ -43,7 +43,9 @@
import edu.uci.ics.pregelix.dataflow.std.base.IRecordDescriptorFactory;
import edu.uci.ics.pregelix.dataflow.std.base.IRuntimeHookFactory;
import edu.uci.ics.pregelix.dataflow.std.base.IUpdateFunctionFactory;
+import edu.uci.ics.pregelix.dataflow.util.CopyUpdateUtil;
import edu.uci.ics.pregelix.dataflow.util.FunctionProxy;
+import edu.uci.ics.pregelix.dataflow.util.SearchKeyTupleReference;
import edu.uci.ics.pregelix.dataflow.util.UpdateBuffer;
public class BTreeSearchFunctionUpdateOperatorNodePushable extends AbstractUnaryInputOperatorNodePushable {
@@ -74,6 +76,7 @@
private final FunctionProxy functionProxy;
private ArrayTupleBuilder cloneUpdateTb;
private final UpdateBuffer updateBuffer;
+ private final SearchKeyTupleReference tempTupleReference = new SearchKeyTupleReference();
public BTreeSearchFunctionUpdateOperatorNodePushable(AbstractTreeIndexOperatorDescriptor opDesc,
IHyracksTaskContext ctx, int partition, IRecordDescriptorProvider recordDescProvider, boolean isForward,
@@ -147,21 +150,8 @@
functionProxy.functionCall(tuple, cloneUpdateTb);
//doing clone update
- if (cloneUpdateTb.getSize() > 0) {
- if (!updateBuffer.appendTuple(cloneUpdateTb)) {
- //release the cursor/latch
- cursor.close();
- //batch update
- updateBuffer.updateBTree(indexAccessor);
-
- //search again
- cursor.reset();
- rangePred.setLowKey(tuple, true);
- rangePred.setHighKey(highKey, highKeyInclusive);
- indexAccessor.search(cursor, rangePred);
- }
- }
- cloneUpdateTb.reset();
+ CopyUpdateUtil.copyUpdate(tempTupleReference, tuple, updateBuffer, cloneUpdateTb, indexAccessor, cursor,
+ rangePred);
}
}
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopJoinFunctionUpdateOperatorNodePushable.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopJoinFunctionUpdateOperatorNodePushable.java
index 61e4649..427ffe9 100644
--- a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopJoinFunctionUpdateOperatorNodePushable.java
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopJoinFunctionUpdateOperatorNodePushable.java
@@ -43,7 +43,9 @@
import edu.uci.ics.pregelix.dataflow.std.base.IRecordDescriptorFactory;
import edu.uci.ics.pregelix.dataflow.std.base.IRuntimeHookFactory;
import edu.uci.ics.pregelix.dataflow.std.base.IUpdateFunctionFactory;
+import edu.uci.ics.pregelix.dataflow.util.CopyUpdateUtil;
import edu.uci.ics.pregelix.dataflow.util.FunctionProxy;
+import edu.uci.ics.pregelix.dataflow.util.SearchKeyTupleReference;
import edu.uci.ics.pregelix.dataflow.util.UpdateBuffer;
public class IndexNestedLoopJoinFunctionUpdateOperatorNodePushable extends AbstractUnaryInputOperatorNodePushable {
@@ -69,6 +71,7 @@
private final FunctionProxy functionProxy;
private ArrayTupleBuilder cloneUpdateTb;
private final UpdateBuffer updateBuffer;
+ private final SearchKeyTupleReference tempTupleReference = new SearchKeyTupleReference();
public IndexNestedLoopJoinFunctionUpdateOperatorNodePushable(AbstractTreeIndexOperatorDescriptor opDesc,
IHyracksTaskContext ctx, int partition, IRecordDescriptorProvider recordDescProvider, boolean isForward,
@@ -163,21 +166,9 @@
*/
functionProxy.functionCall(leftAccessor, tIndex, tupleRef, cloneUpdateTb);
- if (cloneUpdateTb.getSize() > 0) {
- if (!updateBuffer.appendTuple(cloneUpdateTb)) {
- //release the cursor/latch
- cursor.close();
- //batch update
- updateBuffer.updateBTree(indexAccessor);
-
- //search again
- cursor.reset();
- rangePred.setLowKey(tupleRef, true);
- rangePred.setHighKey(highKey, highKeyInclusive);
- indexAccessor.search(cursor, rangePred);
- }
- }
- cloneUpdateTb.reset();
+ //doing copy update
+ CopyUpdateUtil.copyUpdate(tempTupleReference, tupleRef, updateBuffer, cloneUpdateTb, indexAccessor, cursor,
+ rangePred);
}
}
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopRightOuterJoinFunctionUpdateOperatorNodePushable.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopRightOuterJoinFunctionUpdateOperatorNodePushable.java
index 5ca5382..0c13a09 100644
--- a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopRightOuterJoinFunctionUpdateOperatorNodePushable.java
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopRightOuterJoinFunctionUpdateOperatorNodePushable.java
@@ -45,7 +45,9 @@
import edu.uci.ics.pregelix.dataflow.std.base.IRecordDescriptorFactory;
import edu.uci.ics.pregelix.dataflow.std.base.IRuntimeHookFactory;
import edu.uci.ics.pregelix.dataflow.std.base.IUpdateFunctionFactory;
+import edu.uci.ics.pregelix.dataflow.util.CopyUpdateUtil;
import edu.uci.ics.pregelix.dataflow.util.FunctionProxy;
+import edu.uci.ics.pregelix.dataflow.util.SearchKeyTupleReference;
import edu.uci.ics.pregelix.dataflow.util.UpdateBuffer;
public class IndexNestedLoopRightOuterJoinFunctionUpdateOperatorNodePushable extends
@@ -79,7 +81,8 @@
private final IFrameWriter[] writers;
private final FunctionProxy functionProxy;
private ArrayTupleBuilder cloneUpdateTb;
- private UpdateBuffer updateBuffer;
+ private final UpdateBuffer updateBuffer;
+ private final SearchKeyTupleReference tempTupleReference = new SearchKeyTupleReference();
public IndexNestedLoopRightOuterJoinFunctionUpdateOperatorNodePushable(AbstractTreeIndexOperatorDescriptor opDesc,
IHyracksTaskContext ctx, int partition, IRecordDescriptorProvider recordDescProvider, boolean isForward,
@@ -184,32 +187,6 @@
}
}
- //for the join match casesos
- private void writeResults(IFrameTupleAccessor leftAccessor, int tIndex, ITupleReference frameTuple)
- throws Exception {
- /**
- * function call
- */
- functionProxy.functionCall(leftAccessor, tIndex, frameTuple, cloneUpdateTb);
-
- //doing clone update
- if (cloneUpdateTb.getSize() > 0) {
- if (!updateBuffer.appendTuple(cloneUpdateTb)) {
- //release the cursor/latch
- cursor.close();
- //batch update
- updateBuffer.updateBTree(indexAccessor);
-
- //search again and recover the cursor
- cursor.reset();
- rangePred.setLowKey(frameTuple, true);
- rangePred.setHighKey(null, true);
- indexAccessor.search(cursor, rangePred);
- }
- cloneUpdateTb.reset();
- }
- }
-
@Override
public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
accessor.reset(buffer);
@@ -289,6 +266,19 @@
return lowKeySearchCmp.compare(left, right);
}
+ /**
+ * Writes the result for a join match: runs the update function on the matched pair, then
+ * passes the tuple builder it was given (cloneUpdateTb) to CopyUpdateUtil.copyUpdate.
+ *
+ * @param leftAccessor accessor handed to the function call together with tIndex
+ * (presumably the frame holding the left/input tuple; confirm against nextFrame)
+ * @param tIndex tuple index handed to the function call
+ * @param frameTuple tuple handed to the function call; copyUpdate copies its field 0 and uses
+ * that copy as the low key when it has to restart the index search
+ * @throws Exception propagated from the function call or from the copy-update step
+ */
+ private void writeResults(IFrameTupleAccessor leftAccessor, int tIndex, ITupleReference frameTuple)
+ throws Exception {
+ /**
+ * function call; cloneUpdateTb is passed in, presumably to receive the function's update (if any)
+ */
+ functionProxy.functionCall(leftAccessor, tIndex, frameTuple, cloneUpdateTb);
+
+ // apply the clone update: buffer it, or flush the buffer to the index and resume the search
+ CopyUpdateUtil.copyUpdate(tempTupleReference, frameTuple, updateBuffer, cloneUpdateTb, indexAccessor, cursor,
+ rangePred);
+ }
+
/** write result for outer case */
private void writeResults(ITupleReference frameTuple) throws Exception {
/**
@@ -297,21 +287,8 @@
functionProxy.functionCall(nullTupleBuilder, frameTuple, cloneUpdateTb);
//doing clone update
- if (cloneUpdateTb.getSize() > 0) {
- if (!updateBuffer.appendTuple(cloneUpdateTb)) {
- //release the cursor/latch
- cursor.close();
- //batch update
- updateBuffer.updateBTree(indexAccessor);
-
- //search again and recover the cursor
- cursor.reset();
- rangePred.setLowKey(frameTuple, true);
- rangePred.setHighKey(null, true);
- indexAccessor.search(cursor, rangePred);
- }
- cloneUpdateTb.reset();
- }
+ CopyUpdateUtil.copyUpdate(tempTupleReference, frameTuple, updateBuffer, cloneUpdateTb, indexAccessor, cursor,
+ rangePred);
}
@Override
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopSetUnionFunctionUpdateOperatorNodePushable.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopSetUnionFunctionUpdateOperatorNodePushable.java
index 160324e..1e5c2ea 100644
--- a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopSetUnionFunctionUpdateOperatorNodePushable.java
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopSetUnionFunctionUpdateOperatorNodePushable.java
@@ -43,7 +43,9 @@
import edu.uci.ics.pregelix.dataflow.std.base.IRecordDescriptorFactory;
import edu.uci.ics.pregelix.dataflow.std.base.IRuntimeHookFactory;
import edu.uci.ics.pregelix.dataflow.std.base.IUpdateFunctionFactory;
+import edu.uci.ics.pregelix.dataflow.util.CopyUpdateUtil;
import edu.uci.ics.pregelix.dataflow.util.FunctionProxy;
+import edu.uci.ics.pregelix.dataflow.util.SearchKeyTupleReference;
import edu.uci.ics.pregelix.dataflow.util.UpdateBuffer;
public class IndexNestedLoopSetUnionFunctionUpdateOperatorNodePushable extends AbstractUnaryInputOperatorNodePushable {
@@ -72,6 +74,7 @@
private final FunctionProxy functionProxy;
private ArrayTupleBuilder cloneUpdateTb;
private UpdateBuffer updateBuffer;
+ private final SearchKeyTupleReference tempTupleReference = new SearchKeyTupleReference();
public IndexNestedLoopSetUnionFunctionUpdateOperatorNodePushable(AbstractTreeIndexOperatorDescriptor opDesc,
IHyracksTaskContext ctx, int partition, IRecordDescriptorProvider recordDescProvider, boolean isForward,
@@ -235,21 +238,8 @@
functionProxy.functionCall(frameTuple, cloneUpdateTb);
//doing clone update
- if (cloneUpdateTb.getSize() > 0) {
- if (!updateBuffer.appendTuple(cloneUpdateTb)) {
- //release the cursor/latch
- cursor.close();
- //batch update
- updateBuffer.updateBTree(indexAccessor);
-
- //search again
- cursor.reset();
- rangePred.setLowKey(frameTuple, true);
- rangePred.setHighKey(null, true);
- indexAccessor.search(cursor, rangePred);
- }
- cloneUpdateTb.reset();
- }
+ CopyUpdateUtil.copyUpdate(tempTupleReference, frameTuple, updateBuffer, cloneUpdateTb, indexAccessor, cursor,
+ rangePred);
}
/** write the left result */
@@ -258,21 +248,8 @@
functionProxy.functionCall(leftAccessor, tIndex, frameTuple, cloneUpdateTb);
//doing clone update
- if (cloneUpdateTb.getSize() > 0) {
- if (!updateBuffer.appendTuple(cloneUpdateTb)) {
- //release the cursor/latch
- cursor.close();
- //batch update
- updateBuffer.updateBTree(indexAccessor);
-
- //search again
- cursor.reset();
- rangePred.setLowKey(frameTuple, true);
- rangePred.setHighKey(null, true);
- indexAccessor.search(cursor, rangePred);
- }
- cloneUpdateTb.reset();
- }
+ CopyUpdateUtil.copyUpdate(tempTupleReference, frameTuple, updateBuffer, cloneUpdateTb, indexAccessor, cursor,
+ rangePred);
}
@Override
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/CopyUpdateUtil.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/CopyUpdateUtil.java
new file mode 100644
index 0000000..3f89543
--- /dev/null
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/CopyUpdateUtil.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.pregelix.dataflow.util;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+/**
+ * Static helper shared by the function-update operators touched in this change (BTree search,
+ * index nested-loop join, right-outer join and set union). It replaces the copy of the
+ * "clone update" logic that each operator previously carried inline.
+ */
+public class CopyUpdateUtil {
+ /**
+ * Buffers the pending update held in {@code cloneUpdateTb}, flushing the buffer to the index
+ * first when the append is rejected.
+ * <p>
+ * Nothing happens when {@code cloneUpdateTb} is empty. Otherwise the builder is appended to
+ * {@code updateBuffer}; if that append returns false, field 0 of {@code frameTuple} is copied
+ * into {@code tempTupleReference}, the cursor is closed, the buffer is applied through
+ * {@code updateBuffer.updateBTree(indexAccessor)}, the append is retried, and the cursor is
+ * reset and re-opened with a new search. In both cases {@code cloneUpdateTb} is reset at the end.
+ *
+ * @param tempTupleReference reusable holder that receives the copy of the resume key
+ * @param frameTuple tuple whose field 0 is used as the resume key (only field 0 is copied, so
+ * a single-field search key is assumed)
+ * @param updateBuffer buffer of pending index updates
+ * @param cloneUpdateTb builder holding the update to buffer; reset here once consumed
+ * @param indexAccessor accessor used both for the batch update and for the new search
+ * @param cursor the caller's open cursor; closed, reset and re-opened on the flush path
+ * @param rangePred the caller's predicate; its low and high keys are overwritten on the flush path
+ * @throws HyracksDataException if the update still cannot be appended after the flush
+ * @throws IndexException declared for the index operations invoked here
+ */
+ public static void copyUpdate(SearchKeyTupleReference tempTupleReference, ITupleReference frameTuple,
+ UpdateBuffer updateBuffer, ArrayTupleBuilder cloneUpdateTb, ITreeIndexAccessor indexAccessor,
+ ITreeIndexCursor cursor, RangePredicate rangePred) throws HyracksDataException, IndexException {
+ if (cloneUpdateTb.getSize() > 0) {
+ if (!updateBuffer.appendTuple(cloneUpdateTb)) {
+ tempTupleReference.reset(frameTuple.getFieldData(0), frameTuple.getFieldStart(0),
+ frameTuple.getFieldLength(0));
+ /* the key was copied above, before closing: frameTuple presumably points into memory
+ * owned by the cursor (TODO confirm). Now release the cursor/latch */
+ cursor.close();
+ //batch update: apply everything buffered so far to the index
+ updateBuffer.updateBTree(indexAccessor);
+ //try appending the to-be-updated tuple again; a second rejection is treated as fatal
+ if (!updateBuffer.appendTuple(cloneUpdateTb)) {
+ throw new HyracksDataException("cannot append tuple builder!");
+ }
+ /* search again and recover the cursor: the low key is the copied key, exclusive (false),
+ * where the replaced inline code passed true (inclusive).
+ * NOTE(review): the high key is cleared here (null), whereas the inline code this helper
+ * replaces in the BTree-search and nested-loop-join operators restored the operator's
+ * highKey/highKeyInclusive. Confirm those operators never run with a bounded high key. */
+ cursor.reset();
+ rangePred.setLowKey(tempTupleReference, false);
+ rangePred.setHighKey(null, true);
+ indexAccessor.search(cursor, rangePred);
+ }
+ cloneUpdateTb.reset();
+ }
+ }
+
+}
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/SearchKeyTupleReference.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/SearchKeyTupleReference.java
new file mode 100644
index 0000000..86fa24a
--- /dev/null
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/SearchKeyTupleReference.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.dataflow.util;
+
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+
+/**
+ * Single-field {@link ITupleReference} that keeps its own copy of a key's bytes, so the key
+ * stays readable independently of the array it was copied from.
+ */
+public class SearchKeyTupleReference implements ITupleReference {
+
+    /** Backing array for the copied key; it may be longer than the key currently stored. */
+    private byte[] keyBytes;
+    /** Number of valid bytes at the front of {@link #keyBytes}. */
+    private int keyLength;
+
+    /**
+     * Replaces the stored key with a copy of {@code len} bytes of {@code data}, starting at
+     * offset {@code start}. The backing array is reused and only reallocated when it is
+     * missing or shorter than {@code len}.
+     *
+     * @param data  array to copy the key from
+     * @param start offset of the first key byte in {@code data}
+     * @param len   number of bytes to copy
+     */
+    public void reset(byte[] data, int start, int len) {
+        final boolean needsNewArray = keyBytes == null || keyBytes.length < len;
+        if (needsNewArray) {
+            keyBytes = new byte[len];
+        }
+        System.arraycopy(data, start, keyBytes, 0, len);
+        keyLength = len;
+    }
+
+    /** @return always 1, since only the key field is stored */
+    @Override
+    public int getFieldCount() {
+        return 1;
+    }
+
+    /** @return the backing array holding the copied key, whatever {@code fIdx} is */
+    @Override
+    public byte[] getFieldData(int fIdx) {
+        return keyBytes;
+    }
+
+    /** @return always 0, because the copy is stored at the start of the backing array */
+    @Override
+    public int getFieldStart(int fIdx) {
+        return 0;
+    }
+
+    /** @return the length passed to the most recent {@link #reset(byte[], int, int)} */
+    @Override
+    public int getFieldLength(int fIdx) {
+        return keyLength;
+    }
+
+}
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobrun/RunJobTestSuite.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobrun/RunJobTestSuite.java
index 87bc40d..0a444fa 100644
--- a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobrun/RunJobTestSuite.java
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobrun/RunJobTestSuite.java
@@ -51,7 +51,6 @@
private static final String PATH_TO_JOBS = "src/test/resources/jobs/";
private static final String PATH_TO_IGNORE = "src/test/resources/ignore.txt";
private static final String PATH_TO_ONLY = "src/test/resources/only.txt";
- private static final String FILE_EXTENSION_OF_RESULTS = "result";
private static final String DATA_PATH = "data/webmap/webmap_link.txt";
private static final String HDFS_PATH = "/webmap/";