fix the memory recycle issue in FrameSorter
git-svn-id: https://hyracks.googlecode.com/svn/branches/hyracks_sort_join_opts@1131 123451ca-8445-de46-9d55-352943316053
diff --git a/hyracks-admin-console/.classpath b/hyracks-admin-console/.classpath
index c5921a9..d93f560 100644
--- a/hyracks-admin-console/.classpath
+++ b/hyracks-admin-console/.classpath
@@ -4,7 +4,6 @@
<classpathentry excluding="**" kind="src" output="target/hyracks-admin-console-0.1.8-SNAPSHOT/WEB-INF/classes" path="src/main/resources"/>
<classpathentry kind="src" output="target/test-classes" path="src/test/java"/>
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources"/>
- <classpathentry kind="src" path="target/generated-sources/gwt"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
<classpathentry kind="con" path="org.maven.ide.eclipse.MAVEN2_CLASSPATH_CONTAINER"/>
<classpathentry kind="con" path="com.google.gwt.eclipse.core.GWT_CONTAINER"/>
diff --git a/hyracks-cli/.classpath b/hyracks-cli/.classpath
index ba0bb5a..1f3c1ff 100644
--- a/hyracks-cli/.classpath
+++ b/hyracks-cli/.classpath
@@ -1,7 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java"/>
- <classpathentry kind="src" path="target/generated-sources/javacc"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
<classpathentry kind="con" path="org.maven.ide.eclipse.MAVEN2_CLASSPATH_CONTAINER"/>
<classpathentry kind="output" path="target/classes"/>
diff --git a/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/ExternalSortOperatorDescriptor.java b/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/ExternalSortOperatorDescriptor.java
index 52b292b..073ff28 100644
--- a/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/ExternalSortOperatorDescriptor.java
+++ b/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/ExternalSortOperatorDescriptor.java
@@ -172,8 +172,9 @@
for (int i = 0; i < comparatorFactories.length; ++i) {
comparators[i] = comparatorFactories[i].createBinaryComparator();
}
+ int necessaryFrames = Math.min(runs.size() + 2, framesLimit);
ExternalSortRunMerger merger = new ExternalSortRunMerger(ctx, frameSorter, runs, sortFields,
- comparators, recordDescriptors[0], framesLimit, writer);
+ comparators, recordDescriptors[0], necessaryFrames, writer);
merger.process();
}
};
diff --git a/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/ExternalSortRunMerger.java b/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/ExternalSortRunMerger.java
index 59cf056..5e413da 100644
--- a/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/ExternalSortRunMerger.java
+++ b/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/ExternalSortRunMerger.java
@@ -19,25 +19,24 @@
import edu.uci.ics.hyracks.dataflow.common.io.RunFileWriter;
/**
- * @author pouria
- * This class defines the logic for merging the run, generated during
- * the first phase of external sort (for both sorting without replacement
- * selection and with it). For the case with replacement selection, this
- * code also takes the limit on the output into account (if specified).
- * If number of input runs is less than the available memory frames,
- * then merging can be done in one pass, by allocating one buffer per
- * run, and one buffer as the output buffer. A priorityQueue is used to
- * find the top tuple at each iteration, among all the runs' heads in
- * memory (check RunMergingFrameReader for more details). Otherwise,
- * assuming that we have R runs and M memory buffers, where (R > M), we
- * first merge first (M-1) runs and create a new sorted run, out of
- * them. Discarding the first (M-1) runs, now merging procedure gets
- * applied recursively on the (R-M+2) remaining runs using the M memory
- * buffers.
- * For the case of replacement selection, if outputLimit is specified,
- * once the final pass is done on the runs (which is the pass
- * that generates the final sorted output), as soon as the output size
- * hits the output limit, the process stops, closes, and returns.
+ * @author pouria This class defines the logic for merging the run, generated
+ * during the first phase of external sort (for both sorting without
+ * replacement selection and with it). For the case with replacement
+ * selection, this code also takes the limit on the output into account
+ * (if specified). If number of input runs is less than the available
+ * memory frames, then merging can be done in one pass, by allocating
+ * one buffer per run, and one buffer as the output buffer. A
+ * priorityQueue is used to find the top tuple at each iteration, among
+ * all the runs' heads in memory (check RunMergingFrameReader for more
+ * details). Otherwise, assuming that we have R runs and M memory
+ * buffers, where (R > M), we first merge first (M-1) runs and create a
+ * new sorted run, out of them. Discarding the first (M-1) runs, now
+ * merging procedure gets applied recursively on the (R-M+2) remaining
+ * runs using the M memory buffers. For the case of replacement
+ * selection, if outputLimit is specified, once the final pass is done
+ * on the runs (which is the pass that generates the final sorted
+ * output), as soon as the output size hits the output limit, the
+ * process stops, closes, and returns.
*/
public class ExternalSortRunMerger {
@@ -53,12 +52,16 @@
private ByteBuffer outFrame;
private FrameTupleAppender outFrameAppender;
- private FrameSorter frameSorter; //Used in External sort, no replacement selection
- private FrameTupleAccessor outFrameAccessor; //Used in External sort, with replacement selection
- private final int outputLimit; //Used in External sort, with replacement selection and limit on output size
- private int currentSize; //Used in External sort, with replacement selection and limit on output size
+ private FrameSorter frameSorter; // Used in External sort, no replacement
+ // selection
+ private FrameTupleAccessor outFrameAccessor; // Used in External sort, with
+ // replacement selection
+ private final int outputLimit; // Used in External sort, with replacement
+ // selection and limit on output size
+ private int currentSize; // Used in External sort, with replacement
+ // selection and limit on output size
- //Constructor for external sort, no replacement selection
+ // Constructor for external sort, no replacement selection
public ExternalSortRunMerger(IHyracksTaskContext ctx, FrameSorter frameSorter, List<IFrameReader> runs,
int[] sortFields, IBinaryComparator[] comparators, RecordDescriptor recordDesc, int framesLimit,
IFrameWriter writer) {
@@ -73,7 +76,7 @@
this.outputLimit = -1;
}
- //Constructor for external sort with replacement selection
+ // Constructor for external sort with replacement selection
public ExternalSortRunMerger(IHyracksTaskContext ctx, int outputLimit, List<IFrameReader> runs, int[] sortFields,
IBinaryComparator[] comparators, RecordDescriptor recordDesc, int framesLimit, IFrameWriter writer) {
this.ctx = ctx;
@@ -96,12 +99,12 @@
frameSorter.flushFrames(writer);
}
/** recycle sort buffer */
- frameSorter = null;
+ frameSorter.close();
System.gc();
} else {
- /** recycle sort buffer */
- frameSorter = null;
+ /** recycle sort buffer */
+ frameSorter.close();
System.gc();
inFrames = new ArrayList<ByteBuffer>();
@@ -186,7 +189,7 @@
System.gc();
return;
}
- //Limit on the output size
+ // Limit on the output size
int totalCount = 0;
runs.get(0).open();
FrameTupleAccessor fta = new FrameTupleAccessor(ctx.getFrameSize(), recordDesc);
diff --git a/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/FrameSorter.java b/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/FrameSorter.java
index 8cf782e..3de9742 100644
--- a/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/FrameSorter.java
+++ b/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/FrameSorter.java
@@ -238,4 +238,8 @@
}
return 0;
}
+
+ public void close() {
+ this.buffers.clear();
+ }
}
\ No newline at end of file
diff --git a/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/OptimizedExternalSortRunGenerator.java b/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/OptimizedExternalSortRunGenerator.java
index 8b9e4fe..f04365d 100644
--- a/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/OptimizedExternalSortRunGenerator.java
+++ b/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/OptimizedExternalSortRunGenerator.java
@@ -19,32 +19,27 @@
import edu.uci.ics.hyracks.dataflow.common.io.RunFileWriter;
/**
- * @author pouria
- * This class implements the run generator for sorting with replacement
- * selection, where there is no limit on the output, i.e. the whole data
- * should be sorted. A SortMinHeap is used as the selectionTree to
- * decide the order of writing tuples into the runs, while memory
- * manager is based on a binary search tree to allocate tuples in the
- * memory.
- * The overall process is as follows: - Read the input data frame by
- * frame. For each tuple T in the current frame:
- * - Try to allocate a memory slot for writing T along with the attached
- * header/footer (for memory management purpose)
- * - If T can not be allocated, try to output as many tuples, currently
- * resident in memory, as needed so that a free slot, large enough to
- * hold T, gets created. MinHeap decides about which tuple should be
- * sent to the output at each step.
- * - Write T into the memory
- * - Calculate the runID of T (based on the last output tuple for the
- * current run). It is either the current run or the next run. Also
- * calculate Poorman's Normalized Key (PNK) for T, to make comparisons
- * faster later.
- * - Create a heap element for T, containing: its runID, the slot
- * pointer to its memory location, and its PNK.
- * - Insert the created heap element into the heap
- * - Upon closing, write all the tuples, currently resident in memory,
- * into their corresponding run(s). Again min heap decides about which
- * tuple is the next for output.
+ * @author pouria This class implements the run generator for sorting with
+ * replacement selection, where there is no limit on the output, i.e.
+ * the whole data should be sorted. A SortMinHeap is used as the
+ * selectionTree to decide the order of writing tuples into the runs,
+ * while memory manager is based on a binary search tree to allocate
+ * tuples in the memory. The overall process is as follows: - Read the
+ * input data frame by frame. For each tuple T in the current frame: -
+ * Try to allocate a memory slot for writing T along with the attached
+ * header/footer (for memory management purpose) - If T can not be
+ * allocated, try to output as many tuples, currently resident in
+ * memory, as needed so that a free slot, large enough to hold T, gets
+ * created. MinHeap decides about which tuple should be sent to the
+ * output at each step. - Write T into the memory - Calculate the runID
+ * of T (based on the last output tuple for the current run). It is
+ * either the current run or the next run. Also calculate Poorman's
+ * Normalized Key (PNK) for T, to make comparisons faster later. -
+ * Create a heap element for T, containing: its runID, the slot pointer
+ * to its memory location, and its PNK. - Insert the created heap
+ * element into the heap - Upon closing, write all the tuples, currently
+ * resident in memory, into their corresponding run(s). Again min heap
+ * decides about which tuple is the next for output.
* OptimizedSortOperatorDescriptor will merge the generated runs, to
* generate the final sorted output of the data.
*/
@@ -70,7 +65,6 @@
private FrameTupleAccessor lastRecordAccessor; // Used to read last output
// record from the output
// buffer
- private int curRunSize;
private int lastTupleIx; // Holds index of last output tuple in the
// dedicated output buffer
private Slot allocationPtr; // Contains the ptr to the allocated memory slot
@@ -198,7 +192,6 @@
}
outputedTuple.set(tFrameIx, tOffset);
newRun = false;
- curRunSize++;
return memMgr.unallocate(outputedTuple);
}
@@ -284,7 +277,6 @@
writer.open();
curRunId++;
newRun = true;
- curRunSize = 0;
lastTupleIx = -1;
}
diff --git a/hyracks-documentation/.classpath b/hyracks-documentation/.classpath
new file mode 100644
index 0000000..d0bec0f
--- /dev/null
+++ b/hyracks-documentation/.classpath
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"/>
+ <classpathentry kind="con" path="org.maven.ide.eclipse.MAVEN2_CLASSPATH_CONTAINER"/>
+ <classpathentry kind="output" path="target/classes"/>
+</classpath>
diff --git a/hyracks-documentation/.settings/org.eclipse.jdt.core.prefs b/hyracks-documentation/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..d368ea9
--- /dev/null
+++ b/hyracks-documentation/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,6 @@
+#Thu Dec 15 06:58:53 PST 2011
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.5
diff --git a/hyracks-examples/btree-example/btreeapp/.classpath b/hyracks-examples/btree-example/btreeapp/.classpath
new file mode 100644
index 0000000..d0bec0f
--- /dev/null
+++ b/hyracks-examples/btree-example/btreeapp/.classpath
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"/>
+ <classpathentry kind="con" path="org.maven.ide.eclipse.MAVEN2_CLASSPATH_CONTAINER"/>
+ <classpathentry kind="output" path="target/classes"/>
+</classpath>
diff --git a/hyracks-examples/btree-example/btreeapp/.settings/org.eclipse.jdt.core.prefs b/hyracks-examples/btree-example/btreeapp/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..0526f68
--- /dev/null
+++ b/hyracks-examples/btree-example/btreeapp/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,6 @@
+#Thu Dec 15 06:58:55 PST 2011
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.5
diff --git a/hyracks-examples/tpch-example/tpchapp/.classpath b/hyracks-examples/tpch-example/tpchapp/.classpath
new file mode 100644
index 0000000..d0bec0f
--- /dev/null
+++ b/hyracks-examples/tpch-example/tpchapp/.classpath
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"/>
+ <classpathentry kind="con" path="org.maven.ide.eclipse.MAVEN2_CLASSPATH_CONTAINER"/>
+ <classpathentry kind="output" path="target/classes"/>
+</classpath>
diff --git a/hyracks-examples/tpch-example/tpchapp/.settings/org.eclipse.jdt.core.prefs b/hyracks-examples/tpch-example/tpchapp/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..0526f68
--- /dev/null
+++ b/hyracks-examples/tpch-example/tpchapp/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,6 @@
+#Thu Dec 15 06:58:55 PST 2011
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
+org.eclipse.jdt.core.compiler.compliance=1.5
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.5