Merged fullstack_asterix_stabilization -r 2933:3157
git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_hyracks_ioc@3164 123451ca-8445-de46-9d55-352943316053
diff --git a/algebricks/algebricks-common/pom.xml b/algebricks/algebricks-common/pom.xml
index a5677a5..521ef12 100644
--- a/algebricks/algebricks-common/pom.xml
+++ b/algebricks/algebricks-common/pom.xml
@@ -16,8 +16,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/algebricks/algebricks-compiler/pom.xml b/algebricks/algebricks-compiler/pom.xml
index 8dc083d..bd35835 100644
--- a/algebricks/algebricks-compiler/pom.xml
+++ b/algebricks/algebricks-compiler/pom.xml
@@ -16,8 +16,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/algebricks/algebricks-core/pom.xml b/algebricks/algebricks-core/pom.xml
index a74a540..def5b35 100644
--- a/algebricks/algebricks-core/pom.xml
+++ b/algebricks/algebricks-core/pom.xml
@@ -16,8 +16,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/base/LogicalOperatorTag.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/base/LogicalOperatorTag.java
index 5234d2c..b8bdf3e 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/base/LogicalOperatorTag.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/base/LogicalOperatorTag.java
@@ -20,6 +20,7 @@
CLUSTER,
DATASOURCESCAN,
DISTINCT,
+ DISTRIBUTE_RESULT,
GROUP,
EMPTYTUPLESOURCE,
EXCHANGE,
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/base/PhysicalOperatorTag.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/base/PhysicalOperatorTag.java
index a969372..32cfb9a 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/base/PhysicalOperatorTag.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/base/PhysicalOperatorTag.java
@@ -7,6 +7,7 @@
BTREE_SEARCH,
STATS,
DATASOURCE_SCAN,
+ DISTRIBUTE_RESULT,
EMPTY_TUPLE_SOURCE,
EXTERNAL_GROUP_BY,
IN_MEMORY_HASH_JOIN,
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/metadata/IMetadataProvider.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/metadata/IMetadataProvider.java
index 899b633..82187e3 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/metadata/IMetadataProvider.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/metadata/IMetadataProvider.java
@@ -51,6 +51,10 @@
int[] printColumns, IPrinterFactory[] printerFactories, RecordDescriptor inputDesc)
throws AlgebricksException;
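+ /**
+ * Creates the runtime (operator descriptor plus partition constraint) that hands the final
+ * query result to the data sink's result distribution mechanism; invoked by
+ * DistributeResultPOperator at job-generation time.
+ */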
+ public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getResultHandleRuntime(IDataSink sink,
+ int[] printColumns, IPrinterFactory[] printerFactories, RecordDescriptor inputDesc, boolean ordered,
+ JobSpecification spec) throws AlgebricksException;
+
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getWriteResultRuntime(IDataSource<S> dataSource,
IOperatorSchema propagatedSchema, List<LogicalVariable> keys, LogicalVariable payLoadVar,
JobGenContext context, JobSpecification jobSpec) throws AlgebricksException;
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/DistributeResultOperator.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/DistributeResultOperator.java
new file mode 100644
index 0000000..6ca6d87
--- /dev/null
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/DistributeResultOperator.java
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSink;
+import edu.uci.ics.hyracks.algebricks.core.algebra.properties.VariablePropagationPolicy;
+import edu.uci.ics.hyracks.algebricks.core.algebra.typing.ITypingContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.visitors.ILogicalExpressionReferenceTransform;
+import edu.uci.ics.hyracks.algebricks.core.algebra.visitors.ILogicalOperatorVisitor;
+
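+/**
+ * Logical operator that sits at the root of a query plan and marks its output as the query
+ * result to be distributed. It keeps the result expressions and the {@link IDataSink} that
+ * describes where the result is delivered.
+ */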
+public class DistributeResultOperator extends AbstractLogicalOperator {
+ private List<Mutable<ILogicalExpression>> expressions;
+ private IDataSink dataSink;
+
+ public DistributeResultOperator(List<Mutable<ILogicalExpression>> expressions, IDataSink dataSink) {
+ this.expressions = expressions;
+ this.dataSink = dataSink;
+ }
+
+ public List<Mutable<ILogicalExpression>> getExpressions() {
+ return expressions;
+ }
+
+ public IDataSink getDataSink() {
+ return dataSink;
+ }
+
+ @Override
+ public LogicalOperatorTag getOperatorTag() {
+ return LogicalOperatorTag.DISTRIBUTE_RESULT;
+ }
+
+ @Override
+ public <R, T> R accept(ILogicalOperatorVisitor<R, T> visitor, T arg) throws AlgebricksException {
+ return visitor.visitDistributeResultOperator(this, arg);
+ }
+
+ @Override
+ public boolean acceptExpressionTransform(ILogicalExpressionReferenceTransform visitor) throws AlgebricksException {
+ boolean modif = false;
+ for (int i = 0; i < expressions.size(); i++) {
+ boolean b = visitor.transform(expressions.get(i));
+ if (b) {
+ modif = true;
+ }
+ }
+ return modif;
+ }
+
+ @Override
+ public VariablePropagationPolicy getVariablePropagationPolicy() {
+ return VariablePropagationPolicy.ALL;
+ }
+
+ @Override
+ public boolean isMap() {
+ return false; // actually depends on the physical op.
+ }
+
+ @Override
+ public void recomputeSchema() {
+ schema = new ArrayList<LogicalVariable>();
+ schema.addAll(inputs.get(0).getValue().getSchema());
+ }
+
+ @Override
+ public IVariableTypeEnvironment computeOutputTypeEnvironment(ITypingContext ctx) throws AlgebricksException {
+ return createPropagatingAllInputsTypeEnvironment(ctx);
+ }
+
+}
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/FDsAndEquivClassesVisitor.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/FDsAndEquivClassesVisitor.java
index 0539cbe..1b4be1e 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/FDsAndEquivClassesVisitor.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/FDsAndEquivClassesVisitor.java
@@ -47,8 +47,10 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DieOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistinctOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistributeResultOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IndexInsertDeleteOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
@@ -64,7 +66,6 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ScriptOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SinkOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestMapOperator;
@@ -476,6 +477,14 @@
}
@Override
+ public Void visitDistributeResultOperator(DistributeResultOperator op, IOptimizationContext ctx)
+ throws AlgebricksException {
+ // propagateFDsAndEquivClasses(op, ctx);
+ setEmptyFDsEqClasses(op, ctx);
+ return null;
+ }
+
+ @Override
public Void visitWriteResultOperator(WriteResultOperator op, IOptimizationContext ctx) throws AlgebricksException {
// propagateFDsAndEquivClasses(op, ctx);
setEmptyFDsEqClasses(op, ctx);
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java
index b97597d..ac6d887 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java
@@ -38,6 +38,7 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DieOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistinctOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistributeResultOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;
@@ -425,6 +426,17 @@
}
@Override
+ public Boolean visitDistributeResultOperator(DistributeResultOperator op, ILogicalOperator arg)
+ throws AlgebricksException {
+ AbstractLogicalOperator aop = (AbstractLogicalOperator) arg;
+ if (aop.getOperatorTag() != LogicalOperatorTag.DISTRIBUTE_RESULT)
+ return Boolean.FALSE;
+ DistributeResultOperator writeOpArg = (DistributeResultOperator) copyAndSubstituteVar(op, arg);
+ boolean isomorphic = VariableUtilities.varListEqualUnordered(op.getSchema(), writeOpArg.getSchema());
+ return isomorphic;
+ }
+
+ @Override
public Boolean visitWriteResultOperator(WriteResultOperator op, ILogicalOperator arg) throws AlgebricksException {
AbstractLogicalOperator aop = (AbstractLogicalOperator) arg;
if (aop.getOperatorTag() != LogicalOperatorTag.WRITE_RESULT)
@@ -762,6 +774,14 @@
}
@Override
+ public ILogicalOperator visitDistributeResultOperator(DistributeResultOperator op, Void arg)
+ throws AlgebricksException {
+ ArrayList<Mutable<ILogicalExpression>> newExpressions = new ArrayList<Mutable<ILogicalExpression>>();
+ deepCopyExpressionRefs(newExpressions, op.getExpressions());
+ return new DistributeResultOperator(newExpressions, op.getDataSink());
+ }
+
+ @Override
public ILogicalOperator visitWriteResultOperator(WriteResultOperator op, Void arg) throws AlgebricksException {
ArrayList<Mutable<ILogicalExpression>> newKeyExpressions = new ArrayList<Mutable<ILogicalExpression>>();
deepCopyExpressionRefs(newKeyExpressions, op.getKeyExpressions());
@@ -784,8 +804,8 @@
deepCopyExpressionRefs(newPrimaryKeyExpressions, op.getPrimaryKeyExpressions());
List<Mutable<ILogicalExpression>> newSecondaryKeyExpressions = new ArrayList<Mutable<ILogicalExpression>>();
deepCopyExpressionRefs(newSecondaryKeyExpressions, op.getSecondaryKeyExpressions());
- Mutable<ILogicalExpression> newFilterExpression = new MutableObject<ILogicalExpression>(((AbstractLogicalExpression)op.getFilterExpression())
- .cloneExpression());
+ Mutable<ILogicalExpression> newFilterExpression = new MutableObject<ILogicalExpression>(
+ ((AbstractLogicalExpression) op.getFilterExpression()).cloneExpression());
return new IndexInsertDeleteOperator(op.getDataSourceIndex(), newPrimaryKeyExpressions,
newSecondaryKeyExpressions, newFilterExpression, op.getOperation());
}
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismVariableMappingVisitor.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismVariableMappingVisitor.java
index 562bb4c..b9544c7 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismVariableMappingVisitor.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismVariableMappingVisitor.java
@@ -37,8 +37,10 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DieOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistinctOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistributeResultOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IndexInsertDeleteOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
@@ -54,7 +56,6 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ScriptOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SinkOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestMapOperator;
@@ -230,6 +231,13 @@
}
@Override
+ public Void visitDistributeResultOperator(DistributeResultOperator op, ILogicalOperator arg)
+ throws AlgebricksException {
+ mapVariablesStandard(op, arg);
+ return null;
+ }
+
+ @Override
public Void visitWriteResultOperator(WriteResultOperator op, ILogicalOperator arg) throws AlgebricksException {
mapVariablesStandard(op, arg);
return null;
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/LogicalPropertiesVisitor.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/LogicalPropertiesVisitor.java
index 9b2f5a0..8f1d686 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/LogicalPropertiesVisitor.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/LogicalPropertiesVisitor.java
@@ -29,8 +29,10 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DieOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistinctOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistributeResultOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IndexInsertDeleteOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
@@ -46,7 +48,6 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ScriptOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SinkOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestMapOperator;
@@ -240,6 +241,12 @@
}
@Override
+ public Void visitDistributeResultOperator(DistributeResultOperator op, IOptimizationContext arg)
+ throws AlgebricksException {
+ return null;
+ }
+
+ @Override
public Void visitWriteResultOperator(WriteResultOperator op, IOptimizationContext arg) throws AlgebricksException {
// TODO Auto-generated method stub
return null;
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/ProducedVariableVisitor.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/ProducedVariableVisitor.java
index 78b6801..31adcba 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/ProducedVariableVisitor.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/ProducedVariableVisitor.java
@@ -32,8 +32,10 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DieOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistinctOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistributeResultOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IndexInsertDeleteOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
@@ -49,7 +51,6 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ScriptOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SinkOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestMapOperator;
@@ -223,6 +224,11 @@
}
@Override
+ public Void visitDistributeResultOperator(DistributeResultOperator op, Void arg) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
public Void visitWriteResultOperator(WriteResultOperator op, Void arg) throws AlgebricksException {
return null;
}
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/SchemaVariableVisitor.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/SchemaVariableVisitor.java
index a759e35..cd0cee3 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/SchemaVariableVisitor.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/SchemaVariableVisitor.java
@@ -31,8 +31,10 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DieOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistinctOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistributeResultOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IndexInsertDeleteOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
@@ -48,7 +50,6 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ScriptOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SinkOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestMapOperator;
@@ -238,6 +239,12 @@
}
@Override
+ public Void visitDistributeResultOperator(DistributeResultOperator op, Void arg) throws AlgebricksException {
+ standardLayout(op);
+ return null;
+ }
+
+ @Override
public Void visitWriteResultOperator(WriteResultOperator op, Void arg) throws AlgebricksException {
standardLayout(op);
return null;
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/SubstituteVariableVisitor.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/SubstituteVariableVisitor.java
index 11e56ca..69fb3f8 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/SubstituteVariableVisitor.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/SubstituteVariableVisitor.java
@@ -33,8 +33,10 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DieOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistinctOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistributeResultOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IndexInsertDeleteOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
@@ -51,7 +53,6 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ScriptOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SinkOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestMapOperator;
@@ -335,6 +336,16 @@
}
@Override
+ public Void visitDistributeResultOperator(DistributeResultOperator op, Pair<LogicalVariable, LogicalVariable> pair)
+ throws AlgebricksException {
+ for (Mutable<ILogicalExpression> e : op.getExpressions()) {
+ e.getValue().substituteVar(pair.first, pair.second);
+ }
+ substVarTypes(op, pair);
+ return null;
+ }
+
+ @Override
public Void visitWriteResultOperator(WriteResultOperator op, Pair<LogicalVariable, LogicalVariable> pair)
throws AlgebricksException {
op.getPayloadExpression().getValue().substituteVar(pair.first, pair.second);
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/UsedVariableVisitor.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/UsedVariableVisitor.java
index 0ea9367..5361a19 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/UsedVariableVisitor.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/UsedVariableVisitor.java
@@ -32,6 +32,7 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DieOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistinctOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistributeResultOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;
@@ -304,6 +305,14 @@
}
@Override
+ public Void visitDistributeResultOperator(DistributeResultOperator op, Void arg) {
+ for (Mutable<ILogicalExpression> expr : op.getExpressions()) {
+ expr.getValue().getUsedVariables(usedVariables);
+ }
+ return null;
+ }
+
+ @Override
public Void visitWriteResultOperator(WriteResultOperator op, Void arg) {
op.getPayloadExpression().getValue().getUsedVariables(usedVariables);
for (Mutable<ILogicalExpression> e : op.getKeyExpressions()) {
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/DistributeResultPOperator.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/DistributeResultPOperator.java
new file mode 100644
index 0000000..302d4d2
--- /dev/null
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/DistributeResultPOperator.java
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical;
+
+import org.apache.commons.lang3.mutable.Mutable;
+
+import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.NotImplementedException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IHyracksJobBuilder;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.PhysicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSink;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistributeResultOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema;
+import edu.uci.ics.hyracks.algebricks.core.algebra.properties.IPartitioningProperty;
+import edu.uci.ics.hyracks.algebricks.core.algebra.properties.IPartitioningRequirementsCoordinator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.properties.IPhysicalPropertiesVector;
+import edu.uci.ics.hyracks.algebricks.core.algebra.properties.PhysicalRequirements;
+import edu.uci.ics.hyracks.algebricks.core.algebra.properties.StructuralPropertiesVector;
+import edu.uci.ics.hyracks.algebricks.core.jobgen.impl.JobGenContext;
+import edu.uci.ics.hyracks.algebricks.core.jobgen.impl.JobGenHelper;
+import edu.uci.ics.hyracks.algebricks.data.IPrinterFactory;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+
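+/**
+ * Physical counterpart of {@link DistributeResultOperator}. During job generation it obtains a
+ * result-handle runtime from the metadata provider (IMetadataProvider.getResultHandleRuntime)
+ * and wires that operator into the Hyracks job specification.
+ */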
+public class DistributeResultPOperator extends AbstractPhysicalOperator {
+
+ @Override
+ public PhysicalOperatorTag getOperatorTag() {
+ return PhysicalOperatorTag.DISTRIBUTE_RESULT;
+ }
+
+ @Override
+ public boolean isMicroOperator() {
+ return false;
+ }
+
+ @Override
+ public void computeDeliveredProperties(ILogicalOperator op, IOptimizationContext context) {
+ ILogicalOperator op2 = op.getInputs().get(0).getValue();
+ deliveredProperties = op2.getDeliveredPhysicalProperties().clone();
+ }
+
+ @Override
+ public PhysicalRequirements getRequiredPropertiesForChildren(ILogicalOperator op,
+ IPhysicalPropertiesVector reqdByParent) {
+ DistributeResultOperator write = (DistributeResultOperator) op;
+ IDataSink sink = write.getDataSink();
+ IPartitioningProperty pp = sink.getPartitioningProperty();
+ StructuralPropertiesVector[] r = new StructuralPropertiesVector[] { new StructuralPropertiesVector(pp, null) };
+ return new PhysicalRequirements(r, IPartitioningRequirementsCoordinator.NO_COORDINATION);
+ }
+
+ @SuppressWarnings({ "rawtypes", "unchecked" })
+ @Override
+ public void contributeRuntimeOperator(IHyracksJobBuilder builder, JobGenContext context, ILogicalOperator op,
+ IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema)
+ throws AlgebricksException {
+ DistributeResultOperator resultOp = (DistributeResultOperator) op;
+ IMetadataProvider mp = context.getMetadataProvider();
+
+ JobSpecification spec = builder.getJobSpec();
+
+ int[] columns = new int[resultOp.getExpressions().size()];
+ int i = 0;
+ for (Mutable<ILogicalExpression> exprRef : resultOp.getExpressions()) {
+ ILogicalExpression expr = exprRef.getValue();
+ if (expr.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
+ throw new NotImplementedException("Only writing variable expressions is supported.");
+ }
+ VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
+ LogicalVariable v = varRef.getVariableReference();
+ columns[i++] = inputSchemas[0].findVariable(v);
+ }
+ RecordDescriptor inputDesc = JobGenHelper.mkRecordDescriptor(
+ context.getTypeEnvironment(op.getInputs().get(0).getValue()), inputSchemas[0], context);
+
+ IPrinterFactory[] pf = JobGenHelper.mkPrinterFactories(inputSchemas[0], context.getTypeEnvironment(op),
+ context, columns);
+
+ Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> runtimeAndConstraints = mp.getResultHandleRuntime(
+ resultOp.getDataSink(), columns, pf, inputDesc, false, spec);
+
+ builder.contributeHyracksOperator(resultOp, runtimeAndConstraints.first);
+ builder.contributeAlgebricksPartitionConstraint(runtimeAndConstraints.first, runtimeAndConstraints.second);
+ ILogicalOperator src = resultOp.getInputs().get(0).getValue();
+ builder.contributeGraphEdge(src, 0, resultOp, 0);
+ }
+}
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/prettyprint/LogicalOperatorPrettyPrintVisitor.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/prettyprint/LogicalOperatorPrettyPrintVisitor.java
index a94c78e..fc0c433 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/prettyprint/LogicalOperatorPrettyPrintVisitor.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/prettyprint/LogicalOperatorPrettyPrintVisitor.java
@@ -30,8 +30,10 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DieOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistinctOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistributeResultOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IndexInsertDeleteOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
@@ -48,7 +50,6 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ScriptOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SinkOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestMapOperator;
@@ -164,6 +165,13 @@
}
@Override
+ public String visitDistributeResultOperator(DistributeResultOperator op, Integer indent) {
+ StringBuilder buffer = new StringBuilder();
+ addIndent(buffer, indent).append("distribute result ").append(op.getExpressions());
+ return buffer.toString();
+ }
+
+ @Override
public String visitWriteResultOperator(WriteResultOperator op, Integer indent) {
StringBuilder buffer = new StringBuilder();
addIndent(buffer, indent).append("load ").append(op.getDataSource()).append(" from ")
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/visitors/ILogicalOperatorVisitor.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/visitors/ILogicalOperatorVisitor.java
index 6b5949e..23dac2a 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/visitors/ILogicalOperatorVisitor.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/visitors/ILogicalOperatorVisitor.java
@@ -20,8 +20,10 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DieOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistinctOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistributeResultOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IndexInsertDeleteOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
@@ -37,7 +39,6 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ScriptOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SinkOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestMapOperator;
@@ -97,6 +98,8 @@
public R visitWriteOperator(WriteOperator op, T arg) throws AlgebricksException;
+ public R visitDistributeResultOperator(DistributeResultOperator op, T arg) throws AlgebricksException;
+
public R visitWriteResultOperator(WriteResultOperator op, T arg) throws AlgebricksException;
public R visitInsertDeleteOperator(InsertDeleteOperator op, T tag) throws AlgebricksException;
diff --git a/algebricks/algebricks-data/pom.xml b/algebricks/algebricks-data/pom.xml
index 3d927d9..9536416 100644
--- a/algebricks/algebricks-data/pom.xml
+++ b/algebricks/algebricks-data/pom.xml
@@ -16,8 +16,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/algebricks/algebricks-data/src/main/java/edu/uci/ics/hyracks/algebricks/data/IResultSerializerFactoryProvider.java b/algebricks/algebricks-data/src/main/java/edu/uci/ics/hyracks/algebricks/data/IResultSerializerFactoryProvider.java
new file mode 100644
index 0000000..5e62612
--- /dev/null
+++ b/algebricks/algebricks-data/src/main/java/edu/uci/ics/hyracks/algebricks/data/IResultSerializerFactoryProvider.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.algebricks.data;
+
+import java.io.Serializable;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IResultSerializerFactory;
+
+public interface IResultSerializerFactoryProvider extends Serializable {
+ /**
+ * Returns a result serializer factory
+ *
+ * @param fields
+ * - The positions of the fields in the order they should be written to the output.
+ * @param printerFactories
+ * - A printer factory array to print the tuple containing different fields.
+ * @param writerFactory
+ * - A writer factory to write the serialized data to the print stream.
+ * @return A new instance of a result serializer factory.
+ */
+ public IResultSerializerFactory getAqlResultSerializerFactoryProvider(int[] fields,
+ IPrinterFactory[] printerFactories, IAWriterFactory writerFactory);
+}
diff --git a/algebricks/algebricks-examples/piglet-example/pom.xml b/algebricks/algebricks-examples/piglet-example/pom.xml
index 954938a..ca7467b 100644
--- a/algebricks/algebricks-examples/piglet-example/pom.xml
+++ b/algebricks/algebricks-examples/piglet-example/pom.xml
@@ -16,8 +16,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
<plugin>
diff --git a/algebricks/algebricks-examples/piglet-example/src/main/java/edu/uci/ics/hyracks/algebricks/examples/piglet/compiler/PigletCompiler.java b/algebricks/algebricks-examples/piglet-example/src/main/java/edu/uci/ics/hyracks/algebricks/examples/piglet/compiler/PigletCompiler.java
index d105759..2981157 100644
--- a/algebricks/algebricks-examples/piglet-example/src/main/java/edu/uci/ics/hyracks/algebricks/examples/piglet/compiler/PigletCompiler.java
+++ b/algebricks/algebricks-examples/piglet-example/src/main/java/edu/uci/ics/hyracks/algebricks/examples/piglet/compiler/PigletCompiler.java
@@ -127,9 +127,9 @@
}
});
builder.setTypeTraitProvider(new ITypeTraitProvider() {
- public ITypeTraits getTypeTrait(Object type) {
- return null;
- }
+ public ITypeTraits getTypeTrait(Object type) {
+ return null;
+ }
});
builder.setPrinterProvider(PigletPrinterFactoryProvider.INSTANCE);
builder.setExpressionRuntimeProvider(new LogicalExpressionJobGenToExpressionRuntimeProviderAdapter(
diff --git a/algebricks/algebricks-examples/piglet-example/src/main/java/edu/uci/ics/hyracks/algebricks/examples/piglet/metadata/PigletMetadataProvider.java b/algebricks/algebricks-examples/piglet-example/src/main/java/edu/uci/ics/hyracks/algebricks/examples/piglet/metadata/PigletMetadataProvider.java
index d678803..15b290e 100644
--- a/algebricks/algebricks-examples/piglet-example/src/main/java/edu/uci/ics/hyracks/algebricks/examples/piglet/metadata/PigletMetadataProvider.java
+++ b/algebricks/algebricks-examples/piglet-example/src/main/java/edu/uci/ics/hyracks/algebricks/examples/piglet/metadata/PigletMetadataProvider.java
@@ -145,6 +145,13 @@
}
@Override
+ public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getResultHandleRuntime(IDataSink sink,
+ int[] printColumns, IPrinterFactory[] printerFactories, RecordDescriptor inputDesc, boolean ordered,
+ JobSpecification spec) throws AlgebricksException {
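+ // The Piglet example does not implement result distribution, so no runtime is contributed here.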
+ return null;
+ }
+
+ @Override
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getWriteResultRuntime(
IDataSource<String> dataSource, IOperatorSchema propagatedSchema, List<LogicalVariable> keys,
LogicalVariable payLoadVar, JobGenContext context, JobSpecification jobSpec) throws AlgebricksException {
diff --git a/algebricks/algebricks-rewriter/pom.xml b/algebricks/algebricks-rewriter/pom.xml
index 41979d3..7968773 100644
--- a/algebricks/algebricks-rewriter/pom.xml
+++ b/algebricks/algebricks-rewriter/pom.xml
@@ -16,8 +16,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/SetAlgebricksPhysicalOperatorsRule.java b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/SetAlgebricksPhysicalOperatorsRule.java
index 38cf96e..60a4fbb 100644
--- a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/SetAlgebricksPhysicalOperatorsRule.java
+++ b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/SetAlgebricksPhysicalOperatorsRule.java
@@ -38,6 +38,7 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.AggregatePOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.AssignPOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.DataSourceScanPOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.DistributeResultPOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.EmptyTupleSourcePOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.ExternalGroupByPOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.InMemoryStableSortPOperator;
@@ -245,6 +246,10 @@
op.setPhysicalOperator(new SinkWritePOperator());
break;
}
+ case DISTRIBUTE_RESULT: {
+ op.setPhysicalOperator(new DistributeResultPOperator());
+ break;
+ }
case WRITE_RESULT: {
WriteResultOperator opLoad = (WriteResultOperator) op;
LogicalVariable payload;
@@ -267,8 +272,8 @@
List<LogicalVariable> secondaryKeys = new ArrayList<LogicalVariable>();
getKeys(opInsDel.getPrimaryKeyExpressions(), primaryKeys);
getKeys(opInsDel.getSecondaryKeyExpressions(), secondaryKeys);
- op.setPhysicalOperator(new IndexInsertDeletePOperator(primaryKeys, secondaryKeys,
- opInsDel.getFilterExpression(), opInsDel.getDataSourceIndex()));
+ op.setPhysicalOperator(new IndexInsertDeletePOperator(primaryKeys, secondaryKeys, opInsDel
+ .getFilterExpression(), opInsDel.getDataSourceIndex()));
break;
}
case SINK: {
diff --git a/algebricks/algebricks-runtime/pom.xml b/algebricks/algebricks-runtime/pom.xml
index e40dfb0..e438283 100644
--- a/algebricks/algebricks-runtime/pom.xml
+++ b/algebricks/algebricks-runtime/pom.xml
@@ -16,8 +16,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/algebricks/algebricks-runtime/src/main/java/edu/uci/ics/hyracks/algebricks/runtime/serializer/ResultSerializerFactoryProvider.java b/algebricks/algebricks-runtime/src/main/java/edu/uci/ics/hyracks/algebricks/runtime/serializer/ResultSerializerFactoryProvider.java
new file mode 100644
index 0000000..4f28c81
--- /dev/null
+++ b/algebricks/algebricks-runtime/src/main/java/edu/uci/ics/hyracks/algebricks/runtime/serializer/ResultSerializerFactoryProvider.java
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.algebricks.runtime.serializer;
+
+import java.io.PrintStream;
+import java.nio.BufferOverflowException;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.data.IAWriter;
+import edu.uci.ics.hyracks.algebricks.data.IAWriterFactory;
+import edu.uci.ics.hyracks.algebricks.data.IPrinterFactory;
+import edu.uci.ics.hyracks.algebricks.data.IResultSerializerFactoryProvider;
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.dataflow.value.IResultSerializer;
+import edu.uci.ics.hyracks.api.dataflow.value.IResultSerializerFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
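+/**
+ * Singleton provider of {@link IResultSerializerFactory} instances. The produced serializers
+ * print each tuple through the supplied writer and printer factories and signal a full output
+ * buffer by returning false from appendTuple() when a BufferOverflowException occurs.
+ */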
+public class ResultSerializerFactoryProvider implements IResultSerializerFactoryProvider {
+ private static final long serialVersionUID = 1L;
+
+ public static final ResultSerializerFactoryProvider INSTANCE = new ResultSerializerFactoryProvider();
+
+ private ResultSerializerFactoryProvider() {
+ }
+
+ @Override
+ public IResultSerializerFactory getAqlResultSerializerFactoryProvider(final int[] fields,
+ final IPrinterFactory[] printerFactories, final IAWriterFactory writerFactory) {
+ return new IResultSerializerFactory() {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public IResultSerializer createResultSerializer(RecordDescriptor inputRecordDesc, PrintStream printStream) {
+ final IAWriter writer = writerFactory.createWriter(fields, printStream, printerFactories,
+ inputRecordDesc);
+
+ return new IResultSerializer() {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public void init() throws HyracksDataException {
+ try {
+ writer.init();
+ } catch (AlgebricksException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public boolean appendTuple(IFrameTupleAccessor tAccess, int tIdx) throws HyracksDataException {
+ try {
+ writer.printTuple(tAccess, tIdx);
+ } catch (BufferOverflowException e) {
+ return false;
+ } catch (AlgebricksException e) {
+ throw new HyracksDataException(e);
+ }
+ return true;
+ }
+ };
+ }
+ };
+ }
+}
diff --git a/algebricks/algebricks-tests/pom.xml b/algebricks/algebricks-tests/pom.xml
index 19e6711..c114881 100644
--- a/algebricks/algebricks-tests/pom.xml
+++ b/algebricks/algebricks-tests/pom.xml
@@ -16,9 +16,11 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
- </configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
+ <encoding>UTF-8</encoding>
+ </configuration>
</plugin>
<plugin>
<artifactId>maven-antrun-plugin</artifactId>
diff --git a/algebricks/algebricks-tests/src/test/java/edu/uci/ics/hyracks/algebricks/tests/util/AlgebricksHyracksIntegrationUtil.java b/algebricks/algebricks-tests/src/test/java/edu/uci/ics/hyracks/algebricks/tests/util/AlgebricksHyracksIntegrationUtil.java
index 1807dd1..2855eb2 100644
--- a/algebricks/algebricks-tests/src/test/java/edu/uci/ics/hyracks/algebricks/tests/util/AlgebricksHyracksIntegrationUtil.java
+++ b/algebricks/algebricks-tests/src/test/java/edu/uci/ics/hyracks/algebricks/tests/util/AlgebricksHyracksIntegrationUtil.java
@@ -3,9 +3,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -55,6 +55,7 @@
ncConfig1.ccPort = TEST_HYRACKS_CC_CLUSTER_NET_PORT;
ncConfig1.clusterNetIPAddress = "127.0.0.1";
ncConfig1.dataIPAddress = "127.0.0.1";
+ ncConfig1.datasetIPAddress = "127.0.0.1";
ncConfig1.nodeId = NC1_ID;
nc1 = new NodeControllerService(ncConfig1);
nc1.start();
@@ -64,6 +65,7 @@
ncConfig2.ccPort = TEST_HYRACKS_CC_CLUSTER_NET_PORT;
ncConfig2.clusterNetIPAddress = "127.0.0.1";
ncConfig2.dataIPAddress = "127.0.0.1";
+ ncConfig2.datasetIPAddress = "127.0.0.1";
ncConfig2.nodeId = NC2_ID;
nc2 = new NodeControllerService(ncConfig2);
nc2.start();
@@ -84,4 +86,4 @@
hcc.waitForCompletion(jobId);
}
-}
\ No newline at end of file
+}
diff --git a/hivesterix/conf/cluster b/hivesterix/conf/cluster
deleted file mode 100644
index 6cc8cca..0000000
--- a/hivesterix/conf/cluster
+++ /dev/null
@@ -1,11 +0,0 @@
-4
-10.0.0.1 asterix-001
-10.0.0.2 asterix-002
-10.0.0.3 asterix-003
-10.0.0.4 asterix-004
-10.0.0.5 asterix-005
-10.0.0.6 asterix-006
-10.0.0.7 asterix-007
-10.0.0.8 asterix-008
-10.0.0.9 asterix-009
-10.0.0.10 asterix-010
diff --git a/hivesterix/hivesterix-common/pom.xml b/hivesterix/hivesterix-common/pom.xml
new file mode 100644
index 0000000..33d8fb3
--- /dev/null
+++ b/hivesterix/hivesterix-common/pom.xml
@@ -0,0 +1,61 @@
+<?xml version="1.0"?>
+<project
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+ xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>hivesterix-common</artifactId>
+ <name>hivesterix-common</name>
+
+ <parent>
+ <artifactId>hivesterix</artifactId>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <version>0.2.3-SNAPSHOT</version>
+ </parent>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <encoding>UTF-8</encoding>
+ <fork>true</fork>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-api</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>algebricks-compiler</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-hdfs-core</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-exec</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/common/config/ConfUtil.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/common/config/ConfUtil.java
new file mode 100644
index 0000000..648deb6
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/common/config/ConfUtil.java
@@ -0,0 +1,186 @@
+package edu.uci.ics.hivesterix.common.config;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.hyracks.api.client.HyracksConnection;
+import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
+import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.topology.ClusterTopology;
+
+@SuppressWarnings({ "rawtypes", "deprecation" })
+public class ConfUtil {
+ private static final String clusterPropertiesPath = "conf/cluster.properties";
+ private static final String masterFilePath = "conf/master";
+
+ private static JobConf job;
+ private static HiveConf hconf;
+ private static String[] NCs;
+ private static Map<String, List<String>> ncMapping;
+ private static IHyracksClientConnection hcc = null;
+ private static ClusterTopology topology = null;
+ private static Properties clusterProps;
+ private static Map<String, NodeControllerInfo> ncNameToNcInfos;
+
+ public static JobConf getJobConf(Class<? extends InputFormat> format, Path path) {
+ JobConf conf = new JobConf();
+ if (job != null)
+ conf = job;
+
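+ // layer the Hadoop site configuration from $HADOOP_HOME/conf onto the job conf (the HADOOP_HOME system property defaults to /hadoop)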
+ String hadoopPath = System.getProperty("HADOOP_HOME", "/hadoop");
+ Path pathCore = new Path(hadoopPath + "/conf/core-site.xml");
+ conf.addResource(pathCore);
+ Path pathMapRed = new Path(hadoopPath + "/conf/mapred-site.xml");
+ conf.addResource(pathMapRed);
+ Path pathHDFS = new Path(hadoopPath + "/conf/hdfs-site.xml");
+ conf.addResource(pathHDFS);
+
+ conf.setInputFormat(format);
+ FileInputFormat.setInputPaths(conf, path);
+ return conf;
+ }
+
+ public static JobConf getJobConf() {
+ JobConf conf = new JobConf();
+ if (job != null)
+ conf = job;
+
+ String hadoopPath = System.getProperty("HADOOP_HOME", "/hadoop");
+ Path pathCore = new Path(hadoopPath + "/conf/core-site.xml");
+ conf.addResource(pathCore);
+ Path pathMapRed = new Path(hadoopPath + "/conf/mapred-site.xml");
+ conf.addResource(pathMapRed);
+ Path pathHDFS = new Path(hadoopPath + "/conf/hdfs-site.xml");
+ conf.addResource(pathHDFS);
+
+ return conf;
+ }
+
+ public static void setJobConf(JobConf conf) {
+ job = conf;
+ }
+
+ public static void setHiveConf(HiveConf hiveConf) {
+ hconf = hiveConf;
+ }
+
+ public static HiveConf getHiveConf() {
+ if (hconf == null) {
+ hconf = new HiveConf(SessionState.class);
+ hconf.addResource(new Path("conf/hive-default.xml"));
+ }
+ return hconf;
+ }
+
+ public static String[] getNCs() throws HyracksException {
+ if (NCs == null) {
+ try {
+ loadClusterConfig();
+ } catch (Exception e) {
+ throw new HyracksException(e);
+ }
+ }
+ return NCs;
+ }
+
+ public static Map<String, List<String>> getNCMapping() throws HyracksException {
+ if (ncMapping == null) {
+ try {
+ loadClusterConfig();
+ } catch (Exception e) {
+ throw new HyracksException(e);
+ }
+ }
+ return ncMapping;
+ }
+
+ public static Map<String, NodeControllerInfo> getNodeControllerInfo() throws HyracksException {
+ if (ncNameToNcInfos == null) {
+ try {
+ loadClusterConfig();
+ } catch (Exception e) {
+ throw new HyracksException(e);
+ }
+ }
+ return ncNameToNcInfos;
+ }
+
+ private static void loadClusterConfig() {
+ try {
+ getHiveConf();
+
+ /**
+ * load the properties file if it is not loaded
+ */
+ if (clusterProps == null) {
+ clusterProps = new Properties();
+ InputStream confIn = new FileInputStream(clusterPropertiesPath);
+ clusterProps.load(confIn);
+ confIn.close();
+ }
+
+ if (hcc == null) {
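+ // read the master host name from conf/master and connect to the Hyracks cluster controller on CC_CLIENTPORT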
+ BufferedReader ipReader = new BufferedReader(new InputStreamReader(new FileInputStream(masterFilePath)));
+ String masterNode = ipReader.readLine();
+ ipReader.close();
+
+ InetAddress[] ips = InetAddress.getAllByName(masterNode);
+ int port = Integer.parseInt(clusterProps.getProperty("CC_CLIENTPORT"));
+ for (InetAddress ip : ips) {
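+ // try each IPv4 address of the master (raw address length of 4 bytes) until a connection succeeds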
+ if (ip.getAddress().length <= 4) {
+ try {
+ hcc = new HyracksConnection(ip.getHostAddress(), port);
+ break;
+ } catch (Exception e) {
+ continue;
+ }
+ }
+ }
+ }
+
+ int mpl = Integer.parseInt(hconf.get("hive.hyracks.parrallelism"));
+ topology = hcc.getClusterTopology();
+ ncNameToNcInfos = hcc.getNodeControllerInfos();
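+ // each node controller name is repeated mpl times, so the NCs array encodes the per-node degree of parallelism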
+ NCs = new String[ncNameToNcInfos.size() * mpl];
+ ncMapping = new HashMap<String, List<String>>();
+ int i = 0;
+ for (Map.Entry<String, NodeControllerInfo> entry : ncNameToNcInfos.entrySet()) {
+ String ipAddr = InetAddress.getByAddress(entry.getValue().getNetworkAddress().getIpAddress())
+ .getHostAddress();
+ List<String> matchedNCs = ncMapping.get(ipAddr);
+ if (matchedNCs == null) {
+ matchedNCs = new ArrayList<String>();
+ ncMapping.put(ipAddr, matchedNCs);
+ }
+ matchedNCs.add(entry.getKey());
+ for (int j = i * mpl; j < i * mpl + mpl; j++)
+ NCs[j] = entry.getKey();
+ i++;
+ }
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ public static ClusterTopology getClusterTopology() {
+ if (topology == null)
+ loadClusterConfig();
+ return topology;
+ }
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionConstant.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionConstant.java
new file mode 100644
index 0000000..8fb715b
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionConstant.java
@@ -0,0 +1,24 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+/**
+ * constants used in Hive expression translation
+ *
+ * @author yingyib
+ */
+public class ExpressionConstant {
+
+ /**
+ * namespace for function identifiers
+ */
+ public static String NAMESPACE = "hive";
+
+ /**
+ * field access expression: modeled as a function in Algebricks
+ */
+ public static String FIELDACCESS = "fieldaccess";
+
+ /**
+ * null string: modeled as null in Algebricks
+ */
+ public static String NULL = "null";
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionTranslator.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionTranslator.java
new file mode 100644
index 0000000..662ed83
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionTranslator.java
@@ -0,0 +1,209 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
+
+public class ExpressionTranslator {
+
+ public static Object getHiveExpression(ILogicalExpression expr, IVariableTypeEnvironment env) throws Exception {
+ if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
+ /**
+ * function expression
+ */
+ AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
+ IFunctionInfo funcInfo = funcExpr.getFunctionInfo();
+ FunctionIdentifier fid = funcInfo.getFunctionIdentifier();
+
+ if (fid.getName().equals(ExpressionConstant.FIELDACCESS)) {
+ Object info = ((HiveFunctionInfo) funcInfo).getInfo();
+ ExprNodeFieldDesc desc = (ExprNodeFieldDesc) info;
+ return new ExprNodeFieldDesc(desc.getTypeInfo(), desc.getDesc(), desc.getFieldName(), desc.getIsList());
+ }
+
+ if (fid.getName().equals(ExpressionConstant.NULL)) {
+ return new ExprNodeNullDesc();
+ }
+
+ /**
+ * argument expressions: translate the argument expressions recursively
+ * first; this logic is shared by scalar, aggregation and unnesting
+ * functions
+ */
+ List<Mutable<ILogicalExpression>> arguments = funcExpr.getArguments();
+ List<ExprNodeDesc> parameters = new ArrayList<ExprNodeDesc>();
+ for (Mutable<ILogicalExpression> argument : arguments) {
+ /**
+ * parameters cannot be aggregation function descriptors
+ */
+ ExprNodeDesc parameter = (ExprNodeDesc) getHiveExpression(argument.getValue(), env);
+ parameters.add(parameter);
+ }
+
+ /**
+ * get expression
+ */
+ if (funcExpr instanceof ScalarFunctionCallExpression) {
+ String udfName = HiveAlgebricksBuiltInFunctionMap.INSTANCE.getHiveFunctionName(fid);
+ GenericUDF udf;
+ if (udfName != null) {
+ /**
+ * get corresponding function info for built-in functions
+ */
+ FunctionInfo fInfo = FunctionRegistry.getFunctionInfo(udfName);
+ udf = fInfo.getGenericUDF();
+
+ int inputSize = parameters.size();
+ List<ExprNodeDesc> currentDescs = new ArrayList<ExprNodeDesc>();
+
+ // generate expression tree if necessary
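+ // fold an n-ary call into a tree of binary calls by pairing adjacent arguments (the built-in Hive UDFs mapped here are binary)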
+ while (inputSize > 2) {
+ int pairs = inputSize / 2;
+ for (int i = 0; i < pairs; i++) {
+ List<ExprNodeDesc> descs = new ArrayList<ExprNodeDesc>();
+ descs.add(parameters.get(2 * i));
+ descs.add(parameters.get(2 * i + 1));
+ ExprNodeDesc desc = ExprNodeGenericFuncDesc.newInstance(udf, descs);
+ currentDescs.add(desc);
+ }
+
+ if (inputSize % 2 != 0) {
+ // List<ExprNodeDesc> descs = new
+ // ArrayList<ExprNodeDesc>();
+ // ExprNodeDesc lastExpr =
+ // currentDescs.remove(currentDescs.size() - 1);
+ // descs.add(lastExpr);
+ currentDescs.add(parameters.get(inputSize - 1));
+ // ExprNodeDesc desc =
+ // ExprNodeGenericFuncDesc.newInstance(udf, descs);
+ // currentDescs.add(desc);
+ }
+ inputSize = currentDescs.size();
+ parameters.clear();
+ parameters.addAll(currentDescs);
+ currentDescs.clear();
+ }
+
+ } else {
+ Object secondInfo = ((HiveFunctionInfo) funcInfo).getInfo();
+ if (secondInfo != null) {
+
+ /**
+ * for GenericUDFBridge: we should not re-derive the type of
+ * this hive expression, because its parameters may have
+ * been changed.
+ */
+ ExprNodeGenericFuncDesc hiveExpr = (ExprNodeGenericFuncDesc) ((HiveFunctionInfo) funcInfo)
+ .getInfo();
+ udf = hiveExpr.getGenericUDF();
+ } else {
+ /**
+ * for other generic UDF
+ */
+ Class<?> udfClass;
+ try {
+ udfClass = Class.forName(fid.getName());
+ udf = (GenericUDF) udfClass.newInstance();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e.getMessage());
+ }
+ }
+ }
+ /**
+ * get hive generic function expression
+ */
+ ExprNodeDesc desc = ExprNodeGenericFuncDesc.newInstance(udf, parameters);
+ return desc;
+ } else if (funcExpr instanceof AggregateFunctionCallExpression) {
+ /**
+ * hive aggregation info
+ */
+ AggregationDesc aggregateDesc = (AggregationDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo())
+ .getInfo();
+ /**
+ * set parameters
+ */
+ aggregateDesc.setParameters((ArrayList<ExprNodeDesc>) parameters);
+
+ List<TypeInfo> originalParameterTypeInfos = new ArrayList<TypeInfo>();
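+ // struct-typed (partially aggregated) parameters are replaced by a primitive stand-in type when resolving the evaluator; double is assumed to be a safe placeholder here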
+ for (ExprNodeDesc parameter : parameters) {
+ if (parameter.getTypeInfo() instanceof StructTypeInfo) {
+ originalParameterTypeInfos.add(TypeInfoFactory.doubleTypeInfo);
+ } else
+ originalParameterTypeInfos.add(parameter.getTypeInfo());
+ }
+
+ GenericUDAFEvaluator eval = FunctionRegistry.getGenericUDAFEvaluator(
+ aggregateDesc.getGenericUDAFName(), originalParameterTypeInfos, aggregateDesc.getDistinct(),
+ false);
+
+ AggregationDesc newAggregateDesc = new AggregationDesc(aggregateDesc.getGenericUDAFName(), eval,
+ aggregateDesc.getParameters(), aggregateDesc.getDistinct(), aggregateDesc.getMode());
+ return newAggregateDesc;
+ } else if (funcExpr instanceof UnnestingFunctionCallExpression) {
+ /**
+ * rebuild the Hive UDTF descriptor for the unnesting function
+ */
+ UDTFDesc hiveDesc = (UDTFDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo()).getInfo();
+ String funcName = hiveDesc.getUDTFName();
+ FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
+ GenericUDTF udtf = fi.getGenericUDTF();
+ UDTFDesc desc = new UDTFDesc(udtf);
+ return desc;
+ } else {
+ throw new IllegalStateException("unrecognized function expression " + expr.getClass().getName());
+ }
+ } else if ((expr.getExpressionTag() == LogicalExpressionTag.VARIABLE)) {
+ /**
+ * translate a variable reference into a column descriptor, using its type from the environment
+ */
+ VariableReferenceExpression varExpr = (VariableReferenceExpression) expr;
+ LogicalVariable var = varExpr.getVariableReference();
+ TypeInfo typeInfo = (TypeInfo) env.getVarType(var);
+ ExprNodeDesc desc = new ExprNodeColumnDesc(typeInfo, var.toString(), "", false);
+ return desc;
+ } else if ((expr.getExpressionTag() == LogicalExpressionTag.CONSTANT)) {
+ /**
+ * translate a constant value into a Hive constant descriptor
+ */
+ ConstantExpression varExpr = (ConstantExpression) expr;
+ Object value = ((HivesterixConstantValue) varExpr.getValue()).getObject();
+ ExprNodeDesc desc = new ExprNodeConstantDesc(value);
+ return desc;
+ } else {
+ throw new IllegalStateException("illegal expressions " + expr.getClass().getName());
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveAlgebricksBuiltInFunctionMap.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveAlgebricksBuiltInFunctionMap.java
new file mode 100644
index 0000000..56890eb
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveAlgebricksBuiltInFunctionMap.java
@@ -0,0 +1,82 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.util.HashMap;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+
+public class HiveAlgebricksBuiltInFunctionMap {
+
+ /**
+ * the singleton instance of the Hive-Algebricks built-in function map
+ */
+ public static HiveAlgebricksBuiltInFunctionMap INSTANCE = new HiveAlgebricksBuiltInFunctionMap();
+
+ /**
+ * hive to Algebricks function name mapping
+ */
+ private HashMap<String, FunctionIdentifier> hiveToAlgebricksMap = new HashMap<String, FunctionIdentifier>();
+
+ /**
+ * Algebricks to hive function name mapping
+ */
+ private HashMap<FunctionIdentifier, String> AlgebricksToHiveMap = new HashMap<FunctionIdentifier, String>();
+
+ /**
+ * initializes the bi-directional mapping between Hive functions and
+ * Algebricks functions
+ */
+ private HiveAlgebricksBuiltInFunctionMap() {
+ hiveToAlgebricksMap.put("and", AlgebricksBuiltinFunctions.AND);
+ hiveToAlgebricksMap.put("or", AlgebricksBuiltinFunctions.OR);
+ hiveToAlgebricksMap.put("!", AlgebricksBuiltinFunctions.NOT);
+ hiveToAlgebricksMap.put("not", AlgebricksBuiltinFunctions.NOT);
+ hiveToAlgebricksMap.put("=", AlgebricksBuiltinFunctions.EQ);
+ hiveToAlgebricksMap.put("<>", AlgebricksBuiltinFunctions.NEQ);
+ hiveToAlgebricksMap.put(">", AlgebricksBuiltinFunctions.GT);
+ hiveToAlgebricksMap.put("<", AlgebricksBuiltinFunctions.LT);
+ hiveToAlgebricksMap.put(">=", AlgebricksBuiltinFunctions.GE);
+ hiveToAlgebricksMap.put("<=", AlgebricksBuiltinFunctions.LE);
+
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.AND, "and");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.OR, "or");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.NOT, "!");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.NOT, "not");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.EQ, "=");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.NEQ, "<>");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.GT, ">");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.LT, "<");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.GE, ">=");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.LE, "<=");
+ }
+
+ /**
+ * get hive function name from Algebricks function identifier
+ *
+ * @param AlgebricksId
+ * @return the corresponding Hive function name, or null if there is none
+ */
+ public String getHiveFunctionName(FunctionIdentifier AlgebricksId) {
+ return AlgebricksToHiveMap.get(AlgebricksId);
+ }
+
+ /**
+ * get the built-in Algebricks function identifier corresponding to a Hive UDF or GenericUDF class
+ *
+ * @param funcClass
+ * @return function identifier
+ */
+ public FunctionIdentifier getAlgebricksFunctionId(Class<?> funcClass) {
+ Description annotation = funcClass.getAnnotation(Description.class);
+ if (annotation == null) {
+ return null;
+ }
+ return hiveToAlgebricksMap.get(annotation.name());
+ }
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveExpressionTypeComputer.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveExpressionTypeComputer.java
new file mode 100644
index 0000000..e10e8c1
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveExpressionTypeComputer.java
@@ -0,0 +1,179 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IExpressionTypeComputer;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+
+public class HiveExpressionTypeComputer implements IExpressionTypeComputer {
+
+ public static IExpressionTypeComputer INSTANCE = new HiveExpressionTypeComputer();
+
+ @Override
+ public Object getType(ILogicalExpression expr, IMetadataProvider<?, ?> metadataProvider,
+ IVariableTypeEnvironment env) throws AlgebricksException {
+ if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
+ /**
+ * function expression
+ */
+ AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
+ IFunctionInfo funcInfo = funcExpr.getFunctionInfo();
+
+ /**
+ * argument expressions, types, object inspectors
+ */
+ List<Mutable<ILogicalExpression>> arguments = funcExpr.getArguments();
+ List<TypeInfo> argumentTypes = new ArrayList<TypeInfo>();
+
+ /**
+ * get types of arguments
+ */
+ for (Mutable<ILogicalExpression> argument : arguments) {
+ TypeInfo type = (TypeInfo) getType(argument.getValue(), metadataProvider, env);
+ argumentTypes.add(type);
+ }
+
+ ObjectInspector[] childrenOIs = new ObjectInspector[argumentTypes.size()];
+
+ /**
+ * get object inspector
+ */
+ for (int i = 0; i < argumentTypes.size(); i++) {
+ childrenOIs[i] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(argumentTypes.get(i));
+ }
+
+ /**
+ * type inference for scalar function
+ */
+ if (funcExpr instanceof ScalarFunctionCallExpression) {
+
+ FunctionIdentifier AlgebricksId = funcInfo.getFunctionIdentifier();
+ Object functionInfo = ((HiveFunctionInfo) funcInfo).getInfo();
+ String udfName = HiveAlgebricksBuiltInFunctionMap.INSTANCE.getHiveFunctionName(AlgebricksId);
+ GenericUDF udf;
+ if (udfName != null) {
+ /**
+ * get corresponding function info for built-in functions
+ */
+ FunctionInfo fInfo = FunctionRegistry.getFunctionInfo(udfName);
+ udf = fInfo.getGenericUDF();
+ } else if (functionInfo != null) {
+ /**
+ * for GenericUDFBridge: we should not re-derive the type of this
+ * hive expression, because its parameters may have been
+ * changed.
+ */
+ ExprNodeGenericFuncDesc hiveExpr = (ExprNodeGenericFuncDesc) functionInfo;
+ udf = hiveExpr.getGenericUDF();
+ } else {
+ /**
+ * for other generic UDF
+ */
+ Class<?> udfClass;
+ try {
+ udfClass = Class.forName(AlgebricksId.getName());
+ udf = (GenericUDF) udfClass.newInstance();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e.getMessage());
+ }
+ }
+ /**
+ * doing the actual type inference
+ */
+ ObjectInspector oi = null;
+ try {
+ oi = udf.initialize(childrenOIs);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ TypeInfo exprType = TypeInfoUtils.getTypeInfoFromObjectInspector(oi);
+ return exprType;
+
+ } else if (funcExpr instanceof AggregateFunctionCallExpression) {
+ /**
+ * hive aggregation info
+ */
+ AggregationDesc aggregateDesc = (AggregationDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo())
+ .getInfo();
+ /**
+ * type inference for aggregation function
+ */
+ GenericUDAFEvaluator result = aggregateDesc.getGenericUDAFEvaluator();
+
+ ObjectInspector returnOI = null;
+ try {
+ returnOI = result.init(aggregateDesc.getMode(), childrenOIs);
+ } catch (HiveException e) {
+ e.printStackTrace();
+ }
+ TypeInfo exprType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI);
+ return exprType;
+ } else if (funcExpr instanceof UnnestingFunctionCallExpression) {
+ /**
+ * type inference for UDTF function
+ */
+ UDTFDesc hiveDesc = (UDTFDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo()).getInfo();
+ GenericUDTF udtf = hiveDesc.getGenericUDTF();
+ ObjectInspector returnOI = null;
+ try {
+ returnOI = udtf.initialize(childrenOIs);
+ } catch (HiveException e) {
+ e.printStackTrace();
+ }
+ TypeInfo exprType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI);
+ return exprType;
+ } else {
+ throw new IllegalStateException("unrecognized function expression " + expr.getClass().getName());
+ }
+ } else if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
+ /**
+ * get type for variable in the environment
+ */
+ VariableReferenceExpression varExpr = (VariableReferenceExpression) expr;
+ LogicalVariable var = varExpr.getVariableReference();
+ TypeInfo type = (TypeInfo) env.getVarType(var);
+ return type;
+ } else if (expr.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
+ /**
+ * get type for constant, from its java class
+ */
+ ConstantExpression constExpr = (ConstantExpression) expr;
+ HivesterixConstantValue value = (HivesterixConstantValue) constExpr.getValue();
+ TypeInfo type = TypeInfoFactory.getPrimitiveTypeInfoFromJavaPrimitive(value.getObject().getClass());
+ return type;
+ } else {
+ throw new IllegalStateException("illegal expressions " + expr.getClass().getName());
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveFunctionInfo.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveFunctionInfo.java
new file mode 100644
index 0000000..ced8d02
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveFunctionInfo.java
@@ -0,0 +1,36 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.io.Serializable;
+
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
+
+public class HiveFunctionInfo implements IFunctionInfo, Serializable {
+
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * primary function identifier
+ */
+ private transient FunctionIdentifier fid;
+
+ /**
+ * secondary function identifier: function name
+ */
+ private transient Object secondaryFid;
+
+ public HiveFunctionInfo(FunctionIdentifier fid, Object secondFid) {
+ this.fid = fid;
+ this.secondaryFid = secondFid;
+ }
+
+ @Override
+ public FunctionIdentifier getFunctionIdentifier() {
+ return fid;
+ }
+
+ public Object getInfo() {
+ return secondaryFid;
+ }
+
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveMergeAggregationExpressionFactory.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveMergeAggregationExpressionFactory.java
new file mode 100644
index 0000000..b77fe49
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveMergeAggregationExpressionFactory.java
@@ -0,0 +1,74 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IMergeAggregationExpressionFactory;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+
+/**
+ * generate merge aggregation expression from an aggregation expression
+ *
+ * @author yingyib
+ */
+public class HiveMergeAggregationExpressionFactory implements IMergeAggregationExpressionFactory {
+
+ public static IMergeAggregationExpressionFactory INSTANCE = new HiveMergeAggregationExpressionFactory();
+
+ @Override
+ public ILogicalExpression createMergeAggregation(ILogicalExpression expr, IOptimizationContext context)
+ throws AlgebricksException {
+ /**
+ * build the merge aggregation expression only for aggregate function calls
+ */
+ if (expr instanceof AggregateFunctionCallExpression) {
+ AggregateFunctionCallExpression funcExpr = (AggregateFunctionCallExpression) expr;
+ /**
+ * hive aggregation info
+ */
+ AggregationDesc aggregator = (AggregationDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo()).getInfo();
+ LogicalVariable inputVar = context.newVar();
+ ExprNodeDesc col = new ExprNodeColumnDesc(TypeInfoFactory.voidTypeInfo, inputVar.toString(), null, false);
+ ArrayList<ExprNodeDesc> parameters = new ArrayList<ExprNodeDesc>();
+ parameters.add(col);
+
+ GenericUDAFEvaluator.Mode mergeMode;
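+ // the merge aggregator consumes partial results: PARTIAL1 becomes PARTIAL2 and COMPLETE becomes FINAL; other modes are kept as-is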
+ if (aggregator.getMode() == GenericUDAFEvaluator.Mode.PARTIAL1)
+ mergeMode = GenericUDAFEvaluator.Mode.PARTIAL2;
+ else if (aggregator.getMode() == GenericUDAFEvaluator.Mode.COMPLETE)
+ mergeMode = GenericUDAFEvaluator.Mode.FINAL;
+ else
+ mergeMode = aggregator.getMode();
+ AggregationDesc mergeDesc = new AggregationDesc(aggregator.getGenericUDAFName(),
+ aggregator.getGenericUDAFEvaluator(), parameters, aggregator.getDistinct(), mergeMode);
+
+ String UDAFName = mergeDesc.getGenericUDAFName();
+ List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();
+ arguments.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(inputVar)));
+
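+ // encode both the UDAF name and the merge mode in the Algebricks function identifier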
+ FunctionIdentifier funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, UDAFName + "("
+ + mergeDesc.getMode() + ")");
+ HiveFunctionInfo funcInfo = new HiveFunctionInfo(funcId, mergeDesc);
+ AggregateFunctionCallExpression aggregationExpression = new AggregateFunctionCallExpression(funcInfo,
+ false, arguments);
+ return aggregationExpression;
+ } else {
+ throw new IllegalStateException("illegal expressions " + expr.getClass().getName());
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveNullableTypeComputer.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveNullableTypeComputer.java
new file mode 100644
index 0000000..906e3ce
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveNullableTypeComputer.java
@@ -0,0 +1,15 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.INullableTypeComputer;
+
+public class HiveNullableTypeComputer implements INullableTypeComputer {
+
+ public static INullableTypeComputer INSTANCE = new HiveNullableTypeComputer();
+
+ @Override
+ public Object makeNullableType(Object type) throws AlgebricksException {
+ return type;
+ }
+
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivePartialAggregationTypeComputer.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivePartialAggregationTypeComputer.java
new file mode 100644
index 0000000..c74966c
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivePartialAggregationTypeComputer.java
@@ -0,0 +1,102 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IExpressionTypeComputer;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IPartialAggregationTypeComputer;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+
+public class HivePartialAggregationTypeComputer implements IPartialAggregationTypeComputer {
+
+ public static IPartialAggregationTypeComputer INSTANCE = new HivePartialAggregationTypeComputer();
+
+ @Override
+ public Object getType(ILogicalExpression expr, IVariableTypeEnvironment env,
+ IMetadataProvider<?, ?> metadataProvider) throws AlgebricksException {
+ if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
+ IExpressionTypeComputer tc = HiveExpressionTypeComputer.INSTANCE;
+ /**
+ * function expression
+ */
+ AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
+
+ /**
+ * argument expressions, types, object inspectors
+ */
+ List<Mutable<ILogicalExpression>> arguments = funcExpr.getArguments();
+ List<TypeInfo> argumentTypes = new ArrayList<TypeInfo>();
+
+ /**
+ * get types of arguments
+ */
+ for (Mutable<ILogicalExpression> argument : arguments) {
+ TypeInfo type = (TypeInfo) tc.getType(argument.getValue(), metadataProvider, env);
+ argumentTypes.add(type);
+ }
+
+ ObjectInspector[] childrenOIs = new ObjectInspector[argumentTypes.size()];
+
+ /**
+ * get object inspector
+ */
+ for (int i = 0; i < argumentTypes.size(); i++) {
+ childrenOIs[i] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(argumentTypes.get(i));
+ }
+
+ /**
+ * type inference for the partial aggregation function
+ */
+ if (funcExpr instanceof AggregateFunctionCallExpression) {
+ /**
+ * hive aggregation info
+ */
+ AggregationDesc aggregateDesc = (AggregationDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo())
+ .getInfo();
+ /**
+ * type inference for aggregation function
+ */
+ GenericUDAFEvaluator result = aggregateDesc.getGenericUDAFEvaluator();
+
+ ObjectInspector returnOI = null;
+ try {
+ returnOI = result.init(getPartialMode(aggregateDesc.getMode()), childrenOIs);
+ } catch (HiveException e) {
+ e.printStackTrace();
+ }
+ TypeInfo exprType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI);
+ return exprType;
+ } else {
+ throw new IllegalStateException("illegal expressions " + expr.getClass().getName());
+ }
+ } else {
+ throw new IllegalStateException("illegal expressions " + expr.getClass().getName());
+ }
+ }
+
+ private Mode getPartialMode(Mode mode) {
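+ // map the final/complete modes to the modes that produce the partial (intermediate) result, whose type is what is needed here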
+ Mode partialMode;
+ if (mode == Mode.FINAL)
+ partialMode = Mode.PARTIAL2;
+ else if (mode == Mode.COMPLETE)
+ partialMode = Mode.PARTIAL1;
+ else
+ partialMode = mode;
+ return partialMode;
+ }
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivesterixConstantValue.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivesterixConstantValue.java
new file mode 100644
index 0000000..3d35e1f
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivesterixConstantValue.java
@@ -0,0 +1,55 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IAlgebricksConstantValue;
+
+public class HivesterixConstantValue implements IAlgebricksConstantValue {
+
+ private Object object;
+
+ public HivesterixConstantValue(Object object) {
+ this.setObject(object);
+ }
+
+ @Override
+ public boolean isFalse() {
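+ // reference comparison works here because autoboxing yields the canonical Boolean.FALSE instance (assumed for values produced by the translator)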
+ return object == Boolean.FALSE;
+ }
+
+ @Override
+ public boolean isNull() {
+ return object == null;
+ }
+
+ @Override
+ public boolean isTrue() {
+ return object == Boolean.TRUE;
+ }
+
+ public void setObject(Object object) {
+ this.object = object;
+ }
+
+ public Object getObject() {
+ return object;
+ }
+
+ @Override
+ public String toString() {
+ return object.toString();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof HivesterixConstantValue)) {
+ return false;
+ }
+ HivesterixConstantValue v2 = (HivesterixConstantValue) o;
+ return object.equals(v2.getObject());
+ }
+
+ @Override
+ public int hashCode() {
+ return object.hashCode();
+ }
+
+}
diff --git a/hivesterix/hivesterix-common/src/test/java/edu/uci/ics/hyracks/AppTest.java b/hivesterix/hivesterix-common/src/test/java/edu/uci/ics/hyracks/AppTest.java
new file mode 100644
index 0000000..0c701c8
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/test/java/edu/uci/ics/hyracks/AppTest.java
@@ -0,0 +1,38 @@
+package edu.uci.ics.hyracks;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ * Unit test for simple App.
+ */
+public class AppTest
+ extends TestCase
+{
+ /**
+ * Create the test case
+ *
+ * @param testName name of the test case
+ */
+ public AppTest( String testName )
+ {
+ super( testName );
+ }
+
+ /**
+ * @return the suite of tests being tested
+ */
+ public static Test suite()
+ {
+ return new TestSuite( AppTest.class );
+ }
+
+ /**
+ * Rigorous Test :-)
+ */
+ public void testApp()
+ {
+ assertTrue( true );
+ }
+}
diff --git a/hivesterix/hivesterix-dist/conf/cluster.properties b/hivesterix/hivesterix-dist/conf/cluster.properties
new file mode 100644
index 0000000..2d2401a
--- /dev/null
+++ b/hivesterix/hivesterix-dist/conf/cluster.properties
@@ -0,0 +1,37 @@
+#The CC port for Hyracks clients
+CC_CLIENTPORT=3099
+
+#The CC port for Hyracks cluster management
+CC_CLUSTERPORT=1099
+
+#The directory of hyracks binaries
+HYRACKS_HOME=../../../../hyracks
+
+#The tmp directory for cc to install jars
+CCTMP_DIR=/tmp/t1
+
+#The tmp directory for nc to install jars
+NCTMP_DIR=/tmp/t2
+
+#The directory to put cc logs
+CCLOGS_DIR=$CCTMP_DIR/logs
+
+#The directory to put nc logs
+NCLOGS_DIR=$NCTMP_DIR/logs
+
+#Comma-separated I/O directories for the spilling of external sort
+IO_DIRS="/tmp/t3,/tmp/t4"
+
+#The JAVA_HOME
+JAVA_HOME=$JAVA_HOME
+
+#The frame size of the internal dataflow engine
+FRAME_SIZE=65536
+
+#CC JAVA_OPTS
+CCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7001,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+# Yourkit option: -agentpath:/grid/0/dev/vborkar/tools/yjp-10.0.4/bin/linux-x86-64/libyjpagent.so=port=20001"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7002,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/hivesterix/conf/configuration.xsl b/hivesterix/hivesterix-dist/conf/configuration.xsl
similarity index 100%
rename from hivesterix/conf/configuration.xsl
rename to hivesterix/hivesterix-dist/conf/configuration.xsl
diff --git a/hivesterix/hivesterix-dist/conf/debugnc.properties b/hivesterix/hivesterix-dist/conf/debugnc.properties
new file mode 100755
index 0000000..27afa26
--- /dev/null
+++ b/hivesterix/hivesterix-dist/conf/debugnc.properties
@@ -0,0 +1,12 @@
+#The tmp directory for nc to install jars
+NCTMP_DIR2=/tmp/t-1
+
+#The directory to put nc logs
+NCLOGS_DIR2=$NCTMP_DIR/logs
+
+#Comma-separated I/O directories for the spilling of external sort
+IO_DIRS2="/tmp/t-2,/tmp/t-3"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS2="-Xdebug -Xrunjdwp:transport=dt_socket,address=7003,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/hivesterix/conf/hive-default.xml b/hivesterix/hivesterix-dist/conf/hive-default.xml
similarity index 98%
rename from hivesterix/conf/hive-default.xml
rename to hivesterix/hivesterix-dist/conf/hive-default.xml
index 034ea61..587eede 100644
--- a/hivesterix/conf/hive-default.xml
+++ b/hivesterix/hivesterix-dist/conf/hive-default.xml
@@ -23,22 +23,11 @@
By setting this property to -1, Hive will automatically figure out what
should be the number of reducers.
</description>
- </property>
- <property>
- <name>hive.hyracks.host</name>
- <value>128.195.14.4</value>
- </property>
-
- <property>
- <name>hive.hyracks.port</name>
- <value>3099</value>
- </property>
-
- <property>
- <name>hive.hyracks.app</name>
- <value>hivesterix</value>
- </property>
+ <property>
+ <name>hive.hyracks.connectorpolicy</name>
+ <value>PIPELINING</value>
+ </property>
<property>
<name>hive.hyracks.parrallelism</name>
@@ -52,12 +41,12 @@
<property>
<name>hive.algebricks.groupby.external.memory</name>
- <value>536870912</value>
+ <value>33554432</value>
</property>
<property>
<name>hive.algebricks.sort.memory</name>
- <value>536870912</value>
+ <value>33554432</value>
</property>
<property>
diff --git a/hivesterix/conf/hive-log4j.properties b/hivesterix/hivesterix-dist/conf/hive-log4j.properties
similarity index 100%
rename from hivesterix/conf/hive-log4j.properties
rename to hivesterix/hivesterix-dist/conf/hive-log4j.properties
diff --git a/hivesterix/hivesterix-dist/conf/master b/hivesterix/hivesterix-dist/conf/master
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/hivesterix/hivesterix-dist/conf/master
@@ -0,0 +1 @@
+localhost
diff --git a/hivesterix/hivesterix-dist/conf/slaves b/hivesterix/hivesterix-dist/conf/slaves
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/hivesterix/hivesterix-dist/conf/slaves
@@ -0,0 +1 @@
+localhost
diff --git a/hivesterix/hivesterix-dist/pom.xml b/hivesterix/hivesterix-dist/pom.xml
new file mode 100644
index 0000000..ce61422
--- /dev/null
+++ b/hivesterix/hivesterix-dist/pom.xml
@@ -0,0 +1,517 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-dist</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <name>hivesterix-dist</name>
+ <dependencies>
+ <dependency>
+ <groupId>javax.servlet</groupId>
+ <artifactId>servlet-api</artifactId>
+ <version>2.5</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.8.1</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>args4j</groupId>
+ <artifactId>args4j</artifactId>
+ <version>2.0.12</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.json</groupId>
+ <artifactId>json</artifactId>
+ <version>20090211</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-server</artifactId>
+ <version>8.0.0.M1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-servlet</artifactId>
+ <version>8.0.0.M1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ <version>0.20.2</version>
+ </dependency>
+ <dependency>
+ <groupId>jline</groupId>
+ <artifactId>jline</artifactId>
+ <version>0.9.94</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.datanucleus</groupId>
+ <artifactId>datanucleus-core</artifactId>
+ <version>2.0.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.datanucleus</groupId>
+ <artifactId>datanucleus-connectionpool</artifactId>
+ <version>2.0.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.datanucleus</groupId>
+ <artifactId>datanucleus-enhancer</artifactId>
+ <version>2.0.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.datanucleus</groupId>
+ <artifactId>datanucleus-rdbms</artifactId>
+ <version>2.0.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-dbcp</groupId>
+ <artifactId>commons-dbcp</artifactId>
+ <version>1.4</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-pool</groupId>
+ <artifactId>commons-pool</artifactId>
+ <version>1.5.4</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-collections</groupId>
+ <artifactId>commons-collections</artifactId>
+ <version>3.2.1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ <version>2.4</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>javax</groupId>
+ <artifactId>jdo2-api</artifactId>
+ <version>2.3-ec</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.facebook</groupId>
+ <artifactId>libfb303</artifactId>
+ <version>0.5.0</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.thrift</groupId>
+ <artifactId>libthrift</artifactId>
+ <version>0.5.0</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>cli</artifactId>
+ <version>1.2</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache</groupId>
+ <artifactId>log4j</artifactId>
+ <version>1.2.15</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.antlr</groupId>
+ <artifactId>antlr-runtime</artifactId>
+ <version>3.0.1</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-cli</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-common</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-exec</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-hwi</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-jdbc</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-metastore</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-service</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-shims</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-serde</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ <version>1.6.1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ <version>1.2</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ <version>1.6.1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-test</artifactId>
+ <version>0.20.2</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ <version>1.1.1</version>
+ <type>jar</type>
+ <classifier>api</classifier>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>r06</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.antlr</groupId>
+ <artifactId>stringtemplate</artifactId>
+ <version>3.2</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.derby</groupId>
+ <artifactId>derby</artifactId>
+ <version>10.8.1.2</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase</artifactId>
+ <version>0.90.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>algebricks-compiler</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-control-cc</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-control-nc</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-translator</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-optimizer</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <encoding>UTF-8</encoding>
+ <fork>true</fork>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-jar-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>patch</id>
+ <goals>
+ <goal>jar</goal>
+ </goals>
+ <phase>package</phase>
+ <configuration>
+ <classifier>patch</classifier>
+ <finalName>a-hive</finalName>
+ <includes>
+ <include>**/org/apache/**</include>
+ </includes>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>appassembler-maven-plugin</artifactId>
+ <version>1.3</version>
+ <executions>
+ <execution>
+ <configuration>
+ <programs>
+ <program>
+ <mainClass>edu.uci.ics.asterix.hive.cli.CliDriver</mainClass>
+ <name>algebricks-hivesterix-cmd</name>
+ </program>
+ </programs>
+ <repositoryLayout>flat</repositoryLayout>
+ <repositoryName>lib</repositoryName>
+ </configuration>
+ <phase>package</phase>
+ <goals>
+ <goal>assemble</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <version>2.2-beta-5</version>
+ <executions>
+ <execution>
+ <configuration>
+ <descriptors>
+ <descriptor>src/main/assembly/binary-assembly.xml</descriptor>
+ </descriptors>
+ </configuration>
+ <phase>package</phase>
+ <goals>
+ <goal>attached</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>2.13</version>
+ <configuration>
+ <forkMode>pertest</forkMode>
+ <argLine>-enableassertions -Xmx2047m -Dfile.encoding=UTF-8
+ -Djava.util.logging.config.file=src/test/resources/logging.properties</argLine>
+ <includes>
+ <include>**/test/optimizer/*TestSuite.java</include>
+ <include>**/test/optimizer/*Test.java</include>
+ <include>**/test/runtimefunction/*TestSuite.java</include>
+ <include>**/test/runtimefunction/*Test.java</include>
+ </includes>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-resources-plugin</artifactId>
+ <version>2.5</version>
+ <executions>
+ <execution>
+ <id>copy-scripts</id>
+ <!-- here the phase you need -->
+ <phase>package</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>target/appassembler/bin</outputDirectory>
+ <resources>
+ <resource>
+ <directory>src/main/resources/scripts</directory>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
+ <execution>
+ <id>copy-conf</id>
+ <!-- here the phase you need -->
+ <phase>package</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>target/appassembler/conf</outputDirectory>
+ <resources>
+ <resource>
+ <directory>src/main/resources/conf</directory>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
+ <execution>
+ <id>copy-jar</id>
+ <!-- here the phase you need -->
+ <phase>package</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>target/appassembler/lib</outputDirectory>
+ <resources>
+ <resource>
+ <directory>target</directory>
+ <includes>
+ <include>*patch.jar</include>
+ </includes>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-clean-plugin</artifactId>
+ <version>2.5</version>
+ <configuration>
+ <filesets>
+ <fileset>
+ <directory>.</directory>
+ <includes>
+ <include>metastore*</include>
+ <include>hadoop*</include>
+ <include>edu*</include>
+ <include>tmp*</include>
+ <include>build*</include>
+ <include>target*</include>
+ <include>log*</include>
+ <include>derby.log</include>
+ <include>ClusterController*</include>
+ </includes>
+ </fileset>
+ </filesets>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ <repositories>
+ <repository>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>warn</checksumPolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>fail</checksumPolicy>
+ </snapshots>
+ <id>third-party</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/third-party</url>
+ </repository>
+ <repository>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>warn</checksumPolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>fail</checksumPolicy>
+ </snapshots>
+ <id>hyracks-public-release</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-public-releases</url>
+ </repository>
+ </repositories>
+</project>
diff --git a/hivesterix/resource/asterix/destroy.sh b/hivesterix/hivesterix-dist/resource/asterix/destroy.sh
similarity index 100%
rename from hivesterix/resource/asterix/destroy.sh
rename to hivesterix/hivesterix-dist/resource/asterix/destroy.sh
diff --git a/hivesterix/resource/asterix/hivedeploy.hcli b/hivesterix/hivesterix-dist/resource/asterix/hivedeploy.hcli
similarity index 100%
rename from hivesterix/resource/asterix/hivedeploy.hcli
rename to hivesterix/hivesterix-dist/resource/asterix/hivedeploy.hcli
diff --git a/hivesterix/resource/asterix/hivedestroy.hcli b/hivesterix/hivesterix-dist/resource/asterix/hivedestroy.hcli
similarity index 100%
rename from hivesterix/resource/asterix/hivedestroy.hcli
rename to hivesterix/hivesterix-dist/resource/asterix/hivedestroy.hcli
diff --git a/hivesterix/resource/asterix/startall.sh b/hivesterix/hivesterix-dist/resource/asterix/startall.sh
similarity index 100%
rename from hivesterix/resource/asterix/startall.sh
rename to hivesterix/hivesterix-dist/resource/asterix/startall.sh
diff --git a/hivesterix/resource/asterix/startcc.sh b/hivesterix/hivesterix-dist/resource/asterix/startcc.sh
similarity index 100%
rename from hivesterix/resource/asterix/startcc.sh
rename to hivesterix/hivesterix-dist/resource/asterix/startcc.sh
diff --git a/hivesterix/resource/asterix/startnc.sh b/hivesterix/hivesterix-dist/resource/asterix/startnc.sh
similarity index 100%
rename from hivesterix/resource/asterix/startnc.sh
rename to hivesterix/hivesterix-dist/resource/asterix/startnc.sh
diff --git a/hivesterix/resource/asterix/stopall.sh b/hivesterix/hivesterix-dist/resource/asterix/stopall.sh
similarity index 100%
rename from hivesterix/resource/asterix/stopall.sh
rename to hivesterix/hivesterix-dist/resource/asterix/stopall.sh
diff --git a/hivesterix/resource/asterix/stopcc.sh b/hivesterix/hivesterix-dist/resource/asterix/stopcc.sh
similarity index 100%
rename from hivesterix/resource/asterix/stopcc.sh
rename to hivesterix/hivesterix-dist/resource/asterix/stopcc.sh
diff --git a/hivesterix/resource/asterix/stopnc.sh b/hivesterix/hivesterix-dist/resource/asterix/stopnc.sh
similarity index 100%
rename from hivesterix/resource/asterix/stopnc.sh
rename to hivesterix/hivesterix-dist/resource/asterix/stopnc.sh
diff --git a/hivesterix/resource/asterix_dbg/destroy.sh b/hivesterix/hivesterix-dist/resource/asterix_dbg/destroy.sh
similarity index 100%
rename from hivesterix/resource/asterix_dbg/destroy.sh
rename to hivesterix/hivesterix-dist/resource/asterix_dbg/destroy.sh
diff --git a/hivesterix/resource/asterix_dbg/hivedeploy.hcli b/hivesterix/hivesterix-dist/resource/asterix_dbg/hivedeploy.hcli
similarity index 100%
rename from hivesterix/resource/asterix_dbg/hivedeploy.hcli
rename to hivesterix/hivesterix-dist/resource/asterix_dbg/hivedeploy.hcli
diff --git a/hivesterix/resource/asterix_dbg/hivedestroy.hcli b/hivesterix/hivesterix-dist/resource/asterix_dbg/hivedestroy.hcli
similarity index 100%
rename from hivesterix/resource/asterix_dbg/hivedestroy.hcli
rename to hivesterix/hivesterix-dist/resource/asterix_dbg/hivedestroy.hcli
diff --git a/hivesterix/resource/asterix_dbg/startall.sh b/hivesterix/hivesterix-dist/resource/asterix_dbg/startall.sh
similarity index 100%
rename from hivesterix/resource/asterix_dbg/startall.sh
rename to hivesterix/hivesterix-dist/resource/asterix_dbg/startall.sh
diff --git a/hivesterix/resource/asterix_dbg/startcc.sh b/hivesterix/hivesterix-dist/resource/asterix_dbg/startcc.sh
similarity index 100%
rename from hivesterix/resource/asterix_dbg/startcc.sh
rename to hivesterix/hivesterix-dist/resource/asterix_dbg/startcc.sh
diff --git a/hivesterix/resource/asterix_dbg/startnc.sh b/hivesterix/hivesterix-dist/resource/asterix_dbg/startnc.sh
similarity index 100%
rename from hivesterix/resource/asterix_dbg/startnc.sh
rename to hivesterix/hivesterix-dist/resource/asterix_dbg/startnc.sh
diff --git a/hivesterix/resource/asterix_dbg/stopall.sh b/hivesterix/hivesterix-dist/resource/asterix_dbg/stopall.sh
similarity index 100%
rename from hivesterix/resource/asterix_dbg/stopall.sh
rename to hivesterix/hivesterix-dist/resource/asterix_dbg/stopall.sh
diff --git a/hivesterix/resource/asterix_dbg/stopcc.sh b/hivesterix/hivesterix-dist/resource/asterix_dbg/stopcc.sh
similarity index 100%
rename from hivesterix/resource/asterix_dbg/stopcc.sh
rename to hivesterix/hivesterix-dist/resource/asterix_dbg/stopcc.sh
diff --git a/hivesterix/resource/asterix_dbg/stopnc.sh b/hivesterix/hivesterix-dist/resource/asterix_dbg/stopnc.sh
similarity index 100%
rename from hivesterix/resource/asterix_dbg/stopnc.sh
rename to hivesterix/hivesterix-dist/resource/asterix_dbg/stopnc.sh
diff --git a/hivesterix/resource/bin/ext/cli.sh b/hivesterix/hivesterix-dist/resource/bin/ext/cli.sh
similarity index 100%
rename from hivesterix/resource/bin/ext/cli.sh
rename to hivesterix/hivesterix-dist/resource/bin/ext/cli.sh
diff --git a/hivesterix/resource/bin/ext/help.sh b/hivesterix/hivesterix-dist/resource/bin/ext/help.sh
similarity index 100%
rename from hivesterix/resource/bin/ext/help.sh
rename to hivesterix/hivesterix-dist/resource/bin/ext/help.sh
diff --git a/hivesterix/resource/bin/ext/hiveserver.sh b/hivesterix/hivesterix-dist/resource/bin/ext/hiveserver.sh
similarity index 100%
rename from hivesterix/resource/bin/ext/hiveserver.sh
rename to hivesterix/hivesterix-dist/resource/bin/ext/hiveserver.sh
diff --git a/hivesterix/resource/bin/ext/hwi.sh b/hivesterix/hivesterix-dist/resource/bin/ext/hwi.sh
similarity index 100%
rename from hivesterix/resource/bin/ext/hwi.sh
rename to hivesterix/hivesterix-dist/resource/bin/ext/hwi.sh
diff --git a/hivesterix/resource/bin/ext/jar.sh b/hivesterix/hivesterix-dist/resource/bin/ext/jar.sh
similarity index 100%
rename from hivesterix/resource/bin/ext/jar.sh
rename to hivesterix/hivesterix-dist/resource/bin/ext/jar.sh
diff --git a/hivesterix/resource/bin/ext/lineage.sh b/hivesterix/hivesterix-dist/resource/bin/ext/lineage.sh
similarity index 100%
rename from hivesterix/resource/bin/ext/lineage.sh
rename to hivesterix/hivesterix-dist/resource/bin/ext/lineage.sh
diff --git a/hivesterix/resource/bin/ext/metastore.sh b/hivesterix/hivesterix-dist/resource/bin/ext/metastore.sh
similarity index 100%
rename from hivesterix/resource/bin/ext/metastore.sh
rename to hivesterix/hivesterix-dist/resource/bin/ext/metastore.sh
diff --git a/hivesterix/resource/bin/ext/rcfilecat.sh b/hivesterix/hivesterix-dist/resource/bin/ext/rcfilecat.sh
similarity index 100%
rename from hivesterix/resource/bin/ext/rcfilecat.sh
rename to hivesterix/hivesterix-dist/resource/bin/ext/rcfilecat.sh
diff --git a/hivesterix/resource/bin/ext/util/execHiveCmd.sh b/hivesterix/hivesterix-dist/resource/bin/ext/util/execHiveCmd.sh
similarity index 100%
rename from hivesterix/resource/bin/ext/util/execHiveCmd.sh
rename to hivesterix/hivesterix-dist/resource/bin/ext/util/execHiveCmd.sh
diff --git a/hivesterix/resource/bin/hive b/hivesterix/hivesterix-dist/resource/bin/hive
similarity index 100%
rename from hivesterix/resource/bin/hive
rename to hivesterix/hivesterix-dist/resource/bin/hive
diff --git a/hivesterix/resource/bin/hive-config.sh b/hivesterix/hivesterix-dist/resource/bin/hive-config.sh
similarity index 100%
rename from hivesterix/resource/bin/hive-config.sh
rename to hivesterix/hivesterix-dist/resource/bin/hive-config.sh
diff --git a/hivesterix/resource/bin/init-hive-dfs.sh b/hivesterix/hivesterix-dist/resource/bin/init-hive-dfs.sh
similarity index 100%
rename from hivesterix/resource/bin/init-hive-dfs.sh
rename to hivesterix/hivesterix-dist/resource/bin/init-hive-dfs.sh
diff --git a/hivesterix/resource/deploy/balance.jar b/hivesterix/hivesterix-dist/resource/deploy/balance.jar
similarity index 100%
rename from hivesterix/resource/deploy/balance.jar
rename to hivesterix/hivesterix-dist/resource/deploy/balance.jar
Binary files differ
diff --git a/hivesterix/resource/deploy/deploy.sh b/hivesterix/hivesterix-dist/resource/deploy/deploy.sh
similarity index 100%
rename from hivesterix/resource/deploy/deploy.sh
rename to hivesterix/hivesterix-dist/resource/deploy/deploy.sh
diff --git a/hivesterix/resource/deploy/jar.sh b/hivesterix/hivesterix-dist/resource/deploy/jar.sh
similarity index 100%
rename from hivesterix/resource/deploy/jar.sh
rename to hivesterix/hivesterix-dist/resource/deploy/jar.sh
diff --git a/hivesterix/resource/hivesterix/execute.sh b/hivesterix/hivesterix-dist/resource/hivesterix/execute.sh
similarity index 100%
rename from hivesterix/resource/hivesterix/execute.sh
rename to hivesterix/hivesterix-dist/resource/hivesterix/execute.sh
diff --git a/hivesterix/resource/hivesterix/loop.sh b/hivesterix/hivesterix-dist/resource/hivesterix/loop.sh
similarity index 100%
rename from hivesterix/resource/hivesterix/loop.sh
rename to hivesterix/hivesterix-dist/resource/hivesterix/loop.sh
diff --git a/hivesterix/resource/hivesterix/perf.sh b/hivesterix/hivesterix-dist/resource/hivesterix/perf.sh
similarity index 100%
rename from hivesterix/resource/hivesterix/perf.sh
rename to hivesterix/hivesterix-dist/resource/hivesterix/perf.sh
diff --git a/hivesterix/resource/hivesterix/perflog/tpch100/result.log b/hivesterix/hivesterix-dist/resource/hivesterix/perflog/tpch100/result.log
similarity index 100%
rename from hivesterix/resource/hivesterix/perflog/tpch100/result.log
rename to hivesterix/hivesterix-dist/resource/hivesterix/perflog/tpch100/result.log
diff --git a/hivesterix/resource/hivesterix/perflog/tpch_sample/result.log b/hivesterix/hivesterix-dist/resource/hivesterix/perflog/tpch_sample/result.log
similarity index 100%
rename from hivesterix/resource/hivesterix/perflog/tpch_sample/result.log
rename to hivesterix/hivesterix-dist/resource/hivesterix/perflog/tpch_sample/result.log
diff --git a/hivesterix/resource/hivesterix/startcluster.sh b/hivesterix/hivesterix-dist/resource/hivesterix/startcluster.sh
similarity index 100%
rename from hivesterix/resource/hivesterix/startcluster.sh
rename to hivesterix/hivesterix-dist/resource/hivesterix/startcluster.sh
diff --git a/hivesterix/resource/hivesterix/stress.sh b/hivesterix/hivesterix-dist/resource/hivesterix/stress.sh
similarity index 100%
rename from hivesterix/resource/hivesterix/stress.sh
rename to hivesterix/hivesterix-dist/resource/hivesterix/stress.sh
diff --git a/hivesterix/resource/hivesterix/tpch100/q10_returned_item.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q10_returned_item.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q10_returned_item.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q10_returned_item.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q11_important_stock.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q11_important_stock.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q11_important_stock.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q11_important_stock.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q12_shipping.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q12_shipping.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q12_shipping.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q12_shipping.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q13_customer_distribution.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q13_customer_distribution.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q13_customer_distribution.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q13_customer_distribution.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q14_promotion_effect.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q14_promotion_effect.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q14_promotion_effect.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q14_promotion_effect.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q15_top_supplier.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q15_top_supplier.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q15_top_supplier.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q15_top_supplier.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q16_parts_supplier_relationship.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q16_parts_supplier_relationship.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q16_parts_supplier_relationship.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q16_parts_supplier_relationship.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q17_small_quantity_order_revenue.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q17_small_quantity_order_revenue.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q17_small_quantity_order_revenue.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q17_small_quantity_order_revenue.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q18_large_volume_customer.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q18_large_volume_customer.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q18_large_volume_customer.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q18_large_volume_customer.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q19_discounted_revenue.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q19_discounted_revenue.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q19_discounted_revenue.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q19_discounted_revenue.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q1_pricing_summary_report.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q1_pricing_summary_report.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q1_pricing_summary_report.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q1_pricing_summary_report.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q20_potential_part_promotion.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q20_potential_part_promotion.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q20_potential_part_promotion.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q20_potential_part_promotion.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q21_suppliers_who_kept_orders_waiting.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q21_suppliers_who_kept_orders_waiting.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q21_suppliers_who_kept_orders_waiting.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q21_suppliers_who_kept_orders_waiting.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q22_global_sales_opportunity.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q22_global_sales_opportunity.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q22_global_sales_opportunity.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q22_global_sales_opportunity.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q2_minimum_cost_supplier.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q2_minimum_cost_supplier.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q2_minimum_cost_supplier.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q2_minimum_cost_supplier.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q3_shipping_priority.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q3_shipping_priority.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q3_shipping_priority.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q3_shipping_priority.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q4_order_priority.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q4_order_priority.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q4_order_priority.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q4_order_priority.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q5_local_supplier_volume.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q5_local_supplier_volume.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q5_local_supplier_volume.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q5_local_supplier_volume.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q6_forecast_revenue_change.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q6_forecast_revenue_change.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q6_forecast_revenue_change.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q6_forecast_revenue_change.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q7_volume_shipping.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q7_volume_shipping.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q7_volume_shipping.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q7_volume_shipping.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q8_national_market_share.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q8_national_market_share.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q8_national_market_share.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q8_national_market_share.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q9_product_type_profit.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q9_product_type_profit.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q9_product_type_profit.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q9_product_type_profit.hive
diff --git a/hivesterix/resource/hivesterix/tpch_sample/q1_pricing_summary_report.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch_sample/q1_pricing_summary_report.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch_sample/q1_pricing_summary_report.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch_sample/q1_pricing_summary_report.hive
diff --git a/hivesterix/resource/hivesterix/tpch_sample/q3_shipping_priority.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch_sample/q3_shipping_priority.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch_sample/q3_shipping_priority.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch_sample/q3_shipping_priority.hive
diff --git a/hivesterix/resource/hivesterix/tpch_sample/q5_local_supplier_volume.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch_sample/q5_local_supplier_volume.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch_sample/q5_local_supplier_volume.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch_sample/q5_local_supplier_volume.hive
diff --git a/hivesterix/resource/hivesterix/tpch_sample/q9_product_type_profit.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch_sample/q9_product_type_profit.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch_sample/q9_product_type_profit.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch_sample/q9_product_type_profit.hive
diff --git a/hivesterix/resource/tpch/q10_returned_item.hive b/hivesterix/hivesterix-dist/resource/tpch/q10_returned_item.hive
similarity index 100%
rename from hivesterix/resource/tpch/q10_returned_item.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q10_returned_item.hive
diff --git a/hivesterix/resource/tpch/q11_important_stock.hive b/hivesterix/hivesterix-dist/resource/tpch/q11_important_stock.hive
similarity index 100%
rename from hivesterix/resource/tpch/q11_important_stock.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q11_important_stock.hive
diff --git a/hivesterix/resource/tpch/q12_shipping.hive b/hivesterix/hivesterix-dist/resource/tpch/q12_shipping.hive
similarity index 100%
rename from hivesterix/resource/tpch/q12_shipping.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q12_shipping.hive
diff --git a/hivesterix/resource/tpch/q13_customer_distribution.hive b/hivesterix/hivesterix-dist/resource/tpch/q13_customer_distribution.hive
similarity index 100%
rename from hivesterix/resource/tpch/q13_customer_distribution.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q13_customer_distribution.hive
diff --git a/hivesterix/resource/tpch/q14_promotion_effect.hive b/hivesterix/hivesterix-dist/resource/tpch/q14_promotion_effect.hive
similarity index 100%
rename from hivesterix/resource/tpch/q14_promotion_effect.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q14_promotion_effect.hive
diff --git a/hivesterix/resource/tpch/q15_top_supplier.hive b/hivesterix/hivesterix-dist/resource/tpch/q15_top_supplier.hive
similarity index 100%
rename from hivesterix/resource/tpch/q15_top_supplier.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q15_top_supplier.hive
diff --git a/hivesterix/resource/tpch/q16_parts_supplier_relationship.hive b/hivesterix/hivesterix-dist/resource/tpch/q16_parts_supplier_relationship.hive
similarity index 100%
rename from hivesterix/resource/tpch/q16_parts_supplier_relationship.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q16_parts_supplier_relationship.hive
diff --git a/hivesterix/resource/tpch/q17_small_quantity_order_revenue.hive b/hivesterix/hivesterix-dist/resource/tpch/q17_small_quantity_order_revenue.hive
similarity index 100%
rename from hivesterix/resource/tpch/q17_small_quantity_order_revenue.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q17_small_quantity_order_revenue.hive
diff --git a/hivesterix/resource/tpch/q18_large_volume_customer.hive b/hivesterix/hivesterix-dist/resource/tpch/q18_large_volume_customer.hive
similarity index 100%
rename from hivesterix/resource/tpch/q18_large_volume_customer.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q18_large_volume_customer.hive
diff --git a/hivesterix/resource/tpch/q19_discounted_revenue.hive b/hivesterix/hivesterix-dist/resource/tpch/q19_discounted_revenue.hive
similarity index 100%
rename from hivesterix/resource/tpch/q19_discounted_revenue.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q19_discounted_revenue.hive
diff --git a/hivesterix/resource/tpch/q1_pricing_summary_report.hive b/hivesterix/hivesterix-dist/resource/tpch/q1_pricing_summary_report.hive
similarity index 100%
rename from hivesterix/resource/tpch/q1_pricing_summary_report.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q1_pricing_summary_report.hive
diff --git a/hivesterix/resource/tpch/q20_potential_part_promotion.hive b/hivesterix/hivesterix-dist/resource/tpch/q20_potential_part_promotion.hive
similarity index 100%
rename from hivesterix/resource/tpch/q20_potential_part_promotion.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q20_potential_part_promotion.hive
diff --git a/hivesterix/resource/tpch/q21_suppliers_who_kept_orders_waiting.hive b/hivesterix/hivesterix-dist/resource/tpch/q21_suppliers_who_kept_orders_waiting.hive
similarity index 100%
rename from hivesterix/resource/tpch/q21_suppliers_who_kept_orders_waiting.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q21_suppliers_who_kept_orders_waiting.hive
diff --git a/hivesterix/resource/tpch/q22_global_sales_opportunity.hive b/hivesterix/hivesterix-dist/resource/tpch/q22_global_sales_opportunity.hive
similarity index 100%
rename from hivesterix/resource/tpch/q22_global_sales_opportunity.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q22_global_sales_opportunity.hive
diff --git a/hivesterix/resource/tpch/q2_copy.hive b/hivesterix/hivesterix-dist/resource/tpch/q2_copy.hive
similarity index 100%
rename from hivesterix/resource/tpch/q2_copy.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q2_copy.hive
diff --git a/hivesterix/resource/tpch/q2_minimum_cost_supplier.hive b/hivesterix/hivesterix-dist/resource/tpch/q2_minimum_cost_supplier.hive
similarity index 100%
rename from hivesterix/resource/tpch/q2_minimum_cost_supplier.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q2_minimum_cost_supplier.hive
diff --git a/hivesterix/resource/tpch/q3_shipping_priority.hive b/hivesterix/hivesterix-dist/resource/tpch/q3_shipping_priority.hive
similarity index 100%
rename from hivesterix/resource/tpch/q3_shipping_priority.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q3_shipping_priority.hive
diff --git a/hivesterix/resource/tpch/q4_order_priority.hive b/hivesterix/hivesterix-dist/resource/tpch/q4_order_priority.hive
similarity index 100%
rename from hivesterix/resource/tpch/q4_order_priority.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q4_order_priority.hive
diff --git a/hivesterix/resource/tpch/q5_local_supplier_volume.hive b/hivesterix/hivesterix-dist/resource/tpch/q5_local_supplier_volume.hive
similarity index 100%
rename from hivesterix/resource/tpch/q5_local_supplier_volume.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q5_local_supplier_volume.hive
diff --git a/hivesterix/resource/tpch/q6_forecast_revenue_change.hive b/hivesterix/hivesterix-dist/resource/tpch/q6_forecast_revenue_change.hive
similarity index 100%
rename from hivesterix/resource/tpch/q6_forecast_revenue_change.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q6_forecast_revenue_change.hive
diff --git a/hivesterix/resource/tpch/q7_volume_shipping.hive b/hivesterix/hivesterix-dist/resource/tpch/q7_volume_shipping.hive
similarity index 100%
rename from hivesterix/resource/tpch/q7_volume_shipping.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q7_volume_shipping.hive
diff --git a/hivesterix/resource/tpch/q8_national_market_share.hive b/hivesterix/hivesterix-dist/resource/tpch/q8_national_market_share.hive
similarity index 100%
rename from hivesterix/resource/tpch/q8_national_market_share.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q8_national_market_share.hive
diff --git a/hivesterix/resource/tpch/q9_product_type_profit.hive b/hivesterix/hivesterix-dist/resource/tpch/q9_product_type_profit.hive
similarity index 100%
rename from hivesterix/resource/tpch/q9_product_type_profit.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q9_product_type_profit.hive
diff --git a/hivesterix/resource/tpch100/q10_returned_item.hive b/hivesterix/hivesterix-dist/resource/tpch100/q10_returned_item.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q10_returned_item.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q10_returned_item.hive
diff --git a/hivesterix/resource/tpch100/q11_important_stock.hive b/hivesterix/hivesterix-dist/resource/tpch100/q11_important_stock.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q11_important_stock.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q11_important_stock.hive
diff --git a/hivesterix/resource/tpch100/q12_shipping.hive b/hivesterix/hivesterix-dist/resource/tpch100/q12_shipping.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q12_shipping.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q12_shipping.hive
diff --git a/hivesterix/resource/tpch100/q13_customer_distribution.hive b/hivesterix/hivesterix-dist/resource/tpch100/q13_customer_distribution.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q13_customer_distribution.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q13_customer_distribution.hive
diff --git a/hivesterix/resource/tpch100/q14_promotion_effect.hive b/hivesterix/hivesterix-dist/resource/tpch100/q14_promotion_effect.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q14_promotion_effect.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q14_promotion_effect.hive
diff --git a/hivesterix/resource/tpch100/q15_top_supplier.hive b/hivesterix/hivesterix-dist/resource/tpch100/q15_top_supplier.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q15_top_supplier.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q15_top_supplier.hive
diff --git a/hivesterix/resource/tpch100/q16_parts_supplier_relationship.hive b/hivesterix/hivesterix-dist/resource/tpch100/q16_parts_supplier_relationship.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q16_parts_supplier_relationship.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q16_parts_supplier_relationship.hive
diff --git a/hivesterix/resource/tpch100/q17_small_quantity_order_revenue.hive b/hivesterix/hivesterix-dist/resource/tpch100/q17_small_quantity_order_revenue.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q17_small_quantity_order_revenue.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q17_small_quantity_order_revenue.hive
diff --git a/hivesterix/resource/tpch100/q18_large_volume_customer.hive b/hivesterix/hivesterix-dist/resource/tpch100/q18_large_volume_customer.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q18_large_volume_customer.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q18_large_volume_customer.hive
diff --git a/hivesterix/resource/tpch100/q19_discounted_revenue.hive b/hivesterix/hivesterix-dist/resource/tpch100/q19_discounted_revenue.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q19_discounted_revenue.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q19_discounted_revenue.hive
diff --git a/hivesterix/resource/tpch100/q1_pricing_summary_report.hive b/hivesterix/hivesterix-dist/resource/tpch100/q1_pricing_summary_report.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q1_pricing_summary_report.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q1_pricing_summary_report.hive
diff --git a/hivesterix/resource/tpch100/q20_potential_part_promotion.hive b/hivesterix/hivesterix-dist/resource/tpch100/q20_potential_part_promotion.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q20_potential_part_promotion.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q20_potential_part_promotion.hive
diff --git a/hivesterix/resource/tpch100/q21_suppliers_who_kept_orders_waiting.hive b/hivesterix/hivesterix-dist/resource/tpch100/q21_suppliers_who_kept_orders_waiting.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q21_suppliers_who_kept_orders_waiting.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q21_suppliers_who_kept_orders_waiting.hive
diff --git a/hivesterix/resource/tpch100/q22_global_sales_opportunity.hive b/hivesterix/hivesterix-dist/resource/tpch100/q22_global_sales_opportunity.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q22_global_sales_opportunity.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q22_global_sales_opportunity.hive
diff --git a/hivesterix/resource/tpch100/q2_minimum_cost_supplier.hive b/hivesterix/hivesterix-dist/resource/tpch100/q2_minimum_cost_supplier.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q2_minimum_cost_supplier.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q2_minimum_cost_supplier.hive
diff --git a/hivesterix/resource/tpch100/q3_shipping_priority.hive b/hivesterix/hivesterix-dist/resource/tpch100/q3_shipping_priority.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q3_shipping_priority.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q3_shipping_priority.hive
diff --git a/hivesterix/resource/tpch100/q4_order_priority.hive b/hivesterix/hivesterix-dist/resource/tpch100/q4_order_priority.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q4_order_priority.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q4_order_priority.hive
diff --git a/hivesterix/resource/tpch100/q5_local_supplier_volume.hive b/hivesterix/hivesterix-dist/resource/tpch100/q5_local_supplier_volume.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q5_local_supplier_volume.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q5_local_supplier_volume.hive
diff --git a/hivesterix/resource/tpch100/q6_forecast_revenue_change.hive b/hivesterix/hivesterix-dist/resource/tpch100/q6_forecast_revenue_change.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q6_forecast_revenue_change.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q6_forecast_revenue_change.hive
diff --git a/hivesterix/resource/tpch100/q7_volume_shipping.hive b/hivesterix/hivesterix-dist/resource/tpch100/q7_volume_shipping.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q7_volume_shipping.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q7_volume_shipping.hive
diff --git a/hivesterix/resource/tpch100/q8_national_market_share.hive b/hivesterix/hivesterix-dist/resource/tpch100/q8_national_market_share.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q8_national_market_share.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q8_national_market_share.hive
diff --git a/hivesterix/resource/tpch100/q9_product_type_profit.hive b/hivesterix/hivesterix-dist/resource/tpch100/q9_product_type_profit.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q9_product_type_profit.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q9_product_type_profit.hive
diff --git a/hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml b/hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml
new file mode 100755
index 0000000..de3757f
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml
@@ -0,0 +1,26 @@
+<assembly>
+ <id>binary-assembly</id>
+ <formats>
+ <format>zip</format>
+ <format>dir</format>
+ </formats>
+ <includeBaseDirectory>false</includeBaseDirectory>
+ <fileSets>
+ <fileSet>
+ <directory>target/appassembler/bin</directory>
+ <outputDirectory>bin</outputDirectory>
+ <fileMode>0755</fileMode>
+ </fileSet>
+ <fileSet>
+ <directory>target/appassembler/lib</directory>
+ <outputDirectory>lib</outputDirectory>
+ </fileSet>
+ <fileSet>
+ <directory>target</directory>
+ <outputDirectory>lib</outputDirectory>
+ <includes>
+ <include>*.jar</include>
+ </includes>
+ </fileSet>
+ </fileSets>
+</assembly>
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
similarity index 91%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
rename to hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
index f6aef75..e075f09 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
+++ b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
@@ -1,13 +1,19 @@
package edu.uci.ics.hivesterix.runtime.exec;
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Serializable;
+import java.net.InetAddress;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.Properties;
import java.util.Set;
import org.apache.commons.logging.Log;
@@ -27,6 +33,7 @@
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
import edu.uci.ics.hivesterix.logical.expression.HiveExpressionTypeComputer;
import edu.uci.ics.hivesterix.logical.expression.HiveMergeAggregationExpressionFactory;
import edu.uci.ics.hivesterix.logical.expression.HiveNullableTypeComputer;
@@ -34,7 +41,6 @@
import edu.uci.ics.hivesterix.logical.plan.HiveAlgebricksTranslator;
import edu.uci.ics.hivesterix.logical.plan.HiveLogicalPlanAndMetaData;
import edu.uci.ics.hivesterix.optimizer.rulecollections.HiveRuleCollections;
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
import edu.uci.ics.hivesterix.runtime.factory.evaluator.HiveExpressionRuntimeProvider;
import edu.uci.ics.hivesterix.runtime.factory.nullwriter.HiveNullWriterFactory;
import edu.uci.ics.hivesterix.runtime.inspector.HiveBinaryBooleanInspectorFactory;
@@ -73,8 +79,8 @@
public class HyracksExecutionEngine implements IExecutionEngine {
private static final Log LOG = LogFactory.getLog(HyracksExecutionEngine.class.getName());
-
- // private static final String[] locConstraints = {}
+ private static final String clusterPropertiesPath = "conf/cluster.properties";
+ private static final String masterFilePath = "conf/master";
private static List<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>> DEFAULT_LOGICAL_REWRITES = new ArrayList<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>>();
private static List<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>> DEFAULT_PHYSICAL_REWRITES = new ArrayList<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>>();
@@ -141,6 +147,16 @@
*/
private PrintWriter planPrinter;
+ /**
+ * properties
+ */
+ private Properties clusterProps;
+
+ /**
+ * the Hyracks client connection
+ */
+ private IHyracksClientConnection hcc;
+
public HyracksExecutionEngine(HiveConf conf) {
this.conf = conf;
init(conf);
@@ -222,8 +238,12 @@
}
private void codeGen() throws AlgebricksException {
- // number of cpu cores in the cluster
- builder.setClusterLocations(new AlgebricksAbsolutePartitionConstraint(ConfUtil.getNCs()));
+ try {
+ // number of cpu cores in the cluster
+ builder.setClusterLocations(new AlgebricksAbsolutePartitionConstraint(ConfUtil.getNCs()));
+ } catch (Exception e) {
+ throw new AlgebricksException(e);
+ }
// builder.setClusterTopology(ConfUtil.getClusterTopology());
builder.setBinaryBooleanInspectorFactory(HiveBinaryBooleanInspectorFactory.INSTANCE);
builder.setBinaryIntegerInspectorFactory(HiveBinaryIntegerInspectorFactory.INSTANCE);
@@ -253,7 +273,7 @@
public int executeJob() {
try {
codeGen();
- executeHyraxJob(jobSpec);
+ executeHyracksJob(jobSpec);
} catch (Exception e) {
e.printStackTrace();
return 1;
@@ -273,7 +293,6 @@
* list of map-reduce tasks
*/
Task<? extends Serializable> task = rootTasks.get(i);
- // System.out.println("!" + task.getName());
if (task instanceof MapRedTask) {
List<Operator> mapRootOps = articulateMapReduceOperators(task, rootOps, aliasToPath, rootTasks);
@@ -508,19 +527,42 @@
}
}
- private void executeHyraxJob(JobSpecification job) throws Exception {
- String ipAddress = conf.get("hive.hyracks.host");
- int port = Integer.parseInt(conf.get("hive.hyracks.port"));
- //System.out.println("connect to " + ipAddress + " " + port);
+ private void executeHyracksJob(JobSpecification job) throws Exception {
- IHyracksClientConnection hcc = new HyracksConnection(ipAddress, port);
+ /**
+ * load the properties file if it is not loaded
+ */
+ if (clusterProps == null) {
+ clusterProps = new Properties();
+ InputStream confIn = new FileInputStream(clusterPropertiesPath);
+ clusterProps.load(confIn);
+ confIn.close();
+ }
- //System.out.println("get connected");
+ if (hcc == null) {
+ BufferedReader ipReader = new BufferedReader(new InputStreamReader(new FileInputStream(masterFilePath)));
+ String masterNode = ipReader.readLine();
+ ipReader.close();
+
+ InetAddress[] ips = InetAddress.getAllByName(masterNode);
+ int port = Integer.parseInt(clusterProps.getProperty("CC_CLIENTPORT"));
+ for (InetAddress ip : ips) {
+ if (ip.getAddress().length <= 4) {
+ try {
+ hcc = new HyracksConnection(ip.getHostAddress(), port);
+ break;
+ } catch (Exception e) {
+ continue;
+ }
+ }
+ }
+ }
+
long start = System.currentTimeMillis();
JobId jobId = hcc.startJob(job);
hcc.waitForCompletion(jobId);
- //System.out.println("job finished: " + jobId.toString());
+ // System.out.println("job finished: " + jobId.toString());
// call all leaf nodes to end
for (Operator leaf : leaveOps) {
jobClose(leaf);
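
The executeHyracksJob change above replaces the old hive.hyracks.host/port settings with a lookup of conf/cluster.properties and conf/master: it resolves the master host name, picks the first address that fits in four bytes (i.e. IPv4), opens a HyracksConnection on CC_CLIENTPORT, then starts the job and waits for completion. A condensed, self-contained sketch of that flow under the same assumptions (relative conf paths; Hyracks client import paths assumed from the API used in the hunk; try-with-resources relies on the Java 7 build settings introduced by this patch):

    import java.io.BufferedReader;
    import java.io.FileInputStream;
    import java.io.InputStreamReader;
    import java.net.InetAddress;
    import java.util.Properties;

    import edu.uci.ics.hyracks.api.client.HyracksConnection;
    import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
    import edu.uci.ics.hyracks.api.job.JobId;
    import edu.uci.ics.hyracks.api.job.JobSpecification;

    // Hypothetical stand-alone client mirroring the connection logic above.
    public class HyracksJobClientSketch {
        public static void run(JobSpecification job) throws Exception {
            // cluster.properties supplies the cluster controller's client port
            Properties props = new Properties();
            try (FileInputStream in = new FileInputStream("conf/cluster.properties")) {
                props.load(in);
            }
            int port = Integer.parseInt(props.getProperty("CC_CLIENTPORT"));

            // the master file holds the cluster controller's host name
            String master;
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream("conf/master")))) {
                master = reader.readLine();
            }

            // connect through the first IPv4 address the master name resolves to
            IHyracksClientConnection hcc = null;
            for (InetAddress ip : InetAddress.getAllByName(master)) {
                if (ip.getAddress().length == 4) {
                    hcc = new HyracksConnection(ip.getHostAddress(), port);
                    break;
                }
            }
            if (hcc == null) {
                throw new IllegalStateException("no IPv4 address found for " + master);
            }

            // submit the job and block until it finishes
            JobId jobId = hcc.startJob(job);
            hcc.waitForCompletion(jobId);
        }
    }

Unlike the hunk, the sketch fails fast when no IPv4 address is found instead of leaving the connection null.
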
diff --git a/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/IExecutionEngine.java b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/IExecutionEngine.java
new file mode 100644
index 0000000..c64a39b
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/IExecutionEngine.java
@@ -0,0 +1,25 @@
+package edu.uci.ics.hivesterix.runtime.exec;
+
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.Task;
+
+public interface IExecutionEngine {
+
+ /**
+ * compile the job
+ *
+ * @param rootTasks
+ * : Hive MapReduce plan
+ * @return 0 on success, 1 on failure
+ */
+ public int compileJob(List<Task<? extends Serializable>> rootTasks);
+
+ /**
+ * execute the job using the most recently compiled plan
+ *
+ * @return 0 on success, 1 on failure
+ */
+ public int executeJob();
+}
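
IExecutionEngine is the seam through which the patched Driver below hands a compiled Hive MapReduce plan to Hyracks: compileJob translates the root tasks into a Hyracks job, executeJob submits it. A minimal usage sketch, assuming a HiveConf and a task list obtained from Hive's semantic analysis (the wrapper class and method are hypothetical):

    import java.io.Serializable;
    import java.util.List;

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.exec.Task;

    import edu.uci.ics.hivesterix.runtime.exec.HyracksExecutionEngine;
    import edu.uci.ics.hivesterix.runtime.exec.IExecutionEngine;

    // Hypothetical wrapper showing the compile-then-execute protocol of the interface.
    public class EngineUsageSketch {
        public static int compileAndRun(HiveConf conf, List<Task<? extends Serializable>> rootTasks) {
            IExecutionEngine engine = new HyracksExecutionEngine(conf);
            if (engine.compileJob(rootTasks) != 0) {
                return 1; // plan translation failed
            }
            return engine.executeJob(); // 0 on success, 1 on failure
        }
    }
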
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
new file mode 100644
index 0000000..a385742
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
@@ -0,0 +1,1310 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql;
+
+import java.io.DataInput;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+import java.util.Set;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.JavaUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Schema;
+import org.apache.hadoop.hive.ql.exec.ConditionalTask;
+import org.apache.hadoop.hive.ql.exec.ExecDriver;
+import org.apache.hadoop.hive.ql.exec.FetchTask;
+import org.apache.hadoop.hive.ql.exec.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.StatsTask;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.exec.TaskResult;
+import org.apache.hadoop.hive.ql.exec.TaskRunner;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.history.HiveHistory.Keys;
+import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
+import org.apache.hadoop.hive.ql.hooks.Hook;
+import org.apache.hadoop.hive.ql.hooks.HookContext;
+import org.apache.hadoop.hive.ql.hooks.PostExecute;
+import org.apache.hadoop.hive.ql.hooks.PreExecute;
+import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLock;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockManagerCtx;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockMode;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData;
+import org.apache.hadoop.hive.ql.lockmgr.LockException;
+import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
+import org.apache.hadoop.hive.ql.metadata.DummyPartition;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveUtils;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.AbstractSemanticAnalyzerHook;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.ErrorMsg;
+import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
+import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.ParseDriver;
+import org.apache.hadoop.hive.ql.parse.ParseException;
+import org.apache.hadoop.hive.ql.parse.ParseUtils;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.VariableSubstitution;
+import org.apache.hadoop.hive.ql.plan.ConditionalResolver;
+import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles;
+import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.processors.CommandProcessor;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
+import org.apache.hadoop.hive.serde2.ByteStream;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.mapred.ClusterStatus;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import edu.uci.ics.hivesterix.runtime.exec.HyracksExecutionEngine;
+import edu.uci.ics.hivesterix.runtime.exec.IExecutionEngine;
+
+@SuppressWarnings({ "deprecation", "unused" })
+public class Driver implements CommandProcessor {
+
+ static final private Log LOG = LogFactory.getLog(Driver.class.getName());
+ static final private LogHelper console = new LogHelper(LOG);
+
+ // hivesterix
+ private IExecutionEngine engine;
+ private boolean hivesterix = false;
+
+ private int maxRows = 100;
+ ByteStream.Output bos = new ByteStream.Output();
+
+ private HiveConf conf;
+ private DataInput resStream;
+ private Context ctx;
+ private QueryPlan plan;
+ private Schema schema;
+ private HiveLockManager hiveLockMgr;
+
+ private String errorMessage;
+ private String SQLState;
+
+ // A limit on the number of threads that can be launched
+ private int maxthreads;
+ private final int sleeptime = 2000;
+
+ protected int tryCount = Integer.MAX_VALUE;
+
+ private int checkLockManager() {
+ boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
+ if (supportConcurrency && (hiveLockMgr == null)) {
+ try {
+ setLockManager();
+ } catch (SemanticException e) {
+ errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage();
+ SQLState = ErrorMsg.findSQLState(e.getMessage());
+ console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return (12);
+ }
+ }
+ return (0);
+ }
+
+ private void setLockManager() throws SemanticException {
+ boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
+ if (supportConcurrency) {
+ String lockMgr = conf.getVar(HiveConf.ConfVars.HIVE_LOCK_MANAGER);
+ if ((lockMgr == null) || (lockMgr.isEmpty())) {
+ throw new SemanticException(ErrorMsg.LOCKMGR_NOT_SPECIFIED.getMsg());
+ }
+
+ try {
+ hiveLockMgr = (HiveLockManager) ReflectionUtils.newInstance(conf.getClassByName(lockMgr), conf);
+ hiveLockMgr.setContext(new HiveLockManagerCtx(conf));
+ } catch (Exception e) {
+ throw new SemanticException(ErrorMsg.LOCKMGR_NOT_INITIALIZED.getMsg() + e.getMessage());
+ }
+ }
+ }
+
+ public void init() {
+ Operator.resetId();
+ }
+
+ /**
+ * Return the status information about the Map-Reduce cluster
+ */
+ public ClusterStatus getClusterStatus() throws Exception {
+ ClusterStatus cs;
+ try {
+ JobConf job = new JobConf(conf, ExecDriver.class);
+ JobClient jc = new JobClient(job);
+ cs = jc.getClusterStatus();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw e;
+ }
+ LOG.info("Returning cluster status: " + cs.toString());
+ return cs;
+ }
+
+ public Schema getSchema() {
+ return schema;
+ }
+
+ /**
+ * Get a Schema with fields represented with native Hive types
+ */
+ public static Schema getSchema(BaseSemanticAnalyzer sem, HiveConf conf) {
+ Schema schema = null;
+
+ // If we have a plan, prefer its logical result schema if it's
+ // available; otherwise, try digging out a fetch task; failing that,
+ // give up.
+ if (sem == null) {
+ // can't get any info without a plan
+ } else if (sem.getResultSchema() != null) {
+ List<FieldSchema> lst = sem.getResultSchema();
+ schema = new Schema(lst, null);
+ } else if (sem.getFetchTask() != null) {
+ FetchTask ft = sem.getFetchTask();
+ TableDesc td = ft.getTblDesc();
+ // Partitioned tables don't have a tableDesc set on the FetchTask; instead
+ // they have a list of PartitionDesc objects, each with a table desc.
+ // Try to fetch the desc for the first partition and use its deserializer.
+ if (td == null && ft.getWork() != null && ft.getWork().getPartDesc() != null) {
+ if (ft.getWork().getPartDesc().size() > 0) {
+ td = ft.getWork().getPartDesc().get(0).getTableDesc();
+ }
+ }
+
+ if (td == null) {
+ LOG.info("No returning schema.");
+ } else {
+ String tableName = "result";
+ List<FieldSchema> lst = null;
+ try {
+ lst = MetaStoreUtils.getFieldsFromDeserializer(tableName, td.getDeserializer());
+ } catch (Exception e) {
+ LOG.warn("Error getting schema: " + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ }
+ if (lst != null) {
+ schema = new Schema(lst, null);
+ }
+ }
+ }
+ if (schema == null) {
+ schema = new Schema();
+ }
+ LOG.info("Returning Hive schema: " + schema);
+ return schema;
+ }
+
+ /**
+ * Get a Schema with fields represented with Thrift DDL types
+ */
+ public Schema getThriftSchema() throws Exception {
+ Schema schema;
+ try {
+ schema = getSchema();
+ if (schema != null) {
+ List<FieldSchema> lst = schema.getFieldSchemas();
+ // Go over the schema and convert type to thrift type
+ if (lst != null) {
+ for (FieldSchema f : lst) {
+ f.setType(MetaStoreUtils.typeToThriftType(f.getType()));
+ }
+ }
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw e;
+ }
+ LOG.info("Returning Thrift schema: " + schema);
+ return schema;
+ }
+
+ /**
+ * Return the maximum number of rows returned by getResults
+ */
+ public int getMaxRows() {
+ return maxRows;
+ }
+
+ /**
+ * Set the maximum number of rows returned by getResults
+ */
+ public void setMaxRows(int maxRows) {
+ this.maxRows = maxRows;
+ }
+
+ public boolean hasReduceTasks(List<Task<? extends Serializable>> tasks) {
+ if (tasks == null) {
+ return false;
+ }
+
+ boolean hasReduce = false;
+ for (Task<? extends Serializable> task : tasks) {
+ if (task.hasReduce()) {
+ return true;
+ }
+
+ hasReduce = (hasReduce || hasReduceTasks(task.getChildTasks()));
+ }
+ return hasReduce;
+ }
+
+ /**
+ * for backwards compatibility with current tests
+ */
+ public Driver(HiveConf conf) {
+ this.conf = conf;
+
+ // hivesterix
+ engine = new HyracksExecutionEngine(conf);
+ }
+
+ public Driver() {
+ if (SessionState.get() != null) {
+ conf = SessionState.get().getConf();
+ }
+
+ // hivesterix
+ engine = new HyracksExecutionEngine(conf);
+ }
+
+ // hivesterix: plan printer
+ public Driver(HiveConf conf, PrintWriter planPrinter) {
+ this.conf = conf;
+ engine = new HyracksExecutionEngine(conf, planPrinter);
+ }
+
+ public void clear() {
+ this.hivesterix = false;
+ }
+
+ /**
+ * Compile a new query. Any currently-planned query associated with this
+ * Driver is discarded.
+ *
+ * @param command
+ * The SQL query to compile.
+ */
+ public int compile(String command) {
+ if (plan != null) {
+ close();
+ plan = null;
+ }
+
+ TaskFactory.resetId();
+
+ try {
+ command = new VariableSubstitution().substitute(conf, command);
+ ctx = new Context(conf);
+
+ ParseDriver pd = new ParseDriver();
+ ASTNode tree = pd.parse(command, ctx);
+ tree = ParseUtils.findRootNonNullToken(tree);
+
+ BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, tree);
+ List<AbstractSemanticAnalyzerHook> saHooks = getSemanticAnalyzerHooks();
+
+ // Do semantic analysis and plan generation
+ if (saHooks != null) {
+ HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl();
+ hookCtx.setConf(conf);
+ for (AbstractSemanticAnalyzerHook hook : saHooks) {
+ tree = hook.preAnalyze(hookCtx, tree);
+ }
+ sem.analyze(tree, ctx);
+ for (AbstractSemanticAnalyzerHook hook : saHooks) {
+ hook.postAnalyze(hookCtx, sem.getRootTasks());
+ }
+ } else {
+ sem.analyze(tree, ctx);
+ }
+
+ LOG.info("Semantic Analysis Completed");
+
+ // validate the plan
+ sem.validate();
+
+ plan = new QueryPlan(command, sem);
+ // initialize FetchTask right here
+ if (plan.getFetchTask() != null) {
+ plan.getFetchTask().initialize(conf, plan, null);
+ }
+
+ // get the output schema
+ schema = getSchema(sem, conf);
+
+ // Test only: serialize the query plan and deserialize it
+ if (sem instanceof SemanticAnalyzer && command.toLowerCase().indexOf("create") < 0) {
+
+ Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
+
+ String queryPlanFileName = ctx.getLocalScratchDir(true) + Path.SEPARATOR_CHAR + "queryplan.xml";
+ LOG.info("query plan = " + queryPlanFileName);
+ queryPlanFileName = new Path(queryPlanFileName).toUri().getPath();
+
+ // serialize the queryPlan
+ FileOutputStream fos = new FileOutputStream(queryPlanFileName);
+ Utilities.serializeQueryPlan(plan, fos);
+ fos.close();
+
+ // deserialize the queryPlan
+ FileInputStream fis = new FileInputStream(queryPlanFileName);
+ QueryPlan newPlan = Utilities.deserializeQueryPlan(fis, conf);
+ fis.close();
+
+ // Use the deserialized plan
+ plan = newPlan;
+ }
+
+ // initialize FetchTask right here
+ if (plan.getFetchTask() != null) {
+ plan.getFetchTask().initialize(conf, plan, null);
+ }
+
+ // do the authorization check
+ if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
+ try {
+ // doAuthorization(sem);
+ } catch (AuthorizationException authExp) {
+ console.printError("Authorization failed:" + authExp.getMessage()
+ + ". Use show grant to get more details.");
+ return 403;
+ }
+ }
+
+ // hyracks run
+ if (sem instanceof SemanticAnalyzer && command.toLowerCase().indexOf("create") < 0) {
+ hivesterix = true;
+ return engine.compileJob(sem.getRootTasks());
+ }
+
+ return 0;
+ } catch (SemanticException e) {
+ errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage();
+ SQLState = ErrorMsg.findSQLState(e.getMessage());
+ console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return (10);
+ } catch (ParseException e) {
+ errorMessage = "FAILED: Parse Error: " + e.getMessage();
+ SQLState = ErrorMsg.findSQLState(e.getMessage());
+ console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return (11);
+ } catch (Exception e) {
+ errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
+ SQLState = ErrorMsg.findSQLState(e.getMessage());
+ console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return (12);
+ }
+ }
+
+ private void doAuthorization(BaseSemanticAnalyzer sem) throws HiveException, AuthorizationException {
+ HashSet<ReadEntity> inputs = sem.getInputs();
+ HashSet<WriteEntity> outputs = sem.getOutputs();
+ SessionState ss = SessionState.get();
+ HiveOperation op = ss.getHiveOperation();
+ Hive db = sem.getDb();
+ if (op != null) {
+ if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.CREATETABLE)) {
+ ss.getAuthorizer().authorize(db.getDatabase(db.getCurrentDatabase()), null,
+ HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
+ } else {
+ // if (op.equals(HiveOperation.IMPORT)) {
+ // ImportSemanticAnalyzer isa = (ImportSemanticAnalyzer) sem;
+ // if (!isa.existsTable()) {
+ ss.getAuthorizer().authorize(db.getDatabase(db.getCurrentDatabase()), null,
+ HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
+ // }
+ // }
+ }
+ if (outputs != null && outputs.size() > 0) {
+ for (WriteEntity write : outputs) {
+
+ if (write.getType() == WriteEntity.Type.PARTITION) {
+ Partition part = db.getPartition(write.getTable(), write.getPartition().getSpec(), false);
+ if (part != null) {
+ ss.getAuthorizer().authorize(write.getPartition(), null, op.getOutputRequiredPrivileges());
+ continue;
+ }
+ }
+
+ if (write.getTable() != null) {
+ ss.getAuthorizer().authorize(write.getTable(), null, op.getOutputRequiredPrivileges());
+ }
+ }
+
+ }
+ }
+
+ if (inputs != null && inputs.size() > 0) {
+
+ Map<Table, List<String>> tab2Cols = new HashMap<Table, List<String>>();
+ Map<Partition, List<String>> part2Cols = new HashMap<Partition, List<String>>();
+
+ Map<String, Boolean> tableUsePartLevelAuth = new HashMap<String, Boolean>();
+ for (ReadEntity read : inputs) {
+ if (read.getPartition() != null) {
+ Table tbl = read.getTable();
+ String tblName = tbl.getTableName();
+ if (tableUsePartLevelAuth.get(tblName) == null) {
+ boolean usePartLevelPriv = (tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE") != null && ("TRUE"
+ .equalsIgnoreCase(tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE"))));
+ if (usePartLevelPriv) {
+ tableUsePartLevelAuth.put(tblName, Boolean.TRUE);
+ } else {
+ tableUsePartLevelAuth.put(tblName, Boolean.FALSE);
+ }
+ }
+ }
+ }
+
+ if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.QUERY)) {
+ SemanticAnalyzer querySem = (SemanticAnalyzer) sem;
+ ParseContext parseCtx = querySem.getParseContext();
+ Map<TableScanOperator, Table> tsoTopMap = parseCtx.getTopToTable();
+
+ for (Map.Entry<String, Operator<? extends Serializable>> topOpMap : querySem.getParseContext()
+ .getTopOps().entrySet()) {
+ Operator<? extends Serializable> topOp = topOpMap.getValue();
+ if (topOp instanceof TableScanOperator && tsoTopMap.containsKey(topOp)) {
+ TableScanOperator tableScanOp = (TableScanOperator) topOp;
+ Table tbl = tsoTopMap.get(tableScanOp);
+ List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs();
+ List<FieldSchema> columns = tbl.getCols();
+ List<String> cols = new ArrayList<String>();
+ if (neededColumnIds != null && neededColumnIds.size() > 0) {
+ for (int i = 0; i < neededColumnIds.size(); i++) {
+ cols.add(columns.get(neededColumnIds.get(i)).getName());
+ }
+ } else {
+ for (int i = 0; i < columns.size(); i++) {
+ cols.add(columns.get(i).getName());
+ }
+ }
+ if (tbl.isPartitioned() && tableUsePartLevelAuth.get(tbl.getTableName())) {
+ String alias_id = topOpMap.getKey();
+ PrunedPartitionList partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp),
+ parseCtx.getOpToPartPruner().get(topOp), parseCtx.getConf(), alias_id,
+ parseCtx.getPrunedPartitions());
+ Set<Partition> parts = new HashSet<Partition>();
+ parts.addAll(partsList.getConfirmedPartns());
+ parts.addAll(partsList.getUnknownPartns());
+ for (Partition part : parts) {
+ List<String> existingCols = part2Cols.get(part);
+ if (existingCols == null) {
+ existingCols = new ArrayList<String>();
+ }
+ existingCols.addAll(cols);
+ part2Cols.put(part, existingCols);
+ }
+ } else {
+ List<String> existingCols = tab2Cols.get(tbl);
+ if (existingCols == null) {
+ existingCols = new ArrayList<String>();
+ }
+ existingCols.addAll(cols);
+ tab2Cols.put(tbl, existingCols);
+ }
+ }
+ }
+ }
+
+ // cache the results for table authorization
+ Set<String> tableAuthChecked = new HashSet<String>();
+ for (ReadEntity read : inputs) {
+ Table tbl = null;
+ if (read.getPartition() != null) {
+ tbl = read.getPartition().getTable();
+ // use partition level authorization
+ if (tableUsePartLevelAuth.get(tbl.getTableName())) {
+ List<String> cols = part2Cols.get(read.getPartition());
+ if (cols != null && cols.size() > 0) {
+ ss.getAuthorizer().authorize(read.getPartition().getTable(), read.getPartition(), cols,
+ op.getInputRequiredPrivileges(), null);
+ } else {
+ ss.getAuthorizer().authorize(read.getPartition(), op.getInputRequiredPrivileges(), null);
+ }
+ continue;
+ }
+ } else if (read.getTable() != null) {
+ tbl = read.getTable();
+ }
+
+ // If we reach here, a table-level authorization check is needed;
+ // the table may already have been checked while processing other
+ // partitions.
+ if (tbl != null && !tableAuthChecked.contains(tbl.getTableName())) {
+ List<String> cols = tab2Cols.get(tbl);
+ if (cols != null && cols.size() > 0) {
+ ss.getAuthorizer().authorize(tbl, null, cols, op.getInputRequiredPrivileges(), null);
+ } else {
+ ss.getAuthorizer().authorize(tbl, op.getInputRequiredPrivileges(), null);
+ }
+ tableAuthChecked.add(tbl.getTableName());
+ }
+ }
+
+ }
+ }
+
+ /**
+ * @return The current query plan associated with this Driver, if any.
+ */
+ public QueryPlan getPlan() {
+ return plan;
+ }
+
+ /**
+ * Gets the list of objects to be locked. If a partition needs to be locked
+ * (in any mode), all of its parents are also locked in SHARED mode.
+ *
+ * @param t
+ * the table to be locked
+ * @param p
+ * the partition to be locked
+ * @param mode
+ * the mode of the lock (SHARED/EXCLUSIVE)
+ **/
+ private List<HiveLockObj> getLockObjects(Table t, Partition p, HiveLockMode mode) throws SemanticException {
+ List<HiveLockObj> locks = new LinkedList<HiveLockObj>();
+
+ HiveLockObjectData lockData = new HiveLockObjectData(plan.getQueryId(), String.valueOf(System
+ .currentTimeMillis()), "IMPLICIT");
+
+ if (t != null) {
+ locks.add(new HiveLockObj(new HiveLockObject(t, lockData), mode));
+ mode = HiveLockMode.SHARED;
+ locks.add(new HiveLockObj(new HiveLockObject(t.getDbName(), lockData), mode));
+ return locks;
+ }
+
+ if (p != null) {
+ if (!(p instanceof DummyPartition)) {
+ locks.add(new HiveLockObj(new HiveLockObject(p, lockData), mode));
+ }
+
+ // All the parents are locked in shared mode
+ mode = HiveLockMode.SHARED;
+
+ // For dummy partitions, only partition name is needed
+ String name = p.getName();
+
+ if (p instanceof DummyPartition) {
+ name = p.getName().split("@")[2];
+ }
+
+ String partName = name;
+ String partialName = "";
+ String[] partns = name.split("/");
+ int len = p instanceof DummyPartition ? partns.length : partns.length - 1;
+ for (int idx = 0; idx < len; idx++) {
+ String partn = partns[idx];
+ partialName += partn;
+ try {
+ locks.add(new HiveLockObj(new HiveLockObject(new DummyPartition(p.getTable(), p.getTable()
+ .getDbName() + "/" + p.getTable().getTableName() + "/" + partialName), lockData), mode));
+ partialName += "/";
+ } catch (HiveException e) {
+ throw new SemanticException(e.getMessage());
+ }
+ }
+
+ locks.add(new HiveLockObj(new HiveLockObject(p.getTable(), lockData), mode));
+ locks.add(new HiveLockObj(new HiveLockObject(p.getTable().getDbName(), lockData), mode));
+ }
+ return locks;
+ }
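+
+ // Illustrative note (added for clarity, not part of the original change):
+ // for a hypothetical partitioned table db1.t1 with partition ds=1,
+ // getLockObjects(null, p, HiveLockMode.EXCLUSIVE) produces roughly:
+ // an EXCLUSIVE lock on the partition db1/t1/ds=1,
+ // a SHARED lock on the parent table db1/t1,
+ // a SHARED lock on the parent database db1,
+ // i.e. parents are always taken in SHARED mode as described above.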
+
+ /**
+ * Acquire the read and write locks needed by the statement. The list of
+ * objects to be locked is obtained from the inputs and outputs populated by
+ * the compiler. The lock acquisition scheme is simple: if all the locks
+ * cannot be obtained, error out. Deadlock is avoided by making sure that
+ * the locks are lexicographically sorted.
+ **/
+ public int acquireReadWriteLocks() {
+ try {
+ int sleepTime = conf.getIntVar(HiveConf.ConfVars.HIVE_LOCK_SLEEP_BETWEEN_RETRIES) * 1000;
+ int numRetries = conf.getIntVar(HiveConf.ConfVars.HIVE_LOCK_NUMRETRIES);
+
+ boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
+ if (!supportConcurrency) {
+ return 0;
+ }
+
+ List<HiveLockObj> lockObjects = new ArrayList<HiveLockObj>();
+
+ // Sort all the inputs and outputs.
+ // If a lock needs to be acquired on any partition, a read lock also
+ // needs to be acquired on all of its parents.
+ for (ReadEntity input : plan.getInputs()) {
+ if (input.getType() == ReadEntity.Type.TABLE) {
+ lockObjects.addAll(getLockObjects(input.getTable(), null, HiveLockMode.SHARED));
+ } else {
+ lockObjects.addAll(getLockObjects(null, input.getPartition(), HiveLockMode.SHARED));
+ }
+ }
+
+ for (WriteEntity output : plan.getOutputs()) {
+ if (output.getTyp() == WriteEntity.Type.TABLE) {
+ lockObjects.addAll(getLockObjects(output.getTable(), null,
+ output.isComplete() ? HiveLockMode.EXCLUSIVE : HiveLockMode.SHARED));
+ } else if (output.getTyp() == WriteEntity.Type.PARTITION) {
+ lockObjects.addAll(getLockObjects(null, output.getPartition(), HiveLockMode.EXCLUSIVE));
+ }
+ // In case of dynamic queries, it is possible to have incomplete
+ // dummy partitions
+ else if (output.getTyp() == WriteEntity.Type.DUMMYPARTITION) {
+ lockObjects.addAll(getLockObjects(null, output.getPartition(), HiveLockMode.SHARED));
+ }
+ }
+
+ if (lockObjects.isEmpty() && !ctx.isNeedLockMgr()) {
+ return 0;
+ }
+
+ int ret = checkLockManager();
+ if (ret != 0) {
+ return ret;
+ }
+
+ HiveLockObjectData lockData = new HiveLockObjectData(plan.getQueryId(), String.valueOf(System
+ .currentTimeMillis()), "IMPLICIT");
+
+ // Lock the database also
+ try {
+ Hive db = Hive.get(conf);
+ lockObjects.add(new HiveLockObj(new HiveLockObject(db.getCurrentDatabase(), lockData),
+ HiveLockMode.SHARED));
+ } catch (HiveException e) {
+ throw new SemanticException(e.getMessage());
+ }
+
+ ctx.setHiveLockMgr(hiveLockMgr);
+ List<HiveLock> hiveLocks = null;
+
+ int tryNum = 1;
+ do {
+
+ // ctx.getHiveLockMgr();
+ // hiveLocks = ctx.getHiveLockMgr().lock(lockObjects, false);
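+ // Note: the lock manager call above is left commented out, so hiveLocks
+ // remains null and the loop below simply retries until numRetries is hit.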
+
+ if (hiveLocks != null) {
+ break;
+ }
+
+ tryNum++;
+ try {
+ Thread.sleep(sleepTime);
+ } catch (InterruptedException e) {
+ }
+ } while (tryNum < numRetries);
+
+ if (hiveLocks == null) {
+ throw new SemanticException(ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg());
+ } else {
+ ctx.setHiveLocks(hiveLocks);
+ }
+
+ return (0);
+ } catch (SemanticException e) {
+ errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
+ SQLState = ErrorMsg.findSQLState(e.getMessage());
+ console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return (10);
+ } catch (Exception e) {
+ errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
+ SQLState = ErrorMsg.findSQLState(e.getMessage());
+ console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return (10);
+ }
+ }
+
+ /**
+ * Release all the locks acquired implicitly by the statement. Note that the
+ * locks acquired with 'keepAlive' set to True are not released.
+ **/
+ private void releaseLocks() {
+ if (ctx != null && ctx.getHiveLockMgr() != null) {
+ try {
+ ctx.getHiveLockMgr().close();
+ ctx.setHiveLocks(null);
+ } catch (LockException e) {
+ }
+ }
+ }
+
+ /**
+ * Release all the locks specified. Locks that have already been released
+ * are ignored.
+ *
+ * @param hiveLocks
+ * list of hive locks to be released
+ **/
+ private void releaseLocks(List<HiveLock> hiveLocks) {
+ if (hiveLocks != null) {
+ ctx.getHiveLockMgr().releaseLocks(hiveLocks);
+ }
+ ctx.setHiveLocks(null);
+ }
+
+ public CommandProcessorResponse run(String command) {
+ errorMessage = null;
+ SQLState = null;
+
+ int ret = compile(command);
+ if (ret != 0) {
+ // releaseLocks(ctx.getHiveLocks());
+ return new CommandProcessorResponse(ret, errorMessage, SQLState);
+ }
+
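+ // Note: lock acquisition (and lock release) is disabled in this run()
+ // path; ret below still holds the successful compile() result.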
+ // ret = acquireReadWriteLocks();
+ if (ret != 0) {
+ // releaseLocks(ctx.getHiveLocks());
+ return new CommandProcessorResponse(ret, errorMessage, SQLState);
+ }
+
+ ret = execute();
+ if (ret != 0) {
+ // releaseLocks(ctx.getHiveLocks());
+ return new CommandProcessorResponse(ret, errorMessage, SQLState);
+ }
+
+ // releaseLocks(ctx.getHiveLocks());
+ return new CommandProcessorResponse(ret);
+ }
+
+ private List<AbstractSemanticAnalyzerHook> getSemanticAnalyzerHooks() throws Exception {
+ ArrayList<AbstractSemanticAnalyzerHook> saHooks = new ArrayList<AbstractSemanticAnalyzerHook>();
+ String pestr = conf.getVar(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK);
+ if (pestr == null) {
+ return saHooks;
+ }
+ pestr = pestr.trim();
+ if (pestr.equals("")) {
+ return saHooks;
+ }
+
+ String[] peClasses = pestr.split(",");
+
+ for (String peClass : peClasses) {
+ try {
+ AbstractSemanticAnalyzerHook hook = HiveUtils.getSemanticAnalyzerHook(conf, peClass);
+ saHooks.add(hook);
+ } catch (HiveException e) {
+ console.printError("Pre Exec Hook Class not found:" + e.getMessage());
+ throw e;
+ }
+ }
+
+ return saHooks;
+ }
+
+ private List<Hook> getPreExecHooks() throws Exception {
+ ArrayList<Hook> pehooks = new ArrayList<Hook>();
+ String pestr = conf.getVar(HiveConf.ConfVars.PREEXECHOOKS);
+ pestr = pestr.trim();
+ if (pestr.equals("")) {
+ return pehooks;
+ }
+
+ String[] peClasses = pestr.split(",");
+
+ for (String peClass : peClasses) {
+ try {
+ pehooks.add((Hook) Class.forName(peClass.trim(), true, JavaUtils.getClassLoader()).newInstance());
+ } catch (ClassNotFoundException e) {
+ console.printError("Pre Exec Hook Class not found:" + e.getMessage());
+ throw e;
+ }
+ }
+
+ return pehooks;
+ }
+
+ private List<Hook> getPostExecHooks() throws Exception {
+ ArrayList<Hook> pehooks = new ArrayList<Hook>();
+ String pestr = conf.getVar(HiveConf.ConfVars.POSTEXECHOOKS);
+ pestr = pestr.trim();
+ if (pestr.equals("")) {
+ return pehooks;
+ }
+
+ String[] peClasses = pestr.split(",");
+
+ for (String peClass : peClasses) {
+ try {
+ pehooks.add((Hook) Class.forName(peClass.trim(), true, JavaUtils.getClassLoader()).newInstance());
+ } catch (ClassNotFoundException e) {
+ console.printError("Post Exec Hook Class not found:" + e.getMessage());
+ throw e;
+ }
+ }
+
+ return pehooks;
+ }
+
+ public int execute() {
+ // execute hivesterix plan
+ if (hivesterix) {
+ hivesterix = false;
+ int ret = engine.executeJob();
+ if (ret != 0)
+ return ret;
+ }
+
+ boolean noName = StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HADOOPJOBNAME));
+ int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
+
+ String queryId = plan.getQueryId();
+ String queryStr = plan.getQueryStr();
+
+ conf.setVar(HiveConf.ConfVars.HIVEQUERYID, queryId);
+ conf.setVar(HiveConf.ConfVars.HIVEQUERYSTRING, queryStr);
+ maxthreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.EXECPARALLETHREADNUMBER);
+
+ try {
+ LOG.info("Starting command: " + queryStr);
+
+ plan.setStarted();
+
+ if (SessionState.get() != null) {
+ SessionState.get().getHiveHistory().startQuery(queryStr, conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
+ SessionState.get().getHiveHistory().logPlanProgress(plan);
+ }
+ resStream = null;
+
+ HookContext hookContext = new HookContext(plan, conf);
+
+ for (Hook peh : getPreExecHooks()) {
+ if (peh instanceof ExecuteWithHookContext) {
+ ((ExecuteWithHookContext) peh).run(hookContext);
+ } else if (peh instanceof PreExecute) {
+ ((PreExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), ShimLoader
+ .getHadoopShims().getUGIForConf(conf));
+ }
+ }
+
+ int jobs = Utilities.getMRTasks(plan.getRootTasks()).size();
+ if (jobs > 0) {
+ console.printInfo("Total MapReduce jobs = " + jobs);
+ }
+ if (SessionState.get() != null) {
+ SessionState.get().getHiveHistory()
+ .setQueryProperty(queryId, Keys.QUERY_NUM_TASKS, String.valueOf(jobs));
+ SessionState.get().getHiveHistory().setIdToTableMap(plan.getIdToTableNameMap());
+ }
+ String jobname = Utilities.abbreviate(queryStr, maxlen - 6);
+
+ // A runtime that launches runnable tasks as separate threads through
+ // TaskRunners. As soon as a task is runnable, it is put in a queue.
+ // At any time, at most maxthreads tasks can be running. The main
+ // thread polls the TaskRunners to check if they have finished.
+
+ Queue<Task<? extends Serializable>> runnable = new LinkedList<Task<? extends Serializable>>();
+ Map<TaskResult, TaskRunner> running = new HashMap<TaskResult, TaskRunner>();
+
+ DriverContext driverCxt = new DriverContext(runnable, ctx);
+
+ // Add root Tasks to runnable
+
+ for (Task<? extends Serializable> tsk : plan.getRootTasks()) {
+ driverCxt.addToRunnable(tsk);
+ }
+
+ // Loop while you either have tasks running, or tasks queued up
+
+ while (running.size() != 0 || runnable.peek() != null) {
+ // Launch up to maxthreads tasks
+ while (runnable.peek() != null && running.size() < maxthreads) {
+ Task<? extends Serializable> tsk = runnable.remove();
+ console.printInfo("executing task " + tsk.getName());
+ launchTask(tsk, queryId, noName, running, jobname, jobs, driverCxt);
+ }
+
+ // poll the Tasks to see which one completed
+ TaskResult tskRes = pollTasks(running.keySet());
+ TaskRunner tskRun = running.remove(tskRes);
+ Task<? extends Serializable> tsk = tskRun.getTask();
+ hookContext.addCompleteTask(tskRun);
+
+ int exitVal = tskRes.getExitVal();
+ if (exitVal != 0) {
+ Task<? extends Serializable> backupTask = tsk.getAndInitBackupTask();
+ if (backupTask != null) {
+ errorMessage = "FAILED: Execution Error, return code " + exitVal + " from "
+ + tsk.getClass().getName();
+ console.printError(errorMessage);
+
+ errorMessage = "ATTEMPT: Execute BackupTask: " + backupTask.getClass().getName();
+ console.printError(errorMessage);
+
+ // add backup task to runnable
+ if (DriverContext.isLaunchable(backupTask)) {
+ driverCxt.addToRunnable(backupTask);
+ }
+ continue;
+
+ } else {
+ // TODO: This error messaging is not very informative.
+ // Fix that.
+ errorMessage = "FAILED: Execution Error, return code " + exitVal + " from "
+ + tsk.getClass().getName();
+ SQLState = "08S01";
+ console.printError(errorMessage);
+ if (running.size() != 0) {
+ taskCleanup();
+ }
+ // in case we decided to run everything in local mode, restore
+ // the jobtracker setting to its initial value
+ ctx.restoreOriginalTracker();
+ return 9;
+ }
+ }
+
+ if (SessionState.get() != null) {
+ SessionState.get().getHiveHistory()
+ .setTaskProperty(queryId, tsk.getId(), Keys.TASK_RET_CODE, String.valueOf(exitVal));
+ SessionState.get().getHiveHistory().endTask(queryId, tsk);
+ }
+
+ if (tsk.getChildTasks() != null) {
+ for (Task<? extends Serializable> child : tsk.getChildTasks()) {
+ // hivesterix: don't check launchable condition
+ // if (DriverContext.isLaunchable(child)) {
+ driverCxt.addToRunnable(child);
+ // }
+ }
+ }
+ }
+
+ // in case we decided to run everything in local mode, restore
+ // the jobtracker setting to its initial value
+ ctx.restoreOriginalTracker();
+
+ // Remove incomplete outputs. Some incomplete outputs may have been
+ // added at the beginning, e.g. for dynamic partitions; remove them here.
+ HashSet<WriteEntity> remOutputs = new HashSet<WriteEntity>();
+ for (WriteEntity output : plan.getOutputs()) {
+ if (!output.isComplete()) {
+ remOutputs.add(output);
+ }
+ }
+
+ for (WriteEntity output : remOutputs) {
+ plan.getOutputs().remove(output);
+ }
+
+ // Get all the post execution hooks and execute them.
+ for (Hook peh : getPostExecHooks()) {
+ if (peh instanceof ExecuteWithHookContext) {
+ ((ExecuteWithHookContext) peh).run(hookContext);
+ } else if (peh instanceof PostExecute) {
+ ((PostExecute) peh)
+ .run(SessionState.get(), plan.getInputs(), plan.getOutputs(),
+ (SessionState.get() != null ? SessionState.get().getLineageState().getLineageInfo()
+ : null), ShimLoader.getHadoopShims().getUGIForConf(conf));
+ }
+ }
+
+ if (SessionState.get() != null) {
+ SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(0));
+ SessionState.get().getHiveHistory().printRowCount(queryId);
+ }
+ } catch (Exception e) {
+ if (SessionState.get() != null) {
+ SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(12));
+ }
+ // TODO: do better with handling types of Exception here
+ errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
+ SQLState = "08S01";
+ console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return (12);
+ } finally {
+ if (SessionState.get() != null) {
+ SessionState.get().getHiveHistory().endQuery(queryId);
+ }
+ if (noName) {
+ conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, "");
+ }
+ }
+ plan.setDone();
+
+ if (SessionState.get() != null) {
+ try {
+ SessionState.get().getHiveHistory().logPlanProgress(plan);
+ } catch (Exception e) {
+ }
+ }
+ console.printInfo("OK");
+
+ return (0);
+ }
+
+ /**
+ * Launches a new task.
+ *
+ * @param tsk
+ * task being launched
+ * @param queryId
+ * id of the query containing the task
+ * @param noName
+ * whether the task has a name set
+ * @param running
+ * map from TaskResults to TaskRunners
+ * @param jobname
+ * name of the task, if it is a map-reduce job
+ * @param jobs
+ * number of map-reduce jobs
+ * @param cxt
+ * the driver context, which tracks the sequential number of the
+ * next map-reduce job
+ */
+
+ public void launchTask(Task<? extends Serializable> tsk, String queryId, boolean noName,
+ Map<TaskResult, TaskRunner> running, String jobname, int jobs, DriverContext cxt) {
+
+ if (SessionState.get() != null) {
+ SessionState.get().getHiveHistory().startTask(queryId, tsk, tsk.getClass().getName());
+ }
+ if (tsk.isMapRedTask() && !(tsk instanceof ConditionalTask)) {
+ if (noName) {
+ conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, jobname + "(" + tsk.getId() + ")");
+ }
+ cxt.incCurJobNo(1);
+ console.printInfo("Launching Job " + cxt.getCurJobNo() + " out of " + jobs);
+ }
+ tsk.initialize(conf, plan, cxt);
+ TaskResult tskRes = new TaskResult();
+ TaskRunner tskRun = new TaskRunner(tsk, tskRes);
+
+ // HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXECPARALLEL) &&
+ // Launch Task: hivesterix tweak
+ if (tsk instanceof MapRedTask || tsk instanceof StatsTask) {
+ // Launch it in the parallel mode, as a separate thread only for MR
+ // tasks
+ tskRes.setRunning(false);
+ tskRes.setExitVal(0);
+ } else if (tsk instanceof ConditionalTask) {
+ ConditionalTask condTask = (ConditionalTask) tsk;
+ ConditionalResolver crs = condTask.getResolver();
+ if (crs instanceof ConditionalResolverMergeFiles) {
+ tskRes.setRunning(false);
+ tskRes.setExitVal(0);
+
+ List<Task<? extends Serializable>> children = condTask.getListTasks();
+ for (Task<? extends Serializable> child : children)
+ if (child instanceof MapRedTask)
+ cxt.addToRunnable(child);
+ }
+ } else {
+ tskRun.runSequential();
+ }
+ running.put(tskRes, tskRun);
+ return;
+ }
+
+ /**
+ * Cleans up remaining tasks in case of failure
+ */
+
+ public void taskCleanup() {
+ // The currently existing Shutdown hooks will be automatically called,
+ // killing the map-reduce processes.
+ // The non MR processes will be killed as well.
+ System.exit(9);
+ }
+
+ /**
+ * Polls running tasks to see if a task has ended.
+ *
+ * @param results
+ * Set of result objects for running tasks
+ * @return The result object for any completed/failed task
+ */
+
+ public TaskResult pollTasks(Set<TaskResult> results) {
+ Iterator<TaskResult> resultIterator = results.iterator();
+ while (true) {
+ while (resultIterator.hasNext()) {
+ TaskResult tskRes = resultIterator.next();
+ if (!tskRes.isRunning()) {
+ return tskRes;
+ }
+ }
+
+ // Nothing was found in this pass.
+ // Sleep for sleeptime milliseconds and poll again.
+ try {
+ Thread.sleep(sleeptime);
+ } catch (InterruptedException ie) {
+ // Do nothing.
+ }
+ resultIterator = results.iterator();
+ }
+ }
+
+ public boolean getResults(ArrayList<String> res) throws IOException {
+ if (plan != null && plan.getFetchTask() != null) {
+ FetchTask ft = plan.getFetchTask();
+ ft.setMaxRows(maxRows);
+ return ft.fetch(res);
+ }
+
+ if (resStream == null) {
+ resStream = ctx.getStream();
+ }
+ if (resStream == null) {
+ return false;
+ }
+
+ int numRows = 0;
+ String row = null;
+
+ while (numRows < maxRows) {
+ if (resStream == null) {
+ if (numRows > 0) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ bos.reset();
+ Utilities.StreamStatus ss;
+ try {
+ ss = Utilities.readColumn(resStream, bos);
+ if (bos.getCount() > 0) {
+ row = new String(bos.getData(), 0, bos.getCount(), "UTF-8");
+ } else if (ss == Utilities.StreamStatus.TERMINATED) {
+ row = "";
+ }
+
+ if (row != null) {
+ numRows++;
+ res.add(row);
+ }
+ } catch (IOException e) {
+ console.printError("FAILED: Unexpected IO exception : " + e.getMessage());
+ res = null;
+ return false;
+ }
+
+ if (ss == Utilities.StreamStatus.EOF) {
+ resStream = ctx.getStream();
+ }
+ }
+ return true;
+ }
+
+ public int close() {
+ try {
+ if (plan != null) {
+ FetchTask fetchTask = plan.getFetchTask();
+ if (null != fetchTask) {
+ try {
+ fetchTask.clearFetch();
+ } catch (Exception e) {
+ LOG.debug(" Exception while clearing the Fetch task ", e);
+ }
+ }
+ }
+ if (ctx != null) {
+ ctx.clear();
+ }
+ if (null != resStream) {
+ try {
+ ((FSDataInputStream) resStream).close();
+ } catch (Exception e) {
+ LOG.debug(" Exception while closing the resStream ", e);
+ }
+ }
+ } catch (Exception e) {
+ console.printError("FAILED: Hive Internal Error: " + Utilities.getNameMessage(e) + "\n"
+ + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return 13;
+ }
+
+ return 0;
+ }
+
+ public void destroy() {
+ releaseLocks();
+ }
+
+ public org.apache.hadoop.hive.ql.plan.api.Query getQueryPlan() throws IOException {
+ return plan.getQueryPlan();
+ }
+
+ public int getTryCount() {
+ return tryCount;
+ }
+
+ public void setTryCount(int tryCount) {
+ this.tryCount = tryCount;
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
new file mode 100644
index 0000000..0f445f4
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
@@ -0,0 +1,233 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.util.StringUtils;
+
+import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
+import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
+
+/**
+ * GenericUDAFAverage.
+ */
+@Description(name = "avg", value = "_FUNC_(x) - Returns the mean of a set of numbers")
+public class GenericUDAFAverage extends AbstractGenericUDAFResolver {
+
+ static final Log LOG = LogFactory.getLog(GenericUDAFAverage.class.getName());
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ if (parameters.length != 1) {
+ throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected.");
+ }
+
+ if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case FLOAT:
+ case DOUBLE:
+ case STRING:
+ return new GenericUDAFAverageEvaluator();
+ case BOOLEAN:
+ default:
+ throw new UDFArgumentTypeException(0, "Only numeric or string type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ }
+
+ /**
+ * GenericUDAFAverageEvaluator.
+ */
+ public static class GenericUDAFAverageEvaluator extends GenericUDAFEvaluator {
+
+ // For PARTIAL1 and COMPLETE
+ PrimitiveObjectInspector inputOI;
+
+ // For PARTIAL2 and FINAL
+ StructObjectInspector soi;
+ StructField countField;
+ StructField sumField;
+ LongObjectInspector countFieldOI;
+ DoubleObjectInspector sumFieldOI;
+
+ // For PARTIAL1 and PARTIAL2
+ Object[] partialResult;
+
+ // For FINAL and COMPLETE
+ DoubleWritable result;
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ assert (parameters.length == 1);
+ super.init(m, parameters);
+
+ // init input
+ if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
+ inputOI = (PrimitiveObjectInspector) parameters[0];
+ } else {
+ soi = (StructObjectInspector) parameters[0];
+ countField = soi.getStructFieldRef("count");
+ sumField = soi.getStructFieldRef("sum");
+ countFieldOI = (LongObjectInspector) countField.getFieldObjectInspector();
+ sumFieldOI = (DoubleObjectInspector) sumField.getFieldObjectInspector();
+ }
+
+ // init output
+ if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
+ // The output of a partial aggregation is a struct containing
+ // a "long" count and a "double" sum.
+
+ ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+ foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ ArrayList<String> fname = new ArrayList<String>();
+ fname.add("count");
+ fname.add("sum");
+ partialResult = new Object[2];
+ partialResult[0] = new LongWritable(0);
+ partialResult[1] = new DoubleWritable(0);
+ return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+
+ } else {
+ result = new DoubleWritable(0);
+ return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+ }
+ }
+
+ static class AverageAgg implements SerializableBuffer {
+ long count;
+ double sum;
+
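+ // Note: the buffer (de)serialization below assumes a fixed 16-byte
+ // layout: an 8-byte long count followed by an 8-byte double sum.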
+ @Override
+ public void deSerializeAggBuffer(byte[] data, int start, int len) {
+ count = BufferSerDeUtil.getLong(data, start);
+ start += 8;
+ sum = BufferSerDeUtil.getDouble(data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(byte[] data, int start, int len) {
+ BufferSerDeUtil.writeLong(count, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(sum, data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(DataOutput output) throws IOException {
+ output.writeLong(count);
+ output.writeDouble(sum);
+ }
+ };
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ AverageAgg result = new AverageAgg();
+ reset(result);
+ return result;
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ AverageAgg myagg = (AverageAgg) agg;
+ myagg.count = 0;
+ myagg.sum = 0;
+ }
+
+ boolean warned = false;
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ assert (parameters.length == 1);
+ Object p = parameters[0];
+ if (p != null) {
+ AverageAgg myagg = (AverageAgg) agg;
+ try {
+ double v = PrimitiveObjectInspectorUtils.getDouble(p, inputOI);
+ myagg.count++;
+ myagg.sum += v;
+ } catch (NumberFormatException e) {
+ if (!warned) {
+ warned = true;
+ LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
+ LOG.warn(getClass().getSimpleName() + " ignoring similar exceptions.");
+ }
+ }
+ }
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ AverageAgg myagg = (AverageAgg) agg;
+ ((LongWritable) partialResult[0]).set(myagg.count);
+ ((DoubleWritable) partialResult[1]).set(myagg.sum);
+ return partialResult;
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ if (partial != null) {
+ AverageAgg myagg = (AverageAgg) agg;
+ Object partialCount = soi.getStructFieldData(partial, countField);
+ Object partialSum = soi.getStructFieldData(partial, sumField);
+ myagg.count += countFieldOI.get(partialCount);
+ myagg.sum += sumFieldOI.get(partialSum);
+ }
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ AverageAgg myagg = (AverageAgg) agg;
+ if (myagg.count == 0) {
+ return null;
+ } else {
+ result.set(myagg.sum / myagg.count);
+ return result;
+ }
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
new file mode 100644
index 0000000..2c4022e
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
@@ -0,0 +1,392 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+
+import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
+import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
+
+/**
+ * Compute the Pearson correlation coefficient corr(x, y), using the following
+ * stable one-pass method, based on: "Formulas for Robust, One-Pass Parallel
+ * Computation of Covariances and Arbitrary-Order Statistical Moments", Philippe
+ * Pebay, Sandia Labs and
+ * "The Art of Computer Programming, volume 2: Seminumerical Algorithms", Donald
+ * Knuth.
+ * Incremental: n : <count> mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg> my_n =
+ * my_(n-1) + [y_n - my_(n-1)]/n : <yavg> c_n = c_(n-1) + (x_n - mx_(n-1))*(y_n
+ * - my_n) : <covariance * n> vx_n = vx_(n-1) + (x_n - mx_n)(x_n - mx_(n-1)):
+ * <variance * n> vy_n = vy_(n-1) + (y_n - my_n)(y_n - my_(n-1)): <variance * n>
+ * Merge: c_(A,B) = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
+ * vx_(A,B) = vx_A + vx_B + (mx_A - mx_B)*(mx_A - mx_B)*n_A*n_B/(n_A+n_B)
+ * vy_(A,B) = vy_A + vy_B + (my_A - my_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
+ */
+@Description(name = "corr", value = "_FUNC_(x,y) - Returns the Pearson coefficient of correlation\n"
+ + "between a set of number pairs", extended = "The function takes as arguments any pair of numeric types and returns a double.\n"
+ + "Any pair with a NULL is ignored. If the function is applied to an empty set or\n"
+ + "a singleton set, NULL will be returned. Otherwise, it computes the following:\n"
+ + " COVAR_POP(x,y)/(STDDEV_POP(x)*STDDEV_POP(y))\n"
+ + "where neither x nor y is null,\n"
+ + "COVAR_POP is the population covariance,\n" + "and STDDEV_POP is the population standard deviation.")
+public class GenericUDAFCorrelation extends AbstractGenericUDAFResolver {
+
+ static final Log LOG = LogFactory.getLog(GenericUDAFCorrelation.class.getName());
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ if (parameters.length != 2) {
+ throw new UDFArgumentTypeException(parameters.length - 1, "Exactly two arguments are expected.");
+ }
+
+ if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+
+ if (parameters[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(1, "Only primitive type arguments are accepted but "
+ + parameters[1].getTypeName() + " is passed.");
+ }
+
+ switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case FLOAT:
+ case DOUBLE:
+ switch (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) {
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case FLOAT:
+ case DOUBLE:
+ return new GenericUDAFCorrelationEvaluator();
+ case STRING:
+ case BOOLEAN:
+ default:
+ throw new UDFArgumentTypeException(1, "Only numeric type arguments are accepted but "
+ + parameters[1].getTypeName() + " is passed.");
+ }
+ case STRING:
+ case BOOLEAN:
+ default:
+ throw new UDFArgumentTypeException(0, "Only numeric type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ }
+
+ /**
+ * Evaluate the Pearson correlation coefficient using a stable one-pass
+ * algorithm, based on work by Philippe Pébay and Donald Knuth.
+ * Incremental: n : <count> mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg>
+ * my_n = my_(n-1) + [y_n - my_(n-1)]/n : <yavg> c_n = c_(n-1) + (x_n -
+ * mx_(n-1))*(y_n - my_n) : <covariance * n> vx_n = vx_(n-1) + (x_n -
+ * mx_n)(x_n - mx_(n-1)): <variance * n> vy_n = vy_(n-1) + (y_n - my_n)(y_n
+ * - my_(n-1)): <variance * n>
+ * Merge: c_X = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/n_X vx_(A,B)
+ * = vx_A + vx_B + (mx_A - mx_B)*(mx_A - mx_B)*n_A*n_B/(n_A+n_B) vy_(A,B) =
+ * vy_A + vy_B + (my_A - my_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
+ */
+ public static class GenericUDAFCorrelationEvaluator extends GenericUDAFEvaluator {
+
+ // For PARTIAL1 and COMPLETE
+ private PrimitiveObjectInspector xInputOI;
+ private PrimitiveObjectInspector yInputOI;
+
+ // For PARTIAL2 and FINAL
+ private StructObjectInspector soi;
+ private StructField countField;
+ private StructField xavgField;
+ private StructField yavgField;
+ private StructField xvarField;
+ private StructField yvarField;
+ private StructField covarField;
+ private LongObjectInspector countFieldOI;
+ private DoubleObjectInspector xavgFieldOI;
+ private DoubleObjectInspector yavgFieldOI;
+ private DoubleObjectInspector xvarFieldOI;
+ private DoubleObjectInspector yvarFieldOI;
+ private DoubleObjectInspector covarFieldOI;
+
+ // For PARTIAL1 and PARTIAL2
+ private Object[] partialResult;
+
+ // For FINAL and COMPLETE
+ private DoubleWritable result;
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ super.init(m, parameters);
+
+ // init input
+ if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
+ assert (parameters.length == 2);
+ xInputOI = (PrimitiveObjectInspector) parameters[0];
+ yInputOI = (PrimitiveObjectInspector) parameters[1];
+ } else {
+ assert (parameters.length == 1);
+ soi = (StructObjectInspector) parameters[0];
+
+ countField = soi.getStructFieldRef("count");
+ xavgField = soi.getStructFieldRef("xavg");
+ yavgField = soi.getStructFieldRef("yavg");
+ xvarField = soi.getStructFieldRef("xvar");
+ yvarField = soi.getStructFieldRef("yvar");
+ covarField = soi.getStructFieldRef("covar");
+
+ countFieldOI = (LongObjectInspector) countField.getFieldObjectInspector();
+ xavgFieldOI = (DoubleObjectInspector) xavgField.getFieldObjectInspector();
+ yavgFieldOI = (DoubleObjectInspector) yavgField.getFieldObjectInspector();
+ xvarFieldOI = (DoubleObjectInspector) xvarField.getFieldObjectInspector();
+ yvarFieldOI = (DoubleObjectInspector) yvarField.getFieldObjectInspector();
+ covarFieldOI = (DoubleObjectInspector) covarField.getFieldObjectInspector();
+ }
+
+ // init output
+ if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
+ // The output of a partial aggregation is a struct containing
+ // a long count, two double averages, two double variances,
+ // and a double covariance.
+
+ ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+
+ foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+
+ ArrayList<String> fname = new ArrayList<String>();
+ fname.add("count");
+ fname.add("xavg");
+ fname.add("yavg");
+ fname.add("xvar");
+ fname.add("yvar");
+ fname.add("covar");
+
+ partialResult = new Object[6];
+ partialResult[0] = new LongWritable(0);
+ partialResult[1] = new DoubleWritable(0);
+ partialResult[2] = new DoubleWritable(0);
+ partialResult[3] = new DoubleWritable(0);
+ partialResult[4] = new DoubleWritable(0);
+ partialResult[5] = new DoubleWritable(0);
+
+ return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+
+ } else {
+ setResult(new DoubleWritable(0));
+ return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+ }
+ }
+
+ static class StdAgg implements SerializableBuffer {
+ long count; // number n of elements
+ double xavg; // average of x elements
+ double yavg; // average of y elements
+ double xvar; // n times the variance of x elements
+ double yvar; // n times the variance of y elements
+ double covar; // n times the covariance
+
+ @Override
+ public void deSerializeAggBuffer(byte[] data, int start, int len) {
+ count = BufferSerDeUtil.getLong(data, start);
+ start += 8;
+ xavg = BufferSerDeUtil.getDouble(data, start);
+ start += 8;
+ yavg = BufferSerDeUtil.getDouble(data, start);
+ start += 8;
+ xvar = BufferSerDeUtil.getDouble(data, start);
+ start += 8;
+ yvar = BufferSerDeUtil.getDouble(data, start);
+ start += 8;
+ covar = BufferSerDeUtil.getDouble(data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(byte[] data, int start, int len) {
+ BufferSerDeUtil.writeLong(count, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(xavg, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(yavg, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(xvar, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(yvar, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(covar, data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(DataOutput output) throws IOException {
+ output.writeLong(count);
+ output.writeDouble(xavg);
+ output.writeDouble(yavg);
+ output.writeDouble(xvar);
+ output.writeDouble(yvar);
+ output.writeDouble(covar);
+ }
+ };
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ StdAgg result = new StdAgg();
+ reset(result);
+ return result;
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ StdAgg myagg = (StdAgg) agg;
+ myagg.count = 0;
+ myagg.xavg = 0;
+ myagg.yavg = 0;
+ myagg.xvar = 0;
+ myagg.yvar = 0;
+ myagg.covar = 0;
+ }
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ assert (parameters.length == 2);
+ Object px = parameters[0];
+ Object py = parameters[1];
+ if (px != null && py != null) {
+ StdAgg myagg = (StdAgg) agg;
+ double vx = PrimitiveObjectInspectorUtils.getDouble(px, xInputOI);
+ double vy = PrimitiveObjectInspectorUtils.getDouble(py, yInputOI);
+ double xavgOld = myagg.xavg;
+ double yavgOld = myagg.yavg;
+ myagg.count++;
+ myagg.xavg += (vx - xavgOld) / myagg.count;
+ myagg.yavg += (vy - yavgOld) / myagg.count;
+ if (myagg.count > 1) {
+ myagg.covar += (vx - xavgOld) * (vy - myagg.yavg);
+ myagg.xvar += (vx - xavgOld) * (vx - myagg.xavg);
+ myagg.yvar += (vy - yavgOld) * (vy - myagg.yavg);
+ }
+ }
+ }
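+
+ // Worked example (added for clarity, values are illustrative): after
+ // iterating over the pairs (1,2) and (3,4), the state is count=2,
+ // xavg=2, yavg=3, covar=2, xvar=2, yvar=2, giving
+ // corr = covar / (sqrt(xvar) * sqrt(yvar)) = 1.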
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ StdAgg myagg = (StdAgg) agg;
+ ((LongWritable) partialResult[0]).set(myagg.count);
+ ((DoubleWritable) partialResult[1]).set(myagg.xavg);
+ ((DoubleWritable) partialResult[2]).set(myagg.yavg);
+ ((DoubleWritable) partialResult[3]).set(myagg.xvar);
+ ((DoubleWritable) partialResult[4]).set(myagg.yvar);
+ ((DoubleWritable) partialResult[5]).set(myagg.covar);
+ return partialResult;
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ if (partial != null) {
+ StdAgg myagg = (StdAgg) agg;
+
+ Object partialCount = soi.getStructFieldData(partial, countField);
+ Object partialXAvg = soi.getStructFieldData(partial, xavgField);
+ Object partialYAvg = soi.getStructFieldData(partial, yavgField);
+ Object partialXVar = soi.getStructFieldData(partial, xvarField);
+ Object partialYVar = soi.getStructFieldData(partial, yvarField);
+ Object partialCovar = soi.getStructFieldData(partial, covarField);
+
+ long nA = myagg.count;
+ long nB = countFieldOI.get(partialCount);
+
+ if (nA == 0) {
+ // Just copy the information since there is nothing so far
+ myagg.count = countFieldOI.get(partialCount);
+ myagg.xavg = xavgFieldOI.get(partialXAvg);
+ myagg.yavg = yavgFieldOI.get(partialYAvg);
+ myagg.xvar = xvarFieldOI.get(partialXVar);
+ myagg.yvar = yvarFieldOI.get(partialYVar);
+ myagg.covar = covarFieldOI.get(partialCovar);
+ }
+
+ if (nA != 0 && nB != 0) {
+ // Merge the two partials
+ double xavgA = myagg.xavg;
+ double yavgA = myagg.yavg;
+ double xavgB = xavgFieldOI.get(partialXAvg);
+ double yavgB = yavgFieldOI.get(partialYAvg);
+ double xvarB = xvarFieldOI.get(partialXVar);
+ double yvarB = yvarFieldOI.get(partialYVar);
+ double covarB = covarFieldOI.get(partialCovar);
+
+ myagg.count += nB;
+ myagg.xavg = (xavgA * nA + xavgB * nB) / myagg.count;
+ myagg.yavg = (yavgA * nA + yavgB * nB) / myagg.count;
+ myagg.xvar += xvarB + (xavgA - xavgB) * (xavgA - xavgB) * myagg.count;
+ myagg.yvar += yvarB + (yavgA - yavgB) * (yavgA - yavgB) * myagg.count;
+ myagg.covar += covarB + (xavgA - xavgB) * (yavgA - yavgB) * ((double) (nA * nB) / myagg.count);
+ }
+ }
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ StdAgg myagg = (StdAgg) agg;
+
+ if (myagg.count < 2) { // SQL standard - return null for zero or one
+ // pair
+ return null;
+ } else {
+ getResult().set(myagg.covar / java.lang.Math.sqrt(myagg.xvar) / java.lang.Math.sqrt(myagg.yvar));
+ return getResult();
+ }
+ }
+
+ public void setResult(DoubleWritable result) {
+ this.result = result;
+ }
+
+ public DoubleWritable getResult() {
+ return result;
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
new file mode 100644
index 0000000..dc5eef0
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
@@ -0,0 +1,170 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+
+import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
+import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
+
+/**
+ * This class implements the COUNT aggregation function as in SQL.
+ */
+@Description(name = "count", value = "_FUNC_(*) - Returns the total number of retrieved rows, including "
+ + "rows containing NULL values.\n"
+
+ + "_FUNC_(expr) - Returns the number of rows for which the supplied " + "expression is non-NULL.\n"
+
+ + "_FUNC_(DISTINCT expr[, expr...]) - Returns the number of rows for "
+ + "which the supplied expression(s) are unique and non-NULL.")
+public class GenericUDAFCount implements GenericUDAFResolver2 {
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ // This method implementation is preserved for backward compatibility.
+ return new GenericUDAFCountEvaluator();
+ }
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo paramInfo) throws SemanticException {
+
+ TypeInfo[] parameters = paramInfo.getParameters();
+
+ if (parameters.length == 0) {
+ if (!paramInfo.isAllColumns()) {
+ throw new UDFArgumentException("Argument expected");
+ }
+ assert !paramInfo.isDistinct() : "DISTINCT not supported with *";
+ } else {
+ if (parameters.length > 1 && !paramInfo.isDistinct()) {
+ throw new UDFArgumentException("DISTINCT keyword must be specified");
+ }
+ assert !paramInfo.isAllColumns() : "* not supported in expression list";
+ }
+
+ return new GenericUDAFCountEvaluator().setCountAllColumns(paramInfo.isAllColumns());
+ }
+
+ /**
+ * GenericUDAFCountEvaluator.
+ */
+ public static class GenericUDAFCountEvaluator extends GenericUDAFEvaluator {
+ private boolean countAllColumns = false;
+ private LongObjectInspector partialCountAggOI;
+ private LongWritable result;
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ super.init(m, parameters);
+ partialCountAggOI = PrimitiveObjectInspectorFactory.writableLongObjectInspector;
+ result = new LongWritable(0);
+ return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
+ }
+
+ private GenericUDAFCountEvaluator setCountAllColumns(boolean countAllCols) {
+ countAllColumns = countAllCols;
+ return this;
+ }
+
+ /** class for storing count value. */
+ static class CountAgg implements SerializableBuffer {
+ long value;
+
+ @Override
+ public void deSerializeAggBuffer(byte[] data, int start, int len) {
+ value = BufferSerDeUtil.getLong(data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(byte[] data, int start, int len) {
+ BufferSerDeUtil.writeLong(value, data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(DataOutput output) throws IOException {
+ output.writeLong(value);
+ }
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ CountAgg buffer = new CountAgg();
+ reset(buffer);
+ return buffer;
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ ((CountAgg) agg).value = 0;
+ }
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ // parameters == null means the input table/split is empty
+ if (parameters == null) {
+ return;
+ }
+ if (countAllColumns) {
+ assert parameters.length == 0;
+ ((CountAgg) agg).value++;
+ } else {
+ assert parameters.length > 0;
+ boolean countThisRow = true;
+ for (Object nextParam : parameters) {
+ if (nextParam == null) {
+ countThisRow = false;
+ break;
+ }
+ }
+ if (countThisRow) {
+ ((CountAgg) agg).value++;
+ }
+ }
+ }
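+
+ // Illustrative note (added for clarity): count(*) increments for every
+ // row, while count(expr1, expr2, ...) increments only when all of the
+ // supplied expressions are non-NULL, matching the logic above.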
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ if (partial != null) {
+ long p = partialCountAggOI.get(partial);
+ ((CountAgg) agg).value += p;
+ }
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ result.set(((CountAgg) agg).value);
+ return result;
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ return terminate(agg);
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java
new file mode 100644
index 0000000..0c4448b
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java
@@ -0,0 +1,341 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+
+import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
+import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
+
+/**
+ * Compute the covariance covar_pop(x, y), using the following one-pass method
+ * (ref. "Formulas for Robust, One-Pass Parallel Computation of Covariances and
+ * Arbitrary-Order Statistical Moments", Philippe Pebay, Sandia Labs):
+ * Incremental:
+ *   n : <count>
+ *   mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg>
+ *   my_n = my_(n-1) + [y_n - my_(n-1)]/n : <yavg>
+ *   c_n = c_(n-1) + (x_n - mx_(n-1))*(y_n - my_n) : <covariance * n>
+ * Merge:
+ *   c_X = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/n_X
+ */
+@Description(name = "covariance,covar_pop", value = "_FUNC_(x,y) - Returns the population covariance of a set of number pairs", extended = "The function takes as arguments any pair of numeric types and returns a double.\n"
+ + "Any pair with a NULL is ignored. If the function is applied to an empty set, NULL\n"
+ + "will be returned. Otherwise, it computes the following:\n"
+ + " (SUM(x*y)-SUM(x)*SUM(y)/COUNT(x,y))/COUNT(x,y)\n" + "where neither x nor y is null.")
+public class GenericUDAFCovariance extends AbstractGenericUDAFResolver {
+
+ static final Log LOG = LogFactory.getLog(GenericUDAFCovariance.class.getName());
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ if (parameters.length != 2) {
+ throw new UDFArgumentTypeException(parameters.length - 1, "Exactly two arguments are expected.");
+ }
+
+ if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+
+ if (parameters[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(1, "Only primitive type arguments are accepted but "
+ + parameters[1].getTypeName() + " is passed.");
+ }
+
+ switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case FLOAT:
+ case DOUBLE:
+ switch (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) {
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case FLOAT:
+ case DOUBLE:
+ return new GenericUDAFCovarianceEvaluator();
+ case STRING:
+ case BOOLEAN:
+ default:
+ throw new UDFArgumentTypeException(1, "Only numeric type arguments are accepted but "
+ + parameters[1].getTypeName() + " is passed.");
+ }
+ case STRING:
+ case BOOLEAN:
+ default:
+ throw new UDFArgumentTypeException(0, "Only numeric type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ }
+
+ /**
+ * Evaluate the covariance using the one-pass algorithm described in
+ * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance,
+ * presumably by Pébay, Philippe (2008), in "Formulas for Robust, One-Pass
+ * Parallel Computation of Covariances and Arbitrary-Order Statistical
+ * Moments", Technical Report SAND2008-6212, Sandia National Laboratories,
+ * http://infoserve.sandia.gov/sand_doc/2008/086212.pdf
+ * Incremental:
+ *   n : <count>
+ *   mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg>
+ *   my_n = my_(n-1) + [y_n - my_(n-1)]/n : <yavg>
+ *   c_n = c_(n-1) + (x_n - mx_(n-1))*(y_n - my_n) : <covariance * n>
+ * Merge:
+ *   c_X = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/n_X
+ * This one-pass algorithm is numerically stable.
+ */
+ public static class GenericUDAFCovarianceEvaluator extends GenericUDAFEvaluator {
+
+ // For PARTIAL1 and COMPLETE
+ private PrimitiveObjectInspector xInputOI;
+ private PrimitiveObjectInspector yInputOI;
+
+ // For PARTIAL2 and FINAL
+ private StructObjectInspector soi;
+ private StructField countField;
+ private StructField xavgField;
+ private StructField yavgField;
+ private StructField covarField;
+ private LongObjectInspector countFieldOI;
+ private DoubleObjectInspector xavgFieldOI;
+ private DoubleObjectInspector yavgFieldOI;
+ private DoubleObjectInspector covarFieldOI;
+
+ // For PARTIAL1 and PARTIAL2
+ private Object[] partialResult;
+
+ // For FINAL and COMPLETE
+ private DoubleWritable result;
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ super.init(m, parameters);
+
+ // init input
+ if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
+ assert (parameters.length == 2);
+ xInputOI = (PrimitiveObjectInspector) parameters[0];
+ yInputOI = (PrimitiveObjectInspector) parameters[1];
+ } else {
+ assert (parameters.length == 1);
+ soi = (StructObjectInspector) parameters[0];
+
+ countField = soi.getStructFieldRef("count");
+ xavgField = soi.getStructFieldRef("xavg");
+ yavgField = soi.getStructFieldRef("yavg");
+ covarField = soi.getStructFieldRef("covar");
+
+ countFieldOI = (LongObjectInspector) countField.getFieldObjectInspector();
+ xavgFieldOI = (DoubleObjectInspector) xavgField.getFieldObjectInspector();
+ yavgFieldOI = (DoubleObjectInspector) yavgField.getFieldObjectInspector();
+ covarFieldOI = (DoubleObjectInspector) covarField.getFieldObjectInspector();
+ }
+
+ // init output
+ if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
+ // The output of a partial aggregation is a struct containing
+ // a long count, two double averages, and a double covariance.
+
+ ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+
+ foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+
+ ArrayList<String> fname = new ArrayList<String>();
+ fname.add("count");
+ fname.add("xavg");
+ fname.add("yavg");
+ fname.add("covar");
+
+ partialResult = new Object[4];
+ partialResult[0] = new LongWritable(0);
+ partialResult[1] = new DoubleWritable(0);
+ partialResult[2] = new DoubleWritable(0);
+ partialResult[3] = new DoubleWritable(0);
+
+ return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+
+ } else {
+ setResult(new DoubleWritable(0));
+ return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+ }
+ }
+
+ static class StdAgg implements SerializableBuffer {
+ long count; // number n of elements
+ double xavg; // average of x elements
+ double yavg; // average of y elements
+ double covar; // n times the covariance
+
+ @Override
+ public void deSerializeAggBuffer(byte[] data, int start, int len) {
+ count = BufferSerDeUtil.getLong(data, start);
+ start += 8;
+ xavg = BufferSerDeUtil.getDouble(data, start);
+ start += 8;
+ yavg = BufferSerDeUtil.getDouble(data, start);
+ start += 8;
+ covar = BufferSerDeUtil.getDouble(data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(byte[] data, int start, int len) {
+ BufferSerDeUtil.writeLong(count, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(xavg, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(yavg, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(covar, data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(DataOutput output) throws IOException {
+ output.writeLong(count);
+ output.writeDouble(xavg);
+ output.writeDouble(yavg);
+ output.writeDouble(covar);
+ }
+ };
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ StdAgg result = new StdAgg();
+ reset(result);
+ return result;
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ StdAgg myagg = (StdAgg) agg;
+ myagg.count = 0;
+ myagg.xavg = 0;
+ myagg.yavg = 0;
+ myagg.covar = 0;
+ }
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ assert (parameters.length == 2);
+ Object px = parameters[0];
+ Object py = parameters[1];
+ if (px != null && py != null) {
+ StdAgg myagg = (StdAgg) agg;
+ double vx = PrimitiveObjectInspectorUtils.getDouble(px, xInputOI);
+ double vy = PrimitiveObjectInspectorUtils.getDouble(py, yInputOI);
+ myagg.count++;
+ myagg.yavg = myagg.yavg + (vy - myagg.yavg) / myagg.count;
+ if (myagg.count > 1) {
+ myagg.covar += (vx - myagg.xavg) * (vy - myagg.yavg);
+ }
+ myagg.xavg = myagg.xavg + (vx - myagg.xavg) / myagg.count;
+ }
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ StdAgg myagg = (StdAgg) agg;
+ ((LongWritable) partialResult[0]).set(myagg.count);
+ ((DoubleWritable) partialResult[1]).set(myagg.xavg);
+ ((DoubleWritable) partialResult[2]).set(myagg.yavg);
+ ((DoubleWritable) partialResult[3]).set(myagg.covar);
+ return partialResult;
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ if (partial != null) {
+ StdAgg myagg = (StdAgg) agg;
+
+ Object partialCount = soi.getStructFieldData(partial, countField);
+ Object partialXAvg = soi.getStructFieldData(partial, xavgField);
+ Object partialYAvg = soi.getStructFieldData(partial, yavgField);
+ Object partialCovar = soi.getStructFieldData(partial, covarField);
+
+ long nA = myagg.count;
+ long nB = countFieldOI.get(partialCount);
+
+ if (nA == 0) {
+ // Just copy the information since there is nothing so far
+ myagg.count = countFieldOI.get(partialCount);
+ myagg.xavg = xavgFieldOI.get(partialXAvg);
+ myagg.yavg = yavgFieldOI.get(partialYAvg);
+ myagg.covar = covarFieldOI.get(partialCovar);
+ }
+
+ if (nA != 0 && nB != 0) {
+ // Merge the two partials
+ double xavgA = myagg.xavg;
+ double yavgA = myagg.yavg;
+ double xavgB = xavgFieldOI.get(partialXAvg);
+ double yavgB = yavgFieldOI.get(partialYAvg);
+ double covarB = covarFieldOI.get(partialCovar);
+
+ myagg.count += nB;
+ myagg.xavg = (xavgA * nA + xavgB * nB) / myagg.count;
+ myagg.yavg = (yavgA * nA + yavgB * nB) / myagg.count;
+ myagg.covar += covarB + (xavgA - xavgB) * (yavgA - yavgB) * ((double) (nA * nB) / myagg.count);
+ }
+ }
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ StdAgg myagg = (StdAgg) agg;
+
+ if (myagg.count == 0) { // SQL standard - return null for zero
+ // elements
+ return null;
+ } else {
+ getResult().set(myagg.covar / (myagg.count));
+ return getResult();
+ }
+ }
+
+ public void setResult(DoubleWritable result) {
+ this.result = result;
+ }
+
+ public DoubleWritable getResult() {
+ return result;
+ }
+ }
+
+}
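
As a sanity check on the one-pass update and merge rules documented above, the following standalone sketch (not part of the patch; class and method names are illustrative) reproduces the same arithmetic without the Hive evaluator plumbing:

    // CovarSketch.java -- minimal model of GenericUDAFCovarianceEvaluator's math.
    public class CovarSketch {
        long n;        // number of pairs seen
        double mx, my; // running means of x and y
        double c;      // n times the covariance

        void add(double x, double y) {
            n++;
            my += (y - my) / n;           // new my
            if (n > 1) {
                c += (x - mx) * (y - my); // old mx, new my (matches iterate())
            }
            mx += (x - mx) / n;           // update mx last
        }

        void merge(CovarSketch o) {       // mirrors merge() on partials
            if (o.n == 0) {
                return;
            }
            if (n == 0) {
                n = o.n; mx = o.mx; my = o.my; c = o.c;
                return;
            }
            long nX = n + o.n;
            c += o.c + (mx - o.mx) * (my - o.my) * ((double) n * o.n / nX);
            mx = (mx * n + o.mx * o.n) / nX;
            my = (my * n + o.my * o.n) / nX;
            n = nX;
        }

        double covarPop() {
            return n == 0 ? Double.NaN : c / n;
        }

        public static void main(String[] args) {
            double[][] pts = { { 1, 2 }, { 2, 3 }, { 4, 7 }, { 5, 5 }, { 9, 11 } };
            CovarSketch a = new CovarSketch();
            CovarSketch b = new CovarSketch();
            for (int i = 0; i < pts.length; i++) {
                (i < 3 ? a : b).add(pts[i][0], pts[i][1]);
            }
            a.merge(b);
            System.out.println(a.covarPop()); // same value as a single sequential pass
        }
    }

Splitting the input across two buffers and merging yields the same covar_pop as a single pass over all pairs, which is exactly the property the PARTIAL1/PARTIAL2/FINAL modes rely on.
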
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
new file mode 100644
index 0000000..afdc397
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
@@ -0,0 +1,272 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.util.StringUtils;
+
+import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
+import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
+
+/**
+ * GenericUDAFSum.
+ */
+@Description(name = "sum", value = "_FUNC_(x) - Returns the sum of a set of numbers")
+public class GenericUDAFSum extends AbstractGenericUDAFResolver {
+
+ static final Log LOG = LogFactory.getLog(GenericUDAFSum.class.getName());
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ if (parameters.length != 1) {
+ throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected.");
+ }
+
+ if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ return new GenericUDAFSumLong();
+ case FLOAT:
+ case DOUBLE:
+ case STRING:
+ return new GenericUDAFSumDouble();
+ case BOOLEAN:
+ default:
+ throw new UDFArgumentTypeException(0, "Only numeric or string type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ }
+
+ /**
+ * GenericUDAFSumDouble.
+ */
+ public static class GenericUDAFSumDouble extends GenericUDAFEvaluator {
+ private PrimitiveObjectInspector inputOI;
+ private DoubleWritable result;
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ assert (parameters.length == 1);
+ super.init(m, parameters);
+ result = new DoubleWritable(0);
+ inputOI = (PrimitiveObjectInspector) parameters[0];
+ return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+ }
+
+ /** class for storing double sum value. */
+ static class SumDoubleAgg implements SerializableBuffer {
+ boolean empty;
+ double sum;
+
+ @Override
+ public void deSerializeAggBuffer(byte[] data, int start, int len) {
+ empty = BufferSerDeUtil.getBoolean(data, start);
+ start += 1;
+ sum = BufferSerDeUtil.getDouble(data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(byte[] data, int start, int len) {
+ BufferSerDeUtil.writeBoolean(empty, data, start);
+ start += 1;
+ BufferSerDeUtil.writeDouble(sum, data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(DataOutput output) throws IOException {
+ output.writeBoolean(empty);
+ output.writeDouble(sum);
+ }
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ SumDoubleAgg result = new SumDoubleAgg();
+ reset(result);
+ return result;
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ SumDoubleAgg myagg = (SumDoubleAgg) agg;
+ myagg.empty = true;
+ myagg.sum = 0;
+ }
+
+ boolean warned = false;
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ assert (parameters.length == 1);
+ try {
+ merge(agg, parameters[0]);
+ } catch (NumberFormatException e) {
+ if (!warned) {
+ warned = true;
+ LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
+ LOG.warn(getClass().getSimpleName() + " ignoring similar exceptions.");
+ }
+ }
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ return terminate(agg);
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ if (partial != null) {
+ SumDoubleAgg myagg = (SumDoubleAgg) agg;
+ myagg.empty = false;
+ myagg.sum += PrimitiveObjectInspectorUtils.getDouble(partial, inputOI);
+ }
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ SumDoubleAgg myagg = (SumDoubleAgg) agg;
+ if (myagg.empty) {
+ return null;
+ }
+ result.set(myagg.sum);
+ return result;
+ }
+
+ }
+
+ /**
+ * GenericUDAFSumLong.
+ */
+ public static class GenericUDAFSumLong extends GenericUDAFEvaluator {
+ private PrimitiveObjectInspector inputOI;
+ private LongWritable result;
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ assert (parameters.length == 1);
+ super.init(m, parameters);
+ result = new LongWritable(0);
+ inputOI = (PrimitiveObjectInspector) parameters[0];
+ return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
+ }
+
+ /** class for storing long sum value. */
+ static class SumLongAgg implements SerializableBuffer {
+ boolean empty;
+ long sum;
+
+ @Override
+ public void deSerializeAggBuffer(byte[] data, int start, int len) {
+ empty = BufferSerDeUtil.getBoolean(data, start);
+ start += 1;
+ sum = BufferSerDeUtil.getLong(data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(byte[] data, int start, int len) {
+ BufferSerDeUtil.writeBoolean(empty, data, start);
+ start += 1;
+ BufferSerDeUtil.writeLong(sum, data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(DataOutput output) throws IOException {
+ output.writeBoolean(empty);
+ output.writeLong(sum);
+ }
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ SumLongAgg result = new SumLongAgg();
+ reset(result);
+ return result;
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ SumLongAgg myagg = (SumLongAgg) agg;
+ myagg.empty = true;
+ myagg.sum = 0;
+ }
+
+ private boolean warned = false;
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ assert (parameters.length == 1);
+ try {
+ merge(agg, parameters[0]);
+ } catch (NumberFormatException e) {
+ if (!warned) {
+ warned = true;
+ LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
+ }
+ }
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ return terminate(agg);
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ if (partial != null) {
+ SumLongAgg myagg = (SumLongAgg) agg;
+ myagg.sum += PrimitiveObjectInspectorUtils.getLong(partial, inputOI);
+ myagg.empty = false;
+ }
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ SumLongAgg myagg = (SumLongAgg) agg;
+ if (myagg.empty) {
+ return null;
+ }
+ result.set(myagg.sum);
+ return result;
+ }
+
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
new file mode 100644
index 0000000..e839008
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
@@ -0,0 +1,305 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.util.StringUtils;
+
+import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
+import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
+
+/**
+ * Compute the variance. This class is extended by: GenericUDAFVarianceSample
+ * GenericUDAFStd GenericUDAFStdSample
+ */
+@Description(name = "variance,var_pop", value = "_FUNC_(x) - Returns the variance of a set of numbers")
+public class GenericUDAFVariance extends AbstractGenericUDAFResolver {
+
+ static final Log LOG = LogFactory.getLog(GenericUDAFVariance.class.getName());
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ if (parameters.length != 1) {
+ throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected.");
+ }
+
+ if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case FLOAT:
+ case DOUBLE:
+ case STRING:
+ return new GenericUDAFVarianceEvaluator();
+ case BOOLEAN:
+ default:
+ throw new UDFArgumentTypeException(0, "Only numeric or string type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ }
+
+ /**
+ * Evaluate the variance using the algorithm described by Chan, Golub, and
+ * LeVeque in
+ * "Algorithms for computing the sample variance: analysis and recommendations"
+ * The American Statistician, 37 (1983) pp. 242--247.
+ * variance = variance1 + variance2 + n/(m*(m+n)) * pow(((m/n)*t1 - t2), 2)
+ * where:
+ *   - variance is sum[(x-avg)^2] (this is actually n times the variance) and
+ *     is updated at every step
+ *   - n is the count of elements in chunk1
+ *   - m is the count of elements in chunk2
+ *   - t1 = sum of elements in chunk1, t2 = sum of elements in chunk2
+ * This algorithm was proven to be numerically stable by J.L. Barlow in
+ * "Error analysis of a pairwise summation algorithm to compute sample variance"
+ * Numer. Math, 58 (1991) pp. 583--590
+ */
+ public static class GenericUDAFVarianceEvaluator extends GenericUDAFEvaluator {
+
+ // For PARTIAL1 and COMPLETE
+ private PrimitiveObjectInspector inputOI;
+
+ // For PARTIAL2 and FINAL
+ private StructObjectInspector soi;
+ private StructField countField;
+ private StructField sumField;
+ private StructField varianceField;
+ private LongObjectInspector countFieldOI;
+ private DoubleObjectInspector sumFieldOI;
+
+ // For PARTIAL1 and PARTIAL2
+ private Object[] partialResult;
+
+ // For FINAL and COMPLETE
+ private DoubleWritable result;
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ assert (parameters.length == 1);
+ super.init(m, parameters);
+
+ // init input
+ if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
+ inputOI = (PrimitiveObjectInspector) parameters[0];
+ } else {
+ soi = (StructObjectInspector) parameters[0];
+
+ countField = soi.getStructFieldRef("count");
+ sumField = soi.getStructFieldRef("sum");
+ varianceField = soi.getStructFieldRef("variance");
+
+ countFieldOI = (LongObjectInspector) countField.getFieldObjectInspector();
+ sumFieldOI = (DoubleObjectInspector) sumField.getFieldObjectInspector();
+ }
+
+ // init output
+ if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
+ // The output of a partial aggregation is a struct containing
+ // a long count and doubles sum and variance.
+
+ ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+
+ foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+
+ ArrayList<String> fname = new ArrayList<String>();
+ fname.add("count");
+ fname.add("sum");
+ fname.add("variance");
+
+ partialResult = new Object[3];
+ partialResult[0] = new LongWritable(0);
+ partialResult[1] = new DoubleWritable(0);
+ partialResult[2] = new DoubleWritable(0);
+
+ return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+
+ } else {
+ setResult(new DoubleWritable(0));
+ return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+ }
+ }
+
+ static class StdAgg implements SerializableBuffer {
+ long count; // number of elements
+ double sum; // sum of elements
+ double variance; // sum[(x-avg)^2] (this is actually n times the variance)
+
+ @Override
+ public void deSerializeAggBuffer(byte[] data, int start, int len) {
+ count = BufferSerDeUtil.getLong(data, start);
+ start += 8;
+ sum = BufferSerDeUtil.getDouble(data, start);
+ start += 8;
+ variance = BufferSerDeUtil.getDouble(data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(byte[] data, int start, int len) {
+ BufferSerDeUtil.writeLong(count, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(sum, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(variance, data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(DataOutput output) throws IOException {
+ output.writeLong(count);
+ output.writeDouble(sum);
+ output.writeDouble(variance);
+ }
+ };
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ StdAgg result = new StdAgg();
+ reset(result);
+ return result;
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ StdAgg myagg = (StdAgg) agg;
+ myagg.count = 0;
+ myagg.sum = 0;
+ myagg.variance = 0;
+ }
+
+ private boolean warned = false;
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ assert (parameters.length == 1);
+ Object p = parameters[0];
+ if (p != null) {
+ StdAgg myagg = (StdAgg) agg;
+ try {
+ double v = PrimitiveObjectInspectorUtils.getDouble(p, inputOI);
+ myagg.count++;
+ myagg.sum += v;
+ if (myagg.count > 1) {
+ double t = myagg.count * v - myagg.sum;
+ myagg.variance += (t * t) / ((double) myagg.count * (myagg.count - 1));
+ }
+ } catch (NumberFormatException e) {
+ if (!warned) {
+ warned = true;
+ LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
+ LOG.warn(getClass().getSimpleName() + " ignoring similar exceptions.");
+ }
+ }
+ }
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ StdAgg myagg = (StdAgg) agg;
+ ((LongWritable) partialResult[0]).set(myagg.count);
+ ((DoubleWritable) partialResult[1]).set(myagg.sum);
+ ((DoubleWritable) partialResult[2]).set(myagg.variance);
+ return partialResult;
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ if (partial != null) {
+ StdAgg myagg = (StdAgg) agg;
+
+ Object partialCount = soi.getStructFieldData(partial, countField);
+ Object partialSum = soi.getStructFieldData(partial, sumField);
+ Object partialVariance = soi.getStructFieldData(partial, varianceField);
+
+ long n = myagg.count;
+ long m = countFieldOI.get(partialCount);
+
+ if (n == 0) {
+ // Just copy the information since there is nothing so far
+ myagg.variance = sumFieldOI.get(partialVariance);
+ myagg.count = countFieldOI.get(partialCount);
+ myagg.sum = sumFieldOI.get(partialSum);
+ }
+
+ if (m != 0 && n != 0) {
+ // Merge the two partials
+
+ double a = myagg.sum;
+ double b = sumFieldOI.get(partialSum);
+
+ myagg.count += m;
+ myagg.sum += b;
+ double t = (m / (double) n) * a - b;
+ myagg.variance += sumFieldOI.get(partialVariance) + ((n / (double) m) / ((double) n + m)) * t * t;
+ }
+ }
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ StdAgg myagg = (StdAgg) agg;
+
+ if (myagg.count == 0) { // SQL standard - return null for zero
+ // elements
+ return null;
+ } else {
+ if (myagg.count > 1) {
+ getResult().set(myagg.variance / (myagg.count));
+ } else { // for one element the variance is always 0
+ getResult().set(0);
+ }
+ return getResult();
+ }
+ }
+
+ public void setResult(DoubleWritable result) {
+ this.result = result;
+ }
+
+ public DoubleWritable getResult() {
+ return result;
+ }
+ }
+
+}
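
To connect the merge formula in the class comment with the expression in merge() above, take n and m as the element counts of the two chunks and t1, t2 as their sums (a and b in the code). Writing delta for the difference of the chunk means, the correction term the code adds reduces to the familiar pairwise form from Chan, Golub and LeVeque:

    \[
    t = \frac{m}{n}\,t_1 - t_2 = -\,m\,\delta,
    \qquad
    \delta = \frac{t_2}{m} - \frac{t_1}{n},
    \]
    \[
    \frac{n/m}{n+m}\,t^2
    = \frac{n}{m\,(n+m)}\,m^2\delta^2
    = \frac{n\,m}{n+m}\,\delta^2 .
    \]

So merge() adds exactly delta^2 * n*m/(n+m) to the pooled sum of squared deviations, which is the standard numerically stable merge step for this statistic.
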
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
new file mode 100644
index 0000000..7920001
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.lazy.objectinspector;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Text;
+
+/**
+ * LazyObjectInspectorFactory is the primary way to create new lazy
+ * ObjectInspector instances.
+ * SerDe classes should call the static functions in this library to create an
+ * ObjectInspector to return to the caller of SerDe2.getObjectInspector().
+ * The reason for having caches here is that ObjectInspectors have no internal
+ * state, so ObjectInspectors built with the same construction parameters should
+ * always be exactly the same instance.
+ */
+public final class LazyObjectInspectorFactory {
+
+ static ConcurrentHashMap<ArrayList<Object>, LazySimpleStructObjectInspector> cachedLazySimpleStructObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazySimpleStructObjectInspector>();
+
+ public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector(List<String> structFieldNames,
+ List<ObjectInspector> structFieldObjectInspectors, byte separator, Text nullSequence,
+ boolean lastColumnTakesRest, boolean escaped, byte escapeChar) {
+ ArrayList<Object> signature = new ArrayList<Object>();
+ signature.add(structFieldNames);
+ signature.add(structFieldObjectInspectors);
+ signature.add(Byte.valueOf(separator));
+ signature.add(nullSequence.toString());
+ signature.add(Boolean.valueOf(lastColumnTakesRest));
+ signature.add(Boolean.valueOf(escaped));
+ signature.add(Byte.valueOf(escapeChar));
+ LazySimpleStructObjectInspector result = cachedLazySimpleStructObjectInspector.get(signature);
+ if (result == null) {
+ result = new LazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors, separator,
+ nullSequence, lastColumnTakesRest, escaped, escapeChar);
+ cachedLazySimpleStructObjectInspector.put(signature, result);
+ }
+ return result;
+ }
+
+ static ConcurrentHashMap<ArrayList<Object>, LazyListObjectInspector> cachedLazySimpleListObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyListObjectInspector>();
+
+ public static LazyListObjectInspector getLazySimpleListObjectInspector(ObjectInspector listElementObjectInspector,
+ byte separator, Text nullSequence, boolean escaped, byte escapeChar) {
+ ArrayList<Object> signature = new ArrayList<Object>();
+ signature.add(listElementObjectInspector);
+ signature.add(Byte.valueOf(separator));
+ signature.add(nullSequence.toString());
+ signature.add(Boolean.valueOf(escaped));
+ signature.add(Byte.valueOf(escapeChar));
+ LazyListObjectInspector result = cachedLazySimpleListObjectInspector.get(signature);
+ if (result == null) {
+ result = new LazyListObjectInspector(listElementObjectInspector, separator, nullSequence, escaped,
+ escapeChar);
+ cachedLazySimpleListObjectInspector.put(signature, result);
+ }
+ return result;
+ }
+
+ static ConcurrentHashMap<ArrayList<Object>, LazyMapObjectInspector> cachedLazySimpleMapObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyMapObjectInspector>();
+
+ public static LazyMapObjectInspector getLazySimpleMapObjectInspector(ObjectInspector mapKeyObjectInspector,
+ ObjectInspector mapValueObjectInspector, byte itemSeparator, byte keyValueSeparator, Text nullSequence,
+ boolean escaped, byte escapeChar) {
+ ArrayList<Object> signature = new ArrayList<Object>();
+ signature.add(mapKeyObjectInspector);
+ signature.add(mapValueObjectInspector);
+ signature.add(Byte.valueOf(itemSeparator));
+ signature.add(Byte.valueOf(keyValueSeparator));
+ signature.add(nullSequence.toString());
+ signature.add(Boolean.valueOf(escaped));
+ signature.add(Byte.valueOf(escapeChar));
+ LazyMapObjectInspector result = cachedLazySimpleMapObjectInspector.get(signature);
+ if (result == null) {
+ result = new LazyMapObjectInspector(mapKeyObjectInspector, mapValueObjectInspector, itemSeparator,
+ keyValueSeparator, nullSequence, escaped, escapeChar);
+ cachedLazySimpleMapObjectInspector.put(signature, result);
+ }
+ return result;
+ }
+
+ static ConcurrentHashMap<List<Object>, LazyUnionObjectInspector> cachedLazyUnionObjectInspector = new ConcurrentHashMap<List<Object>, LazyUnionObjectInspector>();
+
+ public static LazyUnionObjectInspector getLazyUnionObjectInspector(List<ObjectInspector> ois, byte separator,
+ Text nullSequence, boolean escaped, byte escapeChar) {
+ List<Object> signature = new ArrayList<Object>();
+ signature.add(ois);
+ signature.add(Byte.valueOf(separator));
+ signature.add(nullSequence.toString());
+ signature.add(Boolean.valueOf(escaped));
+ signature.add(Byte.valueOf(escapeChar));
+ LazyUnionObjectInspector result = cachedLazyUnionObjectInspector.get(signature);
+ if (result == null) {
+ result = new LazyUnionObjectInspector(ois, separator, nullSequence, escaped, escapeChar);
+ cachedLazyUnionObjectInspector.put(signature, result);
+ }
+ return result;
+ }
+
+ private LazyObjectInspectorFactory() {
+ // prevent instantiation
+ }
+}
\ No newline at end of file
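
One design note on the caches above: the check-then-put pattern is not atomic, so two threads racing on the same signature can each construct an inspector, and one of them returns an instance that never lands in the cache, weakening the "exactly the same instance" guarantee in the class comment. A sketch of a race-free variant of the struct lookup, using the same names as the factory above and the Java 7 ConcurrentHashMap.putIfAbsent idiom (the other three lookups would change the same way):

    LazySimpleStructObjectInspector result = cachedLazySimpleStructObjectInspector.get(signature);
    if (result == null) {
        LazySimpleStructObjectInspector created = new LazySimpleStructObjectInspector(structFieldNames,
                structFieldObjectInspectors, separator, nullSequence, lastColumnTakesRest, escaped, escapeChar);
        LazySimpleStructObjectInspector previous = cachedLazySimpleStructObjectInspector.putIfAbsent(signature, created);
        result = (previous == null) ? created : previous;
    }
    return result;

Functionally the current code is still correct, since equivalent inspectors behave identically; the variant only tightens instance identity under contention.
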
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
new file mode 100644
index 0000000..95b999e
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
@@ -0,0 +1,128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.typeinfo;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
+/**
+ * TypeInfoFactory can be used to create the TypeInfo object for any type.
+ * TypeInfo objects are all read-only, so we can reuse them easily.
+ * TypeInfoFactory has an internal cache to make sure we don't create two
+ * TypeInfo objects that represent the same type.
+ */
+public final class TypeInfoFactory {
+
+ static ConcurrentHashMap<String, TypeInfo> cachedPrimitiveTypeInfo = new ConcurrentHashMap<String, TypeInfo>();
+
+ private TypeInfoFactory() {
+ // prevent instantiation
+ }
+
+ public static TypeInfo getPrimitiveTypeInfo(String typeName) {
+ if (null == PrimitiveObjectInspectorUtils.getTypeEntryFromTypeName(typeName)) {
+ throw new RuntimeException("Cannot getPrimitiveTypeInfo for " + typeName);
+ }
+ TypeInfo result = cachedPrimitiveTypeInfo.get(typeName);
+ if (result == null) {
+ result = new PrimitiveTypeInfo(typeName);
+ cachedPrimitiveTypeInfo.put(typeName, result);
+ }
+ return result;
+ }
+
+ public static final TypeInfo voidTypeInfo = getPrimitiveTypeInfo(Constants.VOID_TYPE_NAME);
+ public static final TypeInfo booleanTypeInfo = getPrimitiveTypeInfo(Constants.BOOLEAN_TYPE_NAME);
+ public static final TypeInfo intTypeInfo = getPrimitiveTypeInfo(Constants.INT_TYPE_NAME);
+ public static final TypeInfo longTypeInfo = getPrimitiveTypeInfo(Constants.BIGINT_TYPE_NAME);
+ public static final TypeInfo stringTypeInfo = getPrimitiveTypeInfo(Constants.STRING_TYPE_NAME);
+ public static final TypeInfo floatTypeInfo = getPrimitiveTypeInfo(Constants.FLOAT_TYPE_NAME);
+ public static final TypeInfo doubleTypeInfo = getPrimitiveTypeInfo(Constants.DOUBLE_TYPE_NAME);
+ public static final TypeInfo byteTypeInfo = getPrimitiveTypeInfo(Constants.TINYINT_TYPE_NAME);
+ public static final TypeInfo shortTypeInfo = getPrimitiveTypeInfo(Constants.SMALLINT_TYPE_NAME);
+
+ public static final TypeInfo unknownTypeInfo = getPrimitiveTypeInfo("unknown");
+
+ public static TypeInfo getPrimitiveTypeInfoFromPrimitiveWritable(Class<?> clazz) {
+ String typeName = PrimitiveObjectInspectorUtils.getTypeNameFromPrimitiveWritable(clazz);
+ if (typeName == null) {
+ throw new RuntimeException("Internal error: Cannot get typeName for " + clazz);
+ }
+ return getPrimitiveTypeInfo(typeName);
+ }
+
+ public static TypeInfo getPrimitiveTypeInfoFromJavaPrimitive(Class<?> clazz) {
+ return getPrimitiveTypeInfo(PrimitiveObjectInspectorUtils.getTypeNameFromPrimitiveJava(clazz));
+ }
+
+ static ConcurrentHashMap<ArrayList<List<?>>, TypeInfo> cachedStructTypeInfo = new ConcurrentHashMap<ArrayList<List<?>>, TypeInfo>();
+
+ public static TypeInfo getStructTypeInfo(List<String> names, List<TypeInfo> typeInfos) {
+ ArrayList<List<?>> signature = new ArrayList<List<?>>(2);
+ signature.add(names);
+ signature.add(typeInfos);
+ TypeInfo result = cachedStructTypeInfo.get(signature);
+ if (result == null) {
+ result = new StructTypeInfo(names, typeInfos);
+ cachedStructTypeInfo.put(signature, result);
+ }
+ return result;
+ }
+
+ static ConcurrentHashMap<List<?>, TypeInfo> cachedUnionTypeInfo = new ConcurrentHashMap<List<?>, TypeInfo>();
+
+ public static TypeInfo getUnionTypeInfo(List<TypeInfo> typeInfos) {
+ TypeInfo result = cachedUnionTypeInfo.get(typeInfos);
+ if (result == null) {
+ result = new UnionTypeInfo(typeInfos);
+ cachedUnionTypeInfo.put(typeInfos, result);
+ }
+ return result;
+ }
+
+ static ConcurrentHashMap<TypeInfo, TypeInfo> cachedListTypeInfo = new ConcurrentHashMap<TypeInfo, TypeInfo>();
+
+ public static TypeInfo getListTypeInfo(TypeInfo elementTypeInfo) {
+ TypeInfo result = cachedListTypeInfo.get(elementTypeInfo);
+ if (result == null) {
+ result = new ListTypeInfo(elementTypeInfo);
+ cachedListTypeInfo.put(elementTypeInfo, result);
+ }
+ return result;
+ }
+
+ static ConcurrentHashMap<ArrayList<TypeInfo>, TypeInfo> cachedMapTypeInfo = new ConcurrentHashMap<ArrayList<TypeInfo>, TypeInfo>();
+
+ public static TypeInfo getMapTypeInfo(TypeInfo keyTypeInfo, TypeInfo valueTypeInfo) {
+ ArrayList<TypeInfo> signature = new ArrayList<TypeInfo>(2);
+ signature.add(keyTypeInfo);
+ signature.add(valueTypeInfo);
+ TypeInfo result = cachedMapTypeInfo.get(signature);
+ if (result == null) {
+ result = new MapTypeInfo(keyTypeInfo, valueTypeInfo);
+ cachedMapTypeInfo.put(signature, result);
+ }
+ return result;
+ };
+
+}
\ No newline at end of file
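
For reference, a short illustrative use of the factory above (not part of the patch; class name is hypothetical): composing a struct type out of cached primitives. Repeated calls with equal arguments return the cached TypeInfo rather than a new object.

    import java.util.Arrays;

    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class TypeInfoUsageSketch {
        public static void main(String[] args) {
            // struct<id:int, tags:array<string>>
            TypeInfo row = TypeInfoFactory.getStructTypeInfo(
                    Arrays.asList("id", "tags"),
                    Arrays.<TypeInfo> asList(TypeInfoFactory.intTypeInfo,
                            TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo)));
            TypeInfo again = TypeInfoFactory.getStructTypeInfo(
                    Arrays.asList("id", "tags"),
                    Arrays.<TypeInfo> asList(TypeInfoFactory.intTypeInfo,
                            TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo)));
            System.out.println(row.getTypeName()); // e.g. struct<id:int,tags:array<string>>
            System.out.println(row == again);      // true: the second call hits the cache
        }
    }
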
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/cluster.properties b/hivesterix/hivesterix-dist/src/main/resources/conf/cluster.properties
new file mode 100644
index 0000000..2d2401a
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/cluster.properties
@@ -0,0 +1,37 @@
+#The CC port for Hyracks clients
+CC_CLIENTPORT=3099
+
+#The CC port for Hyracks cluster management
+CC_CLUSTERPORT=1099
+
+#The directory of hyracks binaries
+HYRACKS_HOME=../../../../hyracks
+
+#The tmp directory for cc to install jars
+CCTMP_DIR=/tmp/t1
+
+#The tmp directory for nc to install jars
+NCTMP_DIR=/tmp/t2
+
+#The directory to put cc logs
+CCLOGS_DIR=$CCTMP_DIR/logs
+
+#The directory to put nc logs
+NCLOGS_DIR=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS="/tmp/t3,/tmp/t4"
+
+#The JAVA_HOME
+JAVA_HOME=$JAVA_HOME
+
+#The frame size of the internal dataflow engine
+FRAME_SIZE=65536
+
+#CC JAVA_OPTS
+CCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7001,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+# Yourkit option: -agentpath:/grid/0/dev/vborkar/tools/yjp-10.0.4/bin/linux-x86-64/libyjpagent.so=port=20001"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7002,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/hivesterix/conf/configuration.xsl b/hivesterix/hivesterix-dist/src/main/resources/conf/configuration.xsl
similarity index 100%
copy from hivesterix/conf/configuration.xsl
copy to hivesterix/hivesterix-dist/src/main/resources/conf/configuration.xsl
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/debugnc.properties b/hivesterix/hivesterix-dist/src/main/resources/conf/debugnc.properties
new file mode 100755
index 0000000..27afa26
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/debugnc.properties
@@ -0,0 +1,12 @@
+#The tmp directory for nc to install jars
+NCTMP_DIR2=/tmp/t-1
+
+#The directory to put nc logs
+NCLOGS_DIR2=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS2="/tmp/t-2,/tmp/t-3"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS2="-Xdebug -Xrunjdwp:transport=dt_socket,address=7003,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/hivesterix/conf/hive-default.xml b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-default.xml
similarity index 98%
copy from hivesterix/conf/hive-default.xml
copy to hivesterix/hivesterix-dist/src/main/resources/conf/hive-default.xml
index 034ea61..587eede 100644
--- a/hivesterix/conf/hive-default.xml
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-default.xml
@@ -23,22 +23,11 @@
By setting this property to -1, Hive will automatically figure out what
should be the number of reducers.
</description>
- </property>
- <property>
- <name>hive.hyracks.host</name>
- <value>128.195.14.4</value>
- </property>
-
- <property>
- <name>hive.hyracks.port</name>
- <value>3099</value>
- </property>
-
- <property>
- <name>hive.hyracks.app</name>
- <value>hivesterix</value>
- </property>
+ <property>
+ <name>hive.hyracks.connectorpolicy</name>
+ <value>PIPELINING</value>
+ </property>
<property>
<name>hive.hyracks.parrallelism</name>
@@ -52,12 +41,12 @@
<property>
<name>hive.algebricks.groupby.external.memory</name>
- <value>536870912</value>
+ <value>33554432</value>
</property>
<property>
<name>hive.algebricks.sort.memory</name>
- <value>536870912</value>
+ <value>33554432</value>
</property>
<property>
diff --git a/hivesterix/conf/hive-log4j.properties b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-log4j.properties
similarity index 100%
copy from hivesterix/conf/hive-log4j.properties
copy to hivesterix/hivesterix-dist/src/main/resources/conf/hive-log4j.properties
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/master b/hivesterix/hivesterix-dist/src/main/resources/conf/master
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/master
@@ -0,0 +1 @@
+localhost
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/slaves b/hivesterix/hivesterix-dist/src/main/resources/conf/slaves
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/slaves
@@ -0,0 +1 @@
+localhost
diff --git a/hivesterix/resource/bin/ext/cli.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/cli.sh
similarity index 100%
copy from hivesterix/resource/bin/ext/cli.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/ext/cli.sh
diff --git a/hivesterix/resource/bin/ext/help.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/help.sh
similarity index 100%
copy from hivesterix/resource/bin/ext/help.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/ext/help.sh
diff --git a/hivesterix/resource/bin/ext/hiveserver.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/hiveserver.sh
similarity index 100%
copy from hivesterix/resource/bin/ext/hiveserver.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/ext/hiveserver.sh
diff --git a/hivesterix/resource/bin/ext/hwi.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/hwi.sh
similarity index 100%
copy from hivesterix/resource/bin/ext/hwi.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/ext/hwi.sh
diff --git a/hivesterix/resource/bin/ext/jar.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/jar.sh
similarity index 100%
copy from hivesterix/resource/bin/ext/jar.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/ext/jar.sh
diff --git a/hivesterix/resource/bin/ext/lineage.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/lineage.sh
similarity index 100%
copy from hivesterix/resource/bin/ext/lineage.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/ext/lineage.sh
diff --git a/hivesterix/resource/bin/ext/metastore.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/metastore.sh
similarity index 100%
copy from hivesterix/resource/bin/ext/metastore.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/ext/metastore.sh
diff --git a/hivesterix/resource/bin/ext/rcfilecat.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/rcfilecat.sh
similarity index 100%
copy from hivesterix/resource/bin/ext/rcfilecat.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/ext/rcfilecat.sh
diff --git a/hivesterix/resource/bin/ext/util/execHiveCmd.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/util/execHiveCmd.sh
similarity index 100%
copy from hivesterix/resource/bin/ext/util/execHiveCmd.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/ext/util/execHiveCmd.sh
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/getip.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/getip.sh
new file mode 100755
index 0000000..8c9ae76
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/getip.sh
@@ -0,0 +1,25 @@
+#get the OS
+OS_NAME=`uname -a|awk '{print $1}'`
+LINUX_OS='Linux'
+
+if [ $OS_NAME = $LINUX_OS ];
+then
+ #Get IP Address
+ IPADDR=`/sbin/ifconfig eth0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+ if [ "$IPADDR" = "" ]
+ then
+ IPADDR=`/sbin/ifconfig em1 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+ fi
+ if [ "$IPADDR" = "" ]
+ then
+ IPADDR=`/sbin/ifconfig lo | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+ fi
+else
+ IPADDR=`/sbin/ifconfig en1 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+ if [ "$IPADDR" = "" ]
+ then
+ IPADDR=`/sbin/ifconfig lo0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+ fi
+
+fi
+echo $IPADDR
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/hive b/hivesterix/hivesterix-dist/src/main/resources/scripts/hive
new file mode 100755
index 0000000..f98f340
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/hive
@@ -0,0 +1,213 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cygwin=false
+case "`uname`" in
+ CYGWIN*) cygwin=true;;
+esac
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hive-config.sh
+
+SERVICE=""
+HELP=""
+while [ $# -gt 0 ]; do
+ case "$1" in
+ --service)
+ shift
+ SERVICE=$1
+ shift
+ ;;
+ --rcfilecat)
+ SERVICE=rcfilecat
+ shift
+ ;;
+ --help)
+ HELP=_help
+ shift
+ ;;
+ *)
+ break
+ ;;
+ esac
+done
+
+if [ "$SERVICE" = "" ] ; then
+ if [ "$HELP" = "_help" ] ; then
+ SERVICE="help"
+ else
+ SERVICE="cli"
+ fi
+fi
+
+if [ -f "${HIVE_CONF_DIR}/hive-env.sh" ]; then
+ . "${HIVE_CONF_DIR}/hive-env.sh"
+fi
+
+CLASSPATH="${HIVE_CONF_DIR}"
+
+HIVE_LIB=${HIVE_HOME}/lib
+
+# needed for execution
+if [ ! -f ${HIVE_LIB}/hive-exec-*.jar ]; then
+ echo "Missing Hive Execution Jar: ${HIVE_LIB}/hive-exec-*.jar"
+ exit 1;
+fi
+
+if [ ! -f ${HIVE_LIB}/hive-metastore-*.jar ]; then
+ echo "Missing Hive MetaStore Jar"
+ exit 2;
+fi
+
+# cli specific code
+if [ ! -f ${HIVE_LIB}/hive-cli-*.jar ]; then
+ echo "Missing Hive CLI Jar"
+ exit 3;
+fi
+
+CLASSPATH=${CLASSPATH}:${HIVE_LIB}/a-hive-path.jar
+
+for f in ${HIVE_LIB}/*.jar; do
+ CLASSPATH=${CLASSPATH}:$f;
+done
+
+# add the auxiliary jars such as serdes
+if [ -d "${HIVE_AUX_JARS_PATH}" ]; then
+ for f in ${HIVE_AUX_JARS_PATH}/*.jar; do
+ if [[ ! -f $f ]]; then
+ continue;
+ fi
+ if $cygwin; then
+ f=`cygpath -w "$f"`
+ fi
+ AUX_CLASSPATH=${AUX_CLASSPATH}:$f
+ if [ "${AUX_PARAM}" == "" ]; then
+ AUX_PARAM=file://$f
+ else
+ AUX_PARAM=${AUX_PARAM},file://$f;
+ fi
+ done
+elif [ "${HIVE_AUX_JARS_PATH}" != "" ]; then
+ if $cygwin; then
+ HIVE_AUX_JARS_PATH=`echo $HIVE_AUX_JARS_PATH | sed 's/,/:/g'`
+ HIVE_AUX_JARS_PATH=`cygpath -p -w "$HIVE_AUX_JARS_PATH"`
+ HIVE_AUX_JARS_PATH=`echo $HIVE_AUX_JARS_PATH | sed 's/;/,/g'`
+ fi
+ AUX_CLASSPATH=${HIVE_AUX_JARS_PATH}
+ AUX_PARAM=file://${HIVE_AUX_JARS_PATH}
+ AUX_PARAM=`echo $AUX_PARAM | sed 's/,/,file:\/\//g'`
+fi
+
+# adding jars from auxlib directory
+for f in ${HIVE_HOME}/auxlib/*.jar; do
+ if [[ ! -f $f ]]; then
+ continue;
+ fi
+ if $cygwin; then
+ f=`cygpath -w "$f"`
+ fi
+ AUX_CLASSPATH=${AUX_CLASSPATH}:$f
+ if [ "${AUX_PARAM}" == "" ]; then
+ AUX_PARAM=file://$f
+ else
+ AUX_PARAM=${AUX_PARAM},file://$f;
+ fi
+done
+if $cygwin; then
+ CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+ CLASSPATH=${CLASSPATH};${AUX_CLASSPATH}
+else
+ CLASSPATH=${CLASSPATH}:${AUX_CLASSPATH}
+fi
+
+# pass classpath to hadoop
+export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${CLASSPATH}"
+
+# check for hadoop in the path
+HADOOP_IN_PATH=`which hadoop 2>/dev/null`
+if [ -f ${HADOOP_IN_PATH} ]; then
+ HADOOP_DIR=`dirname "$HADOOP_IN_PATH"`/..
+fi
+# HADOOP_HOME env variable overrides hadoop in the path
+HADOOP_HOME=${HADOOP_HOME:-$HADOOP_DIR}
+if [ "$HADOOP_HOME" == "" ]; then
+ echo "Cannot find hadoop installation: \$HADOOP_HOME must be set or hadoop must be in the path";
+ exit 4;
+fi
+
+HADOOP=$HADOOP_HOME/bin/hadoop
+if [ ! -f ${HADOOP} ]; then
+ echo "Cannot find hadoop installation: \$HADOOP_HOME must be set or hadoop must be in the path";
+ exit 4;
+fi
+
+# Make sure we're using a compatible version of Hadoop
+hadoop_version=$($HADOOP version | awk '{if (NR == 1) {print $2;}}');
+
+# Save the regex to a var to work around quoting incompatibilities
+# between Bash 3.1 and 3.2
+hadoop_version_re="^([[:digit:]]+)\.([[:digit:]]+)(\.([[:digit:]]+))?.*$"
+
+if [[ "$hadoop_version" =~ $hadoop_version_re ]]; then
+ hadoop_major_ver=${BASH_REMATCH[1]}
+ hadoop_minor_ver=${BASH_REMATCH[2]}
+ hadoop_patch_ver=${BASH_REMATCH[4]}
+else
+ echo "Unable to determine Hadoop version information."
+ echo "'hadoop version' returned:"
+ echo `$HADOOP version`
+ exit 5
+fi
+
+if [ $hadoop_minor_ver -ne 20 -o $hadoop_patch_ver -eq 0 ]; then
+ echo "Hive requires Hadoop 0.20.x (x >= 1)."
+ echo "'hadoop version' returned:"
+ echo `$HADOOP version`
+ exit 6
+fi
+
+if [ "${AUX_PARAM}" != "" ]; then
+ HIVE_OPTS="$HIVE_OPTS -hiveconf hive.aux.jars.path=${AUX_PARAM}"
+ AUX_JARS_CMD_LINE="-libjars ${AUX_PARAM}"
+fi
+
+SERVICE_LIST=""
+
+for i in "$bin"/ext/*.sh ; do
+ . $i
+done
+
+for i in "$bin"/ext/util/*.sh ; do
+ . $i
+done
+
+TORUN=""
+for j in $SERVICE_LIST ; do
+ if [ "$j" = "$SERVICE" ] ; then
+ TORUN=${j}$HELP
+ fi
+done
+
+if [ "$TORUN" = "" ] ; then
+ echo "Service $SERVICE not found"
+ echo "Available Services: $SERVICE_LIST"
+ exit 7
+else
+ $TORUN "$@"
+fi
diff --git a/hivesterix/resource/bin/hive-config.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/hive-config.sh
similarity index 100%
copy from hivesterix/resource/bin/hive-config.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/hive-config.sh
diff --git a/hivesterix/resource/bin/init-hive-dfs.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/init-hive-dfs.sh
similarity index 100%
copy from hivesterix/resource/bin/init-hive-dfs.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/init-hive-dfs.sh
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/startAllNCs.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/startAllNCs.sh
new file mode 100644
index 0000000..d30da26
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/startAllNCs.sh
@@ -0,0 +1,6 @@
+PREGELIX_PATH=`pwd`
+
+for i in `cat conf/slaves`
+do
+ ssh $i "cd ${PREGELIX_PATH}; export JAVA_HOME=${JAVA_HOME}; bin/startnc.sh"
+done
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/startCluster.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/startCluster.sh
new file mode 100644
index 0000000..6aa9161
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/startCluster.sh
@@ -0,0 +1,19 @@
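+# Start the cluster controller, then all node controllers, and finally register
+# the hivesterix application with the Hyracks CLI.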
+bin/startcc.sh
+sleep 5
+bin/startAllNCs.sh
+
+. conf/cluster.properties
+# do we need to specify the version somewhere?
+hyrackcmd=`ls ${HYRACKS_HOME}/hyracks-cli/target/hyracks-cli-*-binary-assembly/bin/hyrackscli`
+# find zip file
+appzip=`ls $PWD/../hivesterix-dist-*-binary-assembly.zip`
+
+[ -f "$hyrackcmd" ] || { echo "Hyracks command-line client is missing"; exit 1;}
+[ -f "$appzip" ] || { echo "hivesterix binary-assembly.zip is missing"; exit 1;}
+
+CCHOST_NAME=`cat conf/master`
+
+IPADDR=`bin/getip.sh`
+echo "connect to \"${IPADDR}:${CC_CLIENTPORT}\"; create application hivesterix \"$appzip\";" | $hyrackcmd
+echo ""
+
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/startDebugNc.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/startDebugNc.sh
new file mode 100755
index 0000000..fe6cf27
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/startDebugNc.sh
@@ -0,0 +1,50 @@
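+#Start an extra debug node controller on this host, using the *2 settings from
+#conf/debugnc.properties so it does not clash with the regular NC instance.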
+hostname
+
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CURRENT_PATH=`pwd`
+CCHOST=`ssh ${CCHOST_NAME} "cd ${CURRENT_PATH}; bin/getip.sh"`
+
+#Import cluster properties
+. conf/cluster.properties
+. conf/debugnc.properties
+
+#Clean up temp dir
+
+rm -rf $NCTMP_DIR2
+mkdir $NCTMP_DIR2
+
+#Clean up log dir
+rm -rf $NCLOGS_DIR2
+mkdir $NCLOGS_DIR2
+
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS2 | tr "," "\n")
+for io_dir in $io_dirs
+do
+ rm -rf $io_dir
+ mkdir $io_dir
+done
+
+#Set JAVA_HOME
+export JAVA_HOME=$JAVA_HOME
+
+#Get the IP address of this node
+IPADDR=`bin/getip.sh`
+
+#Get node ID
+NODEID=`hostname | cut -d '.' -f 1`
+NODEID=${NODEID}2
+
+#Set JAVA_OPTS
+export JAVA_OPTS=$NCJAVA_OPTS2
+
+cd $HYRACKS_HOME
+HYRACKS_HOME=`pwd`
+
+#Enter the temp dir
+cd $NCTMP_DIR2
+
+#Launch hyracks nc
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyracksnc -cc-host $CCHOST -cc-port $CC_CLUSTERPORT -cluster-net-ip-address $IPADDR -data-ip-address $IPADDR -node-id $NODEID -iodevices "${IO_DIRS2}" &> $NCLOGS_DIR2/$NODEID.log &
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/startcc.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/startcc.sh
new file mode 100644
index 0000000..efb79ce
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/startcc.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+hostname
+
+#Import cluster properties
+. conf/cluster.properties
+
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CCHOST=`bin/getip.sh`
+
+#Remove the temp dir
+rm -rf $CCTMP_DIR
+mkdir $CCTMP_DIR
+
+#Remove the logs dir
+rm -rf $CCLOGS_DIR
+mkdir $CCLOGS_DIR
+
+#Export JAVA_HOME and JAVA_OPTS
+export JAVA_HOME=$JAVA_HOME
+export JAVA_OPTS=$CCJAVA_OPTS
+
+#Launch hyracks cc script
+chmod -R 755 $HYRACKS_HOME
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyrackscc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -max-heartbeat-lapse-periods 999999 -default-max-job-attempts 0 -job-history-size 0 &> $CCLOGS_DIR/cc.log &
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/startnc.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/startnc.sh
new file mode 100644
index 0000000..6e0f90e
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/startnc.sh
@@ -0,0 +1,49 @@
+hostname
+
+MY_NAME=`hostname`
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CURRENT_PATH=`pwd`
+CCHOST=`ssh ${CCHOST_NAME} "cd ${CURRENT_PATH}; bin/getip.sh"`
+
+#Import cluster properties
+. conf/cluster.properties
+
+#Clean up temp dir
+
+rm -rf $NCTMP_DIR
+mkdir $NCTMP_DIR
+
+#Clean up log dir
+rm -rf $NCLOGS_DIR
+mkdir $NCLOGS_DIR
+
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS | tr "," "\n")
+for io_dir in $io_dirs
+do
+ rm -rf $io_dir
+ mkdir $io_dir
+done
+
+#Set JAVA_HOME
+export JAVA_HOME=$JAVA_HOME
+
+IPADDR=`bin/getip.sh`
+#echo $IPADDR
+
+#Get node ID
+NODEID=`hostname | cut -d '.' -f 1`
+
+#Set JAVA_OPTS
+export JAVA_OPTS=$NCJAVA_OPTS
+
+cd $HYRACKS_HOME
+HYRACKS_HOME=`pwd`
+
+#Enter the temp dir
+cd $NCTMP_DIR
+
+#Launch hyracks nc
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyracksnc -cc-host $CCHOST -cc-port $CC_CLUSTERPORT -cluster-net-ip-address $IPADDR -data-ip-address $IPADDR -node-id $NODEID -iodevices "${IO_DIRS}" &> $NCLOGS_DIR/$NODEID.log &
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/stopAllNCs.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopAllNCs.sh
new file mode 100644
index 0000000..12367c1
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopAllNCs.sh
@@ -0,0 +1,6 @@
+HIVESTERIX_PATH=`pwd`
+
+for i in `cat conf/slaves`
+do
+  ssh $i "cd ${HIVESTERIX_PATH}; bin/stopnc.sh"
+done
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/stopCluster.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopCluster.sh
new file mode 100644
index 0000000..4889934
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopCluster.sh
@@ -0,0 +1,3 @@
+bin/stopAllNCs.sh
+sleep 2
+bin/stopcc.sh
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/stopcc.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopcc.sh
new file mode 100644
index 0000000..c2f525a
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopcc.sh
@@ -0,0 +1,10 @@
+hostname
+. conf/cluster.properties
+
+#Kill the hyracks CC process (exclude the grep commands themselves)
+PID=`ps -ef|grep ${USER}|grep java|grep hyracks|grep -v grep|awk '{print $2}'`
+echo $PID
+kill -9 $PID
+
+#Clean up CC temp dir
+rm -rf $CCTMP_DIR/*
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/stopnc.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopnc.sh
new file mode 100644
index 0000000..03ce4e7
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopnc.sh
@@ -0,0 +1,23 @@
+hostname
+. conf/cluster.properties
+
+#Kill the hyracks NC process (exclude the grep commands themselves)
+PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=hyracksnc'|grep -v grep|awk '{print $2}'`
+
+if [ "$PID" == "" ]; then
+ USERID=`id | sed 's/^uid=//;s/(.*$//'`
+  PID=`ps -ef|grep ${USERID}|grep java|grep 'Dapp.name=hyracksnc'|grep -v grep|awk '{print $2}'`
+fi
+
+echo $PID
+kill -9 $PID
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS | tr "," "\n")
+for io_dir in $io_dirs
+do
+ rm -rf $io_dir/*
+done
+
+#Clean up NC temp dir
+rm -rf $NCTMP_DIR/*
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestCase.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestCase.java
new file mode 100644
index 0000000..aa38fe7
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestCase.java
@@ -0,0 +1,142 @@
+package edu.uci.ics.hivesterix.perf;
+
+import java.io.File;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
+import org.junit.Test;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.perf.base.AbstractPerfTestCase;
+
+public class PerfTestCase extends AbstractPerfTestCase {
+ private File resultFile;
+ private FileSystem dfs;
+
+ PerfTestCase(File queryFile, File resultFile) {
+ super("testRuntimeFunction", queryFile);
+ this.queryFile = queryFile;
+ this.resultFile = resultFile;
+ }
+
+ @Test
+ public void testRuntimeFunction() throws Exception {
+ StringBuilder queryString = new StringBuilder();
+ readFileToString(queryFile, queryString);
+ String[] queries = queryString.toString().split(";");
+ StringWriter sw = new StringWriter();
+
+ HiveConf hconf = ConfUtil.getHiveConf();
+ Driver driver = new Driver(hconf, new PrintWriter(sw));
+ driver.init();
+
+ dfs = FileSystem.get(ConfUtil.getJobConf());
+
+ int i = 0;
+ for (String query : queries) {
+ if (i == queries.length - 1)
+ break;
+ driver.run(query);
+ driver.clear();
+ i++;
+ }
+
+ String warehouse = hconf.get("hive.metastore.warehouse.dir");
+ String tableName = removeExt(resultFile.getName());
+ String directory = warehouse + "/" + tableName + "/";
+ String localDirectory = "tmp";
+
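+        // List the table's output files; if they are not on HDFS, fall back to the
+        // local file system (the table may have been written locally).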
+ FileStatus[] files = dfs.listStatus(new Path(directory));
+ FileSystem lfs = null;
+ if (files == null) {
+ lfs = FileSystem.getLocal(ConfUtil.getJobConf());
+ files = lfs.listStatus(new Path(directory));
+ }
+
+ File resultDirectory = new File(localDirectory + "/" + tableName);
+ deleteDir(resultDirectory);
+ resultDirectory.mkdir();
+
+ for (FileStatus fs : files) {
+ Path src = fs.getPath();
+ if (src.getName().indexOf("crc") >= 0)
+ continue;
+
+ String destStr = localDirectory + "/" + tableName + "/" + src.getName();
+ Path dest = new Path(destStr);
+ if (lfs != null) {
+ lfs.copyToLocalFile(src, dest);
+ dfs.copyFromLocalFile(dest, new Path(directory));
+ } else
+ dfs.copyToLocalFile(src, dest);
+ }
+
+ File[] rFiles = resultDirectory.listFiles();
+ StringBuilder sb = new StringBuilder();
+ for (File r : rFiles) {
+ if (r.getName().indexOf("crc") >= 0)
+ continue;
+ readFileToString(r, sb);
+ }
+ deleteDir(resultDirectory);
+
+ StringBuilder buf = new StringBuilder();
+ readFileToString(resultFile, buf);
+ if (!equal(buf, sb)) {
+ throw new Exception("Result for " + queryFile + " changed:\n" + sw.toString());
+ }
+ }
+
+ private void deleteDir(File resultDirectory) {
+ if (resultDirectory.exists()) {
+ File[] rFiles = resultDirectory.listFiles();
+ for (File r : rFiles)
+ r.delete();
+ resultDirectory.delete();
+ }
+ }
+
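+    /**
+     * Compares two result sets row by row; fields containing a '.' are parsed as
+     * floats and compared numerically, so purely textual formatting differences pass.
+     */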
+ private boolean equal(StringBuilder sb1, StringBuilder sb2) {
+ String s1 = sb1.toString();
+ String s2 = sb2.toString();
+ String[] rowsOne = s1.split("\n");
+ String[] rowsTwo = s2.split("\n");
+
+ if (rowsOne.length != rowsTwo.length)
+ return false;
+
+ for (int i = 0; i < rowsOne.length; i++) {
+ String row1 = rowsOne[i];
+ String row2 = rowsTwo[i];
+
+ if (row1.equals(row2))
+ continue;
+
+            // Hive separates fields with its default delimiter ctrl-A (\001)
+            String[] fields1 = row1.split("\1");
+            String[] fields2 = row2.split("\1");
+
+ for (int j = 0; j < fields1.length; j++) {
+ if (fields1[j].equals(fields2[j])) {
+ continue;
+ } else if (fields1[j].indexOf('.') < 0) {
+ return false;
+ } else {
+ Float float1 = Float.parseFloat(fields1[j]);
+ Float float2 = Float.parseFloat(fields2[j]);
+
+ if (Math.abs(float1 - float2) == 0)
+ continue;
+ else
+ return false;
+ }
+ }
+ }
+
+ return true;
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuite.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuite.java
new file mode 100644
index 0000000..796842d
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuite.java
@@ -0,0 +1,74 @@
+package edu.uci.ics.hivesterix.perf;
+
+import java.io.File;
+import java.util.List;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import edu.uci.ics.hivesterix.perf.base.AbstractPerfTestSuiteClass;
+
+public class PerfTestSuite extends AbstractPerfTestSuiteClass {
+
+ private static final String PATH_TO_QUERIES = "src/test/resources/perf/queries/";
+ private static final String PATH_TO_RESULTS = "src/test/resources/perf/results/";
+ private static final String PATH_TO_IGNORES = "src/test/resources/perf/ignore.txt";
+
+ private static final String FILE_EXTENSION_OF_RESULTS = "result";
+
+ public static Test suite() throws Exception {
+ List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
+ File testData = new File(PATH_TO_QUERIES);
+ File[] queries = testData.listFiles();
+ PerfTestSuite testSuite = new PerfTestSuite();
+
+ // set hdfs and hyracks cluster, and load test data to hdfs
+ try {
+ testSuite.setup();
+ testSuite.loadData();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+
+ for (File qFile : queries) {
+ if (isIgnored(qFile.getName(), ignores))
+ continue;
+
+ if (qFile.isFile()) {
+ String resultFileName = hiveExtToResExt(qFile.getName());
+ File rFile = new File(PATH_TO_RESULTS + resultFileName);
+ testSuite.addTest(new PerfTestCase(qFile, rFile));
+ }
+ }
+ return testSuite;
+ }
+
+ private static String hiveExtToResExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
+ }
+
+ /**
+ * Runs the tests and collects their result in a TestResult.
+ */
+ @Override
+ public void run(TestResult result) {
+
+ int testCount = countTestCases();
+ for (int i = 0; i < testCount; i++) {
+ Test each = this.testAt(i);
+ if (result.shouldStop())
+ break;
+ runTest(each, result);
+ }
+
+ // cleanup hdfs and hyracks cluster
+ try {
+ cleanup();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteCaseGenerator.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteCaseGenerator.java
new file mode 100644
index 0000000..4777351
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteCaseGenerator.java
@@ -0,0 +1,99 @@
+package edu.uci.ics.hivesterix.perf;
+
+import java.io.File;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
+import org.junit.Test;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.perf.base.AbstractPerfTestCase;
+
+public class PerfTestSuiteCaseGenerator extends AbstractPerfTestCase {
+ private File resultFile;
+ private FileSystem dfs;
+
+ PerfTestSuiteCaseGenerator(File queryFile, File resultFile) {
+ super("testRuntimeFunction", queryFile);
+ this.queryFile = queryFile;
+ this.resultFile = resultFile;
+ }
+
+ @Test
+ public void testRuntimeFunction() throws Exception {
+ StringBuilder queryString = new StringBuilder();
+ readFileToString(queryFile, queryString);
+ String[] queries = queryString.toString().split(";");
+
+ HiveConf hconf = ConfUtil.getHiveConf();
+ Driver driver = new Driver(hconf);
+ driver.init();
+
+ dfs = FileSystem.get(ConfUtil.getJobConf());
+
+ long startTime = System.currentTimeMillis();
+ int i = 0;
+ for (String query : queries) {
+ if (i == queries.length - 1)
+ break;
+ driver.run(query);
+ // driver.clear();
+ i++;
+ }
+ long endTime = System.currentTimeMillis();
+ System.out.println(resultFile.getName() + " execution time " + (endTime - startTime));
+
+ String warehouse = hconf.get("hive.metastore.warehouse.dir");
+ String tableName = removeExt(resultFile.getName());
+ String directory = warehouse + "/" + tableName + "/";
+ String localDirectory = "tmp";
+
+ FileStatus[] files = dfs.listStatus(new Path(directory));
+ FileSystem lfs = null;
+ if (files == null) {
+ lfs = FileSystem.getLocal(ConfUtil.getJobConf());
+ files = lfs.listStatus(new Path(directory));
+ }
+
+ File resultDirectory = new File(localDirectory + "/" + tableName);
+ deleteDir(resultDirectory);
+ resultDirectory.mkdir();
+
+ for (FileStatus fs : files) {
+ Path src = fs.getPath();
+ if (src.getName().indexOf("crc") >= 0)
+ continue;
+
+ String destStr = localDirectory + "/" + tableName + "/" + src.getName();
+ Path dest = new Path(destStr);
+ if (lfs != null) {
+ lfs.copyToLocalFile(src, dest);
+ dfs.copyFromLocalFile(dest, new Path(directory));
+ } else
+ dfs.copyToLocalFile(src, dest);
+ }
+
+ File[] rFiles = resultDirectory.listFiles();
+ StringBuilder sb = new StringBuilder();
+ for (File r : rFiles) {
+ if (r.getName().indexOf("crc") >= 0)
+ continue;
+ readFileToString(r, sb);
+ }
+ deleteDir(resultDirectory);
+
+ writeStringToFile(resultFile, sb);
+ }
+
+ private void deleteDir(File resultDirectory) {
+ if (resultDirectory.exists()) {
+ File[] rFiles = resultDirectory.listFiles();
+ for (File r : rFiles)
+ r.delete();
+ resultDirectory.delete();
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteGenerator.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteGenerator.java
new file mode 100644
index 0000000..aa38014
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteGenerator.java
@@ -0,0 +1,74 @@
+package edu.uci.ics.hivesterix.perf;
+
+import java.io.File;
+import java.util.List;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import edu.uci.ics.hivesterix.perf.base.AbstractPerfTestSuiteClass;
+
+public class PerfTestSuiteGenerator extends AbstractPerfTestSuiteClass {
+
+ private static final String PATH_TO_QUERIES = "src/test/resources/perf/queries/";
+ private static final String PATH_TO_RESULTS = "src/test/resources/perf/results/";
+ private static final String PATH_TO_IGNORES = "src/test/resources/perf/ignore.txt";
+
+ private static final String FILE_EXTENSION_OF_RESULTS = "result";
+
+ public static Test suite() throws Exception {
+ List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
+ File testData = new File(PATH_TO_QUERIES);
+ File[] queries = testData.listFiles();
+ PerfTestSuiteGenerator testSuite = new PerfTestSuiteGenerator();
+
+ // set hdfs and hyracks cluster, and load test data to hdfs
+ try {
+ testSuite.setup();
+ testSuite.loadData();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+
+ for (File qFile : queries) {
+ if (isIgnored(qFile.getName(), ignores))
+ continue;
+
+ if (qFile.isFile() && qFile.getName().startsWith("q18_")) {
+ String resultFileName = hiveExtToResExt(qFile.getName());
+ File rFile = new File(PATH_TO_RESULTS + resultFileName);
+ testSuite.addTest(new PerfTestSuiteCaseGenerator(qFile, rFile));
+ }
+ }
+ return testSuite;
+ }
+
+ private static String hiveExtToResExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
+ }
+
+ /**
+ * Runs the tests and collects their result in a TestResult.
+ */
+ @Override
+ public void run(TestResult result) {
+
+ int testCount = countTestCases();
+ for (int i = 0; i < testCount; i++) {
+ Test each = this.testAt(i);
+ if (result.shouldStop())
+ break;
+ runTest(each, result);
+ }
+
+ // cleanup hdfs and hyracks cluster
+ try {
+ cleanup();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestCase.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestCase.java
new file mode 100644
index 0000000..7e7db36
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestCase.java
@@ -0,0 +1,49 @@
+package edu.uci.ics.hivesterix.perf.base;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+import junit.framework.TestCase;
+
+public class AbstractPerfTestCase extends TestCase {
+ protected File queryFile;
+
+    public AbstractPerfTestCase(String testName, File queryFile) {
+        super(testName);
+        this.queryFile = queryFile;
+    }
+
+ protected static void readFileToString(File file, StringBuilder buf) throws Exception {
+ BufferedReader result = new BufferedReader(new FileReader(file));
+ while (true) {
+ String s = result.readLine();
+ if (s == null) {
+ break;
+ } else {
+ buf.append(s);
+ buf.append('\n');
+ }
+ }
+ result.close();
+ }
+
+ protected static void writeStringToFile(File file, StringWriter buf) throws Exception {
+ PrintWriter result = new PrintWriter(new FileWriter(file));
+ result.print(buf);
+ result.close();
+ }
+
+ protected static void writeStringToFile(File file, StringBuilder buf) throws Exception {
+ PrintWriter result = new PrintWriter(new FileWriter(file));
+ result.print(buf);
+ result.close();
+ }
+
+ protected static String removeExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot);
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestSuiteClass.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestSuiteClass.java
new file mode 100644
index 0000000..393378f
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestSuiteClass.java
@@ -0,0 +1,201 @@
+package edu.uci.ics.hivesterix.perf.base;
+
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import junit.framework.TestSuite;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
+import edu.uci.ics.hyracks.control.common.controllers.CCConfig;
+import edu.uci.ics.hyracks.control.common.controllers.NCConfig;
+import edu.uci.ics.hyracks.control.nc.NodeControllerService;
+
+@SuppressWarnings("deprecation")
+public abstract class AbstractPerfTestSuiteClass extends TestSuite {
+
+ private static final String PATH_TO_HADOOP_CONF = "src/test/resources/perf/hadoop/conf";
+ private static final String PATH_TO_HIVE_CONF = "src/test/resources/perf/hive/conf/hive-default.xml";
+ private static final String PATH_TO_DATA = "src/test/resources/perf/data/";
+
+ private MiniDFSCluster dfsCluster;
+ private MiniMRCluster mrCluster;
+
+ private JobConf conf = new JobConf();
+ protected FileSystem dfs;
+
+ private int numberOfNC = 2;
+ private ClusterControllerService cc;
+ private Map<String, NodeControllerService> ncs = new HashMap<String, NodeControllerService>();
+
+ /**
+ * setup cluster
+ *
+ * @throws IOException
+ */
+ protected void setup() throws Exception {
+ setupHdfs();
+ setupHyracks();
+ }
+
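+    /**
+     * Start an in-process MiniDFSCluster and MiniMRCluster and point the Hive
+     * warehouse at the test HDFS instance.
+     */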
+ private void setupHdfs() throws IOException {
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+ HiveConf hconf = new HiveConf(SessionState.class);
+ hconf.addResource(new Path(PATH_TO_HIVE_CONF));
+
+ FileSystem lfs = FileSystem.getLocal(new Configuration());
+ lfs.delete(new Path("build"), true);
+ lfs.delete(new Path("metastore_db"), true);
+
+ System.setProperty("hadoop.log.dir", "logs");
+ dfsCluster = new MiniDFSCluster(hconf, numberOfNC, true, null);
+ dfs = dfsCluster.getFileSystem();
+
+ mrCluster = new MiniMRCluster(2, dfs.getUri().toString(), 1);
+ hconf.setVar(HiveConf.ConfVars.HADOOPJT, "localhost:" + mrCluster.getJobTrackerPort());
+ hconf.setInt("mapred.min.split.size", 1342177280);
+
+ conf = new JobConf(hconf);
+ ConfUtil.setJobConf(conf);
+
+ String fsName = conf.get("fs.default.name");
+ hconf.set("hive.metastore.warehouse.dir", fsName.concat("/tmp/hivesterix"));
+ String warehouse = hconf.get("hive.metastore.warehouse.dir");
+ dfs.mkdirs(new Path(warehouse));
+ ConfUtil.setHiveConf(hconf);
+ }
+
+ private void setupHyracks() throws Exception {
+ // read hive conf
+ HiveConf hconf = new HiveConf(SessionState.class);
+ hconf.addResource(new Path(PATH_TO_HIVE_CONF));
+ SessionState.start(hconf);
+ String ipAddress = hconf.get("hive.hyracks.host");
+ int clientPort = Integer.parseInt(hconf.get("hive.hyracks.port"));
+ int clusterPort = clientPort;
+
+ // start hyracks cc
+ CCConfig ccConfig = new CCConfig();
+ ccConfig.clientNetIpAddress = ipAddress;
+ ccConfig.clientNetPort = clientPort;
+ ccConfig.clusterNetPort = clusterPort;
+ ccConfig.profileDumpPeriod = 1000;
+ ccConfig.heartbeatPeriod = 200000000;
+ ccConfig.maxHeartbeatLapsePeriods = 200000000;
+ cc = new ClusterControllerService(ccConfig);
+ cc.start();
+
+ // start hyracks nc
+ for (int i = 0; i < numberOfNC; i++) {
+ NCConfig ncConfig = new NCConfig();
+ ncConfig.ccHost = ipAddress;
+ ncConfig.clusterNetIPAddress = ipAddress;
+ ncConfig.ccPort = clientPort;
+ ncConfig.dataIPAddress = "127.0.0.1";
+ ncConfig.datasetIPAddress = "127.0.0.1";
+ ncConfig.nodeId = "nc" + i;
+ NodeControllerService nc = new NodeControllerService(ncConfig);
+ nc.start();
+ ncs.put(ncConfig.nodeId, nc);
+ }
+ }
+
+ protected void makeDir(String path) throws IOException {
+ dfs.mkdirs(new Path(path));
+ }
+
+ protected void loadFiles(String src, String dest) throws IOException {
+ dfs.copyFromLocalFile(new Path(src), new Path(dest));
+ }
+
+ protected void cleanup() throws Exception {
+ cleanupHdfs();
+ cleanupHyracks();
+ }
+
+ /**
+ * cleanup hdfs cluster
+ */
+ private void cleanupHdfs() throws IOException {
+ dfs.delete(new Path("/"), true);
+ FileSystem.closeAll();
+ dfsCluster.shutdown();
+ }
+
+ /**
+ * cleanup hyracks cluster
+ */
+ private void cleanupHyracks() throws Exception {
+ Iterator<NodeControllerService> iterator = ncs.values().iterator();
+ while (iterator.hasNext()) {
+ NodeControllerService nc = iterator.next();
+ nc.stop();
+ }
+ cc.stop();
+ }
+
+ protected static List<String> getIgnoreList(String ignorePath) throws FileNotFoundException, IOException {
+ BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
+ String s = null;
+ List<String> ignores = new ArrayList<String>();
+ while ((s = reader.readLine()) != null) {
+ ignores.add(s);
+ }
+ reader.close();
+ return ignores;
+ }
+
+ protected static boolean isIgnored(String q, List<String> ignoreList) {
+ for (String ignore : ignoreList) {
+ if (ignore.equals(q)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ protected void loadData() throws IOException {
+
+ makeDir("/tpch");
+ makeDir("/tpch/customer");
+ makeDir("/tpch/lineitem");
+ makeDir("/tpch/orders");
+ makeDir("/tpch/part");
+ makeDir("/tpch/partsupp");
+ makeDir("/tpch/supplier");
+ makeDir("/tpch/nation");
+ makeDir("/tpch/region");
+
+ makeDir("/jarod");
+
+ loadFiles(PATH_TO_DATA + "customer.tbl", "/tpch/customer/");
+ loadFiles(PATH_TO_DATA + "lineitem.tbl", "/tpch/lineitem/");
+ loadFiles(PATH_TO_DATA + "orders.tbl", "/tpch/orders/");
+ loadFiles(PATH_TO_DATA + "part.tbl", "/tpch/part/");
+ loadFiles(PATH_TO_DATA + "partsupp.tbl", "/tpch/partsupp/");
+ loadFiles(PATH_TO_DATA + "supplier.tbl", "/tpch/supplier/");
+ loadFiles(PATH_TO_DATA + "nation.tbl", "/tpch/nation/");
+ loadFiles(PATH_TO_DATA + "region.tbl", "/tpch/region/");
+
+ loadFiles(PATH_TO_DATA + "ext-gby.tbl", "/jarod/");
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractHivesterixTestCase.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractHivesterixTestCase.java
new file mode 100644
index 0000000..ae5fa05
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractHivesterixTestCase.java
@@ -0,0 +1,49 @@
+package edu.uci.ics.hivesterix.test.base;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+import junit.framework.TestCase;
+
+public class AbstractHivesterixTestCase extends TestCase {
+ protected File queryFile;
+
+    public AbstractHivesterixTestCase(String testName, File queryFile) {
+        super(testName);
+        this.queryFile = queryFile;
+    }
+
+ protected static void readFileToString(File file, StringBuilder buf) throws Exception {
+ BufferedReader result = new BufferedReader(new FileReader(file));
+ while (true) {
+ String s = result.readLine();
+ if (s == null) {
+ break;
+ } else {
+ buf.append(s);
+ buf.append('\n');
+ }
+ }
+ result.close();
+ }
+
+ protected static void writeStringToFile(File file, StringWriter buf) throws Exception {
+ PrintWriter result = new PrintWriter(new FileWriter(file));
+ result.print(buf);
+ result.close();
+ }
+
+ protected static void writeStringToFile(File file, StringBuilder buf) throws Exception {
+ PrintWriter result = new PrintWriter(new FileWriter(file));
+ result.print(buf);
+ result.close();
+ }
+
+ protected static String removeExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot);
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractTestSuiteClass.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractTestSuiteClass.java
new file mode 100644
index 0000000..72c406f
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractTestSuiteClass.java
@@ -0,0 +1,234 @@
+package edu.uci.ics.hivesterix.test.base;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import junit.framework.TestSuite;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
+import edu.uci.ics.hyracks.control.common.controllers.CCConfig;
+import edu.uci.ics.hyracks.control.common.controllers.NCConfig;
+import edu.uci.ics.hyracks.control.nc.NodeControllerService;
+
+@SuppressWarnings("deprecation")
+public abstract class AbstractTestSuiteClass extends TestSuite {
+
+ private static final String PATH_TO_HADOOP_CONF = "src/test/resources/runtimefunctionts/hadoop/conf";
+ private static final String PATH_TO_HIVE_CONF = "src/test/resources/runtimefunctionts/hive/conf/hive-default.xml";
+
+ private static final String PATH_TO_CLUSTER_CONF = "src/test/resources/runtimefunctionts/hive/conf/topology.xml";
+ private static final String PATH_TO_DATA = "src/test/resources/runtimefunctionts/data/";
+
+ private static final String clusterPropertiesPath = "conf/cluster.properties";
+ private static final String masterFilePath = "conf/master";
+
+ private Properties clusterProps;
+ private MiniDFSCluster dfsCluster;
+ private MiniMRCluster mrCluster;
+
+ private JobConf conf = new JobConf();
+ protected FileSystem dfs;
+
+ private int numberOfNC = 2;
+ private ClusterControllerService cc;
+ private Map<String, NodeControllerService> ncs = new HashMap<String, NodeControllerService>();
+
+ /**
+ * setup cluster
+ *
+ * @throws IOException
+ */
+ protected void setup() throws Exception {
+ setupHdfs();
+ setupHyracks();
+ }
+
+ private void setupHdfs() throws IOException {
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+ HiveConf hconf = new HiveConf(SessionState.class);
+ hconf.addResource(new Path(PATH_TO_HIVE_CONF));
+
+ FileSystem lfs = FileSystem.getLocal(new Configuration());
+ lfs.delete(new Path("build"), true);
+ lfs.delete(new Path("metastore_db"), true);
+
+ System.setProperty("hadoop.log.dir", "logs");
+ dfsCluster = new MiniDFSCluster(hconf, numberOfNC, true, null);
+ dfs = dfsCluster.getFileSystem();
+
+ mrCluster = new MiniMRCluster(2, dfs.getUri().toString(), 1);
+ hconf.setVar(HiveConf.ConfVars.HADOOPJT, "localhost:" + mrCluster.getJobTrackerPort());
+
+ conf = new JobConf(hconf);
+ ConfUtil.setJobConf(conf);
+
+ String fsName = conf.get("fs.default.name");
+ hconf.set("hive.metastore.warehouse.dir", fsName.concat("/tmp/hivesterix"));
+ String warehouse = hconf.get("hive.metastore.warehouse.dir");
+ dfs.mkdirs(new Path(warehouse));
+ ConfUtil.setHiveConf(hconf);
+ }
+
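+    /**
+     * Resolve the CC address from conf/master and conf/cluster.properties, then start
+     * an in-process cluster controller plus the configured number of node controllers.
+     */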
+ private void setupHyracks() throws Exception {
+ // read hive conf
+ HiveConf hconf = new HiveConf(SessionState.class);
+ hconf.addResource(new Path(PATH_TO_HIVE_CONF));
+ SessionState.start(hconf);
+ /**
+ * load the properties file if it is not loaded
+ */
+ if (clusterProps == null) {
+ clusterProps = new Properties();
+ InputStream confIn = new FileInputStream(clusterPropertiesPath);
+ clusterProps.load(confIn);
+ confIn.close();
+ }
+ BufferedReader ipReader = new BufferedReader(new InputStreamReader(new FileInputStream(masterFilePath)));
+ String masterNode = ipReader.readLine();
+ ipReader.close();
+ InetAddress[] ips = InetAddress.getAllByName(masterNode);
+ String ipAddress = null;
+ for (InetAddress ip : ips) {
+ if (ip.getAddress().length <= 4) {
+ ipAddress = ip.getHostAddress();
+ }
+ }
+ int clientPort = Integer.parseInt(clusterProps.getProperty("CC_CLIENTPORT"));
+ int netPort = Integer.parseInt(clusterProps.getProperty("CC_CLUSTERPORT"));
+
+ // start hyracks cc
+ CCConfig ccConfig = new CCConfig();
+ ccConfig.clientNetIpAddress = ipAddress;
+ ccConfig.clientNetPort = clientPort;
+ ccConfig.clusterNetPort = netPort;
+ ccConfig.profileDumpPeriod = 1000;
+ ccConfig.heartbeatPeriod = 200000000;
+ ccConfig.maxHeartbeatLapsePeriods = 200000000;
+ ccConfig.clusterTopologyDefinition = new File(PATH_TO_CLUSTER_CONF);
+ cc = new ClusterControllerService(ccConfig);
+ cc.start();
+
+ // start hyracks nc
+ for (int i = 0; i < numberOfNC; i++) {
+ NCConfig ncConfig = new NCConfig();
+ ncConfig.ccHost = ipAddress;
+ ncConfig.clusterNetIPAddress = ipAddress;
+ ncConfig.ccPort = netPort;
+ ncConfig.dataIPAddress = "127.0.0.1";
+ ncConfig.datasetIPAddress = "127.0.0.1";
+ ncConfig.nodeId = "nc" + i;
+ NodeControllerService nc = new NodeControllerService(ncConfig);
+ nc.start();
+ ncs.put(ncConfig.nodeId, nc);
+ }
+ }
+
+ protected void makeDir(String path) throws IOException {
+ dfs.mkdirs(new Path(path));
+ }
+
+ protected void loadFiles(String src, String dest) throws IOException {
+ dfs.copyFromLocalFile(new Path(src), new Path(dest));
+ }
+
+ protected void cleanup() throws Exception {
+ cleanupHdfs();
+ cleanupHyracks();
+ }
+
+ /**
+ * cleanup hdfs cluster
+ */
+ private void cleanupHdfs() throws IOException {
+ dfs.delete(new Path("/"), true);
+ FileSystem.closeAll();
+ dfsCluster.shutdown();
+ }
+
+ /**
+ * cleanup hyracks cluster
+ */
+ private void cleanupHyracks() throws Exception {
+ Iterator<NodeControllerService> iterator = ncs.values().iterator();
+ while (iterator.hasNext()) {
+ NodeControllerService nc = iterator.next();
+ nc.stop();
+ }
+ cc.stop();
+ }
+
+ protected static List<String> getIgnoreList(String ignorePath) throws FileNotFoundException, IOException {
+ BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
+ String s = null;
+ List<String> ignores = new ArrayList<String>();
+ while ((s = reader.readLine()) != null) {
+ ignores.add(s);
+ }
+ reader.close();
+ return ignores;
+ }
+
+ protected static boolean isIgnored(String q, List<String> ignoreList) {
+ for (String ignore : ignoreList) {
+ if (q.indexOf(ignore) >= 0) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ protected void loadData() throws IOException {
+
+ makeDir("/tpch");
+ makeDir("/tpch/customer");
+ makeDir("/tpch/lineitem");
+ makeDir("/tpch/orders");
+ makeDir("/tpch/part");
+ makeDir("/tpch/partsupp");
+ makeDir("/tpch/supplier");
+ makeDir("/tpch/nation");
+ makeDir("/tpch/region");
+
+ makeDir("/test");
+ makeDir("/test/joinsrc1");
+ makeDir("/test/joinsrc2");
+
+ loadFiles(PATH_TO_DATA + "customer.tbl", "/tpch/customer/");
+ loadFiles(PATH_TO_DATA + "lineitem.tbl", "/tpch/lineitem/");
+ loadFiles(PATH_TO_DATA + "orders.tbl", "/tpch/orders/");
+ loadFiles(PATH_TO_DATA + "part.tbl", "/tpch/part/");
+ loadFiles(PATH_TO_DATA + "partsupp.tbl", "/tpch/partsupp/");
+ loadFiles(PATH_TO_DATA + "supplier.tbl", "/tpch/supplier/");
+ loadFiles(PATH_TO_DATA + "nation.tbl", "/tpch/nation/");
+ loadFiles(PATH_TO_DATA + "region.tbl", "/tpch/region/");
+
+ loadFiles(PATH_TO_DATA + "large_card_join_src.tbl", "/test/joinsrc1/");
+ loadFiles(PATH_TO_DATA + "large_card_join_src_small.tbl", "/test/joinsrc2/");
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/datagen/RecordBalance.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/datagen/RecordBalance.java
new file mode 100644
index 0000000..ac029b1
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/datagen/RecordBalance.java
@@ -0,0 +1,75 @@
+package edu.uci.ics.hivesterix.test.datagen;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.TextInputFormat;
+
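+/**
+ * Identity map/reduce job that rewrites each TPC-H table with the number of reducers
+ * given in args[0], re-balancing the records across that many output files.
+ */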
+@SuppressWarnings("deprecation")
+public class RecordBalance {
+
+    private static String confPath = System.getenv("HADOOP_HOME");
+ private static Path[] inputPaths = { new Path("/tpch/100x/customer"), new Path("/tpch/100x/nation"),
+ new Path("/tpch/100x/region"), new Path("/tpch/100x/lineitem"), new Path("/tpch/100x/orders"),
+ new Path("/tpch/100x/part"), new Path("/tpch/100x/partsupp"), new Path("/tpch/100x/supplier") };
+
+ private static Path[] outputPaths = { new Path("/tpch/100/customer"), new Path("/tpch/100/nation"),
+ new Path("/tpch/100/region"), new Path("/tpch/100/lineitem"), new Path("/tpch/100/orders"),
+ new Path("/tpch/100/part"), new Path("/tpch/100/partsupp"), new Path("/tpch/100/supplier") };
+
+ public static class MapRecordOnly extends MapReduceBase implements Mapper<LongWritable, Text, LongWritable, Text> {
+
+ public void map(LongWritable id, Text inputValue, OutputCollector<LongWritable, Text> output, Reporter reporter)
+ throws IOException {
+ output.collect(id, inputValue);
+ }
+ }
+
+ public static class ReduceRecordOnly extends MapReduceBase implements
+ Reducer<LongWritable, Text, NullWritable, Text> {
+
+ NullWritable key = NullWritable.get();
+
+ public void reduce(LongWritable inputKey, Iterator<Text> inputValue,
+ OutputCollector<NullWritable, Text> output, Reporter reporter) throws IOException {
+ while (inputValue.hasNext())
+ output.collect(key, inputValue.next());
+ }
+ }
+
+ public static void main(String[] args) throws IOException {
+
+ for (int i = 0; i < inputPaths.length; i++) {
+ JobConf job = new JobConf(RecordBalance.class);
+ job.addResource(new Path(confPath + "/core-site.xml"));
+ job.addResource(new Path(confPath + "/mapred-site.xml"));
+ job.addResource(new Path(confPath + "/hdfs-site.xml"));
+
+ job.setJobName(RecordBalance.class.getSimpleName());
+ job.setMapperClass(MapRecordOnly.class);
+ job.setReducerClass(ReduceRecordOnly.class);
+ job.setMapOutputKeyClass(LongWritable.class);
+ job.setMapOutputValueClass(Text.class);
+
+ job.setInputFormat(TextInputFormat.class);
+ FileInputFormat.setInputPaths(job, inputPaths[i]);
+ FileOutputFormat.setOutputPath(job, outputPaths[i]);
+ job.setNumReduceTasks(Integer.parseInt(args[0]));
+
+ JobClient.runJob(job);
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/legacy/LegacyTestCase.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/legacy/LegacyTestCase.java
new file mode 100644
index 0000000..32c1c3f
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/legacy/LegacyTestCase.java
@@ -0,0 +1,142 @@
+package edu.uci.ics.hivesterix.test.legacy;
+
+import java.io.File;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
+import org.junit.Test;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
+
+public class LegacyTestCase extends AbstractHivesterixTestCase {
+ private File resultFile;
+ private FileSystem dfs;
+
+ public LegacyTestCase(File queryFile, File resultFile) {
+ super("legacy", queryFile);
+ this.queryFile = queryFile;
+ this.resultFile = resultFile;
+ }
+
+ @Test
+ public void testRuntimeFunction() throws Exception {
+ StringBuilder queryString = new StringBuilder();
+ readFileToString(queryFile, queryString);
+ String[] queries = queryString.toString().split(";");
+ StringWriter sw = new StringWriter();
+
+ HiveConf hconf = ConfUtil.getHiveConf();
+ Driver driver = new Driver(hconf, new PrintWriter(sw));
+ driver.init();
+
+ dfs = FileSystem.get(ConfUtil.getJobConf());
+
+ int i = 0;
+ for (String query : queries) {
+ if (i == queries.length - 1)
+ break;
+ driver.run(query);
+ driver.clear();
+ i++;
+ }
+
+ String warehouse = hconf.get("hive.metastore.warehouse.dir");
+ String tableName = removeExt(resultFile.getName());
+ String directory = warehouse + "/" + tableName + "/";
+ String localDirectory = "tmp";
+
+ FileStatus[] files = dfs.listStatus(new Path(directory));
+ FileSystem lfs = null;
+ if (files == null) {
+ lfs = FileSystem.getLocal(ConfUtil.getJobConf());
+ files = lfs.listStatus(new Path(directory));
+ }
+
+ File resultDirectory = new File(localDirectory + "/" + tableName);
+ deleteDir(resultDirectory);
+ resultDirectory.mkdir();
+
+ for (FileStatus fs : files) {
+ Path src = fs.getPath();
+ if (src.getName().indexOf("crc") >= 0)
+ continue;
+
+ String destStr = localDirectory + "/" + tableName + "/" + src.getName();
+ Path dest = new Path(destStr);
+ if (lfs != null) {
+ lfs.copyToLocalFile(src, dest);
+ dfs.copyFromLocalFile(dest, new Path(directory));
+ } else
+ dfs.copyToLocalFile(src, dest);
+ }
+
+ File[] rFiles = resultDirectory.listFiles();
+ StringBuilder sb = new StringBuilder();
+ for (File r : rFiles) {
+ if (r.getName().indexOf("crc") >= 0)
+ continue;
+ readFileToString(r, sb);
+ }
+ deleteDir(resultDirectory);
+
+ StringBuilder buf = new StringBuilder();
+ readFileToString(resultFile, buf);
+ if (!equal(buf, sb)) {
+ throw new Exception("Result for " + queryFile + " changed:\n" + sw.toString());
+ }
+ }
+
+ private void deleteDir(File resultDirectory) {
+ if (resultDirectory.exists()) {
+ File[] rFiles = resultDirectory.listFiles();
+ for (File r : rFiles)
+ r.delete();
+ resultDirectory.delete();
+ }
+ }
+
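+    /**
+     * Row-by-row comparison; fields containing a '.' are compared as floats.
+     */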
+ private boolean equal(StringBuilder sb1, StringBuilder sb2) {
+ String s1 = sb1.toString();
+ String s2 = sb2.toString();
+ String[] rowsOne = s1.split("\n");
+ String[] rowsTwo = s2.split("\n");
+
+ if (rowsOne.length != rowsTwo.length)
+ return false;
+
+ for (int i = 0; i < rowsOne.length; i++) {
+ String row1 = rowsOne[i];
+ String row2 = rowsTwo[i];
+
+ if (row1.equals(row2))
+ continue;
+
+            // Hive separates fields with its default delimiter ctrl-A (\001)
+            String[] fields1 = row1.split("\1");
+            String[] fields2 = row2.split("\1");
+
+ for (int j = 0; j < fields1.length; j++) {
+ if (fields1[j].equals(fields2[j])) {
+ continue;
+ } else if (fields1[j].indexOf('.') < 0) {
+ return false;
+ } else {
+ Float float1 = Float.parseFloat(fields1[j]);
+ Float float2 = Float.parseFloat(fields2[j]);
+
+ if (Math.abs(float1 - float2) == 0)
+ continue;
+ else
+ return false;
+ }
+ }
+ }
+
+ return true;
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestCase.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestCase.java
new file mode 100644
index 0000000..0e2a5d5
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestCase.java
@@ -0,0 +1,53 @@
+package edu.uci.ics.hivesterix.test.optimizer;
+
+import java.io.File;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
+import org.junit.Test;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
+
+public class OptimizerTestCase extends AbstractHivesterixTestCase {
+ private File resultFile;
+
+ OptimizerTestCase(File queryFile, File resultFile) {
+ super("testOptimizer", queryFile);
+ this.queryFile = queryFile;
+ this.resultFile = resultFile;
+ }
+
+ @Test
+ public void testOptimizer() throws Exception {
+ StringBuilder queryString = new StringBuilder();
+ readFileToString(queryFile, queryString);
+ String[] queries = queryString.toString().split(";");
+ StringWriter sw = new StringWriter();
+
+ HiveConf hconf = ConfUtil.getHiveConf();
+ Driver driver = new Driver(hconf, new PrintWriter(sw));
+ driver.init();
+
+ int i = 0;
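+        // Run DDL and set statements for real, but only compile the remaining queries
+        // so their optimized plans are captured by the writer passed to the Driver.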
+ for (String query : queries) {
+ if (i == queries.length - 1)
+ break;
+ if (query.toLowerCase().indexOf("create") >= 0 || query.toLowerCase().indexOf("drop") >= 0
+ || query.toLowerCase().indexOf("set") >= 0 || query.toLowerCase().startsWith("\n\ncreate")
+ || query.toLowerCase().startsWith("\n\ndrop") || query.toLowerCase().startsWith("\n\nset"))
+ driver.run(query);
+ else
+ driver.compile(query);
+ driver.clear();
+ i++;
+ }
+ StringBuilder buf = new StringBuilder();
+ readFileToString(resultFile, buf);
+ if (!buf.toString().equals(sw.toString())) {
+ throw new Exception("Result for " + queryFile + " changed:\n" + sw.toString());
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuitGenerator.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuitGenerator.java
new file mode 100644
index 0000000..c6b788f
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuitGenerator.java
@@ -0,0 +1,75 @@
+package edu.uci.ics.hivesterix.test.optimizer;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.List;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
+
+public class OptimizerTestSuitGenerator extends AbstractTestSuiteClass {
+ private static final String PATH_TO_QUERIES = "src/test/resources/optimizerts/queries/";
+ private static final String PATH_TO_RESULTS = "src/test/resources/optimizerts/results/";
+ private static final String PATH_TO_IGNORES = "src/test/resources/optimizerts/ignore.txt";
+
+ private static final String FILE_EXTENSION_OF_RESULTS = "plan";
+
+ public static Test suite() throws UnsupportedEncodingException, FileNotFoundException, IOException {
+ List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
+ File testData = new File(PATH_TO_QUERIES);
+ File[] queries = testData.listFiles();
+ OptimizerTestSuitGenerator testSuite = new OptimizerTestSuitGenerator();
+ // set hdfs and hyracks cluster, and load test data to hdfs
+ try {
+ testSuite.setup();
+ testSuite.loadData();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+
+ for (File qFile : queries) {
+ if (isIgnored(qFile.getName(), ignores))
+ continue;
+
+ if (qFile.isFile()) {
+                String resultFileName = hiveExtToResExt(qFile.getName());
+ File rFile = new File(PATH_TO_RESULTS + resultFileName);
+ testSuite.addTest(new OptimizerTestSuiteCaseGenerator(qFile, rFile));
+ }
+ }
+ return testSuite;
+ }
+
+    private static String hiveExtToResExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
+ }
+
+ /**
+ * Runs the tests and collects their result in a TestResult.
+ */
+ @Override
+ public void run(TestResult result) {
+
+ int testCount = countTestCases();
+ for (int i = 0; i < testCount; i++) {
+ Test each = this.testAt(i);
+ if (result.shouldStop())
+ break;
+ runTest(each, result);
+ }
+
+ // cleanup hdfs and hyracks cluster
+ try {
+ cleanup();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuite.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuite.java
new file mode 100644
index 0000000..8ac4e86
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuite.java
@@ -0,0 +1,53 @@
+package edu.uci.ics.hivesterix.test.optimizer;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.List;
+
+import junit.framework.Test;
+import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
+
+public class OptimizerTestSuite extends AbstractTestSuiteClass {
+
+ private static final String PATH_TO_QUERIES = "src/test/resources/optimizerts/queries/";
+ private static final String PATH_TO_RESULTS = "src/test/resources/optimizerts/results/";
+ private static final String PATH_TO_IGNORES = "src/test/resources/optimizerts/ignore.txt";
+
+ private static final String FILE_EXTENSION_OF_RESULTS = "plan";
+
+ public static Test suite() throws UnsupportedEncodingException, FileNotFoundException, IOException {
+ List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
+ File testData = new File(PATH_TO_QUERIES);
+ File[] queries = testData.listFiles();
+ OptimizerTestSuite testSuite = new OptimizerTestSuite();
+
+ // set hdfs and hyracks cluster, and load test data to hdfs
+ try {
+ testSuite.setup();
+ testSuite.loadData();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+
+ for (File qFile : queries) {
+ if (isIgnored(qFile.getName(), ignores))
+ continue;
+
+ if (qFile.isFile() && qFile.getName().startsWith("h11_")) {
+ String resultFileName = hiveExtToResExt(qFile.getName());
+ File rFile = new File(PATH_TO_RESULTS + resultFileName);
+ testSuite.addTest(new OptimizerTestCase(qFile, rFile));
+ }
+ }
+ return testSuite;
+ }
+
+ private static String hiveExtToResExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuiteCaseGenerator.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuiteCaseGenerator.java
new file mode 100644
index 0000000..ee82eb3
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuiteCaseGenerator.java
@@ -0,0 +1,50 @@
+package edu.uci.ics.hivesterix.test.optimizer;
+
+import java.io.File;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
+import org.junit.Test;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
+
+public class OptimizerTestSuiteCaseGenerator extends AbstractHivesterixTestCase {
+ private File resultFile;
+
+ OptimizerTestSuiteCaseGenerator(File queryFile, File resultFile) {
+ super("testOptimizer", queryFile);
+ this.queryFile = queryFile;
+ this.resultFile = resultFile;
+ }
+
+ @Test
+ public void testOptimizer() throws Exception {
+ StringBuilder queryString = new StringBuilder();
+ readFileToString(queryFile, queryString);
+ String[] queries = queryString.toString().split(";");
+ StringWriter sw = new StringWriter();
+
+ HiveConf hconf = ConfUtil.getHiveConf();
+ Driver driver = new Driver(hconf, new PrintWriter(sw));
+ driver.init();
+
+ int i = 0;
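+        // Same policy as OptimizerTestCase: execute DDL/set statements, compile the rest
+        // so the generated plans are written to sw and then saved as the expected result.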
+ for (String query : queries) {
+ if (i == queries.length - 1)
+ break;
+ if (query.toLowerCase().indexOf("create") >= 0 || query.toLowerCase().indexOf("drop") >= 0
+ || query.toLowerCase().indexOf("set") >= 0 || query.toLowerCase().startsWith("\n\ncreate")
+ || query.toLowerCase().startsWith("\n\ndrop") || query.toLowerCase().startsWith("\n\nset"))
+ driver.run(query);
+ else
+ driver.compile(query);
+ driver.clear();
+ i++;
+ }
+ sw.close();
+ writeStringToFile(resultFile, sw);
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestCase.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestCase.java
new file mode 100644
index 0000000..fdc4c68
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestCase.java
@@ -0,0 +1,147 @@
+package edu.uci.ics.hivesterix.test.runtimefunction;
+
+import java.io.File;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
+import org.junit.Test;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
+
+public class RuntimeFunctionTestCase extends AbstractHivesterixTestCase {
+ private File resultFile;
+ private FileSystem dfs;
+
+ RuntimeFunctionTestCase(File queryFile, File resultFile) {
+ super("testRuntimeFunction", queryFile);
+ this.queryFile = queryFile;
+ this.resultFile = resultFile;
+ }
+
+ @Test
+ public void testRuntimeFunction() throws Exception {
+ StringBuilder queryString = new StringBuilder();
+ readFileToString(queryFile, queryString);
+ String[] queries = queryString.toString().split(";");
+ StringWriter sw = new StringWriter();
+
+ HiveConf hconf = ConfUtil.getHiveConf();
+ Driver driver = new Driver(hconf, new PrintWriter(sw));
+ driver.init();
+ // Driver driver = new Driver(hconf);
+
+ dfs = FileSystem.get(ConfUtil.getJobConf());
+
+ int i = 0;
+ for (String query : queries) {
+ if (i == queries.length - 1)
+ break;
+ driver.run(query);
+ driver.clear();
+ i++;
+ }
+
+ String warehouse = hconf.get("hive.metastore.warehouse.dir");
+ String tableName = removeExt(resultFile.getName());
+ String directory = warehouse + "/" + tableName + "/";
+ String localDirectory = "tmp";
+
+ FileStatus[] files = dfs.listStatus(new Path(directory));
+ FileSystem lfs = null;
+ if (files == null) {
+ lfs = FileSystem.getLocal(ConfUtil.getJobConf());
+ files = lfs.listStatus(new Path(directory));
+ }
+
+ File resultDirectory = new File(localDirectory + "/" + tableName);
+ deleteDir(resultDirectory);
+ resultDirectory.mkdir();
+
+ for (FileStatus fs : files) {
+ Path src = fs.getPath();
+ if (src.getName().indexOf("crc") >= 0)
+ continue;
+
+ String destStr = localDirectory + "/" + tableName + "/" + src.getName();
+ Path dest = new Path(destStr);
+ if (lfs != null) {
+ lfs.copyToLocalFile(src, dest);
+ dfs.copyFromLocalFile(dest, new Path(directory));
+ } else
+ dfs.copyToLocalFile(src, dest);
+ }
+
+ File[] rFiles = resultDirectory.listFiles();
+ StringBuilder sb = new StringBuilder();
+ for (File r : rFiles) {
+ if (r.getName().indexOf("crc") >= 0)
+ continue;
+ readFileToString(r, sb);
+ }
+
+ StringBuilder buf = new StringBuilder();
+ readFileToString(resultFile, buf);
+ StringBuffer errorMsg = new StringBuffer();
+ if (!equal(buf, sb, errorMsg)) {
+ throw new Exception("Result for " + queryFile + " changed:\n" + errorMsg.toString());
+ }
+ deleteDir(resultDirectory);
+ }
+
+ private void deleteDir(File resultDirectory) {
+ if (resultDirectory.exists()) {
+ File[] rFiles = resultDirectory.listFiles();
+ for (File r : rFiles)
+ r.delete();
+ resultDirectory.delete();
+ }
+ }
+
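+    /**
+     * Row-by-row comparison; numeric fields are compared as floats and the first
+     * mismatch is described in errorMsg.
+     */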
+ private boolean equal(StringBuilder sb1, StringBuilder sb2, StringBuffer errorMsg) {
+ String s1 = sb1.toString();
+ String s2 = sb2.toString();
+ String[] rowsOne = s1.split("\n");
+ String[] rowsTwo = s2.split("\n");
+
+ if (rowsOne.length != rowsTwo.length)
+ return false;
+
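+ // rows match if every field is textually equal, or numerically equal for fields containing a decimal point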
+ for (int i = 0; i < rowsOne.length; i++) {
+ String row1 = rowsOne[i];
+ String row2 = rowsTwo[i];
+
+ if (row1.equals(row2))
+ continue;
+
+ // assumption: result rows use Hive's default field delimiter ("\1")
+ String[] fields1 = row1.split("\1");
+ String[] fields2 = row2.split("\1");
+ if (fields1.length != fields2.length)
+ return false;
+
+ for (int j = 0; j < fields1.length; j++) {
+ if (fields1[j].equals(fields2[j])) {
+ continue;
+ } else if (fields1[j].indexOf('.') < 0) {
+ errorMsg.append("line " + i + " column " + j + ": " + fields2[j] + " expected " + fields1[j]);
+ return false;
+ } else {
+ Float float1 = Float.parseFloat(fields1[j]);
+ Float float2 = Float.parseFloat(fields2[j]);
+
+ if (Math.abs(float1 - float2) == 0)
+ continue;
+ else {
+ errorMsg.append("line " + i + " column " + j + ": " + fields2[j] + " expected " + fields1[j]);
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java
new file mode 100644
index 0000000..9610497
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java
@@ -0,0 +1,74 @@
+package edu.uci.ics.hivesterix.test.runtimefunction;
+
+import java.io.File;
+import java.util.List;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
+
+public class RuntimeFunctionTestSuite extends AbstractTestSuiteClass {
+
+ private static final String PATH_TO_QUERIES = "src/test/resources/runtimefunctionts/queries/";
+ private static final String PATH_TO_RESULTS = "src/test/resources/runtimefunctionts/results/";
+ private static final String PATH_TO_IGNORES = "src/test/resources/runtimefunctionts/ignore.txt";
+
+ private static final String FILE_EXTENSION_OF_RESULTS = "result";
+
+ public static Test suite() throws Exception {
+ List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
+ File testData = new File(PATH_TO_QUERIES);
+ File[] queries = testData.listFiles();
+ RuntimeFunctionTestSuite testSuite = new RuntimeFunctionTestSuite();
+
+ // set hdfs and hyracks cluster, and load test data to hdfs
+ try {
+ testSuite.setup();
+ testSuite.loadData();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e);
+ }
+
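+ // add one test case per non-ignored query file, paired with its expected .result file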
+ for (File qFile : queries) {
+ if (isIgnored(qFile.getName(), ignores))
+ continue;
+
+ if (qFile.isFile()) {
+ String resultFileName = hiveExtToResExt(qFile.getName());
+ File rFile = new File(PATH_TO_RESULTS + resultFileName);
+ testSuite.addTest(new RuntimeFunctionTestCase(qFile, rFile));
+ }
+ }
+ return testSuite;
+ }
+
+ private static String hiveExtToResExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
+ }
+
+ /**
+ * Runs the tests and collects their result in a TestResult.
+ */
+ @Override
+ public void run(TestResult result) {
+
+ int testCount = countTestCases();
+ for (int i = 0; i < testCount; i++) {
+ Test each = this.testAt(i);
+ if (result.shouldStop())
+ break;
+ runTest(each, result);
+ }
+
+ // cleanup hdfs and hyracks cluster
+ try {
+ cleanup();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e);
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteCaseGenerator.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteCaseGenerator.java
new file mode 100644
index 0000000..a416759
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteCaseGenerator.java
@@ -0,0 +1,99 @@
+package edu.uci.ics.hivesterix.test.runtimefunction;
+
+import java.io.File;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
+import org.junit.Test;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
+
+public class RuntimeFunctionTestSuiteCaseGenerator extends AbstractHivesterixTestCase {
+ private File resultFile;
+ private FileSystem dfs;
+
+ RuntimeFunctionTestSuiteCaseGenerator(File queryFile, File resultFile) {
+ super("testRuntimeFunction", queryFile);
+ this.queryFile = queryFile;
+ this.resultFile = resultFile;
+ }
+
+ @Test
+ public void testRuntimeFunction() throws Exception {
+ StringBuilder queryString = new StringBuilder();
+ readFileToString(queryFile, queryString);
+ String[] queries = queryString.toString().split(";");
+ StringWriter sw = new StringWriter();
+
+ HiveConf hconf = ConfUtil.getHiveConf();
+ Driver driver = new Driver(hconf, new PrintWriter(sw));
+ driver.init();
+
+ dfs = FileSystem.get(ConfUtil.getJobConf());
+
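+ // run every statement through the driver except the last fragment produced by the split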
+ int i = 0;
+ for (String query : queries) {
+ if (i == queries.length - 1)
+ break;
+ driver.run(query);
+ driver.clear();
+ i++;
+ }
+
+ String warehouse = hconf.get("hive.metastore.warehouse.dir");
+ String tableName = removeExt(resultFile.getName());
+ String directory = warehouse + "/" + tableName + "/";
+ String localDirectory = "tmp";
+
+ FileStatus[] files = dfs.listStatus(new Path(directory));
+ FileSystem lfs = null;
+ if (files == null) {
+ lfs = FileSystem.getLocal(ConfUtil.getJobConf());
+ files = lfs.listStatus(new Path(directory));
+ }
+
+ File resultDirectory = new File(localDirectory + "/" + tableName);
+ deleteDir(resultDirectory);
+ resultDirectory.mkdirs();
+
+ for (FileStatus fs : files) {
+ Path src = fs.getPath();
+ if (src.getName().indexOf("crc") >= 0)
+ continue;
+
+ String destStr = localDirectory + "/" + tableName + "/" + src.getName();
+ Path dest = new Path(destStr);
+ if (lfs != null) {
+ lfs.copyToLocalFile(src, dest);
+ dfs.copyFromLocalFile(dest, new Path(directory));
+ } else
+ dfs.copyToLocalFile(src, dest);
+ }
+
+ File[] rFiles = resultDirectory.listFiles();
+ StringBuilder sb = new StringBuilder();
+ for (File r : rFiles) {
+ if (r.getName().indexOf("crc") >= 0)
+ continue;
+ readFileToString(r, sb);
+ }
+ deleteDir(resultDirectory);
+
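+ // write the collected output out as the new expected .result file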
+ writeStringToFile(resultFile, sb);
+ }
+
+ private void deleteDir(File resultDirectory) {
+ if (resultDirectory.exists()) {
+ File[] rFiles = resultDirectory.listFiles();
+ for (File r : rFiles)
+ r.delete();
+ resultDirectory.delete();
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteGenerator.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteGenerator.java
new file mode 100644
index 0000000..ca2bd6d
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteGenerator.java
@@ -0,0 +1,74 @@
+package edu.uci.ics.hivesterix.test.runtimefunction;
+
+import java.io.File;
+import java.util.List;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
+
+public class RuntimeFunctionTestSuiteGenerator extends AbstractTestSuiteClass {
+
+ private static final String PATH_TO_QUERIES = "src/test/resources/runtimefunctionts/queries/";
+ private static final String PATH_TO_RESULTS = "src/test/resources/runtimefunctionts/results/";
+ private static final String PATH_TO_IGNORES = "src/test/resources/runtimefunctionts/ignore.txt";
+
+ private static final String FILE_EXTENSION_OF_RESULTS = "result";
+
+ public static Test suite() throws Exception {
+ List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
+ File testData = new File(PATH_TO_QUERIES);
+ File[] queries = testData.listFiles();
+ RuntimeFunctionTestSuiteGenerator testSuite = new RuntimeFunctionTestSuiteGenerator();
+
+ // set hdfs and hyracks cluster, and load test data to hdfs
+ try {
+ testSuite.setup();
+ testSuite.loadData();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e);
+ }
+
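+ // only regenerate expected results for the q16_* query files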
+ for (File qFile : queries) {
+ if (isIgnored(qFile.getName(), ignores))
+ continue;
+
+ if (qFile.isFile() && qFile.getName().startsWith("q16_")) {
+ String resultFileName = hiveExtToResExt(qFile.getName());
+ File rFile = new File(PATH_TO_RESULTS + resultFileName);
+ testSuite.addTest(new RuntimeFunctionTestSuiteCaseGenerator(qFile, rFile));
+ }
+ }
+ return testSuite;
+ }
+
+ private static String hiveExtToResExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
+ }
+
+ /**
+ * Runs the tests and collects their result in a TestResult.
+ */
+ @Override
+ public void run(TestResult result) {
+
+ int testCount = countTestCases();
+ for (int i = 0; i < testCount; i++) {
+ Test each = this.testAt(i);
+ if (result.shouldStop())
+ break;
+ runTest(each, result);
+ }
+
+ // cleanup hdfs and hyracks cluster
+ try {
+ cleanup();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e);
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/serde/SerDeTest.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/serde/SerDeTest.java
new file mode 100644
index 0000000..cd39c5a
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/serde/SerDeTest.java
@@ -0,0 +1,217 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.test.serde;
+
+import java.util.List;
+import java.util.Properties;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive;
+import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
+
+/**
+ * Tests for LazySimpleSerDe and Hivesterix's LazySerDe.
+ */
+@SuppressWarnings({ "deprecation", "rawtypes" })
+public class SerDeTest extends TestCase {
+
+ /**
+ * Test LazySimpleSerDe deserialization together with LazySerDe serialization.
+ */
+ public void testLazySimpleSerDe() throws Throwable {
+ try {
+ // Create the SerDe
+ LazySimpleSerDe serDe = new LazySimpleSerDe();
+ Configuration conf = new Configuration();
+ Properties tbl = createProperties();
+ serDe.initialize(conf, tbl);
+
+ LazySerDe outputSerde = new LazySerDe();
+ outputSerde.initialize(conf, tbl);
+
+ // Data
+ String s = "123\t456\t789\t1000\t5.3\thive and hadoop\t1\tqf";
+
+ byte[] bytes = s.getBytes();
+ Writable bytesWritable = new BytesWritable(bytes);
+
+ // Test
+ // deserializeAndSerialize(serDe, t, s, expectedFieldsData);
+ Object row = serDe.deserialize(bytesWritable); // deserialize with the built-in LazySimpleSerDe
+ StructObjectInspector simpleInspector = (StructObjectInspector) serDe.getObjectInspector();
+ List<Object> fields = simpleInspector.getStructFieldsDataAsList(row);
+ List<? extends StructField> fieldRefs = simpleInspector.getAllStructFieldRefs();
+
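+ // serialize each deserialized field with Hivesterix's LazySerDe and print its serialized size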
+ int i = 0;
+ for (Object field : fields) {
+ BytesWritable fieldWritable = (BytesWritable) outputSerde.serialize(field, fieldRefs.get(i)
+ .getFieldObjectInspector());
+ System.out.print(fieldWritable.getSize() + "|");
+ i++;
+ }
+
+ // Writable output = outputSerde.serialize(row, serDe
+ // .getObjectInspector());
+ // System.out.println(output);
+ //
+ // Object row2 = outputSerde.deserialize(output);
+ // Writable output2 = serDe.serialize(row2, outputSerde
+ // .getObjectInspector());
+ // System.out.println(output2);
+
+ // System.out.println(output);
+ // deserializeAndSerialize(outputSerde, t, s, expectedFieldsData);
+
+ } catch (Throwable e) {
+ e.printStackTrace();
+ throw e;
+ }
+ }
+
+ private void deserializeAndSerialize(SerDe serDe, Text t, String s, Object[] expectedFieldsData)
+ throws SerDeException {
+ // Get the row structure
+ StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
+ List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
+ assertEquals(8, fieldRefs.size());
+
+ // Deserialize
+ Object row = serDe.deserialize(t);
+ for (int i = 0; i < fieldRefs.size(); i++) {
+ Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
+ if (fieldData != null) {
+ fieldData = ((LazyPrimitive) fieldData).getWritableObject();
+ }
+ assertEquals("Field " + i, expectedFieldsData[i], fieldData);
+ }
+ // Serialize
+ assertEquals(Text.class, serDe.getSerializedClass());
+ Text serializedText = (Text) serDe.serialize(row, oi);
+ assertEquals("Serialized data", s, serializedText.toString());
+ }
+
+ private Properties createProperties() {
+ Properties tbl = new Properties();
+
+ // Set the configuration parameters
+ tbl.setProperty(Constants.SERIALIZATION_FORMAT, "9");
+ tbl.setProperty("columns", "abyte,ashort,aint,along,adouble,astring,anullint,anullstring");
+ tbl.setProperty("columns.types", "tinyint:smallint:int:bigint:double:string:int:string");
+ tbl.setProperty(Constants.SERIALIZATION_NULL_FORMAT, "NULL");
+ return tbl;
+ }
+
+ /**
+ * Test the LazySimpleSerDe class with LastColumnTakesRest option.
+ */
+ public void testLazySimpleSerDeLastColumnTakesRest() throws Throwable {
+ try {
+ // Create the SerDe
+ LazySimpleSerDe serDe = new LazySimpleSerDe();
+ Configuration conf = new Configuration();
+ Properties tbl = createProperties();
+ tbl.setProperty(Constants.SERIALIZATION_LAST_COLUMN_TAKES_REST, "true");
+ serDe.initialize(conf, tbl);
+
+ // Data
+ Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t");
+ String s = "123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\ta\tb\t";
+ Object[] expectedFieldsData = { new ByteWritable((byte) 123), new ShortWritable((short) 456),
+ new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"),
+ null, new Text("a\tb\t") };
+
+ // Test
+ deserializeAndSerialize(serDe, t, s, expectedFieldsData);
+
+ } catch (Throwable e) {
+ e.printStackTrace();
+ throw e;
+ }
+ }
+
+ /**
+ * Test the LazySimpleSerDe class with extra columns.
+ */
+ public void testLazySimpleSerDeExtraColumns() throws Throwable {
+ try {
+ // Create the SerDe
+ LazySimpleSerDe serDe = new LazySimpleSerDe();
+ Configuration conf = new Configuration();
+ Properties tbl = createProperties();
+ serDe.initialize(conf, tbl);
+
+ // Data
+ Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t");
+ String s = "123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\ta";
+ Object[] expectedFieldsData = { new ByteWritable((byte) 123), new ShortWritable((short) 456),
+ new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"),
+ null, new Text("a") };
+
+ // Test
+ deserializeAndSerialize(serDe, t, s, expectedFieldsData);
+
+ } catch (Throwable e) {
+ e.printStackTrace();
+ throw e;
+ }
+ }
+
+ /**
+ * Test the LazySimpleSerDe class with missing columns.
+ */
+ public void testLazySimpleSerDeMissingColumns() throws Throwable {
+ try {
+ // Create the SerDe
+ LazySimpleSerDe serDe = new LazySimpleSerDe();
+ Configuration conf = new Configuration();
+ Properties tbl = createProperties();
+ serDe.initialize(conf, tbl);
+
+ // Data
+ Text t = new Text("123\t456\t789\t1000\t5.3\t");
+ String s = "123\t456\t789\t1000\t5.3\t\tNULL\tNULL";
+ Object[] expectedFieldsData = { new ByteWritable((byte) 123), new ShortWritable((short) 456),
+ new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), new Text(""), null, null };
+
+ // Test
+ deserializeAndSerialize(serDe, t, s, expectedFieldsData);
+
+ } catch (Throwable e) {
+ e.printStackTrace();
+ throw e;
+ }
+ }
+
+}
diff --git a/hivesterix/src/test/resources/log4j.properties b/hivesterix/hivesterix-dist/src/test/resources/log4j.properties
similarity index 100%
rename from hivesterix/src/test/resources/log4j.properties
rename to hivesterix/hivesterix-dist/src/test/resources/log4j.properties
diff --git a/hivesterix/src/test/resources/logging.properties b/hivesterix/hivesterix-dist/src/test/resources/logging.properties
similarity index 100%
rename from hivesterix/src/test/resources/logging.properties
rename to hivesterix/hivesterix-dist/src/test/resources/logging.properties
diff --git a/hivesterix/src/test/resources/optimizerts/hive/conf/hive-default.xml b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/hive/conf/hive-default.xml
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/hive/conf/hive-default.xml
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/hive/conf/hive-default.xml
diff --git a/hivesterix/src/test/resources/optimizerts/ignore.txt b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/ignore.txt
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/ignore.txt
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/ignore.txt
diff --git a/hivesterix/src/test/resources/optimizerts/queries/h11_share_scan.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/h11_share_scan.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/h11_share_scan.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/h11_share_scan.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/h12_select_struct.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/h12_select_struct.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/h12_select_struct.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/h12_select_struct.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q10_returned_item.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q10_returned_item.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q10_returned_item.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q10_returned_item.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q11_important_stock.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q11_important_stock.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q11_important_stock.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q11_important_stock.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q12_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q12_shipping.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q12_shipping.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q12_shipping.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q13_customer_distribution.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q13_customer_distribution.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q13_customer_distribution.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q13_customer_distribution.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q14_promotion_effect.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q14_promotion_effect.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q14_promotion_effect.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q14_promotion_effect.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q15_top_supplier.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q15_top_supplier.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q15_top_supplier.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q15_top_supplier.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q16_parts_supplier_relationship.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q16_parts_supplier_relationship.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q16_parts_supplier_relationship.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q16_parts_supplier_relationship.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q17_small_quantity_order_revenue.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q17_small_quantity_order_revenue.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q17_small_quantity_order_revenue.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q17_small_quantity_order_revenue.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q21_suppliers_who_kept_orders_waiting.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q21_suppliers_who_kept_orders_waiting.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q21_suppliers_who_kept_orders_waiting.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q21_suppliers_who_kept_orders_waiting.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q22_global_sales_opportunity.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q22_global_sales_opportunity.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q22_global_sales_opportunity.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q22_global_sales_opportunity.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q2_minimum_cost_supplier.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q2_minimum_cost_supplier.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q2_minimum_cost_supplier.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q2_minimum_cost_supplier.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q3_shipping_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q3_shipping_priority.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q3_shipping_priority.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q3_shipping_priority.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q4_order_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q4_order_priority.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q4_order_priority.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q4_order_priority.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q6_forecast_revenue_change.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q6_forecast_revenue_change.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q6_forecast_revenue_change.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q6_forecast_revenue_change.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q7_volume_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q7_volume_shipping.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q7_volume_shipping.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q7_volume_shipping.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q8_national_market_share.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q8_national_market_share.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q8_national_market_share.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q8_national_market_share.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q9_product_type_profit.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q9_product_type_profit.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q9_product_type_profit.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q9_product_type_profit.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u10_nestedloop_join.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u10_nestedloop_join.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u10_nestedloop_join.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u10_nestedloop_join.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u1_group_by.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u1_group_by.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u1_group_by.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u1_group_by.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u2_select-project.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u2_select-project.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u2_select-project.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u2_select-project.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u3_union.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u3_union.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u3_union.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u3_union.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u4_join.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u4_join.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u4_join.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u4_join.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u5_lateral_view.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u5_lateral_view.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u5_lateral_view.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u5_lateral_view.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u6_limit.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u6_limit.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u6_limit.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u6_limit.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u7_multi_join.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u7_multi_join.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u7_multi_join.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u7_multi_join.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u8_non_mapred.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u8_non_mapred.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u8_non_mapred.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u8_non_mapred.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u9_order_by.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u9_order_by.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u9_order_by.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u9_order_by.hive
diff --git a/hivesterix/src/test/resources/optimizerts/results/h11_share_scan.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/h11_share_scan.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/h11_share_scan.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/h11_share_scan.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/h12_select_struct.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/h12_select_struct.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/h12_select_struct.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/h12_select_struct.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q10_returned_item.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q10_returned_item.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q10_returned_item.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q10_returned_item.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q11_important_stock.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q11_important_stock.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q11_important_stock.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q11_important_stock.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q12_shipping.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q12_shipping.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q13_customer_distribution.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q13_customer_distribution.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q13_customer_distribution.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q13_customer_distribution.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q14_promotion_effect.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q14_promotion_effect.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q14_promotion_effect.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q14_promotion_effect.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q15_top_supplier.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q15_top_supplier.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q15_top_supplier.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q15_top_supplier.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q18_large_volume_customer.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q18_large_volume_customer.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q18_large_volume_customer.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q18_large_volume_customer.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q19_discounted_revenue.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q19_discounted_revenue.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q19_discounted_revenue.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q19_discounted_revenue.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q1_pricing_summary_report.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q1_pricing_summary_report.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q1_pricing_summary_report.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q1_pricing_summary_report.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q22_global_sales_opportunity.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q22_global_sales_opportunity.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q22_global_sales_opportunity.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q22_global_sales_opportunity.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q3_shipping_priority.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q3_shipping_priority.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q3_shipping_priority.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q3_shipping_priority.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q4_order_priority.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q4_order_priority.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q4_order_priority.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q4_order_priority.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q7_volume_shipping.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q7_volume_shipping.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q7_volume_shipping.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q7_volume_shipping.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q8_national_market_share.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q8_national_market_share.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q8_national_market_share.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q8_national_market_share.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q9_product_type_profit.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q9_product_type_profit.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q9_product_type_profit.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q9_product_type_profit.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u10_nestedloop_join.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u10_nestedloop_join.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u10_nestedloop_join.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u10_nestedloop_join.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u1_group_by.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u1_group_by.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u1_group_by.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u1_group_by.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u2_select-project.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u2_select-project.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u2_select-project.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u2_select-project.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u3_union.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u3_union.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u3_union.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u3_union.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u4_join.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u4_join.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u4_join.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u4_join.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u5_lateral_view.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u5_lateral_view.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u5_lateral_view.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u5_lateral_view.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u6_limit.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u6_limit.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u6_limit.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u6_limit.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u7_multi_join.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u7_multi_join.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u7_multi_join.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u7_multi_join.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u8_non_mapred.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u8_non_mapred.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u8_non_mapred.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u8_non_mapred.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u9_order_by.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u9_order_by.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u9_order_by.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u9_order_by.plan
diff --git a/hivesterix/src/test/resources/runtimefunctionts/conf/cluster b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/conf/cluster
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/conf/cluster
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/conf/cluster
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/customer.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/customer.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/customer.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/customer.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/large_card_join_src.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/large_card_join_src.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/large_card_join_src.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/large_card_join_src.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/large_card_join_src_small.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/large_card_join_src_small.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/large_card_join_src_small.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/large_card_join_src_small.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/lineitem.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/lineitem.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/lineitem.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/lineitem.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/nation.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/nation.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/nation.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/nation.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/orders.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/orders.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/orders.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/orders.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/part.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/part.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/part.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/part.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/partsupp.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/partsupp.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/partsupp.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/partsupp.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/region.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/region.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/region.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/region.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/supplier.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/supplier.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/supplier.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/supplier.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/core-site.xml b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/core-site.xml
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/core-site.xml
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/core-site.xml
diff --git a/hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/core-site.xml.bak b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/core-site.xml.bak
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/core-site.xml.bak
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/core-site.xml.bak
diff --git a/hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/hdfs-site.xml b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/hdfs-site.xml
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/hdfs-site.xml
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/hdfs-site.xml
diff --git a/hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/hdfs-site.xml.bak b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/hdfs-site.xml.bak
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/hdfs-site.xml.bak
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/hdfs-site.xml.bak
diff --git a/hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/mapred-site.xml b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/mapred-site.xml
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/mapred-site.xml
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/mapred-site.xml
diff --git a/hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/mapred-site.xml.bak b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/mapred-site.xml.bak
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/mapred-site.xml.bak
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/mapred-site.xml.bak
diff --git a/hivesterix/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml
diff --git a/hivesterix/src/test/resources/runtimefunctionts/hive/conf/topology.xml b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/topology.xml
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/hive/conf/topology.xml
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/topology.xml
diff --git a/hivesterix/src/test/resources/runtimefunctionts/ignore.txt b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/ignore.txt
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/ignore.txt
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/ignore.txt
diff --git a/hivesterix/src/test/resources/runtimefunctionts/logging.properties b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/logging.properties
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/logging.properties
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/logging.properties
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q11_important_stock.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q11_important_stock.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q11_important_stock.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q11_important_stock.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q12_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q12_shipping.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q12_shipping.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q12_shipping.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q13_customer_distribution.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q13_customer_distribution.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q13_customer_distribution.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q13_customer_distribution.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q16_parts_supplier_relationship.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q16_parts_supplier_relationship.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q16_parts_supplier_relationship.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q16_parts_supplier_relationship.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q17_small_quantity_order_revenue.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q17_small_quantity_order_revenue.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q17_small_quantity_order_revenue.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q17_small_quantity_order_revenue.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q21_suppliers_who_kept_orders_waiting.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q21_suppliers_who_kept_orders_waiting.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q21_suppliers_who_kept_orders_waiting.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q21_suppliers_who_kept_orders_waiting.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q22_global_sales_opportunity.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q22_global_sales_opportunity.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q22_global_sales_opportunity.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q22_global_sales_opportunity.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q2_minimum_cost_supplier.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q2_minimum_cost_supplier.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q2_minimum_cost_supplier.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q2_minimum_cost_supplier.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q6_forecast_revenue_change.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q6_forecast_revenue_change.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q6_forecast_revenue_change.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q6_forecast_revenue_change.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q8_national_market_share.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q8_national_market_share.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q8_national_market_share.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q8_national_market_share.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/u10_join.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u10_join.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/u10_join.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u10_join.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/u10_nestedloop_join.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u10_nestedloop_join.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/u10_nestedloop_join.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u10_nestedloop_join.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/u1_gby.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u1_gby.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/u1_gby.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u1_gby.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/u2_gby_external.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u2_gby_external.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/u2_gby_external.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u2_gby_external.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/u3_union.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u3_union.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/u3_union.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u3_union.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/u4_gby_distinct.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u4_gby_distinct.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/u4_gby_distinct.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u4_gby_distinct.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/u5_gby_global.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u5_gby_global.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/u5_gby_global.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u5_gby_global.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/u6_large_card_join.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u6_large_card_join.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/u6_large_card_join.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u6_large_card_join.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q10_returned_item.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q10_returned_item.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q10_returned_item.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q10_returned_item.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q11_important_stock.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q11_important_stock.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q11_important_stock.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q11_important_stock.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q12_shipping.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q12_shipping.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q12_shipping.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q12_shipping.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q13_customer_distribution.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q13_customer_distribution.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q13_customer_distribution.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q13_customer_distribution.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q14_promotion_effect.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q14_promotion_effect.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q14_promotion_effect.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q14_promotion_effect.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q15_top_supplier.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q15_top_supplier.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q15_top_supplier.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q15_top_supplier.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q16_parts_supplier_relationship.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q16_parts_supplier_relationship.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q16_parts_supplier_relationship.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q16_parts_supplier_relationship.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q17_small_quantity_order_revenue.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q17_small_quantity_order_revenue.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q17_small_quantity_order_revenue.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q17_small_quantity_order_revenue.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q18_large_volume_customer.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q18_large_volume_customer.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q18_large_volume_customer.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q18_large_volume_customer.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q19_discounted_revenue.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q19_discounted_revenue.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q19_discounted_revenue.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q19_discounted_revenue.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q1_pricing_summary_report.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q1_pricing_summary_report.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q1_pricing_summary_report.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q1_pricing_summary_report.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q20_potential_part_promotion.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q20_potential_part_promotion.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q20_potential_part_promotion.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q20_potential_part_promotion.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q21_suppliers_who_kept_orders_waiting.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q21_suppliers_who_kept_orders_waiting.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q21_suppliers_who_kept_orders_waiting.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q21_suppliers_who_kept_orders_waiting.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q22_global_sales_opportunity.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q22_global_sales_opportunity.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q22_global_sales_opportunity.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q22_global_sales_opportunity.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q2_minimum_cost_supplier.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q2_minimum_cost_supplier.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q2_minimum_cost_supplier.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q2_minimum_cost_supplier.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q3_shipping_priority.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q3_shipping_priority.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q3_shipping_priority.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q3_shipping_priority.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q4_order_priority.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q4_order_priority.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q4_order_priority.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q4_order_priority.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q5_local_supplier_volume.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q5_local_supplier_volume.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q5_local_supplier_volume.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q5_local_supplier_volume.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q6_forecast_revenue_change.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q6_forecast_revenue_change.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q6_forecast_revenue_change.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q6_forecast_revenue_change.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q7_volume_shipping.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q7_volume_shipping.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q7_volume_shipping.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q7_volume_shipping.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q8_national_market_share.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q8_national_market_share.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q8_national_market_share.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q8_national_market_share.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q9_product_type_profit.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q9_product_type_profit.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q9_product_type_profit.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q9_product_type_profit.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/u10_join.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u10_join.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/u10_join.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u10_join.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/u10_nestedloop_join.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u10_nestedloop_join.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/u10_nestedloop_join.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u10_nestedloop_join.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/u1_gby.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u1_gby.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/u1_gby.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u1_gby.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/u2_gby_external.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u2_gby_external.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/u2_gby_external.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u2_gby_external.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/u3_union.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u3_union.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/u3_union.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u3_union.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/u4_gby_distinct.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u4_gby_distinct.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/u4_gby_distinct.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u4_gby_distinct.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/u5_gby_global.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u5_gby_global.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/u5_gby_global.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u5_gby_global.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/u6_large_card_join.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u6_large_card_join.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/u6_large_card_join.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u6_large_card_join.result
diff --git a/hivesterix/hivesterix-optimizer/pom.xml b/hivesterix/hivesterix-optimizer/pom.xml
new file mode 100644
index 0000000..4e6032e
--- /dev/null
+++ b/hivesterix/hivesterix-optimizer/pom.xml
@@ -0,0 +1,47 @@
+<?xml version="1.0"?>
+<project
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+ xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <artifactId>hivesterix</artifactId>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <version>0.2.3-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>hivesterix-optimizer</artifactId>
+ <name>hivesterix-optimizer</name>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <encoding>UTF-8</encoding>
+ <fork>true</fork>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-common</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-translator</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java
new file mode 100644
index 0000000..7e4e271
--- /dev/null
+++ b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java
@@ -0,0 +1,113 @@
+package edu.uci.ics.hivesterix.optimizer.rulecollections;
+
+import java.util.LinkedList;
+
+import edu.uci.ics.hivesterix.optimizer.rules.InsertProjectBeforeWriteRule;
+import edu.uci.ics.hivesterix.optimizer.rules.IntroduceEarlyProjectRule;
+import edu.uci.ics.hivesterix.optimizer.rules.LocalGroupByRule;
+import edu.uci.ics.hivesterix.optimizer.rules.RemoveRedundantSelectRule;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.HeuristicOptimizer;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.BreakSelectIntoConjunctsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ComplexJoinInferenceRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ConsolidateAssignsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ConsolidateSelectsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.EliminateSubplanRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.EnforceStructuralPropertiesRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ExtractCommonOperatorsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ExtractGbyExpressionsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.FactorRedundantGroupAndDecorVarsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.InferTypesRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.InlineVariablesRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.InsertProjectBeforeUnionRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.IntroduceAggregateCombinerRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.IntroduceGroupByCombinerRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.IsolateHyracksOperatorsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PullSelectOutOfEqJoin;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushLimitDownRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushProjectDownRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushProjectIntoDataSourceScanRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushSelectDownRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushSelectIntoJoinRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ReinferAllTypesRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.RemoveRedundantProjectionRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.RemoveUnusedAssignAndAggregateRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.SetAlgebricksPhysicalOperatorsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.SetExecutionModeRule;
+
+public final class HiveRuleCollections {
+
+ public final static LinkedList<IAlgebraicRewriteRule> NORMALIZATION = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ NORMALIZATION.add(new EliminateSubplanRule());
+ NORMALIZATION.add(new IntroduceAggregateCombinerRule());
+ NORMALIZATION.add(new BreakSelectIntoConjunctsRule());
+ NORMALIZATION.add(new IntroduceAggregateCombinerRule());
+ NORMALIZATION.add(new PushSelectIntoJoinRule());
+ NORMALIZATION.add(new ExtractGbyExpressionsRule());
+ NORMALIZATION.add(new RemoveRedundantSelectRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> COND_PUSHDOWN_AND_JOIN_INFERENCE = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new PushSelectDownRule());
+ COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new InlineVariablesRule());
+ COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new FactorRedundantGroupAndDecorVarsRule());
+ COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new EliminateSubplanRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> LOAD_FIELDS = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ // should LoadRecordFieldsRule be applied in only one pass over the
+ // plan?
+ LOAD_FIELDS.add(new InlineVariablesRule());
+ // LOAD_FIELDS.add(new RemoveUnusedAssignAndAggregateRule());
+ LOAD_FIELDS.add(new ComplexJoinInferenceRule());
+ LOAD_FIELDS.add(new InferTypesRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> OP_PUSHDOWN = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ OP_PUSHDOWN.add(new PushProjectDownRule());
+ OP_PUSHDOWN.add(new PushSelectDownRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> DATA_EXCHANGE = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ DATA_EXCHANGE.add(new SetExecutionModeRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> CONSOLIDATION = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ CONSOLIDATION.add(new RemoveRedundantProjectionRule());
+ CONSOLIDATION.add(new ConsolidateSelectsRule());
+ CONSOLIDATION.add(new IntroduceEarlyProjectRule());
+ CONSOLIDATION.add(new ConsolidateAssignsRule());
+ CONSOLIDATION.add(new IntroduceGroupByCombinerRule());
+ CONSOLIDATION.add(new RemoveUnusedAssignAndAggregateRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> PHYSICAL_PLAN_REWRITES = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ PHYSICAL_PLAN_REWRITES.add(new PullSelectOutOfEqJoin());
+ PHYSICAL_PLAN_REWRITES.add(new SetAlgebricksPhysicalOperatorsRule());
+ PHYSICAL_PLAN_REWRITES.add(new EnforceStructuralPropertiesRule());
+ PHYSICAL_PLAN_REWRITES.add(new PushProjectDownRule());
+ PHYSICAL_PLAN_REWRITES.add(new SetAlgebricksPhysicalOperatorsRule());
+ PHYSICAL_PLAN_REWRITES.add(new PushLimitDownRule());
+ PHYSICAL_PLAN_REWRITES.add(new InsertProjectBeforeWriteRule());
+ PHYSICAL_PLAN_REWRITES.add(new InsertProjectBeforeUnionRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> prepareJobGenRules = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ prepareJobGenRules.add(new ReinferAllTypesRule());
+ prepareJobGenRules.add(new IsolateHyracksOperatorsRule(
+ HeuristicOptimizer.hyraxOperatorsBelowWhichJobGenIsDisabled));
+ prepareJobGenRules.add(new ExtractCommonOperatorsRule());
+ prepareJobGenRules.add(new LocalGroupByRule());
+ prepareJobGenRules.add(new PushProjectIntoDataSourceScanRule());
+ prepareJobGenRules.add(new ReinferAllTypesRule());
+ }
+
+}
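
A minimal sketch of how a caller could drive one of the rule collections above over a plan root, assuming only the IAlgebraicRewriteRule signatures visible in this patch; the class name RuleCollectionRunner and the single-pass loop are illustrative, since the real Algebricks rule controllers walk the whole plan and iterate to a fixpoint.

import org.apache.commons.lang3.mutable.Mutable;

import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;

// Illustrative only: applies every rule once at a single operator reference.
final class RuleCollectionRunner {
    static boolean applyOnce(Iterable<IAlgebraicRewriteRule> rules, Mutable<ILogicalOperator> opRef,
            IOptimizationContext context) throws AlgebricksException {
        boolean changed = false;
        for (IAlgebraicRewriteRule rule : rules) {
            changed |= rule.rewritePre(opRef, context);  // top-down hook
            changed |= rule.rewritePost(opRef, context); // bottom-up hook
        }
        return changed;
    }
}

A caller would then invoke, for example, RuleCollectionRunner.applyOnce(HiveRuleCollections.NORMALIZATION, planRootRef, context), presumably once per phase in the order the lists are declared.
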
diff --git a/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/InsertProjectBeforeWriteRule.java b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/InsertProjectBeforeWriteRule.java
new file mode 100644
index 0000000..90777ee
--- /dev/null
+++ b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/InsertProjectBeforeWriteRule.java
@@ -0,0 +1,79 @@
+package edu.uci.ics.hivesterix.optimizer.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator.ExecutionMode;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.WriteOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.StreamProjectPOperator;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+public class InsertProjectBeforeWriteRule implements IAlgebraicRewriteRule {
+
+ @Override
+ public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) {
+ return false;
+ }
+
+ /**
+ * When the input schema of the WriteOperator differs from its output
+ * schema in variable order, insert a ProjectOperator so that tuples are
+ * reordered into the expected write order.
+ */
+ @Override
+ public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+ AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+ if (op.getOperatorTag() != LogicalOperatorTag.WRITE) {
+ return false;
+ }
+ WriteOperator opWrite = (WriteOperator) op;
+ ArrayList<LogicalVariable> finalSchema = new ArrayList<LogicalVariable>();
+ VariableUtilities.getUsedVariables(opWrite, finalSchema);
+ ArrayList<LogicalVariable> inputSchema = new ArrayList<LogicalVariable>();
+ VariableUtilities.getLiveVariables(opWrite, inputSchema);
+ if (!isIdentical(finalSchema, inputSchema)) {
+ ProjectOperator projectOp = new ProjectOperator(finalSchema);
+ Mutable<ILogicalOperator> parentOpRef = opWrite.getInputs().get(0);
+ projectOp.getInputs().add(parentOpRef);
+ opWrite.getInputs().clear();
+ opWrite.getInputs().add(new MutableObject<ILogicalOperator>(projectOp));
+ projectOp.setPhysicalOperator(new StreamProjectPOperator());
+ projectOp.setExecutionMode(ExecutionMode.PARTITIONED);
+
+ AbstractLogicalOperator op2 = (AbstractLogicalOperator) parentOpRef.getValue();
+ if (op2.getOperatorTag() == LogicalOperatorTag.PROJECT) {
+ ProjectOperator pi2 = (ProjectOperator) op2;
+ parentOpRef.setValue(pi2.getInputs().get(0).getValue());
+ }
+ context.computeAndSetTypeEnvironmentForOperator(projectOp);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ private boolean isIdentical(List<LogicalVariable> finalSchema, List<LogicalVariable> inputSchema) {
+ int finalSchemaSize = finalSchema.size();
+ int inputSchemaSize = inputSchema.size();
+ if (finalSchemaSize != inputSchemaSize)
+ throw new IllegalStateException("final output schema variables missing!");
+ for (int i = 0; i < finalSchemaSize; i++) {
+ LogicalVariable var1 = finalSchema.get(i);
+ LogicalVariable var2 = inputSchema.get(i);
+ if (!var1.equals(var2))
+ return false;
+ }
+ return true;
+ }
+}
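
For a self-contained view of the check this rule performs, here is a small sketch using plain strings in place of LogicalVariable (the variable names $1, $2 and the class WriteOrderCheckDemo are made up for illustration): a project is only inserted when both schemas have the same size but differ in positional order.

import java.util.Arrays;
import java.util.List;

final class WriteOrderCheckDemo {
    // Mirrors isIdentical(): equal sizes are required; any positional mismatch
    // means the writer's input must be re-projected into the final order.
    static boolean sameOrder(List<String> finalSchema, List<String> inputSchema) {
        if (finalSchema.size() != inputSchema.size()) {
            throw new IllegalStateException("final output schema variables missing!");
        }
        for (int i = 0; i < finalSchema.size(); i++) {
            if (!finalSchema.get(i).equals(inputSchema.get(i))) {
                return false;
            }
        }
        return true;
    }

    public static void main(String[] args) {
        System.out.println(sameOrder(Arrays.asList("$1", "$2"), Arrays.asList("$1", "$2"))); // true: no project needed
        System.out.println(sameOrder(Arrays.asList("$1", "$2"), Arrays.asList("$2", "$1"))); // false: project is inserted
    }
}
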
diff --git a/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/IntroduceEarlyProjectRule.java b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/IntroduceEarlyProjectRule.java
new file mode 100644
index 0000000..0a18629
--- /dev/null
+++ b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/IntroduceEarlyProjectRule.java
@@ -0,0 +1,73 @@
+package edu.uci.ics.hivesterix.optimizer.rules;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+public class IntroduceEarlyProjectRule implements IAlgebraicRewriteRule {
+
+ @Override
+ public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+ return false;
+ }
+
+ @Override
+ public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+ throws AlgebricksException {
+ AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+ if (op.getOperatorTag() != LogicalOperatorTag.PROJECT) {
+ return false;
+ }
+ AbstractLogicalOperator middleOp = (AbstractLogicalOperator) op.getInputs().get(0).getValue();
+ List<LogicalVariable> deliveredVars = new ArrayList<LogicalVariable>();
+ List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
+ List<LogicalVariable> producedVars = new ArrayList<LogicalVariable>();
+
+ VariableUtilities.getUsedVariables(op, deliveredVars);
+ VariableUtilities.getUsedVariables(middleOp, usedVars);
+ VariableUtilities.getProducedVariables(middleOp, producedVars);
+
+ Set<LogicalVariable> requiredVariables = new HashSet<LogicalVariable>();
+ requiredVariables.addAll(deliveredVars);
+ requiredVariables.addAll(usedVars);
+ requiredVariables.removeAll(producedVars);
+
+ // the rewrite only applies when the middle operator has exactly one input
+ if (middleOp.getInputs().size() != 1)
+ return false;
+
+ AbstractLogicalOperator targetOp = (AbstractLogicalOperator) middleOp.getInputs().get(0).getValue();
+ if (targetOp.getOperatorTag() != LogicalOperatorTag.DATASOURCESCAN)
+ return false;
+
+ Set<LogicalVariable> deliveredEarlyVars = new HashSet<LogicalVariable>();
+ VariableUtilities.getLiveVariables(targetOp, deliveredEarlyVars);
+
+ deliveredEarlyVars.removeAll(requiredVariables);
+ if (deliveredEarlyVars.size() > 0) {
+ ArrayList<LogicalVariable> requiredVars = new ArrayList<LogicalVariable>();
+ requiredVars.addAll(requiredVariables);
+ ILogicalOperator earlyProjectOp = new ProjectOperator(requiredVars);
+ Mutable<ILogicalOperator> earlyProjectOpRef = new MutableObject<ILogicalOperator>(earlyProjectOp);
+ Mutable<ILogicalOperator> targetRef = middleOp.getInputs().get(0);
+ middleOp.getInputs().set(0, earlyProjectOpRef);
+ earlyProjectOp.getInputs().add(targetRef);
+ context.computeAndSetTypeEnvironmentForOperator(earlyProjectOp);
+ return true;
+ }
+ return false;
+ }
+}
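
The set arithmetic at the heart of this rule can be shown in isolation; the sketch below uses strings for variables and made-up names ($1 through $5, EarlyProjectMathDemo), and only demonstrates required = (delivered union used) minus produced, followed by the pruning test against the variables live at the scan.

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

final class EarlyProjectMathDemo {
    public static void main(String[] args) {
        Set<String> delivered = new HashSet<>(Arrays.asList("$1", "$3")); // used by the project above
        Set<String> used = new HashSet<>(Arrays.asList("$2"));            // used by the middle operator
        Set<String> produced = new HashSet<>(Arrays.asList("$3"));        // produced by the middle operator

        // required = (delivered union used) minus produced : what the scan must still supply
        Set<String> required = new HashSet<>(delivered);
        required.addAll(used);
        required.removeAll(produced);

        // variables live at the data-source scan that nothing above needs
        Set<String> liveAtScan = new HashSet<>(Arrays.asList("$1", "$2", "$4", "$5"));
        Set<String> prunable = new HashSet<>(liveAtScan);
        prunable.removeAll(required);

        System.out.println("required: " + required); // contains $1 and $2
        System.out.println("prunable: " + prunable); // contains $4 and $5 -> an early project is introduced
    }
}
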
diff --git a/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/LocalGroupByRule.java b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/LocalGroupByRule.java
new file mode 100644
index 0000000..90ca008
--- /dev/null
+++ b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/LocalGroupByRule.java
@@ -0,0 +1,66 @@
+package edu.uci.ics.hivesterix.optimizer.rules;
+
+import org.apache.commons.lang3.mutable.Mutable;
+
+import edu.uci.ics.hivesterix.logical.plan.HiveOperatorAnnotations;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IPhysicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.OperatorAnnotations;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.PhysicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.OneToOneExchangePOperator;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+public class LocalGroupByRule implements IAlgebraicRewriteRule {
+
+ @Override
+ public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+ return false;
+ }
+
+ @Override
+ public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+ throws AlgebricksException {
+ AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+ if (op.getOperatorTag() != LogicalOperatorTag.GROUP) {
+ return false;
+ }
+ Boolean localGby = (Boolean) op.getAnnotations().get(HiveOperatorAnnotations.LOCAL_GROUP_BY);
+ if (localGby != null && localGby.equals(Boolean.TRUE)) {
+ Boolean hashGby = (Boolean) op.getAnnotations().get(OperatorAnnotations.USE_HASH_GROUP_BY);
+ Boolean externalGby = (Boolean) op.getAnnotations().get(OperatorAnnotations.USE_EXTERNAL_GROUP_BY);
+ if ((hashGby != null && hashGby.equals(Boolean.TRUE))
+ || (externalGby != null && externalGby.equals(Boolean.TRUE))) {
+ reviseExchange(op);
+ } else {
+ ILogicalOperator child = op.getInputs().get(0).getValue();
+ AbstractLogicalOperator childOp = (AbstractLogicalOperator) child;
+ while (child.getInputs().size() > 0) {
+ if (childOp.getOperatorTag() == LogicalOperatorTag.ORDER)
+ break;
+ else {
+ child = child.getInputs().get(0).getValue();
+ childOp = (AbstractLogicalOperator) child;
+ }
+ }
+ if (childOp.getOperatorTag() == LogicalOperatorTag.ORDER)
+ reviseExchange(childOp);
+ }
+ return true;
+ }
+ return false;
+ }
+
+ private void reviseExchange(AbstractLogicalOperator op) {
+ ExchangeOperator exchange = (ExchangeOperator) op.getInputs().get(0).getValue();
+ IPhysicalOperator physicalOp = exchange.getPhysicalOperator();
+ if (physicalOp.getOperatorTag() == PhysicalOperatorTag.HASH_PARTITION_EXCHANGE) {
+ exchange.setPhysicalOperator(new OneToOneExchangePOperator());
+ }
+ }
+
+}
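
A tiny sketch of the annotation gating used here, with a plain map standing in for the operator's annotation map; the string keys are stand-ins for HiveOperatorAnnotations.LOCAL_GROUP_BY and OperatorAnnotations.USE_HASH_GROUP_BY, and the printed action summarizes what reviseExchange() does.

import java.util.HashMap;
import java.util.Map;

final class LocalGroupByGateDemo {
    public static void main(String[] args) {
        Map<String, Object> annotations = new HashMap<>();
        annotations.put("LOCAL_GROUP_BY", Boolean.TRUE);
        annotations.put("USE_HASH_GROUP_BY", Boolean.TRUE);

        Boolean local = (Boolean) annotations.get("LOCAL_GROUP_BY");
        Boolean hash = (Boolean) annotations.get("USE_HASH_GROUP_BY");
        if (local != null && local && hash != null && hash) {
            // for a local hash/external group-by, the rule downgrades the
            // hash-partition exchange below it to a one-to-one exchange
            System.out.println("replace HASH_PARTITION_EXCHANGE with OneToOneExchangePOperator");
        }
    }
}
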
diff --git a/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/RemoveRedundantSelectRule.java b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/RemoveRedundantSelectRule.java
new file mode 100644
index 0000000..44ff12d
--- /dev/null
+++ b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/RemoveRedundantSelectRule.java
@@ -0,0 +1,44 @@
+package edu.uci.ics.hivesterix.optimizer.rules;
+
+import org.apache.commons.lang3.mutable.Mutable;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+public class RemoveRedundantSelectRule implements IAlgebraicRewriteRule {
+
+ @Override
+ public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+ return false;
+ }
+
+ @Override
+ public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+ throws AlgebricksException {
+ AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+ if (op.getOperatorTag() != LogicalOperatorTag.SELECT) {
+ return false;
+ }
+ AbstractLogicalOperator inputOp = (AbstractLogicalOperator) op.getInputs().get(0).getValue();
+ if (inputOp.getOperatorTag() != LogicalOperatorTag.SELECT) {
+ return false;
+ }
+ SelectOperator selectOp = (SelectOperator) op;
+ SelectOperator inputSelectOp = (SelectOperator) inputOp;
+ ILogicalExpression expr1 = selectOp.getCondition().getValue();
+ ILogicalExpression expr2 = inputSelectOp.getCondition().getValue();
+
+ if (expr1.equals(expr2)) {
+ selectOp.getInputs().set(0, inputSelectOp.getInputs().get(0));
+ return true;
+ }
+ return false;
+ }
+
+}
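
The effect of the rule can be pictured as collapsing adjacent, equal filter conditions; the sketch below works on a list of condition strings (hypothetical predicates, not Hive expressions) purely to show the dedup-adjacent behaviour.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

final class RedundantSelectDemo {
    // Drop a condition when it equals the one directly below it, the way the rule
    // removes a SELECT whose condition matches that of its input SELECT.
    static List<String> collapseAdjacent(List<String> selectConditions) {
        List<String> out = new ArrayList<>();
        for (String cond : selectConditions) {
            if (out.isEmpty() || !out.get(out.size() - 1).equals(cond)) {
                out.add(cond);
            }
        }
        return out;
    }

    public static void main(String[] args) {
        System.out.println(collapseAdjacent(Arrays.asList("$1 > 10", "$1 > 10", "$2 = 'A'")));
        // -> [$1 > 10, $2 = 'A']
    }
}
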
diff --git a/hivesterix/hivesterix-runtime/pom.xml b/hivesterix/hivesterix-runtime/pom.xml
new file mode 100644
index 0000000..77db0d4
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/pom.xml
@@ -0,0 +1,383 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-runtime</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <name>hivesterix-runtime</name>
+
+ <dependencies>
+ <dependency>
+ <groupId>javax.servlet</groupId>
+ <artifactId>servlet-api</artifactId>
+ <version>2.5</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.8.1</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>args4j</groupId>
+ <artifactId>args4j</artifactId>
+ <version>2.0.12</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.json</groupId>
+ <artifactId>json</artifactId>
+ <version>20090211</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-server</artifactId>
+ <version>8.0.0.M1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-servlet</artifactId>
+ <version>8.0.0.M1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ <version>0.20.2</version>
+ </dependency>
+ <dependency>
+ <groupId>jline</groupId>
+ <artifactId>jline</artifactId>
+ <version>0.9.94</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.datanucleus</groupId>
+ <artifactId>datanucleus-core</artifactId>
+ <version>2.0.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.datanucleus</groupId>
+ <artifactId>datanucleus-connectionpool</artifactId>
+ <version>2.0.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.datanucleus</groupId>
+ <artifactId>datanucleus-enhancer</artifactId>
+ <version>2.0.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.datanucleus</groupId>
+ <artifactId>datanucleus-rdbms</artifactId>
+ <version>2.0.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-dbcp</groupId>
+ <artifactId>commons-dbcp</artifactId>
+ <version>1.4</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-pool</groupId>
+ <artifactId>commons-pool</artifactId>
+ <version>1.5.4</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-collections</groupId>
+ <artifactId>commons-collections</artifactId>
+ <version>3.2.1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ <version>2.4</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>javax</groupId>
+ <artifactId>jdo2-api</artifactId>
+ <version>2.3-ec</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.facebook</groupId>
+ <artifactId>libfb303</artifactId>
+ <version>0.5.0</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.thrift</groupId>
+ <artifactId>libthrift</artifactId>
+ <version>0.5.0</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>cli</artifactId>
+ <version>1.2</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache</groupId>
+ <artifactId>log4j</artifactId>
+ <version>1.2.15</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.antlr</groupId>
+ <artifactId>antlr-runtime</artifactId>
+ <version>3.0.1</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-cli</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-common</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-exec</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-hwi</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-jdbc</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-metastore</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-service</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-shims</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-serde</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ <version>1.6.1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ <version>1.2</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ <version>1.6.1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-test</artifactId>
+ <version>0.20.2</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ <version>1.1.1</version>
+ <type>jar</type>
+ <classifier>api</classifier>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>r06</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.antlr</groupId>
+ <artifactId>stringtemplate</artifactId>
+ <version>3.2</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.derby</groupId>
+ <artifactId>derby</artifactId>
+ <version>10.8.1.2</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase</artifactId>
+ <version>0.90.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>algebricks-compiler</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-control-cc</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-control-nc</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-serde</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-common</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <encoding>UTF-8</encoding>
+ <fork>true</fork>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-jar-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>patch</id>
+ <goals>
+ <goal>jar</goal>
+ </goals>
+ <phase>package</phase>
+ <configuration>
+ <classifier>patch</classifier>
+ <finalName>a-hive-rumtime</finalName>
+ <includes>
+ <include>**/org/apache/**</include>
+ </includes>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ <repositories>
+ <repository>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>warn</checksumPolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>fail</checksumPolicy>
+ </snapshots>
+ <id>third-party</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/third-party</url>
+ </repository>
+ <repository>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>warn</checksumPolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>fail</checksumPolicy>
+ </snapshots>
+ <id>hyracks-public-release</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-public-releases</url>
+ </repository>
+ </repositories>
+</project>
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java
new file mode 100644
index 0000000..ad02239
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java
@@ -0,0 +1,169 @@
+package edu.uci.ics.hivesterix.runtime.evaluator;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.BytesWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
+import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
+import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+public abstract class AbstractExpressionEvaluator implements ICopyEvaluator {
+
+ private List<ICopyEvaluator> children;
+
+ private ExprNodeEvaluator evaluator;
+
+ private IDataOutputProvider out;
+
+ private ObjectInspector inspector;
+
+ /**
+ * output object inspector
+ */
+ private ObjectInspector outputInspector;
+
+ /**
+ * cached row object
+ */
+ private LazyObject<? extends ObjectInspector> cachedRowObject;
+
+ /**
+ * serializer/deserializer for lazy objects
+ */
+ private SerDe lazySer;
+
+ /**
+ * data output
+ */
+ DataOutput dataOutput;
+
+ public AbstractExpressionEvaluator(ExprNodeEvaluator hiveEvaluator, ObjectInspector oi, IDataOutputProvider output)
+ throws AlgebricksException {
+ evaluator = hiveEvaluator;
+ out = output;
+ inspector = oi;
+ dataOutput = out.getDataOutput();
+ }
+
+ protected ObjectInspector getRowInspector() {
+ return null;
+ }
+
+ protected IDataOutputProvider getIDataOutputProvider() {
+ return out;
+ }
+
+ protected ExprNodeEvaluator getHiveEvaluator() {
+ return evaluator;
+ }
+
+ public ObjectInspector getObjectInspector() {
+ return inspector;
+ }
+
+ @Override
+ public void evaluate(IFrameTupleReference r) throws AlgebricksException {
+ // initialize hive evaluator
+ try {
+ if (outputInspector == null)
+ outputInspector = evaluator.initialize(inspector);
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e.getMessage());
+ }
+
+ readIntoCache(r);
+ try {
+ Object result = evaluator.evaluate(cachedRowObject);
+
+ // if (result == null) {
+ // result = evaluator.evaluate(cachedRowObject);
+ //
+ // // check if result is null
+ //
+ // String errorMsg = "serialize null object in \n output " +
+ // outputInspector.toString() + " \n input "
+ // + inspector.toString() + "\n ";
+ // errorMsg += "";
+ // List<Object> columns = ((StructObjectInspector)
+ // inspector).getStructFieldsDataAsList(cachedRowObject);
+ // for (Object column : columns) {
+ // errorMsg += column.toString() + " ";
+ // }
+ // errorMsg += "\n";
+ // Log.info(errorMsg);
+ // System.out.println(errorMsg);
+ // // result = new BooleanWritable(true);
+ // throw new IllegalStateException(errorMsg);
+ // }
+
+ serializeResult(result);
+ } catch (HiveException e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e.getMessage());
+ } catch (IOException e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e.getMessage());
+ }
+ }
+
+ /**
+ * serialize the result
+ *
+ * @param result
+ * the evaluation result
+ * @throws IOException
+ * @throws AlgebricksException
+ */
+ private void serializeResult(Object result) throws IOException, AlgebricksException {
+ if (lazySer == null)
+ lazySer = new LazySerDe();
+
+ try {
+ BytesWritable outputWritable = (BytesWritable) lazySer.serialize(result, outputInspector);
+ dataOutput.write(outputWritable.getBytes(), 0, outputWritable.getLength());
+ } catch (SerDeException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ /**
+ * bind the tuple reference to the cached row object
+ *
+ * @param r
+ */
+ private void readIntoCache(IFrameTupleReference r) {
+ if (cachedRowObject == null)
+ cachedRowObject = (LazyObject<? extends ObjectInspector>) LazyFactory.createLazyObject(inspector);
+ cachedRowObject.init(r);
+ }
+
+ /**
+ * set a list of children of this evaluator
+ *
+ * @param children
+ */
+ public void setChildren(List<ICopyEvaluator> children) {
+ this.children = children;
+ }
+
+ public void addChild(ICopyEvaluator child) {
+ if (children == null)
+ children = new ArrayList<ICopyEvaluator>();
+ children.add(child);
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java
new file mode 100644
index 0000000..e500376
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java
@@ -0,0 +1,224 @@
+package edu.uci.ics.hivesterix.runtime.evaluator;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.BytesWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunction;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+public class AggregationFunctionEvaluator implements ICopyAggregateFunction {
+
+ /**
+ * the mode of aggregation function
+ */
+ private GenericUDAFEvaluator.Mode mode;
+
+ /**
+ * an array of evaluators
+ */
+ private ExprNodeEvaluator[] evaluators;
+
+ /**
+ * udaf evaluator partial
+ */
+ private GenericUDAFEvaluator udafPartial;
+
+ /**
+ * udaf evaluator complete
+ */
+ private GenericUDAFEvaluator udafComplete;
+
+ /**
+ * cached parameter objects
+ */
+ private Object[] cachedParameters;
+
+ /**
+ * cached row objects
+ */
+ private LazyObject<? extends ObjectInspector> cachedRowObject;
+
+ /**
+ * the output channel
+ */
+ private DataOutput out;
+
+ /**
+ * aggregation buffer
+ */
+ private AggregationBuffer aggBuffer;
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ private SerDe lazySer;
+
+ /**
+ * the output object inspector for this aggregation function
+ */
+ private ObjectInspector outputInspector;
+
+ /**
+ * the output object inspector for this aggregation function
+ */
+ private ObjectInspector outputInspectorPartial;
+
+ /**
+ * parameter inspectors
+ */
+ private ObjectInspector[] parameterInspectors;
+
+ /**
+ * constructor: ensure the aggregation function creates as few objects as possible
+ *
+ * @param desc
+ * @param oi
+ * @param output
+ */
+ public AggregationFunctionEvaluator(List<ExprNodeDesc> inputs, List<TypeInfo> inputTypes, String genericUDAFName,
+ GenericUDAFEvaluator.Mode aggMode, boolean distinct, ObjectInspector oi, DataOutput output,
+ ExprNodeEvaluator[] evals, ObjectInspector[] pInspectors, Object[] parameterCache, SerDe serde,
+ LazyObject<? extends ObjectInspector> row, GenericUDAFEvaluator udafunctionPartial,
+ GenericUDAFEvaluator udafunctionComplete, ObjectInspector outputOi, ObjectInspector outputOiPartial) {
+ // shared object across threads
+ this.out = output;
+ this.mode = aggMode;
+ this.parameterInspectors = pInspectors;
+
+ // thread local objects
+ this.evaluators = evals;
+ this.cachedParameters = parameterCache;
+ this.cachedRowObject = row;
+ this.lazySer = serde;
+ this.udafPartial = udafunctionPartial;
+ this.udafComplete = udafunctionComplete;
+ this.outputInspector = outputOi;
+ this.outputInspectorPartial = outputOiPartial;
+ }
+
+ @Override
+ public void init() throws AlgebricksException {
+ try {
+ aggBuffer = udafPartial.getNewAggregationBuffer();
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ @Override
+ public void step(IFrameTupleReference tuple) throws AlgebricksException {
+ readIntoCache(tuple);
+ processRow();
+ }
+
+ private void processRow() throws AlgebricksException {
+ try {
+ // get values by evaluating them
+ for (int i = 0; i < cachedParameters.length; i++) {
+ cachedParameters[i] = evaluators[i].evaluate(cachedRowObject);
+ }
+ processAggregate();
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ private void processAggregate() throws HiveException {
+ /**
+ * accumulate the aggregation function
+ */
+ switch (mode) {
+ case PARTIAL1:
+ case COMPLETE:
+ udafPartial.iterate(aggBuffer, cachedParameters);
+ break;
+ case PARTIAL2:
+ case FINAL:
+ if (udafPartial instanceof GenericUDAFCount.GenericUDAFCountEvaluator) {
+ Object parameter = ((PrimitiveObjectInspector) parameterInspectors[0])
+ .getPrimitiveWritableObject(cachedParameters[0]);
+ udafPartial.merge(aggBuffer, parameter);
+ } else
+ udafPartial.merge(aggBuffer, cachedParameters[0]);
+ break;
+ default:
+ break;
+ }
+ }
+
+ /**
+ * serialize the result
+ *
+ * @param result
+ * the evaluation result
+ * @throws IOException
+ * @throws AlgebricksException
+ */
+ private void serializeResult(Object result, ObjectInspector oi) throws IOException, AlgebricksException {
+ try {
+ BytesWritable outputWritable = (BytesWritable) lazySer.serialize(result, oi);
+ out.write(outputWritable.getBytes(), 0, outputWritable.getLength());
+ } catch (SerDeException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ /**
+ * bind the tuple reference to the cached row object
+ *
+ * @param r
+ */
+ private void readIntoCache(IFrameTupleReference r) {
+ cachedRowObject.init(r);
+ }
+
+ @Override
+ public void finish() throws AlgebricksException {
+ // aggregator
+ try {
+ Object result = null;
+ result = udafPartial.terminatePartial(aggBuffer);
+ if (mode == GenericUDAFEvaluator.Mode.COMPLETE || mode == GenericUDAFEvaluator.Mode.FINAL) {
+ result = udafComplete.terminate(aggBuffer);
+ serializeResult(result, outputInspector);
+ } else {
+ serializeResult(result, outputInspectorPartial);
+ }
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ } catch (IOException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ @Override
+ public void finishPartial() throws AlgebricksException {
+ // aggregator.
+ try {
+ Object result = null;
+ // get aggregations
+ result = udafPartial.terminatePartial(aggBuffer);
+ serializeResult(result, outputInspectorPartial);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ } catch (IOException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+}
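
For context, the Hyracks runtime drives an ICopyAggregateFunction through init/step/finish. The sketch below is a hypothetical driver, not part of this patch (AggregateLifecycleSketch and aggregateGroup are illustrative names); it assumes only the interface methods implemented above and that the serialized result is written to the DataOutput supplied at construction time.

import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunction;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;

public final class AggregateLifecycleSketch {

    // Aggregates one group: allocate the buffer once, fold in every tuple, then emit the result.
    public static void aggregateGroup(ICopyAggregateFunction agg, Iterable<IFrameTupleReference> tuples)
            throws AlgebricksException {
        agg.init();                      // fresh AggregationBuffer from the partial UDAF evaluator
        for (IFrameTupleReference tuple : tuples) {
            agg.step(tuple);             // evaluate parameters and iterate/merge into the buffer
        }
        agg.finish();                    // terminate(Partial) and serialize through the lazy SerDe
    }
}
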
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java
new file mode 100644
index 0000000..1933253
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java
@@ -0,0 +1,248 @@
+package edu.uci.ics.hivesterix.runtime.evaluator;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.BytesWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunction;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+public class AggregatuibFunctionSerializableEvaluator implements ICopySerializableAggregateFunction {
+
+ /**
+ * the mode of aggregation function
+ */
+ private GenericUDAFEvaluator.Mode mode;
+
+ /**
+ * an array of evaluators
+ */
+ private ExprNodeEvaluator[] evaluators;
+
+ /**
+ * udaf evaluator partial
+ */
+ private GenericUDAFEvaluator udafPartial;
+
+ /**
+ * udaf evaluator complete
+ */
+ private GenericUDAFEvaluator udafComplete;
+
+ /**
+ * cached parameter objects
+ */
+ private Object[] cachedParameters;
+
+ /**
+ * cached row objects
+ */
+ private LazyObject<? extends ObjectInspector> cachedRowObject;
+
+ /**
+ * aggregation buffer
+ */
+ private SerializableBuffer aggBuffer;
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ private SerDe lazySer;
+
+ /**
+ * the output object inspector for this aggregation function
+ */
+ private ObjectInspector outputInspector;
+
+ /**
+ * the output object inspector for this aggregation function
+ */
+ private ObjectInspector outputInspectorPartial;
+
+ /**
+ * parameter inspectors
+ */
+ private ObjectInspector[] parameterInspectors;
+
+ /**
+ * constructor: ensure the aggregation function creates as few objects as possible
+ *
+ * @param desc
+ * @param oi
+ * @param output
+ */
+ public AggregatuibFunctionSerializableEvaluator(List<ExprNodeDesc> inputs, List<TypeInfo> inputTypes,
+ String genericUDAFName, GenericUDAFEvaluator.Mode aggMode, boolean distinct, ObjectInspector oi,
+ ExprNodeEvaluator[] evals, ObjectInspector[] pInspectors, Object[] parameterCache, SerDe serde,
+ LazyObject<? extends ObjectInspector> row, GenericUDAFEvaluator udafunctionPartial,
+ GenericUDAFEvaluator udafunctionComplete, ObjectInspector outputOi, ObjectInspector outputOiPartial)
+ throws AlgebricksException {
+ // shared object across threads
+ this.mode = aggMode;
+ this.parameterInspectors = pInspectors;
+
+ // thread local objects
+ this.evaluators = evals;
+ this.cachedParameters = parameterCache;
+ this.cachedRowObject = row;
+ this.lazySer = serde;
+ this.udafPartial = udafunctionPartial;
+ this.udafComplete = udafunctionComplete;
+ this.outputInspector = outputOi;
+ this.outputInspectorPartial = outputOiPartial;
+
+ try {
+ aggBuffer = (SerializableBuffer) udafPartial.getNewAggregationBuffer();
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ @Override
+ public void init(DataOutput output) throws AlgebricksException {
+ try {
+ udafPartial.reset(aggBuffer);
+ outputAggBuffer(aggBuffer, output);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ @Override
+ public void step(IFrameTupleReference tuple, byte[] data, int start, int len) throws AlgebricksException {
+ deSerializeAggBuffer(aggBuffer, data, start, len);
+ readIntoCache(tuple);
+ processRow();
+ serializeAggBuffer(aggBuffer, data, start, len);
+ }
+
+ private void processRow() throws AlgebricksException {
+ try {
+ // get values by evaluating them
+ for (int i = 0; i < cachedParameters.length; i++) {
+ cachedParameters[i] = evaluators[i].evaluate(cachedRowObject);
+ }
+ processAggregate();
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ private void processAggregate() throws HiveException {
+ /**
+ * accumulate the aggregation function
+ */
+ switch (mode) {
+ case PARTIAL1:
+ case COMPLETE:
+ udafPartial.iterate(aggBuffer, cachedParameters);
+ break;
+ case PARTIAL2:
+ case FINAL:
+ if (udafPartial instanceof GenericUDAFCount.GenericUDAFCountEvaluator) {
+ Object parameter = ((PrimitiveObjectInspector) parameterInspectors[0])
+ .getPrimitiveWritableObject(cachedParameters[0]);
+ udafPartial.merge(aggBuffer, parameter);
+ } else
+ udafPartial.merge(aggBuffer, cachedParameters[0]);
+ break;
+ default:
+ break;
+ }
+ }
+
+ /**
+ * serialize the result
+ *
+ * @param result
+ * the evaluation result
+ * @throws IOException
+ * @throws AlgebricksException
+ */
+ private void serializeResult(Object result, ObjectInspector oi, DataOutput out) throws IOException,
+ AlgebricksException {
+ try {
+ BytesWritable outputWritable = (BytesWritable) lazySer.serialize(result, oi);
+ out.write(outputWritable.getBytes(), 0, outputWritable.getLength());
+ } catch (SerDeException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ /**
+ * bind the tuple reference to the cached row object
+ *
+ * @param r
+ */
+ private void readIntoCache(IFrameTupleReference r) {
+ cachedRowObject.init(r);
+ }
+
+ @Override
+ public void finish(byte[] data, int start, int len, DataOutput output) throws AlgebricksException {
+ deSerializeAggBuffer(aggBuffer, data, start, len);
+ // aggregator
+ try {
+ Object result = null;
+ result = udafPartial.terminatePartial(aggBuffer);
+ if (mode == GenericUDAFEvaluator.Mode.COMPLETE || mode == GenericUDAFEvaluator.Mode.FINAL) {
+ result = udafComplete.terminate(aggBuffer);
+ serializeResult(result, outputInspector, output);
+ } else {
+ serializeResult(result, outputInspectorPartial, output);
+ }
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ } catch (IOException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ @Override
+ public void finishPartial(byte[] data, int start, int len, DataOutput output) throws AlgebricksException {
+ deSerializeAggBuffer(aggBuffer, data, start, len);
+ // aggregator.
+ try {
+ Object result = null;
+ // get aggregations
+ result = udafPartial.terminatePartial(aggBuffer);
+ serializeResult(result, outputInspectorPartial, output);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ } catch (IOException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ private void serializeAggBuffer(SerializableBuffer buffer, byte[] data, int start, int len)
+ throws AlgebricksException {
+ buffer.serializeAggBuffer(data, start, len);
+ }
+
+ private void deSerializeAggBuffer(SerializableBuffer buffer, byte[] data, int start, int len)
+ throws AlgebricksException {
+ buffer.deSerializeAggBuffer(data, start, len);
+ }
+
+ private void outputAggBuffer(SerializableBuffer buffer, DataOutput out) throws AlgebricksException {
+ try {
+ buffer.serializeAggBuffer(out);
+ } catch (IOException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+}
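
The serializable variant carries its running state in a caller-owned byte range: each step deserializes that range into the reused buffer, folds in one tuple, and serializes it back in place. A minimal hypothetical driver (names are illustrative), assuming only the signatures shown above:

import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunction;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;

public final class SerializableAggregateSketch {

    // The running state lives in state[start, start + len); each step restores it,
    // folds in one tuple, and writes it back in place.
    public static void stepAll(ICopySerializableAggregateFunction agg, Iterable<IFrameTupleReference> tuples,
            byte[] state, int start, int len) throws AlgebricksException {
        for (IFrameTupleReference tuple : tuples) {
            agg.step(tuple, state, start, len);
        }
    }
}
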
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/BufferSerDeUtil.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/BufferSerDeUtil.java
new file mode 100644
index 0000000..96065e5
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/BufferSerDeUtil.java
@@ -0,0 +1,67 @@
+package edu.uci.ics.hivesterix.runtime.evaluator;
+
+public class BufferSerDeUtil {
+
+ public static double getDouble(byte[] bytes, int offset) {
+ return Double.longBitsToDouble(getLong(bytes, offset));
+ }
+
+ public static float getFloat(byte[] bytes, int offset) {
+ return Float.intBitsToFloat(getInt(bytes, offset));
+ }
+
+ public static boolean getBoolean(byte[] bytes, int offset) {
+ if (bytes[offset] == 0)
+ return false;
+ else
+ return true;
+ }
+
+ public static int getInt(byte[] bytes, int offset) {
+ return ((bytes[offset] & 0xff) << 24) + ((bytes[offset + 1] & 0xff) << 16) + ((bytes[offset + 2] & 0xff) << 8)
+ + ((bytes[offset + 3] & 0xff) << 0);
+ }
+
+ public static long getLong(byte[] bytes, int offset) {
+ return (((long) (bytes[offset] & 0xff)) << 56) + (((long) (bytes[offset + 1] & 0xff)) << 48)
+ + (((long) (bytes[offset + 2] & 0xff)) << 40) + (((long) (bytes[offset + 3] & 0xff)) << 32)
+ + (((long) (bytes[offset + 4] & 0xff)) << 24) + (((long) (bytes[offset + 5] & 0xff)) << 16)
+ + (((long) (bytes[offset + 6] & 0xff)) << 8) + (((long) (bytes[offset + 7] & 0xff)) << 0);
+ }
+
+ public static void writeBoolean(boolean value, byte[] bytes, int offset) {
+ if (value)
+ bytes[offset] = (byte) 1;
+ else
+ bytes[offset] = (byte) 0;
+ }
+
+ public static void writeInt(int value, byte[] bytes, int offset) {
+ bytes[offset++] = (byte) (value >> 24);
+ bytes[offset++] = (byte) (value >> 16);
+ bytes[offset++] = (byte) (value >> 8);
+ bytes[offset++] = (byte) (value);
+ }
+
+ public static void writeLong(long value, byte[] bytes, int offset) {
+ bytes[offset++] = (byte) (value >> 56);
+ bytes[offset++] = (byte) (value >> 48);
+ bytes[offset++] = (byte) (value >> 40);
+ bytes[offset++] = (byte) (value >> 32);
+ bytes[offset++] = (byte) (value >> 24);
+ bytes[offset++] = (byte) (value >> 16);
+ bytes[offset++] = (byte) (value >> 8);
+ bytes[offset++] = (byte) (value);
+ }
+
+ public static void writeDouble(double value, byte[] bytes, int offset) {
+ long lValue = Double.doubleToLongBits(value);
+ writeLong(lValue, bytes, offset);
+ }
+
+ public static void writeFloat(float value, byte[] bytes, int offset) {
+ int iValue = Float.floatToIntBits(value);
+ writeInt(iValue, bytes, offset);
+ }
+
+}
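
BufferSerDeUtil encodes fixed-width values big-endian (most significant byte first). A quick self-check sketch (the class name is illustrative); it assumes nothing beyond the methods above and java.nio.ByteBuffer, whose default byte order is also big-endian:

import java.nio.ByteBuffer;

import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;

public class BufferSerDeUtilRoundTrip {
    public static void main(String[] args) {
        byte[] buf = new byte[16];
        BufferSerDeUtil.writeLong(123456789012345L, buf, 0);
        BufferSerDeUtil.writeDouble(2.5, buf, 8);

        // round trip through the same utility
        System.out.println(BufferSerDeUtil.getLong(buf, 0));    // 123456789012345
        System.out.println(BufferSerDeUtil.getDouble(buf, 8));  // 2.5

        // the byte order matches ByteBuffer's default (big-endian)
        ByteBuffer bb = ByteBuffer.wrap(buf);
        System.out.println(bb.getLong(0));    // 123456789012345
        System.out.println(bb.getDouble(8));  // 2.5
    }
}
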
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ColumnExpressionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ColumnExpressionEvaluator.java
similarity index 67%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ColumnExpressionEvaluator.java
rename to hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ColumnExpressionEvaluator.java
index 3296e19..5647f6a 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ColumnExpressionEvaluator.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ColumnExpressionEvaluator.java
@@ -9,10 +9,9 @@
public class ColumnExpressionEvaluator extends AbstractExpressionEvaluator {
- public ColumnExpressionEvaluator(ExprNodeColumnDesc expr,
- ObjectInspector oi, IDataOutputProvider output)
- throws AlgebricksException {
- super(new ExprNodeColumnEvaluator(expr), oi, output);
- }
+ public ColumnExpressionEvaluator(ExprNodeColumnDesc expr, ObjectInspector oi, IDataOutputProvider output)
+ throws AlgebricksException {
+ super(new ExprNodeColumnEvaluator(expr), oi, output);
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ConstantExpressionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ConstantExpressionEvaluator.java
similarity index 67%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ConstantExpressionEvaluator.java
rename to hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ConstantExpressionEvaluator.java
index 62928e6..d8796ea 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ConstantExpressionEvaluator.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ConstantExpressionEvaluator.java
@@ -9,9 +9,8 @@
public class ConstantExpressionEvaluator extends AbstractExpressionEvaluator {
- public ConstantExpressionEvaluator(ExprNodeConstantDesc expr,
- ObjectInspector oi, IDataOutputProvider output)
- throws AlgebricksException {
- super(new ExprNodeConstantEvaluator(expr), oi, output);
- }
+ public ConstantExpressionEvaluator(ExprNodeConstantDesc expr, ObjectInspector oi, IDataOutputProvider output)
+ throws AlgebricksException {
+ super(new ExprNodeConstantEvaluator(expr), oi, output);
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FieldExpressionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FieldExpressionEvaluator.java
similarity index 68%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FieldExpressionEvaluator.java
rename to hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FieldExpressionEvaluator.java
index 5f6a5dc..35560b6 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FieldExpressionEvaluator.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FieldExpressionEvaluator.java
@@ -9,9 +9,9 @@
public class FieldExpressionEvaluator extends AbstractExpressionEvaluator {
- public FieldExpressionEvaluator(ExprNodeFieldDesc expr, ObjectInspector oi,
- IDataOutputProvider output) throws AlgebricksException {
- super(new ExprNodeFieldEvaluator(expr), oi, output);
- }
+ public FieldExpressionEvaluator(ExprNodeFieldDesc expr, ObjectInspector oi, IDataOutputProvider output)
+ throws AlgebricksException {
+ super(new ExprNodeFieldEvaluator(expr), oi, output);
+ }
}
\ No newline at end of file
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FunctionExpressionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FunctionExpressionEvaluator.java
similarity index 67%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FunctionExpressionEvaluator.java
rename to hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FunctionExpressionEvaluator.java
index c3f3c93..7ffec7a 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FunctionExpressionEvaluator.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FunctionExpressionEvaluator.java
@@ -9,10 +9,9 @@
public class FunctionExpressionEvaluator extends AbstractExpressionEvaluator {
- public FunctionExpressionEvaluator(ExprNodeGenericFuncDesc expr,
- ObjectInspector oi, IDataOutputProvider output)
- throws AlgebricksException {
- super(new ExprNodeGenericFuncEvaluator(expr), oi, output);
- }
+ public FunctionExpressionEvaluator(ExprNodeGenericFuncDesc expr, ObjectInspector oi, IDataOutputProvider output)
+ throws AlgebricksException {
+ super(new ExprNodeGenericFuncEvaluator(expr), oi, output);
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/NullExpressionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/NullExpressionEvaluator.java
similarity index 68%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/NullExpressionEvaluator.java
rename to hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/NullExpressionEvaluator.java
index cbe5561..ca60385 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/NullExpressionEvaluator.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/NullExpressionEvaluator.java
@@ -9,8 +9,8 @@
public class NullExpressionEvaluator extends AbstractExpressionEvaluator {
- public NullExpressionEvaluator(ExprNodeNullDesc expr, ObjectInspector oi,
- IDataOutputProvider output) throws AlgebricksException {
- super(new ExprNodeNullEvaluator(expr), oi, output);
- }
+ public NullExpressionEvaluator(ExprNodeNullDesc expr, ObjectInspector oi, IDataOutputProvider output)
+ throws AlgebricksException {
+ super(new ExprNodeNullEvaluator(expr), oi, output);
+ }
}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/SerializableBuffer.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/SerializableBuffer.java
new file mode 100644
index 0000000..676989e
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/SerializableBuffer.java
@@ -0,0 +1,16 @@
+package edu.uci.ics.hivesterix.runtime.evaluator;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
+
+public interface SerializableBuffer extends AggregationBuffer {
+
+ public void deSerializeAggBuffer(byte[] data, int start, int len);
+
+ public void serializeAggBuffer(byte[] data, int start, int len);
+
+ public void serializeAggBuffer(DataOutput output) throws IOException;
+
+}
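
SerializableBuffer extends Hive's AggregationBuffer (a marker interface in this Hive version) with the byte-level hooks the serializable aggregate evaluator needs. A hypothetical implementation for a running long sum, using the BufferSerDeUtil helpers introduced above (LongSumBuffer is illustrative, not part of this patch; it assumes 8 bytes are reserved for this buffer's state):

import java.io.DataOutput;
import java.io.IOException;

import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;

// Hypothetical aggregation buffer: 8 bytes of state holding a running sum.
public class LongSumBuffer implements SerializableBuffer {

    public long sum;

    @Override
    public void deSerializeAggBuffer(byte[] data, int start, int len) {
        sum = BufferSerDeUtil.getLong(data, start);   // restore state from the frame
    }

    @Override
    public void serializeAggBuffer(byte[] data, int start, int len) {
        BufferSerDeUtil.writeLong(sum, data, start);  // write state back in place
    }

    @Override
    public void serializeAggBuffer(DataOutput output) throws IOException {
        output.writeLong(sum);                        // initial/streamed form, also 8 bytes
    }
}
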
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java
new file mode 100644
index 0000000..2e78663
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java
@@ -0,0 +1,143 @@
+package edu.uci.ics.hivesterix.runtime.evaluator;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.ql.udf.generic.Collector;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.BytesWritable;
+
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hivesterix.serde.lazy.LazyColumnar;
+import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
+import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
+import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyUnnestingFunction;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+public class UDTFFunctionEvaluator implements ICopyUnnestingFunction, Collector {
+
+ /**
+ * udtf function
+ */
+ private UDTFDesc func;
+
+ /**
+ * input object inspector
+ */
+ private ObjectInspector inputInspector;
+
+ /**
+ * output object inspector
+ */
+ private ObjectInspector outputInspector;
+
+ /**
+ * object inspector for udtf
+ */
+ private ObjectInspector[] udtfInputOIs;
+
+ /**
+ * generic udtf
+ */
+ private GenericUDTF udtf;
+
+ /**
+ * data output
+ */
+ private DataOutput out;
+
+ /**
+ * the input row object
+ */
+ private LazyColumnar cachedRowObject;
+
+ /**
+ * cached row object (input)
+ */
+ private Object[] cachedInputObjects;
+
+ /**
+ * serialization/deserialization
+ */
+ private SerDe lazySerDe;
+
+ /**
+ * columns fed into the UDTF
+ */
+ private int[] columns;
+
+ public UDTFFunctionEvaluator(UDTFDesc desc, Schema schema, int[] cols, DataOutput output) {
+ this.func = desc;
+ this.inputInspector = schema.toObjectInspector();
+ udtf = func.getGenericUDTF();
+ out = output;
+ columns = cols;
+ }
+
+ @Override
+ public void init(IFrameTupleReference tuple) throws AlgebricksException {
+ cachedInputObjects = new LazyObject[columns.length];
+ try {
+ cachedRowObject = (LazyColumnar) LazyFactory.createLazyObject(inputInspector);
+ outputInspector = udtf.initialize(udtfInputOIs);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ udtf.setCollector(this);
+ lazySerDe = new LazySerDe();
+ readIntoCache(tuple);
+ }
+
+ @Override
+ public boolean step() throws AlgebricksException {
+ try {
+ udtf.process(cachedInputObjects);
+ return true;
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ /**
+ * bind the tuple reference to the cached row object
+ *
+ * @param r
+ */
+ private void readIntoCache(IFrameTupleReference r) {
+ cachedRowObject.init(r);
+ for (int i = 0; i < cachedInputObjects.length; i++) {
+ cachedInputObjects[i] = cachedRowObject.getField(columns[i]);
+ }
+ }
+
+ /**
+ * serialize the result
+ *
+ * @param result
+ * the evaluation result
+ * @throws IOException
+ * @throws AlgebricksException
+ */
+ private void serializeResult(Object result) throws SerDeException, IOException {
+ BytesWritable outputWritable = (BytesWritable) lazySerDe.serialize(result, outputInspector);
+ out.write(outputWritable.getBytes(), 0, outputWritable.getLength());
+ }
+
+ @Override
+ public void collect(Object input) throws HiveException {
+ try {
+ serializeResult(input);
+ } catch (IOException e) {
+ throw new HiveException(e);
+ } catch (SerDeException e) {
+ throw new HiveException(e);
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryAscComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryAscComparatorFactory.java
new file mode 100644
index 0000000..f3b76e4
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryAscComparatorFactory.java
@@ -0,0 +1,34 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveByteBinaryAscComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveByteBinaryAscComparatorFactory INSTANCE = new HiveByteBinaryAscComparatorFactory();
+
+ private HiveByteBinaryAscComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private byte left;
+ private byte right;
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ left = b1[s1];
+ right = b2[s2];
+ if (left > right)
+ return 1;
+ else if (left == right)
+ return 0;
+ else
+ return -1;
+ }
+ };
+ }
+
+}
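
Each factory hands out a fresh, stateful IBinaryComparator (the anonymous class caches the decoded left/right values), so callers should obtain one comparator per thread rather than sharing instances. A small usage sketch against the byte comparator above (the demo class name is illustrative):

import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveByteBinaryAscComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;

public class ByteComparatorDemo {
    public static void main(String[] args) {
        IBinaryComparator cmp = HiveByteBinaryAscComparatorFactory.INSTANCE.createBinaryComparator();
        byte[] left = { 3 };
        byte[] right = { 7 };
        System.out.println(cmp.compare(left, 0, 1, right, 0, 1));  // -1: 3 sorts before 7 ascending
        System.out.println(cmp.compare(right, 0, 1, left, 0, 1));  // 1
        System.out.println(cmp.compare(left, 0, 1, left, 0, 1));   // 0
    }
}
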
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryDescComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryDescComparatorFactory.java
new file mode 100644
index 0000000..8d452dc
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryDescComparatorFactory.java
@@ -0,0 +1,33 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveByteBinaryDescComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveByteBinaryDescComparatorFactory INSTANCE = new HiveByteBinaryDescComparatorFactory();
+
+ private HiveByteBinaryDescComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private byte left;
+ private byte right;
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ left = b1[s1];
+ right = b2[s2];
+ if (left > right)
+ return -1;
+ else if (left == right)
+ return 0;
+ else
+ return 1;
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryAscComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryAscComparatorFactory.java
new file mode 100644
index 0000000..0b5350a
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryAscComparatorFactory.java
@@ -0,0 +1,35 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveDoubleBinaryAscComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveDoubleBinaryAscComparatorFactory INSTANCE = new HiveDoubleBinaryAscComparatorFactory();
+
+ private HiveDoubleBinaryAscComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private double left;
+ private double right;
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ left = Double.longBitsToDouble(LazyUtils.byteArrayToLong(b1, s1));
+ right = Double.longBitsToDouble(LazyUtils.byteArrayToLong(b2, s2));
+ if (left > right)
+ return 1;
+ else if (left == right)
+ return 0;
+ else
+ return -1;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryDescComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryDescComparatorFactory.java
new file mode 100644
index 0000000..2405956
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryDescComparatorFactory.java
@@ -0,0 +1,35 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveDoubleBinaryDescComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveDoubleBinaryDescComparatorFactory INSTANCE = new HiveDoubleBinaryDescComparatorFactory();
+
+ private HiveDoubleBinaryDescComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private double left;
+ private double right;
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ left = Double.longBitsToDouble(LazyUtils.byteArrayToLong(b1, s1));
+ right = Double.longBitsToDouble(LazyUtils.byteArrayToLong(b2, s2));
+ if (left > right)
+ return -1;
+ else if (left == right)
+ return 0;
+ else
+ return 1;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryAscComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryAscComparatorFactory.java
new file mode 100644
index 0000000..05a43e6
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryAscComparatorFactory.java
@@ -0,0 +1,35 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveFloatBinaryAscComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveFloatBinaryAscComparatorFactory INSTANCE = new HiveFloatBinaryAscComparatorFactory();
+
+ private HiveFloatBinaryAscComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private float left;
+ private float right;
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ left = Float.intBitsToFloat(LazyUtils.byteArrayToInt(b1, s1));
+ right = Float.intBitsToFloat(LazyUtils.byteArrayToInt(b2, s2));
+ if (left > right)
+ return 1;
+ else if (left == right)
+ return 0;
+ else
+ return -1;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryDescComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryDescComparatorFactory.java
new file mode 100644
index 0000000..2c44f97
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryDescComparatorFactory.java
@@ -0,0 +1,35 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveFloatBinaryDescComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveFloatBinaryDescComparatorFactory INSTANCE = new HiveFloatBinaryDescComparatorFactory();
+
+ private HiveFloatBinaryDescComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private float left;
+ private float right;
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ left = Float.intBitsToFloat(LazyUtils.byteArrayToInt(b1, s1));
+ right = Float.intBitsToFloat(LazyUtils.byteArrayToInt(b2, s2));
+ if (left > right)
+ return -1;
+ else if (left == right)
+ return 0;
+ else
+ return 1;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryAscComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryAscComparatorFactory.java
new file mode 100644
index 0000000..0127791
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryAscComparatorFactory.java
@@ -0,0 +1,40 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveIntegerBinaryAscComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static final HiveIntegerBinaryAscComparatorFactory INSTANCE = new HiveIntegerBinaryAscComparatorFactory();
+
+ private HiveIntegerBinaryAscComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private VInt left = new VInt();
+ private VInt right = new VInt();
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ LazyUtils.readVInt(b1, s1, left);
+ LazyUtils.readVInt(b2, s2, right);
+
+ if (left.length != l1 || right.length != l2)
+ throw new IllegalArgumentException("length mismatch in int comparator function actual: "
+ + left.length + "," + right.length + " expected " + l1 + "," + l2);
+
+ if (left.value > right.value)
+ return 1;
+ else if (left.value == right.value)
+ return 0;
+ else
+ return -1;
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryDescComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryDescComparatorFactory.java
new file mode 100644
index 0000000..5116337
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryDescComparatorFactory.java
@@ -0,0 +1,38 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveIntegerBinaryDescComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static final HiveIntegerBinaryDescComparatorFactory INSTANCE = new HiveIntegerBinaryDescComparatorFactory();
+
+ private HiveIntegerBinaryDescComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private VInt left = new VInt();
+ private VInt right = new VInt();
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ LazyUtils.readVInt(b1, s1, left);
+ LazyUtils.readVInt(b2, s2, right);
+ if (left.length != l1 || right.length != l2)
+ throw new IllegalArgumentException("length mismatch in int comparator function actual: "
+ + left.length + " expected " + l1);
+ if (left.value > right.value)
+ return -1;
+ else if (left.value == right.value)
+ return 0;
+ else
+ return 1;
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryAscComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryAscComparatorFactory.java
new file mode 100644
index 0000000..fa416a9
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryAscComparatorFactory.java
@@ -0,0 +1,38 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveLongBinaryAscComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static final HiveLongBinaryAscComparatorFactory INSTANCE = new HiveLongBinaryAscComparatorFactory();
+
+ private HiveLongBinaryAscComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private VLong left = new VLong();
+ private VLong right = new VLong();
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ LazyUtils.readVLong(b1, s1, left);
+ LazyUtils.readVLong(b2, s2, right);
+ if (left.length != l1 || right.length != l2)
+ throw new IllegalArgumentException("length mismatch in long comparator function actual: "
+ + left.length + " expected " + l1);
+ if (left.value > right.value)
+ return 1;
+ else if (left.value == right.value)
+ return 0;
+ else
+ return -1;
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryDescComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryDescComparatorFactory.java
new file mode 100644
index 0000000..e72dc62
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryDescComparatorFactory.java
@@ -0,0 +1,38 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveLongBinaryDescComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static final HiveLongBinaryDescComparatorFactory INSTANCE = new HiveLongBinaryDescComparatorFactory();
+
+ private HiveLongBinaryDescComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private VLong left = new VLong();
+ private VLong right = new VLong();
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ LazyUtils.readVLong(b1, s1, left);
+ LazyUtils.readVLong(b2, s2, right);
+ if (left.length != l1 || right.length != l2)
+ throw new IllegalArgumentException("length mismatch in long comparator function actual: "
+ + left.length + " expected " + l1);
+ if (left.value > right.value)
+ return -1;
+ else if (left.value == right.value)
+ return 0;
+ else
+ return 1;
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryAscComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryAscComparatorFactory.java
new file mode 100644
index 0000000..a3745fa
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryAscComparatorFactory.java
@@ -0,0 +1,35 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveShortBinaryAscComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveShortBinaryAscComparatorFactory INSTANCE = new HiveShortBinaryAscComparatorFactory();
+
+ private HiveShortBinaryAscComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private short left;
+ private short right;
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ left = LazyUtils.byteArrayToShort(b1, s1);
+ right = LazyUtils.byteArrayToShort(b2, s2);
+ if (left > right)
+ return 1;
+ else if (left == right)
+ return 0;
+ else
+ return -1;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryDescComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryDescComparatorFactory.java
new file mode 100644
index 0000000..44d3f43
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryDescComparatorFactory.java
@@ -0,0 +1,35 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveShortBinaryDescComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveShortBinaryDescComparatorFactory INSTANCE = new HiveShortBinaryDescComparatorFactory();
+
+ private HiveShortBinaryDescComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private short left;
+ private short right;
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ left = LazyUtils.byteArrayToShort(b1, s1);
+ right = LazyUtils.byteArrayToShort(b2, s2);
+ if (left > right)
+ return -1;
+ else if (left == right)
+ return 0;
+ else
+ return 1;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryAscComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryAscComparatorFactory.java
new file mode 100644
index 0000000..6da9716
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryAscComparatorFactory.java
@@ -0,0 +1,40 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import org.apache.hadoop.io.Text;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveStringBinaryAscComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveStringBinaryAscComparatorFactory INSTANCE = new HiveStringBinaryAscComparatorFactory();
+
+ private HiveStringBinaryAscComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private VInt leftLen = new VInt();
+ private VInt rightLen = new VInt();
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ LazyUtils.readVInt(b1, s1, leftLen);
+ LazyUtils.readVInt(b2, s2, rightLen);
+
+ if (leftLen.value + leftLen.length != l1 || rightLen.value + rightLen.length != l2)
+ throw new IllegalStateException("parse string: length mismatch, expected "
+ + (leftLen.value + leftLen.length) + ", " + (rightLen.value + rightLen.length)
+ + " but get " + l1 + ", " + l2);
+
+ return Text.Comparator.compareBytes(b1, s1 + leftLen.length, l1 - leftLen.length, b2, s2
+ + rightLen.length, l2 - rightLen.length);
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryDescComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryDescComparatorFactory.java
new file mode 100644
index 0000000..c579711
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryDescComparatorFactory.java
@@ -0,0 +1,39 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import org.apache.hadoop.io.WritableComparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveStringBinaryDescComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveStringBinaryDescComparatorFactory INSTANCE = new HiveStringBinaryDescComparatorFactory();
+
+ private HiveStringBinaryDescComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private VInt leftLen = new VInt();
+ private VInt rightLen = new VInt();
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ LazyUtils.readVInt(b1, s1, leftLen);
+ LazyUtils.readVInt(b2, s2, rightLen);
+
+ if (leftLen.value + leftLen.length != l1 || rightLen.value + rightLen.length != l2)
+ throw new IllegalStateException("parse string: length mismatch, expected "
+ + (leftLen.value + leftLen.length) + ", " + (rightLen.value + rightLen.length)
+ + " but get " + l1 + ", " + l2);
+
+ return -WritableComparator.compareBytes(b1, s1 + leftLen.length, l1 - leftLen.length, b2, s2
+ + rightLen.length, l2 - rightLen.length);
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java
new file mode 100644
index 0000000..d664341
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java
@@ -0,0 +1,367 @@
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
+import edu.uci.ics.hivesterix.runtime.evaluator.AggregationFunctionEvaluator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
+import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
+import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunction;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunctionFactory;
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+
+public class AggregationFunctionFactory implements ICopyAggregateFunctionFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * list of parameters' serialization
+ */
+ private List<String> parametersSerialization = new ArrayList<String>();
+
+ /**
+ * the name of the udf
+ */
+ private String genericUDAFName;
+
+ /**
+ * aggregation mode
+ */
+ private GenericUDAFEvaluator.Mode mode;
+
+ /**
+ * list of type info
+ */
+ private List<TypeInfo> types = new ArrayList<TypeInfo>();
+
+ /**
+ * distinct or not
+ */
+ private boolean distinct;
+
+ /**
+ * the schema of incoming rows
+ */
+ private Schema rowSchema;
+
+ /**
+ * list of parameters
+ */
+ private transient List<ExprNodeDesc> parametersOrigin;
+
+ /**
+ * row inspector
+ */
+ private transient ObjectInspector rowInspector = null;
+
+ /**
+ * output object inspector
+ */
+ private transient ObjectInspector outputInspector = null;
+
+ /**
+ * output object inspector
+ */
+ private transient ObjectInspector outputInspectorPartial = null;
+
+ /**
+ * parameter inspectors
+ */
+ private transient ObjectInspector[] parameterInspectors = null;
+
+ /**
+ * expression desc
+ */
+ private transient HashMap<Long, List<ExprNodeDesc>> parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
+
+ /**
+ * evaluators
+ */
+ private transient HashMap<Long, ExprNodeEvaluator[]> evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
+
+ /**
+ * cached parameter objects
+ */
+ private transient HashMap<Long, Object[]> cachedParameters = new HashMap<Long, Object[]>();
+
+ /**
+ * cached row object: one per thread
+ */
+ private transient HashMap<Long, LazyObject<? extends ObjectInspector>> cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ private transient HashMap<Long, SerDe> serDe = new HashMap<Long, SerDe>();
+
+ /**
+ * udaf evaluators
+ */
+ private transient HashMap<Long, GenericUDAFEvaluator> udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
+
+ /**
+ * udaf evaluators
+ */
+ private transient HashMap<Long, GenericUDAFEvaluator> udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
+
+ /**
+ * aggregation function desc
+ */
+ private transient AggregationDesc aggregator;
+
+ /**
+ * @param expression
+ * Algebricks aggregate function call expression
+ * @param oi
+ * the schema of the input rows
+ */
+ public AggregationFunctionFactory(AggregateFunctionCallExpression expression, Schema oi,
+ IVariableTypeEnvironment env) throws AlgebricksException {
+
+ try {
+ aggregator = (AggregationDesc) ExpressionTranslator.getHiveExpression(expression, env);
+ } catch (Exception e) {
+ // preserve the original exception as the cause instead of only its message
+ throw new AlgebricksException(e);
+ }
+ init(aggregator.getParameters(), aggregator.getGenericUDAFName(), aggregator.getMode(),
+ aggregator.getDistinct(), oi);
+ }
+
+ /**
+ * initializes the shared, serializable state of the aggregation function factory
+ *
+ * @param inputs
+ * @param name
+ * @param udafMode
+ * @param distinct
+ * @param oi
+ */
+ private void init(List<ExprNodeDesc> inputs, String name, GenericUDAFEvaluator.Mode udafMode, boolean distinct,
+ Schema oi) {
+ parametersOrigin = inputs;
+ genericUDAFName = name;
+ mode = udafMode;
+ this.distinct = distinct;
+ rowSchema = oi;
+
+ for (ExprNodeDesc input : inputs) {
+ TypeInfo type = input.getTypeInfo();
+ if (type instanceof StructTypeInfo) {
+ types.add(TypeInfoFactory.doubleTypeInfo);
+ } else
+ types.add(type);
+
+ String s = Utilities.serializeExpression(input);
+ parametersSerialization.add(s);
+ }
+ }
+
+ @Override
+ public synchronized ICopyAggregateFunction createAggregateFunction(IDataOutputProvider provider)
+ throws AlgebricksException {
+ if (parametersOrigin == null) {
+ Configuration config = new Configuration();
+ config.setClassLoader(this.getClass().getClassLoader());
+ /**
+ * set the context class loader in case Hive code calls Class.forName(...)
+ */
+ Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
+
+ parametersOrigin = new ArrayList<ExprNodeDesc>();
+ for (String serialization : parametersSerialization) {
+ parametersOrigin.add(Utilities.deserializeExpression(serialization, config));
+ }
+ }
+
+ /**
+ * exprs
+ */
+ if (parameterExprs == null)
+ parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
+
+ /**
+ * evaluators
+ */
+ if (evaluators == null)
+ evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
+
+ /**
+ * cached parameter objects
+ */
+ if (cachedParameters == null)
+ cachedParameters = new HashMap<Long, Object[]>();
+
+ /**
+ * cached row object: one per thread
+ */
+ if (cachedRowObjects == null)
+ cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ if (serDe == null)
+ serDe = new HashMap<Long, SerDe>();
+
+ /**
+ * UDAF functions
+ */
+ if (udafsComplete == null)
+ udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
+
+ /**
+ * UDAF functions
+ */
+ if (udafsPartial == null)
+ udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
+
+ if (parameterInspectors == null)
+ parameterInspectors = new ObjectInspector[parametersOrigin.size()];
+
+ if (rowInspector == null)
+ rowInspector = rowSchema.toObjectInspector();
+
+ // get current thread id
+ long threadId = Thread.currentThread().getId();
+
+ /**
+ * expressions are thread-local
+ */
+ List<ExprNodeDesc> parameters = parameterExprs.get(threadId);
+ if (parameters == null) {
+ parameters = new ArrayList<ExprNodeDesc>();
+ for (ExprNodeDesc parameter : parametersOrigin)
+ parameters.add(parameter.clone());
+ parameterExprs.put(threadId, parameters);
+ }
+
+ /**
+ * cached parameter objects
+ */
+ Object[] cachedParas = cachedParameters.get(threadId);
+ if (cachedParas == null) {
+ cachedParas = new Object[parameters.size()];
+ cachedParameters.put(threadId, cachedParas);
+ }
+
+ /**
+ * cached row object: one per thread
+ */
+ LazyObject<? extends ObjectInspector> cachedRowObject = cachedRowObjects.get(threadId);
+ if (cachedRowObject == null) {
+ cachedRowObject = LazyFactory.createLazyObject(rowInspector);
+ cachedRowObjects.put(threadId, cachedRowObject);
+ }
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ SerDe lazySer = serDe.get(threadId);
+ if (lazySer == null) {
+ lazySer = new LazySerDe();
+ serDe.put(threadId, lazySer);
+ }
+
+ /**
+ * evaluators
+ */
+ ExprNodeEvaluator[] evals = evaluators.get(threadId);
+ if (evals == null) {
+ evals = new ExprNodeEvaluator[parameters.size()];
+ evaluators.put(threadId, evals);
+ }
+
+ GenericUDAFEvaluator udafPartial;
+ GenericUDAFEvaluator udafComplete;
+
+ // initialize object inspectors
+ try {
+ /**
+ * evaluators, UDAFs, and object inspectors are shared within one thread
+ */
+ for (int i = 0; i < evals.length; i++) {
+ if (evals[i] == null) {
+ evals[i] = ExprNodeEvaluatorFactory.get(parameters.get(i));
+ if (parameterInspectors[i] == null) {
+ parameterInspectors[i] = evals[i].initialize(rowInspector);
+ } else {
+ evals[i].initialize(rowInspector);
+ }
+ }
+ }
+
+ udafComplete = udafsComplete.get(threadId);
+ if (udafComplete == null) {
+ try {
+ udafComplete = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, types, distinct, false);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ udafsComplete.put(threadId, udafComplete);
+ udafComplete.init(mode, parameterInspectors);
+ }
+
+ // multiple stage group by, determined by the mode parameter
+ if (outputInspector == null)
+ outputInspector = udafComplete.init(mode, parameterInspectors);
+
+ // initialize the partial (external group-by) UDAF
+ GenericUDAFEvaluator.Mode partialMode;
+ // adjust mode for external groupby
+ if (mode == GenericUDAFEvaluator.Mode.COMPLETE)
+ partialMode = GenericUDAFEvaluator.Mode.PARTIAL1;
+ else if (mode == GenericUDAFEvaluator.Mode.FINAL)
+ partialMode = GenericUDAFEvaluator.Mode.PARTIAL2;
+ else
+ partialMode = mode;
+ udafPartial = udafsPartial.get(threadId);
+ if (udafPartial == null) {
+ try {
+ udafPartial = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, types, distinct, false);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ udafPartial.init(partialMode, parameterInspectors);
+ udafsPartial.put(threadId, udafPartial);
+ }
+
+ // multiple stage group by, determined by the mode parameter
+ if (outputInspectorPartial == null)
+ outputInspectorPartial = udafPartial.init(partialMode, parameterInspectors);
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e);
+ }
+
+ return new AggregationFunctionEvaluator(parameters, types, genericUDAFName, mode, distinct, rowInspector,
+ provider.getDataOutput(), evals, parameterInspectors, cachedParas, lazySer, cachedRowObject,
+ udafPartial, udafComplete, outputInspector, outputInspectorPartial);
+ }
+
+ public String toString() {
+ return "aggregation function expression evaluator factory: " + this.genericUDAFName;
+ }
+}
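The factory above keeps all per-evaluator state (cloned expressions, evaluators, cached rows, SerDes, UDAF instances) in HashMaps keyed by Thread.currentThread().getId(), so concurrent tasks in one JVM never share mutable evaluator state. Below is a minimal, Hive-independent sketch of that per-thread caching pattern; the class and member names are illustrative only and not part of this patch.

import java.util.HashMap;
import java.util.Map;

public class PerThreadCacheSketch {
    // one cached buffer per thread, keyed by thread id, mirroring the factory's HashMaps
    private final Map<Long, int[]> cachedBuffers = new HashMap<Long, int[]>();

    public synchronized int[] bufferForCurrentThread(int size) {
        long threadId = Thread.currentThread().getId();
        int[] buffer = cachedBuffers.get(threadId);
        if (buffer == null) {
            // first call from this thread: create and cache a fresh instance
            buffer = new int[size];
            cachedBuffers.put(threadId, buffer);
        }
        return buffer;
    }

    public static void main(String[] args) {
        PerThreadCacheSketch cache = new PerThreadCacheSketch();
        // the same thread receives the same cached instance on every call
        System.out.println(cache.bufferForCurrentThread(4) == cache.bufferForCurrentThread(4));
    }
}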
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java
new file mode 100644
index 0000000..54a1155
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java
@@ -0,0 +1,366 @@
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
+import edu.uci.ics.hivesterix.runtime.evaluator.AggregatuibFunctionSerializableEvaluator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
+import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
+import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunction;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunctionFactory;
+
+public class AggregationFunctionSerializableFactory implements ICopySerializableAggregateFunctionFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * list of parameters' serialization
+ */
+ private List<String> parametersSerialization = new ArrayList<String>();
+
+ /**
+ * the name of the udf
+ */
+ private String genericUDAFName;
+
+ /**
+ * aggregation mode
+ */
+ private GenericUDAFEvaluator.Mode mode;
+
+ /**
+ * list of type info
+ */
+ private List<TypeInfo> types = new ArrayList<TypeInfo>();
+
+ /**
+ * distinct or not
+ */
+ private boolean distinct;
+
+ /**
+ * the schema of incoming rows
+ */
+ private Schema rowSchema;
+
+ /**
+ * list of parameters
+ */
+ private transient List<ExprNodeDesc> parametersOrigin;
+
+ /**
+ * row inspector
+ */
+ private transient ObjectInspector rowInspector = null;
+
+ /**
+ * output object inspector
+ */
+ private transient ObjectInspector outputInspector = null;
+
+ /**
+ * output object inspector
+ */
+ private transient ObjectInspector outputInspectorPartial = null;
+
+ /**
+ * parameter inspectors
+ */
+ private transient ObjectInspector[] parameterInspectors = null;
+
+ /**
+ * expression desc
+ */
+ private transient HashMap<Long, List<ExprNodeDesc>> parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
+
+ /**
+ * evaluators
+ */
+ private transient HashMap<Long, ExprNodeEvaluator[]> evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
+
+ /**
+ * cached parameter objects
+ */
+ private transient HashMap<Long, Object[]> cachedParameters = new HashMap<Long, Object[]>();
+
+ /**
+ * cached row object: one per thread
+ */
+ private transient HashMap<Long, LazyObject<? extends ObjectInspector>> cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ private transient HashMap<Long, SerDe> serDe = new HashMap<Long, SerDe>();
+
+ /**
+ * udaf evaluators
+ */
+ private transient HashMap<Long, GenericUDAFEvaluator> udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
+
+ /**
+ * udaf evaluators
+ */
+ private transient HashMap<Long, GenericUDAFEvaluator> udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
+
+ /**
+ * aggregation function desc
+ */
+ private transient AggregationDesc aggregator;
+
+ /**
+ * @param expression
+ * Algebricks aggregate function call expression
+ * @param oi
+ * the schema of the input rows
+ */
+ public AggregationFunctionSerializableFactory(AggregateFunctionCallExpression expression, Schema oi,
+ IVariableTypeEnvironment env) throws AlgebricksException {
+
+ try {
+ aggregator = (AggregationDesc) ExpressionTranslator.getHiveExpression(expression, env);
+ } catch (Exception e) {
+ // preserve the original exception as the cause instead of only its message
+ throw new AlgebricksException(e);
+ }
+ init(aggregator.getParameters(), aggregator.getGenericUDAFName(), aggregator.getMode(),
+ aggregator.getDistinct(), oi);
+ }
+
+ /**
+ * initializes the shared, serializable state of the aggregation function factory
+ *
+ * @param inputs
+ * @param name
+ * @param udafMode
+ * @param distinct
+ * @param oi
+ */
+ private void init(List<ExprNodeDesc> inputs, String name, GenericUDAFEvaluator.Mode udafMode, boolean distinct,
+ Schema oi) {
+ parametersOrigin = inputs;
+ genericUDAFName = name;
+ mode = udafMode;
+ this.distinct = distinct;
+ rowSchema = oi;
+
+ for (ExprNodeDesc input : inputs) {
+ TypeInfo type = input.getTypeInfo();
+ if (type instanceof StructTypeInfo) {
+ types.add(TypeInfoFactory.doubleTypeInfo);
+ } else
+ types.add(type);
+
+ String s = Utilities.serializeExpression(input);
+ parametersSerialization.add(s);
+ }
+ }
+
+ @Override
+ public synchronized ICopySerializableAggregateFunction createAggregateFunction() throws AlgebricksException {
+ if (parametersOrigin == null) {
+ Configuration config = new Configuration();
+ config.setClassLoader(this.getClass().getClassLoader());
+ /**
+ * set the context class loader in case Hive code calls Class.forName(...)
+ */
+ Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
+
+ parametersOrigin = new ArrayList<ExprNodeDesc>();
+ for (String serialization : parametersSerialization) {
+ parametersOrigin.add(Utilities.deserializeExpression(serialization, config));
+ }
+ }
+
+ /**
+ * exprs
+ */
+ if (parameterExprs == null)
+ parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
+
+ /**
+ * evaluators
+ */
+ if (evaluators == null)
+ evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
+
+ /**
+ * cached parameter objects
+ */
+ if (cachedParameters == null)
+ cachedParameters = new HashMap<Long, Object[]>();
+
+ /**
+ * cached row object: one per thread
+ */
+ if (cachedRowObjects == null)
+ cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ if (serDe == null)
+ serDe = new HashMap<Long, SerDe>();
+
+ /**
+ * UDAF functions
+ */
+ if (udafsComplete == null)
+ udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
+
+ /**
+ * UDAF functions
+ */
+ if (udafsPartial == null)
+ udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
+
+ if (parameterInspectors == null)
+ parameterInspectors = new ObjectInspector[parametersOrigin.size()];
+
+ if (rowInspector == null)
+ rowInspector = rowSchema.toObjectInspector();
+
+ // get current thread id
+ long threadId = Thread.currentThread().getId();
+
+ /**
+ * expressions are thread-local
+ */
+ List<ExprNodeDesc> parameters = parameterExprs.get(threadId);
+ if (parameters == null) {
+ parameters = new ArrayList<ExprNodeDesc>();
+ for (ExprNodeDesc parameter : parametersOrigin)
+ parameters.add(parameter.clone());
+ parameterExprs.put(threadId, parameters);
+ }
+
+ /**
+ * cached parameter objects
+ */
+ Object[] cachedParas = cachedParameters.get(threadId);
+ if (cachedParas == null) {
+ cachedParas = new Object[parameters.size()];
+ cachedParameters.put(threadId, cachedParas);
+ }
+
+ /**
+ * cached row object: one per thread
+ */
+ LazyObject<? extends ObjectInspector> cachedRowObject = cachedRowObjects.get(threadId);
+ if (cachedRowObject == null) {
+ cachedRowObject = LazyFactory.createLazyObject(rowInspector);
+ cachedRowObjects.put(threadId, cachedRowObject);
+ }
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ SerDe lazySer = serDe.get(threadId);
+ if (lazySer == null) {
+ lazySer = new LazySerDe();
+ serDe.put(threadId, lazySer);
+ }
+
+ /**
+ * evaluators
+ */
+ ExprNodeEvaluator[] evals = evaluators.get(threadId);
+ if (evals == null) {
+ evals = new ExprNodeEvaluator[parameters.size()];
+ evaluators.put(threadId, evals);
+ }
+
+ GenericUDAFEvaluator udafPartial;
+ GenericUDAFEvaluator udafComplete;
+
+ // initialize object inspectors
+ try {
+ /**
+ * evaluators, UDAFs, and object inspectors are shared within one thread
+ */
+ for (int i = 0; i < evals.length; i++) {
+ if (evals[i] == null) {
+ evals[i] = ExprNodeEvaluatorFactory.get(parameters.get(i));
+ if (parameterInspectors[i] == null) {
+ parameterInspectors[i] = evals[i].initialize(rowInspector);
+ } else {
+ evals[i].initialize(rowInspector);
+ }
+ }
+ }
+
+ udafComplete = udafsComplete.get(threadId);
+ if (udafComplete == null) {
+ try {
+ udafComplete = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, types, distinct, false);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ udafsComplete.put(threadId, udafComplete);
+ udafComplete.init(mode, parameterInspectors);
+ }
+
+ // multiple stage group by, determined by the mode parameter
+ if (outputInspector == null)
+ outputInspector = udafComplete.init(mode, parameterInspectors);
+
+ // initialize the partial (external group-by) UDAF
+ GenericUDAFEvaluator.Mode partialMode;
+ // adjust mode for external groupby
+ if (mode == GenericUDAFEvaluator.Mode.COMPLETE)
+ partialMode = GenericUDAFEvaluator.Mode.PARTIAL1;
+ else if (mode == GenericUDAFEvaluator.Mode.FINAL)
+ partialMode = GenericUDAFEvaluator.Mode.PARTIAL2;
+ else
+ partialMode = mode;
+ udafPartial = udafsPartial.get(threadId);
+ if (udafPartial == null) {
+ try {
+ udafPartial = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, types, distinct, false);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ udafPartial.init(partialMode, parameterInspectors);
+ udafsPartial.put(threadId, udafPartial);
+ }
+
+ // multiple stage group by, determined by the mode parameter
+ if (outputInspectorPartial == null)
+ outputInspectorPartial = udafPartial.init(partialMode, parameterInspectors);
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e);
+ }
+
+ return new AggregatuibFunctionSerializableEvaluator(parameters, types, genericUDAFName, mode, distinct,
+ rowInspector, evals, parameterInspectors, cachedParas, lazySer, cachedRowObject, udafPartial,
+ udafComplete, outputInspector, outputInspectorPartial);
+ }
+
+ public String toString() {
+ return "aggregation function expression evaluator factory: " + this.genericUDAFName;
+ }
+
+}
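Both aggregation factories derive the mode for the partial (external group-by) evaluator from the requested mode: COMPLETE becomes PARTIAL1, FINAL becomes PARTIAL2, and any other mode passes through unchanged. The sketch below captures only that mapping; the local Mode enum stands in for org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode so the example stays self-contained.

public class PartialModeSketch {
    // stand-in for GenericUDAFEvaluator.Mode
    enum Mode { PARTIAL1, PARTIAL2, FINAL, COMPLETE }

    static Mode toPartialMode(Mode mode) {
        switch (mode) {
            case COMPLETE:
                return Mode.PARTIAL1; // single-stage request: partial side produces first-stage partials
            case FINAL:
                return Mode.PARTIAL2; // final merge request: partial side merges existing partials
            default:
                return mode;          // PARTIAL1/PARTIAL2 stay as requested
        }
    }

    public static void main(String[] args) {
        System.out.println(toPartialMode(Mode.COMPLETE)); // PARTIAL1
        System.out.println(toPartialMode(Mode.FINAL));    // PARTIAL2
        System.out.println(toPartialMode(Mode.PARTIAL2)); // PARTIAL2
    }
}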
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ColumnExpressionEvaluatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ColumnExpressionEvaluatorFactory.java
new file mode 100644
index 0000000..6f51bfe
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ColumnExpressionEvaluatorFactory.java
@@ -0,0 +1,41 @@
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
+import edu.uci.ics.hivesterix.runtime.evaluator.ColumnExpressionEvaluator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+
+public class ColumnExpressionEvaluatorFactory implements ICopyEvaluatorFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ private ExprNodeColumnDesc expr;
+
+ private Schema inputSchema;
+
+ public ColumnExpressionEvaluatorFactory(ILogicalExpression expression, Schema schema, IVariableTypeEnvironment env)
+ throws AlgebricksException {
+ try {
+ expr = (ExprNodeColumnDesc) ExpressionTranslator.getHiveExpression(expression, env);
+ } catch (Exception e) {
+ throw new AlgebricksException(e.getMessage());
+ }
+ inputSchema = schema;
+ }
+
+ public ICopyEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException {
+ return new ColumnExpressionEvaluator(expr, inputSchema.toObjectInspector(), output);
+ }
+
+ public String toString() {
+ return "column expression evaluator factory: " + expr.toString();
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ConstantExpressionEvaluatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ConstantExpressionEvaluatorFactory.java
new file mode 100644
index 0000000..4ecdb70
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ConstantExpressionEvaluatorFactory.java
@@ -0,0 +1,41 @@
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
+import edu.uci.ics.hivesterix.runtime.evaluator.ConstantExpressionEvaluator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+
+public class ConstantExpressionEvaluatorFactory implements ICopyEvaluatorFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ private ExprNodeConstantDesc expr;
+
+ private Schema schema;
+
+ public ConstantExpressionEvaluatorFactory(ILogicalExpression expression, Schema inputSchema,
+ IVariableTypeEnvironment env) throws AlgebricksException {
+ try {
+ expr = (ExprNodeConstantDesc) ExpressionTranslator.getHiveExpression(expression, env);
+ } catch (Exception e) {
+ throw new AlgebricksException(e.getMessage());
+ }
+ schema = inputSchema;
+ }
+
+ public ICopyEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException {
+ return new ConstantExpressionEvaluator(expr, schema.toObjectInspector(), output);
+ }
+
+ public String toString() {
+ return "constant expression evaluator factory: " + expr.toString();
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/FieldExpressionEvaluatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/FieldExpressionEvaluatorFactory.java
new file mode 100644
index 0000000..ef0c104
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/FieldExpressionEvaluatorFactory.java
@@ -0,0 +1,40 @@
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
+import edu.uci.ics.hivesterix.runtime.evaluator.FieldExpressionEvaluator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+
+public class FieldExpressionEvaluatorFactory implements ICopyEvaluatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ private ExprNodeFieldDesc expr;
+
+ private Schema inputSchema;
+
+ public FieldExpressionEvaluatorFactory(ILogicalExpression expression, Schema schema, IVariableTypeEnvironment env)
+ throws AlgebricksException {
+ try {
+ expr = (ExprNodeFieldDesc) ExpressionTranslator.getHiveExpression(expression, env);
+ } catch (Exception e) {
+ throw new AlgebricksException(e.getMessage());
+ }
+ inputSchema = schema;
+ }
+
+ public ICopyEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException {
+ return new FieldExpressionEvaluator(expr, inputSchema.toObjectInspector(), output);
+ }
+
+ public String toString() {
+ return "field access expression evaluator factory: " + expr.toString();
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/HiveExpressionRuntimeProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/HiveExpressionRuntimeProvider.java
new file mode 100644
index 0000000..c3b4b17
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/HiveExpressionRuntimeProvider.java
@@ -0,0 +1,167 @@
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionConstant;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression.FunctionKind;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IExpressionRuntimeProvider;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.LogicalExpressionJobGenToExpressionRuntimeProviderAdapter.AggregateFunctionFactoryAdapter;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.LogicalExpressionJobGenToExpressionRuntimeProviderAdapter.ScalarEvaluatorFactoryAdapter;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.LogicalExpressionJobGenToExpressionRuntimeProviderAdapter.UnnestingFunctionFactoryAdapter;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.StatefulFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema;
+import edu.uci.ics.hyracks.algebricks.core.jobgen.impl.JobGenContext;
+import edu.uci.ics.hyracks.algebricks.runtime.base.IAggregateEvaluatorFactory;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunctionFactory;
+import edu.uci.ics.hyracks.algebricks.runtime.base.IRunningAggregateEvaluatorFactory;
+import edu.uci.ics.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import edu.uci.ics.hyracks.algebricks.runtime.base.IUnnestingEvaluatorFactory;
+
+public class HiveExpressionRuntimeProvider implements IExpressionRuntimeProvider {
+
+ public static final IExpressionRuntimeProvider INSTANCE = new HiveExpressionRuntimeProvider();
+
+ @Override
+ public IAggregateEvaluatorFactory createAggregateFunctionFactory(AggregateFunctionCallExpression expr,
+ IVariableTypeEnvironment env, IOperatorSchema[] inputSchemas, JobGenContext context)
+ throws AlgebricksException {
+ Schema schema = this.getSchema(inputSchemas[0], env);
+ return new AggregateFunctionFactoryAdapter(new AggregationFunctionFactory(expr, schema, env));
+ }
+
+ @Override
+ public ICopySerializableAggregateFunctionFactory createSerializableAggregateFunctionFactory(
+ AggregateFunctionCallExpression expr, IVariableTypeEnvironment env, IOperatorSchema[] inputSchemas,
+ JobGenContext context) throws AlgebricksException {
+ Schema schema = this.getSchema(inputSchemas[0], env);
+ return new AggregationFunctionSerializableFactory(expr, schema, env);
+ }
+
+ @Override
+ public IRunningAggregateEvaluatorFactory createRunningAggregateFunctionFactory(StatefulFunctionCallExpression expr,
+ IVariableTypeEnvironment env, IOperatorSchema[] inputSchemas, JobGenContext context)
+ throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public IUnnestingEvaluatorFactory createUnnestingFunctionFactory(UnnestingFunctionCallExpression expr,
+ IVariableTypeEnvironment env, IOperatorSchema[] inputSchemas, JobGenContext context)
+ throws AlgebricksException {
+ Schema schema = this.getSchema(inputSchemas[0], env);
+ return new UnnestingFunctionFactoryAdapter(new UnnestingFunctionFactory(expr, schema, env));
+ }
+
+ public IScalarEvaluatorFactory createEvaluatorFactory(ILogicalExpression expr, IVariableTypeEnvironment env,
+ IOperatorSchema[] inputSchemas, JobGenContext context) throws AlgebricksException {
+ switch (expr.getExpressionTag()) {
+ case VARIABLE: {
+ VariableReferenceExpression v = (VariableReferenceExpression) expr;
+ return new ScalarEvaluatorFactoryAdapter(createVariableEvaluatorFactory(v, env, inputSchemas, context));
+ }
+ case CONSTANT: {
+ ConstantExpression c = (ConstantExpression) expr;
+ return new ScalarEvaluatorFactoryAdapter(createConstantEvaluatorFactory(c, env, inputSchemas, context));
+ }
+ case FUNCTION_CALL: {
+ AbstractFunctionCallExpression fun = (AbstractFunctionCallExpression) expr;
+ FunctionIdentifier fid = fun.getFunctionIdentifier();
+
+ if (fid.getName().equals(ExpressionConstant.FIELDACCESS)) {
+ return new ScalarEvaluatorFactoryAdapter(createFieldExpressionEvaluatorFactory(fun, env,
+ inputSchemas, context));
+ }
+
+ if (fid.getName().equals(ExpressionConstant.NULL)) {
+ return new ScalarEvaluatorFactoryAdapter(createNullExpressionEvaluatorFactory(fun, env,
+ inputSchemas, context));
+ }
+
+ if (fun.getKind() == FunctionKind.SCALAR) {
+ ScalarFunctionCallExpression scalar = (ScalarFunctionCallExpression) fun;
+ return new ScalarEvaluatorFactoryAdapter(createScalarFunctionEvaluatorFactory(scalar, env,
+ inputSchemas, context));
+ } else {
+ throw new AlgebricksException("Cannot create evaluator for function " + fun + " of kind "
+ + fun.getKind());
+ }
+ }
+ default: {
+ throw new IllegalStateException();
+ }
+ }
+ }
+
+ private ICopyEvaluatorFactory createVariableEvaluatorFactory(VariableReferenceExpression expr,
+ IVariableTypeEnvironment env, IOperatorSchema[] inputSchemas, JobGenContext context)
+ throws AlgebricksException {
+ Schema schema = this.getSchema(inputSchemas[0], env);
+ return new ColumnExpressionEvaluatorFactory(expr, schema, env);
+ }
+
+ private ICopyEvaluatorFactory createScalarFunctionEvaluatorFactory(AbstractFunctionCallExpression expr,
+ IVariableTypeEnvironment env, IOperatorSchema[] inputSchemas, JobGenContext context)
+ throws AlgebricksException {
+ List<String> names = new ArrayList<String>();
+ List<TypeInfo> types = new ArrayList<TypeInfo>();
+ for (IOperatorSchema inputSchema : inputSchemas) {
+ Schema schema = this.getSchema(inputSchema, env);
+ names.addAll(schema.getNames());
+ types.addAll(schema.getTypes());
+ }
+ Schema inputSchema = new Schema(names, types);
+ return new ScalarFunctionExpressionEvaluatorFactory(expr, inputSchema, env);
+ }
+
+ private ICopyEvaluatorFactory createFieldExpressionEvaluatorFactory(AbstractFunctionCallExpression expr,
+ IVariableTypeEnvironment env, IOperatorSchema[] inputSchemas, JobGenContext context)
+ throws AlgebricksException {
+ Schema schema = this.getSchema(inputSchemas[0], env);
+ return new FieldExpressionEvaluatorFactory(expr, schema, env);
+ }
+
+ private ICopyEvaluatorFactory createNullExpressionEvaluatorFactory(AbstractFunctionCallExpression expr,
+ IVariableTypeEnvironment env, IOperatorSchema[] inputSchemas, JobGenContext context)
+ throws AlgebricksException {
+ Schema schema = this.getSchema(inputSchemas[0], env);
+ return new NullExpressionEvaluatorFactory(expr, schema, env);
+ }
+
+ private ICopyEvaluatorFactory createConstantEvaluatorFactory(ConstantExpression expr, IVariableTypeEnvironment env,
+ IOperatorSchema[] inputSchemas, JobGenContext context) throws AlgebricksException {
+ Schema schema = this.getSchema(inputSchemas[0], env);
+ return new ConstantExpressionEvaluatorFactory(expr, schema, env);
+ }
+
+ private Schema getSchema(IOperatorSchema inputSchema, IVariableTypeEnvironment env) throws AlgebricksException {
+ List<String> names = new ArrayList<String>();
+ List<TypeInfo> types = new ArrayList<TypeInfo>();
+ Iterator<LogicalVariable> variables = inputSchema.iterator();
+ while (variables.hasNext()) {
+ LogicalVariable var = variables.next();
+ names.add(var.toString());
+ types.add((TypeInfo) env.getVarType(var));
+ }
+
+ Schema schema = new Schema(names, types);
+ return schema;
+ }
+
+}
\ No newline at end of file
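HiveExpressionRuntimeProvider is a thin dispatcher: it inspects the expression tag (variable, constant, function call) and, for function calls, the function identifier and kind, then delegates to the matching evaluator factory. A stripped-down sketch of that dispatch shape follows; the Expr interface and the returned strings are invented purely for illustration.

public class DispatchSketch {
    enum Tag { VARIABLE, CONSTANT, FUNCTION_CALL }

    interface Expr {
        Tag tag();
    }

    // mirrors the provider's switch-on-tag structure
    static String chooseFactory(Expr expr) {
        switch (expr.tag()) {
            case VARIABLE:
                return "column expression evaluator factory";
            case CONSTANT:
                return "constant expression evaluator factory";
            case FUNCTION_CALL:
                return "scalar function expression evaluator factory";
            default:
                throw new IllegalStateException("unhandled expression tag: " + expr.tag());
        }
    }

    public static void main(String[] args) {
        Expr constant = new Expr() {
            public Tag tag() {
                return Tag.CONSTANT;
            }
        };
        System.out.println(chooseFactory(constant));
    }
}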
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/NullExpressionEvaluatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/NullExpressionEvaluatorFactory.java
new file mode 100644
index 0000000..f37b825
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/NullExpressionEvaluatorFactory.java
@@ -0,0 +1,41 @@
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
+import edu.uci.ics.hivesterix.runtime.evaluator.NullExpressionEvaluator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+
+public class NullExpressionEvaluatorFactory implements ICopyEvaluatorFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ private ExprNodeNullDesc expr;
+
+ private Schema schema;
+
+ public NullExpressionEvaluatorFactory(ILogicalExpression expression, Schema inputSchema,
+ IVariableTypeEnvironment env) throws AlgebricksException {
+ try {
+ expr = (ExprNodeNullDesc) ExpressionTranslator.getHiveExpression(expression, env);
+ } catch (Exception e) {
+ throw new AlgebricksException(e.getMessage());
+ }
+ schema = inputSchema;
+ }
+
+ public ICopyEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException {
+ return new NullExpressionEvaluator(expr, schema.toObjectInspector(), output);
+ }
+
+ public String toString() {
+ return "null expression evaluator factory: " + expr.toString();
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ScalarFunctionExpressionEvaluatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ScalarFunctionExpressionEvaluatorFactory.java
new file mode 100644
index 0000000..cbac10a
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ScalarFunctionExpressionEvaluatorFactory.java
@@ -0,0 +1,69 @@
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
+import edu.uci.ics.hivesterix.runtime.evaluator.FunctionExpressionEvaluator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+
+public class ScalarFunctionExpressionEvaluatorFactory implements ICopyEvaluatorFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ private transient ExprNodeGenericFuncDesc expr;
+
+ private String exprSerialization;
+
+ private Schema inputSchema;
+
+ private transient Configuration config;
+
+ public ScalarFunctionExpressionEvaluatorFactory(ILogicalExpression expression, Schema schema,
+ IVariableTypeEnvironment env) throws AlgebricksException {
+ try {
+ expr = (ExprNodeGenericFuncDesc) ExpressionTranslator.getHiveExpression(expression, env);
+
+ exprSerialization = Utilities.serializeExpression(expr);
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e.getMessage());
+ }
+ inputSchema = schema;
+ }
+
+ public synchronized ICopyEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException {
+ if (expr == null) {
+ configClassLoader();
+ expr = (ExprNodeGenericFuncDesc) Utilities.deserializeExpression(exprSerialization, config);
+ }
+
+ ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) expr.clone();
+ return new FunctionExpressionEvaluator(funcDesc, inputSchema.toObjectInspector(), output);
+ }
+
+ private void configClassLoader() {
+ config = new Configuration();
+ ClassLoader loader = this.getClass().getClassLoader();
+ config.setClassLoader(loader);
+ Thread.currentThread().setContextClassLoader(loader);
+ }
+
+ public String toString() {
+ if (expr == null) {
+ configClassLoader();
+ expr = (ExprNodeGenericFuncDesc) Utilities.deserializeExpression(exprSerialization, new Configuration());
+ }
+
+ return "function expression evaluator factory: " + expr.getExprString();
+ }
+
+}
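ScalarFunctionExpressionEvaluatorFactory ships only the serialized String form of the (transient) Hive expression and rebuilds the object lazily once the factory has been deserialized on a worker. Below is a minimal, Hive-free sketch of that rehydrate-on-first-use pattern; the member names are illustrative.

import java.io.Serializable;

public class LazyRehydrationSketch implements Serializable {
    private static final long serialVersionUID = 1L;

    private transient StringBuilder expr;   // stands in for the transient expression object
    private final String exprSerialization; // the only form that survives Java serialization

    public LazyRehydrationSketch(String expression) {
        this.expr = new StringBuilder(expression);
        this.exprSerialization = expression;
    }

    public synchronized String exprString() {
        if (expr == null) {
            // transient field is null after deserialization; rebuild it from the serialized form
            expr = new StringBuilder(exprSerialization);
        }
        return expr.toString();
    }

    public static void main(String[] args) {
        System.out.println(new LazyRehydrationSketch("a + b").exprString());
    }
}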
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/UnnestingFunctionFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/UnnestingFunctionFactory.java
new file mode 100644
index 0000000..3b22513
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/UnnestingFunctionFactory.java
@@ -0,0 +1,40 @@
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
+import edu.uci.ics.hivesterix.runtime.evaluator.UDTFFunctionEvaluator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyUnnestingFunction;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyUnnestingFunctionFactory;
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+
+public class UnnestingFunctionFactory implements ICopyUnnestingFunctionFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ private UDTFDesc expr;
+
+ private Schema inputSchema;
+
+ private int[] columns;
+
+ public UnnestingFunctionFactory(ILogicalExpression expression, Schema schema, IVariableTypeEnvironment env)
+ throws AlgebricksException {
+ try {
+ expr = (UDTFDesc) ExpressionTranslator.getHiveExpression(expression, env);
+ } catch (Exception e) {
+ throw new AlgebricksException(e.getMessage());
+ }
+ inputSchema = schema;
+ }
+
+ @Override
+ public ICopyUnnestingFunction createUnnestingFunction(IDataOutputProvider provider) throws AlgebricksException {
+ return new UDTFFunctionEvaluator(expr, inputSchema, columns, provider.getDataOutput());
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveDoubleBinaryHashFunctionFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveDoubleBinaryHashFunctionFactory.java
new file mode 100644
index 0000000..b636009
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveDoubleBinaryHashFunctionFactory.java
@@ -0,0 +1,29 @@
+package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
+
+public class HiveDoubleBinaryHashFunctionFactory implements IBinaryHashFunctionFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveDoubleBinaryHashFunctionFactory INSTANCE = new HiveDoubleBinaryHashFunctionFactory();
+
+ private HiveDoubleBinaryHashFunctionFactory() {
+ }
+
+ @Override
+ public IBinaryHashFunction createBinaryHashFunction() {
+ // hash the 8-byte double via its long bit pattern
+ return new IBinaryHashFunction() {
+ private Double value;
+
+ @Override
+ public int hash(byte[] bytes, int offset, int length) {
+ value = Double.longBitsToDouble(LazyUtils.byteArrayToLong(bytes, offset));
+ return value.hashCode();
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveIntegerBinaryHashFunctionFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveIntegerBinaryHashFunctionFactory.java
new file mode 100644
index 0000000..90e6ce4
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveIntegerBinaryHashFunctionFactory.java
@@ -0,0 +1,33 @@
+package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
+
+public class HiveIntegerBinaryHashFunctionFactory implements IBinaryHashFunctionFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static IBinaryHashFunctionFactory INSTANCE = new HiveIntegerBinaryHashFunctionFactory();
+
+ private HiveIntegerBinaryHashFunctionFactory() {
+ }
+
+ @Override
+ public IBinaryHashFunction createBinaryHashFunction() {
+
+ return new IBinaryHashFunction() {
+ private VInt value = new VInt();
+
+ @Override
+ public int hash(byte[] bytes, int offset, int length) {
+ LazyUtils.readVInt(bytes, offset, value);
+ if (value.length != length)
+ throw new IllegalArgumentException("length mismatch in int hash function actual: " + length
+ + " expected " + value.length);
+ return value.value;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveLongBinaryHashFunctionFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveLongBinaryHashFunctionFactory.java
new file mode 100644
index 0000000..1b61f67
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveLongBinaryHashFunctionFactory.java
@@ -0,0 +1,30 @@
+package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
+
+public class HiveLongBinaryHashFunctionFactory implements IBinaryHashFunctionFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static IBinaryHashFunctionFactory INSTANCE = new HiveLongBinaryHashFunctionFactory();
+
+ private HiveLongBinaryHashFunctionFactory() {
+ }
+
+ @Override
+ public IBinaryHashFunction createBinaryHashFunction() {
+
+ return new IBinaryHashFunction() {
+ private VLong value = new VLong();
+
+ @Override
+ public int hash(byte[] bytes, int offset, int length) {
+ LazyUtils.readVLong(bytes, offset, value);
+ return (int) value.value;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveRawBinaryHashFunctionFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveRawBinaryHashFunctionFactory.java
new file mode 100644
index 0000000..f2b7b44
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveRawBinaryHashFunctionFactory.java
@@ -0,0 +1,31 @@
+package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
+
+public class HiveRawBinaryHashFunctionFactory implements IBinaryHashFunctionFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static IBinaryHashFunctionFactory INSTANCE = new HiveRawBinaryHashFunctionFactory();
+
+ private HiveRawBinaryHashFunctionFactory() {
+
+ }
+
+ @Override
+ public IBinaryHashFunction createBinaryHashFunction() {
+
+ return new IBinaryHashFunction() {
+
+ @Override
+ public int hash(byte[] bytes, int offset, int length) {
+ int value = 1;
+ int end = offset + length;
+ for (int i = offset; i < end; i++)
+ value = value * 31 + (int) bytes[i];
+ return value;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveStingBinaryHashFunctionFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveStingBinaryHashFunctionFactory.java
new file mode 100644
index 0000000..a9cf6fd
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveStingBinaryHashFunctionFactory.java
@@ -0,0 +1,41 @@
+package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
+
+public class HiveStingBinaryHashFunctionFactory implements IBinaryHashFunctionFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveStingBinaryHashFunctionFactory INSTANCE = new HiveStingBinaryHashFunctionFactory();
+
+ private HiveStingBinaryHashFunctionFactory() {
+ }
+
+ @Override
+ public IBinaryHashFunction createBinaryHashFunction() {
+ // hash the VInt-length-prefixed string payload
+ return new IBinaryHashFunction() {
+ private VInt len = new VInt();
+
+ @Override
+ public int hash(byte[] bytes, int offset, int length) {
+ LazyUtils.readVInt(bytes, offset, len);
+ if (len.value + len.length != length)
+ throw new IllegalStateException("parse string: length mismatch, expected "
+ + (len.value + len.length) + " but get " + length);
+ return hashBytes(bytes, offset + len.length, length - len.length);
+ }
+
+ public int hashBytes(byte[] bytes, int offset, int length) {
+ int value = 1;
+ int end = offset + length;
+ for (int i = offset; i < end; i++)
+ value = value * 31 + (int) bytes[i];
+ return value;
+ }
+ };
+ }
+
+}
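The string hash function above first reads the VInt length prefix, checks it against the field length, and then hashes only the string payload with a multiply-by-31 polynomial. The sketch below keeps that validate-then-hash structure but substitutes a plain 2-byte big-endian length prefix for Hive's variable-length VInt encoding, so it runs without the hivesterix LazyUtils dependency.

public class PrefixedStringHashSketch {

    static int hash(byte[] bytes, int offset, int length) {
        // 2-byte big-endian length prefix (simplified stand-in for the VInt prefix)
        int declared = ((bytes[offset] & 0xff) << 8) | (bytes[offset + 1] & 0xff);
        if (declared + 2 != length) {
            throw new IllegalStateException("parse string: length mismatch, expected " + (declared + 2)
                    + " but got " + length);
        }
        // hash only the payload, skipping the prefix, as the factory above does
        return hashBytes(bytes, offset + 2, declared);
    }

    static int hashBytes(byte[] bytes, int offset, int length) {
        int value = 1;
        int end = offset + length;
        for (int i = offset; i < end; i++) {
            value = value * 31 + (int) bytes[i];
        }
        return value;
    }

    public static void main(String[] args) {
        byte[] field = { 0, 3, 'a', 'b', 'c' }; // declared length 3, payload "abc"
        System.out.println(hash(field, 0, field.length));
    }
}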
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/MurmurHash3BinaryHashFunctionFamily.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/MurmurHash3BinaryHashFunctionFamily.java
similarity index 100%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/MurmurHash3BinaryHashFunctionFamily.java
rename to hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/MurmurHash3BinaryHashFunctionFamily.java
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleAscNormalizedKeyComputerFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleAscNormalizedKeyComputerFactory.java
new file mode 100644
index 0000000..6ac012f
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleAscNormalizedKeyComputerFactory.java
@@ -0,0 +1,24 @@
+package edu.uci.ics.hivesterix.runtime.factory.normalize;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+
+public class HiveDoubleAscNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public INormalizedKeyComputer createNormalizedKeyComputer() {
+
+ return new INormalizedKeyComputer() {
+
+ @Override
+ public int normalize(byte[] bytes, int start, int length) {
+ int header = LazyUtils.byteArrayToInt(bytes, start);
+ long unsignedValue = (long) header;
+ return (int) ((unsignedValue - ((long) Integer.MIN_VALUE)) & 0xffffffffL);
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleDescNormalizedKeyComputerFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleDescNormalizedKeyComputerFactory.java
new file mode 100644
index 0000000..3044109
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleDescNormalizedKeyComputerFactory.java
@@ -0,0 +1,24 @@
+package edu.uci.ics.hivesterix.runtime.factory.normalize;
+
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+
+public class HiveDoubleDescNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
+
+ private static final long serialVersionUID = 1L;
+ private final INormalizedKeyComputerFactory ascNormalizedKeyComputerFactory = new HiveDoubleAscNormalizedKeyComputerFactory();
+
+ @Override
+ public INormalizedKeyComputer createNormalizedKeyComputer() {
+ return new INormalizedKeyComputer() {
+ private INormalizedKeyComputer nmkComputer = ascNormalizedKeyComputerFactory.createNormalizedKeyComputer();
+
+ @Override
+ public int normalize(byte[] bytes, int start, int length) {
+ int nk = nmkComputer.normalize(bytes, start, length);
+ return (int) ((long) Integer.MAX_VALUE - (long) (nk - Integer.MIN_VALUE));
+ }
+
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerAscNormalizedKeyComputerFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerAscNormalizedKeyComputerFactory.java
new file mode 100644
index 0000000..a1d4d48
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerAscNormalizedKeyComputerFactory.java
@@ -0,0 +1,29 @@
+package edu.uci.ics.hivesterix.runtime.factory.normalize;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+
+public class HiveIntegerAscNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public INormalizedKeyComputer createNormalizedKeyComputer() {
+
+ return new INormalizedKeyComputer() {
+ private VInt vint = new VInt();
+
+ @Override
+ public int normalize(byte[] bytes, int start, int length) {
+ LazyUtils.readVInt(bytes, start, vint);
+ if (vint.length != length)
+ throw new IllegalArgumentException("length mismatch in int comparator function actual: "
+ + vint.length + " expected " + length);
+ long unsignedValue = (long) vint.value;
+ return (int) ((unsignedValue - ((long) Integer.MIN_VALUE)) & 0xffffffffL);
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerDescNormalizedKeyComputerFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerDescNormalizedKeyComputerFactory.java
new file mode 100644
index 0000000..b8a30a8
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerDescNormalizedKeyComputerFactory.java
@@ -0,0 +1,29 @@
+package edu.uci.ics.hivesterix.runtime.factory.normalize;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+
+public class HiveIntegerDescNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public INormalizedKeyComputer createNormalizedKeyComputer() {
+
+ return new INormalizedKeyComputer() {
+ private VInt vint = new VInt();
+
+ @Override
+ public int normalize(byte[] bytes, int start, int length) {
+ LazyUtils.readVInt(bytes, start, vint);
+ if (vint.length != length)
+ throw new IllegalArgumentException("length mismatch in int comparator function actual: "
+ + vint.length + " expected " + length);
+ long unsignedValue = (long) vint.value;
+ return (int) ((long) 0xffffffff - unsignedValue);
+ }
+ };
+ }
+}
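The normalized-key computers all rest on one trick: shift a signed 32-bit value by subtracting Integer.MIN_VALUE so that the resulting keys, compared as unsigned ints, preserve the original signed order; the descending variants then flip the ascending key. A compact sketch of both steps (method names are illustrative):

public class NormalizedKeySketch {

    // ascending key: signed order becomes unsigned order
    static int normalizeAsc(int value) {
        long unsignedValue = (long) value;
        return (int) ((unsignedValue - ((long) Integer.MIN_VALUE)) & 0xffffffffL);
    }

    // descending key: the same flip the Desc factories apply to the ascending key
    static int normalizeDesc(int value) {
        int nk = normalizeAsc(value);
        return (int) ((long) Integer.MAX_VALUE - (long) (nk - Integer.MIN_VALUE));
    }

    // compare two normalized keys as unsigned 32-bit integers
    static int compareUnsigned(int nk1, int nk2) {
        return Long.compare(nk1 & 0xffffffffL, nk2 & 0xffffffffL);
    }

    public static void main(String[] args) {
        int[] values = { -7, -1, 0, 1, 42 };
        for (int i = 1; i < values.length; i++) {
            // ascending keys increase with the values, descending keys decrease
            System.out.println(compareUnsigned(normalizeAsc(values[i - 1]), normalizeAsc(values[i])) < 0);
            System.out.println(compareUnsigned(normalizeDesc(values[i - 1]), normalizeDesc(values[i])) > 0);
        }
    }
}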
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongAscNormalizedKeyComputerFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongAscNormalizedKeyComputerFactory.java
new file mode 100644
index 0000000..a893d19
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongAscNormalizedKeyComputerFactory.java
@@ -0,0 +1,63 @@
+package edu.uci.ics.hivesterix.runtime.factory.normalize;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+
+public class HiveLongAscNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public INormalizedKeyComputer createNormalizedKeyComputer() {
+
+ return new INormalizedKeyComputer() {
+ private static final int POSTIVE_LONG_MASK = (3 << 30);
+ private static final int NON_NEGATIVE_INT_MASK = (2 << 30);
+ private static final int NEGATIVE_LONG_MASK = (0 << 30);
+ private VLong vlong = new VLong();
+
+ @Override
+ public int normalize(byte[] bytes, int start, int length) {
+ LazyUtils.readVLong(bytes, start, vlong);
+ if (vlong.length != length)
+ throw new IllegalArgumentException("length mismatch in int comparator function actual: "
+ + vlong.length + " expected " + length);
+ long value = (long) vlong.value;
+ int highValue = (int) (value >> 32);
+ if (highValue > 0) {
+ /**
+ * larger than Integer.MAX
+ */
+ int highNmk = getKey(highValue);
+ highNmk >>= 2;
+ highNmk |= POSTIVE_LONG_MASK;
+ return highNmk;
+ } else if (highValue == 0) {
+ /**
+ * high 32 bits are zero: 0 <= value < 2^32
+ */
+ int lowNmk = (int) value;
+ lowNmk >>= 2;
+ lowNmk |= NON_NEGATIVE_INT_MASK;
+ return lowNmk;
+ } else {
+ /**
+ * less than 0; TODO: have not optimized for that
+ */
+ int highNmk = getKey(highValue);
+ highNmk >>= 2;
+ highNmk |= NEGATIVE_LONG_MASK;
+ return highNmk;
+ }
+ }
+
+ private int getKey(int value) {
+ long unsignedFirstValue = (long) value;
+ int nmk = (int) ((unsignedFirstValue - ((long) Integer.MIN_VALUE)) & 0xffffffffL);
+ return nmk;
+ }
+ };
+ }
+}
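A self-contained sketch of the range-tagging scheme above: the top two bits of the normalized key record which range the long falls into (negative, fits in 32 bits and non-negative, or larger than 32 bits), and the remaining bits carry a shifted prefix, so comparing the keys as unsigned 32-bit values agrees with the long order for the samples below. The helper reproduces the branch logic on a plain long for illustration only; the factory itself reads a VLong-encoded field.

public class LongAscNormalizedKeySketch {
    private static final int POSITIVE_LONG_MASK = (3 << 30);
    private static final int NON_NEGATIVE_INT_MASK = (2 << 30);
    private static final int NEGATIVE_LONG_MASK = (0 << 30);

    // same branch structure as the factory above, applied to a plain long
    static int normalize(long value) {
        int highValue = (int) (value >> 32);
        if (highValue > 0) {
            return (getKey(highValue) >> 2) | POSITIVE_LONG_MASK;
        } else if (highValue == 0) {
            return (((int) value) >> 2) | NON_NEGATIVE_INT_MASK;
        } else {
            return (getKey(highValue) >> 2) | NEGATIVE_LONG_MASK;
        }
    }

    private static int getKey(int value) {
        return (int) (((long) value - Integer.MIN_VALUE) & 0xffffffffL);
    }

    static long unsigned(int key) {
        return key & 0xffffffffL;
    }

    public static void main(String[] args) {
        long[] samples = { -10L, 100L, 10_000_000_000L }; // negative, small non-negative, beyond 32 bits
        for (int i = 1; i < samples.length; i++) {
            // prints true twice: unsigned key order matches the value order for these samples
            System.out.println(unsigned(normalize(samples[i - 1])) < unsigned(normalize(samples[i])));
        }
    }
}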
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongDescNormalizedKeyComputerFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongDescNormalizedKeyComputerFactory.java
new file mode 100644
index 0000000..cc5661b
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongDescNormalizedKeyComputerFactory.java
@@ -0,0 +1,25 @@
+package edu.uci.ics.hivesterix.runtime.factory.normalize;
+
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+
+public class HiveLongDescNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
+
+ private static final long serialVersionUID = 1L;
+ private final INormalizedKeyComputerFactory ascNormalizedKeyComputerFactory = new HiveLongAscNormalizedKeyComputerFactory();
+
+ @Override
+ public INormalizedKeyComputer createNormalizedKeyComputer() {
+ return new INormalizedKeyComputer() {
+ private INormalizedKeyComputer nmkComputer = ascNormalizedKeyComputerFactory.createNormalizedKeyComputer();
+
+ @Override
+ public int normalize(byte[] bytes, int start, int length) {
+ int nk = nmkComputer.normalize(bytes, start, length);
+ return (int) ((long) Integer.MAX_VALUE - (long) (nk - Integer.MIN_VALUE));
+ }
+
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringAscNormalizedKeyComputerFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringAscNormalizedKeyComputerFactory.java
new file mode 100644
index 0000000..d0429d6
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringAscNormalizedKeyComputerFactory.java
@@ -0,0 +1,40 @@
+package edu.uci.ics.hivesterix.runtime.factory.normalize;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+
+public class HiveStringAscNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public INormalizedKeyComputer createNormalizedKeyComputer() {
+
+ return new INormalizedKeyComputer() {
+ private VInt len = new VInt();
+
+ @Override
+ public int normalize(byte[] bytes, int start, int length) {
+ LazyUtils.readVInt(bytes, start, len);
+
+ if (len.value + len.length != length)
+ throw new IllegalStateException("parse string: length mismatch, expected "
+ + (len.value + len.length) + " but got " + length);
+ int nk = 0;
+ int offset = start + len.length;
+ for (int i = 0; i < 2; ++i) {
+ nk <<= 16;
+ if (i < len.value) {
+ char character = UTF8StringPointable.charAt(bytes, offset);
+ nk += ((int) character) & 0xffff;
+ offset += UTF8StringPointable.charSize(bytes, offset);
+ }
+ }
+ return nk;
+ }
+ };
+ }
+}
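The factory above packs the first two characters of the string into a 16-bits-per-character prefix key. A sketch of the same packing on a plain java.lang.String (the real code reads Hive's VInt-prefixed UTF-8 bytes through UTF8StringPointable):

public class StringPrefixKeySketch {
    // mirrors the two-character packing above, using String.charAt instead of UTF8StringPointable
    static int prefixKey(String s) {
        int nk = 0;
        for (int i = 0; i < 2; ++i) {
            nk <<= 16;
            if (i < s.length()) {
                nk += s.charAt(i) & 0xffff;
            }
        }
        return nk;
    }

    public static void main(String[] args) {
        // compared as unsigned ints, the prefix keys order "ab" < "ac" < "b",
        // matching lexicographic order for these values
        System.out.println((prefixKey("ab") & 0xffffffffL) < (prefixKey("ac") & 0xffffffffL)); // true
        System.out.println((prefixKey("ac") & 0xffffffffL) < (prefixKey("b") & 0xffffffffL));  // true
    }
}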
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringDescNormalizedKeyComputerFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringDescNormalizedKeyComputerFactory.java
new file mode 100644
index 0000000..15b2d27
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringDescNormalizedKeyComputerFactory.java
@@ -0,0 +1,37 @@
+package edu.uci.ics.hivesterix.runtime.factory.normalize;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+
+public class HiveStringDescNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public INormalizedKeyComputer createNormalizedKeyComputer() {
+ return new INormalizedKeyComputer() {
+ private VInt len = new VInt();
+
+ @Override
+ public int normalize(byte[] bytes, int start, int length) {
+ LazyUtils.readVInt(bytes, start, len);
+ if (len.value + len.length != length)
+ throw new IllegalStateException("parse string: length mismatch, expected "
+ + (len.value + len.length) + " but got " + length);
+ int nk = 0;
+ int offset = start + len.length;
+ for (int i = 0; i < 2; ++i) {
+ nk <<= 16;
+ if (i < len.value) {
+ nk += ((int) UTF8StringPointable.charAt(bytes, offset)) & 0xffff;
+ offset += UTF8StringPointable.charSize(bytes, offset);
+ }
+ }
+ return (int) ((long) 0xffffffff - (long) nk);
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/nullwriter/HiveNullWriterFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/nullwriter/HiveNullWriterFactory.java
new file mode 100644
index 0000000..590bd61
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/nullwriter/HiveNullWriterFactory.java
@@ -0,0 +1,28 @@
+package edu.uci.ics.hivesterix.runtime.factory.nullwriter;
+
+import java.io.DataOutput;
+
+import edu.uci.ics.hyracks.api.dataflow.value.INullWriter;
+import edu.uci.ics.hyracks.api.dataflow.value.INullWriterFactory;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
+public class HiveNullWriterFactory implements INullWriterFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ public static HiveNullWriterFactory INSTANCE = new HiveNullWriterFactory();
+
+ @Override
+ public INullWriter createNullWriter() {
+ return new HiveNullWriter();
+ }
+}
+
+class HiveNullWriter implements INullWriter {
+
+ @Override
+ public void writeNull(DataOutput out) throws HyracksDataException {
+ // do nothing
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspector.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspector.java
new file mode 100644
index 0000000..677e20e
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspector.java
@@ -0,0 +1,19 @@
+package edu.uci.ics.hivesterix.runtime.inspector;
+
+import edu.uci.ics.hyracks.algebricks.data.IBinaryBooleanInspector;
+
+public class HiveBinaryBooleanInspector implements IBinaryBooleanInspector {
+
+ HiveBinaryBooleanInspector() {
+ }
+
+ @Override
+ public boolean getBooleanValue(byte[] bytes, int offset, int length) {
+ if (length == 0)
+ return false;
+ if (length != 1)
+ throw new IllegalStateException("boolean field error: with length " + length);
+ return bytes[offset] == 1;
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspectorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspectorFactory.java
new file mode 100644
index 0000000..22a6065
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspectorFactory.java
@@ -0,0 +1,20 @@
+package edu.uci.ics.hivesterix.runtime.inspector;
+
+import edu.uci.ics.hyracks.algebricks.data.IBinaryBooleanInspector;
+import edu.uci.ics.hyracks.algebricks.data.IBinaryBooleanInspectorFactory;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+
+public class HiveBinaryBooleanInspectorFactory implements IBinaryBooleanInspectorFactory {
+ private static final long serialVersionUID = 1L;
+ public static HiveBinaryBooleanInspectorFactory INSTANCE = new HiveBinaryBooleanInspectorFactory();
+
+ private HiveBinaryBooleanInspectorFactory() {
+
+ }
+
+ @Override
+ public IBinaryBooleanInspector createBinaryBooleanInspector(IHyracksTaskContext arg0) {
+ return new HiveBinaryBooleanInspector();
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspector.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspector.java
new file mode 100644
index 0000000..555afee
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspector.java
@@ -0,0 +1,22 @@
+package edu.uci.ics.hivesterix.runtime.inspector;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.algebricks.data.IBinaryIntegerInspector;
+
+public class HiveBinaryIntegerInspector implements IBinaryIntegerInspector {
+ private VInt value = new VInt();
+
+ HiveBinaryIntegerInspector() {
+ }
+
+ @Override
+ public int getIntegerValue(byte[] bytes, int offset, int length) {
+ LazyUtils.readVInt(bytes, offset, value);
+ if (value.length != length)
+ throw new IllegalArgumentException("length mismatch in int hash function actual: " + length + " expected "
+ + value.length);
+ return value.value;
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspectorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspectorFactory.java
new file mode 100644
index 0000000..bb93a60
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspectorFactory.java
@@ -0,0 +1,20 @@
+package edu.uci.ics.hivesterix.runtime.inspector;
+
+import edu.uci.ics.hyracks.algebricks.data.IBinaryIntegerInspector;
+import edu.uci.ics.hyracks.algebricks.data.IBinaryIntegerInspectorFactory;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+
+public class HiveBinaryIntegerInspectorFactory implements IBinaryIntegerInspectorFactory {
+ private static final long serialVersionUID = 1L;
+ public static HiveBinaryIntegerInspectorFactory INSTANCE = new HiveBinaryIntegerInspectorFactory();
+
+ private HiveBinaryIntegerInspectorFactory() {
+
+ }
+
+ @Override
+ public IBinaryIntegerInspector createBinaryIntegerInspector(IHyracksTaskContext arg0) {
+ return new HiveBinaryIntegerInspector();
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveConnectorPolicyAssignmentPolicy.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveConnectorPolicyAssignmentPolicy.java
new file mode 100644
index 0000000..cfceb26
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveConnectorPolicyAssignmentPolicy.java
@@ -0,0 +1,68 @@
+package edu.uci.ics.hivesterix.runtime.jobgen;
+
+import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicy;
+import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicyAssignmentPolicy;
+import edu.uci.ics.hyracks.api.dataflow.connectors.PipeliningConnectorPolicy;
+import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedBlockingConnectorPolicy;
+import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedPipeliningConnectorPolicy;
+import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedReceiveSideMaterializedBlockingConnectorPolicy;
+import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor;
+
+public class HiveConnectorPolicyAssignmentPolicy implements IConnectorPolicyAssignmentPolicy {
+ public enum Policy {
+ PIPELINING,
+ SEND_SIDE_MAT_PIPELINING,
+ SEND_SIDE_MAT_BLOCKING,
+ SEND_SIDE_MAT_RECEIVE_SIDE_MAT_BLOCKING;
+ };
+
+ private static final long serialVersionUID = 1L;
+
+ private final IConnectorPolicy pipeliningPolicy = new PipeliningConnectorPolicy();
+ private final IConnectorPolicy sendSideMatPipeliningPolicy = new SendSideMaterializedPipeliningConnectorPolicy();
+ private final IConnectorPolicy sendSideMatBlockingPolicy = new SendSideMaterializedBlockingConnectorPolicy();
+ private final IConnectorPolicy sendSideMatReceiveSideMatBlockingPolicy = new SendSideMaterializedReceiveSideMaterializedBlockingConnectorPolicy();
+ private final Policy policy;
+
+ public HiveConnectorPolicyAssignmentPolicy(Policy policy) {
+ this.policy = policy;
+ }
+
+ @Override
+ public IConnectorPolicy getConnectorPolicyAssignment(IConnectorDescriptor c, int nProducers, int nConsumers,
+ int[] fanouts) {
+ if (c instanceof MToNPartitioningMergingConnectorDescriptor) {
+ // avoid deadlocks
+ switch (policy) {
+ case PIPELINING:
+ case SEND_SIDE_MAT_PIPELINING:
+ return sendSideMatPipeliningPolicy;
+ case SEND_SIDE_MAT_BLOCKING:
+ return sendSideMatBlockingPolicy;
+ case SEND_SIDE_MAT_RECEIVE_SIDE_MAT_BLOCKING:
+ return sendSideMatReceiveSideMatBlockingPolicy;
+ default:
+ return sendSideMatPipeliningPolicy;
+ }
+ } else if (c instanceof MToNPartitioningConnectorDescriptor) {
+ // support different repartitioning policies
+ switch (policy) {
+ case PIPELINING:
+ return pipeliningPolicy;
+ case SEND_SIDE_MAT_PIPELINING:
+ return sendSideMatPipeliningPolicy;
+ case SEND_SIDE_MAT_BLOCKING:
+ return sendSideMatBlockingPolicy;
+ case SEND_SIDE_MAT_RECEIVE_SIDE_MAT_BLOCKING:
+ return sendSideMatReceiveSideMatBlockingPolicy;
+ default:
+ return pipeliningPolicy;
+ }
+ } else {
+ // pipelining for other connectors
+ return pipeliningPolicy;
+ }
+ }
+}
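A sketch of how this policy is expected to be installed on a Hyracks job. The setConnectorPolicyAssignmentPolicy call and the hive.hyracks.connectorpolicy property name are assumptions for illustration, not part of this patch:

import edu.uci.ics.hivesterix.runtime.jobgen.HiveConnectorPolicyAssignmentPolicy;
import edu.uci.ics.hivesterix.runtime.jobgen.HiveConnectorPolicyAssignmentPolicy.Policy;
import edu.uci.ics.hyracks.api.job.JobSpecification;

public class ConnectorPolicyWiringSketch {
    // Reads the desired policy from a (hypothetical) config value and installs it,
    // falling back to plain pipelining when the value is absent or unrecognized.
    public static void applyPolicy(JobSpecification spec, String configured) {
        Policy policy;
        try {
            policy = Policy.valueOf(configured == null ? "PIPELINING" : configured.trim().toUpperCase());
        } catch (IllegalArgumentException e) {
            policy = Policy.PIPELINING;
        }
        // assumed JobSpecification hook for connector-policy assignment
        spec.setConnectorPolicyAssignmentPolicy(new HiveConnectorPolicyAssignmentPolicy(policy));
    }
}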
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSink.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSink.java
new file mode 100644
index 0000000..ccc2e6c
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSink.java
@@ -0,0 +1,32 @@
+package edu.uci.ics.hivesterix.runtime.jobgen;
+
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSink;
+import edu.uci.ics.hyracks.algebricks.core.algebra.properties.IPartitioningProperty;
+import edu.uci.ics.hyracks.algebricks.core.algebra.properties.RandomPartitioningProperty;
+
+public class HiveDataSink implements IDataSink {
+
+ private Object[] schema;
+
+ private Object fsOperator;
+
+ public HiveDataSink(Object sink, Object[] sourceSchema) {
+ schema = sourceSchema;
+ fsOperator = sink;
+ }
+
+ @Override
+ public Object getId() {
+ return fsOperator;
+ }
+
+ @Override
+ public Object[] getSchemaTypes() {
+ return schema;
+ }
+
+ public IPartitioningProperty getPartitioningProperty() {
+ return new RandomPartitioningProperty(new HiveDomain());
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSource.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSource.java
new file mode 100644
index 0000000..67b743b
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSource.java
@@ -0,0 +1,47 @@
+package edu.uci.ics.hivesterix.runtime.jobgen;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSource;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSourcePropertiesProvider;
+import edu.uci.ics.hyracks.algebricks.core.algebra.properties.FunctionalDependency;
+
+public class HiveDataSource<P> implements IDataSource<P> {
+
+ private P source;
+
+ private Object[] schema;
+
+ public HiveDataSource(P dataSource, Object[] sourceSchema) {
+ source = dataSource;
+ schema = sourceSchema;
+ }
+
+ @Override
+ public P getId() {
+ return source;
+ }
+
+ @Override
+ public Object[] getSchemaTypes() {
+ return schema;
+ }
+
+ @Override
+ public void computeFDs(List<LogicalVariable> scanVariables, List<FunctionalDependency> fdList) {
+ }
+
+ @Override
+ public IDataSourcePropertiesProvider getPropertiesProvider() {
+ return new HiveDataSourcePartitioningProvider();
+ }
+
+ @Override
+ public String toString() {
+ PartitionDesc desc = (PartitionDesc) source;
+ return desc.getTableName();
+ }
+}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSourcePartitioningProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSourcePartitioningProvider.java
similarity index 60%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSourcePartitioningProvider.java
rename to hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSourcePartitioningProvider.java
index 08dd684..bb9c4ce 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSourcePartitioningProvider.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSourcePartitioningProvider.java
@@ -11,16 +11,13 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.properties.RandomPartitioningProperty;
import edu.uci.ics.hyracks.algebricks.core.algebra.properties.StructuralPropertiesVector;
-public class HiveDataSourcePartitioningProvider implements
- IDataSourcePropertiesProvider {
+public class HiveDataSourcePartitioningProvider implements IDataSourcePropertiesProvider {
- @Override
- public IPhysicalPropertiesVector computePropertiesVector(
- List<LogicalVariable> scanVariables) {
- IPartitioningProperty property = new RandomPartitioningProperty(
- new HiveDomain());
- IPhysicalPropertiesVector vector = new StructuralPropertiesVector(
- property, new LinkedList<ILocalStructuralProperty>());
- return vector;
- }
+ @Override
+ public IPhysicalPropertiesVector computePropertiesVector(List<LogicalVariable> scanVariables) {
+ IPartitioningProperty property = new RandomPartitioningProperty(new HiveDomain());
+ IPhysicalPropertiesVector vector = new StructuralPropertiesVector(property,
+ new LinkedList<ILocalStructuralProperty>());
+ return vector;
+ }
}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDomain.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDomain.java
new file mode 100644
index 0000000..8b1d3b5
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDomain.java
@@ -0,0 +1,17 @@
+package edu.uci.ics.hivesterix.runtime.jobgen;
+
+import edu.uci.ics.hyracks.algebricks.core.algebra.properties.INodeDomain;
+
+public class HiveDomain implements INodeDomain {
+
+ @Override
+ public boolean sameAs(INodeDomain domain) {
+ return true;
+ }
+
+ @Override
+ public Integer cardinality() {
+ return 0;
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveMetaDataProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveMetaDataProvider.java
new file mode 100644
index 0000000..daf6a7f
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveMetaDataProvider.java
@@ -0,0 +1,137 @@
+package edu.uci.ics.hivesterix.runtime.jobgen;
+
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+
+import edu.uci.ics.hivesterix.logical.expression.HiveFunctionInfo;
+import edu.uci.ics.hivesterix.runtime.jobgen.HiveScanRuntimeGenerator;
+import edu.uci.ics.hivesterix.runtime.jobgen.HiveWriteRuntimeGenerator;
+import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSink;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSource;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema;
+import edu.uci.ics.hyracks.algebricks.core.jobgen.impl.JobGenContext;
+import edu.uci.ics.hyracks.algebricks.data.IPrinterFactory;
+import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntimeFactory;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+
+@SuppressWarnings("rawtypes")
+public class HiveMetaDataProvider<S, T> implements IMetadataProvider<S, T> {
+
+ private Operator fileSink;
+ private Schema outputSchema;
+ private HashMap<S, IDataSource<S>> dataSourceMap;
+
+ public HiveMetaDataProvider(Operator fsOp, Schema oi, HashMap<S, IDataSource<S>> map) {
+ fileSink = fsOp;
+ outputSchema = oi;
+ dataSourceMap = map;
+ }
+
+ @Override
+ public IDataSourceIndex<T, S> findDataSourceIndex(T indexId, S dataSourceId) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public IDataSource<S> findDataSource(S id) throws AlgebricksException {
+ return dataSourceMap.get(id);
+ }
+
+ @Override
+ public boolean scannerOperatorIsLeaf(IDataSource<S> dataSource) {
+ return true;
+ }
+
+ @Override
+ public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getScannerRuntime(IDataSource<S> dataSource,
+ List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables, boolean projectPushed,
+ IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv, JobGenContext context, JobSpecification jobSpec)
+ throws AlgebricksException {
+
+ S desc = dataSource.getId();
+ HiveScanRuntimeGenerator generator = new HiveScanRuntimeGenerator((PartitionDesc) desc);
+ return generator.getRuntimeOperatorAndConstraint(dataSource, scanVariables, projectVariables, projectPushed,
+ context, jobSpec);
+ }
+
+ @Override
+ public Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint> getWriteFileRuntime(IDataSink sink,
+ int[] printColumns, IPrinterFactory[] printerFactories, RecordDescriptor inputDesc) {
+
+ HiveWriteRuntimeGenerator generator = new HiveWriteRuntimeGenerator((FileSinkOperator) fileSink, outputSchema);
+ return generator.getWriterRuntime(inputDesc);
+ }
+
+ @Override
+ public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getDeleteRuntime(IDataSource<S> arg0,
+ IOperatorSchema arg1, List<LogicalVariable> arg2, LogicalVariable arg3, RecordDescriptor arg4,
+ JobGenContext arg5, JobSpecification arg6) throws AlgebricksException {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getInsertRuntime(IDataSource<S> arg0,
+ IOperatorSchema arg1, List<LogicalVariable> arg2, LogicalVariable arg3, RecordDescriptor arg4,
+ JobGenContext arg5, JobSpecification arg6) throws AlgebricksException {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getResultHandleRuntime(IDataSink sink,
+ int[] printColumns, IPrinterFactory[] printerFactories, RecordDescriptor inputDesc, boolean ordered,
+ JobSpecification spec) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getWriteResultRuntime(IDataSource<S> arg0,
+ IOperatorSchema arg1, List<LogicalVariable> arg2, LogicalVariable arg3, JobGenContext arg4,
+ JobSpecification arg5) throws AlgebricksException {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public IFunctionInfo lookupFunction(FunctionIdentifier arg0) {
+ return new HiveFunctionInfo(arg0, null);
+ }
+
+ @Override
+ public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getIndexInsertRuntime(
+ IDataSourceIndex<T, S> dataSource, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas,
+ IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys, List<LogicalVariable> secondaryKeys,
+ ILogicalExpression filterExpr, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec)
+ throws AlgebricksException {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getIndexDeleteRuntime(
+ IDataSourceIndex<T, S> dataSource, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas,
+ IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys, List<LogicalVariable> secondaryKeys,
+ ILogicalExpression filterExpr, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec)
+ throws AlgebricksException {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveOperatorSchema.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveOperatorSchema.java
new file mode 100644
index 0000000..cdb0e95
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveOperatorSchema.java
@@ -0,0 +1,84 @@
+package edu.uci.ics.hivesterix.runtime.jobgen;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema;
+
+public class HiveOperatorSchema implements IOperatorSchema {
+
+ private final Map<LogicalVariable, Integer> varMap;
+
+ private final List<LogicalVariable> varList;
+
+ public HiveOperatorSchema() {
+ varMap = new HashMap<LogicalVariable, Integer>();
+ varList = new ArrayList<LogicalVariable>();
+ }
+
+ @Override
+ public void addAllVariables(IOperatorSchema source) {
+ for (LogicalVariable v : source) {
+ varMap.put(v, varList.size());
+ varList.add(v);
+ }
+ }
+
+ @Override
+ public void addAllNewVariables(IOperatorSchema source) {
+ for (LogicalVariable v : source) {
+ if (varMap.get(v) == null) {
+ varMap.put(v, varList.size());
+ varList.add(v);
+ }
+ }
+ }
+
+ @Override
+ public int addVariable(LogicalVariable var) {
+ int idx = varList.size();
+ varMap.put(var, idx);
+ varList.add(var);
+ return idx;
+ }
+
+ @Override
+ public void clear() {
+ varMap.clear();
+ varList.clear();
+ }
+
+ @Override
+ public int findVariable(LogicalVariable var) {
+ Integer i = varMap.get(var);
+ if (i == null) {
+ return -1;
+ }
+ return i;
+ }
+
+ @Override
+ public int getSize() {
+ return varList.size();
+ }
+
+ @Override
+ public LogicalVariable getVariable(int index) {
+ return varList.get(index);
+ }
+
+ @Override
+ public Iterator<LogicalVariable> iterator() {
+ return varList.iterator();
+ }
+
+ @Override
+ public String toString() {
+ return varMap.toString();
+ }
+
+}
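A small usage sketch of the schema's variable-to-column mapping. LogicalVariable is constructed directly with an id here purely for illustration (normally the compiler's variable context issues ids):

import edu.uci.ics.hivesterix.runtime.jobgen.HiveOperatorSchema;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;

public class OperatorSchemaSketch {
    public static void main(String[] args) {
        HiveOperatorSchema schema = new HiveOperatorSchema();
        LogicalVariable v7 = new LogicalVariable(7);
        LogicalVariable v9 = new LogicalVariable(9);
        schema.addVariable(v7);                                           // column 0
        schema.addVariable(v9);                                           // column 1
        System.out.println(schema.findVariable(v9));                      // 1
        System.out.println(schema.findVariable(new LogicalVariable(5)));  // -1: not in the schema
        System.out.println(schema.getSize());                             // 2
    }
}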
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveScanRuntimeGenerator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveScanRuntimeGenerator.java
new file mode 100644
index 0000000..0d2c78a
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveScanRuntimeGenerator.java
@@ -0,0 +1,115 @@
+package edu.uci.ics.hivesterix.runtime.jobgen;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.runtime.operator.filescan.HiveKeyValueParserFactory;
+import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
+import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSource;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema;
+import edu.uci.ics.hyracks.algebricks.core.jobgen.impl.JobGenContext;
+import edu.uci.ics.hyracks.algebricks.data.ISerializerDeserializerProvider;
+import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.hdfs.dataflow.HDFSReadOperatorDescriptor;
+import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
+
+@SuppressWarnings({ "rawtypes", "deprecation" })
+public class HiveScanRuntimeGenerator {
+
+ private PartitionDesc fileDesc;
+
+ private transient Path filePath;
+
+ private String filePathName;
+
+ private Properties properties;
+
+ public HiveScanRuntimeGenerator(PartitionDesc path) {
+ fileDesc = path;
+ properties = fileDesc.getProperties();
+
+ String inputPath = (String) properties.getProperty("location");
+
+ if (inputPath.startsWith("file:")) {
+ // Windows
+ String[] strs = inputPath.split(":");
+ filePathName = strs[strs.length - 1];
+ } else {
+ // Linux
+ filePathName = inputPath;
+ }
+
+ filePath = new Path(filePathName);
+ }
+
+ public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getRuntimeOperatorAndConstraint(
+ IDataSource dataSource, List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables,
+ boolean projectPushed, JobGenContext context, JobSpecification jobSpec) throws AlgebricksException {
+ try {
+ // get the correct delimiter from Hive metastore or other data
+ // structures
+ IOperatorSchema propagatedSchema = new HiveOperatorSchema();
+
+ List<LogicalVariable> outputVariables = projectPushed ? projectVariables : scanVariables;
+ for (LogicalVariable var : outputVariables)
+ propagatedSchema.addVariable(var);
+
+ int[] outputColumnsOffset = new int[scanVariables.size()];
+ int i = 0;
+ for (LogicalVariable var : scanVariables)
+ if (outputVariables.contains(var)) {
+ int offset = outputVariables.indexOf(var);
+ outputColumnsOffset[i++] = offset;
+ } else
+ outputColumnsOffset[i++] = -1;
+
+ Object[] schemaTypes = dataSource.getSchemaTypes();
+ // get record descriptor
+ RecordDescriptor recDescriptor = mkRecordDescriptor(propagatedSchema, schemaTypes, context);
+
+ // setup the run time operator and constraints
+ JobConf conf = ConfUtil.getJobConf(fileDesc.getInputFileFormatClass(), filePath);
+ String[] locConstraints = ConfUtil.getNCs();
+ Map<String, NodeControllerInfo> ncNameToNcInfos = ConfUtil.getNodeControllerInfo();
+ Scheduler scheduler = new Scheduler(ncNameToNcInfos);
+ InputSplit[] splits = conf.getInputFormat().getSplits(conf, locConstraints.length);
+ String[] schedule = scheduler.getLocationConstraints(splits);
+ IOperatorDescriptor scanner = new HDFSReadOperatorDescriptor(jobSpec, recDescriptor, conf, splits,
+ schedule, new HiveKeyValueParserFactory(fileDesc, conf, outputColumnsOffset));
+
+ return new Pair<IOperatorDescriptor, AlgebricksPartitionConstraint>(scanner,
+ new AlgebricksAbsolutePartitionConstraint(locConstraints));
+ } catch (Exception e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ private static RecordDescriptor mkRecordDescriptor(IOperatorSchema opSchema, Object[] types, JobGenContext context)
+ throws AlgebricksException {
+ ISerializerDeserializer[] fields = new ISerializerDeserializer[opSchema.getSize()];
+ ISerializerDeserializerProvider sdp = context.getSerializerDeserializerProvider();
+ int size = opSchema.getSize();
+ for (int i = 0; i < size; i++) {
+ Object t = types[i];
+ fields[i] = sdp.getSerializerDeserializer(t);
+ }
+ return new RecordDescriptor(fields);
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveWriteRuntimeGenerator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveWriteRuntimeGenerator.java
new file mode 100644
index 0000000..7a577e8
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveWriteRuntimeGenerator.java
@@ -0,0 +1,37 @@
+package edu.uci.ics.hivesterix.runtime.jobgen;
+
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.runtime.operator.filewrite.HivePushRuntimeFactory;
+import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
+import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntimeFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+
+@SuppressWarnings("deprecation")
+public class HiveWriteRuntimeGenerator {
+ private FileSinkOperator fileSink;
+
+ private Schema inputSchema;
+
+ public HiveWriteRuntimeGenerator(FileSinkOperator fsOp, Schema oi) {
+ fileSink = fsOp;
+ inputSchema = oi;
+ }
+
+ /**
+ * get the write runtime
+ *
+ * @param inputDesc
+ * @return
+ */
+ public Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint> getWriterRuntime(RecordDescriptor inputDesc) {
+ JobConf conf = ConfUtil.getJobConf();
+ IPushRuntimeFactory factory = new HivePushRuntimeFactory(inputDesc, conf, fileSink, inputSchema);
+ Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint> pair = new Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint>(
+ factory, null);
+ return pair;
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/Schema.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/Schema.java
new file mode 100644
index 0000000..927c709
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/Schema.java
@@ -0,0 +1,39 @@
+package edu.uci.ics.hivesterix.runtime.jobgen;
+
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+
+public class Schema implements Serializable {
+
+ private static final long serialVersionUID = 1L;
+
+ private List<String> fieldNames;
+
+ private List<TypeInfo> fieldTypes;
+
+ public Schema(List<String> fieldNames, List<TypeInfo> fieldTypes) {
+ this.fieldNames = fieldNames;
+ this.fieldTypes = fieldTypes;
+ }
+
+ public ObjectInspector toObjectInspector() {
+ return LazyUtils.getLazyObjectInspector(fieldNames, fieldTypes);
+ }
+
+ public List<String> getNames() {
+ return fieldNames;
+ }
+
+ public List<TypeInfo> getTypes() {
+ return fieldTypes;
+ }
+
+ public Object[] getSchema() {
+ return fieldTypes.toArray();
+ }
+}
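A sketch of building a Schema for a two-column layout and handing its type objects to the jobgen and provider code above; the column names are illustrative:

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

import edu.uci.ics.hivesterix.runtime.jobgen.Schema;

public class SchemaSketch {
    public static void main(String[] args) {
        List<String> names = Arrays.asList("l_orderkey", "l_comment");    // illustrative column names
        List<TypeInfo> types = Arrays.<TypeInfo> asList(TypeInfoFactory.intTypeInfo,
                TypeInfoFactory.stringTypeInfo);
        Schema schema = new Schema(names, types);

        Object[] schemaTypes = schema.getSchema();                  // what the jobgen code passes around
        ObjectInspector rowInspector = schema.toObjectInspector();  // lazy row inspector for the SerDe path
        System.out.println(schemaTypes.length + " columns, inspector: " + rowInspector.getTypeName());
    }
}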
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveKeyValueParser.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveKeyValueParser.java
new file mode 100644
index 0000000..472994a
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveKeyValueParser.java
@@ -0,0 +1,209 @@
+package edu.uci.ics.hivesterix.runtime.operator.filescan;
+
+import java.io.DataOutput;
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import edu.uci.ics.hivesterix.serde.parser.IHiveParser;
+import edu.uci.ics.hivesterix.serde.parser.TextToBinaryTupleParser;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
+import edu.uci.ics.hyracks.hdfs.api.IKeyValueParser;
+
+@SuppressWarnings("deprecation")
+public class HiveKeyValueParser<K, V> implements IKeyValueParser<K, V> {
+ /**
+ * the columns to output: projection is pushed into this scan
+ */
+ private int[] outputColumnsOffset;
+
+ /**
+ * serialization/de-serialization object
+ */
+ private SerDe serDe;
+
+ /**
+ * the input row object inspector
+ */
+ private StructObjectInspector structInspector;
+
+ /**
+ * the hadoop job conf
+ */
+ private JobConf job;
+
+ /**
+ * Hyracks context to control resource allocation
+ */
+ private final IHyracksTaskContext ctx;
+
+ /**
+ * lazy SerDe: the serialization format for rows flowing between operators
+ */
+ private final SerDe outputSerDe;
+
+ /**
+ * the parser from hive data to binary data
+ */
+ private IHiveParser parser;
+
+ /**
+ * the buffer for buffering output data
+ */
+ private ByteBuffer buffer;
+
+ /**
+ * the frame tuple appender
+ */
+ private FrameTupleAppender appender;
+
+ /**
+ * the array tuple builder
+ */
+ private ArrayTupleBuilder tb;
+
+ /**
+ * the field references of all fields
+ */
+ private List<? extends StructField> fieldRefs;
+
+ /**
+ * output fields
+ */
+ private Object[] outputFields;
+
+ /**
+ * output field references
+ */
+ private StructField[] outputFieldRefs;
+
+ public HiveKeyValueParser(String serDeClass, String outputSerDeClass, Properties tbl, JobConf conf,
+ final IHyracksTaskContext ctx, int[] outputColumnsOffset) throws HyracksDataException {
+ try {
+ job = conf;
+ // initialize the input serde
+ serDe = (SerDe) ReflectionUtils.newInstance(Class.forName(serDeClass), job);
+ serDe.initialize(job, tbl);
+ // initialize the output serde
+ outputSerDe = (SerDe) ReflectionUtils.newInstance(Class.forName(outputSerDeClass), job);
+ outputSerDe.initialize(job, tbl);
+ // object inspector of the row
+ structInspector = (StructObjectInspector) serDe.getObjectInspector();
+ // hyracks context
+ this.ctx = ctx;
+ this.outputColumnsOffset = outputColumnsOffset;
+
+ if (structInspector instanceof LazySimpleStructObjectInspector) {
+ LazySimpleStructObjectInspector rowInspector = (LazySimpleStructObjectInspector) structInspector;
+ List<? extends StructField> fieldRefs = rowInspector.getAllStructFieldRefs();
+ boolean lightWeightParsable = true;
+ for (StructField fieldRef : fieldRefs) {
+ Category category = fieldRef.getFieldObjectInspector().getCategory();
+ if (!(category == Category.PRIMITIVE)) {
+ lightWeightParsable = false;
+ break;
+ }
+ }
+ if (lightWeightParsable) {
+ parser = new TextToBinaryTupleParser(this.outputColumnsOffset, structInspector);
+ }
+ }
+
+ fieldRefs = structInspector.getAllStructFieldRefs();
+ int size = 0;
+ for (int i = 0; i < outputColumnsOffset.length; i++) {
+ if (outputColumnsOffset[i] >= 0) {
+ size++;
+ }
+ }
+
+ tb = new ArrayTupleBuilder(size);
+ outputFieldRefs = new StructField[size];
+ outputFields = new Object[size];
+ for (int i = 0; i < outputColumnsOffset.length; i++)
+ if (outputColumnsOffset[i] >= 0)
+ outputFieldRefs[outputColumnsOffset[i]] = fieldRefs.get(i);
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public void open(IFrameWriter writer) throws HyracksDataException {
+ buffer = ctx.allocateFrame();
+ appender = new FrameTupleAppender(ctx.getFrameSize());
+ appender.reset(buffer, true);
+ }
+
+ @Override
+ public void parse(K key, V value, IFrameWriter writer) throws HyracksDataException {
+ try {
+ tb.reset();
+ if (parser != null) {
+ Text text = (Text) value;
+ parser.parse(text.getBytes(), 0, text.getLength(), tb);
+ } else {
+ Object row = serDe.deserialize((Writable) value);
+ /**
+ * write fields to the tuple builder one by one
+ */
+ int i = 0;
+ for (StructField fieldRef : fieldRefs) {
+ if (outputColumnsOffset[i] >= 0)
+ outputFields[outputColumnsOffset[i]] = structInspector.getStructFieldData(row, fieldRef);
+ i++;
+ }
+ i = 0;
+ DataOutput dos = tb.getDataOutput();
+ for (Object field : outputFields) {
+ BytesWritable fieldWritable = (BytesWritable) outputSerDe.serialize(field,
+ outputFieldRefs[i].getFieldObjectInspector());
+ dos.write(fieldWritable.getBytes(), 0, fieldWritable.getSize());
+ tb.addFieldEndOffset();
+ i++;
+ }
+ }
+
+ /**
+ * append the tuple and flush it if necessary.
+ */
+ if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
+ FrameUtils.flushFrame(buffer, writer);
+ appender.reset(buffer, true);
+ if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
+ throw new IllegalStateException();
+ }
+ }
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public void close(IFrameWriter writer) throws HyracksDataException {
+ /**
+ * flush the residual tuples
+ */
+ if (appender.getTupleCount() > 0) {
+ FrameUtils.flushFrame(buffer, writer);
+ }
+ System.gc();
+ }
+
+}
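The append-or-flush control flow in parse() is the standard Hyracks frame-filling idiom; factored out, it looks like the helper below. It uses the same calls as the code above and assumes nothing new about the API:

import java.nio.ByteBuffer;

import edu.uci.ics.hyracks.api.comm.IFrameWriter;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;

public class FrameAppendHelper {
    static void appendTuple(ArrayTupleBuilder tb, FrameTupleAppender appender, ByteBuffer buffer,
            IFrameWriter writer) throws HyracksDataException {
        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
            // the current frame is full: ship it downstream, then retry on the recycled frame
            FrameUtils.flushFrame(buffer, writer);
            appender.reset(buffer, true);
            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                // a single tuple larger than one frame can never be appended
                throw new HyracksDataException("tuple does not fit into an empty frame");
            }
        }
    }
}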
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveKeyValueParserFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveKeyValueParserFactory.java
new file mode 100644
index 0000000..05903b9
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveKeyValueParserFactory.java
@@ -0,0 +1,39 @@
+package edu.uci.ics.hivesterix.runtime.operator.filescan;
+
+import java.util.Properties;
+
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.hdfs.api.IKeyValueParser;
+import edu.uci.ics.hyracks.hdfs.api.IKeyValueParserFactory;
+import edu.uci.ics.hyracks.hdfs.dataflow.ConfFactory;
+
+@SuppressWarnings("deprecation")
+public class HiveKeyValueParserFactory<K, V> implements IKeyValueParserFactory<K, V> {
+ private static final long serialVersionUID = 1L;
+ private final String serDeClass;
+ private final String outputSerDeClass = LazySerDe.class.getName();
+ private final Properties tbl;
+ private final ConfFactory confFactory;
+ private final int[] outputColumnsOffset;
+
+ public HiveKeyValueParserFactory(PartitionDesc desc, JobConf conf, int[] outputColumnsOffset)
+ throws HyracksDataException {
+ this.tbl = desc.getProperties();
+ this.serDeClass = (String) tbl.getProperty("serialization.lib");
+ this.outputColumnsOffset = outputColumnsOffset;
+ this.confFactory = new ConfFactory(conf);
+ }
+
+ @SuppressWarnings({ "rawtypes", "unchecked" })
+ @Override
+ public IKeyValueParser<K, V> createKeyValueParser(IHyracksTaskContext ctx) throws HyracksDataException {
+ return new HiveKeyValueParser(serDeClass, outputSerDeClass, tbl, confFactory.getConf(), ctx,
+ outputColumnsOffset);
+ }
+
+}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java
similarity index 98%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java
rename to hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java
index 05e79ea..81faf38 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java
@@ -10,7 +10,7 @@
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.mapred.JobConf;
-import edu.uci.ics.hivesterix.logical.expression.Schema;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
import edu.uci.ics.hivesterix.serde.lazy.LazyColumnar;
import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyColumnarObjectInspector;
import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntime;
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HivePushRuntimeFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HivePushRuntimeFactory.java
new file mode 100644
index 0000000..6c18231
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HivePushRuntimeFactory.java
@@ -0,0 +1,105 @@
+package edu.uci.ics.hivesterix.runtime.operator.filewrite;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.util.UUID;
+
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntime;
+import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntimeFactory;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+
+@SuppressWarnings("deprecation")
+public class HivePushRuntimeFactory implements IPushRuntimeFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ private final RecordDescriptor inputRecordDesc;
+ private transient JobConf conf;
+ private final FileSinkDesc fileSink;
+ private final RowSchema outSchema;
+ private final Schema schema;
+
+ /**
+ * the content of the configuration
+ */
+ private String confContent;
+
+ public HivePushRuntimeFactory(RecordDescriptor inputRecordDesc, JobConf conf, FileSinkOperator fsp, Schema sch) {
+ this.inputRecordDesc = inputRecordDesc;
+ this.conf = conf;
+ this.fileSink = fsp.getConf();
+ outSchema = fsp.getSchema();
+ this.schema = sch;
+
+ writeConfContent();
+ }
+
+ @Override
+ public String toString() {
+ return "file write";
+ }
+
+ @Override
+ public IPushRuntime createPushRuntime(IHyracksTaskContext context) throws AlgebricksException {
+ if (conf == null)
+ readConfContent();
+
+ return new HiveFileWritePushRuntime(context, inputRecordDesc, conf, fileSink, outSchema, schema);
+ }
+
+ private void readConfContent() {
+ File dir = new File("hadoop-conf-tmp");
+ if (!dir.exists()) {
+ dir.mkdir();
+ }
+
+ String fileName = "hadoop-conf-tmp/" + UUID.randomUUID() + System.currentTimeMillis() + ".xml";
+ try {
+ PrintWriter out = new PrintWriter((new OutputStreamWriter(new FileOutputStream(new File(fileName)))));
+ out.write(confContent);
+ out.close();
+ conf = new JobConf(fileName);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ private void writeConfContent() {
+ File dir = new File("hadoop-conf-tmp");
+ if (!dir.exists()) {
+ dir.mkdir();
+ }
+
+ String fileName = "hadoop-conf-tmp/" + UUID.randomUUID() + System.currentTimeMillis() + ".xml";
+ try {
+ DataOutputStream out = new DataOutputStream(new FileOutputStream(new File(fileName)));
+ conf.writeXml(out);
+ out.close();
+
+ DataInputStream in = new DataInputStream(new FileInputStream(fileName));
+ StringBuffer buffer = new StringBuffer();
+ String line;
+ while ((line = in.readLine()) != null) {
+ buffer.append(line + "\n");
+ }
+ in.close();
+ confContent = buffer.toString();
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryComparatorFactoryProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryComparatorFactoryProvider.java
new file mode 100644
index 0000000..467ec0a
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryComparatorFactoryProvider.java
@@ -0,0 +1,75 @@
+package edu.uci.ics.hivesterix.runtime.provider;
+
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveByteBinaryAscComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveByteBinaryDescComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveDoubleBinaryAscComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveDoubleBinaryDescComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveFloatBinaryAscComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveFloatBinaryDescComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveIntegerBinaryAscComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveIntegerBinaryDescComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveLongBinaryAscComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveLongBinaryDescComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveShortBinaryAscComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveShortBinaryDescComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveStringBinaryAscComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveStringBinaryDescComparatorFactory;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.NotImplementedException;
+import edu.uci.ics.hyracks.algebricks.data.IBinaryComparatorFactoryProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveBinaryComparatorFactoryProvider implements IBinaryComparatorFactoryProvider {
+
+ public static final HiveBinaryComparatorFactoryProvider INSTANCE = new HiveBinaryComparatorFactoryProvider();
+
+ private HiveBinaryComparatorFactoryProvider() {
+ }
+
+ @Override
+ public IBinaryComparatorFactory getBinaryComparatorFactory(Object type, boolean ascending)
+ throws AlgebricksException {
+ if (type.equals(TypeInfoFactory.intTypeInfo)) {
+ if (ascending)
+ return HiveIntegerBinaryAscComparatorFactory.INSTANCE;
+ else
+ return HiveIntegerBinaryDescComparatorFactory.INSTANCE;
+
+ } else if (type.equals(TypeInfoFactory.longTypeInfo)) {
+ if (ascending)
+ return HiveLongBinaryAscComparatorFactory.INSTANCE;
+ else
+ return HiveLongBinaryDescComparatorFactory.INSTANCE;
+
+ } else if (type.equals(TypeInfoFactory.floatTypeInfo)) {
+ if (ascending)
+ return HiveFloatBinaryAscComparatorFactory.INSTANCE;
+ else
+ return HiveFloatBinaryDescComparatorFactory.INSTANCE;
+
+ } else if (type.equals(TypeInfoFactory.doubleTypeInfo)) {
+ if (ascending)
+ return HiveDoubleBinaryAscComparatorFactory.INSTANCE;
+ else
+ return HiveDoubleBinaryDescComparatorFactory.INSTANCE;
+ } else if (type.equals(TypeInfoFactory.shortTypeInfo)) {
+ if (ascending)
+ return HiveShortBinaryAscComparatorFactory.INSTANCE;
+ else
+ return HiveShortBinaryDescComparatorFactory.INSTANCE;
+ } else if (type.equals(TypeInfoFactory.stringTypeInfo)) {
+ if (ascending)
+ return HiveStringBinaryAscComparatorFactory.INSTANCE;
+ else
+ return HiveStringBinaryDescComparatorFactory.INSTANCE;
+ } else if (type.equals(TypeInfoFactory.byteTypeInfo) || type.equals(TypeInfoFactory.booleanTypeInfo)) {
+ if (ascending)
+ return HiveByteBinaryAscComparatorFactory.INSTANCE;
+ else
+ return HiveByteBinaryDescComparatorFactory.INSTANCE;
+ } else
+ throw new NotImplementedException();
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFactoryProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFactoryProvider.java
new file mode 100644
index 0000000..473eee1
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFactoryProvider.java
@@ -0,0 +1,35 @@
+package edu.uci.ics.hivesterix.runtime.provider;
+
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveDoubleBinaryHashFunctionFactory;
+import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveIntegerBinaryHashFunctionFactory;
+import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveLongBinaryHashFunctionFactory;
+import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveRawBinaryHashFunctionFactory;
+import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveStingBinaryHashFunctionFactory;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.data.IBinaryHashFunctionFactoryProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
+
+public class HiveBinaryHashFunctionFactoryProvider implements IBinaryHashFunctionFactoryProvider {
+
+ public static final HiveBinaryHashFunctionFactoryProvider INSTANCE = new HiveBinaryHashFunctionFactoryProvider();
+
+ private HiveBinaryHashFunctionFactoryProvider() {
+ }
+
+ @Override
+ public IBinaryHashFunctionFactory getBinaryHashFunctionFactory(Object type) throws AlgebricksException {
+ if (type.equals(TypeInfoFactory.intTypeInfo)) {
+ return HiveIntegerBinaryHashFunctionFactory.INSTANCE;
+ } else if (type.equals(TypeInfoFactory.longTypeInfo)) {
+ return HiveLongBinaryHashFunctionFactory.INSTANCE;
+ } else if (type.equals(TypeInfoFactory.stringTypeInfo)) {
+ return HiveStingBinaryHashFunctionFactory.INSTANCE;
+ } else if (type.equals(TypeInfoFactory.doubleTypeInfo)) {
+ return HiveDoubleBinaryHashFunctionFactory.INSTANCE;
+ } else {
+ return HiveRawBinaryHashFunctionFactory.INSTANCE;
+ }
+ }
+}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFamilyProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFamilyProvider.java
similarity index 100%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFamilyProvider.java
rename to hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFamilyProvider.java
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveNormalizedKeyComputerFactoryProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveNormalizedKeyComputerFactoryProvider.java
new file mode 100644
index 0000000..91bf3e5
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveNormalizedKeyComputerFactoryProvider.java
@@ -0,0 +1,51 @@
+package edu.uci.ics.hivesterix.runtime.provider;
+
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveDoubleAscNormalizedKeyComputerFactory;
+import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveDoubleDescNormalizedKeyComputerFactory;
+import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveIntegerAscNormalizedKeyComputerFactory;
+import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveIntegerDescNormalizedKeyComputerFactory;
+import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveLongAscNormalizedKeyComputerFactory;
+import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveLongDescNormalizedKeyComputerFactory;
+import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveStringAscNormalizedKeyComputerFactory;
+import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveStringDescNormalizedKeyComputerFactory;
+import edu.uci.ics.hyracks.algebricks.data.INormalizedKeyComputerFactoryProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+
+public class HiveNormalizedKeyComputerFactoryProvider implements INormalizedKeyComputerFactoryProvider {
+
+ public static final HiveNormalizedKeyComputerFactoryProvider INSTANCE = new HiveNormalizedKeyComputerFactoryProvider();
+
+ private HiveNormalizedKeyComputerFactoryProvider() {
+ }
+
+ @Override
+ public INormalizedKeyComputerFactory getNormalizedKeyComputerFactory(Object type, boolean ascending) {
+ if (ascending) {
+ if (type.equals(TypeInfoFactory.stringTypeInfo)) {
+ return new HiveStringAscNormalizedKeyComputerFactory();
+ } else if (type.equals(TypeInfoFactory.intTypeInfo)) {
+ return new HiveIntegerAscNormalizedKeyComputerFactory();
+ } else if (type.equals(TypeInfoFactory.longTypeInfo)) {
+ return new HiveLongAscNormalizedKeyComputerFactory();
+ } else if (type.equals(TypeInfoFactory.doubleTypeInfo)) {
+ return new HiveDoubleAscNormalizedKeyComputerFactory();
+ } else {
+ return null;
+ }
+ } else {
+ if (type.equals(TypeInfoFactory.stringTypeInfo)) {
+ return new HiveStringDescNormalizedKeyComputerFactory();
+ } else if (type.equals(TypeInfoFactory.intTypeInfo)) {
+ return new HiveIntegerDescNormalizedKeyComputerFactory();
+ } else if (type.equals(TypeInfoFactory.longTypeInfo)) {
+ return new HiveLongDescNormalizedKeyComputerFactory();
+ } else if (type.equals(TypeInfoFactory.doubleTypeInfo)) {
+ return new HiveDoubleDescNormalizedKeyComputerFactory();
+ } else {
+ return null;
+ }
+ }
+ }
+}
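
Unlike the comparator and hash providers, this provider can return null for key types it does not support. A short sketch of that contract, assuming the classes above; NormalizedKeyLookupExample is a hypothetical name and short is just one example of an unsupported key type:

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

import edu.uci.ics.hivesterix.runtime.provider.HiveNormalizedKeyComputerFactoryProvider;
import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;

public class NormalizedKeyLookupExample {
    public static void main(String[] args) {
        // string, int, long and double keys get order-preserving normalized keys
        INormalizedKeyComputerFactory intAsc = HiveNormalizedKeyComputerFactoryProvider.INSTANCE
                .getNormalizedKeyComputerFactory(TypeInfoFactory.intTypeInfo, true);
        // any other key type yields null, so callers must sort using full comparisons
        INormalizedKeyComputerFactory unsupported = HiveNormalizedKeyComputerFactoryProvider.INSTANCE
                .getNormalizedKeyComputerFactory(TypeInfoFactory.shortTypeInfo, true);
        System.out.println(intAsc != null);        // true
        System.out.println(unsupported == null);   // true
    }
}
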
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HivePrinterFactoryProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HivePrinterFactoryProvider.java
similarity index 61%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HivePrinterFactoryProvider.java
rename to hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HivePrinterFactoryProvider.java
index bebb457..10c84d2 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HivePrinterFactoryProvider.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HivePrinterFactoryProvider.java
@@ -6,12 +6,11 @@
public class HivePrinterFactoryProvider implements IPrinterFactoryProvider {
- public static IPrinterFactoryProvider INSTANCE = new HivePrinterFactoryProvider();
+ public static IPrinterFactoryProvider INSTANCE = new HivePrinterFactoryProvider();
- @Override
- public IPrinterFactory getPrinterFactory(Object type)
- throws AlgebricksException {
- return null;
- }
+ @Override
+ public IPrinterFactory getPrinterFactory(Object type) throws AlgebricksException {
+ return null;
+ }
}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveSerializerDeserializerProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveSerializerDeserializerProvider.java
new file mode 100644
index 0000000..22f81e0
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveSerializerDeserializerProvider.java
@@ -0,0 +1,21 @@
+package edu.uci.ics.hivesterix.runtime.provider;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.data.ISerializerDeserializerProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+
+public class HiveSerializerDeserializerProvider implements ISerializerDeserializerProvider {
+
+ public static final HiveSerializerDeserializerProvider INSTANCE = new HiveSerializerDeserializerProvider();
+
+ private HiveSerializerDeserializerProvider() {
+ }
+
+ @SuppressWarnings("rawtypes")
+ @Override
+ public ISerializerDeserializer getSerializerDeserializer(Object type) throws AlgebricksException {
+ // return ARecordSerializerDeserializer.SCHEMALESS_INSTANCE;
+ return null;
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveTypeTraitProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveTypeTraitProvider.java
new file mode 100644
index 0000000..be4b149
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveTypeTraitProvider.java
@@ -0,0 +1,33 @@
+package edu.uci.ics.hivesterix.runtime.provider;
+
+import java.io.Serializable;
+
+import edu.uci.ics.hyracks.algebricks.data.ITypeTraitProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+
+public class HiveTypeTraitProvider implements ITypeTraitProvider, Serializable {
+ private static final long serialVersionUID = 1L;
+ public static HiveTypeTraitProvider INSTANCE = new HiveTypeTraitProvider();
+
+ private HiveTypeTraitProvider() {
+
+ }
+
+ @Override
+ public ITypeTraits getTypeTrait(Object arg0) {
+ return new ITypeTraits() {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public int getFixedLength() {
+ return -1;
+ }
+
+ @Override
+ public boolean isFixedLength() {
+ return false;
+ }
+
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-serde/pom.xml b/hivesterix/hivesterix-serde/pom.xml
new file mode 100644
index 0000000..0ba73bd
--- /dev/null
+++ b/hivesterix/hivesterix-serde/pom.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0"?>
+<project
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+ xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <artifactId>hivesterix</artifactId>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <version>0.2.3-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>hivesterix-serde</artifactId>
+ <name>hivesterix-serde</name>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <encoding>UTF-8</encoding>
+ <fork>true</fork>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-serde</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-common</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>algebricks-compiler</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ <version>0.20.2</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>3.8.1</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/ByteArrayRef.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/ByteArrayRef.java
similarity index 85%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/ByteArrayRef.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/ByteArrayRef.java
index 673416d..92415f9 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/ByteArrayRef.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/ByteArrayRef.java
@@ -19,24 +19,23 @@
/**
* ByteArrayRef stores a reference to a byte array.
- *
* The LazyObject hierarchy uses a reference to a single ByteArrayRef, so that
* it's much faster to switch to the next row and release the reference to the
* old row (so that the system can do garbage collection if needed).
*/
public class ByteArrayRef {
- /**
- * Stores the actual data.
- */
- byte[] data;
+ /**
+ * Stores the actual data.
+ */
+ byte[] data;
- public byte[] getData() {
- return data;
- }
+ public byte[] getData() {
+ return data;
+ }
- public void setData(byte[] data) {
- this.data = data;
- }
+ public void setData(byte[] data) {
+ this.data = data;
+ }
}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyArray.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyArray.java
new file mode 100644
index 0000000..33b20bf
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyArray.java
@@ -0,0 +1,229 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.RecordInfo;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyListObjectInspector;
+
+/**
+ * LazyArray is serialized as follows: start A b b b b b b end bytes[] ->
+ * |--------|---|---|---|---| ... |---|---|
+ * Section A is the null-bytes. Suppose the list has N elements, then there are
+ * (N+7)/8 bytes used as null-bytes. Each bit corresponds to an element and it
+ * indicates whether that element is null (0) or not null (1).
+ * After A, all b(s) represent the elements of the list. Each of them is again a
+ * LazyObject.
+ */
+
+public class LazyArray extends LazyNonPrimitive<LazyListObjectInspector> {
+
+ /**
+ * Whether the data is already parsed or not.
+ */
+ boolean parsed = false;
+ /**
+ * The length of the array. Only valid when the data is parsed.
+ */
+ int arraySize = 0;
+
+ /**
+ * The start positions and lengths of array elements. Only valid when the
+ * data is parsed.
+ */
+ int[] elementStart;
+ int[] elementLength;
+
+ /**
+ * Whether an element is initialized or not.
+ */
+ boolean[] elementInited;
+
+ /**
+ * Whether an element is null or not. A length of 0 does not mean the
+ * field is null; in particular, a 0-length string is not null.
+ */
+ boolean[] elementIsNull;
+
+ /**
+ * The elements of the array. Note that we call arrayElements[i].init(bytes,
+ * begin, length) only when that element is accessed.
+ */
+ @SuppressWarnings("rawtypes")
+ LazyObject[] arrayElements;
+
+ /**
+ * Construct a LazyArray object with the ObjectInspector.
+ *
+ * @param oi
+ * the oi representing the type of this LazyArray
+ */
+ protected LazyArray(LazyListObjectInspector oi) {
+ super(oi);
+ }
+
+ /**
+ * Set the row data for this LazyArray.
+ *
+ * @see LazyObject#init(byte[], int, int)
+ */
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ super.init(bytes, start, length);
+ parsed = false;
+ }
+
+ /**
+ * Enlarge the size of arrays storing information for the elements inside
+ * the array.
+ */
+ private void adjustArraySize(int newSize) {
+ if (elementStart == null || elementStart.length < newSize) {
+ elementStart = new int[newSize];
+ elementLength = new int[newSize];
+ elementInited = new boolean[newSize];
+ elementIsNull = new boolean[newSize];
+ arrayElements = new LazyObject[newSize];
+ }
+ }
+
+ VInt vInt = new LazyUtils.VInt();
+ RecordInfo recordInfo = new LazyUtils.RecordInfo();
+
+ /**
+ * Parse the bytes and fill elementStart, elementLength, elementInited and
+ * elementIsNull.
+ */
+ private void parse() {
+
+ // get the vint that represents the array size
+ LazyUtils.readVInt(bytes, start, vInt);
+ arraySize = vInt.value;
+ if (0 == arraySize) {
+ parsed = true;
+ return;
+ }
+
+ // adjust arrays
+ adjustArraySize(arraySize);
+ // find out the null-bytes
+ int arryByteStart = start + vInt.length;
+ int nullByteCur = arryByteStart;
+ int nullByteEnd = arryByteStart + (arraySize + 7) / 8;
+ // the beginning of the real elements
+ int lastElementByteEnd = nullByteEnd;
+ // the list element object inspector
+ ObjectInspector listEleObjectInspector = ((ListObjectInspector) oi).getListElementObjectInspector();
+ // parsing elements one by one
+ for (int i = 0; i < arraySize; i++) {
+ elementIsNull[i] = true;
+ if ((bytes[nullByteCur] & (1 << (i % 8))) != 0) {
+ elementIsNull[i] = false;
+ LazyUtils.checkObjectByteInfo(listEleObjectInspector, bytes, lastElementByteEnd, recordInfo);
+ elementStart[i] = lastElementByteEnd + recordInfo.elementOffset;
+ elementLength[i] = recordInfo.elementSize;
+ lastElementByteEnd = elementStart[i] + elementLength[i];
+ }
+ // move onto the next null byte
+ if (7 == (i % 8)) {
+ nullByteCur++;
+ }
+ }
+
+ Arrays.fill(elementInited, 0, arraySize, false);
+ parsed = true;
+ }
+
+ /**
+ * Returns the actual primitive object at the index position inside the
+ * array represented by this LazyObject.
+ */
+ public Object getListElementObject(int index) {
+ if (!parsed) {
+ parse();
+ }
+ if (index < 0 || index >= arraySize) {
+ return null;
+ }
+ return uncheckedGetElement(index);
+ }
+
+ /**
+ * Get the element without checking out-of-bound index.
+ *
+ * @param index
+ * index to the array element
+ */
+ private Object uncheckedGetElement(int index) {
+
+ if (elementIsNull[index]) {
+ return null;
+ } else {
+ if (!elementInited[index]) {
+ elementInited[index] = true;
+ if (arrayElements[index] == null) {
+ arrayElements[index] = LazyFactory.createLazyObject((oi).getListElementObjectInspector());
+ }
+ arrayElements[index].init(bytes, elementStart[index], elementLength[index]);
+ }
+ }
+ return arrayElements[index].getObject();
+ }
+
+ /**
+ * Returns the array size.
+ */
+ public int getListLength() {
+ if (!parsed) {
+ parse();
+ }
+ return arraySize;
+ }
+
+ /**
+ * cachedList is reused every time getList is called. Different
+ * LazyArray instances cannot share the same cachedList.
+ */
+ ArrayList<Object> cachedList;
+
+ /**
+ * Returns the List of actual primitive objects. Returns null for null
+ * array.
+ */
+ public List<Object> getList() {
+ if (!parsed) {
+ parse();
+ }
+ if (cachedList == null) {
+ cachedList = new ArrayList<Object>(arraySize);
+ } else {
+ cachedList.clear();
+ }
+ for (int index = 0; index < arraySize; index++) {
+ cachedList.add(uncheckedGetElement(index));
+ }
+ return cachedList;
+ }
+}
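
The null-byte layout described in the LazyArray javadoc can be made concrete with a little standalone arithmetic (this example is not part of the patch; NullByteLayoutExample and the sample header bytes are illustrative):

// Worked example: a list of N elements is preceded by (N + 7) / 8 null bytes,
// and element i is non-null iff bit (i % 8) of null byte (i / 8) is set.
public class NullByteLayoutExample {

    static boolean isElementNonNull(byte[] nullBytes, int i) {
        return (nullBytes[i / 8] & (1 << (i % 8))) != 0;
    }

    public static void main(String[] args) {
        int n = 10;
        System.out.println("null bytes: " + ((n + 7) / 8)); // 2
        // bits 0 and 2 of byte 0, bits 0 and 1 of byte 1 => elements 0, 2, 8, 9 are non-null
        byte[] header = { 0b00000101, 0b00000011 };
        for (int i = 0; i < n; i++) {
            System.out.println("element " + i + " non-null: " + isElementNonNull(header, i));
        }
    }
}
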
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyBoolean.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyBoolean.java
new file mode 100644
index 0000000..5a48525
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyBoolean.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.io.BooleanWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyBooleanObjectInspector;
+
+/**
+ * LazyObject for storing a value of boolean.
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project. As with the specification, this implementation relied on code laid out in <a href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's Delight, (Addison Wesley, 2002)</a> as well as <a href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ */
+public class LazyBoolean extends LazyPrimitive<LazyBooleanObjectInspector, BooleanWritable> {
+
+ public LazyBoolean(LazyBooleanObjectInspector oi) {
+ super(oi);
+ data = new BooleanWritable();
+ }
+
+ public LazyBoolean(LazyBoolean copy) {
+ super(copy);
+ data = new BooleanWritable(copy.data.get());
+ }
+
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ if (length == 0) {
+ isNull = true;
+ return;
+ } else
+ isNull = false;
+
+ // a temporary hack
+ assert (1 == length);
+ byte val = bytes[start];
+ if (val == 0) {
+ data.set(false);
+ } else if (val == 1) {
+ data.set(true);
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyByte.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyByte.java
new file mode 100644
index 0000000..bf4ff04
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyByte.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.io.ByteWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyByteObjectInspector;
+
+/**
+ * LazyObject for storing a value of Byte.
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project. As with the specification, this implementation relied on code laid out in <a href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's Delight, (Addison Wesley, 2002)</a> as well as <a href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ */
+public class LazyByte extends LazyPrimitive<LazyByteObjectInspector, ByteWritable> {
+
+ public LazyByte(LazyByteObjectInspector oi) {
+ super(oi);
+ data = new ByteWritable();
+ }
+
+ public LazyByte(LazyByte copy) {
+ super(copy);
+ data = new ByteWritable(copy.data.get());
+ }
+
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ if (length == 0) {
+ isNull = true;
+ return;
+ } else
+ isNull = false;
+
+ assert (1 == length);
+ data.set(bytes[start]);
+ }
+
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyColumnar.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyColumnar.java
new file mode 100644
index 0000000..d73fea7
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyColumnar.java
@@ -0,0 +1,207 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyColumnarObjectInspector;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+/**
+ * LazyObject for storing a struct. The field of a struct can be primitive or
+ * non-primitive.
+ * LazyStruct does not deal with the case of a NULL struct. That is handled by
+ * the parent LazyObject.
+ */
+@SuppressWarnings("rawtypes")
+public class LazyColumnar extends LazyNonPrimitive<LazyColumnarObjectInspector> {
+
+ /**
+ * IFrameTupleReference: the backend of the struct
+ */
+ IFrameTupleReference tuple;
+
+ /**
+ * Whether the data is already parsed or not.
+ */
+ boolean reset;
+
+ /**
+ * The fields of the struct.
+ */
+ LazyObject[] fields;
+
+ /**
+ * Whether init() has been called on the field or not.
+ */
+ boolean[] fieldVisited;
+
+ /**
+ * whether this is the first-time initialization
+ */
+ boolean start = true;
+
+ /**
+ * Construct a LazyColumnar object with the ObjectInspector.
+ */
+ public LazyColumnar(LazyColumnarObjectInspector oi) {
+ super(oi);
+ }
+
+ /**
+ * Set the row data for this LazyColumnar.
+ *
+ * @see LazyObject#init(byte[], int, int)
+ */
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ super.init(bytes, start, length);
+ reset = false;
+ }
+
+ /**
+ * Parse the byte[] and fill each field.
+ */
+ private void parse() {
+
+ if (start) {
+ // initialize field array and reusable objects
+ List<? extends StructField> fieldRefs = ((StructObjectInspector) oi).getAllStructFieldRefs();
+
+ fields = new LazyObject[fieldRefs.size()];
+ for (int i = 0; i < fields.length; i++) {
+ fields[i] = LazyFactory.createLazyObject(fieldRefs.get(i).getFieldObjectInspector());
+ }
+ fieldVisited = new boolean[fields.length];
+ start = false;
+ }
+
+ Arrays.fill(fieldVisited, false);
+ reset = true;
+ }
+
+ /**
+ * Get one field out of the struct.
+ * If the field is a primitive field, return the actual object. Otherwise
+ * return the LazyObject. This is because PrimitiveObjectInspector does not
+ * have control over the object used by the user - the user simply uses the
+ * Object directly instead of going through
+ * PrimitiveObjectInspector.get(Object).
+ *
+ * @param fieldID
+ * The field ID
+ * @return The field as a LazyObject
+ */
+ public Object getField(int fieldID) {
+ if (!reset) {
+ parse();
+ }
+ return uncheckedGetField(fieldID);
+ }
+
+ /**
+ * Get the field out of the row without checking parsed. This is called by
+ * both getField and getFieldsAsList.
+ *
+ * @param fieldID
+ * The id of the field, starting from 0.
+ * @return The value of the field
+ */
+ private Object uncheckedGetField(int fieldID) {
+ // get the buffer
+ byte[] buffer = tuple.getFieldData(fieldID);
+ // get the offset of the field
+ int s1 = tuple.getFieldStart(fieldID);
+ int l1 = tuple.getFieldLength(fieldID);
+
+ if (!fieldVisited[fieldID]) {
+ fieldVisited[fieldID] = true;
+ fields[fieldID].init(buffer, s1, l1);
+ }
+ // if (fields[fieldID].getObject() == null) {
+ // throw new IllegalStateException("illegal field " + fieldID);
+ // }
+ return fields[fieldID].getObject();
+ }
+
+ ArrayList<Object> cachedList;
+
+ /**
+ * Get the values of the fields as an ArrayList.
+ *
+ * @return The values of the fields as an ArrayList.
+ */
+ public ArrayList<Object> getFieldsAsList() {
+ if (!reset) {
+ parse();
+ }
+ if (cachedList == null) {
+ cachedList = new ArrayList<Object>();
+ } else {
+ cachedList.clear();
+ }
+ for (int i = 0; i < fields.length; i++) {
+ cachedList.add(uncheckedGetField(i));
+ }
+ return cachedList;
+ }
+
+ @Override
+ public Object getObject() {
+ return this;
+ }
+
+ protected boolean getParsed() {
+ return reset;
+ }
+
+ protected void setParsed(boolean parsed) {
+ this.reset = parsed;
+ }
+
+ protected LazyObject[] getFields() {
+ return fields;
+ }
+
+ protected void setFields(LazyObject[] fields) {
+ this.fields = fields;
+ }
+
+ protected boolean[] getFieldInited() {
+ return fieldVisited;
+ }
+
+ protected void setFieldInited(boolean[] fieldInited) {
+ this.fieldVisited = fieldInited;
+ }
+
+ /**
+ * Rebind an IFrameTupleReference to the struct.
+ */
+ public void init(IFrameTupleReference r) {
+ this.tuple = r;
+ reset = false;
+ }
+}
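
LazyColumnar rebinds to a Hyracks frame tuple per row and only materializes a field the first time it is requested. A minimal sketch of that access pattern, assuming the classes in this patch; ColumnarAccessExample is a hypothetical helper and the tuple is expected to come from the enclosing operator:

import edu.uci.ics.hivesterix.serde.lazy.LazyColumnar;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;

public class ColumnarAccessExample {
    // Rebind the columnar wrapper to the current row, then pull fields lazily:
    // a field is deserialized the first time it is requested and cached afterwards.
    public static Object readFirstField(LazyColumnar columnar, IFrameTupleReference tuple) {
        columnar.init(tuple);          // nothing is parsed yet
        return columnar.getField(0);   // field 0 is initialized on this first access
    }
}
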
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyDouble.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyDouble.java
similarity index 62%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyDouble.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyDouble.java
index d687aa1..1b2cc5a 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyDouble.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyDouble.java
@@ -23,31 +23,28 @@
/**
* LazyObject for storing a value of Double.
- *
*/
-public class LazyDouble extends
- LazyPrimitive<LazyDoubleObjectInspector, DoubleWritable> {
+public class LazyDouble extends LazyPrimitive<LazyDoubleObjectInspector, DoubleWritable> {
- public LazyDouble(LazyDoubleObjectInspector oi) {
- super(oi);
- data = new DoubleWritable();
- }
+ public LazyDouble(LazyDoubleObjectInspector oi) {
+ super(oi);
+ data = new DoubleWritable();
+ }
- public LazyDouble(LazyDouble copy) {
- super(copy);
- data = new DoubleWritable(copy.data.get());
- }
+ public LazyDouble(LazyDouble copy) {
+ super(copy);
+ data = new DoubleWritable(copy.data.get());
+ }
- @Override
- public void init(byte[] bytes, int start, int length) {
- if (length == 0) {
- isNull = true;
- return;
- } else
- isNull = false;
- assert (8 == length);
- data.set(Double.longBitsToDouble(LazyUtils
- .byteArrayToLong(bytes, start)));
- }
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ if (length == 0) {
+ isNull = true;
+ return;
+ } else
+ isNull = false;
+ assert (8 == length);
+ data.set(Double.longBitsToDouble(LazyUtils.byteArrayToLong(bytes, start)));
+ }
}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFactory.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFactory.java
new file mode 100644
index 0000000..7caa9ed
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFactory.java
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyColumnarObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyListObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyMapObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyStructObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyBooleanObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyByteObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyDoubleObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyFloatObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyIntObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyLongObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyShortObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyStringObjectInspector;
+
+/**
+ * LazyFactory.
+ */
+public final class LazyFactory {
+
+ /**
+ * Create a lazy binary primitive class given the type name.
+ */
+ public static LazyPrimitive<?, ?> createLazyPrimitiveClass(PrimitiveObjectInspector oi) {
+ PrimitiveCategory p = oi.getPrimitiveCategory();
+ switch (p) {
+ case BOOLEAN:
+ return new LazyBoolean((LazyBooleanObjectInspector) oi);
+ case BYTE:
+ return new LazyByte((LazyByteObjectInspector) oi);
+ case SHORT:
+ return new LazyShort((LazyShortObjectInspector) oi);
+ case INT:
+ return new LazyInteger((LazyIntObjectInspector) oi);
+ case LONG:
+ return new LazyLong((LazyLongObjectInspector) oi);
+ case FLOAT:
+ return new LazyFloat((LazyFloatObjectInspector) oi);
+ case DOUBLE:
+ return new LazyDouble((LazyDoubleObjectInspector) oi);
+ case STRING:
+ return new LazyString((LazyStringObjectInspector) oi);
+ default:
+ throw new RuntimeException("Internal error: no LazyObject for " + p);
+ }
+ }
+
+ /**
+ * Create a hierarchical LazyObject based on the given typeInfo.
+ */
+ public static LazyObject<? extends ObjectInspector> createLazyObject(ObjectInspector oi) {
+ ObjectInspector.Category c = oi.getCategory();
+ switch (c) {
+ case PRIMITIVE:
+ return createLazyPrimitiveClass((PrimitiveObjectInspector) oi);
+ case MAP:
+ return new LazyMap((LazyMapObjectInspector) oi);
+ case LIST:
+ return new LazyArray((LazyListObjectInspector) oi);
+ case STRUCT: // check whether it is a top-level struct
+ if (oi instanceof LazyStructObjectInspector)
+ return new LazyStruct((LazyStructObjectInspector) oi);
+ else
+ return new LazyColumnar((LazyColumnarObjectInspector) oi);
+ default:
+ throw new RuntimeException("Hive LazySerDe Internal error.");
+ }
+ }
+
+ private LazyFactory() {
+ // prevent instantiation
+ }
+}
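
A brief sketch of how the factory is meant to be used, assuming the classes above; LazyAllocationExample is a hypothetical wrapper added only to annotate the dispatch rules:

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
import edu.uci.ics.hivesterix.serde.lazy.LazyObject;

public class LazyAllocationExample {
    // PRIMITIVE inspectors map to the matching LazyPrimitive subclass, MAP/LIST to
    // LazyMap/LazyArray, a LazyStructObjectInspector to LazyStruct, and any other
    // struct inspector (the top-level columnar one) to LazyColumnar.
    public static LazyObject<? extends ObjectInspector> allocate(ObjectInspector oi) {
        return LazyFactory.createLazyObject(oi);
    }
}
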
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFloat.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFloat.java
similarity index 62%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFloat.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFloat.java
index 303cc67..430ac2e 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFloat.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFloat.java
@@ -23,31 +23,29 @@
/**
* LazyObject for storing a value of Double.
- *
*/
-public class LazyFloat extends
- LazyPrimitive<LazyFloatObjectInspector, FloatWritable> {
+public class LazyFloat extends LazyPrimitive<LazyFloatObjectInspector, FloatWritable> {
- public LazyFloat(LazyFloatObjectInspector oi) {
- super(oi);
- data = new FloatWritable();
- }
+ public LazyFloat(LazyFloatObjectInspector oi) {
+ super(oi);
+ data = new FloatWritable();
+ }
- public LazyFloat(LazyFloat copy) {
- super(copy);
- data = new FloatWritable(copy.data.get());
- }
+ public LazyFloat(LazyFloat copy) {
+ super(copy);
+ data = new FloatWritable(copy.data.get());
+ }
- @Override
- public void init(byte[] bytes, int start, int length) {
- if (length == 0) {
- isNull = true;
- return;
- } else
- isNull = false;
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ if (length == 0) {
+ isNull = true;
+ return;
+ } else
+ isNull = false;
- assert (4 == length);
- data.set(Float.intBitsToFloat(LazyUtils.byteArrayToInt(bytes, start)));
- }
+ assert (4 == length);
+ data.set(Float.intBitsToFloat(LazyUtils.byteArrayToInt(bytes, start)));
+ }
}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyInteger.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyInteger.java
new file mode 100644
index 0000000..0765c4f
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyInteger.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.io.IntWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyIntObjectInspector;
+
+/**
+ * LazyObject for storing a value of Integer.
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project. As with the specification, this implementation relied on code laid out in <a href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's Delight, (Addison Wesley, 2002)</a> as well as <a href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ */
+public class LazyInteger extends LazyPrimitive<LazyIntObjectInspector, IntWritable> {
+
+ public LazyInteger(LazyIntObjectInspector oi) {
+ super(oi);
+ data = new IntWritable();
+ }
+
+ public LazyInteger(LazyInteger copy) {
+ super(copy);
+ data = new IntWritable(copy.data.get());
+ }
+
+ /**
+ * The reusable vInt for decoding the integer.
+ */
+ VInt vInt = new LazyUtils.VInt();
+
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ if (length == 0) {
+ isNull = true;
+ return;
+ } else
+ isNull = false;
+
+ LazyUtils.readVInt(bytes, start, vInt);
+ assert (length == vInt.length);
+ if (length != vInt.length)
+ throw new IllegalStateException("parse int: length mismatch, expected " + vInt.length + " but get "
+ + length);
+ data.set(vInt.value);
+ }
+}
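
LazyInteger leans on LazyUtils.readVInt, which reports both the decoded value and the number of bytes consumed. A minimal sketch of that contract, assuming the LazyUtils helpers referenced above; VIntDecodeExample is a hypothetical class placed in the same package:

package edu.uci.ics.hivesterix.serde.lazy;

public class VIntDecodeExample {
    // readVInt fills in both the decoded value and the number of bytes consumed;
    // LazyInteger cross-checks that consumed length against the declared field length.
    public static int decode(byte[] buffer, int offset) {
        LazyUtils.VInt vInt = new LazyUtils.VInt();
        LazyUtils.readVInt(buffer, offset, vInt);
        return vInt.value; // vInt.length holds the number of bytes that were read
    }
}
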
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyLong.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyLong.java
new file mode 100644
index 0000000..e6b56c3
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyLong.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.io.LongWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyLongObjectInspector;
+
+/**
+ * LazyObject for storing a value of Long.
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project. As with the specification, this implementation relied on code laid out in <a href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's Delight, (Addison Wesley, 2002)</a> as well as <a href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ */
+public class LazyLong extends LazyPrimitive<LazyLongObjectInspector, LongWritable> {
+
+ public LazyLong(LazyLongObjectInspector oi) {
+ super(oi);
+ data = new LongWritable();
+ }
+
+ public LazyLong(LazyLong copy) {
+ super(copy);
+ data = new LongWritable(copy.data.get());
+ }
+
+ /**
+ * The reusable vLong for decoding the long.
+ */
+ VLong vLong = new LazyUtils.VLong();
+
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ if (length == 0) {
+ isNull = true;
+ return;
+ } else
+ isNull = false;
+
+ LazyUtils.readVLong(bytes, start, vLong);
+ assert (length == vLong.length);
+ if (length != vLong.length)
+ throw new IllegalStateException("parse long: length mismatch");
+ data.set(vLong.value);
+ }
+
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyMap.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyMap.java
new file mode 100644
index 0000000..9c7af2e
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyMap.java
@@ -0,0 +1,327 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.RecordInfo;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyMapObjectInspector;
+
+/**
+ * LazyMap is serialized as follows: start A b c b c b c end bytes[] ->
+ * |--------|---|---|---|---| ... |---|---|
+ * Section A is the null-bytes. Suppose the map has N key-value pairs, then
+ * there are (N*2+7)/8 bytes used as null-bytes. Each bit corresponds to a key
+ * or a value and it indicates whether that key or value is null (0) or not null
+ * (1).
+ * After A, all the bytes are actual serialized data of the map, which are
+ * key-value pairs. b represent the keys and c represent the values. Each of
+ * them is again a LazyObject.
+ */
+
+@SuppressWarnings("rawtypes")
+public class LazyMap extends LazyNonPrimitive<LazyMapObjectInspector> {
+
+ private static Log LOG = LogFactory.getLog(LazyMap.class.getName());
+
+ /**
+ * Whether the data is already parsed or not.
+ */
+ boolean parsed;
+
+ /**
+ * The size of the map. Only valid when the data is parsed. -1 when the map
+ * is NULL.
+ */
+ int mapSize = 0;
+
+ /**
+ * The beginning position and length of key[i] and value[i]. Only valid when
+ * the data is parsed.
+ */
+ int[] keyStart;
+ int[] keyLength;
+ int[] valueStart;
+ int[] valueLength;
+ /**
+ * Whether valueObjects[i]/keyObjects[i] is initialized or not.
+ */
+ boolean[] keyInited;
+ boolean[] valueInited;
+
+ /**
+ * Whether valueObjects[i]/keyObjects[i] is null or not. This cannot be
+ * inferred from the length of the object. In particular, a 0-length string
+ * is not null.
+ */
+ boolean[] keyIsNull;
+ boolean[] valueIsNull;
+
+ /**
+ * The keys are stored in an array of LazyPrimitives.
+ */
+ LazyPrimitive<?, ?>[] keyObjects;
+ /**
+ * The values are stored in an array of LazyObjects. value[index] starts
+ * from KeyEnd[index] + 1 and ends before KeyStart[index+1] - 1.
+ */
+ LazyObject[] valueObjects;
+
+ protected LazyMap(LazyMapObjectInspector oi) {
+ super(oi);
+ }
+
+ /**
+ * Set the row data for this LazyMap.
+ *
+ * @see LazyObject#init(byte[], int, int)
+ */
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ super.init(bytes, start, length);
+ parsed = false;
+ }
+
+ /**
+ * Adjust the size of arrays: keyStart, keyLength valueStart, valueLength
+ * keyInited, keyIsNull valueInited, valueIsNull.
+ */
+ protected void adjustArraySize(int newSize) {
+ if (keyStart == null || keyStart.length < newSize) {
+ keyStart = new int[newSize];
+ keyLength = new int[newSize];
+ valueStart = new int[newSize];
+ valueLength = new int[newSize];
+ keyInited = new boolean[newSize];
+ keyIsNull = new boolean[newSize];
+ valueInited = new boolean[newSize];
+ valueIsNull = new boolean[newSize];
+ keyObjects = new LazyPrimitive<?, ?>[newSize];
+ valueObjects = new LazyObject[newSize];
+ }
+ }
+
+ boolean nullMapKey = false;
+ VInt vInt = new LazyUtils.VInt();
+ RecordInfo recordInfo = new LazyUtils.RecordInfo();
+
+ /**
+ * Parse the byte[] and fill keyStart, keyLength, keyIsNull valueStart,
+ * valueLength and valueIsNull.
+ */
+ private void parse() {
+
+ // get the VInt that represents the map size
+ LazyUtils.readVInt(bytes, start, vInt);
+ mapSize = vInt.value;
+ if (0 == mapSize) {
+ parsed = true;
+ return;
+ }
+
+ // adjust arrays
+ adjustArraySize(mapSize);
+
+ // find out the null-bytes
+ int mapByteStart = start + vInt.length;
+ int nullByteCur = mapByteStart;
+ int nullByteEnd = mapByteStart + (mapSize * 2 + 7) / 8;
+ int lastElementByteEnd = nullByteEnd;
+
+ // parsing the keys and values one by one
+ for (int i = 0; i < mapSize; i++) {
+ // parse a key
+ keyIsNull[i] = true;
+ if ((bytes[nullByteCur] & (1 << ((i * 2) % 8))) != 0) {
+ keyIsNull[i] = false;
+ LazyUtils.checkObjectByteInfo(((MapObjectInspector) oi).getMapKeyObjectInspector(), bytes,
+ lastElementByteEnd, recordInfo);
+ keyStart[i] = lastElementByteEnd + recordInfo.elementOffset;
+ keyLength[i] = recordInfo.elementSize;
+ lastElementByteEnd = keyStart[i] + keyLength[i];
+ } else if (!nullMapKey) {
+ nullMapKey = true;
+ LOG.warn("Null map key encountered! Ignoring similar problems.");
+ }
+
+ // parse a value
+ valueIsNull[i] = true;
+ if ((bytes[nullByteCur] & (1 << ((i * 2 + 1) % 8))) != 0) {
+ valueIsNull[i] = false;
+ LazyUtils.checkObjectByteInfo(((MapObjectInspector) oi).getMapValueObjectInspector(), bytes,
+ lastElementByteEnd, recordInfo);
+ valueStart[i] = lastElementByteEnd + recordInfo.elementOffset;
+ valueLength[i] = recordInfo.elementSize;
+ lastElementByteEnd = valueStart[i] + valueLength[i];
+ }
+
+ // move onto the next null byte
+ if (3 == (i % 4)) {
+ nullByteCur++;
+ }
+ }
+
+ Arrays.fill(keyInited, 0, mapSize, false);
+ Arrays.fill(valueInited, 0, mapSize, false);
+ parsed = true;
+ }
+
+ /**
+ * Get the value object with the index without checking parsed.
+ *
+ * @param index
+ * The index into the array starting from 0
+ */
+ private LazyObject uncheckedGetValue(int index) {
+ if (valueIsNull[index]) {
+ return null;
+ }
+ if (!valueInited[index]) {
+ valueInited[index] = true;
+ if (valueObjects[index] == null) {
+ valueObjects[index] = LazyFactory.createLazyObject(((MapObjectInspector) oi)
+ .getMapValueObjectInspector());
+ }
+ valueObjects[index].init(bytes, valueStart[index], valueLength[index]);
+ }
+ return valueObjects[index];
+ }
+
+ /**
+ * Get the value in the map for the key.
+ * If there are multiple matches (which is possible in the serialized
+ * format), only the first one is returned.
+ * The most efficient way to get the value for the key is to serialize the
+ * key and then try to find it in the array. We do linear search because in
+ * most cases, the user only wants to get one or two values out of the map, and
+ * the cost of building up a HashMap is substantially higher.
+ *
+ * @param key
+ * The key object that we are looking for.
+ * @return The corresponding value object, or NULL if not found
+ */
+ public Object getMapValueElement(Object key) {
+ if (!parsed) {
+ parse();
+ }
+ // search for the key
+ for (int i = 0; i < mapSize; i++) {
+ LazyPrimitive<?, ?> lazyKeyI = uncheckedGetKey(i);
+ if (lazyKeyI == null) {
+ continue;
+ }
+ // getWritableObject() will convert LazyPrimitive to actual
+ // primitive
+ // writable objects.
+ Object keyI = lazyKeyI.getWritableObject();
+ if (keyI == null) {
+ continue;
+ }
+ if (keyI.equals(key)) {
+ // Got a match, return the value
+ LazyObject v = uncheckedGetValue(i);
+ return v == null ? v : v.getObject();
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Get the key object with the index without checking parsed.
+ *
+ * @param index
+ * The index into the array starting from 0
+ */
+ private LazyPrimitive<?, ?> uncheckedGetKey(int index) {
+ if (keyIsNull[index]) {
+ return null;
+ }
+ if (!keyInited[index]) {
+ keyInited[index] = true;
+ if (keyObjects[index] == null) {
+ // Keys are always primitive
+ keyObjects[index] = LazyFactory
+ .createLazyPrimitiveClass((PrimitiveObjectInspector) ((MapObjectInspector) oi)
+ .getMapKeyObjectInspector());
+ }
+ keyObjects[index].init(bytes, keyStart[index], keyLength[index]);
+ }
+ return keyObjects[index];
+ }
+
+ /**
+ * cachedMap is reused for different calls to getMap(). But each LazyMap has
+ * a separate cachedMap so we won't overwrite the data by accident.
+ */
+ LinkedHashMap<Object, Object> cachedMap;
+
+ /**
+ * Return the map object representing this LazyMap. Note that the keyObjects
+ * will be Writable primitive objects.
+ *
+ * @return the map object
+ */
+ public Map<Object, Object> getMap() {
+ if (!parsed) {
+ parse();
+ }
+ if (cachedMap == null) {
+ // Use LinkedHashMap to provide deterministic order
+ cachedMap = new LinkedHashMap<Object, Object>();
+ } else {
+ cachedMap.clear();
+ }
+
+ // go through each element of the map
+ for (int i = 0; i < mapSize; i++) {
+ LazyPrimitive<?, ?> lazyKey = uncheckedGetKey(i);
+ if (lazyKey == null) {
+ continue;
+ }
+ Object key = lazyKey.getObject();
+ // do not overwrite if there are duplicate keys
+ if (key != null && !cachedMap.containsKey(key)) {
+ LazyObject lazyValue = uncheckedGetValue(i);
+ Object value = (lazyValue == null ? null : lazyValue.getObject());
+ cachedMap.put(key, value);
+ }
+ }
+ return cachedMap;
+ }
+
+ /**
+ * Get the size of the map represented by this LazyMap.
+ *
+ * @return The size of the map
+ */
+ public int getMapSize() {
+ if (!parsed) {
+ parse();
+ }
+ return mapSize;
+ }
+}
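
As with LazyArray, the map's null-byte layout can be illustrated with standalone arithmetic (not part of the patch; MapNullByteExample and the sample header bytes are illustrative): with N key-value pairs there are (N*2+7)/8 null bytes, and pair i uses bits (2i)%8 and (2i+1)%8 of null byte i/4.

public class MapNullByteExample {

    static boolean keyNonNull(byte[] nullBytes, int i) {
        return (nullBytes[i / 4] & (1 << ((i * 2) % 8))) != 0;
    }

    static boolean valueNonNull(byte[] nullBytes, int i) {
        return (nullBytes[i / 4] & (1 << ((i * 2 + 1) % 8))) != 0;
    }

    public static void main(String[] args) {
        int n = 5;
        System.out.println("null bytes: " + ((n * 2 + 7) / 8)); // 2
        // byte 0: key 0, value 0 and key 1 non-null; byte 1: key 4 and value 4 non-null
        byte[] header = { 0b00000111, 0b00000011 };
        for (int i = 0; i < n; i++) {
            System.out.println("pair " + i + ": key=" + keyNonNull(header, i)
                    + ", value=" + valueNonNull(header, i));
        }
    }
}
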
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyNonPrimitive.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyNonPrimitive.java
new file mode 100644
index 0000000..f7ae1e3
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyNonPrimitive.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+/**
+ * LazyNonPrimitive stores a non-primitive Object (array, map or struct) in a LazyObject.
+ */
+public abstract class LazyNonPrimitive<OI extends ObjectInspector> extends LazyObject<OI> {
+
+ protected byte[] bytes;
+ protected int start;
+ protected int length;
+
+ /**
+ * Create a LazyNonPrimitive object with the specified ObjectInspector.
+ *
+ * @param oi
+ * The ObjectInspector would have to have a hierarchy of
+ * LazyObjectInspectors with the leaf nodes being
+ * WritableObjectInspectors. It's used both for accessing the
+ * type hierarchy of the complex object, as well as getting meta
+ * information (separator, nullSequence, etc) when parsing the
+ * lazy object.
+ */
+ protected LazyNonPrimitive(OI oi) {
+ super(oi);
+ bytes = null;
+ start = 0;
+ length = 0;
+ }
+
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ if (bytes == null) {
+ throw new RuntimeException("bytes cannot be null!");
+ }
+ this.bytes = bytes;
+ this.start = start;
+ this.length = length;
+ assert start >= 0;
+ assert start + length <= bytes.length;
+ }
+
+ @Override
+ public Object getObject() {
+ return this;
+ }
+
+ @Override
+ public int hashCode() {
+ return LazyUtils.hashBytes(bytes, start, length);
+ }
+
+ @Override
+ public void init(IFrameTupleReference tuple) {
+ }
+
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyObject.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyObject.java
new file mode 100644
index 0000000..dc1dc60
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyObject.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+/**
+ * LazyObject stores an object in a range of bytes in a byte[].
+ * A LazyObject can represent any primitive object or hierarchical object like
+ * array, map or struct.
+ */
+public abstract class LazyObject<OI extends ObjectInspector> {
+
+ OI oi;
+
+ /**
+ * Create a LazyObject.
+ *
+ * @param oi
+ * Derived classes can access meta information about this Lazy
+ * Object (e.g, separator, nullSequence, escaper) from it.
+ */
+ protected LazyObject(OI oi) {
+ this.oi = oi;
+ }
+
+ /**
+ * Set the data for this LazyObject. The bytes between start and
+ * start + length hold the serialized value; the same byte[] can be
+ * reused across multiple rows.
+ *
+ * @param bytes
+ * The byte array holding the data.
+ * @param start
+ * The start position inside the bytes.
+ * @param length
+ * The length of the data, starting from "start"
+ * @see ByteArrayRef
+ */
+ public abstract void init(byte[] bytes, int start, int length);
+
+ public abstract void init(IFrameTupleReference tuple);
+
+ /**
+ * If the LazyObject is a primitive Object, then deserialize it and return
+ * the actual primitive Object. Otherwise (array, map, struct), return this.
+ */
+ public abstract Object getObject();
+
+ @Override
+ public abstract int hashCode();
+
+ protected OI getInspector() {
+ return oi;
+ }
+
+ protected void setInspector(OI oi) {
+ this.oi = oi;
+ }
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyPrimitive.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyPrimitive.java
new file mode 100644
index 0000000..8139c65
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyPrimitive.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Writable;
+
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+/**
+ * LazyPrimitive stores a primitive Object in a LazyObject.
+ */
+public abstract class LazyPrimitive<OI extends ObjectInspector, T extends Writable> extends LazyObject<OI> {
+
+ LazyPrimitive(OI oi) {
+ super(oi);
+ }
+
+ LazyPrimitive(LazyPrimitive<OI, T> copy) {
+ super(copy.oi);
+ isNull = copy.isNull;
+ }
+
+ T data;
+ boolean isNull = false;
+
+ /**
+ * Returns the primitive object represented by this LazyObject. This is
+ * useful because it guarantees that null values are surfaced as Java null.
+ */
+ @Override
+ public Object getObject() {
+ return isNull ? null : this;
+ }
+
+ public T getWritableObject() {
+ return isNull ? null : data;
+ }
+
+ @Override
+ public String toString() {
+ return isNull ? "null" : data.toString();
+ }
+
+ @Override
+ public int hashCode() {
+ return isNull ? 0 : data.hashCode();
+ }
+
+ @Override
+ public void init(IFrameTupleReference tuple) {
+ }
+
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java
new file mode 100644
index 0000000..05b82ba
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java
@@ -0,0 +1,460 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.ByteStream;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * The LazySerDe class combines the lazy property of the LazySimpleSerDe class
+ * and the binary property of the BinarySortable class. Lazy means a field is
+ * not deserialized until it is required. Binary means a field is serialized
+ * in a compact binary format.
+ */
+public class LazySerDe implements SerDe {
+
+ public static final Log LOG = LogFactory.getLog(LazySerDe.class.getName());
+
+ public LazySerDe() {
+ }
+
+ List<String> columnNames;
+ List<TypeInfo> columnTypes;
+
+ TypeInfo rowTypeInfo;
+ ObjectInspector cachedObjectInspector;
+
+ // The object for storing row data
+ LazyColumnar cachedLazyStruct;
+
+ /**
+ * Initialize the SerDe with configuration and table information.
+ */
+ @Override
+ public void initialize(Configuration conf, Properties tbl) throws SerDeException {
+ // Get column names and types
+ String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
+ String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
+ if (columnNameProperty.length() == 0) {
+ columnNames = new ArrayList<String>();
+ } else {
+ columnNames = Arrays.asList(columnNameProperty.split(","));
+ }
+ if (columnTypeProperty.length() == 0) {
+ columnTypes = new ArrayList<TypeInfo>();
+ } else {
+ columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+ }
+ assert (columnNames.size() == columnTypes.size());
+ // Create row related objects
+ rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
+ // Create the object inspector and the lazy binary struct object
+ cachedObjectInspector = LazyUtils.getLazyObjectInspectorFromTypeInfo(rowTypeInfo, true);
+ cachedLazyStruct = (LazyColumnar) LazyFactory.createLazyObject(cachedObjectInspector);
+ // output debug info
+ LOG.debug("LazySerDe initialized with: columnNames=" + columnNames + " columnTypes=" + columnTypes);
+ }
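+
+ /*
+ * Illustrative usage sketch; the column names and types are hypothetical,
+ * the property keys are the Hive constants referenced above:
+ *
+ * Properties tbl = new Properties();
+ * tbl.setProperty(Constants.LIST_COLUMNS, "id,name");
+ * tbl.setProperty(Constants.LIST_COLUMN_TYPES, "int,string");
+ * LazySerDe serde = new LazySerDe();
+ * serde.initialize(new Configuration(), tbl);
+ * // serde.getObjectInspector() now exposes a columnar inspector
+ * // over the two columns.
+ */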
+
+ /**
+ * Returns the ObjectInspector for the row.
+ */
+ @Override
+ public ObjectInspector getObjectInspector() throws SerDeException {
+ return cachedObjectInspector;
+ }
+
+ /**
+ * Returns the Writable Class after serialization.
+ */
+ @Override
+ public Class<? extends Writable> getSerializedClass() {
+ return BytesWritable.class;
+ }
+
+ // The wrapper for byte array
+ ByteArrayRef byteArrayRef;
+
+ /**
+ * Deserialize a table record to a Lazy struct.
+ */
+ @SuppressWarnings("deprecation")
+ @Override
+ public Object deserialize(Writable field) throws SerDeException {
+ if (byteArrayRef == null) {
+ byteArrayRef = new ByteArrayRef();
+ }
+ if (field instanceof BytesWritable) {
+ BytesWritable b = (BytesWritable) field;
+ if (b.getSize() == 0) {
+ return null;
+ }
+ // For backward-compatibility with hadoop 0.17
+ byteArrayRef.setData(b.get());
+ cachedLazyStruct.init(byteArrayRef.getData(), 0, b.getSize());
+ } else if (field instanceof Text) {
+ Text t = (Text) field;
+ if (t.getLength() == 0) {
+ return null;
+ }
+ byteArrayRef.setData(t.getBytes());
+ cachedLazyStruct.init(byteArrayRef.getData(), 0, t.getLength());
+ } else {
+ throw new SerDeException(getClass().toString() + ": expects either BytesWritable or Text object!");
+ }
+ return cachedLazyStruct;
+ }
+
+ /**
+ * The reusable output buffer and serialize byte buffer.
+ */
+ BytesWritable serializeBytesWritable = new BytesWritable();
+ ByteStream.Output serializeByteStream = new ByteStream.Output();
+
+ /**
+ * Serialize an object to a byte buffer in a binary compact way.
+ */
+ @Override
+ public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
+ // check whether the object is a struct record or a primitive value
+ serializeByteStream.reset();
+
+ if (objInspector.getCategory() != Category.STRUCT) {
+ // serialize the primitive object
+ serialize(serializeByteStream, obj, objInspector);
+ } else {
+ // serialize the row as a struct
+ serializeStruct(serializeByteStream, obj, (StructObjectInspector) objInspector);
+ }
+ // return the serialized bytes
+ serializeBytesWritable.set(serializeByteStream.getData(), 0, serializeByteStream.getCount());
+ return serializeBytesWritable;
+ }
+
+ boolean nullMapKey = false;
+
+ /**
+ * Serialize a struct object without writing the byte size. This function is
+ * shared by both row serialization and struct serialization.
+ *
+ * @param byteStream
+ * the byte stream storing the serialization data
+ * @param obj
+ * the struct object to serialize
+ * @param soi
+ * the struct object inspector
+ */
+ private void serializeStruct(Output byteStream, Object obj, StructObjectInspector soi) {
+ // do nothing for null struct
+ if (null == obj) {
+ return;
+ }
+ /*
+ * Interleave one null byte with every eight struct fields in each round,
+ * so that the data can still be deserialized under a different table
+ * schema.
+ */
+ List<? extends StructField> fields = soi.getAllStructFieldRefs();
+ int size = fields.size();
+ int lasti = 0;
+ byte nullByte = 0;
+ for (int i = 0; i < size; i++) {
+ // set bit to 1 if a field is not null
+ if (null != soi.getStructFieldData(obj, fields.get(i))) {
+ nullByte |= 1 << (i % 8);
+ }
+ // write the null byte every eight elements or
+ // if this is the last element and serialize the
+ // corresponding 8 struct fields at the same time
+ if (7 == i % 8 || i == size - 1) {
+ byteStream.write(nullByte);
+ for (int j = lasti; j <= i; j++) {
+ serialize(byteStream, soi.getStructFieldData(obj, fields.get(j)), fields.get(j)
+ .getFieldObjectInspector());
+ }
+ lasti = i + 1;
+ nullByte = 0;
+ }
+ }
+ }
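+
+ /*
+ * Layout example (illustrative): for a three-field struct (a = 1, b = null,
+ * c = 2) a single round is emitted:
+ *
+ * [nullByte = 0b00000101][serialize(a)][serialize(c)]
+ *
+ * Bits 0 and 2 are set because fields 0 and 2 are non-null; the null field
+ * contributes no bytes. A struct with more than eight fields emits another
+ * null byte before fields 8..15, and so on.
+ */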
+
+ /**
+ * A recursive function that serializes an object to a byte buffer based on
+ * its object inspector.
+ *
+ * @param byteStream
+ * the byte stream storing the serialization data
+ * @param obj
+ * the object to serialize
+ * @param objInspector
+ * the object inspector
+ */
+ private void serialize(Output byteStream, Object obj, ObjectInspector objInspector) {
+
+ // do nothing for null object
+ if (null == obj) {
+ return;
+ }
+
+ switch (objInspector.getCategory()) {
+ case PRIMITIVE: {
+ PrimitiveObjectInspector poi = (PrimitiveObjectInspector) objInspector;
+ switch (poi.getPrimitiveCategory()) {
+ case VOID: {
+ return;
+ }
+ case BOOLEAN: {
+ boolean v = ((BooleanObjectInspector) poi).get(obj);
+ byteStream.write((byte) (v ? 1 : 0));
+ return;
+ }
+ case BYTE: {
+ ByteObjectInspector boi = (ByteObjectInspector) poi;
+ byte v = boi.get(obj);
+ byteStream.write(v);
+ return;
+ }
+ case SHORT: {
+ ShortObjectInspector spoi = (ShortObjectInspector) poi;
+ short v = spoi.get(obj);
+ byteStream.write((byte) (v >> 8));
+ byteStream.write((byte) (v));
+ return;
+ }
+ case INT: {
+ IntObjectInspector ioi = (IntObjectInspector) poi;
+ int v = ioi.get(obj);
+ LazyUtils.writeVInt(byteStream, v);
+ return;
+ }
+ case LONG: {
+ LongObjectInspector loi = (LongObjectInspector) poi;
+ long v = loi.get(obj);
+ LazyUtils.writeVLong(byteStream, v);
+ return;
+ }
+ case FLOAT: {
+ FloatObjectInspector foi = (FloatObjectInspector) poi;
+ int v = Float.floatToIntBits(foi.get(obj));
+ byteStream.write((byte) (v >> 24));
+ byteStream.write((byte) (v >> 16));
+ byteStream.write((byte) (v >> 8));
+ byteStream.write((byte) (v));
+ return;
+ }
+ case DOUBLE: {
+ DoubleObjectInspector doi = (DoubleObjectInspector) poi;
+ long v = Double.doubleToLongBits(doi.get(obj));
+ byteStream.write((byte) (v >> 56));
+ byteStream.write((byte) (v >> 48));
+ byteStream.write((byte) (v >> 40));
+ byteStream.write((byte) (v >> 32));
+ byteStream.write((byte) (v >> 24));
+ byteStream.write((byte) (v >> 16));
+ byteStream.write((byte) (v >> 8));
+ byteStream.write((byte) (v));
+ return;
+ }
+ case STRING: {
+ StringObjectInspector soi = (StringObjectInspector) poi;
+ Text t = soi.getPrimitiveWritableObject(obj);
+ /* write byte size of the string which is a vint */
+ int length = t.getLength();
+ LazyUtils.writeVInt(byteStream, length);
+ /* write string itself */
+ byte[] data = t.getBytes();
+ byteStream.write(data, 0, length);
+ return;
+ }
+ default: {
+ throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
+ }
+ }
+ }
+ case LIST: {
+ ListObjectInspector loi = (ListObjectInspector) objInspector;
+ ObjectInspector eoi = loi.getListElementObjectInspector();
+
+ // 1/ reserve space for the byte size of the list,
+ // which is an integer and takes four bytes
+ int byteSizeStart = byteStream.getCount();
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ int listStart = byteStream.getCount();
+
+ // 2/ write the size of the list as a VInt
+ int size = loi.getListLength(obj);
+ LazyUtils.writeVInt(byteStream, size);
+
+ // 3/ write the null bytes
+ byte nullByte = 0;
+ for (int eid = 0; eid < size; eid++) {
+ // set the bit to 1 if an element is not null
+ if (null != loi.getListElement(obj, eid)) {
+ nullByte |= 1 << (eid % 8);
+ }
+ // store the byte every eight elements or
+ // if this is the last element
+ if (7 == eid % 8 || eid == size - 1) {
+ byteStream.write(nullByte);
+ nullByte = 0;
+ }
+ }
+
+ // 4/ write element by element from the list
+ for (int eid = 0; eid < size; eid++) {
+ serialize(byteStream, loi.getListElement(obj, eid), eoi);
+ }
+
+ // 5/ update the list byte size
+ int listEnd = byteStream.getCount();
+ int listSize = listEnd - listStart;
+ byte[] bytes = byteStream.getData();
+ bytes[byteSizeStart] = (byte) (listSize >> 24);
+ bytes[byteSizeStart + 1] = (byte) (listSize >> 16);
+ bytes[byteSizeStart + 2] = (byte) (listSize >> 8);
+ bytes[byteSizeStart + 3] = (byte) (listSize);
+
+ return;
+ }
+ case MAP: {
+ MapObjectInspector moi = (MapObjectInspector) objInspector;
+ ObjectInspector koi = moi.getMapKeyObjectInspector();
+ ObjectInspector voi = moi.getMapValueObjectInspector();
+ Map<?, ?> map = moi.getMap(obj);
+
+ // 1/ reserve space for the byte size of the map,
+ // which is an integer and takes four bytes
+ int byteSizeStart = byteStream.getCount();
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ int mapStart = byteStream.getCount();
+
+ // 2/ write the size of the map which is a VInt
+ int size = map.size();
+ LazyUtils.writeVInt(byteStream, size);
+
+ // 3/ write the null bytes
+ int b = 0;
+ byte nullByte = 0;
+ for (Map.Entry<?, ?> entry : map.entrySet()) {
+ // set the bit to 1 if a key is not null
+ if (null != entry.getKey()) {
+ nullByte |= 1 << (b % 8);
+ } else if (!nullMapKey) {
+ nullMapKey = true;
+ LOG.warn("Null map key encountered! Ignoring similar problems.");
+ }
+ b++;
+ // set the bit to 1 if a value is not null
+ if (null != entry.getValue()) {
+ nullByte |= 1 << (b % 8);
+ }
+ b++;
+ // write the byte to stream every 4 key-value pairs
+ // or if this is the last key-value pair
+ if (0 == b % 8 || b == size * 2) {
+ byteStream.write(nullByte);
+ nullByte = 0;
+ }
+ }
+
+ // 4/ write key-value pairs one by one
+ for (Map.Entry<?, ?> entry : map.entrySet()) {
+ serialize(byteStream, entry.getKey(), koi);
+ serialize(byteStream, entry.getValue(), voi);
+ }
+
+ // 5/ update the byte size of the map
+ int mapEnd = byteStream.getCount();
+ int mapSize = mapEnd - mapStart;
+ byte[] bytes = byteStream.getData();
+ bytes[byteSizeStart] = (byte) (mapSize >> 24);
+ bytes[byteSizeStart + 1] = (byte) (mapSize >> 16);
+ bytes[byteSizeStart + 2] = (byte) (mapSize >> 8);
+ bytes[byteSizeStart + 3] = (byte) (mapSize);
+
+ return;
+ }
+ case STRUCT: {
+ // 1/ reserve space for the byte size of the struct,
+ // which is an integer and takes four bytes
+ int byteSizeStart = byteStream.getCount();
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ int structStart = byteStream.getCount();
+
+ // 2/ serialize the struct
+ serializeStruct(byteStream, obj, (StructObjectInspector) objInspector);
+
+ // 3/ update the byte size of the struct
+ int structEnd = byteStream.getCount();
+ int structSize = structEnd - structStart;
+ byte[] bytes = byteStream.getData();
+ bytes[byteSizeStart] = (byte) (structSize >> 24);
+ bytes[byteSizeStart + 1] = (byte) (structSize >> 16);
+ bytes[byteSizeStart + 2] = (byte) (structSize >> 8);
+ bytes[byteSizeStart + 3] = (byte) (structSize);
+
+ return;
+ }
+ default: {
+ throw new RuntimeException("Unrecognized type: " + objInspector.getCategory());
+ }
+ }
+ }
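+
+ /*
+ * Wire-format summary derived from the cases above, shown for an
+ * illustrative row (id INT = 300, name STRING = "ab"), both fields
+ * non-null:
+ *
+ * nullByte: 0x03 (bits 0 and 1 set)
+ * INT 300: vint bytes 0x8E 0x01 0x2C
+ * STRING "ab": vint length 0x02, then bytes 'a' 'b'
+ */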
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyShort.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyShort.java
new file mode 100644
index 0000000..f493b37
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyShort.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyShortObjectInspector;
+
+/**
+ * LazyObject for storing a value of Short.
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project. As with the specification, this implementation relied on code laid out in <a href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's Delight, (Addison Wesley, 2002)</a> as well as <a href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ */
+public class LazyShort extends LazyPrimitive<LazyShortObjectInspector, ShortWritable> {
+
+ public LazyShort(LazyShortObjectInspector oi) {
+ super(oi);
+ data = new ShortWritable();
+ }
+
+ public LazyShort(LazyShort copy) {
+ super(copy);
+ data = new ShortWritable(copy.data.get());
+ }
+
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ if (length == 0) {
+ isNull = true;
+ return;
+ }
+ isNull = false;
+
+ assert (2 == length);
+ data.set(LazyUtils.byteArrayToShort(bytes, start));
+ }
+
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyString.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyString.java
new file mode 100644
index 0000000..0293af8
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyString.java
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.io.Text;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyStringObjectInspector;
+
+/**
+ * LazyObject for storing a value of String.
+ */
+public class LazyString extends LazyPrimitive<LazyStringObjectInspector, Text> {
+
+ public LazyString(LazyStringObjectInspector oi) {
+ super(oi);
+ data = new Text();
+ }
+
+ public LazyString(LazyString copy) {
+ super(copy);
+ data = new Text(copy.data);
+ }
+
+ VInt vInt = new LazyUtils.VInt();
+
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ if (length == 0) {
+ isNull = true;
+ return;
+ }
+ isNull = false;
+
+ // get the byte length of the string
+ LazyUtils.readVInt(bytes, start, vInt);
+ if (vInt.value + vInt.length != length)
+ throw new IllegalStateException("parse string: length mismatch, expected " + (vInt.value + vInt.length)
+ + " but got " + length);
+ assert (length - vInt.length > -1);
+ data.set(bytes, start + vInt.length, length - vInt.length);
+ }
+
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyStruct.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyStruct.java
new file mode 100644
index 0000000..47e95e4
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyStruct.java
@@ -0,0 +1,234 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.RecordInfo;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyStructObjectInspector;
+
+/**
+ * LazyStruct is serialized as follows:
+ *
+ * <pre>
+ * start      A B         A B         end
+ * bytes[] -> |-----|---------|--- ... ---|-----|---------|
+ * </pre>
+ *
+ * Section A is one null byte, corresponding to eight struct fields in Section
+ * B. Each bit indicates whether the corresponding field is null (0) or not
+ * null (1). Each field is a LazyObject.
+ * Following B, there is another section A and B. This pattern repeats until
+ * all struct fields are serialized.
+ */
+public class LazyStruct extends LazyNonPrimitive<LazyStructObjectInspector> {
+
+ private static Log LOG = LogFactory.getLog(LazyStruct.class.getName());
+
+ /**
+ * Whether the data is already parsed or not.
+ */
+ boolean parsed;
+
+ /**
+ * The fields of the struct.
+ */
+ @SuppressWarnings("rawtypes")
+ LazyObject[] fields;
+
+ /**
+ * Whether a field is initialized or not.
+ */
+ boolean[] fieldInited;
+
+ /**
+ * Whether a field is null or not. A length of 0 does not mean the field is
+ * null; in particular, a 0-length string is not null.
+ */
+ boolean[] fieldIsNull;
+
+ /**
+ * The start positions and lengths of struct fields. Only valid when the
+ * data is parsed.
+ */
+ int[] fieldStart;
+ int[] fieldLength;
+
+ /**
+ * Construct a LazyStruct object with an ObjectInspector.
+ */
+ protected LazyStruct(LazyStructObjectInspector oi) {
+ super(oi);
+ }
+
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ super.init(bytes, start, length);
+ parsed = false;
+ }
+
+ RecordInfo recordInfo = new LazyUtils.RecordInfo();
+ boolean missingFieldWarned = false;
+ boolean extraFieldWarned = false;
+
+ /**
+ * Parse the byte[] and fill fieldStart, fieldLength, fieldInited and
+ * fieldIsNull.
+ */
+ private void parse() {
+
+ List<? extends StructField> fieldRefs = ((StructObjectInspector) oi).getAllStructFieldRefs();
+
+ if (fields == null) {
+ fields = new LazyObject[fieldRefs.size()];
+ for (int i = 0; i < fields.length; i++) {
+ ObjectInspector insp = fieldRefs.get(i).getFieldObjectInspector();
+ fields[i] = insp == null ? null : LazyFactory.createLazyObject(insp);
+ }
+ fieldInited = new boolean[fields.length];
+ fieldIsNull = new boolean[fields.length];
+ fieldStart = new int[fields.length];
+ fieldLength = new int[fields.length];
+ }
+
+ /**
+ * Please note that one null byte is followed by up to eight fields, then
+ * another null byte and more fields.
+ */
+
+ int fieldId = 0;
+ int structByteEnd = start + length;
+
+ byte nullByte = bytes[start];
+ int lastFieldByteEnd = start + 1;
+ // Go through all bytes in the byte[]
+ for (int i = 0; i < fields.length; i++) {
+ fieldIsNull[i] = true;
+ if ((nullByte & (1 << (i % 8))) != 0) {
+ fieldIsNull[i] = false;
+ LazyUtils.checkObjectByteInfo(fieldRefs.get(i).getFieldObjectInspector(), bytes, lastFieldByteEnd,
+ recordInfo);
+ fieldStart[i] = lastFieldByteEnd + recordInfo.elementOffset;
+ fieldLength[i] = recordInfo.elementSize;
+ lastFieldByteEnd = fieldStart[i] + fieldLength[i];
+ }
+
+ // count how many fields are there
+ if (lastFieldByteEnd <= structByteEnd) {
+ fieldId++;
+ }
+ // next byte is a null byte if there are more bytes to go
+ if (7 == (i % 8)) {
+ if (lastFieldByteEnd < structByteEnd) {
+ nullByte = bytes[lastFieldByteEnd];
+ lastFieldByteEnd++;
+ } else {
+ // otherwise all null afterwards
+ nullByte = 0;
+ lastFieldByteEnd++;
+ }
+ }
+ }
+
+ // Extra bytes at the end?
+ if (!extraFieldWarned && lastFieldByteEnd < structByteEnd) {
+ extraFieldWarned = true;
+ LOG.warn("Extra bytes detected at the end of the row! Ignoring similar " + "problems.");
+ }
+
+ // Missing fields?
+ if (!missingFieldWarned && lastFieldByteEnd > structByteEnd) {
+ missingFieldWarned = true;
+ LOG.warn("Missing fields! Expected " + fields.length + " fields but " + "only got " + fieldId
+ + "! Ignoring similar problems.");
+ }
+
+ Arrays.fill(fieldInited, false);
+ parsed = true;
+ }
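+
+ /*
+ * Parsing example (illustrative): for a two-field struct serialized as
+ * [nullByte = 0b00000001][field0 bytes], parse() marks field 1 as null and
+ * records fieldStart/fieldLength only for field 0; the field itself is not
+ * deserialized until uncheckedGetField() touches it, which is what keeps
+ * the struct lazy.
+ */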
+
+ /**
+ * Get one field out of the struct.
+ * If the field is a primitive field, return the actual object. Otherwise
+ * return the LazyObject. This is because PrimitiveObjectInspector does not
+ * have control over the object used by the user - the user simply uses the
+ * Object directly instead of going through
+ * PrimitiveObjectInspector.get(Object).
+ *
+ * @param fieldID
+ * The field ID
+ * @return The field as a LazyObject
+ */
+ public Object getField(int fieldID) {
+ if (!parsed) {
+ parse();
+ }
+ return uncheckedGetField(fieldID);
+ }
+
+ /**
+ * Get the field out of the row without checking parsed. This is called by
+ * both getField and getFieldsAsList.
+ *
+ * @param fieldID
+ * The id of the field starting from 0.
+ * @return The value of the field
+ */
+ private Object uncheckedGetField(int fieldID) {
+ // A null field is resolved from the null bytes alone, so return
+ // immediately without touching the field data.
+ if (fieldIsNull[fieldID]) {
+ return null;
+ }
+ if (!fieldInited[fieldID]) {
+ fieldInited[fieldID] = true;
+ fields[fieldID].init(bytes, fieldStart[fieldID], fieldLength[fieldID]);
+ }
+ return fields[fieldID].getObject();
+ }
+
+ ArrayList<Object> cachedList;
+
+ /**
+ * Get the values of the fields as an ArrayList.
+ *
+ * @return The values of the fields as an ArrayList.
+ */
+ public ArrayList<Object> getFieldsAsList() {
+ if (!parsed) {
+ parse();
+ }
+ if (cachedList == null) {
+ cachedList = new ArrayList<Object>();
+ } else {
+ cachedList.clear();
+ }
+ for (int i = 0; i < fields.length; i++) {
+ cachedList.add(uncheckedGetField(i));
+ }
+ return cachedList;
+ }
+
+ @Override
+ public Object getObject() {
+ return this;
+ }
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyUtils.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyUtils.java
new file mode 100644
index 0000000..6554ccc
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyUtils.java
@@ -0,0 +1,503 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.WritableUtils;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyObjectInspectorFactory;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+/**
+ * LazyUtils.
+ */
+public final class LazyUtils {
+
+ /**
+ * Convert the byte array to an int starting from the given offset
+ * (adapted from code by aeden on DZone Snippets).
+ *
+ * @param b
+ * the byte array
+ * @param offset
+ * the array offset
+ * @return the integer
+ */
+ public static int byteArrayToInt(byte[] b, int offset) {
+ int value = 0;
+ for (int i = 0; i < 4; i++) {
+ int shift = (4 - 1 - i) * 8;
+ value += (b[i + offset] & 0x000000FF) << shift;
+ }
+ return value;
+ }
+
+ /**
+ * Convert the byte array to a long starting from the given offset.
+ *
+ * @param b
+ * the byte array
+ * @param offset
+ * the array offset
+ * @return the long
+ */
+ public static long byteArrayToLong(byte[] b, int offset) {
+ long value = 0;
+ for (int i = 0; i < 8; i++) {
+ int shift = (8 - 1 - i) * 8;
+ value += ((long) (b[i + offset] & 0x00000000000000FF)) << shift;
+ }
+ return value;
+ }
+
+ /**
+ * Convert the byte array to a short starting from the given offset.
+ *
+ * @param b
+ * the byte array
+ * @param offset
+ * the array offset
+ * @return the short
+ */
+ public static short byteArrayToShort(byte[] b, int offset) {
+ short value = 0;
+ value += (b[offset] & 0x000000FF) << 8;
+ value += (b[offset + 1] & 0x000000FF);
+ return value;
+ }
+
+ /**
+ * Record is the unit that data is serialized in. A record includes two
+ * parts: the first part stores the size of the element and the second part
+ * stores the real element.
+ *
+ * <pre>
+ *           size element
+ * record -> |----|-------------------------|
+ * </pre>
+ *
+ * A RecordInfo stores two pieces of information about a record: the size of
+ * the "size" part, which is the element offset, and the size of the element
+ * part, which is the element size.
+ */
+ public static class RecordInfo {
+ public RecordInfo() {
+ elementOffset = 0;
+ elementSize = 0;
+ }
+
+ public byte elementOffset;
+ public int elementSize;
+
+ @Override
+ public String toString() {
+ return "(" + elementOffset + ", " + elementSize + ")";
+ }
+ }
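+
+ /*
+ * Example (illustrative): a STRING element "hi" is stored as the vint 0x02
+ * followed by the bytes 'h' 'i', so checkObjectByteInfo() below reports
+ * elementOffset = 1 (the vint length) and elementSize = 2.
+ */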
+
+ static VInt vInt = new LazyUtils.VInt();
+
+ /**
+ * Check a particular field and set its size and offset in bytes based on
+ * the field type and the byte array.
+ * For void, boolean, byte, short, float and double, there is no offset and
+ * the size is fixed. For int and long, the size is the length of the vint
+ * encoding. For map, list and struct, the first four bytes store the size,
+ * so the offset is 4 and the size is read by concatenating those four
+ * bytes. For string, a vint prefix stores the size, so the offset is the
+ * vint length and the size is the vint value. All positions are relative to
+ * the given offset in the byte array.
+ *
+ * @param objectInspector
+ * object inspector of the field
+ * @param bytes
+ * byte array storing the table row
+ * @param offset
+ * offset of this field
+ * @param recordInfo
+ * the RecordInfo object to populate with the element offset and size
+ */
+ public static void checkObjectByteInfo(ObjectInspector objectInspector, byte[] bytes, int offset,
+ RecordInfo recordInfo) {
+ Category category = objectInspector.getCategory();
+ switch (category) {
+ case PRIMITIVE:
+ PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector) objectInspector)
+ .getPrimitiveCategory();
+ switch (primitiveCategory) {
+ case VOID:
+ recordInfo.elementOffset = 0;
+ recordInfo.elementSize = 0;
+ break;
+ case BOOLEAN:
+ case BYTE:
+ recordInfo.elementOffset = 0;
+ recordInfo.elementSize = 1;
+ break;
+ case SHORT:
+ recordInfo.elementOffset = 0;
+ recordInfo.elementSize = 2;
+ break;
+ case FLOAT:
+ recordInfo.elementOffset = 0;
+ recordInfo.elementSize = 4;
+ break;
+ case DOUBLE:
+ recordInfo.elementOffset = 0;
+ recordInfo.elementSize = 8;
+ break;
+ case INT:
+ recordInfo.elementOffset = 0;
+ recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]);
+ break;
+ case LONG:
+ recordInfo.elementOffset = 0;
+ recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]);
+ break;
+ case STRING:
+ // using vint instead of 4 bytes
+ LazyUtils.readVInt(bytes, offset, vInt);
+ recordInfo.elementOffset = vInt.length;
+ recordInfo.elementSize = vInt.value;
+ break;
+ default: {
+ throw new RuntimeException("Unrecognized primitive type: " + primitiveCategory);
+ }
+ }
+ break;
+ case LIST:
+ case MAP:
+ case STRUCT:
+ recordInfo.elementOffset = 4;
+ recordInfo.elementSize = LazyUtils.byteArrayToInt(bytes, offset);
+ break;
+ default: {
+ throw new RuntimeException("Unrecognized non-primitive type: " + category);
+ }
+ }
+ }
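+
+ /*
+ * For example (illustrative): an INT stored as the vint 0x8E 0x01 0x2C
+ * yields elementOffset = 0 and elementSize = 3, while a MAP whose body is
+ * 20 bytes long yields elementOffset = 4 (the int size prefix) and
+ * elementSize = 20.
+ */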
+
+ /**
+ * A zero-compressed encoded long.
+ */
+ public static class VLong {
+ public VLong() {
+ value = 0;
+ length = 0;
+ }
+
+ public long value;
+ public byte length;
+ };
+
+ /**
+ * Reads a zero-compressed encoded long from a byte array into a VLong.
+ *
+ * @param bytes
+ * the byte array
+ * @param offset
+ * offset of the array to read from
+ * @param vlong
+ * receives the deserialized long and its size in bytes
+ */
+ public static void readVLong(byte[] bytes, int offset, VLong vlong) {
+ byte firstByte = bytes[offset];
+ vlong.length = (byte) WritableUtils.decodeVIntSize(firstByte);
+ if (vlong.length == 1) {
+ vlong.value = firstByte;
+ return;
+ }
+ long i = 0;
+ for (int idx = 0; idx < vlong.length - 1; idx++) {
+ byte b = bytes[offset + 1 + idx];
+ i = i << 8;
+ i = i | (b & 0xFF);
+ }
+ vlong.value = (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1L) : i);
+ }
+
+ /**
+ * A zero-compressed encoded integer.
+ */
+ public static class VInt implements Serializable {
+ private static final long serialVersionUID = 1L;
+
+ public VInt() {
+ value = 0;
+ length = 0;
+ }
+
+ public int value;
+ public byte length;
+ };
+
+ /**
+ * Reads a zero-compressed encoded int from a byte array into a VInt.
+ *
+ * @param bytes
+ * the byte array
+ * @param offset
+ * offset of the array to read from
+ * @param vInt
+ * receives the deserialized int and its size in bytes
+ */
+ public static void readVInt(byte[] bytes, int offset, VInt vInt) {
+ byte firstByte = bytes[offset];
+ vInt.length = (byte) WritableUtils.decodeVIntSize(firstByte);
+ if (vInt.length == 1) {
+ vInt.value = firstByte;
+ return;
+ }
+ int i = 0;
+ for (int idx = 0; idx < vInt.length - 1; idx++) {
+ byte b = bytes[offset + 1 + idx];
+ i = i << 8;
+ i = i | (b & 0xFF);
+ }
+ vInt.value = (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1) : i);
+ }
+
+ /**
+ * Writes a zero-compressed encoded int to a byte array.
+ *
+ * @param byteStream
+ * the byte array/stream
+ * @param i
+ * the int
+ */
+ public static void writeVInt(Output byteStream, int i) {
+ writeVLong(byteStream, i);
+ }
+
+ /**
+ * Write a zero-compressed encoded long to a byte array.
+ *
+ * @param byteStream
+ * the byte array/stream
+ * @param l
+ * the long
+ */
+ public static void writeVLong(Output byteStream, long l) {
+ if (l >= -112 && l <= 127) {
+ byteStream.write((byte) l);
+ return;
+ }
+
+ int len = -112;
+ if (l < 0) {
+ l ^= -1L; // take the one's complement
+ len = -120;
+ }
+
+ long tmp = l;
+ while (tmp != 0) {
+ tmp = tmp >> 8;
+ len--;
+ }
+
+ byteStream.write((byte) len);
+
+ len = (len < -120) ? -(len + 120) : -(len + 112);
+
+ for (int idx = len; idx != 0; idx--) {
+ int shiftbits = (idx - 1) * 8;
+ long mask = 0xFFL << shiftbits;
+ byteStream.write((byte) ((l & mask) >> shiftbits));
+ }
+ }
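+
+ /*
+ * Round-trip sketch (illustrative):
+ *
+ * Output out = new Output();
+ * LazyUtils.writeVLong(out, 300L); // emits 0x8E 0x01 0x2C
+ * VLong v = new VLong();
+ * LazyUtils.readVLong(out.getData(), 0, v); // v.value == 300, v.length == 3
+ */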
+
+ static Map<TypeInfo, ObjectInspector> cachedLazyObjectInspector = new ConcurrentHashMap<TypeInfo, ObjectInspector>();
+
+ /**
+ * Returns the lazy object inspector that can be used to inspect a lazy
+ * object of the given typeInfo.
+ * For primitive types, we use the lazy primitive object inspector.
+ */
+ public static ObjectInspector getLazyObjectInspectorFromTypeInfo(TypeInfo typeInfo, boolean topLevel) {
+ if (typeInfo == null)
+ throw new IllegalStateException("illegal null typeInfo");
+ ObjectInspector result = cachedLazyObjectInspector.get(typeInfo);
+ if (result == null) {
+ switch (typeInfo.getCategory()) {
+ case PRIMITIVE: {
+ result = PrimitiveObjectInspectorFactory
+ .getPrimitiveLazyObjectInspector(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
+ break;
+ }
+ case LIST: {
+ ObjectInspector elementObjectInspector = getLazyObjectInspectorFromTypeInfo(
+ ((ListTypeInfo) typeInfo).getListElementTypeInfo(), false);
+ result = LazyObjectInspectorFactory.getLazyListObjectInspector(elementObjectInspector);
+ break;
+ }
+ case MAP: {
+ MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
+ ObjectInspector keyObjectInspector = getLazyObjectInspectorFromTypeInfo(
+ mapTypeInfo.getMapKeyTypeInfo(), false);
+ ObjectInspector valueObjectInspector = getLazyObjectInspectorFromTypeInfo(
+ mapTypeInfo.getMapValueTypeInfo(), false);
+ result = LazyObjectInspectorFactory.getLazyMapObjectInspector(keyObjectInspector,
+ valueObjectInspector);
+ break;
+ }
+ case STRUCT: {
+ StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
+ List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
+ List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+ List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size());
+
+ for (int i = 0; i < fieldTypeInfos.size(); i++) {
+ fieldObjectInspectors.add(getLazyObjectInspectorFromTypeInfo(fieldTypeInfos.get(i), false));
+ }
+
+ // if it is top level then create columnar
+ if (topLevel)
+ result = LazyObjectInspectorFactory.getLazyColumnarObjectInspector(fieldNames,
+ fieldObjectInspectors);
+ // if it is not top level then create struct
+ else
+ result = LazyObjectInspectorFactory.getLazyStructObjectInspector(fieldNames,
+ fieldObjectInspectors);
+
+ break;
+ }
+ default: {
+ // avoid caching a null value, which ConcurrentHashMap rejects
+ throw new IllegalStateException("unsupported type category: " + typeInfo.getCategory());
+ }
+ }
+ cachedLazyObjectInspector.put(typeInfo, result);
+ }
+ return result;
+ }
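+
+ /*
+ * Usage sketch (illustrative, hypothetical type string): for the row type
+ * "struct<id:int,tags:array<string>>" with topLevel == true, the result is
+ * a LazyColumnarObjectInspector whose second field is a
+ * LazyListObjectInspector over a lazy string inspector; a nested struct
+ * would instead get a LazyStructObjectInspector.
+ */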
+
+ /**
+ * Get the top-level lazy object inspector.
+ *
+ * @param fieldNames
+ * the top-level field names
+ * @param fieldTypeInfos
+ * the type infos of the corresponding fields
+ * @return a lazy columnar object inspector over the given fields
+ */
+ public static ObjectInspector getLazyObjectInspector(List<String> fieldNames, List<TypeInfo> fieldTypeInfos) {
+ List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size());
+ for (int i = 0; i < fieldTypeInfos.size(); i++) {
+ fieldObjectInspectors.add(getLazyObjectInspectorFromTypeInfo(fieldTypeInfos.get(i), false));
+ }
+
+ return LazyObjectInspectorFactory.getLazyColumnarObjectInspector(fieldNames, fieldObjectInspectors);
+ }
+
+ private LazyUtils() {
+ // prevent instantiation
+ }
+
+ /**
+ * Returns -1 if the first byte sequence is lexicographically less than the
+ * second; returns +1 if the second byte sequence is lexicographically less
+ * than the first; otherwise return 0.
+ */
+ public static int compare(byte[] b1, int start1, int length1, byte[] b2, int start2, int length2) {
+
+ int min = Math.min(length1, length2);
+
+ for (int i = 0; i < min; i++) {
+ if (b1[start1 + i] == b2[start2 + i]) {
+ continue;
+ }
+ if (b1[start1 + i] < b2[start2 + i]) {
+ return -1;
+ } else {
+ return 1;
+ }
+ }
+
+ if (length1 < length2) {
+ return -1;
+ }
+ if (length1 > length2) {
+ return 1;
+ }
+ return 0;
+ }
+
+ public static int hashBytes(byte[] data, int start, int len) {
+ int hash = 1;
+ for (int i = start; i < len; i++) {
+ hash = (31 * hash) + data[i];
+ }
+ return hash;
+ }
+
+ /**
+ * Writes a zero-compressed encoded int to a byte array.
+ *
+ * @param byteStream
+ * the byte array/stream
+ * @param i
+ * the int
+ */
+ public static void writeVInt(DataOutput byteStream, int i) throws IOException {
+ writeVLong(byteStream, i);
+ }
+
+ /**
+ * Write a zero-compressed encoded long to a byte array.
+ *
+ * @param byteStream
+ * the byte array/stream
+ * @param l
+ * the long
+ */
+ public static void writeVLong(DataOutput byteStream, long l) throws IOException {
+ if (l >= -112 && l <= 127) {
+ byteStream.write((byte) l);
+ return;
+ }
+
+ int len = -112;
+ if (l < 0) {
+ l ^= -1L; // take the one's complement
+ len = -120;
+ }
+
+ long tmp = l;
+ while (tmp != 0) {
+ tmp = tmp >> 8;
+ len--;
+ }
+
+ byteStream.write((byte) len);
+
+ len = (len < -120) ? -(len + 120) : -(len + 112);
+
+ for (int idx = len; idx != 0; idx--) {
+ int shiftbits = (idx - 1) * 8;
+ long mask = 0xFFL << shiftbits;
+ byteStream.write((byte) ((l & mask) >> shiftbits));
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyColumnarObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyColumnarObjectInspector.java
new file mode 100644
index 0000000..b1ca622
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyColumnarObjectInspector.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector;
+
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyColumnar;
+
+/**
+ * ObjectInspector for LazyColumnar.
+ *
+ * @see LazyColumnar
+ */
+public class LazyColumnarObjectInspector extends StandardStructObjectInspector implements Serializable {
+
+ private static final long serialVersionUID = 1L;
+
+ public LazyColumnarObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
+ super(structFieldNames, structFieldObjectInspectors);
+ }
+
+ public LazyColumnarObjectInspector(List<StructField> fields) {
+ super(fields);
+ }
+
+ @Override
+ public Object getStructFieldData(Object data, StructField fieldRef) {
+ if (data == null) {
+ return null;
+ }
+ LazyColumnar struct = (LazyColumnar) data;
+ MyField f = (MyField) fieldRef;
+
+ int fieldID = f.getFieldID();
+ assert (fieldID >= 0 && fieldID < fields.size());
+
+ Object column = struct.getField(fieldID);
+ return column;
+ }
+
+ @Override
+ public List<Object> getStructFieldsDataAsList(Object data) {
+ if (data == null) {
+ return null;
+ }
+ LazyColumnar struct = (LazyColumnar) data;
+ return struct.getFieldsAsList();
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder str = new StringBuilder();
+ for (MyField f : fields) {
+ str.append(f.getFieldName()).append(":").append(f.getFieldObjectInspector().getTypeName()).append(" ");
+ }
+ return str.toString();
+ }
+}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyListObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyListObjectInspector.java
similarity index 61%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyListObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyListObjectInspector.java
index dc4e85b..aaa5d66 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyListObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyListObjectInspector.java
@@ -29,34 +29,34 @@
*/
public class LazyListObjectInspector extends StandardListObjectInspector {
- protected LazyListObjectInspector(ObjectInspector listElementObjectInspector) {
- super(listElementObjectInspector);
- }
+ protected LazyListObjectInspector(ObjectInspector listElementObjectInspector) {
+ super(listElementObjectInspector);
+ }
- @Override
- public List<?> getList(Object data) {
- if (data == null) {
- return null;
- }
- LazyArray array = (LazyArray) data;
- return array.getList();
- }
+ @Override
+ public List<?> getList(Object data) {
+ if (data == null) {
+ return null;
+ }
+ LazyArray array = (LazyArray) data;
+ return array.getList();
+ }
- @Override
- public Object getListElement(Object data, int index) {
- if (data == null) {
- return null;
- }
- LazyArray array = (LazyArray) data;
- return array.getListElementObject(index);
- }
+ @Override
+ public Object getListElement(Object data, int index) {
+ if (data == null) {
+ return null;
+ }
+ LazyArray array = (LazyArray) data;
+ return array.getListElementObject(index);
+ }
- @Override
- public int getListLength(Object data) {
- if (data == null) {
- return -1;
- }
- LazyArray array = (LazyArray) data;
- return array.getListLength();
- }
+ @Override
+ public int getListLength(Object data) {
+ if (data == null) {
+ return -1;
+ }
+ LazyArray array = (LazyArray) data;
+ return array.getListLength();
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyMapObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyMapObjectInspector.java
similarity index 62%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyMapObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyMapObjectInspector.java
index a3be142..1b0c412 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyMapObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyMapObjectInspector.java
@@ -31,32 +31,31 @@
*/
public class LazyMapObjectInspector extends StandardMapObjectInspector {
- protected LazyMapObjectInspector(ObjectInspector mapKeyObjectInspector,
- ObjectInspector mapValueObjectInspector) {
- super(mapKeyObjectInspector, mapValueObjectInspector);
- }
+ protected LazyMapObjectInspector(ObjectInspector mapKeyObjectInspector, ObjectInspector mapValueObjectInspector) {
+ super(mapKeyObjectInspector, mapValueObjectInspector);
+ }
- @Override
- public Map<?, ?> getMap(Object data) {
- if (data == null) {
- return null;
- }
- return ((LazyMap) data).getMap();
- }
+ @Override
+ public Map<?, ?> getMap(Object data) {
+ if (data == null) {
+ return null;
+ }
+ return ((LazyMap) data).getMap();
+ }
- @Override
- public int getMapSize(Object data) {
- if (data == null) {
- return -1;
- }
- return ((LazyMap) data).getMapSize();
- }
+ @Override
+ public int getMapSize(Object data) {
+ if (data == null) {
+ return -1;
+ }
+ return ((LazyMap) data).getMapSize();
+ }
- @Override
- public Object getMapValueElement(Object data, Object key) {
- if (data == null) {
- return -1;
- }
- return ((LazyMap) data).getMapValueElement(key);
- }
+ @Override
+ public Object getMapValueElement(Object data, Object key) {
+ if (data == null) {
+ return -1;
+ }
+ return ((LazyMap) data).getMapValueElement(key);
+ }
}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyObjectInspectorFactory.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyObjectInspectorFactory.java
new file mode 100644
index 0000000..8093c94
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyObjectInspectorFactory.java
@@ -0,0 +1,82 @@
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+/**
+ * ObjectInspectorFactory is the primary way to create new ObjectInspector
+ * instances.
+ * SerDe classes should call the static functions in this library to create an
+ * ObjectInspector to return to the caller of SerDe2.getObjectInspector().
+ * The reason for having caches here is that ObjectInspectors do not have
+ * internal state, so ObjectInspectors with the same construction parameters
+ * should result in exactly the same ObjectInspector instance.
+ */
+
+public final class LazyObjectInspectorFactory {
+
+ static ConcurrentHashMap<ArrayList<Object>, LazyColumnarObjectInspector> cachedLazyColumnarObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyColumnarObjectInspector>();
+
+ static ConcurrentHashMap<ArrayList<Object>, LazyStructObjectInspector> cachedLazyStructObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyStructObjectInspector>();
+
+ static ConcurrentHashMap<ArrayList<Object>, LazyListObjectInspector> cachedLazyListObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyListObjectInspector>();
+
+ static ConcurrentHashMap<ArrayList<Object>, LazyMapObjectInspector> cachedLazyMapObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyMapObjectInspector>();
+
+ public static LazyColumnarObjectInspector getLazyColumnarObjectInspector(List<String> structFieldNames,
+ List<ObjectInspector> structFieldObjectInspectors) {
+ ArrayList<Object> signature = new ArrayList<Object>();
+ signature.add(structFieldNames);
+ signature.add(structFieldObjectInspectors);
+ LazyColumnarObjectInspector result = cachedLazyColumnarObjectInspector.get(signature);
+ if (result == null) {
+ result = new LazyColumnarObjectInspector(structFieldNames, structFieldObjectInspectors);
+ cachedLazyColumnarObjectInspector.put(signature, result);
+ }
+ return result;
+ }
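+
+ /*
+ * Because the signature (field names plus inspectors) is the cache key,
+ * repeated calls with equal arguments return the same instance, e.g.
+ * (illustrative):
+ *
+ * LazyColumnarObjectInspector a = getLazyColumnarObjectInspector(names, ois);
+ * LazyColumnarObjectInspector b = getLazyColumnarObjectInspector(names, ois);
+ * // a == b
+ */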
+
+ public static LazyStructObjectInspector getLazyStructObjectInspector(List<String> structFieldNames,
+ List<ObjectInspector> structFieldObjectInspectors) {
+ ArrayList<Object> signature = new ArrayList<Object>();
+ signature.add(structFieldNames);
+ signature.add(structFieldObjectInspectors);
+ LazyStructObjectInspector result = cachedLazyStructObjectInspector.get(signature);
+ if (result == null) {
+ result = new LazyStructObjectInspector(structFieldNames, structFieldObjectInspectors);
+ cachedLazyStructObjectInspector.put(signature, result);
+ }
+ return result;
+ }
+
+ public static LazyListObjectInspector getLazyListObjectInspector(ObjectInspector listElementInspector) {
+ ArrayList<Object> signature = new ArrayList<Object>();
+ signature.add(listElementInspector);
+ LazyListObjectInspector result = cachedLazyListObjectInspector.get(signature);
+ if (result == null) {
+ result = new LazyListObjectInspector(listElementInspector);
+ cachedLazyListObjectInspector.put(signature, result);
+ }
+ return result;
+ }
+
+ public static LazyMapObjectInspector getLazyMapObjectInspector(ObjectInspector keyInspector,
+ ObjectInspector valueInspector) {
+ ArrayList<Object> signature = new ArrayList<Object>();
+ signature.add(keyInspector);
+ signature.add(valueInspector);
+ LazyMapObjectInspector result = cachedLazyMapObjectInspector.get(signature);
+ if (result == null) {
+ result = new LazyMapObjectInspector(keyInspector, valueInspector);
+ cachedLazyMapObjectInspector.put(signature, result);
+ }
+ return result;
+ }
+
+ private LazyObjectInspectorFactory() {
+ // prevent instantiation
+ }
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyStructObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyStructObjectInspector.java
new file mode 100644
index 0000000..ad70d4c
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyStructObjectInspector.java
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyStruct;
+
+/**
+ * ObjectInspector for LazyStruct.
+ *
+ * @see LazyStruct
+ */
+public class LazyStructObjectInspector extends StandardStructObjectInspector {
+
+ protected LazyStructObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
+ super(structFieldNames, structFieldObjectInspectors);
+ }
+
+ protected LazyStructObjectInspector(List<StructField> fields) {
+ super(fields);
+ }
+
+ @Override
+ public Object getStructFieldData(Object data, StructField fieldRef) {
+ if (data == null) {
+ return null;
+ }
+ LazyStruct struct = (LazyStruct) data;
+ MyField f = (MyField) fieldRef;
+
+ int fieldID = f.getFieldID();
+ assert (fieldID >= 0 && fieldID < fields.size());
+
+ return struct.getField(fieldID);
+ }
+
+ @Override
+ public List<Object> getStructFieldsDataAsList(Object data) {
+ if (data == null) {
+ return null;
+ }
+ LazyStruct struct = (LazyStruct) data;
+ return struct.getFieldsAsList();
+ }
+}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/AbstractPrimitiveLazyObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/AbstractPrimitiveLazyObjectInspector.java
similarity index 72%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/AbstractPrimitiveLazyObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/AbstractPrimitiveLazyObjectInspector.java
index 7ef8bdd..eaa2bbc 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/AbstractPrimitiveLazyObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/AbstractPrimitiveLazyObjectInspector.java
@@ -26,24 +26,23 @@
/**
* An AbstractPrimitiveLazyObjectInspector for a LazyPrimitive object.
*/
-public abstract class AbstractPrimitiveLazyObjectInspector<T extends Writable>
- extends AbstractPrimitiveObjectInspector {
+public abstract class AbstractPrimitiveLazyObjectInspector<T extends Writable> extends AbstractPrimitiveObjectInspector {
- protected AbstractPrimitiveLazyObjectInspector(PrimitiveTypeEntry typeEntry) {
- super(typeEntry);
- }
+ protected AbstractPrimitiveLazyObjectInspector(PrimitiveTypeEntry typeEntry) {
+ super(typeEntry);
+ }
- @SuppressWarnings("unchecked")
- @Override
- public T getPrimitiveWritableObject(Object o) {
- if (o == null)
- System.out.println("sth. wrong");
- return o == null ? null : ((LazyPrimitive<?, T>) o).getWritableObject();
- }
+ @SuppressWarnings("unchecked")
+ @Override
+ public T getPrimitiveWritableObject(Object o) {
+ if (o == null)
+ System.out.println("sth. wrong");
+ return o == null ? null : ((LazyPrimitive<?, T>) o).getWritableObject();
+ }
- @Override
- public boolean preferWritable() {
- return true;
- }
+ @Override
+ public boolean preferWritable() {
+ return true;
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyBooleanObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyBooleanObjectInspector.java
similarity index 66%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyBooleanObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyBooleanObjectInspector.java
index 472dce0..7927c1e 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyBooleanObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyBooleanObjectInspector.java
@@ -26,26 +26,25 @@
/**
* A WritableBooleanObjectInspector inspects a BooleanWritable Object.
*/
-public class LazyBooleanObjectInspector extends
- AbstractPrimitiveLazyObjectInspector<BooleanWritable> implements
- BooleanObjectInspector {
+public class LazyBooleanObjectInspector extends AbstractPrimitiveLazyObjectInspector<BooleanWritable> implements
+ BooleanObjectInspector {
- LazyBooleanObjectInspector() {
- super(PrimitiveObjectInspectorUtils.booleanTypeEntry);
- }
+ LazyBooleanObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.booleanTypeEntry);
+ }
- @Override
- public boolean get(Object o) {
- return getPrimitiveWritableObject(o).get();
- }
+ @Override
+ public boolean get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
- @Override
- public Object copyObject(Object o) {
- return o == null ? null : new LazyBoolean((LazyBoolean) o);
- }
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyBoolean((LazyBoolean) o);
+ }
- @Override
- public Object getPrimitiveJavaObject(Object o) {
- return o == null ? null : Boolean.valueOf(get(o));
- }
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Boolean.valueOf(get(o));
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyByteObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyByteObjectInspector.java
similarity index 67%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyByteObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyByteObjectInspector.java
index e631fc7..10a881c 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyByteObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyByteObjectInspector.java
@@ -26,26 +26,25 @@
/**
* A WritableByteObjectInspector inspects a ByteWritable Object.
*/
-public class LazyByteObjectInspector extends
- AbstractPrimitiveLazyObjectInspector<ByteWritable> implements
- ByteObjectInspector {
+public class LazyByteObjectInspector extends AbstractPrimitiveLazyObjectInspector<ByteWritable> implements
+ ByteObjectInspector {
- LazyByteObjectInspector() {
- super(PrimitiveObjectInspectorUtils.byteTypeEntry);
- }
+ LazyByteObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.byteTypeEntry);
+ }
- @Override
- public byte get(Object o) {
- return getPrimitiveWritableObject(o).get();
- }
+ @Override
+ public byte get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
- @Override
- public Object copyObject(Object o) {
- return o == null ? null : new LazyByte((LazyByte) o);
- }
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyByte((LazyByte) o);
+ }
- @Override
- public Object getPrimitiveJavaObject(Object o) {
- return o == null ? null : Byte.valueOf(get(o));
- }
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Byte.valueOf(get(o));
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyDoubleObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyDoubleObjectInspector.java
similarity index 67%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyDoubleObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyDoubleObjectInspector.java
index 1257f11..9f98b56 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyDoubleObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyDoubleObjectInspector.java
@@ -26,26 +26,25 @@
/**
* A WritableDoubleObjectInspector inspects a DoubleWritable Object.
*/
-public class LazyDoubleObjectInspector extends
- AbstractPrimitiveLazyObjectInspector<DoubleWritable> implements
- DoubleObjectInspector {
+public class LazyDoubleObjectInspector extends AbstractPrimitiveLazyObjectInspector<DoubleWritable> implements
+ DoubleObjectInspector {
- LazyDoubleObjectInspector() {
- super(PrimitiveObjectInspectorUtils.doubleTypeEntry);
- }
+ LazyDoubleObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.doubleTypeEntry);
+ }
- @Override
- public double get(Object o) {
- return getPrimitiveWritableObject(o).get();
- }
+ @Override
+ public double get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
- @Override
- public Object copyObject(Object o) {
- return o == null ? null : new LazyDouble((LazyDouble) o);
- }
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyDouble((LazyDouble) o);
+ }
- @Override
- public Object getPrimitiveJavaObject(Object o) {
- return o == null ? null : Double.valueOf(get(o));
- }
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Double.valueOf(get(o));
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyFloatObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyFloatObjectInspector.java
similarity index 67%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyFloatObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyFloatObjectInspector.java
index c66a06f..bf3e9a2 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyFloatObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyFloatObjectInspector.java
@@ -26,26 +26,25 @@
/**
* A FloatObjectInspector inspects a FloatWritable Object.
*/
-public class LazyFloatObjectInspector extends
- AbstractPrimitiveLazyObjectInspector<FloatWritable> implements
- FloatObjectInspector {
+public class LazyFloatObjectInspector extends AbstractPrimitiveLazyObjectInspector<FloatWritable> implements
+ FloatObjectInspector {
- LazyFloatObjectInspector() {
- super(PrimitiveObjectInspectorUtils.floatTypeEntry);
- }
+ LazyFloatObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.floatTypeEntry);
+ }
- @Override
- public float get(Object o) {
- return getPrimitiveWritableObject(o).get();
- }
+ @Override
+ public float get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
- @Override
- public Object copyObject(Object o) {
- return o == null ? null : new LazyFloat((LazyFloat) o);
- }
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyFloat((LazyFloat) o);
+ }
- @Override
- public Object getPrimitiveJavaObject(Object o) {
- return o == null ? null : Float.valueOf(get(o));
- }
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Float.valueOf(get(o));
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyIntObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyIntObjectInspector.java
similarity index 67%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyIntObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyIntObjectInspector.java
index b2159e0..87bcb0d 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyIntObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyIntObjectInspector.java
@@ -26,26 +26,25 @@
/**
* A WritableIntObjectInspector inspects a IntWritable Object.
*/
-public class LazyIntObjectInspector extends
- AbstractPrimitiveLazyObjectInspector<IntWritable> implements
- IntObjectInspector {
+public class LazyIntObjectInspector extends AbstractPrimitiveLazyObjectInspector<IntWritable> implements
+ IntObjectInspector {
- LazyIntObjectInspector() {
- super(PrimitiveObjectInspectorUtils.intTypeEntry);
- }
+ LazyIntObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.intTypeEntry);
+ }
- @Override
- public int get(Object o) {
- return getPrimitiveWritableObject(o).get();
- }
+ @Override
+ public int get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
- @Override
- public Object copyObject(Object o) {
- return o == null ? null : new LazyInteger((LazyInteger) o);
- }
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyInteger((LazyInteger) o);
+ }
- @Override
- public Object getPrimitiveJavaObject(Object o) {
- return o == null ? null : Integer.valueOf(get(o));
- }
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Integer.valueOf(get(o));
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyLongObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyLongObjectInspector.java
similarity index 67%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyLongObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyLongObjectInspector.java
index 1fc2d53..06b5d3c 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyLongObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyLongObjectInspector.java
@@ -26,26 +26,25 @@
/**
* A WritableLongObjectInspector inspects a LongWritable Object.
*/
-public class LazyLongObjectInspector extends
- AbstractPrimitiveLazyObjectInspector<LongWritable> implements
- LongObjectInspector {
+public class LazyLongObjectInspector extends AbstractPrimitiveLazyObjectInspector<LongWritable> implements
+ LongObjectInspector {
- LazyLongObjectInspector() {
- super(PrimitiveObjectInspectorUtils.longTypeEntry);
- }
+ LazyLongObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.longTypeEntry);
+ }
- @Override
- public long get(Object o) {
- return getPrimitiveWritableObject(o).get();
- }
+ @Override
+ public long get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
- @Override
- public Object copyObject(Object o) {
- return o == null ? null : new LazyLong((LazyLong) o);
- }
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyLong((LazyLong) o);
+ }
- @Override
- public Object getPrimitiveJavaObject(Object o) {
- return o == null ? null : Long.valueOf(get(o));
- }
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Long.valueOf(get(o));
+ }
}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
new file mode 100644
index 0000000..5d7ef48
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
+
+import java.util.ArrayList;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+
+/**
+ * LazyPrimitiveObjectInspectorFactory is the primary way to create new
+ * ObjectInspector instances.
+ * SerDe classes should call the static functions in this library to create an
+ * ObjectInspector to return to the caller of SerDe2.getObjectInspector().
+ * The reason for having caches here is that ObjectInspectors do not have
+ * internal state, so ObjectInspectors constructed with the same parameters
+ * should always resolve to exactly the same ObjectInspector instance.
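+ * <p>
+ * Illustrative usage (a sketch; the escaping arguments shown are just common defaults):
+ * LazyStringObjectInspector stringOI =
+ * LazyPrimitiveObjectInspectorFactory.getLazyStringObjectInspector(false, (byte) '\\');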
+ */
+public final class LazyPrimitiveObjectInspectorFactory {
+
+ public static final LazyBooleanObjectInspector LAZY_BOOLEAN_OBJECT_INSPECTOR = new LazyBooleanObjectInspector();
+ public static final LazyByteObjectInspector LAZY_BYTE_OBJECT_INSPECTOR = new LazyByteObjectInspector();
+ public static final LazyShortObjectInspector LAZY_SHORT_OBJECT_INSPECTOR = new LazyShortObjectInspector();
+ public static final LazyIntObjectInspector LAZY_INT_OBJECT_INSPECTOR = new LazyIntObjectInspector();
+ public static final LazyLongObjectInspector LAZY_LONG_OBJECT_INSPECTOR = new LazyLongObjectInspector();
+ public static final LazyFloatObjectInspector LAZY_FLOAT_OBJECT_INSPECTOR = new LazyFloatObjectInspector();
+ public static final LazyDoubleObjectInspector LAZY_DOUBLE_OBJECT_INSPECTOR = new LazyDoubleObjectInspector();
+ public static final LazyVoidObjectInspector LAZY_VOID_OBJECT_INSPECTOR = new LazyVoidObjectInspector();
+
+ static ConcurrentHashMap<ArrayList<Object>, LazyStringObjectInspector> cachedLazyStringObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyStringObjectInspector>();
+
+ public static LazyStringObjectInspector getLazyStringObjectInspector(boolean escaped, byte escapeChar) {
+ ArrayList<Object> signature = new ArrayList<Object>();
+ signature.add(Boolean.valueOf(escaped));
+ signature.add(Byte.valueOf(escapeChar));
+ LazyStringObjectInspector result = cachedLazyStringObjectInspector.get(signature);
+ if (result == null) {
+ result = new LazyStringObjectInspector(escaped, escapeChar);
+ cachedLazyStringObjectInspector.put(signature, result);
+ }
+ return result;
+ }
+
+ public static AbstractPrimitiveLazyObjectInspector<?> getLazyObjectInspector(PrimitiveCategory primitiveCategory,
+ boolean escaped, byte escapeChar) {
+
+ switch (primitiveCategory) {
+ case BOOLEAN:
+ return LAZY_BOOLEAN_OBJECT_INSPECTOR;
+ case BYTE:
+ return LAZY_BYTE_OBJECT_INSPECTOR;
+ case SHORT:
+ return LAZY_SHORT_OBJECT_INSPECTOR;
+ case INT:
+ return LAZY_INT_OBJECT_INSPECTOR;
+ case LONG:
+ return LAZY_LONG_OBJECT_INSPECTOR;
+ case FLOAT:
+ return LAZY_FLOAT_OBJECT_INSPECTOR;
+ case DOUBLE:
+ return LAZY_DOUBLE_OBJECT_INSPECTOR;
+ case STRING:
+ return getLazyStringObjectInspector(escaped, escapeChar);
+ case VOID:
+ return LAZY_VOID_OBJECT_INSPECTOR;
+ default:
+ throw new RuntimeException("Internal error: Cannot find ObjectInspector " + " for " + primitiveCategory);
+ }
+ }
+
+ private LazyPrimitiveObjectInspectorFactory() {
+ // prevent instantiation
+ }
+
+}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyShortObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyShortObjectInspector.java
similarity index 67%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyShortObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyShortObjectInspector.java
index cb06dfd..b02d9bc 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyShortObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyShortObjectInspector.java
@@ -26,26 +26,25 @@
/**
* A WritableShortObjectInspector inspects a ShortWritable Object.
*/
-public class LazyShortObjectInspector extends
- AbstractPrimitiveLazyObjectInspector<ShortWritable> implements
- ShortObjectInspector {
+public class LazyShortObjectInspector extends AbstractPrimitiveLazyObjectInspector<ShortWritable> implements
+ ShortObjectInspector {
- LazyShortObjectInspector() {
- super(PrimitiveObjectInspectorUtils.shortTypeEntry);
- }
+ LazyShortObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.shortTypeEntry);
+ }
- @Override
- public short get(Object o) {
- return getPrimitiveWritableObject(o).get();
- }
+ @Override
+ public short get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
- @Override
- public Object copyObject(Object o) {
- return o == null ? null : new LazyShort((LazyShort) o);
- }
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyShort((LazyShort) o);
+ }
- @Override
- public Object getPrimitiveJavaObject(Object o) {
- return o == null ? null : Short.valueOf(get(o));
- }
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Short.valueOf(get(o));
+ }
}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyStringObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyStringObjectInspector.java
new file mode 100644
index 0000000..4d649dc
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyStringObjectInspector.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.io.Text;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyString;
+
+/**
+ * A WritableStringObjectInspector inspects a Text Object.
+ */
+public class LazyStringObjectInspector extends AbstractPrimitiveLazyObjectInspector<Text> implements
+ StringObjectInspector {
+
+ boolean escaped;
+ byte escapeChar;
+
+ LazyStringObjectInspector(boolean escaped, byte escapeChar) {
+ super(PrimitiveObjectInspectorUtils.stringTypeEntry);
+ this.escaped = escaped;
+ this.escapeChar = escapeChar;
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyString((LazyString) o);
+ }
+
+ @Override
+ public Text getPrimitiveWritableObject(Object o) {
+ return o == null ? null : ((LazyString) o).getWritableObject();
+ }
+
+ @Override
+ public String getPrimitiveJavaObject(Object o) {
+ return o == null ? null : ((LazyString) o).getWritableObject().toString();
+ }
+
+ public boolean isEscaped() {
+ return escaped;
+ }
+
+ public byte getEscapeChar() {
+ return escapeChar;
+ }
+
+}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyVoidObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyVoidObjectInspector.java
similarity index 71%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyVoidObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyVoidObjectInspector.java
index a30f1af..c916191 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyVoidObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyVoidObjectInspector.java
@@ -24,21 +24,20 @@
/**
* A WritableVoidObjectInspector inspects a NullWritable Object.
*/
-public class LazyVoidObjectInspector extends
- AbstractPrimitiveLazyObjectInspector<NullWritable> implements
- VoidObjectInspector {
+public class LazyVoidObjectInspector extends AbstractPrimitiveLazyObjectInspector<NullWritable> implements
+ VoidObjectInspector {
- LazyVoidObjectInspector() {
- super(PrimitiveObjectInspectorUtils.voidTypeEntry);
- }
+ LazyVoidObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.voidTypeEntry);
+ }
- @Override
- public Object copyObject(Object o) {
- return o;
- }
+ @Override
+ public Object copyObject(Object o) {
+ return o;
+ }
- @Override
- public Object getPrimitiveJavaObject(Object o) {
- throw new RuntimeException("Internal error: cannot create Void object.");
- }
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ throw new RuntimeException("Internal error: cannot create Void object.");
+ }
}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/PrimitiveObjectInspectorFactory.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/PrimitiveObjectInspectorFactory.java
new file mode 100644
index 0000000..33f0e51
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/PrimitiveObjectInspectorFactory.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
+
+import java.util.HashMap;
+
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+
+/**
+ * PrimitiveObjectInspectorFactory is the primary way to create new
+ * PrimitiveObjectInspector instances.
+ * The reason for having caches here is that ObjectInspectors do not have
+ * internal state, so ObjectInspectors constructed with the same parameters
+ * should always resolve to exactly the same ObjectInspector instance.
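+ * <p>
+ * Illustrative usage (a sketch, not part of the original source):
+ * AbstractPrimitiveLazyObjectInspector<?> intOI =
+ * PrimitiveObjectInspectorFactory.getPrimitiveLazyObjectInspector(PrimitiveCategory.INT);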
+ */
+public final class PrimitiveObjectInspectorFactory {
+
+ public static final LazyBooleanObjectInspector LazyBooleanObjectInspector = new LazyBooleanObjectInspector();
+ public static final LazyByteObjectInspector LazyByteObjectInspector = new LazyByteObjectInspector();
+ public static final LazyShortObjectInspector LazyShortObjectInspector = new LazyShortObjectInspector();
+ public static final LazyIntObjectInspector LazyIntObjectInspector = new LazyIntObjectInspector();
+ public static final LazyLongObjectInspector LazyLongObjectInspector = new LazyLongObjectInspector();
+ public static final LazyFloatObjectInspector LazyFloatObjectInspector = new LazyFloatObjectInspector();
+ public static final LazyDoubleObjectInspector LazyDoubleObjectInspector = new LazyDoubleObjectInspector();
+ public static final LazyStringObjectInspector LazyStringObjectInspector = new LazyStringObjectInspector(false,
+ (byte) '\\');
+ public static final LazyVoidObjectInspector LazyVoidObjectInspector = new LazyVoidObjectInspector();
+
+ private static HashMap<PrimitiveCategory, AbstractPrimitiveLazyObjectInspector<?>> cachedPrimitiveLazyInspectorCache = new HashMap<PrimitiveCategory, AbstractPrimitiveLazyObjectInspector<?>>();
+
+ static {
+ cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.BOOLEAN, LazyBooleanObjectInspector);
+ cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.BYTE, LazyByteObjectInspector);
+ cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.SHORT, LazyShortObjectInspector);
+ cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.INT, LazyIntObjectInspector);
+ cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.LONG, LazyLongObjectInspector);
+ cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.FLOAT, LazyFloatObjectInspector);
+ cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.DOUBLE, LazyDoubleObjectInspector);
+ cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.STRING, LazyStringObjectInspector);
+ cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.VOID, LazyVoidObjectInspector);
+ }
+
+ /**
+ * Returns the PrimitiveWritableObjectInspector for the PrimitiveCategory.
+ *
+ * @param primitiveCategory
+ */
+ public static AbstractPrimitiveLazyObjectInspector<?> getPrimitiveLazyObjectInspector(
+ PrimitiveCategory primitiveCategory) {
+ AbstractPrimitiveLazyObjectInspector<?> result = cachedPrimitiveLazyInspectorCache.get(primitiveCategory);
+ if (result == null) {
+ throw new RuntimeException("Internal error: Cannot find ObjectInspector " + " for " + primitiveCategory);
+ }
+ return result;
+ }
+
+ private PrimitiveObjectInspectorFactory() {
+ // prevent instantiation
+ }
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/parser/IHiveParser.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/parser/IHiveParser.java
new file mode 100644
index 0000000..7830c52
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/parser/IHiveParser.java
@@ -0,0 +1,16 @@
+package edu.uci.ics.hivesterix.serde.parser;
+
+import java.io.IOException;
+
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+
+public interface IHiveParser {
+ /**
+ * Parses one Hive row (delimited text) into binary fields appended to the tuple builder.
+ *
+ * @param data
+ * the byte array holding the textual row
+ * @param start
+ * the start offset of the row within the byte array
+ * @param length
+ * the number of bytes in the row
+ * @param tb
+ * the tuple builder that receives the binary fields
+ */
+ public void parse(byte[] data, int start, int length, ArrayTupleBuilder tb) throws IOException;
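+
+ // Illustrative call (variable names are hypothetical):
+ // parser.parse(rowBytes, 0, rowBytes.length, tupleBuilder);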
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/parser/TextToBinaryTupleParser.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/parser/TextToBinaryTupleParser.java
new file mode 100644
index 0000000..38e1b36
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/parser/TextToBinaryTupleParser.java
@@ -0,0 +1,174 @@
+package edu.uci.ics.hivesterix.serde.parser;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.lazy.LazyInteger;
+import org.apache.hadoop.hive.serde2.lazy.LazyLong;
+import org.apache.hadoop.hive.serde2.lazy.LazyShort;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.io.Text;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.NotImplementedException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+
+public class TextToBinaryTupleParser implements IHiveParser {
+ private int[] invertedIndex;
+ private int[] fieldEnds;
+ private int lastNecessaryFieldIndex;
+ private LazySimpleStructObjectInspector inputObjectInspector;
+ private List<? extends StructField> fieldRefs;
+
+ public TextToBinaryTupleParser(int[] outputColumnsOffset, ObjectInspector structInspector) {
+ int size = 0;
+ for (int i = 0; i < outputColumnsOffset.length; i++)
+ if (outputColumnsOffset[i] >= 0)
+ size++;
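+ // invertedIndex maps each output column position to the index of the
+ // corresponding field in the input row; lastNecessaryFieldIndex ends up as the
+ // right-most input field that any output column needs.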
+ invertedIndex = new int[size];
+ for (int i = 0; i < outputColumnsOffset.length; i++)
+ if (outputColumnsOffset[i] >= 0) {
+ invertedIndex[outputColumnsOffset[i]] = i;
+ lastNecessaryFieldIndex = i;
+ }
+ fieldEnds = new int[outputColumnsOffset.length];
+ for (int i = 0; i < fieldEnds.length; i++)
+ fieldEnds[i] = 0;
+ inputObjectInspector = (LazySimpleStructObjectInspector) structInspector;
+ fieldRefs = inputObjectInspector.getAllStructFieldRefs();
+ }
+
+ @Override
+ public void parse(byte[] bytes, int start, int length, ArrayTupleBuilder tb) throws IOException {
+ byte separator = inputObjectInspector.getSeparator();
+ boolean lastColumnTakesRest = inputObjectInspector.getLastColumnTakesRest();
+ boolean isEscaped = inputObjectInspector.isEscaped();
+ byte escapeChar = inputObjectInspector.getEscapeChar();
+ DataOutput output = tb.getDataOutput();
+
+ int structByteEnd = start + length - 1;
+ int fieldId = 0;
+ int fieldByteEnd = start;
+
+ // Go through all bytes in the byte[]
+ while (fieldByteEnd <= structByteEnd && fieldId <= lastNecessaryFieldIndex) {
+ if (fieldByteEnd == structByteEnd || bytes[fieldByteEnd] == separator) {
+ // Reached the end of a field?
+ if (lastColumnTakesRest && fieldId == fieldEnds.length - 1) {
+ fieldByteEnd = structByteEnd;
+ }
+ fieldEnds[fieldId] = fieldByteEnd;
+ if (fieldId == fieldEnds.length - 1 || fieldByteEnd == structByteEnd) {
+ // for the case of null fields
+ for (int i = fieldId; i < fieldEnds.length; i++) {
+ fieldEnds[i] = fieldByteEnd;
+ }
+ break;
+ }
+ fieldByteEnd++;
+ fieldId++;
+ } else {
+ if (isEscaped && bytes[fieldByteEnd] == escapeChar && fieldByteEnd + 1 < structByteEnd) {
+ // ignore the char after escape_char
+ fieldByteEnd += 2;
+ } else {
+ fieldByteEnd++;
+ }
+ }
+ }
+
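+ // Second pass: serialize only the projected fields, converting each text field
+ // into its binary form according to the field's primitive category.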
+ for (int i = 0; i < invertedIndex.length; i++) {
+ int index = invertedIndex[i];
+ StructField fieldRef = fieldRefs.get(index);
+ ObjectInspector inspector = fieldRef.getFieldObjectInspector();
+ Category category = inspector.getCategory();
+ int fieldStart = index == 0 ? 0 : fieldEnds[index - 1] + 1;
+ int fieldEnd = fieldEnds[index];
+ if (bytes[fieldEnd] == separator)
+ fieldEnd--;
+ int fieldLen = fieldEnd - fieldStart + 1;
+ switch (category) {
+ case PRIMITIVE:
+ PrimitiveObjectInspector poi = (PrimitiveObjectInspector) inspector;
+ switch (poi.getPrimitiveCategory()) {
+ case VOID: {
+ break;
+ }
+ case BOOLEAN: {
+ output.write(bytes[fieldStart]);
+ break;
+ }
+ case BYTE: {
+ output.write(bytes[fieldStart]);
+ break;
+ }
+ case SHORT: {
+ short v = LazyShort.parseShort(bytes, fieldStart, fieldLen);
+ output.write((byte) (v >> 8));
+ output.write((byte) (v));
+ break;
+ }
+ case INT: {
+ int v = LazyInteger.parseInt(bytes, fieldStart, fieldLen);
+ LazyUtils.writeVInt(output, v);
+ break;
+ }
+ case LONG: {
+ long v = LazyLong.parseLong(bytes, fieldStart, fieldLen);
+ LazyUtils.writeVLong(output, v);
+ break;
+ }
+ case FLOAT: {
+ float value = Float.parseFloat(Text.decode(bytes, fieldStart, fieldLen));
+ int v = Float.floatToIntBits(value);
+ output.write((byte) (v >> 24));
+ output.write((byte) (v >> 16));
+ output.write((byte) (v >> 8));
+ output.write((byte) (v));
+ break;
+ }
+ case DOUBLE: {
+ double value = Double.parseDouble(Text.decode(bytes, fieldStart, fieldLen));
+ long v = Double.doubleToLongBits(value);
+ output.write((byte) (v >> 56));
+ output.write((byte) (v >> 48));
+ output.write((byte) (v >> 40));
+ output.write((byte) (v >> 32));
+ output.write((byte) (v >> 24));
+ output.write((byte) (v >> 16));
+ output.write((byte) (v >> 8));
+ output.write((byte) (v));
+ break;
+ }
+ case STRING: {
+ LazyUtils.writeVInt(output, fieldLen);
+ output.write(bytes, fieldStart, fieldLen);
+ break;
+ }
+ default: {
+ throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
+ }
+ }
+ break;
+ case STRUCT:
+ throw new NotImplementedException("Unrecognized type: struct ");
+ case LIST:
+ throw new NotImplementedException("Unrecognized type: struct ");
+ case MAP:
+ throw new NotImplementedException("Unrecognized type: struct ");
+ case UNION:
+ throw new NotImplementedException("Unrecognized type: struct ");
+ }
+ tb.addFieldEndOffset();
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-translator/pom.xml b/hivesterix/hivesterix-translator/pom.xml
new file mode 100644
index 0000000..b99d652
--- /dev/null
+++ b/hivesterix/hivesterix-translator/pom.xml
@@ -0,0 +1,61 @@
+<?xml version="1.0"?>
+<project
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+ xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>hivesterix-translator</artifactId>
+ <name>hivesterix-translator</name>
+
+ <parent>
+ <artifactId>hivesterix</artifactId>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <version>0.2.3-SNAPSHOT</version>
+ </parent>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <encoding>UTF-8</encoding>
+ <fork>true</fork>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-exec</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>algebricks-compiler</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-common</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-runtime</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java
similarity index 97%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java
rename to hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java
index 1fb973e..80b3fef 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java
@@ -45,7 +45,6 @@
import edu.uci.ics.hivesterix.logical.expression.HiveAlgebricksBuiltInFunctionMap;
import edu.uci.ics.hivesterix.logical.expression.HiveFunctionInfo;
import edu.uci.ics.hivesterix.logical.expression.HivesterixConstantValue;
-import edu.uci.ics.hivesterix.logical.expression.Schema;
import edu.uci.ics.hivesterix.logical.plan.visitor.ExtractVisitor;
import edu.uci.ics.hivesterix.logical.plan.visitor.FilterVisitor;
import edu.uci.ics.hivesterix.logical.plan.visitor.GroupByVisitor;
@@ -59,6 +58,7 @@
import edu.uci.ics.hivesterix.logical.plan.visitor.UnionVisitor;
import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
import edu.uci.ics.hivesterix.logical.plan.visitor.base.Visitor;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
@@ -180,7 +180,6 @@
fieldToLogicalVariableMap.put(fieldName, variable);
nameToLogicalVariableMap.put(fieldName, variable);
} else if (!var.equals(variable)) {
- // System.out.println("!!!replace variables!!!");
fieldToLogicalVariableMap.put(fieldName, variable);
nameToLogicalVariableMap.put(fieldName, variable);
}
@@ -212,8 +211,7 @@
}
/**
- * get the number of variables
- * s
+ * get the number of variables
*
* @return
*/
@@ -521,7 +519,7 @@
@Override
public void rewriteOperatorOutputSchema(List<LogicalVariable> variables, Operator operator) {
- printOperatorSchema(operator);
+ //printOperatorSchema(operator);
List<ColumnInfo> columns = operator.getSchema().getSignature();
if (variables.size() != columns.size()) {
throw new IllegalStateException("output cardinality error " + operator.getName() + " variable size: "
@@ -537,7 +535,7 @@
column.setInternalName(var.toString());
}
}
- printOperatorSchema(operator);
+ //printOperatorSchema(operator);
}
/**
@@ -599,14 +597,15 @@
}
}
- private void printOperatorSchema(Operator operator) {
- System.out.println(operator.getName());
- List<ColumnInfo> columns = operator.getSchema().getSignature();
- for (ColumnInfo column : columns) {
- System.out.print(column.getTabAlias() + "." + column.getInternalName() + " ");
- }
- System.out.println();
- }
+ // private void printOperatorSchema(Operator operator) {
+ // // System.out.println(operator.getName());
+ // // List<ColumnInfo> columns = operator.getSchema().getSignature();
+ // // for (ColumnInfo column : columns) {
+ // // System.out.print(column.getTabAlias() + "." +
+ // // column.getInternalName() + " ");
+ // // }
+ // // System.out.println();
+ // }
/**
* translate scalar function expression
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveLogicalPlanAndMetaData.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveLogicalPlanAndMetaData.java
new file mode 100644
index 0000000..d5801a3
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveLogicalPlanAndMetaData.java
@@ -0,0 +1,35 @@
+package edu.uci.ics.hivesterix.logical.plan;
+
+import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlanAndMetadata;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+
+@SuppressWarnings({ "rawtypes", "unchecked" })
+public class HiveLogicalPlanAndMetaData implements ILogicalPlanAndMetadata {
+
+ IMetadataProvider metadata;
+ ILogicalPlan plan;
+
+ public HiveLogicalPlanAndMetaData(ILogicalPlan plan, IMetadataProvider metadata) {
+ this.plan = plan;
+ this.metadata = metadata;
+ }
+
+ @Override
+ public IMetadataProvider getMetadataProvider() {
+ return metadata;
+ }
+
+ @Override
+ public ILogicalPlan getPlan() {
+ return plan;
+ }
+
+ @Override
+ public AlgebricksPartitionConstraint getClusterLocations() {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveOperatorAnnotations.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveOperatorAnnotations.java
new file mode 100644
index 0000000..0ea4e01
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveOperatorAnnotations.java
@@ -0,0 +1,8 @@
+package edu.uci.ics.hivesterix.logical.plan;
+
+public class HiveOperatorAnnotations {
+
+ // hints
+ public static final String LOCAL_GROUP_BY = "LOCAL_GROUP_BY";
+
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ExtractVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ExtractVisitor.java
new file mode 100644
index 0000000..1c67bae
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ExtractVisitor.java
@@ -0,0 +1,26 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+
+public class ExtractVisitor extends DefaultVisitor {
+
+ @Override
+ public Mutable<ILogicalOperator> visit(ExtractOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
+ Schema currentSchema = t.generateInputSchema(operator.getParentOperators().get(0));
+ operator.setSchema(operator.getParentOperators().get(0).getSchema());
+ List<LogicalVariable> latestOutputSchema = t.getVariablesFromSchema(currentSchema);
+ t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
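+ // Extract is treated as a pass-through: the parent schema is propagated and no
+ // new Algebricks operator is created, hence the null return.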
+ return null;
+ }
+
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/FilterVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/FilterVisitor.java
new file mode 100644
index 0000000..9279144
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/FilterVisitor.java
@@ -0,0 +1,41 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.FilterDesc;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
+
+public class FilterVisitor extends DefaultVisitor {
+
+ @Override
+ public Mutable<ILogicalOperator> visit(FilterOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
+ Schema currentSchema = t.generateInputSchema(operator.getParentOperators().get(0));
+
+ FilterDesc desc = (FilterDesc) operator.getConf();
+ ExprNodeDesc predicate = desc.getPredicate();
+ t.rewriteExpression(predicate);
+
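+ // A Hive FilterOperator becomes an Algebricks SelectOperator over the translated predicate.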
+ Mutable<ILogicalExpression> exprs = t.translateScalarFucntion(desc.getPredicate());
+ ILogicalOperator currentOperator = new SelectOperator(exprs);
+ currentOperator.getInputs().add(AlgebricksParentOperatorRef);
+
+ // populate the schema from upstream operator
+ operator.setSchema(operator.getParentOperators().get(0).getSchema());
+ List<LogicalVariable> latestOutputSchema = t.getVariablesFromSchema(currentSchema);
+ t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/GroupByVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/GroupByVisitor.java
new file mode 100644
index 0000000..b7d5779
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/GroupByVisitor.java
@@ -0,0 +1,264 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.lang.reflect.Field;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.logical.plan.HiveOperatorAnnotations;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.OperatorAnnotations;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistinctOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.NestedTupleSourceOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.plan.ALogicalPlanImpl;
+
+@SuppressWarnings({ "rawtypes", "unchecked" })
+public class GroupByVisitor extends DefaultVisitor {
+
+ private List<Mutable<ILogicalExpression>> AlgebricksAggs = new ArrayList<Mutable<ILogicalExpression>>();
+ private List<IFunctionInfo> localAggs = new ArrayList<IFunctionInfo>();
+ private boolean isDistinct = false;
+ private boolean gbyKeyNotRedKey = false;
+
+ @Override
+ public Mutable<ILogicalOperator> visit(GroupByOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException {
+
+ // get descriptors
+ GroupByDesc desc = (GroupByDesc) operator.getConf();
+ GroupByDesc.Mode mode = desc.getMode();
+
+ List<ExprNodeDesc> keys = desc.getKeys();
+ List<AggregationDesc> aggregators = desc.getAggregators();
+
+ Operator child = operator.getChildOperators().get(0);
+
+ if (child.getType() == OperatorType.REDUCESINK) {
+ List<ExprNodeDesc> partKeys = ((ReduceSinkDesc) child.getConf()).getPartitionCols();
+ if (keys.size() != partKeys.size())
+ gbyKeyNotRedKey = true;
+ }
+
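+ // The first branch below builds an Algebricks group-by (map-side, complete, or
+ // local group-bys); the else branch only wires the previously recorded aggregates
+ // into two-step (local + global) form.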
+ if (mode == GroupByDesc.Mode.PARTIAL1 || mode == GroupByDesc.Mode.HASH || mode == GroupByDesc.Mode.COMPLETE
+ || (aggregators.size() == 0 && isDistinct == false) || gbyKeyNotRedKey) {
+ AlgebricksAggs.clear();
+ // add an assign operator if the key is not a column expression
+ ArrayList<LogicalVariable> keyVariables = new ArrayList<LogicalVariable>();
+ ILogicalOperator currentOperator = null;
+ ILogicalOperator assignOperator = t.getAssignOperator(AlgebricksParentOperatorRef, keys, keyVariables);
+ if (assignOperator != null) {
+ currentOperator = assignOperator;
+ AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ // get key variable expression list
+ List<Mutable<ILogicalExpression>> keyExprs = new ArrayList<Mutable<ILogicalExpression>>();
+ for (LogicalVariable var : keyVariables) {
+ keyExprs.add(t.translateScalarFucntion(new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, var
+ .toString(), "", false)));
+ }
+
+ if (aggregators.size() == 0) {
+ List<Mutable<ILogicalExpression>> distinctExprs = new ArrayList<Mutable<ILogicalExpression>>();
+ for (LogicalVariable var : keyVariables) {
+ Mutable<ILogicalExpression> varExpr = new MutableObject<ILogicalExpression>(
+ new VariableReferenceExpression(var));
+ distinctExprs.add(varExpr);
+ }
+ t.rewriteOperatorOutputSchema(keyVariables, operator);
+ isDistinct = true;
+ ILogicalOperator lop = new DistinctOperator(distinctExprs);
+ lop.getInputs().add(AlgebricksParentOperatorRef);
+ return new MutableObject<ILogicalOperator>(lop);
+ }
+
+ // get the pair<LogicalVariable, ILogicalExpression> list
+ List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> keyParameters = new ArrayList<Pair<LogicalVariable, Mutable<ILogicalExpression>>>();
+ keyVariables.clear();
+ for (Mutable<ILogicalExpression> expr : keyExprs) {
+ LogicalVariable keyVar = t.getVariable(expr.getValue().toString(), TypeInfoFactory.unknownTypeInfo);
+ keyParameters.add(new Pair(keyVar, expr));
+ keyVariables.add(keyVar);
+ }
+
+ // get the parameters for the aggregator operator
+ ArrayList<LogicalVariable> aggVariables = new ArrayList<LogicalVariable>();
+ ArrayList<Mutable<ILogicalExpression>> aggExprs = new ArrayList<Mutable<ILogicalExpression>>();
+
+ // get the type of each aggregation function
+ HashMap<AggregationDesc, TypeInfo> aggToType = new HashMap<AggregationDesc, TypeInfo>();
+ List<ColumnInfo> columns = operator.getSchema().getSignature();
+ int offset = keys.size();
+ for (int i = offset; i < columns.size(); i++) {
+ aggToType.put(aggregators.get(i - offset), columns.get(i).getType());
+ }
+
+ localAggs.clear();
+ // rewrite parameter expressions for all aggregators
+ for (AggregationDesc aggregator : aggregators) {
+ for (ExprNodeDesc parameter : aggregator.getParameters()) {
+ t.rewriteExpression(parameter);
+ }
+ Mutable<ILogicalExpression> aggExpr = t.translateAggregation(aggregator);
+ AbstractFunctionCallExpression localAggExpr = (AbstractFunctionCallExpression) aggExpr.getValue();
+ localAggs.add(localAggExpr.getFunctionInfo());
+
+ AggregationDesc logicalAgg = new AggregationDesc(aggregator.getGenericUDAFName(),
+ aggregator.getGenericUDAFEvaluator(), aggregator.getParameters(), aggregator.getDistinct(),
+ Mode.COMPLETE);
+ Mutable<ILogicalExpression> logicalAggExpr = t.translateAggregation(logicalAgg);
+
+ AlgebricksAggs.add(logicalAggExpr);
+ if (!gbyKeyNotRedKey)
+ aggExprs.add(logicalAggExpr);
+ else
+ aggExprs.add(aggExpr);
+
+ aggVariables.add(t.getVariable(aggregator.getExprString() + aggregator.getMode(),
+ aggToType.get(aggregator)));
+ }
+
+ if (child.getType() != OperatorType.REDUCESINK)
+ gbyKeyNotRedKey = false;
+
+ // get the sub plan list
+ AggregateOperator aggOperator = new AggregateOperator(aggVariables, aggExprs);
+ NestedTupleSourceOperator nestedTupleSource = new NestedTupleSourceOperator(
+ new MutableObject<ILogicalOperator>());
+ aggOperator.getInputs().add(new MutableObject<ILogicalOperator>(nestedTupleSource));
+
+ List<Mutable<ILogicalOperator>> subRoots = new ArrayList<Mutable<ILogicalOperator>>();
+ subRoots.add(new MutableObject<ILogicalOperator>(aggOperator));
+ ILogicalPlan subPlan = new ALogicalPlanImpl(subRoots);
+ List<ILogicalPlan> subPlans = new ArrayList<ILogicalPlan>();
+ subPlans.add(subPlan);
+
+ // create the group by operator
+ currentOperator = new edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator(
+ keyParameters, new ArrayList<Pair<LogicalVariable, Mutable<ILogicalExpression>>>(), subPlans);
+ currentOperator.getInputs().add(AlgebricksParentOperatorRef);
+ nestedTupleSource.getDataSourceReference().setValue(currentOperator);
+
+ List<LogicalVariable> outputVariables = new ArrayList<LogicalVariable>();
+ outputVariables.addAll(keyVariables);
+ outputVariables.addAll(aggVariables);
+ t.rewriteOperatorOutputSchema(outputVariables, operator);
+
+ if (gbyKeyNotRedKey) {
+ currentOperator.getAnnotations().put(HiveOperatorAnnotations.LOCAL_GROUP_BY, Boolean.TRUE);
+ }
+
+ HiveConf conf = ConfUtil.getHiveConf();
+ Boolean extGby = conf.getBoolean("hive.algebricks.groupby.external", false);
+
+ if (extGby && isSerializable(aggregators)) {
+ currentOperator.getAnnotations().put(OperatorAnnotations.USE_EXTERNAL_GROUP_BY, Boolean.TRUE);
+ }
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ } else {
+ isDistinct = false;
+ // rewrite parameter expressions for all aggregators
+ int i = 0;
+ for (AggregationDesc aggregator : aggregators) {
+ for (ExprNodeDesc parameter : aggregator.getParameters()) {
+ t.rewriteExpression(parameter);
+ }
+ Mutable<ILogicalExpression> agg = t.translateAggregation(aggregator);
+ AggregateFunctionCallExpression originalAgg = (AggregateFunctionCallExpression) AlgebricksAggs.get(i)
+ .getValue();
+ originalAgg.setStepOneAggregate(localAggs.get(i));
+ AggregateFunctionCallExpression currentAgg = (AggregateFunctionCallExpression) agg.getValue();
+ if (currentAgg.getFunctionInfo() != null) {
+ originalAgg.setTwoStep(true);
+ originalAgg.setStepTwoAggregate(currentAgg.getFunctionInfo());
+ }
+ i++;
+ }
+ return null;
+ }
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(ReduceSinkOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
+ Operator downStream = (Operator) operator.getChildOperators().get(0);
+ if (!(downStream instanceof GroupByOperator)) {
+ return null;
+ }
+
+ ReduceSinkDesc desc = (ReduceSinkDesc) operator.getConf();
+ List<ExprNodeDesc> keys = desc.getKeyCols();
+ List<ExprNodeDesc> values = desc.getValueCols();
+
+ // insert assign for keys
+ ArrayList<LogicalVariable> keyVariables = new ArrayList<LogicalVariable>();
+ t.getAssignOperator(AlgebricksParentOperatorRef, keys, keyVariables);
+
+ // insert assign for values
+ ArrayList<LogicalVariable> valueVariables = new ArrayList<LogicalVariable>();
+ t.getAssignOperator(AlgebricksParentOperatorRef, values, valueVariables);
+
+ ArrayList<LogicalVariable> columns = new ArrayList<LogicalVariable>();
+ columns.addAll(keyVariables);
+ columns.addAll(valueVariables);
+
+ t.rewriteOperatorOutputSchema(columns, operator);
+ return null;
+ }
+
+ private boolean isSerializable(List<AggregationDesc> descs) throws AlgebricksException {
+ try {
+ for (AggregationDesc desc : descs) {
+ GenericUDAFEvaluator udaf = desc.getGenericUDAFEvaluator();
+ AggregationBuffer buf = udaf.getNewAggregationBuffer();
+ Class<?> bufferClass = buf.getClass();
+ Field[] fields = bufferClass.getDeclaredFields();
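+ // treat a buffer as serializable only if all of its fields are primitive types
+ // (a conservative check used to decide whether external group-by can be enabled)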
+ for (Field field : fields) {
+ field.setAccessible(true);
+ String type = field.getType().toString();
+ if (!(type.equals("int") || type.equals("long") || type.equals("float") || type.equals("double") || type
+ .equals("boolean"))) {
+ return false;
+ }
+ }
+
+ }
+ return true;
+ } catch (Exception e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/JoinVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/JoinVisitor.java
new file mode 100644
index 0000000..ef346bc
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/JoinVisitor.java
@@ -0,0 +1,417 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
+import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.LeftOuterJoinOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
+
+@SuppressWarnings("rawtypes")
+public class JoinVisitor extends DefaultVisitor {
+
+ /**
+ * reduce sink operator to key variables
+ */
+ private HashMap<Operator, List<LogicalVariable>> reduceSinkToKeyVariables = new HashMap<Operator, List<LogicalVariable>>();
+
+ /**
+ * reduce sink operator to field names
+ */
+ private HashMap<Operator, List<String>> reduceSinkToFieldNames = new HashMap<Operator, List<String>>();
+
+ /**
+ * reduce sink operator to types
+ */
+ private HashMap<Operator, List<TypeInfo>> reduceSinkToTypes = new HashMap<Operator, List<TypeInfo>>();
+
+ /**
+ * map a join operator (in hive) to its parent operators (in hive)
+ */
+ private HashMap<Operator, List<Operator>> operatorToHiveParents = new HashMap<Operator, List<Operator>>();
+
+ /**
+ * map a join operator (in hive) to its parent operators (in asterix)
+ */
+ private HashMap<Operator, List<ILogicalOperator>> operatorToAsterixParents = new HashMap<Operator, List<ILogicalOperator>>();
+
+ /**
+ * the latest traversed reduce sink operator
+ */
+ private Operator latestReduceSink = null;
+
+ /**
+ * the latest generated parent for join
+ */
+ private ILogicalOperator latestAlgebricksOperator = null;
+
+ /**
+ * process a join operator
+ */
+ @Override
+ public Mutable<ILogicalOperator> visit(JoinOperator operator, Mutable<ILogicalOperator> AlgebricksParentOperator,
+ Translator t) {
+ latestAlgebricksOperator = AlgebricksParentOperator.getValue();
+ translateJoinOperatorPreprocess(operator, t);
+ List<Operator> parents = operatorToHiveParents.get(operator);
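+ // defer building the join until all parent reduce sink branches have been visited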
+ if (parents.size() < operator.getParentOperators().size()) {
+ return null;
+ } else {
+ ILogicalOperator joinOp = translateJoinOperator(operator, AlgebricksParentOperator, t);
+ // clearStatus();
+ return new MutableObject<ILogicalOperator>(joinOp);
+ }
+ }
+
+ private void reorder(Byte[] order, List<ILogicalOperator> parents, List<Operator> hiveParents) {
+ ILogicalOperator[] lops = new ILogicalOperator[parents.size()];
+ Operator[] ops = new Operator[hiveParents.size()];
+
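+ // each reduce sink carries a join tag; place each parent at the slot given by the join's tag order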
+ for (Operator op : hiveParents) {
+ ReduceSinkOperator rop = (ReduceSinkOperator) op;
+ ReduceSinkDesc rdesc = rop.getConf();
+ int tag = rdesc.getTag();
+
+ int index = -1;
+ for (int i = 0; i < order.length; i++)
+ if (order[i] == tag) {
+ index = i;
+ break;
+ }
+ lops[index] = parents.get(hiveParents.indexOf(op));
+ ops[index] = op;
+ }
+
+ parents.clear();
+ hiveParents.clear();
+
+ for (int i = 0; i < lops.length; i++) {
+ parents.add(lops[i]);
+ hiveParents.add(ops[i]);
+ }
+ }
+
+ /**
+ * translate a hive join operator to an asterix join operator -> assign
+ * operator -> project operator chain
+ *
+ * @param parentOperator
+ * @param operator
+ * @return
+ */
+ private ILogicalOperator translateJoinOperator(Operator operator, Mutable<ILogicalOperator> parentOperator,
+ Translator t) {
+
+ JoinDesc joinDesc = (JoinDesc) operator.getConf();
+
+ // get the projection expression (already re-written) from each source
+ // table
+ Map<Byte, List<ExprNodeDesc>> exprMap = joinDesc.getExprs();
+ reorder(joinDesc.getTagOrder(), operatorToAsterixParents.get(operator), operatorToHiveParents.get(operator));
+
+ // make a reduce-side join operator
+ ILogicalOperator currentOperator = generateJoinTree(joinDesc.getCondsList(),
+ operatorToAsterixParents.get(operator), operatorToHiveParents.get(operator), 0, t);
+ parentOperator = new MutableObject<ILogicalOperator>(currentOperator);
+
+ // add assign and project operator on top of a join
+ // output variables
+ ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
+ Set<Entry<Byte, List<ExprNodeDesc>>> entries = exprMap.entrySet();
+ Iterator<Entry<Byte, List<ExprNodeDesc>>> iterator = entries.iterator();
+ while (iterator.hasNext()) {
+ List<ExprNodeDesc> outputExprs = iterator.next().getValue();
+ ILogicalOperator assignOperator = t.getAssignOperator(parentOperator, outputExprs, variables);
+
+ if (assignOperator != null) {
+ currentOperator = assignOperator;
+ parentOperator = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+ }
+
+ ILogicalOperator po = new ProjectOperator(variables);
+ po.getInputs().add(parentOperator);
+ t.rewriteOperatorOutputSchema(variables, operator);
+ return po;
+ }
+
+ /**
+ * deal with reduce sink operator for the case of join
+ */
+ @Override
+ public Mutable<ILogicalOperator> visit(ReduceSinkOperator operator, Mutable<ILogicalOperator> parentOperator,
+ Translator t) {
+
+ Operator downStream = (Operator) operator.getChildOperators().get(0);
+ if (!(downStream instanceof JoinOperator))
+ return null;
+
+ ReduceSinkDesc desc = (ReduceSinkDesc) operator.getConf();
+ List<ExprNodeDesc> keys = desc.getKeyCols();
+ List<ExprNodeDesc> values = desc.getValueCols();
+ List<ExprNodeDesc> partitionCols = desc.getPartitionCols();
+
+ /**
+ * rewrite key, value, and partition column expressions
+ */
+ for (ExprNodeDesc key : keys)
+ t.rewriteExpression(key);
+ for (ExprNodeDesc value : values)
+ t.rewriteExpression(value);
+ for (ExprNodeDesc col : partitionCols)
+ t.rewriteExpression(col);
+
+ ILogicalOperator currentOperator = null;
+
+ // add assign operator for keys if necessary
+ ArrayList<LogicalVariable> keyVariables = new ArrayList<LogicalVariable>();
+ ILogicalOperator assignOperator = t.getAssignOperator(parentOperator, keys, keyVariables);
+ if (assignOperator != null) {
+ currentOperator = assignOperator;
+ parentOperator = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ // add assign operator for values if necessary
+ ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
+ assignOperator = t.getAssignOperator(parentOperator, values, variables);
+ if (assignOperator != null) {
+ currentOperator = assignOperator;
+ parentOperator = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ // unified schema: key, value
+ ArrayList<LogicalVariable> unifiedKeyValues = new ArrayList<LogicalVariable>();
+ unifiedKeyValues.addAll(keyVariables);
+ for (LogicalVariable value : variables)
+ if (keyVariables.indexOf(value) < 0)
+ unifiedKeyValues.add(value);
+
+ // insert a projection operator; this is a *must* because in hive the
+ // reduce sink sometimes also does the projection operator's task
+ currentOperator = new ProjectOperator(unifiedKeyValues);
+ currentOperator.getInputs().add(parentOperator);
+ parentOperator = new MutableObject<ILogicalOperator>(currentOperator);
+
+ reduceSinkToKeyVariables.put(operator, keyVariables);
+ List<String> fieldNames = new ArrayList<String>();
+ List<TypeInfo> types = new ArrayList<TypeInfo>();
+ for (LogicalVariable var : unifiedKeyValues) {
+ fieldNames.add(var.toString());
+ types.add(t.getType(var));
+ }
+ reduceSinkToFieldNames.put(operator, fieldNames);
+ reduceSinkToTypes.put(operator, types);
+ t.rewriteOperatorOutputSchema(variables, operator);
+
+ latestAlgebricksOperator = currentOperator;
+ latestReduceSink = operator;
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ /**
+ * partially rewrite a join operator
+ *
+ * @param operator
+ * @param t
+ */
+ private void translateJoinOperatorPreprocess(Operator operator, Translator t) {
+ JoinDesc desc = (JoinDesc) operator.getConf();
+ ReduceSinkDesc reduceSinkDesc = (ReduceSinkDesc) latestReduceSink.getConf();
+ int tag = reduceSinkDesc.getTag();
+
+ Map<Byte, List<ExprNodeDesc>> exprMap = desc.getExprs();
+ List<ExprNodeDesc> exprs = exprMap.get(Byte.valueOf((byte) tag));
+
+ for (ExprNodeDesc expr : exprs)
+ t.rewriteExpression(expr);
+
+ List<Operator> parents = operatorToHiveParents.get(operator);
+ if (parents == null) {
+ parents = new ArrayList<Operator>();
+ operatorToHiveParents.put(operator, parents);
+ }
+ parents.add(latestReduceSink);
+
+ List<ILogicalOperator> asterixParents = operatorToAsterixParents.get(operator);
+ if (asterixParents == null) {
+ asterixParents = new ArrayList<ILogicalOperator>();
+ operatorToAsterixParents.put(operator, asterixParents);
+ }
+ asterixParents.add(latestAlgebricksOperator);
+ }
+
+ // generate a join tree from a list of exchange/reduce sink operators;
+ // the exchanges and reduce sinks appear in the same order
+ private ILogicalOperator generateJoinTree(List<JoinCondDesc> conds, List<ILogicalOperator> exchanges,
+ List<Operator> reduceSinks, int offset, Translator t) {
+ // get a list of reduce sink descs (input descs)
+ int inputSize = reduceSinks.size() - offset;
+
+ if (inputSize == 2) {
+ ILogicalOperator currentRoot;
+
+ List<ReduceSinkDesc> reduceSinkDescs = new ArrayList<ReduceSinkDesc>();
+ for (int i = reduceSinks.size() - 1; i >= offset; i--)
+ reduceSinkDescs.add((ReduceSinkDesc) reduceSinks.get(i).getConf());
+
+ // collect the field names and types for the join
+ List<String> fieldNames = new ArrayList<String>();
+ List<TypeInfo> types = new ArrayList<TypeInfo>();
+ for (int i = reduceSinks.size() - 1; i >= offset; i--) {
+ fieldNames.addAll(reduceSinkToFieldNames.get(reduceSinks.get(i)));
+ types.addAll(reduceSinkToTypes.get(reduceSinks.get(i)));
+ }
+
+ // get number of equality conjunctions in the final join condition
+ int size = reduceSinkDescs.get(0).getKeyCols().size();
+
+ // make up the join condition expression
+ List<ExprNodeDesc> joinConditionChildren = new ArrayList<ExprNodeDesc>();
+ for (int i = 0; i < size; i++) {
+ // create a join key pair
+ List<ExprNodeDesc> keyPair = new ArrayList<ExprNodeDesc>();
+ for (ReduceSinkDesc sink : reduceSinkDescs) {
+ keyPair.add(sink.getKeyCols().get(i));
+ }
+ // create a hive equal condition
+ ExprNodeDesc equality = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+ new GenericUDFOPEqual(), keyPair);
+ // add the equal condition to the conjunction list
+ joinConditionChildren.add(equality);
+ }
+ // get final conjunction expression
+ ExprNodeDesc conjunct = null;
+
+ if (joinConditionChildren.size() > 1)
+ conjunct = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(),
+ joinConditionChildren);
+ else if (joinConditionChildren.size() == 1)
+ conjunct = joinConditionChildren.get(0);
+ else {
+ // there is no join equality condition; fall back to a constant TRUE condition
+ conjunct = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, new Boolean(true));
+ }
+ // get an ILogicalExpression from hive's expression
+ Mutable<ILogicalExpression> expression = t.translateScalarFucntion(conjunct);
+
+ Mutable<ILogicalOperator> leftBranch = new MutableObject<ILogicalOperator>(
+ exchanges.get(exchanges.size() - 1));
+ Mutable<ILogicalOperator> rightBranch = new MutableObject<ILogicalOperator>(
+ exchanges.get(exchanges.size() - 2));
+ // get the join operator
+ if (conds.get(offset).getType() == JoinDesc.LEFT_OUTER_JOIN) {
+ currentRoot = new LeftOuterJoinOperator(expression);
+ Mutable<ILogicalOperator> temp = leftBranch;
+ leftBranch = rightBranch;
+ rightBranch = temp;
+ } else if (conds.get(offset).getType() == JoinDesc.RIGHT_OUTER_JOIN) {
+ currentRoot = new LeftOuterJoinOperator(expression);
+ } else
+ currentRoot = new InnerJoinOperator(expression);
+
+ currentRoot.getInputs().add(leftBranch);
+ currentRoot.getInputs().add(rightBranch);
+
+ // rewriteOperatorOutputSchema(variables, operator);
+ return currentRoot;
+ } else {
+ // get the child join operator and insert a one-to-one exchange
+ ILogicalOperator joinSrcOne = generateJoinTree(conds, exchanges, reduceSinks, offset + 1, t);
+ // joinSrcOne.addInput(childJoin);
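+ // the recursively built join becomes one branch of the current join, producing a cascaded join tree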
+
+ ILogicalOperator currentRoot;
+
+ List<ReduceSinkDesc> reduceSinkDescs = new ArrayList<ReduceSinkDesc>();
+ for (int i = offset; i < offset + 2; i++)
+ reduceSinkDescs.add((ReduceSinkDesc) reduceSinks.get(i).getConf());
+
+ // collect the field names and types for the join
+ List<String> fieldNames = new ArrayList<String>();
+ List<TypeInfo> types = new ArrayList<TypeInfo>();
+ for (int i = offset; i < reduceSinks.size(); i++) {
+ fieldNames.addAll(reduceSinkToFieldNames.get(reduceSinks.get(i)));
+ types.addAll(reduceSinkToTypes.get(reduceSinks.get(i)));
+ }
+
+ // get number of equality conjunctions in the final join condition
+ int size = reduceSinkDescs.get(0).getKeyCols().size();
+
+ // make up the join condition expression
+ List<ExprNodeDesc> joinConditionChildren = new ArrayList<ExprNodeDesc>();
+ for (int i = 0; i < size; i++) {
+ // create a join key pair
+ List<ExprNodeDesc> keyPair = new ArrayList<ExprNodeDesc>();
+ for (ReduceSinkDesc sink : reduceSinkDescs) {
+ keyPair.add(sink.getKeyCols().get(i));
+ }
+ // create a hive equal condition
+ ExprNodeDesc equality = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+ new GenericUDFOPEqual(), keyPair);
+ // add the equal condition to the conjunction list
+ joinConditionChildren.add(equality);
+ }
+ // get final conjunction expression
+ ExprNodeDesc conjunct = null;
+
+ if (joinConditionChildren.size() > 1)
+ conjunct = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(),
+ joinConditionChildren);
+ else if (joinConditionChildren.size() == 1)
+ conjunct = joinConditionChildren.get(0);
+ else {
+ // there is no join equality condition; fall back to a constant TRUE condition
+ conjunct = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, new Boolean(true));
+ }
+ // get an ILogicalExpression from hive's expression
+ Mutable<ILogicalExpression> expression = t.translateScalarFucntion(conjunct);
+
+ Mutable<ILogicalOperator> leftBranch = new MutableObject<ILogicalOperator>(joinSrcOne);
+ Mutable<ILogicalOperator> rightBranch = new MutableObject<ILogicalOperator>(exchanges.get(offset));
+
+ // get the join operator
+ if (conds.get(offset).getType() == JoinDesc.LEFT_OUTER_JOIN) {
+ currentRoot = new LeftOuterJoinOperator(expression);
+ Mutable<ILogicalOperator> temp = leftBranch;
+ leftBranch = rightBranch;
+ rightBranch = temp;
+ } else if (conds.get(offset).getType() == JoinDesc.RIGHT_OUTER_JOIN) {
+ currentRoot = new LeftOuterJoinOperator(expression);
+ } else
+ currentRoot = new InnerJoinOperator(expression);
+
+ // set the inputs of the Algebricks join operator:
+ // the child join and the current table
+ currentRoot.getInputs().add(leftBranch);
+ currentRoot.getInputs().add(rightBranch);
+
+ return currentRoot;
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java
new file mode 100644
index 0000000..cc19364
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java
@@ -0,0 +1,110 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.UDTFOperator;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+
+/**
+ * The lateral view join operator is used for FROM src LATERAL VIEW udtf()...
+ * This operator was implemented with the following operator DAG in mind.
+ * For a query such as
+ * SELECT pageid, adid.* FROM example_table LATERAL VIEW explode(adid_list) AS
+ * adid
+ * The top of the operator DAG will look similar to
+ *   [Table Scan]
+ *        |
+ *   [Lateral View Forward]
+ *      /           \
+ *   [Select](*)    [Select](adid_list)
+ *      |                |
+ *      |             [UDTF] (explode)
+ *       \              /
+ *   [Lateral View Join]
+ *        |
+ *   [Select] (pageid, adid.*)
+ *        |
+ *       ....
+ * Rows from the table scan operator are first sent to a lateral view forward
+ * operator that just forwards the row and marks the start of a LV. The select
+ * operator on the left picks all the columns while the select operator on the
+ * right picks only the columns needed by the UDTF.
+ * The output of select in the left branch and output of the UDTF in the right
+ * branch are then sent to the lateral view join (LVJ). In most cases, the UDTF
+ * will generate > 1 row for every row received from the TS, while the left
+ * select operator will generate only one. For each row output from the TS, the
+ * LVJ outputs all possible rows that can be created by joining the row from the
+ * left select and one of the rows output from the UDTF.
+ * Additional lateral views can be supported by adding a similar DAG after the
+ * previous LVJ operator.
+ */
+
+@SuppressWarnings("rawtypes")
+public class LateralViewJoinVisitor extends DefaultVisitor {
+
+ private UDTFDesc udtf;
+
+ private List<Mutable<ILogicalOperator>> parents = new ArrayList<Mutable<ILogicalOperator>>();
+
+ @Override
+ public Mutable<ILogicalOperator> visit(LateralViewJoinOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException {
+
+ parents.add(AlgebricksParentOperatorRef);
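+ // wait until both parent branches (the select side and the UDTF side) have been translated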
+ if (operator.getParentOperators().size() > parents.size()) {
+ return null;
+ }
+
+ Operator parent0 = operator.getParentOperators().get(0);
+ ILogicalOperator parentOperator;
+ ILogicalExpression unnestArg;
+ if (parent0 instanceof UDTFOperator) {
+ List<LogicalVariable> unnestVars = new ArrayList<LogicalVariable>();
+ VariableUtilities.getLiveVariables(parents.get(1).getValue(), unnestVars);
+ unnestArg = new VariableReferenceExpression(unnestVars.get(0));
+ parentOperator = parents.get(1).getValue();
+ } else {
+ List<LogicalVariable> unnestVars = new ArrayList<LogicalVariable>();
+ VariableUtilities.getLiveVariables(parents.get(0).getValue(), unnestVars);
+ unnestArg = new VariableReferenceExpression(unnestVars.get(0));
+ parentOperator = parents.get(0).getValue();
+ }
+
+ LogicalVariable var = t.getVariable(udtf.toString(), TypeInfoFactory.unknownTypeInfo);
+
+ Mutable<ILogicalExpression> unnestExpr = t.translateUnnestFunction(udtf, new MutableObject<ILogicalExpression>(
+ unnestArg));
+ ILogicalOperator currentOperator = new UnnestOperator(var, unnestExpr);
+
+ List<LogicalVariable> outputVars = new ArrayList<LogicalVariable>();
+ VariableUtilities.getLiveVariables(parentOperator, outputVars);
+ outputVars.add(var);
+ currentOperator.getInputs().add(new MutableObject<ILogicalOperator>(parentOperator));
+
+ parents.clear();
+ udtf = null;
+ t.rewriteOperatorOutputSchema(outputVars, operator);
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(UDTFOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
+ Schema currentSchema = t.generateInputSchema(operator.getParentOperators().get(0));
+ udtf = (UDTFDesc) operator.getConf();
+
+ // populate the schema from upstream operator
+ operator.setSchema(operator.getParentOperators().get(0).getSchema());
+ List<LogicalVariable> latestOutputSchema = t.getVariablesFromSchema(currentSchema);
+ t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
+ return null;
+ }
+
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LimitVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LimitVisitor.java
new file mode 100644
index 0000000..cc10f8f
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LimitVisitor.java
@@ -0,0 +1,41 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
+
+import edu.uci.ics.hivesterix.logical.expression.HivesterixConstantValue;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
+
+public class LimitVisitor extends DefaultVisitor {
+
+ @Override
+ public Mutable<ILogicalOperator> visit(LimitOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
+ Schema currentSchema = t.generateInputSchema(operator.getParentOperators().get(0));
+
+ LimitDesc desc = (LimitDesc) operator.getConf();
+ int limit = desc.getLimit();
+ Integer limitValue = new Integer(limit);
+
+ ILogicalExpression expr = new ConstantExpression(new HivesterixConstantValue(limitValue));
+ ILogicalOperator currentOperator = new edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.LimitOperator(
+ expr, true);
+ currentOperator.getInputs().add(AlgebricksParentOperatorRef);
+
+ operator.setSchema(operator.getParentOperators().get(0).getSchema());
+ List<LogicalVariable> latestOutputSchema = t.getVariablesFromSchema(currentSchema);
+ t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java
new file mode 100644
index 0000000..4aba6a4
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java
@@ -0,0 +1,171 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
+
+@SuppressWarnings("rawtypes")
+public class MapJoinVisitor extends DefaultVisitor {
+
+ /**
+ * map a join operator (in hive) to its parent operators (in asterix)
+ */
+ private HashMap<Operator, List<Mutable<ILogicalOperator>>> opMap = new HashMap<Operator, List<Mutable<ILogicalOperator>>>();
+
+ @Override
+ public Mutable<ILogicalOperator> visit(MapJoinOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
+ List<Operator<? extends Serializable>> joinSrc = operator.getParentOperators();
+ List<Mutable<ILogicalOperator>> parents = opMap.get(operator);
+ if (parents == null) {
+ parents = new ArrayList<Mutable<ILogicalOperator>>();
+ opMap.put(operator, parents);
+ }
+ parents.add(AlgebricksParentOperatorRef);
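+ // defer translation until all parent branches of the map join have been visited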
+ if (joinSrc.size() != parents.size())
+ return null;
+
+ ILogicalOperator currentOperator;
+ // make a map join operator
+ // TODO: will have trouble for n-way joins
+ MapJoinDesc joinDesc = (MapJoinDesc) operator.getConf();
+
+ Map<Byte, List<ExprNodeDesc>> keyMap = joinDesc.getKeys();
+ // get the projection expression (already re-written) from each source
+ // table
+ Map<Byte, List<ExprNodeDesc>> exprMap = joinDesc.getExprs();
+
+ int inputSize = operator.getParentOperators().size();
+ // get a list of reduce sink descs (input descs)
+
+ // get the parent operator
+ List<Mutable<ILogicalOperator>> parentOps = parents;
+
+ List<String> fieldNames = new ArrayList<String>();
+ List<TypeInfo> types = new ArrayList<TypeInfo>();
+ for (Operator ts : joinSrc) {
+ List<ColumnInfo> columns = ts.getSchema().getSignature();
+ for (ColumnInfo col : columns) {
+ fieldNames.add(col.getInternalName());
+ types.add(col.getType());
+ }
+ }
+
+ // get number of equality conjunctions in the final join condition
+ Set<Entry<Byte, List<ExprNodeDesc>>> keyEntries = keyMap.entrySet();
+ Iterator<Entry<Byte, List<ExprNodeDesc>>> entry = keyEntries.iterator();
+
+ int size = 0;
+ if (entry.hasNext())
+ size = entry.next().getValue().size();
+
+ // make up the join condition expression
+ List<ExprNodeDesc> joinConditionChildren = new ArrayList<ExprNodeDesc>();
+ for (int i = 0; i < size; i++) {
+ // create a join key pair
+ List<ExprNodeDesc> keyPair = new ArrayList<ExprNodeDesc>();
+ for (int j = 0; j < inputSize; j++) {
+ keyPair.add(keyMap.get(Byte.valueOf((byte) j)).get(i));
+ }
+ // create a hive equal condition
+ ExprNodeDesc equality = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+ new GenericUDFOPEqual(), keyPair);
+ // add the equal condition to the conjunction list
+ joinConditionChildren.add(equality);
+ }
+ // get final conjunction expression
+ ExprNodeDesc conjunct = null;
+
+ if (joinConditionChildren.size() > 1)
+ conjunct = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(),
+ joinConditionChildren);
+ else if (joinConditionChildren.size() == 1)
+ conjunct = joinConditionChildren.get(0);
+ else {
+ // there is no join equality condition; fall back to a constant TRUE condition
+ conjunct = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, new Boolean(true));
+ }
+ // get an ILogicalExpression from hive's expression
+ Mutable<ILogicalExpression> expression = t.translateScalarFucntion(conjunct);
+
+ ArrayList<LogicalVariable> left = new ArrayList<LogicalVariable>();
+ ArrayList<LogicalVariable> right = new ArrayList<LogicalVariable>();
+
+ Set<Entry<Byte, List<ExprNodeDesc>>> kentries = keyMap.entrySet();
+ Iterator<Entry<Byte, List<ExprNodeDesc>>> kiterator = kentries.iterator();
+ int iteration = 0;
+ ILogicalOperator assignOperator = null;
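+ // the first key list populates the left variables and the remaining key lists the right ones;
+ // as the TODO above notes, this only handles two-way joins correctly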
+ while (kiterator.hasNext()) {
+ List<ExprNodeDesc> outputExprs = kiterator.next().getValue();
+
+ if (iteration == 0)
+ assignOperator = t.getAssignOperator(AlgebricksParentOperatorRef, outputExprs, left);
+ else
+ assignOperator = t.getAssignOperator(AlgebricksParentOperatorRef, outputExprs, right);
+
+ if (assignOperator != null) {
+ currentOperator = assignOperator;
+ AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+ iteration++;
+ }
+
+ List<Mutable<ILogicalOperator>> inputs = parentOps;
+
+ // get the join operator
+ currentOperator = new InnerJoinOperator(expression);
+
+ // set the inputs of the asterix join operator
+ for (Mutable<ILogicalOperator> input : inputs)
+ currentOperator.getInputs().add(input);
+ AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(currentOperator);
+
+ // add assign and project operator
+ // output variables
+ ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
+ Set<Entry<Byte, List<ExprNodeDesc>>> entries = exprMap.entrySet();
+ Iterator<Entry<Byte, List<ExprNodeDesc>>> iterator = entries.iterator();
+ while (iterator.hasNext()) {
+ List<ExprNodeDesc> outputExprs = iterator.next().getValue();
+ assignOperator = t.getAssignOperator(AlgebricksParentOperatorRef, outputExprs, variables);
+
+ if (assignOperator != null) {
+ currentOperator = assignOperator;
+ AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+ }
+
+ currentOperator = new ProjectOperator(variables);
+ currentOperator.getInputs().add(AlgebricksParentOperatorRef);
+ t.rewriteOperatorOutputSchema(variables, operator);
+ // opMap.clear();
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ProjectVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ProjectVisitor.java
new file mode 100644
index 0000000..eb0922f
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ProjectVisitor.java
@@ -0,0 +1,56 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
+
+public class ProjectVisitor extends DefaultVisitor {
+
+ /**
+ * translate project operator
+ */
+ @Override
+ public Mutable<ILogicalOperator> visit(SelectOperator operator, Mutable<ILogicalOperator> AlgebricksParentOperator,
+ Translator t) {
+
+ SelectDesc desc = (SelectDesc) operator.getConf();
+
+ if (desc == null)
+ return null;
+
+ List<ExprNodeDesc> cols = desc.getColList();
+
+ if (cols == null)
+ return null;
+
+ // insert assign operator if necessary
+ ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
+
+ for (ExprNodeDesc expr : cols)
+ t.rewriteExpression(expr);
+
+ ILogicalOperator assignOp = t.getAssignOperator(AlgebricksParentOperator, cols, variables);
+ ILogicalOperator currentOperator = null;
+ if (assignOp != null) {
+ currentOperator = assignOp;
+ AlgebricksParentOperator = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ currentOperator = new ProjectOperator(variables);
+ currentOperator.getInputs().add(AlgebricksParentOperator);
+ t.rewriteOperatorOutputSchema(variables, operator);
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/SortVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/SortVisitor.java
new file mode 100644
index 0000000..325b632
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/SortVisitor.java
@@ -0,0 +1,113 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.OrderOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.OrderOperator.IOrder;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import edu.uci.ics.hyracks.algebricks.core.algebra.properties.OrderColumn;
+
+public class SortVisitor extends DefaultVisitor {
+
+ @SuppressWarnings("rawtypes")
+ @Override
+ public Mutable<ILogicalOperator> visit(ReduceSinkOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException {
+ ReduceSinkDesc desc = (ReduceSinkDesc) operator.getConf();
+ Operator downStream = (Operator) operator.getChildOperators().get(0);
+ List<ExprNodeDesc> keys = desc.getKeyCols();
+ if (!(downStream instanceof ExtractOperator && desc.getNumReducers() == 1 && keys.size() > 0)) {
+ return null;
+ }
+
+ List<ExprNodeDesc> schema = new ArrayList<ExprNodeDesc>();
+ List<ExprNodeDesc> values = desc.getValueCols();
+ List<ExprNodeDesc> partitionCols = desc.getPartitionCols();
+ for (ExprNodeDesc key : keys) {
+ t.rewriteExpression(key);
+ }
+ for (ExprNodeDesc value : values) {
+ t.rewriteExpression(value);
+ }
+ for (ExprNodeDesc col : partitionCols) {
+ t.rewriteExpression(col);
+ }
+
+ // add an order-by operator and a limit if any
+ List<Pair<IOrder, Mutable<ILogicalExpression>>> pairs = new ArrayList<Pair<IOrder, Mutable<ILogicalExpression>>>();
+ char[] orders = desc.getOrder().toCharArray();
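+ // Hive encodes per-key sort directions as a string of '+' (ascending) / '-' (descending) characters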
+ int i = 0;
+ for (ExprNodeDesc key : keys) {
+ Mutable<ILogicalExpression> expr = t.translateScalarFucntion(key);
+ IOrder order = orders[i] == '+' ? OrderOperator.ASC_ORDER : OrderOperator.DESC_ORDER;
+
+ Pair<IOrder, Mutable<ILogicalExpression>> pair = new Pair<IOrder, Mutable<ILogicalExpression>>(order, expr);
+ pairs.add(pair);
+ i++;
+ }
+
+ // get input variables
+ ArrayList<LogicalVariable> inputVariables = new ArrayList<LogicalVariable>();
+ VariableUtilities.getProducedVariables(AlgebricksParentOperatorRef.getValue(), inputVariables);
+
+ ArrayList<LogicalVariable> keyVariables = new ArrayList<LogicalVariable>();
+ ILogicalOperator currentOperator;
+ ILogicalOperator assignOp = t.getAssignOperator(AlgebricksParentOperatorRef, keys, keyVariables);
+ if (assignOp != null) {
+ currentOperator = assignOp;
+ AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ OrderColumn[] keyColumns = new OrderColumn[keyVariables.size()];
+
+ for (int j = 0; j < keyColumns.length; j++)
+ keyColumns[j] = new OrderColumn(keyVariables.get(j), pairs.get(j).first.getKind());
+
+ // handle order operator
+ currentOperator = new OrderOperator(pairs);
+ currentOperator.getInputs().add(AlgebricksParentOperatorRef);
+ AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(currentOperator);
+
+ // project back, remove generated sort-key columns if any
+ if (assignOp != null) {
+ currentOperator = new ProjectOperator(inputVariables);
+ currentOperator.getInputs().add(AlgebricksParentOperatorRef);
+ AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ /**
+ * a special rule for hive's order-by: the output schema of the reduce
+ * sink operator only contains the value columns
+ */
+ for (ExprNodeDesc value : values) {
+ schema.add(value);
+ }
+
+ ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
+ ILogicalOperator assignOperator = t.getAssignOperator(AlgebricksParentOperatorRef, schema, variables);
+ t.rewriteOperatorOutputSchema(variables, operator);
+
+ if (assignOperator != null) {
+ currentOperator = assignOperator;
+ AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java
new file mode 100644
index 0000000..3af1832
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java
@@ -0,0 +1,135 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hivesterix.runtime.jobgen.HiveDataSink;
+import edu.uci.ics.hivesterix.runtime.jobgen.HiveDataSource;
+import edu.uci.ics.hivesterix.runtime.jobgen.HiveMetaDataProvider;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSink;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSource;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.WriteOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+
+public class TableScanWriteVisitor extends DefaultVisitor {
+
+ /**
+ * map from alias to partition desc
+ */
+ private HashMap<String, PartitionDesc> aliasToPathMap;
+
+ /**
+ * map from partition desc to data source
+ */
+ private HashMap<PartitionDesc, IDataSource<PartitionDesc>> dataSourceMap = new HashMap<PartitionDesc, IDataSource<PartitionDesc>>();
+
+ /**
+ * constructor
+ *
+ * @param aliasToPathMap
+ */
+ public TableScanWriteVisitor(HashMap<String, PartitionDesc> aliasToPathMap) {
+ this.aliasToPathMap = aliasToPathMap;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(TableScanOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ TableScanDesc desc = (TableScanDesc) operator.getConf();
+ if (desc == null) {
+ List<LogicalVariable> schema = new ArrayList<LogicalVariable>();
+ VariableUtilities.getLiveVariables(AlgebricksParentOperator.getValue(), schema);
+ t.rewriteOperatorOutputSchema(schema, operator);
+ return null;
+ }
+
+ List<ColumnInfo> columns = operator.getSchema().getSignature();
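+ // drop Hive virtual columns (e.g., INPUT__FILE__NAME, BLOCK__OFFSET__INSIDE__FILE); they are not produced by the scan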
+ for (int i = columns.size() - 1; i >= 0; i--)
+ if (columns.get(i).getIsVirtualCol() == true)
+ columns.remove(i);
+
+ // start with empty tuple operator
+ List<TypeInfo> types = new ArrayList<TypeInfo>();
+ ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
+ List<String> names = new ArrayList<String>();
+ for (ColumnInfo column : columns) {
+ types.add(column.getType());
+
+ LogicalVariable var = t.getVariableFromFieldName(column.getTabAlias() + "." + column.getInternalName());
+ LogicalVariable varNew;
+
+ if (var != null) {
+ varNew = t.getVariable(column.getTabAlias() + "." + column.getInternalName() + operator.toString(),
+ column.getType());
+ t.replaceVariable(var, varNew);
+ var = varNew;
+ } else
+ var = t.getNewVariable(column.getTabAlias() + "." + column.getInternalName(), column.getType());
+
+ variables.add(var);
+ names.add(column.getInternalName());
+ }
+ Schema currentSchema = new Schema(names, types);
+
+ String alias = desc.getAlias();
+ PartitionDesc partDesc = aliasToPathMap.get(alias);
+ IDataSource<PartitionDesc> dataSource = new HiveDataSource<PartitionDesc>(partDesc, currentSchema.getSchema());
+ ILogicalOperator currentOperator = new DataSourceScanOperator(variables, dataSource);
+
+ // set empty tuple source operator
+ ILogicalOperator ets = new EmptyTupleSourceOperator();
+ currentOperator.getInputs().add(new MutableObject<ILogicalOperator>(ets));
+
+ // setup data source
+ dataSourceMap.put(partDesc, dataSource);
+ t.rewriteOperatorOutputSchema(variables, operator);
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(FileSinkOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) {
+
+ if (hiveOperator.getChildOperators() != null && hiveOperator.getChildOperators().size() > 0)
+ return null;
+
+ Schema currentSchema = t.generateInputSchema(hiveOperator.getParentOperators().get(0));
+
+ IDataSink sink = new HiveDataSink(hiveOperator, currentSchema.getSchema());
+ List<Mutable<ILogicalExpression>> exprList = new ArrayList<Mutable<ILogicalExpression>>();
+ for (String column : currentSchema.getNames()) {
+ exprList.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(t.getVariable(column))));
+ }
+
+ ILogicalOperator currentOperator = new WriteOperator(exprList, sink);
+ if (AlgebricksParentOperator != null) {
+ currentOperator.getInputs().add(AlgebricksParentOperator);
+ }
+
+ IMetadataProvider<PartitionDesc, Object> metaData = new HiveMetaDataProvider<PartitionDesc, Object>(
+ hiveOperator, currentSchema, dataSourceMap);
+ t.setMetadataProvider(metaData);
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java
new file mode 100644
index 0000000..96b9463
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java
@@ -0,0 +1,62 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Triple;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+
+public class UnionVisitor extends DefaultVisitor {
+
+ List<Mutable<ILogicalOperator>> parents = new ArrayList<Mutable<ILogicalOperator>>();
+
+ @Override
+ public Mutable<ILogicalOperator> visit(UnionOperator operator, Mutable<ILogicalOperator> AlgebricksParentOperator,
+ Translator t) throws AlgebricksException {
+
+ parents.add(AlgebricksParentOperator);
+ if (operator.getParentOperators().size() > parents.size()) {
+ return null;
+ }
+
+ List<LogicalVariable> leftVars = new ArrayList<LogicalVariable>();
+ List<LogicalVariable> rightVars = new ArrayList<LogicalVariable>();
+
+ VariableUtilities.getUsedVariables(parents.get(0).getValue(), leftVars);
+ VariableUtilities.getUsedVariables(parents.get(1).getValue(), rightVars);
+
+ List<Triple<LogicalVariable, LogicalVariable, LogicalVariable>> triples = new ArrayList<Triple<LogicalVariable, LogicalVariable, LogicalVariable>>();
+ List<LogicalVariable> unionVars = new ArrayList<LogicalVariable>();
+
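+ // pair the i-th output variable of each input branch and substitute both with a fresh union variable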
+ for (int i = 0; i < leftVars.size(); i++) {
+ LogicalVariable unionVar = t.getVariable(
+ leftVars.get(i).getId() + "union" + AlgebricksParentOperator.hashCode(),
+ TypeInfoFactory.unknownTypeInfo);
+ unionVars.add(unionVar);
+ Triple<LogicalVariable, LogicalVariable, LogicalVariable> triple = new Triple<LogicalVariable, LogicalVariable, LogicalVariable>(
+ leftVars.get(i), rightVars.get(i), unionVar);
+ t.replaceVariable(leftVars.get(i), unionVar);
+ t.replaceVariable(rightVars.get(i), unionVar);
+ triples.add(triple);
+ }
+ ILogicalOperator currentOperator = new edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator(
+ triples);
+ for (Mutable<ILogicalOperator> parent : parents)
+ currentOperator.getInputs().add(parent);
+
+ t.rewriteOperatorOutputSchema(unionVars, operator);
+ parents.clear();
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/DefaultVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/DefaultVisitor.java
new file mode 100644
index 0000000..d298553
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/DefaultVisitor.java
@@ -0,0 +1,145 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor.base;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.exec.CollectOperator;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.ForwardOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.MapOperator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.ScriptOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.UDTFOperator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+
+/**
+ * a default empty implementation of visitor
+ *
+ * @author yingyib
+ */
+public class DefaultVisitor implements Visitor {
+
+ @Override
+ public Mutable<ILogicalOperator> visit(CollectOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(JoinOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(ExtractOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(MapJoinOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(SMBMapJoinOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(FileSinkOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(ReduceSinkOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(FilterOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(ForwardOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(GroupByOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(LateralViewForwardOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(LateralViewJoinOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(LimitOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(MapOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(ScriptOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(SelectOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(TableScanOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(UDTFOperator operator, Mutable<ILogicalOperator> AlgebricksParentOperator,
+ Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(UnionOperator operator, Mutable<ILogicalOperator> AlgebricksParentOperator,
+ Translator t) throws AlgebricksException {
+ return null;
+ }
+}
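
Not part of this change set, but for orientation: a minimal sketch of how a concrete visitor is expected to extend DefaultVisitor, overriding only the operator it handles and inheriting the empty callbacks for everything else. The class name and method body are illustrative assumptions, not code from this branch.

package edu.uci.ics.hivesterix.logical.plan.visitor;

import org.apache.commons.lang3.mutable.Mutable;
import org.apache.hadoop.hive.ql.exec.LimitOperator;

import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;

/**
 * Illustrative only: reacts to LimitOperator and ignores every other Hive
 * operator by inheriting the empty DefaultVisitor callbacks.
 */
public class ExampleLimitVisitor extends DefaultVisitor {

    @Override
    public Mutable<ILogicalOperator> visit(LimitOperator hiveOperator,
            Mutable<ILogicalOperator> algebricksParentOperator, Translator t) throws AlgebricksException {
        // rename the operator's output columns so later expression evaluators see fresh variables
        t.rewriteOperatorOutputSchema(hiveOperator);
        // returning null signals that no new Algebricks operator was produced for this node
        return null;
    }
}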
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java
new file mode 100644
index 0000000..8aa139a
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java
@@ -0,0 +1,170 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor.base;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+
+@SuppressWarnings("rawtypes")
+public interface Translator {
+
+ /**
+ * generate the input schema of a Hive operator
+ *
+ * @param operator
+ * @return the input schema of the operator
+ */
+ public Schema generateInputSchema(Operator operator);
+
+ /**
+ * rewrite the names of output columns for future expression evaluators to
+ * use
+ *
+ * @param operator
+ */
+ public void rewriteOperatorOutputSchema(List<LogicalVariable> vars, Operator operator);
+
+ /**
+ * rewrite the names of output columns for future expression evaluators to
+ * use
+ *
+ * @param operator
+ */
+ public void rewriteOperatorOutputSchema(Operator operator);
+
+ /**
+ * rewrite an expression and substitute variables
+ *
+ * @param expr
+ * hive expression
+ */
+ public void rewriteExpression(ExprNodeDesc expr);
+
+ /**
+ * rewrite an expression and substitute variables
+ *
+ * @param expr
+ * hive expression
+ */
+ public void rewriteExpressionPartial(ExprNodeDesc expr);
+
+ /**
+ * get an assign operator as a child of parent
+ *
+ * @param parent
+ * @param cols
+ * @param variables
+ * @return
+ */
+ public ILogicalOperator getAssignOperator(Mutable<ILogicalOperator> parent, List<ExprNodeDesc> cols,
+ ArrayList<LogicalVariable> variables);
+
+ /**
+ * get type for a logical variable
+ *
+ * @param var
+ * @return type info
+ */
+ public TypeInfo getType(LogicalVariable var);
+
+ /**
+ * translate an expression from hive to Algebricks
+ *
+ * @param desc
+ * @return
+ */
+ public Mutable<ILogicalExpression> translateScalarFucntion(ExprNodeDesc desc);
+
+ /**
+ * translate an aggregation from hive to Algebricks
+ *
+ * @param aggregateDesc
+ * @return
+ */
+ public Mutable<ILogicalExpression> translateAggregation(AggregationDesc aggregateDesc);
+
+ /**
+ * translate unnesting (UDTF) function expression
+ *
+ * @param udtfDesc
+ * @return the translated unnesting function expression
+ */
+ public Mutable<ILogicalExpression> translateUnnestFunction(UDTFDesc udtfDesc, Mutable<ILogicalExpression> argument);
+
+ /**
+ * get the variables of a schema
+ *
+ * @param schema
+ * @return
+ */
+ public List<LogicalVariable> getVariablesFromSchema(Schema schema);
+
+ /**
+ * get variable from name
+ *
+ * @param name
+ * @return
+ */
+ public LogicalVariable getVariable(String name);
+
+ /**
+ * get variable from field name
+ *
+ * @param name
+ * @return
+ */
+ public LogicalVariable getVariableFromFieldName(String name);
+
+ /**
+ * get variable from name, type
+ *
+ * @param fieldName
+ * @param type
+ * @return
+ */
+ public LogicalVariable getVariable(String fieldName, TypeInfo type);
+
+ /**
+ * get new variable from name, type
+ *
+ * @param fieldName
+ * @param type
+ * @return
+ */
+ public LogicalVariable getNewVariable(String fieldName, TypeInfo type);
+
+ /**
+ * set the metadata provider
+ *
+ * @param metadata
+ */
+ public void setMetadataProvider(IMetadataProvider<PartitionDesc, Object> metadata);
+
+ /**
+ * get the metadata provider
+ *
+ * @return the metadata provider
+ */
+ public IMetadataProvider<PartitionDesc, Object> getMetadataProvider();
+
+ /**
+ * replace the variable
+ *
+ * @param oldVar
+ * @param newVar
+ */
+ public void replaceVariable(LogicalVariable oldVar, LogicalVariable newVar);
+
+}
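
For orientation only: a condensed sketch of the typical Translator call sequence inside a visitor, modeled on the FilterVisitor that appears further down in this diff. The class name is hypothetical; the Translator calls mirror the interface above (including its translateScalarFucntion spelling).

package edu.uci.ics.hivesterix.logical.plan.visitor;

import org.apache.commons.lang3.mutable.Mutable;
import org.apache.commons.lang3.mutable.MutableObject;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.plan.FilterDesc;

import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;

public class ExampleFilterVisitor extends DefaultVisitor {

    @Override
    public Mutable<ILogicalOperator> visit(FilterOperator operator,
            Mutable<ILogicalOperator> algebricksParentOperatorRef, Translator t) throws AlgebricksException {
        // derive the input schema from the single Hive parent operator
        Schema inputSchema = t.generateInputSchema(operator.getParentOperators().get(0));

        // substitute Hive column references with logical variables, then translate the predicate
        FilterDesc desc = (FilterDesc) operator.getConf();
        t.rewriteExpression(desc.getPredicate());
        Mutable<ILogicalExpression> predicate = t.translateScalarFucntion(desc.getPredicate());

        // wrap the predicate in an Algebricks SELECT and hook it under the translated parent
        ILogicalOperator select = new SelectOperator(predicate);
        select.getInputs().add(algebricksParentOperatorRef);

        // propagate the Hive schema and rename output columns for downstream visitors
        operator.setSchema(operator.getParentOperators().get(0).getSchema());
        t.rewriteOperatorOutputSchema(t.getVariablesFromSchema(inputSchema), operator);
        return new MutableObject<ILogicalOperator>(select);
    }
}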
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Visitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Visitor.java
new file mode 100644
index 0000000..11ae357
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Visitor.java
@@ -0,0 +1,85 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor.base;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.exec.CollectOperator;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.ForwardOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.MapOperator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.ScriptOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.UDTFOperator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+
+public interface Visitor {
+
+ public Mutable<ILogicalOperator> visit(CollectOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(JoinOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(ExtractOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(MapJoinOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(SMBMapJoinOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(FilterOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(ForwardOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(GroupByOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(LateralViewForwardOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(LateralViewJoinOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(LimitOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(MapOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(ScriptOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(SelectOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(TableScanOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(FileSinkOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(ReduceSinkOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(UDTFOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(UnionOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+}
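
One plausible way to drive these visitors (an assumption about the surrounding translator, not code from this branch): offer each Hive operator to a list of Visitor implementations and take the first non-null Algebricks operator reference. Only a few operator types are shown.

package edu.uci.ics.hivesterix.logical.plan;

import java.util.List;

import org.apache.commons.lang3.mutable.Mutable;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.LimitOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;

import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
import edu.uci.ics.hivesterix.logical.plan.visitor.base.Visitor;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;

public class ExampleVisitorDispatch {

    /**
     * Offers a Hive operator to each registered visitor in turn; the first
     * non-null result becomes the Algebricks operator reference for this node.
     */
    @SuppressWarnings("rawtypes")
    public static Mutable<ILogicalOperator> dispatch(Operator hiveOp, Mutable<ILogicalOperator> parentRef,
            Translator t, List<Visitor> visitors) throws AlgebricksException {
        for (Visitor visitor : visitors) {
            Mutable<ILogicalOperator> result = null;
            if (hiveOp instanceof TableScanOperator) {
                result = visitor.visit((TableScanOperator) hiveOp, parentRef, t);
            } else if (hiveOp instanceof FilterOperator) {
                result = visitor.visit((FilterOperator) hiveOp, parentRef, t);
            } else if (hiveOp instanceof LimitOperator) {
                result = visitor.visit((LimitOperator) hiveOp, parentRef, t);
            } // ...remaining Hive operator types elided for brevity
            if (result != null) {
                return result;
            }
        }
        return parentRef; // no visitor produced a new operator; reuse the parent reference
    }
}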
diff --git a/hivesterix/pom.xml b/hivesterix/pom.xml
index d7621ac..a8ef4ee 100644
--- a/hivesterix/pom.xml
+++ b/hivesterix/pom.xml
@@ -1,557 +1,139 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <groupId>edu.uci.ics.hivesterix</groupId>
- <artifactId>hivesterix</artifactId>
- <version>0.2.3-SNAPSHOT</version>
- <name>hivesterix</name>
- <dependencies>
- <dependency>
- <groupId>javax.servlet</groupId>
- <artifactId>servlet-api</artifactId>
- <version>2.5</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <version>4.8.1</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>args4j</groupId>
- <artifactId>args4j</artifactId>
- <version>2.0.12</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.json</groupId>
- <artifactId>json</artifactId>
- <version>20090211</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-server</artifactId>
- <version>8.0.0.M1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-servlet</artifactId>
- <version>8.0.0.M1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <version>0.20.2</version>
- </dependency>
- <dependency>
- <groupId>jline</groupId>
- <artifactId>jline</artifactId>
- <version>0.9.94</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-core</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-connectionpool</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-enhancer</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-rdbms</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-dbcp</groupId>
- <artifactId>commons-dbcp</artifactId>
- <version>1.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-pool</groupId>
- <artifactId>commons-pool</artifactId>
- <version>1.5.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-collections</groupId>
- <artifactId>commons-collections</artifactId>
- <version>3.2.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-lang</groupId>
- <artifactId>commons-lang</artifactId>
- <version>2.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>javax</groupId>
- <artifactId>jdo2-api</artifactId>
- <version>2.3-ec</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>com.facebook</groupId>
- <artifactId>libfb303</artifactId>
- <version>0.5.0</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.thrift</groupId>
- <artifactId>libthrift</artifactId>
- <version>0.5.0</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>cli</artifactId>
- <version>1.2</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache</groupId>
- <artifactId>log4j</artifactId>
- <version>1.2.15</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.antlr</groupId>
- <artifactId>antlr-runtime</artifactId>
- <version>3.0.1</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-cli</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-common</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-exec</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-hwi</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-jdbc</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-metastore</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-service</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-shims</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-serde</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-api</artifactId>
- <version>1.6.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-cli</groupId>
- <artifactId>commons-cli</artifactId>
- <version>1.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- <version>1.6.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-test</artifactId>
- <version>0.20.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-logging</groupId>
- <artifactId>commons-logging</artifactId>
- <version>1.1.1</version>
- <type>jar</type>
- <classifier>api</classifier>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>com.google.guava</groupId>
- <artifactId>guava</artifactId>
- <version>r06</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.antlr</groupId>
- <artifactId>stringtemplate</artifactId>
- <version>3.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.derby</groupId>
- <artifactId>derby</artifactId>
- <version>10.8.1.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
- <version>0.90.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>edu.uci.ics.hyracks</groupId>
- <artifactId>algebricks-compiler</artifactId>
- <version>0.2.3-SNAPSHOT</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>edu.uci.ics.hyracks</groupId>
- <artifactId>hyracks-control-cc</artifactId>
- <version>0.2.3-SNAPSHOT</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>edu.uci.ics.hyracks</groupId>
- <artifactId>hyracks-control-nc</artifactId>
- <version>0.2.3-SNAPSHOT</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-compiler-plugin</artifactId>
- <version>2.0.2</version>
- <configuration>
- <source>1.6</source>
- <target>1.6</target>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>appassembler-maven-plugin</artifactId>
- <version>1.3</version>
- <executions>
- <execution>
- <configuration>
- <programs>
- <program>
- <mainClass>edu.uci.ics.asterix.hive.cli.CliDriver</mainClass>
- <name>algebricks-hivesterix-cmd</name>
- </program>
- </programs>
- <repositoryLayout>flat</repositoryLayout>
- <repositoryName>lib</repositoryName>
- </configuration>
- <phase>package</phase>
- <goals>
- <goal>assemble</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <artifactId>maven-assembly-plugin</artifactId>
- <version>2.2-beta-5</version>
- <executions>
- <execution>
- <configuration>
- <descriptors>
- <descriptor>src/main/assembly/binary-assembly.xml</descriptor>
- </descriptors>
- </configuration>
- <phase>package</phase>
- <goals>
- <goal>attached</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <version>2.7.2</version>
- <configuration>
- <forkMode>pertest</forkMode>
- <argLine>-enableassertions -Xmx2047m -Dfile.encoding=UTF-8
- -Djava.util.logging.config.file=src/test/resources/logging.properties</argLine>
- <includes>
- <include>**/test/optimizer/*TestSuite.java</include>
- <include>**/test/optimizer/*Test.java</include>
- <include>**/test/runtimefunction/*TestSuite.java</include>
- <include>**/test/runtimefunction/*Test.java</include>
- </includes>
- </configuration>
- </plugin>
- <plugin>
- <artifactId>maven-resources-plugin</artifactId>
- <version>2.5</version>
- <executions>
- <execution>
- <id>copy-resources</id>
- <!-- here the phase you need -->
- <phase>package</phase>
- <goals>
- <goal>copy-resources</goal>
- </goals>
- <configuration>
- <outputDirectory>target/appassembler/bin</outputDirectory>
- <resources>
- <resource>
- <directory>resource/bin</directory>
- </resource>
- </resources>
- </configuration>
- </execution>
- <execution>
- <id>copy-conf</id>
- <!-- here the phase you need -->
- <phase>package</phase>
- <goals>
- <goal>copy-resources</goal>
- </goals>
- <configuration>
- <outputDirectory>target/appassembler/conf</outputDirectory>
- <resources>
- <resource>
- <directory>conf</directory>
- </resource>
- </resources>
- </configuration>
- </execution>
- <execution>
- <id>copy-asterix</id>
- <!-- here the phase you need -->
- <phase>package</phase>
- <goals>
- <goal>copy-resources</goal>
- </goals>
- <configuration>
- <outputDirectory>target/appassembler/asterix</outputDirectory>
- <resources>
- <resource>
- <directory>resource/asterix</directory>
- </resource>
- </resources>
- </configuration>
- </execution>
- <execution>
- <id>copy-asterix-dbg</id>
- <!-- here the phase you need -->
- <phase>package</phase>
- <goals>
- <goal>copy-resources</goal>
- </goals>
- <configuration>
- <outputDirectory>target/appassembler/asterix_dbg</outputDirectory>
- <resources>
- <resource>
- <directory>resource/asterix_dbg</directory>
- </resource>
- </resources>
- </configuration>
- </execution>
- <execution>
- <id>copy-hivesterix</id>
- <!-- here the phase you need -->
- <phase>package</phase>
- <goals>
- <goal>copy-resources</goal>
- </goals>
- <configuration>
- <outputDirectory>target/appassembler/hivesterix</outputDirectory>
- <resources>
- <resource>
- <directory>resource/hivesterix</directory>
- </resource>
- </resources>
- </configuration>
- </execution>
- <execution>
- <id>copy-conf2</id>
- <!-- here the phase you need -->
- <phase>package</phase>
- <goals>
- <goal>copy-resources</goal>
- </goals>
- <configuration>
- <outputDirectory>target/appassembler/hivesterix/conf</outputDirectory>
- <resources>
- <resource>
- <directory>conf</directory>
- </resource>
- </resources>
- </configuration>
- </execution>
- <execution>
- <id>copy-data</id>
- <!-- here the phase you need -->
- <phase>package</phase>
- <goals>
- <goal>copy-resources</goal>
- </goals>
- <configuration>
- <outputDirectory>data</outputDirectory>
- <resources>
- <resource>
- <directory>resource/data</directory>
- </resource>
- </resources>
- </configuration>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <artifactId>maven-clean-plugin</artifactId>
- <version>2.4.1</version>
- <configuration>
- <filesets>
- <fileset>
- <directory>.</directory>
- <includes>
- <include>metastore*</include>
- <include>hadoop*</include>
- <include>edu*</include>
- <include>tmp*</include>
- <include>build*</include>
- <include>target*</include>
- <include>log*</include>
- <include>derby.log</include>
- <include>ClusterController*</include>
- </includes>
- </fileset>
- </filesets>
- </configuration>
- </plugin>
- </plugins>
- </build>
- <repositories>
- <repository>
- <releases>
- <enabled>true</enabled>
- <updatePolicy>always</updatePolicy>
- <checksumPolicy>warn</checksumPolicy>
- </releases>
- <snapshots>
- <enabled>true</enabled>
- <updatePolicy>always</updatePolicy>
- <checksumPolicy>fail</checksumPolicy>
- </snapshots>
- <id>third-party</id>
- <url>http://obelix.ics.uci.edu/nexus/content/repositories/third-party</url>
- </repository>
- <repository>
- <releases>
- <enabled>true</enabled>
- <updatePolicy>always</updatePolicy>
- <checksumPolicy>warn</checksumPolicy>
- </releases>
- <snapshots>
- <enabled>true</enabled>
- <updatePolicy>always</updatePolicy>
- <checksumPolicy>fail</checksumPolicy>
- </snapshots>
- <id>hyracks-public-release</id>
- <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-public-releases</url>
- </repository>
- </repositories>
- <distributionManagement>
- <!-- use the following if you're not using a snapshot version. -->
- <repository>
- <id>hivesterix</id>
- <name>hivesterix</name>
- <url>scp://obelix.ics.uci.edu/nexus/content/groups/hivesterix-public</url>
- </repository>
- <!-- use the following if you ARE using a snapshot version. -->
- <snapshotRepository>
- <id>hivesterix</id>
- <name>Repository Name</name>
- <url>scp://obelix.ics.uci.edu/nexus/content/groups/hivesterix-public</url>
- </snapshotRepository>
- </distributionManagement>
-</project>
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <packaging>pom</packaging>
+ <name>hivesterix</name>
+
+ <properties>
+ <jvm.extraargs />
+ </properties>
+
+ <profiles>
+ <profile>
+ <id>macosx</id>
+ <activation>
+ <os>
+ <name>mac os x</name>
+ </os>
+ <jdk>1.7</jdk>
+ </activation>
+ <properties>
+ <jvm.extraargs>-Djava.nio.channels.spi.SelectorProvider=sun.nio.ch.KQueueSelectorProvider</jvm.extraargs>
+ </properties>
+ </profile>
+ </profiles>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-release-plugin</artifactId>
+ <version>2.0</version>
+ <configuration>
+ <goals>package source:jar javadoc:jar deploy:deploy</goals>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>versions-maven-plugin</artifactId>
+ <version>1.2</version>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>2.13</version>
+ <configuration>
+ <forkMode>pertest</forkMode>
+ <argLine>-enableassertions
+ -Djava.util.logging.config.file=${user.home}/logging.properties
+ -Xdebug
+ -Xrunjdwp:transport=dt_socket,server=y,address=8000,suspend=n
+ ${jvm.extraargs}</argLine>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <reporting>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-changelog-plugin</artifactId>
+ <version>2.2</version>
+ </plugin>
+ </plugins>
+ </reporting>
+
+ <distributionManagement>
+ <repository>
+ <id>hyracks-releases</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-releases/</url>
+ </repository>
+ <snapshotRepository>
+ <id>hyracks-snapshots</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-snapshots/</url>
+ </snapshotRepository>
+ </distributionManagement>
+
+ <repositories>
+ <repository>
+ <id>hyracks-public</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/groups/hyracks-public/</url>
+ </repository>
+ <repository>
+ <id>jboss-public</id>
+ <url>https://repository.jboss.org/nexus/content/groups/public/</url>
+ </repository>
+ <repository>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>warn</checksumPolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>fail</checksumPolicy>
+ </snapshots>
+ <id>third-party</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/third-party</url>
+ </repository>
+ <repository>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>warn</checksumPolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>fail</checksumPolicy>
+ </snapshots>
+ <id>hyracks-public-release</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-public-releases</url>
+ </repository>
+ </repositories>
+
+ <pluginRepositories>
+ <pluginRepository>
+ <id>hyracks-public</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/groups/hyracks-public/</url>
+ <releases>
+ <updatePolicy>always</updatePolicy>
+ </releases>
+ </pluginRepository>
+ </pluginRepositories>
+
+ <modules>
+ <module>hivesterix-runtime</module>
+ <module>hivesterix-translator</module>
+ <module>hivesterix-optimizer</module>
+ <module>hivesterix-serde</module>
+ <module>hivesterix-dist</module>
+ <module>hivesterix-common</module>
+ </modules>
+</project>
diff --git a/hivesterix/src/main/assembly/binary-assembly.xml b/hivesterix/src/main/assembly/binary-assembly.xml
deleted file mode 100755
index 0500499..0000000
--- a/hivesterix/src/main/assembly/binary-assembly.xml
+++ /dev/null
@@ -1,19 +0,0 @@
-<assembly>
- <id>binary-assembly</id>
- <formats>
- <format>zip</format>
- <format>dir</format>
- </formats>
- <includeBaseDirectory>false</includeBaseDirectory>
- <fileSets>
- <fileSet>
- <directory>target/appassembler/bin</directory>
- <outputDirectory>bin</outputDirectory>
- <fileMode>0755</fileMode>
- </fileSet>
- <fileSet>
- <directory>target/appassembler/lib</directory>
- <outputDirectory>lib</outputDirectory>
- </fileSet>
- </fileSets>
-</assembly>
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionConstant.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionConstant.java
deleted file mode 100644
index 3c84566..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionConstant.java
+++ /dev/null
@@ -1,25 +0,0 @@
-package edu.uci.ics.hivesterix.logical.expression;
-
-/**
- * some constants for expression
- *
- * @author yingyib
- *
- */
-public class ExpressionConstant {
-
- /**
- * name space for function identifier
- */
- public static String NAMESPACE = "hive";
-
- /**
- * field expression: modeled as function in Algebricks
- */
- public static String FIELDACCESS = "fieldaccess";
-
- /**
- * null string: modeled as null in Algebricks
- */
- public static String NULL = "null";
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveAlgebricksBuiltInFunctionMap.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveAlgebricksBuiltInFunctionMap.java
deleted file mode 100644
index 18380f7..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveAlgebricksBuiltInFunctionMap.java
+++ /dev/null
@@ -1,83 +0,0 @@
-package edu.uci.ics.hivesterix.logical.expression;
-
-import java.util.HashMap;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-
-public class HiveAlgebricksBuiltInFunctionMap {
-
- /**
- * hive auqa builtin function map instance
- */
- public static HiveAlgebricksBuiltInFunctionMap INSTANCE = new HiveAlgebricksBuiltInFunctionMap();
-
- /**
- * hive to Algebricks function name mapping
- */
- private HashMap<String, FunctionIdentifier> hiveToAlgebricksMap = new HashMap<String, FunctionIdentifier>();
-
- /**
- * Algebricks to hive function name mapping
- */
- private HashMap<FunctionIdentifier, String> AlgebricksToHiveMap = new HashMap<FunctionIdentifier, String>();
-
- /**
- * the bi-directional mapping between hive functions and Algebricks
- * functions
- */
- private HiveAlgebricksBuiltInFunctionMap() {
- hiveToAlgebricksMap.put("and", AlgebricksBuiltinFunctions.AND);
- hiveToAlgebricksMap.put("or", AlgebricksBuiltinFunctions.OR);
- hiveToAlgebricksMap.put("!", AlgebricksBuiltinFunctions.NOT);
- hiveToAlgebricksMap.put("not", AlgebricksBuiltinFunctions.NOT);
- hiveToAlgebricksMap.put("=", AlgebricksBuiltinFunctions.EQ);
- hiveToAlgebricksMap.put("<>", AlgebricksBuiltinFunctions.NEQ);
- hiveToAlgebricksMap.put(">", AlgebricksBuiltinFunctions.GT);
- hiveToAlgebricksMap.put("<", AlgebricksBuiltinFunctions.LT);
- hiveToAlgebricksMap.put(">=", AlgebricksBuiltinFunctions.GE);
- hiveToAlgebricksMap.put("<=", AlgebricksBuiltinFunctions.LE);
-
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.AND, "and");
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.OR, "or");
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.NOT, "!");
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.NOT, "not");
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.EQ, "=");
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.NEQ, "<>");
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.GT, ">");
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.LT, "<");
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.GE, ">=");
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.LE, "<=");
- }
-
- /**
- * get hive function name from Algebricks function identifier
- *
- * @param AlgebricksId
- * @return hive
- */
- public String getHiveFunctionName(FunctionIdentifier AlgebricksId) {
- return AlgebricksToHiveMap.get(AlgebricksId);
- }
-
- /**
- * get hive UDF or Generic class's corresponding built-in functions
- *
- * @param funcClass
- * @return function identifier
- */
- public FunctionIdentifier getAlgebricksFunctionId(Class<?> funcClass) {
- Description annotation = (Description) funcClass
- .getAnnotation(Description.class);
- String hiveUDFName = "";
- if (annotation == null) {
- hiveUDFName = null;
- return null;
- } else {
- hiveUDFName = annotation.name();
- return hiveToAlgebricksMap.get(hiveUDFName);
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveExpressionTypeComputer.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveExpressionTypeComputer.java
deleted file mode 100644
index afb7d39..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveExpressionTypeComputer.java
+++ /dev/null
@@ -1,200 +0,0 @@
-package edu.uci.ics.hivesterix.logical.expression;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.hadoop.hive.ql.exec.FunctionInfo;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IExpressionTypeComputer;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
-
-public class HiveExpressionTypeComputer implements IExpressionTypeComputer {
-
- public static IExpressionTypeComputer INSTANCE = new HiveExpressionTypeComputer();
-
- @Override
- public Object getType(ILogicalExpression expr,
- IMetadataProvider<?, ?> metadataProvider,
- IVariableTypeEnvironment env) throws AlgebricksException {
- if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
- /**
- * function expression
- */
- AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
- IFunctionInfo funcInfo = funcExpr.getFunctionInfo();
-
- /**
- * argument expressions, types, object inspectors
- */
- List<Mutable<ILogicalExpression>> arguments = funcExpr
- .getArguments();
- List<TypeInfo> argumentTypes = new ArrayList<TypeInfo>();
-
- /**
- * get types of argument
- */
- for (Mutable<ILogicalExpression> argument : arguments) {
- TypeInfo type = (TypeInfo) getType(argument.getValue(),
- metadataProvider, env);
- argumentTypes.add(type);
- }
-
- ObjectInspector[] childrenOIs = new ObjectInspector[argumentTypes
- .size()];
-
- /**
- * get object inspector
- */
- for (int i = 0; i < argumentTypes.size(); i++) {
- childrenOIs[i] = TypeInfoUtils
- .getStandardWritableObjectInspectorFromTypeInfo(argumentTypes
- .get(i));
- }
-
- /**
- * type inference for scalar function
- */
- if (funcExpr instanceof ScalarFunctionCallExpression) {
-
- FunctionIdentifier AlgebricksId = funcInfo
- .getFunctionIdentifier();
- Object functionInfo = ((HiveFunctionInfo) funcInfo).getInfo();
- String udfName = HiveAlgebricksBuiltInFunctionMap.INSTANCE
- .getHiveFunctionName(AlgebricksId);
- GenericUDF udf;
- if (udfName != null) {
- /**
- * get corresponding function info for built-in functions
- */
- FunctionInfo fInfo = FunctionRegistry
- .getFunctionInfo(udfName);
- udf = fInfo.getGenericUDF();
- } else if (functionInfo != null) {
- /**
- * for GenericUDFBridge: we should not call get type of this
- * hive expression, because parameters may have been
- * changed!
- */
- ExprNodeGenericFuncDesc hiveExpr = (ExprNodeGenericFuncDesc) functionInfo;
- udf = hiveExpr.getGenericUDF();
- } else {
- /**
- * for other generic UDF
- */
- Class<?> udfClass;
- try {
- udfClass = Class.forName(AlgebricksId.getName());
- udf = (GenericUDF) udfClass.newInstance();
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- }
- }
- /**
- * doing the actual type inference
- */
- ObjectInspector oi = null;
- try {
- oi = udf.initialize(childrenOIs);
- } catch (Exception e) {
- e.printStackTrace();
- }
-
- TypeInfo exprType = TypeInfoUtils
- .getTypeInfoFromObjectInspector(oi);
- return exprType;
-
- } else if (funcExpr instanceof AggregateFunctionCallExpression) {
- /**
- * hive aggregation info
- */
- AggregationDesc aggregateDesc = (AggregationDesc) ((HiveFunctionInfo) funcExpr
- .getFunctionInfo()).getInfo();
- /**
- * type inference for aggregation function
- */
- GenericUDAFEvaluator result = aggregateDesc
- .getGenericUDAFEvaluator();
-
- ObjectInspector returnOI = null;
- try {
- returnOI = result
- .init(aggregateDesc.getMode(), childrenOIs);
- } catch (HiveException e) {
- e.printStackTrace();
- }
- TypeInfo exprType = TypeInfoUtils
- .getTypeInfoFromObjectInspector(returnOI);
- return exprType;
- } else if (funcExpr instanceof UnnestingFunctionCallExpression) {
- /**
- * type inference for UDTF function
- */
- UDTFDesc hiveDesc = (UDTFDesc) ((HiveFunctionInfo) funcExpr
- .getFunctionInfo()).getInfo();
- GenericUDTF udtf = hiveDesc.getGenericUDTF();
- ObjectInspector returnOI = null;
- try {
- returnOI = udtf.initialize(childrenOIs);
- } catch (HiveException e) {
- e.printStackTrace();
- }
- TypeInfo exprType = TypeInfoUtils
- .getTypeInfoFromObjectInspector(returnOI);
- return exprType;
- } else {
- throw new IllegalStateException(
- "unrecognized function expression "
- + expr.getClass().getName());
- }
- } else if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
- /**
- * get type for variable in the environment
- */
- VariableReferenceExpression varExpr = (VariableReferenceExpression) expr;
- LogicalVariable var = varExpr.getVariableReference();
- TypeInfo type = (TypeInfo) env.getVarType(var);
- return type;
- } else if (expr.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
- /**
- * get type for constant, from its java class
- */
- ConstantExpression constExpr = (ConstantExpression) expr;
- HivesterixConstantValue value = (HivesterixConstantValue) constExpr
- .getValue();
- TypeInfo type = TypeInfoFactory
- .getPrimitiveTypeInfoFromJavaPrimitive(value.getObject()
- .getClass());
- return type;
- } else {
- throw new IllegalStateException("illegal expressions "
- + expr.getClass().getName());
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveFunctionInfo.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveFunctionInfo.java
deleted file mode 100644
index 220bd00..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveFunctionInfo.java
+++ /dev/null
@@ -1,36 +0,0 @@
-package edu.uci.ics.hivesterix.logical.expression;
-
-import java.io.Serializable;
-
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
-
-public class HiveFunctionInfo implements IFunctionInfo, Serializable {
-
- private static final long serialVersionUID = 1L;
-
- /**
- * primary function identifier
- */
- private transient FunctionIdentifier fid;
-
- /**
- * secondary function identifier: function name
- */
- private transient Object secondaryFid;
-
- public HiveFunctionInfo(FunctionIdentifier fid, Object secondFid) {
- this.fid = fid;
- this.secondaryFid = secondFid;
- }
-
- @Override
- public FunctionIdentifier getFunctionIdentifier() {
- return fid;
- }
-
- public Object getInfo() {
- return secondaryFid;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveMergeAggregationExpressionFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveMergeAggregationExpressionFactory.java
deleted file mode 100644
index 8dea691..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveMergeAggregationExpressionFactory.java
+++ /dev/null
@@ -1,84 +0,0 @@
-package edu.uci.ics.hivesterix.logical.expression;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IMergeAggregationExpressionFactory;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-
-/**
- * generate merge aggregation expression from an aggregation expression
- *
- * @author yingyib
- *
- */
-public class HiveMergeAggregationExpressionFactory implements
- IMergeAggregationExpressionFactory {
-
- public static IMergeAggregationExpressionFactory INSTANCE = new HiveMergeAggregationExpressionFactory();
-
- @Override
- public ILogicalExpression createMergeAggregation(ILogicalExpression expr,
- IOptimizationContext context) throws AlgebricksException {
- /**
- * type inference for scalar function
- */
- if (expr instanceof AggregateFunctionCallExpression) {
- AggregateFunctionCallExpression funcExpr = (AggregateFunctionCallExpression) expr;
- /**
- * hive aggregation info
- */
- AggregationDesc aggregator = (AggregationDesc) ((HiveFunctionInfo) funcExpr
- .getFunctionInfo()).getInfo();
- LogicalVariable inputVar = context.newVar();
- ExprNodeDesc col = new ExprNodeColumnDesc(
- TypeInfoFactory.voidTypeInfo, inputVar.toString(), null,
- false);
- ArrayList<ExprNodeDesc> parameters = new ArrayList<ExprNodeDesc>();
- parameters.add(col);
-
- GenericUDAFEvaluator.Mode mergeMode;
- if (aggregator.getMode() == GenericUDAFEvaluator.Mode.PARTIAL1)
- mergeMode = GenericUDAFEvaluator.Mode.PARTIAL2;
- else if (aggregator.getMode() == GenericUDAFEvaluator.Mode.COMPLETE)
- mergeMode = GenericUDAFEvaluator.Mode.FINAL;
- else
- mergeMode = aggregator.getMode();
- AggregationDesc mergeDesc = new AggregationDesc(
- aggregator.getGenericUDAFName(),
- aggregator.getGenericUDAFEvaluator(), parameters,
- aggregator.getDistinct(), mergeMode);
-
- String UDAFName = mergeDesc.getGenericUDAFName();
- List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();
- arguments.add(new MutableObject<ILogicalExpression>(
- new VariableReferenceExpression(inputVar)));
-
- FunctionIdentifier funcId = new FunctionIdentifier(
- ExpressionConstant.NAMESPACE, UDAFName + "("
- + mergeDesc.getMode() + ")");
- HiveFunctionInfo funcInfo = new HiveFunctionInfo(funcId, mergeDesc);
- AggregateFunctionCallExpression aggregationExpression = new AggregateFunctionCallExpression(
- funcInfo, false, arguments);
- return aggregationExpression;
- } else {
- throw new IllegalStateException("illegal expressions "
- + expr.getClass().getName());
- }
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveNullableTypeComputer.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveNullableTypeComputer.java
deleted file mode 100644
index 10c9b8a..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveNullableTypeComputer.java
+++ /dev/null
@@ -1,15 +0,0 @@
-package edu.uci.ics.hivesterix.logical.expression;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.INullableTypeComputer;
-
-public class HiveNullableTypeComputer implements INullableTypeComputer {
-
- public static INullableTypeComputer INSTANCE = new HiveNullableTypeComputer();
-
- @Override
- public Object makeNullableType(Object type) throws AlgebricksException {
- return type;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivePartialAggregationTypeComputer.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivePartialAggregationTypeComputer.java
deleted file mode 100644
index 7062e26..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivePartialAggregationTypeComputer.java
+++ /dev/null
@@ -1,116 +0,0 @@
-package edu.uci.ics.hivesterix.logical.expression;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IExpressionTypeComputer;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IPartialAggregationTypeComputer;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
-
-public class HivePartialAggregationTypeComputer implements
- IPartialAggregationTypeComputer {
-
- public static IPartialAggregationTypeComputer INSTANCE = new HivePartialAggregationTypeComputer();
-
- @Override
- public Object getType(ILogicalExpression expr,
- IVariableTypeEnvironment env,
- IMetadataProvider<?, ?> metadataProvider)
- throws AlgebricksException {
- if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
- IExpressionTypeComputer tc = HiveExpressionTypeComputer.INSTANCE;
- /**
- * function expression
- */
- AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
-
- /**
- * argument expressions, types, object inspectors
- */
- List<Mutable<ILogicalExpression>> arguments = funcExpr
- .getArguments();
- List<TypeInfo> argumentTypes = new ArrayList<TypeInfo>();
-
- /**
- * get types of argument
- */
- for (Mutable<ILogicalExpression> argument : arguments) {
- TypeInfo type = (TypeInfo) tc.getType(argument.getValue(),
- metadataProvider, env);
- argumentTypes.add(type);
- }
-
- ObjectInspector[] childrenOIs = new ObjectInspector[argumentTypes
- .size()];
-
- /**
- * get object inspector
- */
- for (int i = 0; i < argumentTypes.size(); i++) {
- childrenOIs[i] = TypeInfoUtils
- .getStandardWritableObjectInspectorFromTypeInfo(argumentTypes
- .get(i));
- }
-
- /**
- * type inference for scalar function
- */
- if (funcExpr instanceof AggregateFunctionCallExpression) {
- /**
- * hive aggregation info
- */
- AggregationDesc aggregateDesc = (AggregationDesc) ((HiveFunctionInfo) funcExpr
- .getFunctionInfo()).getInfo();
- /**
- * type inference for aggregation function
- */
- GenericUDAFEvaluator result = aggregateDesc
- .getGenericUDAFEvaluator();
-
- ObjectInspector returnOI = null;
- try {
- returnOI = result.init(
- getPartialMode(aggregateDesc.getMode()),
- childrenOIs);
- } catch (HiveException e) {
- e.printStackTrace();
- }
- TypeInfo exprType = TypeInfoUtils
- .getTypeInfoFromObjectInspector(returnOI);
- return exprType;
- } else {
- throw new IllegalStateException("illegal expressions "
- + expr.getClass().getName());
- }
- } else {
- throw new IllegalStateException("illegal expressions "
- + expr.getClass().getName());
- }
- }
-
- private Mode getPartialMode(Mode mode) {
- Mode partialMode;
- if (mode == Mode.FINAL)
- partialMode = Mode.PARTIAL2;
- else if (mode == Mode.COMPLETE)
- partialMode = Mode.PARTIAL1;
- else
- partialMode = mode;
- return partialMode;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivesterixConstantValue.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivesterixConstantValue.java
deleted file mode 100644
index de9cea6..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivesterixConstantValue.java
+++ /dev/null
@@ -1,55 +0,0 @@
-package edu.uci.ics.hivesterix.logical.expression;
-
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IAlgebricksConstantValue;
-
-public class HivesterixConstantValue implements IAlgebricksConstantValue {
-
- private Object object;
-
- public HivesterixConstantValue(Object object) {
- this.setObject(object);
- }
-
- @Override
- public boolean isFalse() {
- return object == Boolean.FALSE;
- }
-
- @Override
- public boolean isNull() {
- return object == null;
- }
-
- @Override
- public boolean isTrue() {
- return object == Boolean.TRUE;
- }
-
- public void setObject(Object object) {
- this.object = object;
- }
-
- public Object getObject() {
- return object;
- }
-
- @Override
- public String toString() {
- return object.toString();
- }
-
- @Override
- public boolean equals(Object o) {
- if (!(o instanceof HivesterixConstantValue)) {
- return false;
- }
- HivesterixConstantValue v2 = (HivesterixConstantValue) o;
- return object.equals(v2.getObject());
- }
-
- @Override
- public int hashCode() {
- return object.hashCode();
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/Schema.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/Schema.java
deleted file mode 100644
index 2b1d191..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/Schema.java
+++ /dev/null
@@ -1,39 +0,0 @@
-package edu.uci.ics.hivesterix.logical.expression;
-
-import java.io.Serializable;
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-
-public class Schema implements Serializable {
-
- private static final long serialVersionUID = 1L;
-
- private List<String> fieldNames;
-
- private List<TypeInfo> fieldTypes;
-
- public Schema(List<String> fieldNames, List<TypeInfo> fieldTypes) {
- this.fieldNames = fieldNames;
- this.fieldTypes = fieldTypes;
- }
-
- public ObjectInspector toObjectInspector() {
- return LazyUtils.getLazyObjectInspector(fieldNames, fieldTypes);
- }
-
- public List<String> getNames() {
- return fieldNames;
- }
-
- public List<TypeInfo> getTypes() {
- return fieldTypes;
- }
-
- public Object[] getSchema() {
- return fieldTypes.toArray();
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveLogicalPlanAndMetaData.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveLogicalPlanAndMetaData.java
deleted file mode 100644
index 494e796..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveLogicalPlanAndMetaData.java
+++ /dev/null
@@ -1,36 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan;
-
-import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlanAndMetadata;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
-
-@SuppressWarnings({ "rawtypes", "unchecked" })
-public class HiveLogicalPlanAndMetaData implements ILogicalPlanAndMetadata {
-
- IMetadataProvider metadata;
- ILogicalPlan plan;
-
- public HiveLogicalPlanAndMetaData(ILogicalPlan plan,
- IMetadataProvider metadata) {
- this.plan = plan;
- this.metadata = metadata;
- }
-
- @Override
- public IMetadataProvider getMetadataProvider() {
- return metadata;
- }
-
- @Override
- public ILogicalPlan getPlan() {
- return plan;
- }
-
- @Override
- public AlgebricksPartitionConstraint getClusterLocations() {
- // TODO Auto-generated method stub
- return null;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveOperatorAnnotations.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveOperatorAnnotations.java
deleted file mode 100644
index 0d234fb..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveOperatorAnnotations.java
+++ /dev/null
@@ -1,8 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan;
-
-public class HiveOperatorAnnotations {
-
- // hints
- public static final String LOCAL_GROUP_BY = "LOCAL_GROUP_BY";
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ExtractVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ExtractVisitor.java
deleted file mode 100644
index 9a84164..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ExtractVisitor.java
+++ /dev/null
@@ -1,28 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-
-public class ExtractVisitor extends DefaultVisitor {
-
- @Override
- public Mutable<ILogicalOperator> visit(ExtractOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
- Schema currentSchema = t.generateInputSchema(operator
- .getParentOperators().get(0));
- operator.setSchema(operator.getParentOperators().get(0).getSchema());
- List<LogicalVariable> latestOutputSchema = t
- .getVariablesFromSchema(currentSchema);
- t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
- return null;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/FilterVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/FilterVisitor.java
deleted file mode 100644
index b276ba9..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/FilterVisitor.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.FilterDesc;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
-
-public class FilterVisitor extends DefaultVisitor {
-
- @Override
- public Mutable<ILogicalOperator> visit(FilterOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
- Schema currentSchema = t.generateInputSchema(operator
- .getParentOperators().get(0));
-
- FilterDesc desc = (FilterDesc) operator.getConf();
- ExprNodeDesc predicate = desc.getPredicate();
- t.rewriteExpression(predicate);
-
- Mutable<ILogicalExpression> exprs = t.translateScalarFucntion(desc
- .getPredicate());
- ILogicalOperator currentOperator = new SelectOperator(exprs);
- currentOperator.getInputs().add(AlgebricksParentOperatorRef);
-
- // populate the schema from upstream operator
- operator.setSchema(operator.getParentOperators().get(0).getSchema());
- List<LogicalVariable> latestOutputSchema = t
- .getVariablesFromSchema(currentSchema);
- t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/GroupByVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/GroupByVisitor.java
deleted file mode 100644
index d2180a3..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/GroupByVisitor.java
+++ /dev/null
@@ -1,291 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.lang.reflect.Field;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.GroupByOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.GroupByDesc;
-import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
-import org.apache.hadoop.hive.ql.plan.api.OperatorType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.plan.HiveOperatorAnnotations;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.OperatorAnnotations;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistinctOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.NestedTupleSourceOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.plan.ALogicalPlanImpl;
-
-@SuppressWarnings({ "rawtypes", "unchecked" })
-public class GroupByVisitor extends DefaultVisitor {
-
- private List<Mutable<ILogicalExpression>> AlgebricksAggs = new ArrayList<Mutable<ILogicalExpression>>();
- private List<IFunctionInfo> localAggs = new ArrayList<IFunctionInfo>();
- private boolean isDistinct = false;
- private boolean gbyKeyNotRedKey = false;
-
- @Override
- public Mutable<ILogicalOperator> visit(GroupByOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException {
-
- // get descriptors
- GroupByDesc desc = (GroupByDesc) operator.getConf();
- GroupByDesc.Mode mode = desc.getMode();
-
- List<ExprNodeDesc> keys = desc.getKeys();
- List<AggregationDesc> aggregators = desc.getAggregators();
-
- Operator child = operator.getChildOperators().get(0);
-
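- // if the number of group-by keys differs from the downstream reduce
- // sink's partition keys, this group-by can only act as a local
- // (partial) group-by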
- if (child.getType() == OperatorType.REDUCESINK) {
- List<ExprNodeDesc> partKeys = ((ReduceSinkDesc) child.getConf())
- .getPartitionCols();
- if (keys.size() != partKeys.size())
- gbyKeyNotRedKey = true;
- }
-
- if (mode == GroupByDesc.Mode.PARTIAL1 || mode == GroupByDesc.Mode.HASH
- || mode == GroupByDesc.Mode.COMPLETE
- || (aggregators.size() == 0 && isDistinct == false)
- || gbyKeyNotRedKey) {
- AlgebricksAggs.clear();
- // add an assign operator if the key is not a column expression
- ArrayList<LogicalVariable> keyVariables = new ArrayList<LogicalVariable>();
- ILogicalOperator currentOperator = null;
- ILogicalOperator assignOperator = t.getAssignOperator(
- AlgebricksParentOperatorRef, keys, keyVariables);
- if (assignOperator != null) {
- currentOperator = assignOperator;
- AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
-
- // get key variable expression list
- List<Mutable<ILogicalExpression>> keyExprs = new ArrayList<Mutable<ILogicalExpression>>();
- for (LogicalVariable var : keyVariables) {
- keyExprs.add(t.translateScalarFucntion(new ExprNodeColumnDesc(
- TypeInfoFactory.intTypeInfo, var.toString(), "", false)));
- }
-
- if (aggregators.size() == 0) {
- List<Mutable<ILogicalExpression>> distinctExprs = new ArrayList<Mutable<ILogicalExpression>>();
- for (LogicalVariable var : keyVariables) {
- Mutable<ILogicalExpression> varExpr = new MutableObject<ILogicalExpression>(
- new VariableReferenceExpression(var));
- distinctExprs.add(varExpr);
- }
- t.rewriteOperatorOutputSchema(keyVariables, operator);
- isDistinct = true;
- ILogicalOperator lop = new DistinctOperator(distinctExprs);
- lop.getInputs().add(AlgebricksParentOperatorRef);
- return new MutableObject<ILogicalOperator>(lop);
- }
-
- // get the pair<LogicalVariable, ILogicalExpression> list
- List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> keyParameters = new ArrayList<Pair<LogicalVariable, Mutable<ILogicalExpression>>>();
- keyVariables.clear();
- for (Mutable<ILogicalExpression> expr : keyExprs) {
- LogicalVariable keyVar = t.getVariable(expr.getValue()
- .toString(), TypeInfoFactory.unknownTypeInfo);
- keyParameters.add(new Pair(keyVar, expr));
- keyVariables.add(keyVar);
- }
-
- // get the parameters for the aggregator operator
- ArrayList<LogicalVariable> aggVariables = new ArrayList<LogicalVariable>();
- ArrayList<Mutable<ILogicalExpression>> aggExprs = new ArrayList<Mutable<ILogicalExpression>>();
-
- // get the type of each aggregation function
- HashMap<AggregationDesc, TypeInfo> aggToType = new HashMap<AggregationDesc, TypeInfo>();
- List<ColumnInfo> columns = operator.getSchema().getSignature();
- int offset = keys.size();
- for (int i = offset; i < columns.size(); i++) {
- aggToType.put(aggregators.get(i - offset), columns.get(i)
- .getType());
- }
-
- localAggs.clear();
- // rewrite parameter expressions for all aggregators
- for (AggregationDesc aggregator : aggregators) {
- for (ExprNodeDesc parameter : aggregator.getParameters()) {
- t.rewriteExpression(parameter);
- }
- Mutable<ILogicalExpression> aggExpr = t
- .translateAggregation(aggregator);
- AbstractFunctionCallExpression localAggExpr = (AbstractFunctionCallExpression) aggExpr
- .getValue();
- localAggs.add(localAggExpr.getFunctionInfo());
-
- AggregationDesc logicalAgg = new AggregationDesc(
- aggregator.getGenericUDAFName(),
- aggregator.getGenericUDAFEvaluator(),
- aggregator.getParameters(), aggregator.getDistinct(),
- Mode.COMPLETE);
- Mutable<ILogicalExpression> logicalAggExpr = t
- .translateAggregation(logicalAgg);
-
- AlgebricksAggs.add(logicalAggExpr);
- if (!gbyKeyNotRedKey)
- aggExprs.add(logicalAggExpr);
- else
- aggExprs.add(aggExpr);
-
- aggVariables.add(t.getVariable(aggregator.getExprString()
- + aggregator.getMode(), aggToType.get(aggregator)));
- }
-
- if (child.getType() != OperatorType.REDUCESINK)
- gbyKeyNotRedKey = false;
-
- // get the sub plan list
- AggregateOperator aggOperator = new AggregateOperator(aggVariables,
- aggExprs);
- NestedTupleSourceOperator nestedTupleSource = new NestedTupleSourceOperator(
- new MutableObject<ILogicalOperator>());
- aggOperator.getInputs().add(
- new MutableObject<ILogicalOperator>(nestedTupleSource));
-
- List<Mutable<ILogicalOperator>> subRoots = new ArrayList<Mutable<ILogicalOperator>>();
- subRoots.add(new MutableObject<ILogicalOperator>(aggOperator));
- ILogicalPlan subPlan = new ALogicalPlanImpl(subRoots);
- List<ILogicalPlan> subPlans = new ArrayList<ILogicalPlan>();
- subPlans.add(subPlan);
-
- // create the group by operator
- currentOperator = new edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator(
- keyParameters,
- new ArrayList<Pair<LogicalVariable, Mutable<ILogicalExpression>>>(),
- subPlans);
- currentOperator.getInputs().add(AlgebricksParentOperatorRef);
- nestedTupleSource.getDataSourceReference()
- .setValue(currentOperator);
-
- List<LogicalVariable> outputVariables = new ArrayList<LogicalVariable>();
- outputVariables.addAll(keyVariables);
- outputVariables.addAll(aggVariables);
- t.rewriteOperatorOutputSchema(outputVariables, operator);
-
- if (gbyKeyNotRedKey) {
- currentOperator.getAnnotations().put(
- HiveOperatorAnnotations.LOCAL_GROUP_BY, Boolean.TRUE);
- }
-
- HiveConf conf = ConfUtil.getHiveConf();
- Boolean extGby = conf.getBoolean(
- "hive.algebricks.groupby.external", false);
-
- if (extGby && isSerializable(aggregators)) {
- currentOperator.getAnnotations()
- .put(OperatorAnnotations.USE_EXTERNAL_GROUP_BY,
- Boolean.TRUE);
- }
- return new MutableObject<ILogicalOperator>(currentOperator);
- } else {
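- // reduce-side (merge) phase: the aggregate expressions recorded in
- // the map-side pass get the local aggregators as step one and the
- // current aggregators as step two, turning them into two-step
- // aggregates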
- isDistinct = false;
- // rewrite parameter expressions for all aggregators
- int i = 0;
- for (AggregationDesc aggregator : aggregators) {
- for (ExprNodeDesc parameter : aggregator.getParameters()) {
- t.rewriteExpression(parameter);
- }
- Mutable<ILogicalExpression> agg = t
- .translateAggregation(aggregator);
- AggregateFunctionCallExpression originalAgg = (AggregateFunctionCallExpression) AlgebricksAggs
- .get(i).getValue();
- originalAgg.setStepOneAggregate(localAggs.get(i));
- AggregateFunctionCallExpression currentAgg = (AggregateFunctionCallExpression) agg
- .getValue();
- if (currentAgg.getFunctionInfo() != null) {
- originalAgg.setTwoStep(true);
- originalAgg.setStepTwoAggregate(currentAgg
- .getFunctionInfo());
- }
- i++;
- }
- return null;
- }
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(ReduceSinkOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
- Operator downStream = (Operator) operator.getChildOperators().get(0);
- if (!(downStream instanceof GroupByOperator)) {
- return null;
- }
-
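- // a reduce sink feeding a group-by only introduces assigns for its
- // key and value columns; the group-by itself is translated when the
- // downstream operator is visited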
- ReduceSinkDesc desc = (ReduceSinkDesc) operator.getConf();
- List<ExprNodeDesc> keys = desc.getKeyCols();
- List<ExprNodeDesc> values = desc.getValueCols();
-
- // insert assign for keys
- ArrayList<LogicalVariable> keyVariables = new ArrayList<LogicalVariable>();
- t.getAssignOperator(AlgebricksParentOperatorRef, keys, keyVariables);
-
- // insert assign for values
- ArrayList<LogicalVariable> valueVariables = new ArrayList<LogicalVariable>();
- t.getAssignOperator(AlgebricksParentOperatorRef, values, valueVariables);
-
- ArrayList<LogicalVariable> columns = new ArrayList<LogicalVariable>();
- columns.addAll(keyVariables);
- columns.addAll(valueVariables);
-
- t.rewriteOperatorOutputSchema(columns, operator);
- return null;
- }
-
- private boolean isSerializable(List<AggregationDesc> descs)
- throws AlgebricksException {
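- // the external group-by is only enabled when every field of the
- // UDAF's aggregation buffer is a primitive type, i.e. when the
- // partial aggregation state can be trivially serialized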
- try {
- for (AggregationDesc desc : descs) {
- GenericUDAFEvaluator udaf = desc.getGenericUDAFEvaluator();
- AggregationBuffer buf = udaf.getNewAggregationBuffer();
- Class<?> bufferClass = buf.getClass();
- Field[] fields = bufferClass.getDeclaredFields();
- for (Field field : fields) {
- field.setAccessible(true);
- String type = field.getType().toString();
- if (!(type.equals("int") || type.equals("long")
- || type.equals("float") || type.equals("double") || type
- .equals("boolean"))) {
- return false;
- }
- }
-
- }
- return true;
- } catch (Exception e) {
- throw new AlgebricksException(e);
- }
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/JoinVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/JoinVisitor.java
deleted file mode 100644
index aea4be5..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/JoinVisitor.java
+++ /dev/null
@@ -1,445 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.JoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
-import org.apache.hadoop.hive.ql.plan.JoinDesc;
-import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.LeftOuterJoinOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
-
-@SuppressWarnings("rawtypes")
-public class JoinVisitor extends DefaultVisitor {
-
- /**
- * reduce sink operator to key variables
- */
- private HashMap<Operator, List<LogicalVariable>> reduceSinkToKeyVariables = new HashMap<Operator, List<LogicalVariable>>();
-
- /**
- * reduce sink operator to field names
- */
- private HashMap<Operator, List<String>> reduceSinkToFieldNames = new HashMap<Operator, List<String>>();
-
- /**
- * reduce sink operator to types
- */
- private HashMap<Operator, List<TypeInfo>> reduceSinkToTypes = new HashMap<Operator, List<TypeInfo>>();
-
- /**
- * map a join operator (in hive) to its parent operators (in hive)
- */
- private HashMap<Operator, List<Operator>> operatorToHiveParents = new HashMap<Operator, List<Operator>>();
-
- /**
- * map a join operator (in hive) to its parent operators (in asterix)
- */
- private HashMap<Operator, List<ILogicalOperator>> operatorToAsterixParents = new HashMap<Operator, List<ILogicalOperator>>();
-
- /**
- * the latest traversed reduce sink operator
- */
- private Operator latestReduceSink = null;
-
- /**
- * the latest generated parent for join
- */
- private ILogicalOperator latestAlgebricksOperator = null;
-
- /**
- * process a join operator
- */
- @Override
- public Mutable<ILogicalOperator> visit(JoinOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) {
- latestAlgebricksOperator = AlgebricksParentOperator.getValue();
- translateJoinOperatorPreprocess(operator, t);
- List<Operator> parents = operatorToHiveParents.get(operator);
- if (parents.size() < operator.getParentOperators().size()) {
- return null;
- } else {
- ILogicalOperator joinOp = translateJoinOperator(operator,
- AlgebricksParentOperator, t);
- // clearStatus();
- return new MutableObject<ILogicalOperator>(joinOp);
- }
- }
-
- private void reorder(Byte[] order, List<ILogicalOperator> parents,
- List<Operator> hiveParents) {
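- // rearrange both parent lists so that their positions follow the
- // join tag order given by the join descriptor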
- ILogicalOperator[] lops = new ILogicalOperator[parents.size()];
- Operator[] ops = new Operator[hiveParents.size()];
-
- for (Operator op : hiveParents) {
- ReduceSinkOperator rop = (ReduceSinkOperator) op;
- ReduceSinkDesc rdesc = rop.getConf();
- int tag = rdesc.getTag();
-
- int index = -1;
- for (int i = 0; i < order.length; i++)
- if (order[i] == tag) {
- index = i;
- break;
- }
- lops[index] = parents.get(hiveParents.indexOf(op));
- ops[index] = op;
- }
-
- parents.clear();
- hiveParents.clear();
-
- for (int i = 0; i < lops.length; i++) {
- parents.add(lops[i]);
- hiveParents.add(ops[i]);
- }
- }
-
- /**
- * translate a hive join operator into an asterix join operator ->
- * assign operator -> project operator chain
- *
- * @param parentOperator
- * @param operator
- * @return
- */
- private ILogicalOperator translateJoinOperator(Operator operator,
- Mutable<ILogicalOperator> parentOperator, Translator t) {
-
- JoinDesc joinDesc = (JoinDesc) operator.getConf();
-
- // get the projection expression (already re-written) from each source
- // table
- Map<Byte, List<ExprNodeDesc>> exprMap = joinDesc.getExprs();
- reorder(joinDesc.getTagOrder(), operatorToAsterixParents.get(operator),
- operatorToHiveParents.get(operator));
-
- // make a reduce join operator
- ILogicalOperator currentOperator = generateJoinTree(
- joinDesc.getCondsList(),
- operatorToAsterixParents.get(operator),
- operatorToHiveParents.get(operator), 0, t);
- parentOperator = new MutableObject<ILogicalOperator>(currentOperator);
-
- // add assign and project operator on top of a join
- // output variables
- ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
- Set<Entry<Byte, List<ExprNodeDesc>>> entries = exprMap.entrySet();
- Iterator<Entry<Byte, List<ExprNodeDesc>>> iterator = entries.iterator();
- while (iterator.hasNext()) {
- List<ExprNodeDesc> outputExprs = iterator.next().getValue();
- ILogicalOperator assignOperator = t.getAssignOperator(
- parentOperator, outputExprs, variables);
-
- if (assignOperator != null) {
- currentOperator = assignOperator;
- parentOperator = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
- }
-
- ILogicalOperator po = new ProjectOperator(variables);
- po.getInputs().add(parentOperator);
- t.rewriteOperatorOutputSchema(variables, operator);
- return po;
- }
-
- /**
- * deal with reduce sink operator for the case of join
- */
- @Override
- public Mutable<ILogicalOperator> visit(ReduceSinkOperator operator,
- Mutable<ILogicalOperator> parentOperator, Translator t) {
-
- Operator downStream = (Operator) operator.getChildOperators().get(0);
- if (!(downStream instanceof JoinOperator))
- return null;
-
- ReduceSinkDesc desc = (ReduceSinkDesc) operator.getConf();
- List<ExprNodeDesc> keys = desc.getKeyCols();
- List<ExprNodeDesc> values = desc.getValueCols();
- List<ExprNodeDesc> partitionCols = desc.getPartitionCols();
-
- /**
- * rewrite key, value, and partition column expressions
- */
- for (ExprNodeDesc key : keys)
- t.rewriteExpression(key);
- for (ExprNodeDesc value : values)
- t.rewriteExpression(value);
- for (ExprNodeDesc col : partitionCols)
- t.rewriteExpression(col);
-
- ILogicalOperator currentOperator = null;
-
- // add assign operator for keys if necessary
- ArrayList<LogicalVariable> keyVariables = new ArrayList<LogicalVariable>();
- ILogicalOperator assignOperator = t.getAssignOperator(parentOperator,
- keys, keyVariables);
- if (assignOperator != null) {
- currentOperator = assignOperator;
- parentOperator = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
-
- // add assign operator for values if necessary
- ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
- assignOperator = t.getAssignOperator(parentOperator, values, variables);
- if (assignOperator != null) {
- currentOperator = assignOperator;
- parentOperator = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
-
- // unified schema: key, value
- ArrayList<LogicalVariable> unifiedKeyValues = new ArrayList<LogicalVariable>();
- unifiedKeyValues.addAll(keyVariables);
- for (LogicalVariable value : variables)
- if (keyVariables.indexOf(value) < 0)
- unifiedKeyValues.add(value);
-
- // insert a projection operator; this is a *must*, because in hive a
- // reduce sink sometimes also does the projection operator's work
- currentOperator = new ProjectOperator(unifiedKeyValues);
- currentOperator.getInputs().add(parentOperator);
- parentOperator = new MutableObject<ILogicalOperator>(currentOperator);
-
- reduceSinkToKeyVariables.put(operator, keyVariables);
- List<String> fieldNames = new ArrayList<String>();
- List<TypeInfo> types = new ArrayList<TypeInfo>();
- for (LogicalVariable var : unifiedKeyValues) {
- fieldNames.add(var.toString());
- types.add(t.getType(var));
- }
- reduceSinkToFieldNames.put(operator, fieldNames);
- reduceSinkToTypes.put(operator, types);
- t.rewriteOperatorOutputSchema(variables, operator);
-
- latestAlgebricksOperator = currentOperator;
- latestReduceSink = operator;
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-
- /**
- * partially rewrite a join operator
- *
- * @param operator
- * @param t
- */
- private void translateJoinOperatorPreprocess(Operator operator, Translator t) {
- JoinDesc desc = (JoinDesc) operator.getConf();
- ReduceSinkDesc reduceSinkDesc = (ReduceSinkDesc) latestReduceSink
- .getConf();
- int tag = reduceSinkDesc.getTag();
-
- Map<Byte, List<ExprNodeDesc>> exprMap = desc.getExprs();
- List<ExprNodeDesc> exprs = exprMap.get(Byte.valueOf((byte) tag));
-
- for (ExprNodeDesc expr : exprs)
- t.rewriteExpression(expr);
-
- List<Operator> parents = operatorToHiveParents.get(operator);
- if (parents == null) {
- parents = new ArrayList<Operator>();
- operatorToHiveParents.put(operator, parents);
- }
- parents.add(latestReduceSink);
-
- List<ILogicalOperator> asterixParents = operatorToAsterixParents
- .get(operator);
- if (asterixParents == null) {
- asterixParents = new ArrayList<ILogicalOperator>();
- operatorToAsterixParents.put(operator, asterixParents);
- }
- asterixParents.add(latestAlgebricksOperator);
- }
-
- // generate a join tree from a list of exchange/reduce-sink operators;
- // the exchanges and reduce sinks are kept in the same order
- private ILogicalOperator generateJoinTree(List<JoinCondDesc> conds,
- List<ILogicalOperator> exchanges, List<Operator> reduceSinks,
- int offset, Translator t) {
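- // recursively build the join tree: the base case joins the last two
- // inputs, and each recursive step joins one more input on top of the
- // subtree built so far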
- // get a list of reduce sink descs (input descs)
- int inputSize = reduceSinks.size() - offset;
-
- if (inputSize == 2) {
- ILogicalOperator currentRoot;
-
- List<ReduceSinkDesc> reduceSinkDescs = new ArrayList<ReduceSinkDesc>();
- for (int i = reduceSinks.size() - 1; i >= offset; i--)
- reduceSinkDescs.add((ReduceSinkDesc) reduceSinks.get(i)
- .getConf());
-
- // get the object inspector for the join
- List<String> fieldNames = new ArrayList<String>();
- List<TypeInfo> types = new ArrayList<TypeInfo>();
- for (int i = reduceSinks.size() - 1; i >= offset; i--) {
- fieldNames
- .addAll(reduceSinkToFieldNames.get(reduceSinks.get(i)));
- types.addAll(reduceSinkToTypes.get(reduceSinks.get(i)));
- }
-
- // get number of equality conjunctions in the final join condition
- int size = reduceSinkDescs.get(0).getKeyCols().size();
-
- // make up the join condition expression
- List<ExprNodeDesc> joinConditionChildren = new ArrayList<ExprNodeDesc>();
- for (int i = 0; i < size; i++) {
- // create a join key pair
- List<ExprNodeDesc> keyPair = new ArrayList<ExprNodeDesc>();
- for (ReduceSinkDesc sink : reduceSinkDescs) {
- keyPair.add(sink.getKeyCols().get(i));
- }
- // create a hive equal condition
- ExprNodeDesc equality = new ExprNodeGenericFuncDesc(
- TypeInfoFactory.booleanTypeInfo,
- new GenericUDFOPEqual(), keyPair);
- // add the equal condition to the conjunction list
- joinConditionChildren.add(equality);
- }
- // get final conjunction expression
- ExprNodeDesc conjunct = null;
-
- if (joinConditionChildren.size() > 1)
- conjunct = new ExprNodeGenericFuncDesc(
- TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(),
- joinConditionChildren);
- else if (joinConditionChildren.size() == 1)
- conjunct = joinConditionChildren.get(0);
- else {
- // there is no join equality condition; fall back to a constant TRUE predicate
- conjunct = new ExprNodeConstantDesc(
- TypeInfoFactory.booleanTypeInfo, new Boolean(true));
- }
- // get an ILogicalExpression from hive's expression
- Mutable<ILogicalExpression> expression = t
- .translateScalarFucntion(conjunct);
-
- Mutable<ILogicalOperator> leftBranch = new MutableObject<ILogicalOperator>(
- exchanges.get(exchanges.size() - 1));
- Mutable<ILogicalOperator> rightBranch = new MutableObject<ILogicalOperator>(
- exchanges.get(exchanges.size() - 2));
- // get the join operator
- if (conds.get(offset).getType() == JoinDesc.LEFT_OUTER_JOIN) {
- currentRoot = new LeftOuterJoinOperator(expression);
- Mutable<ILogicalOperator> temp = leftBranch;
- leftBranch = rightBranch;
- rightBranch = temp;
- } else if (conds.get(offset).getType() == JoinDesc.RIGHT_OUTER_JOIN) {
- currentRoot = new LeftOuterJoinOperator(expression);
- } else
- currentRoot = new InnerJoinOperator(expression);
-
- currentRoot.getInputs().add(leftBranch);
- currentRoot.getInputs().add(rightBranch);
-
- // rewriteOperatorOutputSchema(variables, operator);
- return currentRoot;
- } else {
- // get the child join operator and insert a one-to-one exchange
- ILogicalOperator joinSrcOne = generateJoinTree(conds, exchanges,
- reduceSinks, offset + 1, t);
- // joinSrcOne.addInput(childJoin);
-
- ILogicalOperator currentRoot;
-
- List<ReduceSinkDesc> reduceSinkDescs = new ArrayList<ReduceSinkDesc>();
- for (int i = offset; i < offset + 2; i++)
- reduceSinkDescs.add((ReduceSinkDesc) reduceSinks.get(i)
- .getConf());
-
- // get the object inspector for the join
- List<String> fieldNames = new ArrayList<String>();
- List<TypeInfo> types = new ArrayList<TypeInfo>();
- for (int i = offset; i < reduceSinks.size(); i++) {
- fieldNames
- .addAll(reduceSinkToFieldNames.get(reduceSinks.get(i)));
- types.addAll(reduceSinkToTypes.get(reduceSinks.get(i)));
- }
-
- // get number of equality conjunctions in the final join condition
- int size = reduceSinkDescs.get(0).getKeyCols().size();
-
- // make up the join condition expression
- List<ExprNodeDesc> joinConditionChildren = new ArrayList<ExprNodeDesc>();
- for (int i = 0; i < size; i++) {
- // create a join key pair
- List<ExprNodeDesc> keyPair = new ArrayList<ExprNodeDesc>();
- for (ReduceSinkDesc sink : reduceSinkDescs) {
- keyPair.add(sink.getKeyCols().get(i));
- }
- // create a hive equal condition
- ExprNodeDesc equality = new ExprNodeGenericFuncDesc(
- TypeInfoFactory.booleanTypeInfo,
- new GenericUDFOPEqual(), keyPair);
- // add the equal condition to the conjunction list
- joinConditionChildren.add(equality);
- }
- // get final conjunction expression
- ExprNodeDesc conjunct = null;
-
- if (joinConditionChildren.size() > 1)
- conjunct = new ExprNodeGenericFuncDesc(
- TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(),
- joinConditionChildren);
- else if (joinConditionChildren.size() == 1)
- conjunct = joinConditionChildren.get(0);
- else {
- // there is no join equality condition; fall back to a constant TRUE predicate
- conjunct = new ExprNodeConstantDesc(
- TypeInfoFactory.booleanTypeInfo, new Boolean(true));
- }
- // get an ILogicalExpression from hive's expression
- Mutable<ILogicalExpression> expression = t
- .translateScalarFucntion(conjunct);
-
- Mutable<ILogicalOperator> leftBranch = new MutableObject<ILogicalOperator>(
- joinSrcOne);
- Mutable<ILogicalOperator> rightBranch = new MutableObject<ILogicalOperator>(
- exchanges.get(offset));
-
- // get the join operator
- if (conds.get(offset).getType() == JoinDesc.LEFT_OUTER_JOIN) {
- currentRoot = new LeftOuterJoinOperator(expression);
- Mutable<ILogicalOperator> temp = leftBranch;
- leftBranch = rightBranch;
- rightBranch = temp;
- } else if (conds.get(offset).getType() == JoinDesc.RIGHT_OUTER_JOIN) {
- currentRoot = new LeftOuterJoinOperator(expression);
- } else
- currentRoot = new InnerJoinOperator(expression);
-
- // set the inputs of the Algebricks join operator
- // (the join subtree built so far plus the current branch)
- currentRoot.getInputs().add(leftBranch);
- currentRoot.getInputs().add(rightBranch);
-
- return currentRoot;
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java
deleted file mode 100644
index 004a8c2..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java
+++ /dev/null
@@ -1,124 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.UDTFOperator;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
-
-/**
- * The lateral view join operator is used for FROM src LATERAL VIEW udtf()...
- * This operator was implemented with the following operator DAG in mind.
- *
- * For a query such as
- *
- * SELECT pageid, adid.* FROM example_table LATERAL VIEW explode(adid_list) AS
- * adid
- *
- * The top of the operator DAG will look similar to
- *
- *   [Table Scan]
- *        |
- *   [Lateral View Forward]
- *      /                     \
- *   [Select](*)             [Select](adid_list)
- *        |                     |
- *        |                   [UDTF] (explode)
- *         \                   /
- *        [Lateral View Join]
- *                |
- *                |
- *        [Select] (pageid, adid.*)
- *                |
- *               ....
- *
- * Rows from the table scan operator are first sent to a lateral view forward
- * operator that just forwards the row and marks the start of a LV. The select
- * operator on the left picks all the columns while the select operator on the
- * right picks only the columns needed by the UDTF.
- *
- * The output of select in the left branch and output of the UDTF in the right
- * branch are then sent to the lateral view join (LVJ). In most cases, the UDTF
- * will generate > 1 row for every row received from the TS, while the left
- * select operator will generate only one. For each row output from the TS, the
- * LVJ outputs all possible rows that can be created by joining the row from the
- * left select and one of the rows output from the UDTF.
- *
- * Additional lateral views can be supported by adding a similar DAG after the
- * previous LVJ operator.
- */
-
-@SuppressWarnings("rawtypes")
-public class LateralViewJoinVisitor extends DefaultVisitor {
-
- private UDTFDesc udtf;
-
- private List<Mutable<ILogicalOperator>> parents = new ArrayList<Mutable<ILogicalOperator>>();
-
- @Override
- public Mutable<ILogicalOperator> visit(LateralViewJoinOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException {
-
- parents.add(AlgebricksParentOperatorRef);
- if (operator.getParentOperators().size() > parents.size()) {
- return null;
- }
-
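- // both parent branches have been visited at this point; the UDTF is
- // translated into an unnest operator stacked on the selected parent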
- Operator parent0 = operator.getParentOperators().get(0);
- ILogicalOperator parentOperator;
- ILogicalExpression unnestArg;
- if (parent0 instanceof UDTFOperator) {
- List<LogicalVariable> unnestVars = new ArrayList<LogicalVariable>();
- VariableUtilities.getLiveVariables(parents.get(1).getValue(),
- unnestVars);
- unnestArg = new VariableReferenceExpression(unnestVars.get(0));
- parentOperator = parents.get(1).getValue();
- } else {
- List<LogicalVariable> unnestVars = new ArrayList<LogicalVariable>();
- VariableUtilities.getLiveVariables(parents.get(0).getValue(),
- unnestVars);
- unnestArg = new VariableReferenceExpression(unnestVars.get(0));
- parentOperator = parents.get(0).getValue();
- }
-
- LogicalVariable var = t.getVariable(udtf.toString(),
- TypeInfoFactory.unknownTypeInfo);
-
- Mutable<ILogicalExpression> unnestExpr = t.translateUnnestFunction(
- udtf, new MutableObject<ILogicalExpression>(unnestArg));
- ILogicalOperator currentOperator = new UnnestOperator(var, unnestExpr);
-
- List<LogicalVariable> outputVars = new ArrayList<LogicalVariable>();
- VariableUtilities.getLiveVariables(parentOperator, outputVars);
- outputVars.add(var);
- currentOperator.getInputs().add(
- new MutableObject<ILogicalOperator>(parentOperator));
-
- parents.clear();
- udtf = null;
- t.rewriteOperatorOutputSchema(outputVars, operator);
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(UDTFOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
- Schema currentSchema = t.generateInputSchema(operator
- .getParentOperators().get(0));
- udtf = (UDTFDesc) operator.getConf();
-
- // populate the schema from upstream operator
- operator.setSchema(operator.getParentOperators().get(0).getSchema());
- List<LogicalVariable> latestOutputSchema = t
- .getVariablesFromSchema(currentSchema);
- t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
- return null;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LimitVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LimitVisitor.java
deleted file mode 100644
index 84cdf00..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LimitVisitor.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.LimitOperator;
-import org.apache.hadoop.hive.ql.plan.LimitDesc;
-
-import edu.uci.ics.hivesterix.logical.expression.HivesterixConstantValue;
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
-
-public class LimitVisitor extends DefaultVisitor {
-
- @Override
- public Mutable<ILogicalOperator> visit(LimitOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
- Schema currentSchema = t.generateInputSchema(operator
- .getParentOperators().get(0));
-
- LimitDesc desc = (LimitDesc) operator.getConf();
- int limit = desc.getLimit();
- Integer limitValue = new Integer(limit);
-
- ILogicalExpression expr = new ConstantExpression(
- new HivesterixConstantValue(limitValue));
- ILogicalOperator currentOperator = new edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.LimitOperator(
- expr, true);
- currentOperator.getInputs().add(AlgebricksParentOperatorRef);
-
- operator.setSchema(operator.getParentOperators().get(0).getSchema());
- List<LogicalVariable> latestOutputSchema = t
- .getVariablesFromSchema(currentSchema);
- t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java
deleted file mode 100644
index fa5d014..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java
+++ /dev/null
@@ -1,183 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
-
-@SuppressWarnings("rawtypes")
-public class MapJoinVisitor extends DefaultVisitor {
-
- /**
- * map a join operator (in hive) to its parent operators (in asterix)
- */
- private HashMap<Operator, List<Mutable<ILogicalOperator>>> opMap = new HashMap<Operator, List<Mutable<ILogicalOperator>>>();
-
- @Override
- public Mutable<ILogicalOperator> visit(MapJoinOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
- List<Operator<? extends Serializable>> joinSrc = operator
- .getParentOperators();
- List<Mutable<ILogicalOperator>> parents = opMap.get(operator);
- if (parents == null) {
- parents = new ArrayList<Mutable<ILogicalOperator>>();
- opMap.put(operator, parents);
- }
- parents.add(AlgebricksParentOperatorRef);
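- // wait until every parent branch of the map join has been visited
- // before generating the join operator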
- if (joinSrc.size() != parents.size())
- return null;
-
- ILogicalOperator currentOperator;
- // make a map join operator
- // TODO: will have trouble for n-way joins
- MapJoinDesc joinDesc = (MapJoinDesc) operator.getConf();
-
- Map<Byte, List<ExprNodeDesc>> keyMap = joinDesc.getKeys();
- // get the projection expression (already re-written) from each source
- // table
- Map<Byte, List<ExprNodeDesc>> exprMap = joinDesc.getExprs();
-
- int inputSize = operator.getParentOperators().size();
- // get a list of reduce sink descs (input descs)
-
- // get the parent operator
- List<Mutable<ILogicalOperator>> parentOps = parents;
-
- List<String> fieldNames = new ArrayList<String>();
- List<TypeInfo> types = new ArrayList<TypeInfo>();
- for (Operator ts : joinSrc) {
- List<ColumnInfo> columns = ts.getSchema().getSignature();
- for (ColumnInfo col : columns) {
- fieldNames.add(col.getInternalName());
- types.add(col.getType());
- }
- }
-
- // get number of equality conjunctions in the final join condition
- Set<Entry<Byte, List<ExprNodeDesc>>> keyEntries = keyMap.entrySet();
- Iterator<Entry<Byte, List<ExprNodeDesc>>> entry = keyEntries.iterator();
-
- int size = 0;
- if (entry.hasNext())
- size = entry.next().getValue().size();
-
- // make up the join condition expression
- List<ExprNodeDesc> joinConditionChildren = new ArrayList<ExprNodeDesc>();
- for (int i = 0; i < size; i++) {
- // create a join key pair
- List<ExprNodeDesc> keyPair = new ArrayList<ExprNodeDesc>();
- for (int j = 0; j < inputSize; j++) {
- keyPair.add(keyMap.get(Byte.valueOf((byte) j)).get(i));
- }
- // create a hive equal condition
- ExprNodeDesc equality = new ExprNodeGenericFuncDesc(
- TypeInfoFactory.booleanTypeInfo, new GenericUDFOPEqual(),
- keyPair);
- // add the equal condition to the conjunction list
- joinConditionChildren.add(equality);
- }
- // get final conjunction expression
- ExprNodeDesc conjunct = null;
-
- if (joinConditionChildren.size() > 1)
- conjunct = new ExprNodeGenericFuncDesc(
- TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(),
- joinConditionChildren);
- else if (joinConditionChildren.size() == 1)
- conjunct = joinConditionChildren.get(0);
- else {
- // there is no join equality condition; fall back to a constant TRUE predicate
- conjunct = new ExprNodeConstantDesc(
- TypeInfoFactory.booleanTypeInfo, new Boolean(true));
- }
- // get an ILogicalExpression from hive's expression
- Mutable<ILogicalExpression> expression = t
- .translateScalarFucntion(conjunct);
-
- ArrayList<LogicalVariable> left = new ArrayList<LogicalVariable>();
- ArrayList<LogicalVariable> right = new ArrayList<LogicalVariable>();
-
- Set<Entry<Byte, List<ExprNodeDesc>>> kentries = keyMap.entrySet();
- Iterator<Entry<Byte, List<ExprNodeDesc>>> kiterator = kentries
- .iterator();
- int iteration = 0;
- ILogicalOperator assignOperator = null;
- while (kiterator.hasNext()) {
- List<ExprNodeDesc> outputExprs = kiterator.next().getValue();
-
- if (iteration == 0)
- assignOperator = t.getAssignOperator(
- AlgebricksParentOperatorRef, outputExprs, left);
- else
- assignOperator = t.getAssignOperator(
- AlgebricksParentOperatorRef, outputExprs, right);
-
- if (assignOperator != null) {
- currentOperator = assignOperator;
- AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
- iteration++;
- }
-
- List<Mutable<ILogicalOperator>> inputs = parentOps;
-
- // get the join operator
- currentOperator = new InnerJoinOperator(expression);
-
- // set the inputs of the asterix join operator
- for (Mutable<ILogicalOperator> input : inputs)
- currentOperator.getInputs().add(input);
- AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(
- currentOperator);
-
- // add assign and project operator
- // output variables
- ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
- Set<Entry<Byte, List<ExprNodeDesc>>> entries = exprMap.entrySet();
- Iterator<Entry<Byte, List<ExprNodeDesc>>> iterator = entries.iterator();
- while (iterator.hasNext()) {
- List<ExprNodeDesc> outputExprs = iterator.next().getValue();
- assignOperator = t.getAssignOperator(AlgebricksParentOperatorRef,
- outputExprs, variables);
-
- if (assignOperator != null) {
- currentOperator = assignOperator;
- AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
- }
-
- currentOperator = new ProjectOperator(variables);
- currentOperator.getInputs().add(AlgebricksParentOperatorRef);
- t.rewriteOperatorOutputSchema(variables, operator);
- // opMap.clear();
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ProjectVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ProjectVisitor.java
deleted file mode 100644
index 0d2067c..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ProjectVisitor.java
+++ /dev/null
@@ -1,58 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.SelectDesc;
-
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
-
-public class ProjectVisitor extends DefaultVisitor {
-
- /**
- * translate project operator
- */
- @Override
- public Mutable<ILogicalOperator> visit(SelectOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) {
-
- SelectDesc desc = (SelectDesc) operator.getConf();
-
- if (desc == null)
- return null;
-
- List<ExprNodeDesc> cols = desc.getColList();
-
- if (cols == null)
- return null;
-
- // insert assign operator if necessary
- ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
-
- for (ExprNodeDesc expr : cols)
- t.rewriteExpression(expr);
-
- ILogicalOperator assignOp = t.getAssignOperator(
- AlgebricksParentOperator, cols, variables);
- ILogicalOperator currentOperator = null;
- if (assignOp != null) {
- currentOperator = assignOp;
- AlgebricksParentOperator = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
-
- currentOperator = new ProjectOperator(variables);
- currentOperator.getInputs().add(AlgebricksParentOperator);
- t.rewriteOperatorOutputSchema(variables, operator);
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/SortVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/SortVisitor.java
deleted file mode 100644
index a2c0d03..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/SortVisitor.java
+++ /dev/null
@@ -1,125 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
-
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.OrderOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.OrderOperator.IOrder;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
-import edu.uci.ics.hyracks.algebricks.core.algebra.properties.OrderColumn;
-
-public class SortVisitor extends DefaultVisitor {
-
- @SuppressWarnings("rawtypes")
- @Override
- public Mutable<ILogicalOperator> visit(ReduceSinkOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException {
- ReduceSinkDesc desc = (ReduceSinkDesc) operator.getConf();
- Operator downStream = (Operator) operator.getChildOperators().get(0);
- List<ExprNodeDesc> keys = desc.getKeyCols();
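- // only a total order (a single reducer feeding an extract operator
- // on non-empty sort keys) is translated into an order operator here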
- if (!(downStream instanceof ExtractOperator
- && desc.getNumReducers() == 1 && keys.size() > 0)) {
- return null;
- }
-
- List<ExprNodeDesc> schema = new ArrayList<ExprNodeDesc>();
- List<ExprNodeDesc> values = desc.getValueCols();
- List<ExprNodeDesc> partitionCols = desc.getPartitionCols();
- for (ExprNodeDesc key : keys) {
- t.rewriteExpression(key);
- }
- for (ExprNodeDesc value : values) {
- t.rewriteExpression(value);
- }
- for (ExprNodeDesc col : partitionCols) {
- t.rewriteExpression(col);
- }
-
- // add an order-by operator and a limit if any
- List<Pair<IOrder, Mutable<ILogicalExpression>>> pairs = new ArrayList<Pair<IOrder, Mutable<ILogicalExpression>>>();
- char[] orders = desc.getOrder().toCharArray();
- int i = 0;
- for (ExprNodeDesc key : keys) {
- Mutable<ILogicalExpression> expr = t.translateScalarFucntion(key);
- IOrder order = orders[i] == '+' ? OrderOperator.ASC_ORDER
- : OrderOperator.DESC_ORDER;
-
- Pair<IOrder, Mutable<ILogicalExpression>> pair = new Pair<IOrder, Mutable<ILogicalExpression>>(
- order, expr);
- pairs.add(pair);
- i++;
- }
-
- // get input variables
- ArrayList<LogicalVariable> inputVariables = new ArrayList<LogicalVariable>();
- VariableUtilities.getProducedVariables(
- AlgebricksParentOperatorRef.getValue(), inputVariables);
-
- ArrayList<LogicalVariable> keyVariables = new ArrayList<LogicalVariable>();
- ILogicalOperator currentOperator;
- ILogicalOperator assignOp = t.getAssignOperator(
- AlgebricksParentOperatorRef, keys, keyVariables);
- if (assignOp != null) {
- currentOperator = assignOp;
- AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
-
- OrderColumn[] keyColumns = new OrderColumn[keyVariables.size()];
-
- for (int j = 0; j < keyColumns.length; j++)
- keyColumns[j] = new OrderColumn(keyVariables.get(j),
- pairs.get(j).first.getKind());
-
- // handle order operator
- currentOperator = new OrderOperator(pairs);
- currentOperator.getInputs().add(AlgebricksParentOperatorRef);
- AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(
- currentOperator);
-
- // project back, remove generated sort-key columns if any
- if (assignOp != null) {
- currentOperator = new ProjectOperator(inputVariables);
- currentOperator.getInputs().add(AlgebricksParentOperatorRef);
- AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
-
- /**
- * a special rule for hive's order by: the output schema of the
- * reduce sink operator only contains the value columns
- */
- for (ExprNodeDesc value : values) {
- schema.add(value);
- }
-
- ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
- ILogicalOperator assignOperator = t.getAssignOperator(
- AlgebricksParentOperatorRef, schema, variables);
- t.rewriteOperatorOutputSchema(variables, operator);
-
- if (assignOperator != null) {
- currentOperator = assignOperator;
- AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java
deleted file mode 100644
index 3e12bb9..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java
+++ /dev/null
@@ -1,148 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
-import org.apache.hadoop.hive.ql.plan.TableScanDesc;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hivesterix.runtime.jobgen.HiveDataSink;
-import edu.uci.ics.hivesterix.runtime.jobgen.HiveDataSource;
-import edu.uci.ics.hivesterix.runtime.jobgen.HiveMetaDataProvider;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSink;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSource;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.WriteOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
-
-public class TableScanWriteVisitor extends DefaultVisitor {
-
- /**
- * map from alias to partition desc
- */
- private HashMap<String, PartitionDesc> aliasToPathMap;
-
- /**
- * map from partition desc to data source
- */
- private HashMap<PartitionDesc, IDataSource<PartitionDesc>> dataSourceMap = new HashMap<PartitionDesc, IDataSource<PartitionDesc>>();
-
- /**
- * constructor
- *
- * @param aliasToPathMap
- */
- public TableScanWriteVisitor(HashMap<String, PartitionDesc> aliasToPathMap) {
- this.aliasToPathMap = aliasToPathMap;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(TableScanOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- TableScanDesc desc = (TableScanDesc) operator.getConf();
- if (desc == null) {
- List<LogicalVariable> schema = new ArrayList<LogicalVariable>();
- VariableUtilities.getLiveVariables(
- AlgebricksParentOperator.getValue(), schema);
- t.rewriteOperatorOutputSchema(schema, operator);
- return null;
- }
-
- List<ColumnInfo> columns = operator.getSchema().getSignature();
- for (int i = columns.size() - 1; i >= 0; i--)
- if (columns.get(i).getIsVirtualCol() == true)
- columns.remove(i);
-
- // start with empty tuple operator
- List<TypeInfo> types = new ArrayList<TypeInfo>();
- ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
- List<String> names = new ArrayList<String>();
- for (ColumnInfo column : columns) {
- types.add(column.getType());
-
- LogicalVariable var = t.getVariableFromFieldName(column
- .getTabAlias() + "." + column.getInternalName());
- LogicalVariable varNew;
-
- if (var != null) {
- varNew = t.getVariable(
- column.getTabAlias() + "." + column.getInternalName()
- + operator.toString(), column.getType());
- t.replaceVariable(var, varNew);
- var = varNew;
- } else
- var = t.getNewVariable(
- column.getTabAlias() + "." + column.getInternalName(),
- column.getType());
-
- variables.add(var);
- names.add(column.getInternalName());
- }
- Schema currentSchema = new Schema(names, types);
-
- String alias = desc.getAlias();
- PartitionDesc partDesc = aliasToPathMap.get(alias);
- IDataSource<PartitionDesc> dataSource = new HiveDataSource<PartitionDesc>(
- partDesc, currentSchema.getSchema());
- ILogicalOperator currentOperator = new DataSourceScanOperator(
- variables, dataSource);
-
- // set empty tuple source operator
- ILogicalOperator ets = new EmptyTupleSourceOperator();
- currentOperator.getInputs().add(
- new MutableObject<ILogicalOperator>(ets));
-
- // setup data source
- dataSourceMap.put(partDesc, dataSource);
- t.rewriteOperatorOutputSchema(variables, operator);
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(FileSinkOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) {
-
- if (hiveOperator.getChildOperators() != null
- && hiveOperator.getChildOperators().size() > 0)
- return null;
-
- Schema currentSchema = t.generateInputSchema(hiveOperator
- .getParentOperators().get(0));
-
- IDataSink sink = new HiveDataSink(hiveOperator,
- currentSchema.getSchema());
- List<Mutable<ILogicalExpression>> exprList = new ArrayList<Mutable<ILogicalExpression>>();
- for (String column : currentSchema.getNames()) {
- exprList.add(new MutableObject<ILogicalExpression>(
- new VariableReferenceExpression(t.getVariable(column))));
- }
-
- ILogicalOperator currentOperator = new WriteOperator(exprList, sink);
- if (AlgebricksParentOperator != null) {
- currentOperator.getInputs().add(AlgebricksParentOperator);
- }
-
- IMetadataProvider<PartitionDesc, Object> metaData = new HiveMetaDataProvider<PartitionDesc, Object>(
- hiveOperator, currentSchema, dataSourceMap);
- t.setMetadataProvider(metaData);
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java
deleted file mode 100644
index f4e74f6..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java
+++ /dev/null
@@ -1,64 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.common.utils.Triple;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
-
-public class UnionVisitor extends DefaultVisitor {
-
- List<Mutable<ILogicalOperator>> parents = new ArrayList<Mutable<ILogicalOperator>>();
-
- @Override
- public Mutable<ILogicalOperator> visit(UnionOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
-
- parents.add(AlgebricksParentOperator);
- if (operator.getParentOperators().size() > parents.size()) {
- return null;
- }
-
- List<LogicalVariable> leftVars = new ArrayList<LogicalVariable>();
- List<LogicalVariable> rightVars = new ArrayList<LogicalVariable>();
-
- VariableUtilities.getUsedVariables(parents.get(0).getValue(), leftVars);
- VariableUtilities
- .getUsedVariables(parents.get(1).getValue(), rightVars);
-
- List<Triple<LogicalVariable, LogicalVariable, LogicalVariable>> triples = new ArrayList<Triple<LogicalVariable, LogicalVariable, LogicalVariable>>();
- List<LogicalVariable> unionVars = new ArrayList<LogicalVariable>();
-
- for (int i = 0; i < leftVars.size(); i++) {
- LogicalVariable unionVar = t.getVariable(leftVars.get(i).getId()
- + "union" + AlgebricksParentOperator.hashCode(),
- TypeInfoFactory.unknownTypeInfo);
- unionVars.add(unionVar);
- Triple<LogicalVariable, LogicalVariable, LogicalVariable> triple = new Triple<LogicalVariable, LogicalVariable, LogicalVariable>(
- leftVars.get(i), rightVars.get(i), unionVar);
- t.replaceVariable(leftVars.get(i), unionVar);
- t.replaceVariable(rightVars.get(i), unionVar);
- triples.add(triple);
- }
- ILogicalOperator currentOperator = new edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator(
- triples);
- for (Mutable<ILogicalOperator> parent : parents)
- currentOperator.getInputs().add(parent);
-
- t.rewriteOperatorOutputSchema(unionVars, operator);
- parents.clear();
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/DefaultVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/DefaultVisitor.java
deleted file mode 100644
index 20013e3..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/DefaultVisitor.java
+++ /dev/null
@@ -1,166 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor.base;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.hadoop.hive.ql.exec.CollectOperator;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
-import org.apache.hadoop.hive.ql.exec.ForwardOperator;
-import org.apache.hadoop.hive.ql.exec.GroupByOperator;
-import org.apache.hadoop.hive.ql.exec.JoinOperator;
-import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
-import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
-import org.apache.hadoop.hive.ql.exec.LimitOperator;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.MapOperator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.ScriptOperator;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.UDTFOperator;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-
-/**
- * a default empty implementation of visitor
- *
- * @author yingyib
- */
-public class DefaultVisitor implements Visitor {
-
- @Override
- public Mutable<ILogicalOperator> visit(CollectOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(JoinOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(ExtractOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(MapJoinOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(SMBMapJoinOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- public Mutable<ILogicalOperator> visit(FileSinkOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- public Mutable<ILogicalOperator> visit(ReduceSinkOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(FilterOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(ForwardOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(GroupByOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(
- LateralViewForwardOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(
- LateralViewJoinOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(LimitOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(MapOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(ScriptOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(SelectOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(TableScanOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(UDTFOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(UnionOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java
deleted file mode 100644
index 9165386..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java
+++ /dev/null
@@ -1,174 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor.base;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
-
-@SuppressWarnings("rawtypes")
-public interface Translator {
-
- /**
- * generate input schema
- *
- * @param operator
- * @return
- */
- public Schema generateInputSchema(Operator operator);
-
- /**
- * rewrite the names of output columns for future expression evaluators to
- * use
- *
- * @param operator
- */
- public void rewriteOperatorOutputSchema(List<LogicalVariable> vars,
- Operator operator);
-
- /**
- * rewrite the names of output columns for future expression evaluators to
- * use
- *
- * @param operator
- */
- public void rewriteOperatorOutputSchema(Operator operator);
-
- /**
- * rewrite an expression and substitute variables
- *
- * @param expr
- * hive expression
- */
- public void rewriteExpression(ExprNodeDesc expr);
-
- /**
- * rewrite an expression and substitute variables
- *
- * @param expr
- * hive expression
- */
- public void rewriteExpressionPartial(ExprNodeDesc expr);
-
- /**
- * get an assign operator as a child of parent
- *
- * @param parent
- * @param cols
- * @param variables
- * @return
- */
- public ILogicalOperator getAssignOperator(Mutable<ILogicalOperator> parent,
- List<ExprNodeDesc> cols, ArrayList<LogicalVariable> variables);
-
- /**
- * get type for a logical variable
- *
- * @param var
- * @return type info
- */
- public TypeInfo getType(LogicalVariable var);
-
- /**
- * translate an expression from hive to Algebricks
- *
- * @param desc
- * @return
- */
- public Mutable<ILogicalExpression> translateScalarFucntion(ExprNodeDesc desc);
-
- /**
- * translate an aggregation from hive to Algebricks
- *
- * @param aggregateDesc
- * @return
- */
- public Mutable<ILogicalExpression> translateAggregation(
- AggregationDesc aggregateDesc);
-
- /**
- * translate unnesting (UDTF) function expression
- *
- * @param aggregator
- * @return
- */
- public Mutable<ILogicalExpression> translateUnnestFunction(
- UDTFDesc udtfDesc, Mutable<ILogicalExpression> argument);
-
- /**
- * get variable from a schema
- *
- * @param schema
- * @return
- */
- public List<LogicalVariable> getVariablesFromSchema(Schema schema);
-
- /**
- * get variable from name
- *
- * @param name
- * @return
- */
- public LogicalVariable getVariable(String name);
-
- /**
- * get variable from field name
- *
- * @param name
- * @return
- */
- public LogicalVariable getVariableFromFieldName(String name);
-
- /**
- * get variable from name, type
- *
- * @param fieldName
- * @param type
- * @return
- */
- public LogicalVariable getVariable(String fieldName, TypeInfo type);
-
- /**
- * get new variable from name, type
- *
- * @param fieldName
- * @param type
- * @return
- */
- public LogicalVariable getNewVariable(String fieldName, TypeInfo type);
-
- /**
- * set the metadata provider
- *
- * @param metadata
- */
- public void setMetadataProvider(
- IMetadataProvider<PartitionDesc, Object> metadata);
-
- /**
- * get the metadata provider
- *
- * @return the metadata provider
- */
- public IMetadataProvider<PartitionDesc, Object> getMetadataProvider();
-
- /**
- * replace the variable
- *
- * @param oldVar
- * @param newVar
- */
- public void replaceVariable(LogicalVariable oldVar, LogicalVariable newVar);
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Visitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Visitor.java
deleted file mode 100644
index 745f93e..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Visitor.java
+++ /dev/null
@@ -1,106 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor.base;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.hadoop.hive.ql.exec.CollectOperator;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
-import org.apache.hadoop.hive.ql.exec.ForwardOperator;
-import org.apache.hadoop.hive.ql.exec.GroupByOperator;
-import org.apache.hadoop.hive.ql.exec.JoinOperator;
-import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
-import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
-import org.apache.hadoop.hive.ql.exec.LimitOperator;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.MapOperator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.ScriptOperator;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.UDTFOperator;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-
-public interface Visitor {
-
- public Mutable<ILogicalOperator> visit(CollectOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(JoinOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(ExtractOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(MapJoinOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(SMBMapJoinOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(FilterOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(ForwardOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(GroupByOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(
- LateralViewForwardOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(
- LateralViewJoinOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(LimitOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(MapOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(ScriptOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(SelectOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(TableScanOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(FileSinkOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(ReduceSinkOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(UDTFOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(UnionOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java
deleted file mode 100644
index 4ebea0a..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java
+++ /dev/null
@@ -1,114 +0,0 @@
-package edu.uci.ics.hivesterix.optimizer.rulecollections;
-
-import java.util.LinkedList;
-
-import edu.uci.ics.hivesterix.optimizer.rules.InsertProjectBeforeWriteRule;
-import edu.uci.ics.hivesterix.optimizer.rules.IntroduceEarlyProjectRule;
-import edu.uci.ics.hivesterix.optimizer.rules.LocalGroupByRule;
-import edu.uci.ics.hivesterix.optimizer.rules.RemoveRedundantSelectRule;
-import edu.uci.ics.hyracks.algebricks.core.rewriter.base.HeuristicOptimizer;
-import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.BreakSelectIntoConjunctsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ComplexJoinInferenceRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ConsolidateAssignsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ConsolidateSelectsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.EliminateSubplanRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.EnforceStructuralPropertiesRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ExtractCommonOperatorsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ExtractGbyExpressionsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.FactorRedundantGroupAndDecorVarsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.InferTypesRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.InlineVariablesRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.InsertProjectBeforeUnionRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.IntroduceAggregateCombinerRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.IntroduceGroupByCombinerRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.IsolateHyracksOperatorsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PullSelectOutOfEqJoin;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushLimitDownRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushProjectDownRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushProjectIntoDataSourceScanRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushSelectDownRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushSelectIntoJoinRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ReinferAllTypesRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.RemoveRedundantProjectionRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.RemoveUnusedAssignAndAggregateRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.SetAlgebricksPhysicalOperatorsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.SetExecutionModeRule;
-
-public final class HiveRuleCollections {
-
- public final static LinkedList<IAlgebraicRewriteRule> NORMALIZATION = new LinkedList<IAlgebraicRewriteRule>();
- static {
- NORMALIZATION.add(new EliminateSubplanRule());
- NORMALIZATION.add(new IntroduceAggregateCombinerRule());
- NORMALIZATION.add(new BreakSelectIntoConjunctsRule());
- NORMALIZATION.add(new IntroduceAggregateCombinerRule());
- NORMALIZATION.add(new PushSelectIntoJoinRule());
- NORMALIZATION.add(new ExtractGbyExpressionsRule());
- NORMALIZATION.add(new RemoveRedundantSelectRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> COND_PUSHDOWN_AND_JOIN_INFERENCE = new LinkedList<IAlgebraicRewriteRule>();
- static {
- COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new PushSelectDownRule());
- COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new InlineVariablesRule());
- COND_PUSHDOWN_AND_JOIN_INFERENCE
- .add(new FactorRedundantGroupAndDecorVarsRule());
- COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new EliminateSubplanRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> LOAD_FIELDS = new LinkedList<IAlgebraicRewriteRule>();
- static {
- // should LoadRecordFieldsRule be applied in only one pass over the
- // plan?
- LOAD_FIELDS.add(new InlineVariablesRule());
- // LOAD_FIELDS.add(new RemoveUnusedAssignAndAggregateRule());
- LOAD_FIELDS.add(new ComplexJoinInferenceRule());
- LOAD_FIELDS.add(new InferTypesRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> OP_PUSHDOWN = new LinkedList<IAlgebraicRewriteRule>();
- static {
- OP_PUSHDOWN.add(new PushProjectDownRule());
- OP_PUSHDOWN.add(new PushSelectDownRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> DATA_EXCHANGE = new LinkedList<IAlgebraicRewriteRule>();
- static {
- DATA_EXCHANGE.add(new SetExecutionModeRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> CONSOLIDATION = new LinkedList<IAlgebraicRewriteRule>();
- static {
- CONSOLIDATION.add(new RemoveRedundantProjectionRule());
- CONSOLIDATION.add(new ConsolidateSelectsRule());
- CONSOLIDATION.add(new IntroduceEarlyProjectRule());
- CONSOLIDATION.add(new ConsolidateAssignsRule());
- CONSOLIDATION.add(new IntroduceGroupByCombinerRule());
- CONSOLIDATION.add(new RemoveUnusedAssignAndAggregateRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> PHYSICAL_PLAN_REWRITES = new LinkedList<IAlgebraicRewriteRule>();
- static {
- PHYSICAL_PLAN_REWRITES.add(new PullSelectOutOfEqJoin());
- PHYSICAL_PLAN_REWRITES.add(new SetAlgebricksPhysicalOperatorsRule());
- PHYSICAL_PLAN_REWRITES.add(new EnforceStructuralPropertiesRule());
- PHYSICAL_PLAN_REWRITES.add(new PushProjectDownRule());
- PHYSICAL_PLAN_REWRITES.add(new SetAlgebricksPhysicalOperatorsRule());
- PHYSICAL_PLAN_REWRITES.add(new PushLimitDownRule());
- PHYSICAL_PLAN_REWRITES.add(new InsertProjectBeforeWriteRule());
- PHYSICAL_PLAN_REWRITES.add(new InsertProjectBeforeUnionRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> prepareJobGenRules = new LinkedList<IAlgebraicRewriteRule>();
- static {
- prepareJobGenRules.add(new ReinferAllTypesRule());
- prepareJobGenRules.add(new IsolateHyracksOperatorsRule(
- HeuristicOptimizer.hyraxOperatorsBelowWhichJobGenIsDisabled));
- prepareJobGenRules.add(new ExtractCommonOperatorsRule());
- prepareJobGenRules.add(new LocalGroupByRule());
- prepareJobGenRules.add(new PushProjectIntoDataSourceScanRule());
- prepareJobGenRules.add(new ReinferAllTypesRule());
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/InsertProjectBeforeWriteRule.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/InsertProjectBeforeWriteRule.java
deleted file mode 100644
index c58982e..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/InsertProjectBeforeWriteRule.java
+++ /dev/null
@@ -1,85 +0,0 @@
-package edu.uci.ics.hivesterix.optimizer.rules;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator.ExecutionMode;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.WriteOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.StreamProjectPOperator;
-import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
-
-public class InsertProjectBeforeWriteRule implements IAlgebraicRewriteRule {
-
- @Override
- public boolean rewritePost(Mutable<ILogicalOperator> opRef,
- IOptimizationContext context) {
- return false;
- }
-
- /**
- * When the input schema to WriteOperator is different from the output
- * schema in terms of variable order, add a project operator to get the
- * write order
- */
- @Override
- public boolean rewritePre(Mutable<ILogicalOperator> opRef,
- IOptimizationContext context) throws AlgebricksException {
- AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
- if (op.getOperatorTag() != LogicalOperatorTag.WRITE) {
- return false;
- }
- WriteOperator opWrite = (WriteOperator) op;
- ArrayList<LogicalVariable> finalSchema = new ArrayList<LogicalVariable>();
- VariableUtilities.getUsedVariables(opWrite, finalSchema);
- ArrayList<LogicalVariable> inputSchema = new ArrayList<LogicalVariable>();
- VariableUtilities.getLiveVariables(opWrite, inputSchema);
- if (!isIdentical(finalSchema, inputSchema)) {
- ProjectOperator projectOp = new ProjectOperator(finalSchema);
- Mutable<ILogicalOperator> parentOpRef = opWrite.getInputs().get(0);
- projectOp.getInputs().add(parentOpRef);
- opWrite.getInputs().clear();
- opWrite.getInputs().add(
- new MutableObject<ILogicalOperator>(projectOp));
- projectOp.setPhysicalOperator(new StreamProjectPOperator());
- projectOp.setExecutionMode(ExecutionMode.PARTITIONED);
-
- AbstractLogicalOperator op2 = (AbstractLogicalOperator) parentOpRef
- .getValue();
- if (op2.getOperatorTag() == LogicalOperatorTag.PROJECT) {
- ProjectOperator pi2 = (ProjectOperator) op2;
- parentOpRef.setValue(pi2.getInputs().get(0).getValue());
- }
- context.computeAndSetTypeEnvironmentForOperator(projectOp);
- return true;
- } else
- return false;
-
- }
-
- private boolean isIdentical(List<LogicalVariable> finalSchema,
- List<LogicalVariable> inputSchema) {
- int finalSchemaSize = finalSchema.size();
- int inputSchemaSize = inputSchema.size();
- if (finalSchemaSize != inputSchemaSize)
- throw new IllegalStateException(
- "final output schema variables missing!");
- for (int i = 0; i < finalSchemaSize; i++) {
- LogicalVariable var1 = finalSchema.get(i);
- LogicalVariable var2 = inputSchema.get(i);
- if (!var1.equals(var2))
- return false;
- }
- return true;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/IntroduceEarlyProjectRule.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/IntroduceEarlyProjectRule.java
deleted file mode 100644
index 2bebe81..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/IntroduceEarlyProjectRule.java
+++ /dev/null
@@ -1,77 +0,0 @@
-package edu.uci.ics.hivesterix.optimizer.rules;
-
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
-import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
-
-public class IntroduceEarlyProjectRule implements IAlgebraicRewriteRule {
-
- @Override
- public boolean rewritePre(Mutable<ILogicalOperator> opRef,
- IOptimizationContext context) throws AlgebricksException {
- return false;
- }
-
- @Override
- public boolean rewritePost(Mutable<ILogicalOperator> opRef,
- IOptimizationContext context) throws AlgebricksException {
- AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
- if (op.getOperatorTag() != LogicalOperatorTag.PROJECT) {
- return false;
- }
- AbstractLogicalOperator middleOp = (AbstractLogicalOperator) op
- .getInputs().get(0).getValue();
- List<LogicalVariable> deliveredVars = new ArrayList<LogicalVariable>();
- List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
- List<LogicalVariable> producedVars = new ArrayList<LogicalVariable>();
-
- VariableUtilities.getUsedVariables(op, deliveredVars);
- VariableUtilities.getUsedVariables(middleOp, usedVars);
- VariableUtilities.getProducedVariables(middleOp, producedVars);
-
- Set<LogicalVariable> requiredVariables = new HashSet<LogicalVariable>();
- requiredVariables.addAll(deliveredVars);
- requiredVariables.addAll(usedVars);
- requiredVariables.removeAll(producedVars);
-
- if (middleOp.getInputs().size() <= 0 || middleOp.getInputs().size() > 1)
- return false;
-
- AbstractLogicalOperator targetOp = (AbstractLogicalOperator) middleOp
- .getInputs().get(0).getValue();
- if (targetOp.getOperatorTag() != LogicalOperatorTag.DATASOURCESCAN)
- return false;
-
- Set<LogicalVariable> deliveredEarlyVars = new HashSet<LogicalVariable>();
- VariableUtilities.getLiveVariables(targetOp, deliveredEarlyVars);
-
- deliveredEarlyVars.removeAll(requiredVariables);
- if (deliveredEarlyVars.size() > 0) {
- ArrayList<LogicalVariable> requiredVars = new ArrayList<LogicalVariable>();
- requiredVars.addAll(requiredVariables);
- ILogicalOperator earlyProjectOp = new ProjectOperator(requiredVars);
- Mutable<ILogicalOperator> earlyProjectOpRef = new MutableObject<ILogicalOperator>(
- earlyProjectOp);
- Mutable<ILogicalOperator> targetRef = middleOp.getInputs().get(0);
- middleOp.getInputs().set(0, earlyProjectOpRef);
- earlyProjectOp.getInputs().add(targetRef);
- context.computeAndSetTypeEnvironmentForOperator(earlyProjectOp);
- return true;
- }
- return false;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/LocalGroupByRule.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/LocalGroupByRule.java
deleted file mode 100644
index 72cbe21..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/LocalGroupByRule.java
+++ /dev/null
@@ -1,71 +0,0 @@
-package edu.uci.ics.hivesterix.optimizer.rules;
-
-import org.apache.commons.lang3.mutable.Mutable;
-
-import edu.uci.ics.hivesterix.logical.plan.HiveOperatorAnnotations;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.IPhysicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.OperatorAnnotations;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.PhysicalOperatorTag;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.OneToOneExchangePOperator;
-import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
-
-public class LocalGroupByRule implements IAlgebraicRewriteRule {
-
- @Override
- public boolean rewritePre(Mutable<ILogicalOperator> opRef,
- IOptimizationContext context) throws AlgebricksException {
- return false;
- }
-
- @Override
- public boolean rewritePost(Mutable<ILogicalOperator> opRef,
- IOptimizationContext context) throws AlgebricksException {
- AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
- if (op.getOperatorTag() != LogicalOperatorTag.GROUP) {
- return false;
- }
- Boolean localGby = (Boolean) op.getAnnotations().get(
- HiveOperatorAnnotations.LOCAL_GROUP_BY);
- if (localGby != null && localGby.equals(Boolean.TRUE)) {
- Boolean hashGby = (Boolean) op.getAnnotations().get(
- OperatorAnnotations.USE_HASH_GROUP_BY);
- Boolean externalGby = (Boolean) op.getAnnotations().get(
- OperatorAnnotations.USE_EXTERNAL_GROUP_BY);
- if ((hashGby != null && (hashGby.equals(Boolean.TRUE)) || (externalGby != null && externalGby
- .equals(Boolean.TRUE)))) {
- reviseExchange(op);
- } else {
- ILogicalOperator child = op.getInputs().get(0).getValue();
- AbstractLogicalOperator childOp = (AbstractLogicalOperator) child;
- while (child.getInputs().size() > 0) {
- if (childOp.getOperatorTag() == LogicalOperatorTag.ORDER)
- break;
- else {
- child = child.getInputs().get(0).getValue();
- childOp = (AbstractLogicalOperator) child;
- }
- }
- if (childOp.getOperatorTag() == LogicalOperatorTag.ORDER)
- reviseExchange(childOp);
- }
- return true;
- }
- return false;
- }
-
- private void reviseExchange(AbstractLogicalOperator op) {
- ExchangeOperator exchange = (ExchangeOperator) op.getInputs().get(0)
- .getValue();
- IPhysicalOperator physicalOp = exchange.getPhysicalOperator();
- if (physicalOp.getOperatorTag() == PhysicalOperatorTag.HASH_PARTITION_EXCHANGE) {
- exchange.setPhysicalOperator(new OneToOneExchangePOperator());
- }
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/RemoveRedundantSelectRule.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/RemoveRedundantSelectRule.java
deleted file mode 100644
index 9958ba8..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/RemoveRedundantSelectRule.java
+++ /dev/null
@@ -1,46 +0,0 @@
-package edu.uci.ics.hivesterix.optimizer.rules;
-
-import org.apache.commons.lang3.mutable.Mutable;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
-import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
-
-public class RemoveRedundantSelectRule implements IAlgebraicRewriteRule {
-
- @Override
- public boolean rewritePre(Mutable<ILogicalOperator> opRef,
- IOptimizationContext context) throws AlgebricksException {
- return false;
- }
-
- @Override
- public boolean rewritePost(Mutable<ILogicalOperator> opRef,
- IOptimizationContext context) throws AlgebricksException {
- AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
- if (op.getOperatorTag() != LogicalOperatorTag.SELECT) {
- return false;
- }
- AbstractLogicalOperator inputOp = (AbstractLogicalOperator) op
- .getInputs().get(0).getValue();
- if (inputOp.getOperatorTag() != LogicalOperatorTag.SELECT) {
- return false;
- }
- SelectOperator selectOp = (SelectOperator) op;
- SelectOperator inputSelectOp = (SelectOperator) inputOp;
- ILogicalExpression expr1 = selectOp.getCondition().getValue();
- ILogicalExpression expr2 = inputSelectOp.getCondition().getValue();
-
- if (expr1.equals(expr2)) {
- selectOp.getInputs().set(0, inputSelectOp.getInputs().get(0));
- return true;
- }
- return false;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/config/ConfUtil.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/config/ConfUtil.java
deleted file mode 100644
index 6b4d697..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/config/ConfUtil.java
+++ /dev/null
@@ -1,144 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.config;
-
-import java.net.InetAddress;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.JobConf;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.api.client.HyracksConnection;
-import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
-import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
-import edu.uci.ics.hyracks.api.topology.ClusterTopology;
-
-@SuppressWarnings({ "rawtypes", "deprecation" })
-public class ConfUtil {
-
- private static JobConf job;
- private static HiveConf hconf;
- private static String[] NCs;
- private static Map<String, List<String>> ncMapping;
- private static IHyracksClientConnection hcc = null;
- private static ClusterTopology topology = null;
-
- public static JobConf getJobConf(Class<? extends InputFormat> format,
- Path path) {
- JobConf conf = new JobConf();
- if (job != null)
- conf = job;
-
- String hadoopPath = System.getProperty("HADOOP_HOME", "/hadoop");
- Path pathCore = new Path(hadoopPath + "/conf/core-site.xml");
- conf.addResource(pathCore);
- Path pathMapRed = new Path(hadoopPath + "/conf/mapred-site.xml");
- conf.addResource(pathMapRed);
- Path pathHDFS = new Path(hadoopPath + "/conf/hdfs-site.xml");
- conf.addResource(pathHDFS);
-
- conf.setInputFormat(format);
- FileInputFormat.setInputPaths(conf, path);
- return conf;
- }
-
- public static JobConf getJobConf() {
- JobConf conf = new JobConf();
- if (job != null)
- conf = job;
-
- String hadoopPath = System.getProperty("HADOOP_HOME", "/hadoop");
- Path pathCore = new Path(hadoopPath + "/conf/core-site.xml");
- conf.addResource(pathCore);
- Path pathMapRed = new Path(hadoopPath + "/conf/mapred-site.xml");
- conf.addResource(pathMapRed);
- Path pathHDFS = new Path(hadoopPath + "/conf/hdfs-site.xml");
- conf.addResource(pathHDFS);
-
- return conf;
- }
-
- public static void setJobConf(JobConf conf) {
- job = conf;
- }
-
- public static void setHiveConf(HiveConf hiveConf) {
- hconf = hiveConf;
- }
-
- public static HiveConf getHiveConf() {
- if (hconf == null) {
- hconf = new HiveConf(SessionState.class);
- hconf.addResource(new Path("conf/hive-default.xml"));
- }
- return hconf;
- }
-
- public static String[] getNCs() throws AlgebricksException {
- if (NCs == null) {
- try {
- loadClusterConfig();
- } catch (Exception e) {
- throw new AlgebricksException(e);
- }
- }
- return NCs;
- }
-
- public static Map<String, List<String>> getNCMapping()
- throws AlgebricksException {
- if (ncMapping == null) {
- try {
- loadClusterConfig();
- } catch (Exception e) {
- throw new AlgebricksException(e);
- }
- }
- return ncMapping;
- }
-
- private static void loadClusterConfig() {
- try {
- getHiveConf();
- String ipAddress = hconf.get("hive.hyracks.host");
- int port = Integer.parseInt(hconf.get("hive.hyracks.port"));
- int mpl = Integer.parseInt(hconf.get("hive.hyracks.parrallelism"));
- hcc = new HyracksConnection(ipAddress, port);
- topology = hcc.getClusterTopology();
- Map<String, NodeControllerInfo> ncNameToNcInfos = hcc
- .getNodeControllerInfos();
- NCs = new String[ncNameToNcInfos.size() * mpl];
- ncMapping = new HashMap<String, List<String>>();
- int i = 0;
- for (Map.Entry<String, NodeControllerInfo> entry : ncNameToNcInfos
- .entrySet()) {
- String ipAddr = InetAddress.getByAddress(
- entry.getValue().getNetworkAddress().getIpAddress())
- .getHostAddress();
- List<String> matchedNCs = ncMapping.get(ipAddr);
- if (matchedNCs == null) {
- matchedNCs = new ArrayList<String>();
- ncMapping.put(ipAddr, matchedNCs);
- }
- matchedNCs.add(entry.getKey());
- for (int j = i * mpl; j < i * mpl + mpl; j++)
- NCs[j] = entry.getKey();
- i++;
- }
- } catch (Exception e) {
- throw new IllegalStateException(e);
- }
- }
-
- public static ClusterTopology getClusterTopology() {
- if (topology == null)
- loadClusterConfig();
- return topology;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java
deleted file mode 100644
index 8f6d9ca..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java
+++ /dev/null
@@ -1,174 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.evaluator;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.io.BytesWritable;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
-import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
-import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
-import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-public abstract class AbstractExpressionEvaluator implements ICopyEvaluator {
-
- private List<ICopyEvaluator> children;
-
- private ExprNodeEvaluator evaluator;
-
- private IDataOutputProvider out;
-
- private ObjectInspector inspector;
-
- /**
- * output object inspector
- */
- private ObjectInspector outputInspector;
-
- /**
- * cached row object
- */
- private LazyObject<? extends ObjectInspector> cachedRowObject;
-
- /**
- * serializer/deserializer for lazy object
- */
- private SerDe lazySer;
-
- /**
- * data output
- */
- DataOutput dataOutput;
-
- public AbstractExpressionEvaluator(ExprNodeEvaluator hiveEvaluator,
- ObjectInspector oi, IDataOutputProvider output)
- throws AlgebricksException {
- evaluator = hiveEvaluator;
- out = output;
- inspector = oi;
- dataOutput = out.getDataOutput();
- }
-
- protected ObjectInspector getRowInspector() {
- return null;
- }
-
- protected IDataOutputProvider getIDataOutputProvider() {
- return out;
- }
-
- protected ExprNodeEvaluator getHiveEvaluator() {
- return evaluator;
- }
-
- public ObjectInspector getObjectInspector() {
- return inspector;
- }
-
- @Override
- public void evaluate(IFrameTupleReference r) throws AlgebricksException {
- // initialize hive evaluator
- try {
- if (outputInspector == null)
- outputInspector = evaluator.initialize(inspector);
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- }
-
- readIntoCache(r);
- try {
- Object result = evaluator.evaluate(cachedRowObject);
-
- // if (result == null) {
- // result = evaluator.evaluate(cachedRowObject);
- //
- // // check if result is null
- //
- // String errorMsg = "serialize null object in \n output " +
- // outputInspector.toString() + " \n input "
- // + inspector.toString() + "\n ";
- // errorMsg += "";
- // List<Object> columns = ((StructObjectInspector)
- // inspector).getStructFieldsDataAsList(cachedRowObject);
- // for (Object column : columns) {
- // errorMsg += column.toString() + " ";
- // }
- // errorMsg += "\n";
- // Log.info(errorMsg);
- // System.out.println(errorMsg);
- // // result = new BooleanWritable(true);
- // throw new IllegalStateException(errorMsg);
- // }
-
- serializeResult(result);
- } catch (HiveException e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- } catch (IOException e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- }
- }
-
- /**
- * serialize the result
- *
- * @param result
- * the evaluation result
- * @throws IOException
- * @throws AlgebricksException
- */
- private void serializeResult(Object result) throws IOException,
- AlgebricksException {
- if (lazySer == null)
- lazySer = new LazySerDe();
-
- try {
- BytesWritable outputWritable = (BytesWritable) lazySer.serialize(
- result, outputInspector);
- dataOutput.write(outputWritable.getBytes(), 0,
- outputWritable.getLength());
- } catch (SerDeException e) {
- throw new AlgebricksException(e);
- }
- }
-
- /**
- * bind the tuple reference to the cached row object
- *
- * @param r
- */
- private void readIntoCache(IFrameTupleReference r) {
- if (cachedRowObject == null)
- cachedRowObject = (LazyObject<? extends ObjectInspector>) LazyFactory
- .createLazyObject(inspector);
- cachedRowObject.init(r);
- }
-
- /**
- * set a list of children of this evaluator
- *
- * @param children
- */
- public void setChildren(List<ICopyEvaluator> children) {
- this.children = children;
- }
-
- public void addChild(ICopyEvaluator child) {
- if (children == null)
- children = new ArrayList<ICopyEvaluator>();
- children.add(child);
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java
deleted file mode 100644
index 271b5e4..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java
+++ /dev/null
@@ -1,231 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.evaluator;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.BytesWritable;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunction;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-public class AggregationFunctionEvaluator implements ICopyAggregateFunction {
-
- /**
- * the mode of aggregation function
- */
- private GenericUDAFEvaluator.Mode mode;
-
- /**
- * an array of evaluators
- */
- private ExprNodeEvaluator[] evaluators;
-
- /**
- * udaf evaluator partial
- */
- private GenericUDAFEvaluator udafPartial;
-
- /**
- * udaf evaluator complete
- */
- private GenericUDAFEvaluator udafComplete;
-
- /**
- * cached parameter objects
- */
- private Object[] cachedParameters;
-
- /**
- * cached row objects
- */
- private LazyObject<? extends ObjectInspector> cachedRowObject;
-
- /**
- * the output channel
- */
- private DataOutput out;
-
- /**
- * aggregation buffer
- */
- private AggregationBuffer aggBuffer;
-
- /**
- * we only use lazy serde to do serialization
- */
- private SerDe lazySer;
-
- /**
- * the output object inspector for this aggregation function
- */
- private ObjectInspector outputInspector;
-
- /**
- * the partial output object inspector for this aggregation function
- */
- private ObjectInspector outputInspectorPartial;
-
- /**
- * parameter inspectors
- */
- private ObjectInspector[] parameterInspectors;
-
- /**
- * make sure the aggregation function has minimal object creation
- *
- * @param desc
- * @param oi
- * @param output
- */
- public AggregationFunctionEvaluator(List<ExprNodeDesc> inputs,
- List<TypeInfo> inputTypes, String genericUDAFName,
- GenericUDAFEvaluator.Mode aggMode, boolean distinct,
- ObjectInspector oi, DataOutput output, ExprNodeEvaluator[] evals,
- ObjectInspector[] pInspectors, Object[] parameterCache,
- SerDe serde, LazyObject<? extends ObjectInspector> row,
- GenericUDAFEvaluator udafunctionPartial,
- GenericUDAFEvaluator udafunctionComplete, ObjectInspector outputOi,
- ObjectInspector outputOiPartial) {
- // shared object across threads
- this.out = output;
- this.mode = aggMode;
- this.parameterInspectors = pInspectors;
-
- // thread local objects
- this.evaluators = evals;
- this.cachedParameters = parameterCache;
- this.cachedRowObject = row;
- this.lazySer = serde;
- this.udafPartial = udafunctionPartial;
- this.udafComplete = udafunctionComplete;
- this.outputInspector = outputOi;
- this.outputInspectorPartial = outputOiPartial;
- }
-
- @Override
- public void init() throws AlgebricksException {
- try {
- aggBuffer = udafPartial.getNewAggregationBuffer();
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- }
-
- @Override
- public void step(IFrameTupleReference tuple) throws AlgebricksException {
- readIntoCache(tuple);
- processRow();
- }
-
- private void processRow() throws AlgebricksException {
- try {
- // get values by evaluating them
- for (int i = 0; i < cachedParameters.length; i++) {
- cachedParameters[i] = evaluators[i].evaluate(cachedRowObject);
- }
- processAggregate();
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- }
-
- private void processAggregate() throws HiveException {
- /**
- * accumulate the aggregation function
- */
- switch (mode) {
- case PARTIAL1:
- case COMPLETE:
- udafPartial.iterate(aggBuffer, cachedParameters);
- break;
- case PARTIAL2:
- case FINAL:
- if (udafPartial instanceof GenericUDAFCount.GenericUDAFCountEvaluator) {
- Object parameter = ((PrimitiveObjectInspector) parameterInspectors[0])
- .getPrimitiveWritableObject(cachedParameters[0]);
- udafPartial.merge(aggBuffer, parameter);
- } else
- udafPartial.merge(aggBuffer, cachedParameters[0]);
- break;
- default:
- break;
- }
- }
-
- /**
- * serialize the result
- *
- * @param result
- * the evaluation result
- * @throws IOException
- * @throws AlgebricksException
- */
- private void serializeResult(Object result, ObjectInspector oi)
- throws IOException, AlgebricksException {
- try {
- BytesWritable outputWritable = (BytesWritable) lazySer.serialize(
- result, oi);
- out.write(outputWritable.getBytes(), 0, outputWritable.getLength());
- } catch (SerDeException e) {
- throw new AlgebricksException(e);
- }
- }
-
- /**
- * bind the tuple reference to the cached row object
- *
- * @param r
- */
- private void readIntoCache(IFrameTupleReference r) {
- cachedRowObject.init(r);
- }
-
- @Override
- public void finish() throws AlgebricksException {
- // aggregator
- try {
- Object result = null;
- result = udafPartial.terminatePartial(aggBuffer);
- if (mode == GenericUDAFEvaluator.Mode.COMPLETE
- || mode == GenericUDAFEvaluator.Mode.FINAL) {
- result = udafComplete.terminate(aggBuffer);
- serializeResult(result, outputInspector);
- } else {
- serializeResult(result, outputInspectorPartial);
- }
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- } catch (IOException e) {
- throw new AlgebricksException(e);
- }
- }
-
- @Override
- public void finishPartial() throws AlgebricksException {
- // aggregator.
- try {
- Object result = null;
- // get aggregations
- result = udafPartial.terminatePartial(aggBuffer);
- serializeResult(result, outputInspectorPartial);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- } catch (IOException e) {
- throw new AlgebricksException(e);
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java
deleted file mode 100644
index 032437b..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java
+++ /dev/null
@@ -1,259 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.evaluator;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.BytesWritable;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunction;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-public class AggregatuibFunctionSerializableEvaluator implements
- ICopySerializableAggregateFunction {
-
- /**
- * the mode of aggregation function
- */
- private GenericUDAFEvaluator.Mode mode;
-
- /**
- * an array of evaluators
- */
- private ExprNodeEvaluator[] evaluators;
-
- /**
- * udaf evaluator partial
- */
- private GenericUDAFEvaluator udafPartial;
-
- /**
- * udaf evaluator complete
- */
- private GenericUDAFEvaluator udafComplete;
-
- /**
- * cached parameter objects
- */
- private Object[] cachedParameters;
-
- /**
- * cached row objects
- */
- private LazyObject<? extends ObjectInspector> cachedRowObject;
-
- /**
- * aggregation buffer
- */
- private SerializableBuffer aggBuffer;
-
- /**
- * we only use lazy serde to do serialization
- */
- private SerDe lazySer;
-
- /**
- * the output object inspector for this aggregation function
- */
- private ObjectInspector outputInspector;
-
- /**
- * the output object inspector for this aggregation function
- */
- private ObjectInspector outputInspectorPartial;
-
- /**
- * parameter inspectors
- */
- private ObjectInspector[] parameterInspectors;
-
- /**
- * output; make sure the aggregation function creates as few objects as possible
- *
- * @param desc
- * @param oi
- * @param output
- */
- public AggregatuibFunctionSerializableEvaluator(List<ExprNodeDesc> inputs,
- List<TypeInfo> inputTypes, String genericUDAFName,
- GenericUDAFEvaluator.Mode aggMode, boolean distinct,
- ObjectInspector oi, ExprNodeEvaluator[] evals,
- ObjectInspector[] pInspectors, Object[] parameterCache,
- SerDe serde, LazyObject<? extends ObjectInspector> row,
- GenericUDAFEvaluator udafunctionPartial,
- GenericUDAFEvaluator udafunctionComplete, ObjectInspector outputOi,
- ObjectInspector outputOiPartial) throws AlgebricksException {
- // shared object across threads
- this.mode = aggMode;
- this.parameterInspectors = pInspectors;
-
- // thread local objects
- this.evaluators = evals;
- this.cachedParameters = parameterCache;
- this.cachedRowObject = row;
- this.lazySer = serde;
- this.udafPartial = udafunctionPartial;
- this.udafComplete = udafunctionComplete;
- this.outputInspector = outputOi;
- this.outputInspectorPartial = outputOiPartial;
-
- try {
- aggBuffer = (SerializableBuffer) udafPartial
- .getNewAggregationBuffer();
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- }
-
- @Override
- public void init(DataOutput output) throws AlgebricksException {
- try {
- udafPartial.reset(aggBuffer);
- outputAggBuffer(aggBuffer, output);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- }
-
- @Override
- public void step(IFrameTupleReference tuple, byte[] data, int start, int len)
- throws AlgebricksException {
- deSerializeAggBuffer(aggBuffer, data, start, len);
- readIntoCache(tuple);
- processRow();
- serializeAggBuffer(aggBuffer, data, start, len);
- }
-
- private void processRow() throws AlgebricksException {
- try {
- // get values by evaluating them
- for (int i = 0; i < cachedParameters.length; i++) {
- cachedParameters[i] = evaluators[i].evaluate(cachedRowObject);
- }
- processAggregate();
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- }
-
- private void processAggregate() throws HiveException {
- /**
- * accumulate the aggregation function
- */
- switch (mode) {
- case PARTIAL1:
- case COMPLETE:
- udafPartial.iterate(aggBuffer, cachedParameters);
- break;
- case PARTIAL2:
- case FINAL:
- if (udafPartial instanceof GenericUDAFCount.GenericUDAFCountEvaluator) {
- Object parameter = ((PrimitiveObjectInspector) parameterInspectors[0])
- .getPrimitiveWritableObject(cachedParameters[0]);
- udafPartial.merge(aggBuffer, parameter);
- } else
- udafPartial.merge(aggBuffer, cachedParameters[0]);
- break;
- default:
- break;
- }
- }
-
- /**
- * serialize the result
- *
- * @param result
- * the evaluation result
- * @throws IOException
- * @throws AlgebricksException
- */
- private void serializeResult(Object result, ObjectInspector oi,
- DataOutput out) throws IOException, AlgebricksException {
- try {
- BytesWritable outputWritable = (BytesWritable) lazySer.serialize(
- result, oi);
- out.write(outputWritable.getBytes(), 0, outputWritable.getLength());
- } catch (SerDeException e) {
- throw new AlgebricksException(e);
- }
- }
-
- /**
- * bind the tuple reference to the cached row object
- *
- * @param r
- */
- private void readIntoCache(IFrameTupleReference r) {
- cachedRowObject.init(r);
- }
-
- @Override
- public void finish(byte[] data, int start, int len, DataOutput output)
- throws AlgebricksException {
- deSerializeAggBuffer(aggBuffer, data, start, len);
- // aggregator
- try {
- Object result = null;
- result = udafPartial.terminatePartial(aggBuffer);
- if (mode == GenericUDAFEvaluator.Mode.COMPLETE
- || mode == GenericUDAFEvaluator.Mode.FINAL) {
- result = udafComplete.terminate(aggBuffer);
- serializeResult(result, outputInspector, output);
- } else {
- serializeResult(result, outputInspectorPartial, output);
- }
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- } catch (IOException e) {
- throw new AlgebricksException(e);
- }
- }
-
- @Override
- public void finishPartial(byte[] data, int start, int len, DataOutput output)
- throws AlgebricksException {
- deSerializeAggBuffer(aggBuffer, data, start, len);
- // aggregator.
- try {
- Object result = null;
- // get aggregations
- result = udafPartial.terminatePartial(aggBuffer);
- serializeResult(result, outputInspectorPartial, output);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- } catch (IOException e) {
- throw new AlgebricksException(e);
- }
- }
-
- private void serializeAggBuffer(SerializableBuffer buffer, byte[] data,
- int start, int len) throws AlgebricksException {
- buffer.serializeAggBuffer(data, start, len);
- }
-
- private void deSerializeAggBuffer(SerializableBuffer buffer, byte[] data,
- int start, int len) throws AlgebricksException {
- buffer.deSerializeAggBuffer(data, start, len);
- }
-
- private void outputAggBuffer(SerializableBuffer buffer, DataOutput out)
- throws AlgebricksException {
- try {
- buffer.serializeAggBuffer(out);
- } catch (IOException e) {
- throw new AlgebricksException(e);
- }
- }
-}
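The serializable variant deleted above keeps its state in a caller-owned byte range rather than on the heap: init(DataOutput) resets the buffer and emits its initial bytes, step() deserializes the state from (data, start, len), accumulates one row, and serializes the state back in place, and finish()/finishPartial() read the state and write the (partial) result. A simplified call-sequence sketch with the frame management omitted; illustrative only, not part of the changeset:

    // Hypothetical driver for an ICopySerializableAggregateFunction (sketch only;
    // the byte range (data, start, len) stands in for the frame-resident state).
    static void drive(ICopySerializableAggregateFunction agg, Iterable<IFrameTupleReference> tuples,
            byte[] data, int start, int len, DataOutput out) throws AlgebricksException {
        agg.init(out);                          // reset the state and emit its initial bytes
        for (IFrameTupleReference t : tuples) {
            agg.step(t, data, start, len);      // load state, accumulate one row, store it back
        }
        agg.finish(data, start, len, out);      // load state and write the final value
    }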
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/BufferSerDeUtil.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/BufferSerDeUtil.java
deleted file mode 100644
index d73be93..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/BufferSerDeUtil.java
+++ /dev/null
@@ -1,73 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.evaluator;
-
-public class BufferSerDeUtil {
-
- public static double getDouble(byte[] bytes, int offset) {
- return Double.longBitsToDouble(getLong(bytes, offset));
- }
-
- public static float getFloat(byte[] bytes, int offset) {
- return Float.intBitsToFloat(getInt(bytes, offset));
- }
-
- public static boolean getBoolean(byte[] bytes, int offset) {
- if (bytes[offset] == 0)
- return false;
- else
- return true;
- }
-
- public static int getInt(byte[] bytes, int offset) {
- return ((bytes[offset] & 0xff) << 24)
- + ((bytes[offset + 1] & 0xff) << 16)
- + ((bytes[offset + 2] & 0xff) << 8)
- + ((bytes[offset + 3] & 0xff) << 0);
- }
-
- public static long getLong(byte[] bytes, int offset) {
- return (((long) (bytes[offset] & 0xff)) << 56)
- + (((long) (bytes[offset + 1] & 0xff)) << 48)
- + (((long) (bytes[offset + 2] & 0xff)) << 40)
- + (((long) (bytes[offset + 3] & 0xff)) << 32)
- + (((long) (bytes[offset + 4] & 0xff)) << 24)
- + (((long) (bytes[offset + 5] & 0xff)) << 16)
- + (((long) (bytes[offset + 6] & 0xff)) << 8)
- + (((long) (bytes[offset + 7] & 0xff)) << 0);
- }
-
- public static void writeBoolean(boolean value, byte[] bytes, int offset) {
- if (value)
- bytes[offset] = (byte) 1;
- else
- bytes[offset] = (byte) 0;
- }
-
- public static void writeInt(int value, byte[] bytes, int offset) {
- bytes[offset++] = (byte) (value >> 24);
- bytes[offset++] = (byte) (value >> 16);
- bytes[offset++] = (byte) (value >> 8);
- bytes[offset++] = (byte) (value);
- }
-
- public static void writeLong(long value, byte[] bytes, int offset) {
- bytes[offset++] = (byte) (value >> 56);
- bytes[offset++] = (byte) (value >> 48);
- bytes[offset++] = (byte) (value >> 40);
- bytes[offset++] = (byte) (value >> 32);
- bytes[offset++] = (byte) (value >> 24);
- bytes[offset++] = (byte) (value >> 16);
- bytes[offset++] = (byte) (value >> 8);
- bytes[offset++] = (byte) (value);
- }
-
- public static void writeDouble(double value, byte[] bytes, int offset) {
- long lValue = Double.doubleToLongBits(value);
- writeLong(lValue, bytes, offset);
- }
-
- public static void writeFloat(float value, byte[] bytes, int offset) {
- int iValue = Float.floatToIntBits(value);
- writeInt(iValue, bytes, offset);
- }
-
-}
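BufferSerDeUtil packs Java primitives into a byte[] in big-endian byte order, matching java.io.DataOutput. A tiny round-trip sketch using only the methods shown above; illustrative only, not part of the changeset:

    // Hypothetical round trip through BufferSerDeUtil (sketch, not from this diff).
    static void roundTrip() {
        byte[] buf = new byte[12];
        BufferSerDeUtil.writeInt(42, buf, 0);          // bytes 0..3, big-endian
        BufferSerDeUtil.writeDouble(2.5, buf, 4);      // bytes 4..11, via doubleToLongBits
        assert BufferSerDeUtil.getInt(buf, 0) == 42;
        assert BufferSerDeUtil.getDouble(buf, 4) == 2.5;
    }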
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ExpressionTranslator.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ExpressionTranslator.java
deleted file mode 100644
index 2180910..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ExpressionTranslator.java
+++ /dev/null
@@ -1,233 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.evaluator;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.hadoop.hive.ql.exec.FunctionInfo;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
-import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.expression.ExpressionConstant;
-import edu.uci.ics.hivesterix.logical.expression.HiveAlgebricksBuiltInFunctionMap;
-import edu.uci.ics.hivesterix.logical.expression.HiveFunctionInfo;
-import edu.uci.ics.hivesterix.logical.expression.HivesterixConstantValue;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
-
-public class ExpressionTranslator {
-
- public static Object getHiveExpression(ILogicalExpression expr,
- IVariableTypeEnvironment env) throws Exception {
- if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
- /**
- * function expression
- */
- AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
- IFunctionInfo funcInfo = funcExpr.getFunctionInfo();
- FunctionIdentifier fid = funcInfo.getFunctionIdentifier();
-
- if (fid.getName().equals(ExpressionConstant.FIELDACCESS)) {
- Object info = ((HiveFunctionInfo) funcInfo).getInfo();
- ExprNodeFieldDesc desc = (ExprNodeFieldDesc) info;
- return new ExprNodeFieldDesc(desc.getTypeInfo(),
- desc.getDesc(), desc.getFieldName(), desc.getIsList());
- }
-
- if (fid.getName().equals(ExpressionConstant.NULL)) {
- return new ExprNodeNullDesc();
- }
-
- /**
- * argument expressions: translate argument expressions recursively
- * first, this logic is shared in scalar, aggregation and unnesting
- * function
- */
- List<Mutable<ILogicalExpression>> arguments = funcExpr
- .getArguments();
- List<ExprNodeDesc> parameters = new ArrayList<ExprNodeDesc>();
- for (Mutable<ILogicalExpression> argument : arguments) {
- /**
- * parameters cannot be aggregate function descriptors
- */
- ExprNodeDesc parameter = (ExprNodeDesc) getHiveExpression(
- argument.getValue(), env);
- parameters.add(parameter);
- }
-
- /**
- * get expression
- */
- if (funcExpr instanceof ScalarFunctionCallExpression) {
- String udfName = HiveAlgebricksBuiltInFunctionMap.INSTANCE
- .getHiveFunctionName(fid);
- GenericUDF udf;
- if (udfName != null) {
- /**
- * get corresponding function info for built-in functions
- */
- FunctionInfo fInfo = FunctionRegistry
- .getFunctionInfo(udfName);
- udf = fInfo.getGenericUDF();
-
- int inputSize = parameters.size();
- List<ExprNodeDesc> currentDescs = new ArrayList<ExprNodeDesc>();
-
- // generate expression tree if necessary
- while (inputSize > 2) {
- int pairs = inputSize / 2;
- for (int i = 0; i < pairs; i++) {
- List<ExprNodeDesc> descs = new ArrayList<ExprNodeDesc>();
- descs.add(parameters.get(2 * i));
- descs.add(parameters.get(2 * i + 1));
- ExprNodeDesc desc = ExprNodeGenericFuncDesc
- .newInstance(udf, descs);
- currentDescs.add(desc);
- }
-
- if (inputSize % 2 != 0) {
- // List<ExprNodeDesc> descs = new
- // ArrayList<ExprNodeDesc>();
- // ExprNodeDesc lastExpr =
- // currentDescs.remove(currentDescs.size() - 1);
- // descs.add(lastExpr);
- currentDescs.add(parameters.get(inputSize - 1));
- // ExprNodeDesc desc =
- // ExprNodeGenericFuncDesc.newInstance(udf, descs);
- // currentDescs.add(desc);
- }
- inputSize = currentDescs.size();
- parameters.clear();
- parameters.addAll(currentDescs);
- currentDescs.clear();
- }
-
- } else {
- Object secondInfo = ((HiveFunctionInfo) funcInfo).getInfo();
- if (secondInfo != null) {
-
- /**
- * for GenericUDFBridge: we should not ask this Hive
- * expression for its type, because the parameters may
- * have been changed
- */
- ExprNodeGenericFuncDesc hiveExpr = (ExprNodeGenericFuncDesc) ((HiveFunctionInfo) funcInfo)
- .getInfo();
- udf = hiveExpr.getGenericUDF();
- } else {
- /**
- * for other generic UDF
- */
- Class<?> udfClass;
- try {
- udfClass = Class.forName(fid.getName());
- udf = (GenericUDF) udfClass.newInstance();
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- }
- }
- }
- /**
- * get hive generic function expression
- */
- ExprNodeDesc desc = ExprNodeGenericFuncDesc.newInstance(udf,
- parameters);
- return desc;
- } else if (funcExpr instanceof AggregateFunctionCallExpression) {
- /**
- * hive aggregation info
- */
- AggregationDesc aggregateDesc = (AggregationDesc) ((HiveFunctionInfo) funcExpr
- .getFunctionInfo()).getInfo();
- /**
- * set parameters
- */
- aggregateDesc
- .setParameters((ArrayList<ExprNodeDesc>) parameters);
-
- List<TypeInfo> originalParameterTypeInfos = new ArrayList<TypeInfo>();
- for (ExprNodeDesc parameter : parameters) {
- if (parameter.getTypeInfo() instanceof StructTypeInfo) {
- originalParameterTypeInfos
- .add(TypeInfoFactory.doubleTypeInfo);
- } else
- originalParameterTypeInfos.add(parameter.getTypeInfo());
- }
-
- GenericUDAFEvaluator eval = FunctionRegistry
- .getGenericUDAFEvaluator(
- aggregateDesc.getGenericUDAFName(),
- originalParameterTypeInfos,
- aggregateDesc.getDistinct(), false);
-
- AggregationDesc newAggregateDesc = new AggregationDesc(
- aggregateDesc.getGenericUDAFName(), eval,
- aggregateDesc.getParameters(),
- aggregateDesc.getDistinct(), aggregateDesc.getMode());
- return newAggregateDesc;
- } else if (funcExpr instanceof UnnestingFunctionCallExpression) {
- /**
- * type inference for UDTF function
- */
- UDTFDesc hiveDesc = (UDTFDesc) ((HiveFunctionInfo) funcExpr
- .getFunctionInfo()).getInfo();
- String funcName = hiveDesc.getUDTFName();
- FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
- GenericUDTF udtf = fi.getGenericUDTF();
- UDTFDesc desc = new UDTFDesc(udtf);
- return desc;
- } else {
- throw new IllegalStateException(
- "unrecognized function expression "
- + expr.getClass().getName());
- }
- } else if ((expr.getExpressionTag() == LogicalExpressionTag.VARIABLE)) {
- /**
- * get type for variable in the environment
- */
- VariableReferenceExpression varExpr = (VariableReferenceExpression) expr;
- LogicalVariable var = varExpr.getVariableReference();
- TypeInfo typeInfo = (TypeInfo) env.getVarType(var);
- ExprNodeDesc desc = new ExprNodeColumnDesc(typeInfo,
- var.toString(), "", false);
- return desc;
- } else if ((expr.getExpressionTag() == LogicalExpressionTag.CONSTANT)) {
- /**
- * get expression for constant in the environment
- */
- ConstantExpression varExpr = (ConstantExpression) expr;
- Object value = ((HivesterixConstantValue) varExpr.getValue())
- .getObject();
- ExprNodeDesc desc = new ExprNodeConstantDesc(value);
- return desc;
- } else {
- throw new IllegalStateException("illegal expressions "
- + expr.getClass().getName());
- }
- }
-}
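For built-in UDFs resolved by name, the translator deleted above folds an n-ary Algebricks call into a tree of two-argument ExprNodeGenericFuncDesc nodes by pairing arguments level by level and carrying an odd trailing argument up to the next level. The same pairing logic restated over plain strings for readability; hypothetical helper, not part of the changeset:

    // Hypothetical illustration of the level-by-level pairing used above;
    // strings stand in for ExprNodeDesc instances and "op" for the binary UDF.
    // Assumes: import java.util.ArrayList; import java.util.List;
    static String fold(List<String> args) {
        List<String> current = new ArrayList<String>(args);
        while (current.size() > 2) {
            List<String> next = new ArrayList<String>();
            int pairs = current.size() / 2;
            for (int i = 0; i < pairs; i++) {
                next.add("(" + current.get(2 * i) + " op " + current.get(2 * i + 1) + ")");
            }
            if (current.size() % 2 != 0) {
                next.add(current.get(current.size() - 1));  // odd argument carried upward
            }
            current = next;
        }
        return current.size() == 2
                ? "(" + current.get(0) + " op " + current.get(1) + ")"
                : current.get(0);
    }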
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/SerializableBuffer.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/SerializableBuffer.java
deleted file mode 100644
index 328b384..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/SerializableBuffer.java
+++ /dev/null
@@ -1,16 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.evaluator;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
-
-public interface SerializableBuffer extends AggregationBuffer {
-
- public void deSerializeAggBuffer(byte[] data, int start, int len);
-
- public void serializeAggBuffer(byte[] data, int start, int len);
-
- public void serializeAggBuffer(DataOutput output) throws IOException;
-
-}
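SerializableBuffer combines Hive's AggregationBuffer marker with the byte-level serialization hooks the serializable evaluator above relies on. A minimal sketch of an implementation holding a single running count, assuming the BufferSerDeUtil helper from the same package; illustrative only, not part of the changeset:

    // Hypothetical count buffer backed by one 8-byte long (sketch, not from this diff).
    // Assumes: import java.io.DataOutput; import java.io.IOException;
    public class CountBuffer implements SerializableBuffer {
        private long count;

        public void deSerializeAggBuffer(byte[] data, int start, int len) {
            count = BufferSerDeUtil.getLong(data, start);   // read state from the frame
        }

        public void serializeAggBuffer(byte[] data, int start, int len) {
            BufferSerDeUtil.writeLong(count, data, start);  // write state back in place
        }

        public void serializeAggBuffer(DataOutput output) throws IOException {
            output.writeLong(count);                        // append state to the output
        }
    }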
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java
deleted file mode 100644
index de0141b..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java
+++ /dev/null
@@ -1,147 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.evaluator;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.ql.udf.generic.Collector;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.io.BytesWritable;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.serde.lazy.LazyColumnar;
-import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
-import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
-import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyUnnestingFunction;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-public class UDTFFunctionEvaluator implements ICopyUnnestingFunction, Collector {
-
- /**
- * udtf function
- */
- private UDTFDesc func;
-
- /**
- * input object inspector
- */
- private ObjectInspector inputInspector;
-
- /**
- * output object inspector
- */
- private ObjectInspector outputInspector;
-
- /**
- * object inspector for udtf
- */
- private ObjectInspector[] udtfInputOIs;
-
- /**
- * generic udtf
- */
- private GenericUDTF udtf;
-
- /**
- * data output
- */
- private DataOutput out;
-
- /**
- * the input row object
- */
- private LazyColumnar cachedRowObject;
-
- /**
- * cached row object (input)
- */
- private Object[] cachedInputObjects;
-
- /**
- * serialization/deserialization
- */
- private SerDe lazySerDe;
-
- /**
- * columns feed into UDTF
- */
- private int[] columns;
-
- public UDTFFunctionEvaluator(UDTFDesc desc, Schema schema, int[] cols,
- DataOutput output) {
- this.func = desc;
- this.inputInspector = schema.toObjectInspector();
- udtf = func.getGenericUDTF();
- out = output;
- columns = cols;
- }
-
- @Override
- public void init(IFrameTupleReference tuple) throws AlgebricksException {
- cachedInputObjects = new LazyObject[columns.length];
- try {
- cachedRowObject = (LazyColumnar) LazyFactory
- .createLazyObject(inputInspector);
- outputInspector = udtf.initialize(udtfInputOIs);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- udtf.setCollector(this);
- lazySerDe = new LazySerDe();
- readIntoCache(tuple);
- }
-
- @Override
- public boolean step() throws AlgebricksException {
- try {
- udtf.process(cachedInputObjects);
- return true;
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- }
-
- /**
- * bind the tuple reference to the cached row object
- *
- * @param r
- */
- private void readIntoCache(IFrameTupleReference r) {
- cachedRowObject.init(r);
- for (int i = 0; i < cachedInputObjects.length; i++) {
- cachedInputObjects[i] = cachedRowObject.getField(columns[i]);
- }
- }
-
- /**
- * serialize the result
- *
- * @param result
- * the evaluation result
- * @throws IOException
- * @throws AlgebricksException
- */
- private void serializeResult(Object result) throws SerDeException,
- IOException {
- BytesWritable outputWritable = (BytesWritable) lazySerDe.serialize(
- result, outputInspector);
- out.write(outputWritable.getBytes(), 0, outputWritable.getLength());
- }
-
- @Override
- public void collect(Object input) throws HiveException {
- try {
- serializeResult(input);
- } catch (IOException e) {
- throw new HiveException(e);
- } catch (SerDeException e) {
- throw new HiveException(e);
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/exec/IExecutionEngine.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/exec/IExecutionEngine.java
deleted file mode 100644
index 8f4c471..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/exec/IExecutionEngine.java
+++ /dev/null
@@ -1,25 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.exec;
-
-import java.io.Serializable;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.Task;
-
-public interface IExecutionEngine {
-
- /**
- * compile the job
- *
- * @param rootTasks
- * : Hive MapReduce plan
- * @return 0 pass, 1 fail
- */
- public int compileJob(List<Task<? extends Serializable>> rootTasks);
-
- /**
- * execute the job with latest compiled plan
- *
- * @return
- */
- public int executeJob();
-}
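IExecutionEngine is the narrow contract between the Hive driver and the Hyracks backend: compile the MapReduce plan first, then execute the most recently compiled job. A hedged usage sketch; the engine instance and task list are assumptions, not from this changeset:

    // Hypothetical usage of IExecutionEngine (sketch, not from this diff).
    // Assumes the imports shown in the deleted interface above.
    static int compileAndRun(IExecutionEngine engine, List<Task<? extends Serializable>> rootTasks) {
        if (engine.compileJob(rootTasks) != 0) {
            return 1;                  // compilation failed
        }
        return engine.executeJob();    // run the most recently compiled plan
    }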
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryAscComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryAscComparatorFactory.java
deleted file mode 100644
index 9c2d463..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryAscComparatorFactory.java
+++ /dev/null
@@ -1,36 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveByteBinaryAscComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveByteBinaryAscComparatorFactory INSTANCE = new HiveByteBinaryAscComparatorFactory();
-
- private HiveByteBinaryAscComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private byte left;
- private byte right;
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- left = b1[s1];
- right = b2[s2];
- if (left > right)
- return 1;
- else if (left == right)
- return 0;
- else
- return -1;
- }
- };
- }
-
-}
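The comparator factories deleted here, and the descending and other-type variants that follow, all share one shape: a serializable singleton factory whose createBinaryComparator() returns a small stateful comparator over raw byte ranges. A hedged usage sketch for the ascending byte comparator above; illustrative only, not part of the changeset:

    // Hypothetical usage of a binary comparator factory (sketch, not from this diff).
    static int compareSingleBytes() {
        IBinaryComparator cmp = HiveByteBinaryAscComparatorFactory.INSTANCE.createBinaryComparator();
        byte[] left = { 3 };
        byte[] right = { 7 };
        return cmp.compare(left, 0, 1, right, 0, 1);   // negative: 3 sorts before 7 ascending
    }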
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryDescComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryDescComparatorFactory.java
deleted file mode 100644
index ee71655..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryDescComparatorFactory.java
+++ /dev/null
@@ -1,35 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveByteBinaryDescComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveByteBinaryDescComparatorFactory INSTANCE = new HiveByteBinaryDescComparatorFactory();
-
- private HiveByteBinaryDescComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private byte left;
- private byte right;
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- left = b1[s1];
- right = b2[s2];
- if (left > right)
- return -1;
- else if (left == right)
- return 0;
- else
- return 1;
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryAscComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryAscComparatorFactory.java
deleted file mode 100644
index 739e417..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryAscComparatorFactory.java
+++ /dev/null
@@ -1,39 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveDoubleBinaryAscComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveDoubleBinaryAscComparatorFactory INSTANCE = new HiveDoubleBinaryAscComparatorFactory();
-
- private HiveDoubleBinaryAscComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private double left;
- private double right;
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- left = Double.longBitsToDouble(LazyUtils
- .byteArrayToLong(b1, s1));
- right = Double.longBitsToDouble(LazyUtils.byteArrayToLong(b2,
- s2));
- if (left > right)
- return 1;
- else if (left == right)
- return 0;
- else
- return -1;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryDescComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryDescComparatorFactory.java
deleted file mode 100644
index 0424c9f..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryDescComparatorFactory.java
+++ /dev/null
@@ -1,39 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveDoubleBinaryDescComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveDoubleBinaryDescComparatorFactory INSTANCE = new HiveDoubleBinaryDescComparatorFactory();
-
- private HiveDoubleBinaryDescComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private double left;
- private double right;
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- left = Double.longBitsToDouble(LazyUtils
- .byteArrayToLong(b1, s1));
- right = Double.longBitsToDouble(LazyUtils.byteArrayToLong(b2,
- s2));
- if (left > right)
- return -1;
- else if (left == right)
- return 0;
- else
- return 1;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryAscComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryAscComparatorFactory.java
deleted file mode 100644
index 08542a7..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryAscComparatorFactory.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveFloatBinaryAscComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveFloatBinaryAscComparatorFactory INSTANCE = new HiveFloatBinaryAscComparatorFactory();
-
- private HiveFloatBinaryAscComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private float left;
- private float right;
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- left = Float.intBitsToFloat(LazyUtils.byteArrayToInt(b1, s1));
- right = Float.intBitsToFloat(LazyUtils.byteArrayToInt(b2, s2));
- if (left > right)
- return 1;
- else if (left == right)
- return 0;
- else
- return -1;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryDescComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryDescComparatorFactory.java
deleted file mode 100644
index 513512e..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryDescComparatorFactory.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveFloatBinaryDescComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveFloatBinaryDescComparatorFactory INSTANCE = new HiveFloatBinaryDescComparatorFactory();
-
- private HiveFloatBinaryDescComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private float left;
- private float right;
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- left = Float.intBitsToFloat(LazyUtils.byteArrayToInt(b1, s1));
- right = Float.intBitsToFloat(LazyUtils.byteArrayToInt(b2, s2));
- if (left > right)
- return -1;
- else if (left == right)
- return 0;
- else
- return 1;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryAscComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryAscComparatorFactory.java
deleted file mode 100644
index 947f30f..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryAscComparatorFactory.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveIntegerBinaryAscComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static final HiveIntegerBinaryAscComparatorFactory INSTANCE = new HiveIntegerBinaryAscComparatorFactory();
-
- private HiveIntegerBinaryAscComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private VInt left = new VInt();
- private VInt right = new VInt();
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- LazyUtils.readVInt(b1, s1, left);
- LazyUtils.readVInt(b2, s2, right);
-
- if (left.length != l1 || right.length != l2)
- throw new IllegalArgumentException(
- "length mismatch in int comparator function actual: "
- + left.length + "," + right.length
- + " expected " + l1 + "," + l2);
-
- if (left.value > right.value)
- return 1;
- else if (left.value == right.value)
- return 0;
- else
- return -1;
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryDescComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryDescComparatorFactory.java
deleted file mode 100644
index 7614aa1..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryDescComparatorFactory.java
+++ /dev/null
@@ -1,41 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveIntegerBinaryDescComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static final HiveIntegerBinaryDescComparatorFactory INSTANCE = new HiveIntegerBinaryDescComparatorFactory();
-
- private HiveIntegerBinaryDescComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private VInt left = new VInt();
- private VInt right = new VInt();
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- LazyUtils.readVInt(b1, s1, left);
- LazyUtils.readVInt(b2, s2, right);
- if (left.length != l1 || right.length != l2)
- throw new IllegalArgumentException(
- "length mismatch in int comparator function actual: "
- + left.length + " expected " + l1);
- if (left.value > right.value)
- return -1;
- else if (left.value == right.value)
- return 0;
- else
- return 1;
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryAscComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryAscComparatorFactory.java
deleted file mode 100644
index f5f3473..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryAscComparatorFactory.java
+++ /dev/null
@@ -1,41 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveLongBinaryAscComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static final HiveLongBinaryAscComparatorFactory INSTANCE = new HiveLongBinaryAscComparatorFactory();
-
- private HiveLongBinaryAscComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private VLong left = new VLong();
- private VLong right = new VLong();
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- LazyUtils.readVLong(b1, s1, left);
- LazyUtils.readVLong(b2, s2, right);
- if (left.length != l1 || right.length != l2)
- throw new IllegalArgumentException(
- "length mismatch in int comparator function actual: "
- + left.length + " expected " + l1);
- if (left.value > right.value)
- return 1;
- else if (left.value == right.value)
- return 0;
- else
- return -1;
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryDescComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryDescComparatorFactory.java
deleted file mode 100644
index b878b22..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryDescComparatorFactory.java
+++ /dev/null
@@ -1,41 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveLongBinaryDescComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static final HiveLongBinaryDescComparatorFactory INSTANCE = new HiveLongBinaryDescComparatorFactory();
-
- private HiveLongBinaryDescComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private VLong left = new VLong();
- private VLong right = new VLong();
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- LazyUtils.readVLong(b1, s1, left);
- LazyUtils.readVLong(b2, s2, right);
- if (left.length != l1 || right.length != l2)
- throw new IllegalArgumentException(
- "length mismatch in int comparator function actual: "
- + left.length + " expected " + l1);
- if (left.value > right.value)
- return -1;
- else if (left.value == right.value)
- return 0;
- else
- return 1;
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryAscComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryAscComparatorFactory.java
deleted file mode 100644
index 8d55cdb..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryAscComparatorFactory.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveShortBinaryAscComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveShortBinaryAscComparatorFactory INSTANCE = new HiveShortBinaryAscComparatorFactory();
-
- private HiveShortBinaryAscComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private short left;
- private short right;
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- left = LazyUtils.byteArrayToShort(b1, s1);
- right = LazyUtils.byteArrayToShort(b2, s2);
- if (left > right)
- return 1;
- else if (left == right)
- return 0;
- else
- return -1;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryDescComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryDescComparatorFactory.java
deleted file mode 100644
index 4e8dde6..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryDescComparatorFactory.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveShortBinaryDescComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveShortBinaryDescComparatorFactory INSTANCE = new HiveShortBinaryDescComparatorFactory();
-
- private HiveShortBinaryDescComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private short left;
- private short right;
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- left = LazyUtils.byteArrayToShort(b1, s1);
- right = LazyUtils.byteArrayToShort(b2, s2);
- if (left > right)
- return -1;
- else if (left == right)
- return 0;
- else
- return 1;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryAscComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryAscComparatorFactory.java
deleted file mode 100644
index a334ecf..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryAscComparatorFactory.java
+++ /dev/null
@@ -1,46 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import org.apache.hadoop.io.Text;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveStringBinaryAscComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveStringBinaryAscComparatorFactory INSTANCE = new HiveStringBinaryAscComparatorFactory();
-
- private HiveStringBinaryAscComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private VInt leftLen = new VInt();
- private VInt rightLen = new VInt();
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- LazyUtils.readVInt(b1, s1, leftLen);
- LazyUtils.readVInt(b2, s2, rightLen);
-
- if (leftLen.value + leftLen.length != l1
- || rightLen.value + rightLen.length != l2)
- throw new IllegalStateException(
- "parse string: length mismatch, expected "
- + (leftLen.value + leftLen.length) + ", "
- + (rightLen.value + rightLen.length)
- + " but get " + l1 + ", " + l2);
-
- return Text.Comparator.compareBytes(b1, s1 + leftLen.length, l1
- - leftLen.length, b2, s2 + rightLen.length, l2
- - rightLen.length);
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryDescComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryDescComparatorFactory.java
deleted file mode 100644
index e00b58e..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryDescComparatorFactory.java
+++ /dev/null
@@ -1,45 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import org.apache.hadoop.io.WritableComparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveStringBinaryDescComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveStringBinaryDescComparatorFactory INSTANCE = new HiveStringBinaryDescComparatorFactory();
-
- private HiveStringBinaryDescComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private VInt leftLen = new VInt();
- private VInt rightLen = new VInt();
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- LazyUtils.readVInt(b1, s1, leftLen);
- LazyUtils.readVInt(b2, s2, rightLen);
-
- if (leftLen.value + leftLen.length != l1
- || rightLen.value + rightLen.length != l2)
- throw new IllegalStateException(
- "parse string: length mismatch, expected "
- + (leftLen.value + leftLen.length) + ", "
- + (rightLen.value + rightLen.length)
- + " but get " + l1 + ", " + l2);
-
- return -WritableComparator.compareBytes(b1,
- s1 + leftLen.length, l1 - leftLen.length, b2, s2
- + rightLen.length, l2 - rightLen.length);
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java
deleted file mode 100644
index c6078ca..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java
+++ /dev/null
@@ -1,381 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.runtime.evaluator.AggregationFunctionEvaluator;
-import edu.uci.ics.hivesterix.runtime.evaluator.ExpressionTranslator;
-import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
-import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
-import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunction;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunctionFactory;
-import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
-
-public class AggregationFunctionFactory implements
- ICopyAggregateFunctionFactory {
-
- private static final long serialVersionUID = 1L;
-
- /**
- * list of parameters' serialization
- */
- private List<String> parametersSerialization = new ArrayList<String>();
-
- /**
- * the name of the udf
- */
- private String genericUDAFName;
-
- /**
- * aggregation mode
- */
- private GenericUDAFEvaluator.Mode mode;
-
- /**
- * list of type info
- */
- private List<TypeInfo> types = new ArrayList<TypeInfo>();
-
- /**
- * distinct or not
- */
- private boolean distinct;
-
- /**
- * the schema of incoming rows
- */
- private Schema rowSchema;
-
- /**
- * list of parameters
- */
- private transient List<ExprNodeDesc> parametersOrigin;
-
- /**
- * row inspector
- */
- private transient ObjectInspector rowInspector = null;
-
- /**
- * output object inspector
- */
- private transient ObjectInspector outputInspector = null;
-
- /**
- * output object inspector
- */
- private transient ObjectInspector outputInspectorPartial = null;
-
- /**
- * parameter inspectors
- */
- private transient ObjectInspector[] parameterInspectors = null;
-
- /**
- * expression desc
- */
- private transient HashMap<Long, List<ExprNodeDesc>> parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
-
- /**
- * evaluators
- */
- private transient HashMap<Long, ExprNodeEvaluator[]> evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
-
- /**
- * cached parameter objects
- */
- private transient HashMap<Long, Object[]> cachedParameters = new HashMap<Long, Object[]>();
-
- /**
- * cached row object: one per thread
- */
- private transient HashMap<Long, LazyObject<? extends ObjectInspector>> cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
-
- /**
- * we only use lazy serde to do serialization
- */
- private transient HashMap<Long, SerDe> serDe = new HashMap<Long, SerDe>();
-
- /**
- * udaf evaluators
- */
- private transient HashMap<Long, GenericUDAFEvaluator> udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
-
- /**
- * udaf evaluators
- */
- private transient HashMap<Long, GenericUDAFEvaluator> udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
-
- /**
- * aggregation function desc
- */
- private transient AggregationDesc aggregator;
-
- /**
- *
- * @param expression
- * the Algebricks aggregate function call expression
- * @param oi
- * schema
- */
- public AggregationFunctionFactory(
- AggregateFunctionCallExpression expression, Schema oi,
- IVariableTypeEnvironment env) throws AlgebricksException {
-
- try {
- aggregator = (AggregationDesc) ExpressionTranslator
- .getHiveExpression(expression, env);
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- }
- init(aggregator.getParameters(), aggregator.getGenericUDAFName(),
- aggregator.getMode(), aggregator.getDistinct(), oi);
- }
-
- /**
- * constructor of aggregation function factory
- *
- * @param inputs
- * @param name
- * @param udafMode
- * @param distinct
- * @param oi
- */
- private void init(List<ExprNodeDesc> inputs, String name,
- GenericUDAFEvaluator.Mode udafMode, boolean distinct, Schema oi) {
- parametersOrigin = inputs;
- genericUDAFName = name;
- mode = udafMode;
- this.distinct = distinct;
- rowSchema = oi;
-
- for (ExprNodeDesc input : inputs) {
- TypeInfo type = input.getTypeInfo();
- if (type instanceof StructTypeInfo) {
- types.add(TypeInfoFactory.doubleTypeInfo);
- } else
- types.add(type);
-
- String s = Utilities.serializeExpression(input);
- parametersSerialization.add(s);
- }
- }
-
- @Override
- public synchronized ICopyAggregateFunction createAggregateFunction(
- IDataOutputProvider provider) throws AlgebricksException {
- if (parametersOrigin == null) {
- Configuration config = new Configuration();
- config.setClassLoader(this.getClass().getClassLoader());
- /**
- * in case of a Class.forName(...) call in Hive code
- */
- Thread.currentThread().setContextClassLoader(
- this.getClass().getClassLoader());
-
- parametersOrigin = new ArrayList<ExprNodeDesc>();
- for (String serialization : parametersSerialization) {
- parametersOrigin.add(Utilities.deserializeExpression(
- serialization, config));
- }
- }
-
- /**
- * exprs
- */
- if (parameterExprs == null)
- parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
-
- /**
- * evaluators
- */
- if (evaluators == null)
- evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
-
- /**
- * cached parameter objects
- */
- if (cachedParameters == null)
- cachedParameters = new HashMap<Long, Object[]>();
-
- /**
- * cached row object: one per thread
- */
- if (cachedRowObjects == null)
- cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
-
- /**
- * we only use lazy serde to do serialization
- */
- if (serDe == null)
- serDe = new HashMap<Long, SerDe>();
-
- /**
- * UDAF functions
- */
- if (udafsComplete == null)
- udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
-
- /**
- * UDAF functions
- */
- if (udafsPartial == null)
- udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
-
- if (parameterInspectors == null)
- parameterInspectors = new ObjectInspector[parametersOrigin.size()];
-
- if (rowInspector == null)
- rowInspector = rowSchema.toObjectInspector();
-
- // get current thread id
- long threadId = Thread.currentThread().getId();
-
- /**
- * expressions, expressions are thread local
- */
- List<ExprNodeDesc> parameters = parameterExprs.get(threadId);
- if (parameters == null) {
- parameters = new ArrayList<ExprNodeDesc>();
- for (ExprNodeDesc parameter : parametersOrigin)
- parameters.add(parameter.clone());
- parameterExprs.put(threadId, parameters);
- }
-
- /**
- * cached parameter objects
- */
- Object[] cachedParas = cachedParameters.get(threadId);
- if (cachedParas == null) {
- cachedParas = new Object[parameters.size()];
- cachedParameters.put(threadId, cachedParas);
- }
-
- /**
- * cached row object: one per thread
- */
- LazyObject<? extends ObjectInspector> cachedRowObject = cachedRowObjects
- .get(threadId);
- if (cachedRowObject == null) {
- cachedRowObject = LazyFactory.createLazyObject(rowInspector);
- cachedRowObjects.put(threadId, cachedRowObject);
- }
-
- /**
- * we only use lazy serde to do serialization
- */
- SerDe lazySer = serDe.get(threadId);
- if (lazySer == null) {
- lazySer = new LazySerDe();
- serDe.put(threadId, lazySer);
- }
-
- /**
- * evaluators
- */
- ExprNodeEvaluator[] evals = evaluators.get(threadId);
- if (evals == null) {
- evals = new ExprNodeEvaluator[parameters.size()];
- evaluators.put(threadId, evals);
- }
-
- GenericUDAFEvaluator udafPartial;
- GenericUDAFEvaluator udafComplete;
-
- // initialize object inspectors
- try {
- /**
- * evaluators, udfs, and object inspectors are shared within one thread
- */
- for (int i = 0; i < evals.length; i++) {
- if (evals[i] == null) {
- evals[i] = ExprNodeEvaluatorFactory.get(parameters.get(i));
- if (parameterInspectors[i] == null) {
- parameterInspectors[i] = evals[i]
- .initialize(rowInspector);
- } else {
- evals[i].initialize(rowInspector);
- }
- }
- }
-
- udafComplete = udafsComplete.get(threadId);
- if (udafComplete == null) {
- try {
- udafComplete = FunctionRegistry.getGenericUDAFEvaluator(
- genericUDAFName, types, distinct, false);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- udafsComplete.put(threadId, udafComplete);
- udafComplete.init(mode, parameterInspectors);
- }
-
- // multiple stage group by, determined by the mode parameter
- if (outputInspector == null)
- outputInspector = udafComplete.init(mode, parameterInspectors);
-
- // initial partial gby udaf
- GenericUDAFEvaluator.Mode partialMode;
- // adjust mode for external groupby
- if (mode == GenericUDAFEvaluator.Mode.COMPLETE)
- partialMode = GenericUDAFEvaluator.Mode.PARTIAL1;
- else if (mode == GenericUDAFEvaluator.Mode.FINAL)
- partialMode = GenericUDAFEvaluator.Mode.PARTIAL2;
- else
- partialMode = mode;
- udafPartial = udafsPartial.get(threadId);
- if (udafPartial == null) {
- try {
- udafPartial = FunctionRegistry.getGenericUDAFEvaluator(
- genericUDAFName, types, distinct, false);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- udafPartial.init(partialMode, parameterInspectors);
- udafsPartial.put(threadId, udafPartial);
- }
-
- // multiple stage group by, determined by the mode parameter
- if (outputInspectorPartial == null)
- outputInspectorPartial = udafPartial.init(partialMode,
- parameterInspectors);
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e);
- }
-
- return new AggregationFunctionEvaluator(parameters, types,
- genericUDAFName, mode, distinct, rowInspector,
- provider.getDataOutput(), evals, parameterInspectors,
- cachedParas, lazySer, cachedRowObject, udafPartial,
- udafComplete, outputInspector, outputInspectorPartial);
- }
-
- public String toString() {
- return "aggregation function expression evaluator factory: "
- + this.genericUDAFName;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java
deleted file mode 100644
index 73717a3..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java
+++ /dev/null
@@ -1,381 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.runtime.evaluator.AggregatuibFunctionSerializableEvaluator;
-import edu.uci.ics.hivesterix.runtime.evaluator.ExpressionTranslator;
-import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
-import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
-import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunction;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunctionFactory;
-
-public class AggregationFunctionSerializableFactory implements
- ICopySerializableAggregateFunctionFactory {
-
- private static final long serialVersionUID = 1L;
-
- /**
- * list of parameters' serialization
- */
- private List<String> parametersSerialization = new ArrayList<String>();
-
- /**
- * the name of the udf
- */
- private String genericUDAFName;
-
- /**
- * aggregation mode
- */
- private GenericUDAFEvaluator.Mode mode;
-
- /**
- * list of type info
- */
- private List<TypeInfo> types = new ArrayList<TypeInfo>();
-
- /**
- * distinct or not
- */
- private boolean distinct;
-
- /**
- * the schema of incoming rows
- */
- private Schema rowSchema;
-
- /**
- * list of parameters
- */
- private transient List<ExprNodeDesc> parametersOrigin;
-
- /**
- * row inspector
- */
- private transient ObjectInspector rowInspector = null;
-
- /**
- * output object inspector
- */
- private transient ObjectInspector outputInspector = null;
-
- /**
- * output object inspector
- */
- private transient ObjectInspector outputInspectorPartial = null;
-
- /**
- * parameter inspectors
- */
- private transient ObjectInspector[] parameterInspectors = null;
-
- /**
- * expression desc
- */
- private transient HashMap<Long, List<ExprNodeDesc>> parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
-
- /**
- * evaluators
- */
- private transient HashMap<Long, ExprNodeEvaluator[]> evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
-
- /**
- * cached parameter objects
- */
- private transient HashMap<Long, Object[]> cachedParameters = new HashMap<Long, Object[]>();
-
- /**
- * cached row object: one per thread
- */
- private transient HashMap<Long, LazyObject<? extends ObjectInspector>> cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
-
- /**
- * we only use lazy serde to do serialization
- */
- private transient HashMap<Long, SerDe> serDe = new HashMap<Long, SerDe>();
-
- /**
- * udaf evaluators
- */
- private transient HashMap<Long, GenericUDAFEvaluator> udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
-
- /**
- * udaf evaluators
- */
- private transient HashMap<Long, GenericUDAFEvaluator> udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
-
- /**
- * aggregation function desc
- */
- private transient AggregationDesc aggregator;
-
- /**
- *
-     * @param expression
- * Algebricks function call expression
- * @param oi
- * schema
- */
- public AggregationFunctionSerializableFactory(
- AggregateFunctionCallExpression expression, Schema oi,
- IVariableTypeEnvironment env) throws AlgebricksException {
-
- try {
- aggregator = (AggregationDesc) ExpressionTranslator
- .getHiveExpression(expression, env);
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- }
- init(aggregator.getParameters(), aggregator.getGenericUDAFName(),
- aggregator.getMode(), aggregator.getDistinct(), oi);
- }
-
- /**
- * constructor of aggregation function factory
- *
- * @param inputs
- * @param name
- * @param udafMode
- * @param distinct
- * @param oi
- */
- private void init(List<ExprNodeDesc> inputs, String name,
- GenericUDAFEvaluator.Mode udafMode, boolean distinct, Schema oi) {
- parametersOrigin = inputs;
- genericUDAFName = name;
- mode = udafMode;
- this.distinct = distinct;
- rowSchema = oi;
-
- for (ExprNodeDesc input : inputs) {
- TypeInfo type = input.getTypeInfo();
- if (type instanceof StructTypeInfo) {
- types.add(TypeInfoFactory.doubleTypeInfo);
- } else
- types.add(type);
-
- String s = Utilities.serializeExpression(input);
- parametersSerialization.add(s);
- }
- }
-
- @Override
- public synchronized ICopySerializableAggregateFunction createAggregateFunction()
- throws AlgebricksException {
- if (parametersOrigin == null) {
- Configuration config = new Configuration();
- config.setClassLoader(this.getClass().getClassLoader());
- /**
- * in case of class.forname(...) call in hive code
- */
- Thread.currentThread().setContextClassLoader(
- this.getClass().getClassLoader());
-
- parametersOrigin = new ArrayList<ExprNodeDesc>();
- for (String serialization : parametersSerialization) {
- parametersOrigin.add(Utilities.deserializeExpression(
- serialization, config));
- }
- }
-
- /**
- * exprs
- */
- if (parameterExprs == null)
- parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
-
- /**
- * evaluators
- */
- if (evaluators == null)
- evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
-
- /**
- * cached parameter objects
- */
- if (cachedParameters == null)
- cachedParameters = new HashMap<Long, Object[]>();
-
- /**
- * cached row object: one per thread
- */
- if (cachedRowObjects == null)
- cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
-
- /**
- * we only use lazy serde to do serialization
- */
- if (serDe == null)
- serDe = new HashMap<Long, SerDe>();
-
- /**
- * UDAF functions
- */
- if (udafsComplete == null)
- udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
-
- /**
- * UDAF functions
- */
- if (udafsPartial == null)
- udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
-
- if (parameterInspectors == null)
- parameterInspectors = new ObjectInspector[parametersOrigin.size()];
-
- if (rowInspector == null)
- rowInspector = rowSchema.toObjectInspector();
-
- // get current thread id
- long threadId = Thread.currentThread().getId();
-
- /**
-         * expressions are thread-local
- */
- List<ExprNodeDesc> parameters = parameterExprs.get(threadId);
- if (parameters == null) {
- parameters = new ArrayList<ExprNodeDesc>();
- for (ExprNodeDesc parameter : parametersOrigin)
- parameters.add(parameter.clone());
- parameterExprs.put(threadId, parameters);
- }
-
- /**
- * cached parameter objects
- */
- Object[] cachedParas = cachedParameters.get(threadId);
- if (cachedParas == null) {
- cachedParas = new Object[parameters.size()];
- cachedParameters.put(threadId, cachedParas);
- }
-
- /**
- * cached row object: one per thread
- */
- LazyObject<? extends ObjectInspector> cachedRowObject = cachedRowObjects
- .get(threadId);
- if (cachedRowObject == null) {
- cachedRowObject = LazyFactory.createLazyObject(rowInspector);
- cachedRowObjects.put(threadId, cachedRowObject);
- }
-
- /**
- * we only use lazy serde to do serialization
- */
- SerDe lazySer = serDe.get(threadId);
- if (lazySer == null) {
- lazySer = new LazySerDe();
- serDe.put(threadId, lazySer);
- }
-
- /**
- * evaluators
- */
- ExprNodeEvaluator[] evals = evaluators.get(threadId);
- if (evals == null) {
- evals = new ExprNodeEvaluator[parameters.size()];
- evaluators.put(threadId, evals);
- }
-
- GenericUDAFEvaluator udafPartial;
- GenericUDAFEvaluator udafComplete;
-
- // initialize object inspectors
- try {
- /**
-             * evaluators, UDFs, and object inspectors are shared within one thread
- */
- for (int i = 0; i < evals.length; i++) {
- if (evals[i] == null) {
- evals[i] = ExprNodeEvaluatorFactory.get(parameters.get(i));
- if (parameterInspectors[i] == null) {
- parameterInspectors[i] = evals[i]
- .initialize(rowInspector);
- } else {
- evals[i].initialize(rowInspector);
- }
- }
- }
-
- udafComplete = udafsComplete.get(threadId);
- if (udafComplete == null) {
- try {
- udafComplete = FunctionRegistry.getGenericUDAFEvaluator(
- genericUDAFName, types, distinct, false);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- udafsComplete.put(threadId, udafComplete);
- udafComplete.init(mode, parameterInspectors);
- }
-
- // multiple stage group by, determined by the mode parameter
- if (outputInspector == null)
- outputInspector = udafComplete.init(mode, parameterInspectors);
-
- // initial partial gby udaf
- GenericUDAFEvaluator.Mode partialMode;
- // adjust mode for external groupby
- if (mode == GenericUDAFEvaluator.Mode.COMPLETE)
- partialMode = GenericUDAFEvaluator.Mode.PARTIAL1;
- else if (mode == GenericUDAFEvaluator.Mode.FINAL)
- partialMode = GenericUDAFEvaluator.Mode.PARTIAL2;
- else
- partialMode = mode;
- udafPartial = udafsPartial.get(threadId);
- if (udafPartial == null) {
- try {
- udafPartial = FunctionRegistry.getGenericUDAFEvaluator(
- genericUDAFName, types, distinct, false);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- udafPartial.init(partialMode, parameterInspectors);
- udafsPartial.put(threadId, udafPartial);
- }
-
- // multiple stage group by, determined by the mode parameter
- if (outputInspectorPartial == null)
- outputInspectorPartial = udafPartial.init(partialMode,
- parameterInspectors);
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e);
- }
-
- return new AggregatuibFunctionSerializableEvaluator(parameters, types,
- genericUDAFName, mode, distinct, rowInspector, evals,
- parameterInspectors, cachedParas, lazySer, cachedRowObject,
- udafPartial, udafComplete, outputInspector,
- outputInspectorPartial);
- }
-
- public String toString() {
- return "aggregation function expression evaluator factory: "
- + this.genericUDAFName;
- }
-
-}
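
Both aggregation factories above follow the same pattern: the Hive ExprNodeDesc parameters are serialized at construction time, rebuilt lazily on first use, and every non-thread-safe helper (expression evaluators, cached row objects, SerDes, UDAF evaluators) is kept in a HashMap keyed by Thread.currentThread().getId(). Below is a minimal, self-contained sketch of that per-thread caching idiom; PerThreadCache and Builder are illustrative names, not part of the hivesterix API, and a ConcurrentHashMap stands in for the synchronized factory method used above.

    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;

    // Sketch: one expensive, non-thread-safe helper per worker thread,
    // built on first use and reused afterwards (cf. the HashMap<Long, ...> fields above).
    public class PerThreadCache<T> {

        public interface Builder<T> {
            T build() throws Exception;
        }

        private final Map<Long, T> cache = new ConcurrentHashMap<Long, T>();
        private final Builder<T> builder;

        public PerThreadCache(Builder<T> builder) {
            this.builder = builder;
        }

        public T get() throws Exception {
            long threadId = Thread.currentThread().getId();
            T instance = cache.get(threadId);
            if (instance == null) {
                instance = builder.build();
                cache.put(threadId, instance);
            }
            return instance;
        }

        public static void main(String[] args) throws Exception {
            PerThreadCache<StringBuilder> cache = new PerThreadCache<StringBuilder>(
                    new Builder<StringBuilder>() {
                        public StringBuilder build() {
                            return new StringBuilder("built by thread " + Thread.currentThread().getId());
                        }
                    });
            System.out.println(cache.get() == cache.get()); // true: same thread, same instance
        }
    }
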
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ColumnExpressionEvaluatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ColumnExpressionEvaluatorFactory.java
deleted file mode 100644
index 68bf408..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ColumnExpressionEvaluatorFactory.java
+++ /dev/null
@@ -1,45 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.runtime.evaluator.ColumnExpressionEvaluator;
-import edu.uci.ics.hivesterix.runtime.evaluator.ExpressionTranslator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
-import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
-
-public class ColumnExpressionEvaluatorFactory implements ICopyEvaluatorFactory {
-
- private static final long serialVersionUID = 1L;
-
- private ExprNodeColumnDesc expr;
-
- private Schema inputSchema;
-
- public ColumnExpressionEvaluatorFactory(ILogicalExpression expression,
- Schema schema, IVariableTypeEnvironment env)
- throws AlgebricksException {
- try {
- expr = (ExprNodeColumnDesc) ExpressionTranslator.getHiveExpression(
- expression, env);
- } catch (Exception e) {
- throw new AlgebricksException(e.getMessage());
- }
- inputSchema = schema;
- }
-
- public ICopyEvaluator createEvaluator(IDataOutputProvider output)
- throws AlgebricksException {
- return new ColumnExpressionEvaluator(expr,
- inputSchema.toObjectInspector(), output);
- }
-
- public String toString() {
- return "column expression evaluator factory: " + expr.toString();
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ConstantExpressionEvaluatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ConstantExpressionEvaluatorFactory.java
deleted file mode 100644
index e0241a1..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ConstantExpressionEvaluatorFactory.java
+++ /dev/null
@@ -1,46 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.runtime.evaluator.ConstantExpressionEvaluator;
-import edu.uci.ics.hivesterix.runtime.evaluator.ExpressionTranslator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
-import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
-
-public class ConstantExpressionEvaluatorFactory implements
- ICopyEvaluatorFactory {
-
- private static final long serialVersionUID = 1L;
-
- private ExprNodeConstantDesc expr;
-
- private Schema schema;
-
- public ConstantExpressionEvaluatorFactory(ILogicalExpression expression,
- Schema inputSchema, IVariableTypeEnvironment env)
- throws AlgebricksException {
- try {
- expr = (ExprNodeConstantDesc) ExpressionTranslator
- .getHiveExpression(expression, env);
- } catch (Exception e) {
- throw new AlgebricksException(e.getMessage());
- }
- schema = inputSchema;
- }
-
- public ICopyEvaluator createEvaluator(IDataOutputProvider output)
- throws AlgebricksException {
- return new ConstantExpressionEvaluator(expr,
- schema.toObjectInspector(), output);
- }
-
- public String toString() {
- return "constant expression evaluator factory: " + expr.toString();
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/FieldExpressionEvaluatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/FieldExpressionEvaluatorFactory.java
deleted file mode 100644
index 4b5f906..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/FieldExpressionEvaluatorFactory.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.runtime.evaluator.ExpressionTranslator;
-import edu.uci.ics.hivesterix.runtime.evaluator.FieldExpressionEvaluator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
-import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
-
-public class FieldExpressionEvaluatorFactory implements ICopyEvaluatorFactory {
- private static final long serialVersionUID = 1L;
-
- private ExprNodeFieldDesc expr;
-
- private Schema inputSchema;
-
- public FieldExpressionEvaluatorFactory(ILogicalExpression expression,
- Schema schema, IVariableTypeEnvironment env)
- throws AlgebricksException {
- try {
- expr = (ExprNodeFieldDesc) ExpressionTranslator.getHiveExpression(
- expression, env);
- } catch (Exception e) {
- throw new AlgebricksException(e.getMessage());
- }
- inputSchema = schema;
- }
-
- public ICopyEvaluator createEvaluator(IDataOutputProvider output)
- throws AlgebricksException {
- return new FieldExpressionEvaluator(expr,
- inputSchema.toObjectInspector(), output);
- }
-
- public String toString() {
- return "field access expression evaluator factory: " + expr.toString();
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/HiveExpressionRuntimeProvider.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/HiveExpressionRuntimeProvider.java
deleted file mode 100644
index 387ca72..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/HiveExpressionRuntimeProvider.java
+++ /dev/null
@@ -1,192 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-
-import edu.uci.ics.hivesterix.logical.expression.ExpressionConstant;
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression.FunctionKind;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IExpressionRuntimeProvider;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.LogicalExpressionJobGenToExpressionRuntimeProviderAdapter.AggregateFunctionFactoryAdapter;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.LogicalExpressionJobGenToExpressionRuntimeProviderAdapter.ScalarEvaluatorFactoryAdapter;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.LogicalExpressionJobGenToExpressionRuntimeProviderAdapter.UnnestingFunctionFactoryAdapter;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.StatefulFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema;
-import edu.uci.ics.hyracks.algebricks.core.jobgen.impl.JobGenContext;
-import edu.uci.ics.hyracks.algebricks.runtime.base.IAggregateEvaluatorFactory;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunctionFactory;
-import edu.uci.ics.hyracks.algebricks.runtime.base.IRunningAggregateEvaluatorFactory;
-import edu.uci.ics.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import edu.uci.ics.hyracks.algebricks.runtime.base.IUnnestingEvaluatorFactory;
-
-public class HiveExpressionRuntimeProvider implements
- IExpressionRuntimeProvider {
-
- public static final IExpressionRuntimeProvider INSTANCE = new HiveExpressionRuntimeProvider();
-
- @Override
- public IAggregateEvaluatorFactory createAggregateFunctionFactory(
- AggregateFunctionCallExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- Schema schema = this.getSchema(inputSchemas[0], env);
- return new AggregateFunctionFactoryAdapter(
- new AggregationFunctionFactory(expr, schema, env));
- }
-
- @Override
- public ICopySerializableAggregateFunctionFactory createSerializableAggregateFunctionFactory(
- AggregateFunctionCallExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- Schema schema = this.getSchema(inputSchemas[0], env);
- return new AggregationFunctionSerializableFactory(expr, schema, env);
- }
-
- @Override
- public IRunningAggregateEvaluatorFactory createRunningAggregateFunctionFactory(
- StatefulFunctionCallExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public IUnnestingEvaluatorFactory createUnnestingFunctionFactory(
- UnnestingFunctionCallExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- Schema schema = this.getSchema(inputSchemas[0], env);
- return new UnnestingFunctionFactoryAdapter(
- new UnnestingFunctionFactory(expr, schema, env));
- }
-
- public IScalarEvaluatorFactory createEvaluatorFactory(
- ILogicalExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- switch (expr.getExpressionTag()) {
- case VARIABLE: {
- VariableReferenceExpression v = (VariableReferenceExpression) expr;
- return new ScalarEvaluatorFactoryAdapter(
- createVariableEvaluatorFactory(v, env, inputSchemas,
- context));
- }
- case CONSTANT: {
- ConstantExpression c = (ConstantExpression) expr;
- return new ScalarEvaluatorFactoryAdapter(
- createConstantEvaluatorFactory(c, env, inputSchemas,
- context));
- }
- case FUNCTION_CALL: {
- AbstractFunctionCallExpression fun = (AbstractFunctionCallExpression) expr;
- FunctionIdentifier fid = fun.getFunctionIdentifier();
-
- if (fid.getName().equals(ExpressionConstant.FIELDACCESS)) {
- return new ScalarEvaluatorFactoryAdapter(
- createFieldExpressionEvaluatorFactory(fun, env,
- inputSchemas, context));
- }
-
-            if (fid.getName().equals(ExpressionConstant.NULL)) {
- return new ScalarEvaluatorFactoryAdapter(
- createNullExpressionEvaluatorFactory(fun, env,
- inputSchemas, context));
- }
-
- if (fun.getKind() == FunctionKind.SCALAR) {
- ScalarFunctionCallExpression scalar = (ScalarFunctionCallExpression) fun;
- return new ScalarEvaluatorFactoryAdapter(
- createScalarFunctionEvaluatorFactory(scalar, env,
- inputSchemas, context));
- } else {
- throw new AlgebricksException(
- "Cannot create evaluator for function " + fun
- + " of kind " + fun.getKind());
- }
- }
- default: {
- throw new IllegalStateException();
- }
- }
- }
-
- private ICopyEvaluatorFactory createVariableEvaluatorFactory(
- VariableReferenceExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- Schema schema = this.getSchema(inputSchemas[0], env);
- return new ColumnExpressionEvaluatorFactory(expr, schema, env);
- }
-
- private ICopyEvaluatorFactory createScalarFunctionEvaluatorFactory(
- AbstractFunctionCallExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- List<String> names = new ArrayList<String>();
- List<TypeInfo> types = new ArrayList<TypeInfo>();
- for (IOperatorSchema inputSchema : inputSchemas) {
- Schema schema = this.getSchema(inputSchema, env);
- names.addAll(schema.getNames());
- types.addAll(schema.getTypes());
- }
- Schema inputSchema = new Schema(names, types);
- return new ScalarFunctionExpressionEvaluatorFactory(expr, inputSchema,
- env);
- }
-
- private ICopyEvaluatorFactory createFieldExpressionEvaluatorFactory(
- AbstractFunctionCallExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- Schema schema = this.getSchema(inputSchemas[0], env);
- return new FieldExpressionEvaluatorFactory(expr, schema, env);
- }
-
- private ICopyEvaluatorFactory createNullExpressionEvaluatorFactory(
- AbstractFunctionCallExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- Schema schema = this.getSchema(inputSchemas[0], env);
- return new NullExpressionEvaluatorFactory(expr, schema, env);
- }
-
- private ICopyEvaluatorFactory createConstantEvaluatorFactory(
- ConstantExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- Schema schema = this.getSchema(inputSchemas[0], env);
- return new ConstantExpressionEvaluatorFactory(expr, schema, env);
- }
-
- private Schema getSchema(IOperatorSchema inputSchema,
- IVariableTypeEnvironment env) throws AlgebricksException {
- List<String> names = new ArrayList<String>();
- List<TypeInfo> types = new ArrayList<TypeInfo>();
- Iterator<LogicalVariable> variables = inputSchema.iterator();
- while (variables.hasNext()) {
- LogicalVariable var = variables.next();
- names.add(var.toString());
- types.add((TypeInfo) env.getVarType(var));
- }
-
- Schema schema = new Schema(names, types);
- return schema;
- }
-
-}
\ No newline at end of file
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/NullExpressionEvaluatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/NullExpressionEvaluatorFactory.java
deleted file mode 100644
index 8f516e8..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/NullExpressionEvaluatorFactory.java
+++ /dev/null
@@ -1,45 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.runtime.evaluator.ExpressionTranslator;
-import edu.uci.ics.hivesterix.runtime.evaluator.NullExpressionEvaluator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
-import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
-
-public class NullExpressionEvaluatorFactory implements ICopyEvaluatorFactory {
-
- private static final long serialVersionUID = 1L;
-
- private ExprNodeNullDesc expr;
-
- private Schema schema;
-
- public NullExpressionEvaluatorFactory(ILogicalExpression expression,
-            Schema inputSchema, IVariableTypeEnvironment env)
- throws AlgebricksException {
- try {
- expr = (ExprNodeNullDesc) ExpressionTranslator.getHiveExpression(
- expression, env);
- } catch (Exception e) {
- throw new AlgebricksException(e.getMessage());
- }
-        schema = inputSchema;
- }
-
- public ICopyEvaluator createEvaluator(IDataOutputProvider output)
- throws AlgebricksException {
- return new NullExpressionEvaluator(expr, schema.toObjectInspector(),
- output);
- }
-
- public String toString() {
- return "null expression evaluator factory: " + expr.toString();
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ScalarFunctionExpressionEvaluatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ScalarFunctionExpressionEvaluatorFactory.java
deleted file mode 100644
index 262758e..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ScalarFunctionExpressionEvaluatorFactory.java
+++ /dev/null
@@ -1,77 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.runtime.evaluator.ExpressionTranslator;
-import edu.uci.ics.hivesterix.runtime.evaluator.FunctionExpressionEvaluator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
-import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
-
-public class ScalarFunctionExpressionEvaluatorFactory implements
- ICopyEvaluatorFactory {
-
- private static final long serialVersionUID = 1L;
-
- private transient ExprNodeGenericFuncDesc expr;
-
- private String exprSerialization;
-
- private Schema inputSchema;
-
- private transient Configuration config;
-
- public ScalarFunctionExpressionEvaluatorFactory(
- ILogicalExpression expression, Schema schema,
- IVariableTypeEnvironment env) throws AlgebricksException {
- try {
- expr = (ExprNodeGenericFuncDesc) ExpressionTranslator
- .getHiveExpression(expression, env);
-
- exprSerialization = Utilities.serializeExpression(expr);
-
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- }
- inputSchema = schema;
- }
-
- public synchronized ICopyEvaluator createEvaluator(
- IDataOutputProvider output) throws AlgebricksException {
- if (expr == null) {
- configClassLoader();
- expr = (ExprNodeGenericFuncDesc) Utilities.deserializeExpression(
- exprSerialization, config);
- }
-
- ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) expr
- .clone();
- return new FunctionExpressionEvaluator(funcDesc,
- inputSchema.toObjectInspector(), output);
- }
-
- private void configClassLoader() {
- config = new Configuration();
- ClassLoader loader = this.getClass().getClassLoader();
- config.setClassLoader(loader);
- Thread.currentThread().setContextClassLoader(loader);
- }
-
- public String toString() {
- if (expr == null) {
- configClassLoader();
- expr = (ExprNodeGenericFuncDesc) Utilities.deserializeExpression(
- exprSerialization, new Configuration());
- }
-
- return "function expression evaluator factory: " + expr.getExprString();
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/UnnestingFunctionFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/UnnestingFunctionFactory.java
deleted file mode 100644
index 1d77737..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/UnnestingFunctionFactory.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.runtime.evaluator.ExpressionTranslator;
-import edu.uci.ics.hivesterix.runtime.evaluator.UDTFFunctionEvaluator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyUnnestingFunction;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyUnnestingFunctionFactory;
-import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
-
-public class UnnestingFunctionFactory implements ICopyUnnestingFunctionFactory {
-
- private static final long serialVersionUID = 1L;
-
- private UDTFDesc expr;
-
- private Schema inputSchema;
-
- private int[] columns;
-
- public UnnestingFunctionFactory(ILogicalExpression expression,
- Schema schema, IVariableTypeEnvironment env)
- throws AlgebricksException {
- try {
- expr = (UDTFDesc) ExpressionTranslator.getHiveExpression(
- expression, env);
- } catch (Exception e) {
- throw new AlgebricksException(e.getMessage());
- }
- inputSchema = schema;
- }
-
- @Override
- public ICopyUnnestingFunction createUnnestingFunction(
- IDataOutputProvider provider) throws AlgebricksException {
- return new UDTFFunctionEvaluator(expr, inputSchema, columns,
- provider.getDataOutput());
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveDoubleBinaryHashFunctionFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveDoubleBinaryHashFunctionFactory.java
deleted file mode 100644
index fc302e1..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveDoubleBinaryHashFunctionFactory.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
-
-public class HiveDoubleBinaryHashFunctionFactory implements
- IBinaryHashFunctionFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveDoubleBinaryHashFunctionFactory INSTANCE = new HiveDoubleBinaryHashFunctionFactory();
-
- private HiveDoubleBinaryHashFunctionFactory() {
- }
-
- @Override
- public IBinaryHashFunction createBinaryHashFunction() {
- // TODO Auto-generated method stub
- return new IBinaryHashFunction() {
- private Double value;
-
- @Override
- public int hash(byte[] bytes, int offset, int length) {
- value = Double.longBitsToDouble(LazyUtils.byteArrayToLong(
- bytes, offset));
- return value.hashCode();
- }
- };
- }
-
-}
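
The double hash above reconstitutes the value with Double.longBitsToDouble and then boxes it only to call Double.hashCode(), which folds the 64-bit IEEE-754 bit pattern into 32 bits by XOR-ing its two halves. A standalone sketch of that folding, independent of the hyracks interfaces (hashDouble is an illustrative name):

    // Sketch: Double's hash is the 64-bit bit pattern folded to 32 bits.
    public class DoubleHashSketch {
        static int hashDouble(double d) {
            long bits = Double.doubleToLongBits(d);
            return (int) (bits ^ (bits >>> 32));
        }

        public static void main(String[] args) {
            double d = 3.14159;
            // Agrees with the boxed hash used by the factory above.
            System.out.println(hashDouble(d) == Double.valueOf(d).hashCode()); // true
        }
    }
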
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveIntegerBinaryHashFunctionFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveIntegerBinaryHashFunctionFactory.java
deleted file mode 100644
index e1a9994..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveIntegerBinaryHashFunctionFactory.java
+++ /dev/null
@@ -1,35 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
-
-public class HiveIntegerBinaryHashFunctionFactory implements
- IBinaryHashFunctionFactory {
- private static final long serialVersionUID = 1L;
-
- public static IBinaryHashFunctionFactory INSTANCE = new HiveIntegerBinaryHashFunctionFactory();
-
- private HiveIntegerBinaryHashFunctionFactory() {
- }
-
- @Override
- public IBinaryHashFunction createBinaryHashFunction() {
-
- return new IBinaryHashFunction() {
- private VInt value = new VInt();
-
- @Override
- public int hash(byte[] bytes, int offset, int length) {
- LazyUtils.readVInt(bytes, offset, value);
- if (value.length != length)
- throw new IllegalArgumentException(
- "length mismatch in int hash function actual: "
- + length + " expected " + value.length);
- return value.value;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveLongBinaryHashFunctionFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveLongBinaryHashFunctionFactory.java
deleted file mode 100644
index 6f7c6f2..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveLongBinaryHashFunctionFactory.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
-
-public class HiveLongBinaryHashFunctionFactory implements
- IBinaryHashFunctionFactory {
- private static final long serialVersionUID = 1L;
-
- public static IBinaryHashFunctionFactory INSTANCE = new HiveLongBinaryHashFunctionFactory();
-
- private HiveLongBinaryHashFunctionFactory() {
- }
-
- @Override
- public IBinaryHashFunction createBinaryHashFunction() {
-
- return new IBinaryHashFunction() {
- private VLong value = new VLong();
-
- @Override
- public int hash(byte[] bytes, int offset, int length) {
- LazyUtils.readVLong(bytes, offset, value);
- return (int) value.value;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveRawBinaryHashFunctionFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveRawBinaryHashFunctionFactory.java
deleted file mode 100644
index e03dde0..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveRawBinaryHashFunctionFactory.java
+++ /dev/null
@@ -1,32 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
-
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
-
-public class HiveRawBinaryHashFunctionFactory implements
- IBinaryHashFunctionFactory {
- private static final long serialVersionUID = 1L;
-
- public static IBinaryHashFunctionFactory INSTANCE = new HiveRawBinaryHashFunctionFactory();
-
- private HiveRawBinaryHashFunctionFactory() {
-
- }
-
- @Override
- public IBinaryHashFunction createBinaryHashFunction() {
-
- return new IBinaryHashFunction() {
-
- @Override
- public int hash(byte[] bytes, int offset, int length) {
- int value = 1;
- int end = offset + length;
- for (int i = offset; i < end; i++)
- value = value * 31 + (int) bytes[i];
- return value;
- }
- };
- }
-
-}
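
The raw binary hash above is the standard seed-1, multiplier-31 polynomial over the byte range, i.e. the same recurrence java.util.Arrays.hashCode(byte[]) uses, applied to a sub-range with sign-extended bytes. A minimal standalone version for reference (hashBytes is an illustrative name):

    import java.util.Arrays;

    // Sketch: 31-based polynomial hash over a byte slice, seeded with 1.
    public class RawByteHashSketch {
        static int hashBytes(byte[] bytes, int offset, int length) {
            int value = 1;
            for (int i = offset; i < offset + length; i++) {
                value = value * 31 + bytes[i]; // bytes are sign-extended, as in the original
            }
            return value;
        }

        public static void main(String[] args) {
            byte[] data = { 1, 2, 3, 4 };
            // Over the full array this coincides with Arrays.hashCode.
            System.out.println(hashBytes(data, 0, data.length) == Arrays.hashCode(data)); // true
            System.out.println(hashBytes(data, 1, 2)); // hash of the sub-range {2, 3}
        }
    }
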
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveStingBinaryHashFunctionFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveStingBinaryHashFunctionFactory.java
deleted file mode 100644
index 055c077..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveStingBinaryHashFunctionFactory.java
+++ /dev/null
@@ -1,45 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
-
-public class HiveStingBinaryHashFunctionFactory implements
- IBinaryHashFunctionFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveStingBinaryHashFunctionFactory INSTANCE = new HiveStingBinaryHashFunctionFactory();
-
- private HiveStingBinaryHashFunctionFactory() {
- }
-
- @Override
- public IBinaryHashFunction createBinaryHashFunction() {
- // TODO Auto-generated method stub
- return new IBinaryHashFunction() {
- private VInt len = new VInt();
-
- @Override
- public int hash(byte[] bytes, int offset, int length) {
- LazyUtils.readVInt(bytes, offset, len);
- if (len.value + len.length != length)
- throw new IllegalStateException(
- "parse string: length mismatch, expected "
- + (len.value + len.length) + " but get "
- + length);
- return hashBytes(bytes, offset + len.length, length
- - len.length);
- }
-
- public int hashBytes(byte[] bytes, int offset, int length) {
- int value = 1;
- int end = offset + length;
- for (int i = offset; i < end; i++)
- value = value * 31 + (int) bytes[i];
- return value;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleAscNormalizedKeyComputerFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleAscNormalizedKeyComputerFactory.java
deleted file mode 100644
index 5f03962..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleAscNormalizedKeyComputerFactory.java
+++ /dev/null
@@ -1,25 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.normalize;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-
-public class HiveDoubleAscNormalizedKeyComputerFactory implements
- INormalizedKeyComputerFactory {
-
- private static final long serialVersionUID = 1L;
-
- @Override
- public INormalizedKeyComputer createNormalizedKeyComputer() {
-
- return new INormalizedKeyComputer() {
-
- @Override
- public int normalize(byte[] bytes, int start, int length) {
- int header = LazyUtils.byteArrayToInt(bytes, start);
- long unsignedValue = (long) header;
- return (int) ((unsignedValue - ((long) Integer.MIN_VALUE)) & 0xffffffffL);
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleDescNormalizedKeyComputerFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleDescNormalizedKeyComputerFactory.java
deleted file mode 100644
index e4587a2..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleDescNormalizedKeyComputerFactory.java
+++ /dev/null
@@ -1,26 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.normalize;
-
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-
-public class HiveDoubleDescNormalizedKeyComputerFactory implements
- INormalizedKeyComputerFactory {
-
- private static final long serialVersionUID = 1L;
- private final INormalizedKeyComputerFactory ascNormalizedKeyComputerFactory = new HiveDoubleAscNormalizedKeyComputerFactory();
-
- @Override
- public INormalizedKeyComputer createNormalizedKeyComputer() {
- return new INormalizedKeyComputer() {
- private INormalizedKeyComputer nmkComputer = ascNormalizedKeyComputerFactory
- .createNormalizedKeyComputer();
-
- @Override
- public int normalize(byte[] bytes, int start, int length) {
- int nk = nmkComputer.normalize(bytes, start, length);
- return (int) ((long) Integer.MAX_VALUE - (long) (nk - Integer.MIN_VALUE));
- }
-
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerAscNormalizedKeyComputerFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerAscNormalizedKeyComputerFactory.java
deleted file mode 100644
index 2ff390a..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerAscNormalizedKeyComputerFactory.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.normalize;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-
-public class HiveIntegerAscNormalizedKeyComputerFactory implements
- INormalizedKeyComputerFactory {
-
- private static final long serialVersionUID = 1L;
-
- @Override
- public INormalizedKeyComputer createNormalizedKeyComputer() {
-
- return new INormalizedKeyComputer() {
- private VInt vint = new VInt();
-
- @Override
- public int normalize(byte[] bytes, int start, int length) {
- LazyUtils.readVInt(bytes, start, vint);
- if (vint.length != length)
- throw new IllegalArgumentException(
- "length mismatch in int comparator function actual: "
- + vint.length + " expected " + length);
- long unsignedValue = (long) vint.value;
- return (int) ((unsignedValue - ((long) Integer.MIN_VALUE)) & 0xffffffffL);
- }
- };
- }
-}
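
The ascending integer normalized key above is the signed value shifted into unsigned space: subtracting (long) Integer.MIN_VALUE maps the signed range onto [0, 2^32), so comparing keys as unsigned 32-bit values agrees with signed comparison of the original ints. A short self-contained check of that property (normalize and compareUnsigned are illustrative names):

    // Sketch: the ascending-int normalized key is an order-preserving shift into unsigned space.
    public class IntNormalizedKeySketch {
        static int normalize(int v) {
            return (int) (((long) v - (long) Integer.MIN_VALUE) & 0xffffffffL);
        }

        // Normalized keys are meant to be compared as unsigned 32-bit values.
        static int compareUnsigned(int a, int b) {
            return Long.compare(a & 0xffffffffL, b & 0xffffffffL);
        }

        public static void main(String[] args) {
            int[] sorted = { Integer.MIN_VALUE, -7, -1, 0, 1, 42, Integer.MAX_VALUE };
            for (int i = 0; i + 1 < sorted.length; i++) {
                if (compareUnsigned(normalize(sorted[i]), normalize(sorted[i + 1])) >= 0) {
                    throw new AssertionError("order not preserved at index " + i);
                }
            }
            System.out.println("order preserved");
        }
    }
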
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerDescNormalizedKeyComputerFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerDescNormalizedKeyComputerFactory.java
deleted file mode 100644
index 8eff1f8..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerDescNormalizedKeyComputerFactory.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.normalize;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-
-public class HiveIntegerDescNormalizedKeyComputerFactory implements
- INormalizedKeyComputerFactory {
-
- private static final long serialVersionUID = 1L;
-
- @Override
- public INormalizedKeyComputer createNormalizedKeyComputer() {
-
- return new INormalizedKeyComputer() {
- private VInt vint = new VInt();
-
- @Override
- public int normalize(byte[] bytes, int start, int length) {
- LazyUtils.readVInt(bytes, start, vint);
- if (vint.length != length)
- throw new IllegalArgumentException(
- "length mismatch in int comparator function actual: "
- + vint.length + " expected " + length);
- long unsignedValue = (long) vint.value;
- return (int) ((long) 0xffffffff - unsignedValue);
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongAscNormalizedKeyComputerFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongAscNormalizedKeyComputerFactory.java
deleted file mode 100644
index 768eec2..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongAscNormalizedKeyComputerFactory.java
+++ /dev/null
@@ -1,65 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.normalize;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-
-public class HiveLongAscNormalizedKeyComputerFactory implements
- INormalizedKeyComputerFactory {
-
- private static final long serialVersionUID = 1L;
-
- @Override
- public INormalizedKeyComputer createNormalizedKeyComputer() {
-
- return new INormalizedKeyComputer() {
- private static final int POSTIVE_LONG_MASK = (3 << 30);
- private static final int NON_NEGATIVE_INT_MASK = (2 << 30);
- private static final int NEGATIVE_LONG_MASK = (0 << 30);
- private VLong vlong = new VLong();
-
- @Override
- public int normalize(byte[] bytes, int start, int length) {
- LazyUtils.readVLong(bytes, start, vlong);
- if (vlong.length != length)
- throw new IllegalArgumentException(
- "length mismatch in int comparator function actual: "
- + vlong.length + " expected " + length);
- long value = (long) vlong.value;
- int highValue = (int) (value >> 32);
- if (highValue > 0) {
- /**
- * larger than Integer.MAX
- */
- int highNmk = getKey(highValue);
- highNmk >>= 2;
- highNmk |= POSTIVE_LONG_MASK;
- return highNmk;
- } else if (highValue == 0) {
- /**
- * smaller than Integer.MAX but >=0
- */
- int lowNmk = (int) value;
- lowNmk >>= 2;
- lowNmk |= NON_NEGATIVE_INT_MASK;
- return lowNmk;
- } else {
- /**
- * less than 0; TODO: have not optimized for that
- */
- int highNmk = getKey(highValue);
- highNmk >>= 2;
- highNmk |= NEGATIVE_LONG_MASK;
- return highNmk;
- }
- }
-
- private int getKey(int value) {
- long unsignedFirstValue = (long) value;
- int nmk = (int) ((unsignedFirstValue - ((long) Integer.MIN_VALUE)) & 0xffffffffL);
- return nmk;
- }
- };
- }
-}
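
The long variant above cannot encode 64 bits of order into a 32-bit key, so it buckets values with a 2-bit class prefix (negative, within the non-negative int range, larger than Integer.MAX_VALUE) and spends the remaining 30 bits on the high-order part of the value; equal keys fall back to the full comparator. A simpler, standard alternative with the same monotonicity guarantee, though coarser for longs that share their upper 32 bits, is to take the high 32 bits and flip the sign bit. The sketch below shows that general trick only; it is not the scheme used above:

    // Sketch: a coarse but order-preserving 32-bit prefix key for signed 64-bit values.
    public class LongPrefixKeySketch {
        static int prefixKey(long v) {
            // High 32 bits carry the sign; flipping the sign bit maps
            // Long.MIN_VALUE..Long.MAX_VALUE onto an ascending unsigned range.
            return (int) (v >> 32) ^ 0x80000000;
        }

        static long unsigned(int key) {
            return key & 0xffffffffL;
        }

        public static void main(String[] args) {
            long[] sorted = { Long.MIN_VALUE, -1L, 0L, 1L, 1L << 40, Long.MAX_VALUE };
            for (int i = 0; i + 1 < sorted.length; i++) {
                // Non-decreasing: equal prefixes are allowed, inversions are not.
                if (unsigned(prefixKey(sorted[i])) > unsigned(prefixKey(sorted[i + 1]))) {
                    throw new AssertionError("inversion at index " + i);
                }
            }
            System.out.println("prefix keys are monotone");
        }
    }
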
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongDescNormalizedKeyComputerFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongDescNormalizedKeyComputerFactory.java
deleted file mode 100644
index 20ae56a..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongDescNormalizedKeyComputerFactory.java
+++ /dev/null
@@ -1,27 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.normalize;
-
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-
-public class HiveLongDescNormalizedKeyComputerFactory implements
- INormalizedKeyComputerFactory {
-
- private static final long serialVersionUID = 1L;
- private final INormalizedKeyComputerFactory ascNormalizedKeyComputerFactory = new HiveIntegerAscNormalizedKeyComputerFactory();
-
- @Override
- public INormalizedKeyComputer createNormalizedKeyComputer() {
- return new INormalizedKeyComputer() {
- private INormalizedKeyComputer nmkComputer = ascNormalizedKeyComputerFactory
- .createNormalizedKeyComputer();
-
- @Override
- public int normalize(byte[] bytes, int start, int length) {
- int nk = nmkComputer.normalize(bytes, start, length);
- return (int) ((long) Integer.MAX_VALUE - (long) (nk - Integer.MIN_VALUE));
- }
-
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringAscNormalizedKeyComputerFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringAscNormalizedKeyComputerFactory.java
deleted file mode 100644
index b16ccba..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringAscNormalizedKeyComputerFactory.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.normalize;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
-
-public class HiveStringAscNormalizedKeyComputerFactory implements
- INormalizedKeyComputerFactory {
-
- private static final long serialVersionUID = 1L;
-
- @Override
- public INormalizedKeyComputer createNormalizedKeyComputer() {
-
- return new INormalizedKeyComputer() {
- private VInt len = new VInt();
-
- @Override
- public int normalize(byte[] bytes, int start, int length) {
- LazyUtils.readVInt(bytes, start, len);
-
- if (len.value + len.length != length)
- throw new IllegalStateException(
- "parse string: length mismatch, expected "
- + (len.value + len.length) + " but get "
- + length);
- int nk = 0;
- int offset = start + len.length;
- for (int i = 0; i < 2; ++i) {
- nk <<= 16;
- if (i < len.value) {
- char character = UTF8StringPointable.charAt(bytes,
- offset);
- nk += ((int) character) & 0xffff;
- offset += UTF8StringPointable.charSize(bytes, offset);
- }
- }
- return nk;
- }
- };
- }
-}
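
The string key above skips the VInt length prefix and packs the first two UTF-16 code units into the normalized key, high character first, so unsigned comparison of the keys orders strings by their two-character prefix. A plain-String sketch of the same packing, without the lazy-serde length handling (charPrefixKey is an illustrative name):

    // Sketch: pack the first two UTF-16 code units into an int, high character first.
    public class StringPrefixKeySketch {
        static int charPrefixKey(String s) {
            int nk = 0;
            for (int i = 0; i < 2; i++) {
                nk <<= 16;
                if (i < s.length()) {
                    nk |= s.charAt(i) & 0xffff;
                }
            }
            return nk;
        }

        public static void main(String[] args) {
            String[] sorted = { "", "a", "ab", "abc", "b" };
            for (int i = 0; i + 1 < sorted.length; i++) {
                long a = charPrefixKey(sorted[i]) & 0xffffffffL;
                long b = charPrefixKey(sorted[i + 1]) & 0xffffffffL;
                if (a > b) {
                    throw new AssertionError("inversion between \"" + sorted[i] + "\" and \"" + sorted[i + 1] + "\"");
                }
            }
            System.out.println("prefix keys are monotone");
        }
    }
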
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringDescNormalizedKeyComputerFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringDescNormalizedKeyComputerFactory.java
deleted file mode 100644
index e8978c6..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringDescNormalizedKeyComputerFactory.java
+++ /dev/null
@@ -1,40 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.normalize;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
-
-public class HiveStringDescNormalizedKeyComputerFactory implements
- INormalizedKeyComputerFactory {
-
- private static final long serialVersionUID = 1L;
-
- @Override
- public INormalizedKeyComputer createNormalizedKeyComputer() {
- return new INormalizedKeyComputer() {
- private VInt len = new VInt();
-
- @Override
- public int normalize(byte[] bytes, int start, int length) {
- LazyUtils.readVInt(bytes, start, len);
- if (len.value + len.length != length)
- throw new IllegalStateException(
- "parse string: length mismatch, expected "
- + (len.value + len.length) + " but get "
- + length);
- int nk = 0;
- int offset = start + len.length;
- for (int i = 0; i < 2; ++i) {
- nk <<= 16;
- if (i < len.value) {
- nk += ((int) UTF8StringPointable.charAt(bytes, offset)) & 0xffff;
- offset += UTF8StringPointable.charSize(bytes, offset);
- }
- }
- return (int) ((long) 0xffffffff - (long) nk);
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/nullwriter/HiveNullWriterFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/nullwriter/HiveNullWriterFactory.java
deleted file mode 100644
index 91d08c6..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/nullwriter/HiveNullWriterFactory.java
+++ /dev/null
@@ -1,28 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.nullwriter;
-
-import java.io.DataOutput;
-
-import edu.uci.ics.hyracks.api.dataflow.value.INullWriter;
-import edu.uci.ics.hyracks.api.dataflow.value.INullWriterFactory;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-
-public class HiveNullWriterFactory implements INullWriterFactory {
-
- private static final long serialVersionUID = 1L;
-
- public static HiveNullWriterFactory INSTANCE = new HiveNullWriterFactory();
-
- @Override
- public INullWriter createNullWriter() {
- return new HiveNullWriter();
- }
-}
-
-class HiveNullWriter implements INullWriter {
-
- @Override
- public void writeNull(DataOutput out) throws HyracksDataException {
- // do nothing
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspector.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspector.java
deleted file mode 100644
index 3d2b141..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspector.java
+++ /dev/null
@@ -1,20 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.inspector;
-
-import edu.uci.ics.hyracks.algebricks.data.IBinaryBooleanInspector;
-
-public class HiveBinaryBooleanInspector implements IBinaryBooleanInspector {
-
- HiveBinaryBooleanInspector() {
- }
-
- @Override
- public boolean getBooleanValue(byte[] bytes, int offset, int length) {
- if (length == 0)
- return false;
-		if (length != 1)
-			throw new IllegalStateException("boolean field error: unexpected length "
-					+ length);
-		return bytes[offset] == 1;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspectorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspectorFactory.java
deleted file mode 100644
index 86afbee..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspectorFactory.java
+++ /dev/null
@@ -1,22 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.inspector;
-
-import edu.uci.ics.hyracks.algebricks.data.IBinaryBooleanInspector;
-import edu.uci.ics.hyracks.algebricks.data.IBinaryBooleanInspectorFactory;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-
-public class HiveBinaryBooleanInspectorFactory implements
- IBinaryBooleanInspectorFactory {
- private static final long serialVersionUID = 1L;
- public static HiveBinaryBooleanInspectorFactory INSTANCE = new HiveBinaryBooleanInspectorFactory();
-
- private HiveBinaryBooleanInspectorFactory() {
-
- }
-
- @Override
- public IBinaryBooleanInspector createBinaryBooleanInspector(
- IHyracksTaskContext arg0) {
- return new HiveBinaryBooleanInspector();
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspector.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspector.java
deleted file mode 100644
index e82e501..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspector.java
+++ /dev/null
@@ -1,23 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.inspector;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.algebricks.data.IBinaryIntegerInspector;
-
-public class HiveBinaryIntegerInspector implements IBinaryIntegerInspector {
- private VInt value = new VInt();
-
- HiveBinaryIntegerInspector() {
- }
-
- @Override
- public int getIntegerValue(byte[] bytes, int offset, int length) {
- LazyUtils.readVInt(bytes, offset, value);
- if (value.length != length)
- throw new IllegalArgumentException(
- "length mismatch in int hash function actual: " + length
- + " expected " + value.length);
- return value.value;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspectorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspectorFactory.java
deleted file mode 100644
index b44e610..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspectorFactory.java
+++ /dev/null
@@ -1,22 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.inspector;
-
-import edu.uci.ics.hyracks.algebricks.data.IBinaryIntegerInspector;
-import edu.uci.ics.hyracks.algebricks.data.IBinaryIntegerInspectorFactory;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-
-public class HiveBinaryIntegerInspectorFactory implements
- IBinaryIntegerInspectorFactory {
- private static final long serialVersionUID = 1L;
- public static HiveBinaryIntegerInspectorFactory INSTANCE = new HiveBinaryIntegerInspectorFactory();
-
- private HiveBinaryIntegerInspectorFactory() {
-
- }
-
- @Override
- public IBinaryIntegerInspector createBinaryIntegerInspector(
- IHyracksTaskContext arg0) {
- return new HiveBinaryIntegerInspector();
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveConnectorPolicyAssignmentPolicy.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveConnectorPolicyAssignmentPolicy.java
deleted file mode 100644
index 8f559e2..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveConnectorPolicyAssignmentPolicy.java
+++ /dev/null
@@ -1,67 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.jobgen;
-
-import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;
-import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicy;
-import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicyAssignmentPolicy;
-import edu.uci.ics.hyracks.api.dataflow.connectors.PipeliningConnectorPolicy;
-import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedBlockingConnectorPolicy;
-import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedPipeliningConnectorPolicy;
-import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedReceiveSideMaterializedBlockingConnectorPolicy;
-import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor;
-
-public class HiveConnectorPolicyAssignmentPolicy implements
- IConnectorPolicyAssignmentPolicy {
- public enum Policy {
- PIPELINING, SEND_SIDE_MAT_PIPELINING, SEND_SIDE_MAT_BLOCKING, SEND_SIDE_MAT_RECEIVE_SIDE_MAT_BLOCKING;
- };
-
- private static final long serialVersionUID = 1L;
-
- private final IConnectorPolicy pipeliningPolicy = new PipeliningConnectorPolicy();
- private final IConnectorPolicy sendSideMatPipeliningPolicy = new SendSideMaterializedPipeliningConnectorPolicy();
- private final IConnectorPolicy sendSideMatBlockingPolicy = new SendSideMaterializedBlockingConnectorPolicy();
- private final IConnectorPolicy sendSideMatReceiveSideMatBlockingPolicy = new SendSideMaterializedReceiveSideMaterializedBlockingConnectorPolicy();
- private final Policy policy;
-
- public HiveConnectorPolicyAssignmentPolicy(Policy policy) {
- this.policy = policy;
- }
-
- @Override
- public IConnectorPolicy getConnectorPolicyAssignment(
- IConnectorDescriptor c, int nProducers, int nConsumers,
- int[] fanouts) {
- if (c instanceof MToNPartitioningMergingConnectorDescriptor) {
- // avoid deadlocks
- switch (policy) {
- case PIPELINING:
- case SEND_SIDE_MAT_PIPELINING:
- return sendSideMatPipeliningPolicy;
- case SEND_SIDE_MAT_BLOCKING:
- return sendSideMatBlockingPolicy;
- case SEND_SIDE_MAT_RECEIVE_SIDE_MAT_BLOCKING:
- return sendSideMatReceiveSideMatBlockingPolicy;
- default:
- return sendSideMatPipeliningPolicy;
- }
- } else if (c instanceof MToNPartitioningConnectorDescriptor) {
- // support different repartitioning policies
- switch (policy) {
- case PIPELINING:
- return pipeliningPolicy;
- case SEND_SIDE_MAT_PIPELINING:
- return sendSideMatPipeliningPolicy;
- case SEND_SIDE_MAT_BLOCKING:
- return sendSideMatBlockingPolicy;
- case SEND_SIDE_MAT_RECEIVE_SIDE_MAT_BLOCKING:
- return sendSideMatReceiveSideMatBlockingPolicy;
- default:
- return pipeliningPolicy;
- }
- } else {
- // pipelining for other connectors
- return pipeliningPolicy;
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSink.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSink.java
deleted file mode 100644
index e4fbca5..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSink.java
+++ /dev/null
@@ -1,32 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.jobgen;
-
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSink;
-import edu.uci.ics.hyracks.algebricks.core.algebra.properties.IPartitioningProperty;
-import edu.uci.ics.hyracks.algebricks.core.algebra.properties.RandomPartitioningProperty;
-
-public class HiveDataSink implements IDataSink {
-
- private Object[] schema;
-
- private Object fsOperator;
-
- public HiveDataSink(Object sink, Object[] sourceSchema) {
- schema = sourceSchema;
- fsOperator = sink;
- }
-
- @Override
- public Object getId() {
- return fsOperator;
- }
-
- @Override
- public Object[] getSchemaTypes() {
- return schema;
- }
-
- public IPartitioningProperty getPartitioningProperty() {
- return new RandomPartitioningProperty(new HiveDomain());
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSource.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSource.java
deleted file mode 100644
index edff056..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSource.java
+++ /dev/null
@@ -1,48 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.jobgen;
-
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
-
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSource;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSourcePropertiesProvider;
-import edu.uci.ics.hyracks.algebricks.core.algebra.properties.FunctionalDependency;
-
-public class HiveDataSource<P> implements IDataSource<P> {
-
- private P source;
-
- private Object[] schema;
-
- public HiveDataSource(P dataSource, Object[] sourceSchema) {
- source = dataSource;
- schema = sourceSchema;
- }
-
- @Override
- public P getId() {
- return source;
- }
-
- @Override
- public Object[] getSchemaTypes() {
- return schema;
- }
-
- @Override
- public void computeFDs(List<LogicalVariable> scanVariables,
- List<FunctionalDependency> fdList) {
- }
-
- @Override
- public IDataSourcePropertiesProvider getPropertiesProvider() {
- return new HiveDataSourcePartitioningProvider();
- }
-
- @Override
- public String toString() {
- PartitionDesc desc = (PartitionDesc) source;
- return desc.getTableName();
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDomain.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDomain.java
deleted file mode 100644
index 0af253a..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDomain.java
+++ /dev/null
@@ -1,17 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.jobgen;
-
-import edu.uci.ics.hyracks.algebricks.core.algebra.properties.INodeDomain;
-
-public class HiveDomain implements INodeDomain {
-
- @Override
- public boolean sameAs(INodeDomain domain) {
- return true;
- }
-
- @Override
- public Integer cardinality() {
- return 0;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveMetaDataProvider.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveMetaDataProvider.java
deleted file mode 100644
index 5782703..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveMetaDataProvider.java
+++ /dev/null
@@ -1,149 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.jobgen;
-
-import java.util.HashMap;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
-
-import edu.uci.ics.hivesterix.logical.expression.HiveFunctionInfo;
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSink;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSource;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema;
-import edu.uci.ics.hyracks.algebricks.core.jobgen.impl.JobGenContext;
-import edu.uci.ics.hyracks.algebricks.data.IPrinterFactory;
-import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntimeFactory;
-import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-
-@SuppressWarnings("rawtypes")
-public class HiveMetaDataProvider<S, T> implements IMetadataProvider<S, T> {
-
- private Operator fileSink;
- private Schema outputSchema;
- private HashMap<S, IDataSource<S>> dataSourceMap;
-
- public HiveMetaDataProvider(Operator fsOp, Schema oi,
- HashMap<S, IDataSource<S>> map) {
- fileSink = fsOp;
- outputSchema = oi;
- dataSourceMap = map;
- }
-
- @Override
- public IDataSourceIndex<T, S> findDataSourceIndex(T indexId, S dataSourceId)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public IDataSource<S> findDataSource(S id) throws AlgebricksException {
- return dataSourceMap.get(id);
- }
-
- @Override
- public boolean scannerOperatorIsLeaf(IDataSource<S> dataSource) {
- return true;
- }
-
- @Override
- public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getScannerRuntime(
- IDataSource<S> dataSource, List<LogicalVariable> scanVariables,
- List<LogicalVariable> projectVariables, boolean projectPushed,
- IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv,
- JobGenContext context, JobSpecification jobSpec)
- throws AlgebricksException {
-
- S desc = dataSource.getId();
- HiveScanRuntimeGenerator generator = new HiveScanRuntimeGenerator(
- (PartitionDesc) desc);
- return generator.getRuntimeOperatorAndConstraint(dataSource,
- scanVariables, projectVariables, projectPushed, context,
- jobSpec);
- }
-
- @Override
- public Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint> getWriteFileRuntime(
- IDataSink sink, int[] printColumns,
- IPrinterFactory[] printerFactories, RecordDescriptor inputDesc) {
-
- HiveWriteRuntimeGenerator generator = new HiveWriteRuntimeGenerator(
- (FileSinkOperator) fileSink, outputSchema);
- return generator.getWriterRuntime(inputDesc);
- }
-
- @Override
- public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getDeleteRuntime(
- IDataSource<S> arg0, IOperatorSchema arg1,
- List<LogicalVariable> arg2, LogicalVariable arg3,
- RecordDescriptor arg4, JobGenContext arg5, JobSpecification arg6)
- throws AlgebricksException {
- // TODO Auto-generated method stub
- return null;
- }
-
- @Override
- public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getInsertRuntime(
- IDataSource<S> arg0, IOperatorSchema arg1,
- List<LogicalVariable> arg2, LogicalVariable arg3,
- RecordDescriptor arg4, JobGenContext arg5, JobSpecification arg6)
- throws AlgebricksException {
- // TODO Auto-generated method stub
- return null;
- }
-
- @Override
- public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getWriteResultRuntime(
- IDataSource<S> arg0, IOperatorSchema arg1,
- List<LogicalVariable> arg2, LogicalVariable arg3,
- JobGenContext arg4, JobSpecification arg5)
- throws AlgebricksException {
- // TODO Auto-generated method stub
- return null;
- }
-
- @Override
- public IFunctionInfo lookupFunction(FunctionIdentifier arg0) {
- return new HiveFunctionInfo(arg0, null);
- }
-
- @Override
- public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getIndexInsertRuntime(
- IDataSourceIndex<T, S> dataSource,
- IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas,
- IVariableTypeEnvironment typeEnv,
- List<LogicalVariable> primaryKeys,
- List<LogicalVariable> secondaryKeys, ILogicalExpression filterExpr,
- RecordDescriptor recordDesc, JobGenContext context,
- JobSpecification spec) throws AlgebricksException {
- // TODO Auto-generated method stub
- return null;
- }
-
- @Override
- public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getIndexDeleteRuntime(
- IDataSourceIndex<T, S> dataSource,
- IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas,
- IVariableTypeEnvironment typeEnv,
- List<LogicalVariable> primaryKeys,
- List<LogicalVariable> secondaryKeys, ILogicalExpression filterExpr,
- RecordDescriptor recordDesc, JobGenContext context,
- JobSpecification spec) throws AlgebricksException {
- // TODO Auto-generated method stub
- return null;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveOperatorSchema.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveOperatorSchema.java
deleted file mode 100644
index 83382f0..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveOperatorSchema.java
+++ /dev/null
@@ -1,84 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.jobgen;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema;
-
-public class HiveOperatorSchema implements IOperatorSchema {
-
- private final Map<LogicalVariable, Integer> varMap;
-
- private final List<LogicalVariable> varList;
-
- public HiveOperatorSchema() {
- varMap = new HashMap<LogicalVariable, Integer>();
- varList = new ArrayList<LogicalVariable>();
- }
-
- @Override
- public void addAllVariables(IOperatorSchema source) {
- for (LogicalVariable v : source) {
- varMap.put(v, varList.size());
- varList.add(v);
- }
- }
-
- @Override
- public void addAllNewVariables(IOperatorSchema source) {
- for (LogicalVariable v : source) {
- if (varMap.get(v) == null) {
- varMap.put(v, varList.size());
- varList.add(v);
- }
- }
- }
-
- @Override
- public int addVariable(LogicalVariable var) {
- int idx = varList.size();
- varMap.put(var, idx);
- varList.add(var);
- return idx;
- }
-
- @Override
- public void clear() {
- varMap.clear();
- varList.clear();
- }
-
- @Override
- public int findVariable(LogicalVariable var) {
- Integer i = varMap.get(var);
- if (i == null) {
- return -1;
- }
- return i;
- }
-
- @Override
- public int getSize() {
- return varList.size();
- }
-
- @Override
- public LogicalVariable getVariable(int index) {
- return varList.get(index);
- }
-
- @Override
- public Iterator<LogicalVariable> iterator() {
- return varList.iterator();
- }
-
- @Override
- public String toString() {
- return varMap.toString();
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveScanRuntimeGenerator.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveScanRuntimeGenerator.java
deleted file mode 100644
index 9c8aee4..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveScanRuntimeGenerator.java
+++ /dev/null
@@ -1,117 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.jobgen;
-
-import java.util.List;
-import java.util.Properties;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
-import org.apache.hadoop.mapred.JobConf;
-
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hivesterix.runtime.operator.filescan.HiveFileScanOperatorDescriptor;
-import edu.uci.ics.hivesterix.runtime.operator.filescan.HiveFileSplitProvider;
-import edu.uci.ics.hivesterix.runtime.operator.filescan.HiveTupleParserFactory;
-import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSource;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema;
-import edu.uci.ics.hyracks.algebricks.core.jobgen.impl.JobGenContext;
-import edu.uci.ics.hyracks.algebricks.data.ISerializerDeserializerProvider;
-import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
-import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.ITupleParserFactory;
-
-@SuppressWarnings({ "rawtypes", "deprecation" })
-public class HiveScanRuntimeGenerator {
-
- private PartitionDesc fileDesc;
-
- private transient Path filePath;
-
- private String filePathName;
-
- private Properties properties;
-
- public HiveScanRuntimeGenerator(PartitionDesc path) {
- fileDesc = path;
- properties = fileDesc.getProperties();
-
- String inputPath = (String) properties.getProperty("location");
-
- if (inputPath.startsWith("file:")) {
- // Windows
- String[] strs = inputPath.split(":");
- filePathName = strs[strs.length - 1];
- } else {
- // Linux
- filePathName = inputPath;
- }
-
- filePath = new Path(filePathName);
- }
-
- public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getRuntimeOperatorAndConstraint(
- IDataSource dataSource, List<LogicalVariable> scanVariables,
- List<LogicalVariable> projectVariables, boolean projectPushed,
- JobGenContext context, JobSpecification jobSpec)
- throws AlgebricksException {
- // get the correct delimiter from Hive metastore or other data
- // structures
- IOperatorSchema propagatedSchema = new HiveOperatorSchema();
-
- List<LogicalVariable> outputVariables = projectPushed ? projectVariables
- : scanVariables;
- for (LogicalVariable var : outputVariables)
- propagatedSchema.addVariable(var);
-
- int[] outputColumnsOffset = new int[scanVariables.size()];
- int i = 0;
- for (LogicalVariable var : scanVariables)
- if (outputVariables.contains(var)) {
- int offset = outputVariables.indexOf(var);
- outputColumnsOffset[i++] = offset;
- } else
- outputColumnsOffset[i++] = -1;
-
- Object[] schemaTypes = dataSource.getSchemaTypes();
- // get record descriptor
- RecordDescriptor recDescriptor = mkRecordDescriptor(propagatedSchema,
- schemaTypes, context);
-
- // setup the run time operator
- JobConf conf = ConfUtil.getJobConf(fileDesc.getInputFileFormatClass(),
- filePath);
- int clusterSize = ConfUtil.getNCs().length;
- IFileSplitProvider fsprovider = new HiveFileSplitProvider(conf,
- filePathName, clusterSize);
- ITupleParserFactory tupleParserFactory = new HiveTupleParserFactory(
- fileDesc, conf, outputColumnsOffset);
- HiveFileScanOperatorDescriptor opDesc = new HiveFileScanOperatorDescriptor(
- jobSpec, fsprovider, tupleParserFactory, recDescriptor);
-
- return new Pair<IOperatorDescriptor, AlgebricksPartitionConstraint>(
- opDesc, opDesc.getPartitionConstraint());
- }
-
- private static RecordDescriptor mkRecordDescriptor(
- IOperatorSchema opSchema, Object[] types, JobGenContext context)
- throws AlgebricksException {
- ISerializerDeserializer[] fields = new ISerializerDeserializer[opSchema
- .getSize()];
- ISerializerDeserializerProvider sdp = context
- .getSerializerDeserializerProvider();
- int size = opSchema.getSize();
-		for (int i = 0; i < size; i++) {
-			Object t = types[i];
-			fields[i] = sdp.getSerializerDeserializer(t);
-		}
- return new RecordDescriptor(fields);
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveWriteRuntimeGenerator.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveWriteRuntimeGenerator.java
deleted file mode 100644
index d372868..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveWriteRuntimeGenerator.java
+++ /dev/null
@@ -1,40 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.jobgen;
-
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.mapred.JobConf;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hivesterix.runtime.operator.filewrite.HivePushRuntimeFactory;
-import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
-import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntimeFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-
-@SuppressWarnings("deprecation")
-public class HiveWriteRuntimeGenerator {
- private FileSinkOperator fileSink;
-
- private Schema inputSchema;
-
- public HiveWriteRuntimeGenerator(FileSinkOperator fsOp, Schema oi) {
- fileSink = fsOp;
- inputSchema = oi;
- }
-
- /**
- * get the write runtime
- *
- * @param inputDesc
- * @return
- */
- public Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint> getWriterRuntime(
- RecordDescriptor inputDesc) {
- JobConf conf = ConfUtil.getJobConf();
- IPushRuntimeFactory factory = new HivePushRuntimeFactory(inputDesc,
- conf, fileSink, inputSchema);
- Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint> pair = new Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint>(
- factory, null);
- return pair;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/AbstractHiveFileSplitProvider.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/AbstractHiveFileSplitProvider.java
deleted file mode 100644
index 2f988f8..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/AbstractHiveFileSplitProvider.java
+++ /dev/null
@@ -1,19 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.operator.filescan;
-
-import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
-import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-
-public abstract class AbstractHiveFileSplitProvider implements
- IFileSplitProvider {
- private static final long serialVersionUID = 1L;
-
- @Override
- public FileSplit[] getFileSplits() {
- // TODO Auto-generated method stub
- return null;
- }
-
- @SuppressWarnings("deprecation")
- public abstract org.apache.hadoop.mapred.FileSplit[] getFileSplitArray();
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/AbstractHiveTupleParser.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/AbstractHiveTupleParser.java
deleted file mode 100644
index a8addeb..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/AbstractHiveTupleParser.java
+++ /dev/null
@@ -1,29 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.operator.filescan;
-
-import java.io.InputStream;
-
-import org.apache.hadoop.mapred.FileSplit;
-
-import edu.uci.ics.hyracks.api.comm.IFrameWriter;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.dataflow.std.file.ITupleParser;
-
-@SuppressWarnings("deprecation")
-public abstract class AbstractHiveTupleParser implements ITupleParser {
-
- @Override
- public void parse(InputStream in, IFrameWriter writer)
- throws HyracksDataException {
- // empty implementation
- }
-
- /**
-	 * method for parsing an HDFS file split
- *
- * @param split
- * @param writer
- */
- abstract public void parse(FileSplit split, IFrameWriter writer)
- throws HyracksDataException;
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveFileScanOperatorDescriptor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveFileScanOperatorDescriptor.java
deleted file mode 100644
index d248486..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveFileScanOperatorDescriptor.java
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright 2009-2010 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.runtime.operator.filescan;
-
-import java.io.IOException;
-import java.net.InetAddress;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-
-import org.apache.hadoop.mapred.FileSplit;
-
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
-import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable;
-import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.ITupleParser;
-import edu.uci.ics.hyracks.dataflow.std.file.ITupleParserFactory;
-
-@SuppressWarnings("deprecation")
-public class HiveFileScanOperatorDescriptor extends
- AbstractSingleActivityOperatorDescriptor {
- private static final long serialVersionUID = 1L;
-
- /**
- * tuple parser factory
- */
- private final ITupleParserFactory tupleParserFactory;
-
- /**
-	 * Hive file splits, wrapped as serializable partitions
- */
- private Partition[] parts;
-
- /**
- * IFileSplitProvider
- */
- private IFileSplitProvider fileSplitProvider;
-
- /**
-	 * constraints in the form of host DNS names
- */
- private String[] constraintsByHostNames;
-
- /**
- * ip-to-node controller mapping
- */
- private Map<String, List<String>> ncMapping;
-
- /**
- * an array of NCs
- */
- private String[] NCs;
-
- /**
- *
- * @param spec
- * @param fsProvider
- */
- public HiveFileScanOperatorDescriptor(JobSpecification spec,
- IFileSplitProvider fsProvider,
- ITupleParserFactory tupleParserFactory, RecordDescriptor rDesc) {
- super(spec, 0, 1);
- this.tupleParserFactory = tupleParserFactory;
- recordDescriptors[0] = rDesc;
- fileSplitProvider = fsProvider;
- }
-
- /**
-	 * Set the partition constraint. The first time it is called, the number of
-	 * partitions is obtained from the HDFS name node.
- */
- public AlgebricksAbsolutePartitionConstraint getPartitionConstraint()
- throws AlgebricksException {
- FileSplit[] returnedSplits = ((AbstractHiveFileSplitProvider) fileSplitProvider)
- .getFileSplitArray();
- Random random = new Random(System.currentTimeMillis());
- ncMapping = ConfUtil.getNCMapping();
- NCs = ConfUtil.getNCs();
-
- int size = 0;
- for (FileSplit split : returnedSplits)
- if (split != null)
- size++;
-
-		FileSplit[] splits = new FileSplit[size];
-		int nextSplit = 0;
-		for (int i = 0; i < returnedSplits.length; i++)
-			if (returnedSplits[i] != null)
-				splits[nextSplit++] = returnedSplits[i];
-
- System.out.println("!!! number of splits: " + splits.length);
- constraintsByHostNames = new String[splits.length];
- for (int i = 0; i < splits.length; i++) {
- try {
- String[] loc = splits[i].getLocations();
- Collections.shuffle(Arrays.asList(loc), random);
- if (loc.length > 0) {
- InetAddress[] allIps = InetAddress.getAllByName(loc[0]);
- for (InetAddress ip : allIps) {
- if (ncMapping.get(ip.getHostAddress()) != null) {
- List<String> ncs = ncMapping.get(ip
- .getHostAddress());
- int pos = random.nextInt(ncs.size());
- constraintsByHostNames[i] = ncs.get(pos);
- } else {
- int pos = random.nextInt(NCs.length);
- constraintsByHostNames[i] = NCs[pos];
- }
- }
- } else {
- int pos = random.nextInt(NCs.length);
- constraintsByHostNames[i] = NCs[pos];
- if (splits[i].getLength() > 0)
- throw new IllegalStateException(
- "non local scanner non locations!!");
- }
- } catch (IOException e) {
- throw new AlgebricksException(e);
- }
- }
-
- parts = new Partition[splits.length];
- for (int i = 0; i < splits.length; i++) {
- parts[i] = new Partition(splits[i]);
- }
- return new AlgebricksAbsolutePartitionConstraint(constraintsByHostNames);
- }
-
- @Override
- public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
- IRecordDescriptorProvider recordDescProvider, int partition,
- int nPartitions) {
-
- final ITupleParser tp = tupleParserFactory.createTupleParser(ctx);
- final int partitionId = partition;
-
- return new AbstractUnaryOutputSourceOperatorNodePushable() {
-
- @Override
- public void initialize() throws HyracksDataException {
- writer.open();
- FileSplit split = parts[partitionId].toFileSplit();
- if (split == null)
- throw new HyracksDataException("partition " + partitionId
- + " is null!");
- ((AbstractHiveTupleParser) tp).parse(split, writer);
- writer.close();
- }
- };
- }
-}
\ No newline at end of file
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveFileSplitProvider.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveFileSplitProvider.java
deleted file mode 100644
index d92d353..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveFileSplitProvider.java
+++ /dev/null
@@ -1,115 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.operator.filescan;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.PrintWriter;
-import java.util.UUID;
-
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.JobConf;
-import org.eclipse.jetty.util.log.Log;
-
-@SuppressWarnings({ "deprecation", "rawtypes" })
-public class HiveFileSplitProvider extends AbstractHiveFileSplitProvider {
- private static final long serialVersionUID = 1L;
-
- private transient InputFormat format;
- private transient JobConf conf;
- private String confContent;
- final private int nPartition;
- private transient FileSplit[] splits;
-
- public HiveFileSplitProvider(JobConf conf, String filePath, int nPartition) {
- format = conf.getInputFormat();
- this.conf = conf;
- this.nPartition = nPartition;
- writeConfContent();
- }
-
- private void writeConfContent() {
- File dir = new File("hadoop-conf-tmp");
- if (!dir.exists()) {
- dir.mkdir();
- }
-
- String fileName = "hadoop-conf-tmp/" + UUID.randomUUID()
- + System.currentTimeMillis() + ".xml";
- try {
- DataOutputStream out = new DataOutputStream(new FileOutputStream(
- new File(fileName)));
- conf.writeXml(out);
- out.close();
-
- DataInputStream in = new DataInputStream(new FileInputStream(
- fileName));
- StringBuffer buffer = new StringBuffer();
- String line;
- while ((line = in.readLine()) != null) {
- buffer.append(line + "\n");
- }
- in.close();
- confContent = buffer.toString();
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
- private void readConfContent() {
- File dir = new File("hadoop-conf-tmp");
- if (!dir.exists()) {
- dir.mkdir();
- }
-
- String fileName = "hadoop-conf-tmp/" + UUID.randomUUID()
- + System.currentTimeMillis() + ".xml";
- try {
- PrintWriter out = new PrintWriter((new OutputStreamWriter(
- new FileOutputStream(new File(fileName)))));
- out.write(confContent);
- out.close();
- conf = new JobConf(fileName);
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
- @Override
- /**
- * get the HDFS file split
- */
- public FileSplit[] getFileSplitArray() {
- readConfContent();
- conf.setClassLoader(this.getClass().getClassLoader());
- format = conf.getInputFormat();
- // int splitSize = conf.getInt("mapred.min.split.size", 0);
-
- if (splits == null) {
- try {
- splits = (org.apache.hadoop.mapred.FileSplit[]) format
- .getSplits(conf, nPartition);
- System.out.println("hdfs split number: " + splits.length);
- } catch (IOException e) {
- String inputPath = conf.get("mapred.input.dir");
- String hdfsURL = conf.get("fs.default.name");
- String alternatePath = inputPath.replaceAll(hdfsURL, "file:");
- conf.set("mapred.input.dir", alternatePath);
- try {
- splits = (org.apache.hadoop.mapred.FileSplit[]) format
- .getSplits(conf, nPartition);
- System.out.println("hdfs split number: " + splits.length);
- } catch (IOException e1) {
- e1.printStackTrace();
- Log.debug(e1.getMessage());
- return null;
- }
- }
- }
- return splits;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveTupleParser.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveTupleParser.java
deleted file mode 100644
index 7681bd1..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveTupleParser.java
+++ /dev/null
@@ -1,233 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.operator.filescan;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.List;
-import java.util.Properties;
-
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.util.ReflectionUtils;
-
-import edu.uci.ics.hivesterix.serde.parser.IHiveParser;
-import edu.uci.ics.hivesterix.serde.parser.TextToBinaryTupleParser;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.api.comm.IFrameWriter;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
-
-@SuppressWarnings({ "rawtypes", "deprecation", "unchecked" })
-public class HiveTupleParser extends AbstractHiveTupleParser {
-
- private int[] outputColumnsOffset;
- /**
- * class of input format
- */
- private InputFormat inputFormat;
-
- /**
- * serialization/deserialization object
- */
- private SerDe serDe;
-
- /**
- * the input row object inspector
- */
- private ObjectInspector objectInspector;
-
- /**
- * the hadoop job conf
- */
- private JobConf job;
-
- /**
-	 * Hyracks context to control resource allocation
- */
- private final IHyracksTaskContext ctx;
-
- /**
-	 * lazy SerDe for the data format flowing between operators
- */
- private final SerDe outputSerDe;
-
- /**
- * the parser from hive data to binary data
- */
- private IHiveParser parser = null;
-
- /**
- * parser for any hive input format
- *
- * @param inputFormatClass
- * @param serDeClass
- * @param tbl
- * @param conf
- * @throws AlgebricksException
- */
- public HiveTupleParser(String inputFormatClass, String serDeClass,
- String outputSerDeClass, Properties tbl, JobConf conf,
- final IHyracksTaskContext ctx, int[] outputColumnsOffset)
- throws AlgebricksException {
- try {
- conf.setClassLoader(this.getClass().getClassLoader());
-
- inputFormat = (InputFormat) ReflectionUtils.newInstance(
- Class.forName(inputFormatClass), conf);
- job = conf;
-
- // initialize the input serde
- serDe = (SerDe) ReflectionUtils.newInstance(
- Class.forName(serDeClass), job);
- serDe.initialize(job, tbl);
-
- // initialize the output serde
- outputSerDe = (SerDe) ReflectionUtils.newInstance(
- Class.forName(outputSerDeClass), job);
- outputSerDe.initialize(job, tbl);
-
- // object inspector of the row
- objectInspector = serDe.getObjectInspector();
-
- // hyracks context
- this.ctx = ctx;
- this.outputColumnsOffset = outputColumnsOffset;
-
- if (objectInspector instanceof LazySimpleStructObjectInspector) {
- LazySimpleStructObjectInspector rowInspector = (LazySimpleStructObjectInspector) objectInspector;
- List<? extends StructField> fieldRefs = rowInspector
- .getAllStructFieldRefs();
- boolean lightWeightParsable = true;
- for (StructField fieldRef : fieldRefs) {
- Category category = fieldRef.getFieldObjectInspector()
- .getCategory();
- if (!(category == Category.PRIMITIVE)) {
- lightWeightParsable = false;
- break;
- }
- }
- if (lightWeightParsable)
- parser = new TextToBinaryTupleParser(
- this.outputColumnsOffset, this.objectInspector);
- }
- } catch (Exception e) {
- throw new AlgebricksException(e);
- }
- }
-
- /**
-	 * Parse an input HDFS file split; the result is sent to the writer
-	 * one frame at a time.
- *
- * @param split
- * the HDFS file split
- * @param writer
- * the writer
- * @throws HyracksDataException
-	 *             if something goes wrong during serialization/deserialization
- */
- @Override
- public void parse(FileSplit split, IFrameWriter writer)
- throws HyracksDataException {
- try {
- StructObjectInspector structInspector = (StructObjectInspector) objectInspector;
-
- // create the reader, key, and value
- RecordReader reader = inputFormat.getRecordReader(split, job,
- Reporter.NULL);
- Object key = reader.createKey();
- Object value = reader.createValue();
-
- // allocate a new frame
- ByteBuffer frame = ctx.allocateFrame();
- FrameTupleAppender appender = new FrameTupleAppender(
- ctx.getFrameSize());
- appender.reset(frame, true);
-
- List<? extends StructField> fieldRefs = structInspector
- .getAllStructFieldRefs();
- int size = 0;
- for (int i = 0; i < outputColumnsOffset.length; i++)
- if (outputColumnsOffset[i] >= 0)
- size++;
-
- ArrayTupleBuilder tb = new ArrayTupleBuilder(size);
- DataOutput dos = tb.getDataOutput();
- StructField[] outputFieldRefs = new StructField[size];
- Object[] outputFields = new Object[size];
- for (int i = 0; i < outputColumnsOffset.length; i++)
- if (outputColumnsOffset[i] >= 0)
- outputFieldRefs[outputColumnsOffset[i]] = fieldRefs.get(i);
-
- while (reader.next(key, value)) {
- // reuse the tuple builder
- tb.reset();
- if (parser != null) {
- Text text = (Text) value;
- parser.parse(text.getBytes(), 0, text.getLength(), tb);
- } else {
- Object row = serDe.deserialize((Writable) value);
- // write fields to the tuple builder one by one
- int i = 0;
- for (StructField fieldRef : fieldRefs) {
- if (outputColumnsOffset[i] >= 0)
- outputFields[outputColumnsOffset[i]] = structInspector
- .getStructFieldData(row, fieldRef);
- i++;
- }
-
- i = 0;
- for (Object field : outputFields) {
- BytesWritable fieldWritable = (BytesWritable) outputSerDe
- .serialize(field, outputFieldRefs[i]
- .getFieldObjectInspector());
- dos.write(fieldWritable.getBytes(), 0,
- fieldWritable.getSize());
- tb.addFieldEndOffset();
- i++;
- }
- }
-
- if (!appender.append(tb.getFieldEndOffsets(),
- tb.getByteArray(), 0, tb.getSize())) {
- if (appender.getTupleCount() <= 0)
- throw new IllegalStateException(
- "zero tuples in a frame!");
- FrameUtils.flushFrame(frame, writer);
- appender.reset(frame, true);
- if (!appender.append(tb.getFieldEndOffsets(),
- tb.getByteArray(), 0, tb.getSize())) {
- throw new IllegalStateException();
- }
- }
- }
- reader.close();
- System.gc();
-
- // flush the last frame
- if (appender.getTupleCount() > 0) {
- FrameUtils.flushFrame(frame, writer);
- }
- } catch (IOException e) {
- throw new HyracksDataException(e);
- } catch (SerDeException e) {
- throw new HyracksDataException(e);
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveTupleParserFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveTupleParserFactory.java
deleted file mode 100644
index 69aa881..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveTupleParserFactory.java
+++ /dev/null
@@ -1,111 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.operator.filescan;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.OutputStreamWriter;
-import java.io.PrintWriter;
-import java.util.Properties;
-import java.util.UUID;
-
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
-import org.apache.hadoop.mapred.JobConf;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.dataflow.std.file.ITupleParser;
-import edu.uci.ics.hyracks.dataflow.std.file.ITupleParserFactory;
-
-@SuppressWarnings("deprecation")
-public class HiveTupleParserFactory implements ITupleParserFactory {
-
- private static final long serialVersionUID = 1L;
-
- private int[] outputColumns;
-
- private String outputSerDeClass = LazySerDe.class.getName();
-
- private String inputSerDeClass;
-
- private transient JobConf conf;
-
- private Properties tbl;
-
- private String confContent;
-
- private String inputFormatClass;
-
- public HiveTupleParserFactory(PartitionDesc desc, JobConf conf,
- int[] outputColumns) {
- this.conf = conf;
- tbl = desc.getProperties();
- inputFormatClass = (String) tbl.getProperty("file.inputformat");
- inputSerDeClass = (String) tbl.getProperty("serialization.lib");
- this.outputColumns = outputColumns;
-
- writeConfContent();
- }
-
- @Override
- public ITupleParser createTupleParser(IHyracksTaskContext ctx) {
- readConfContent();
- try {
- return new HiveTupleParser(inputFormatClass, inputSerDeClass,
- outputSerDeClass, tbl, conf, ctx, outputColumns);
- } catch (Exception e) {
- e.printStackTrace();
- return null;
- }
- }
-
- private void writeConfContent() {
- File dir = new File("hadoop-conf-tmp");
- if (!dir.exists()) {
- dir.mkdir();
- }
-
- String fileName = "hadoop-conf-tmp/" + UUID.randomUUID()
- + System.currentTimeMillis() + ".xml";
- try {
- DataOutputStream out = new DataOutputStream(new FileOutputStream(
- new File(fileName)));
- conf.writeXml(out);
- out.close();
-
- DataInputStream in = new DataInputStream(new FileInputStream(
- fileName));
- StringBuffer buffer = new StringBuffer();
- String line;
- while ((line = in.readLine()) != null) {
- buffer.append(line + "\n");
- }
- in.close();
- confContent = buffer.toString();
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
- private void readConfContent() {
- File dir = new File("hadoop-conf-tmp");
- if (!dir.exists()) {
- dir.mkdir();
- }
-
- String fileName = "hadoop-conf-tmp/" + UUID.randomUUID()
- + System.currentTimeMillis() + ".xml";
- try {
- PrintWriter out = new PrintWriter((new OutputStreamWriter(
- new FileOutputStream(new File(fileName)))));
- out.write(confContent);
- out.close();
-
- conf = new JobConf(fileName);
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/Partition.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/Partition.java
deleted file mode 100644
index 1b3dcf2..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/Partition.java
+++ /dev/null
@@ -1,35 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.operator.filescan;
-
-import java.io.IOException;
-import java.io.Serializable;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.FileSplit;
-
-@SuppressWarnings("deprecation")
-public class Partition implements Serializable {
- private static final long serialVersionUID = 1L;
-
- private String uri;
- private long offset;
- private long length;
- private String[] locations;
-
- public Partition() {
- }
-
- public Partition(FileSplit file) {
- uri = file.getPath().toUri().toString();
- offset = file.getStart();
- length = file.getLength();
- try {
- locations = file.getLocations();
- } catch (IOException e) {
- throw new IllegalStateException(e);
- }
- }
-
- public FileSplit toFileSplit() {
- return new FileSplit(new Path(uri), offset, length, locations);
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HivePushRuntimeFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HivePushRuntimeFactory.java
deleted file mode 100644
index 43e90fa..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HivePushRuntimeFactory.java
+++ /dev/null
@@ -1,113 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.operator.filewrite;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.OutputStreamWriter;
-import java.io.PrintWriter;
-import java.util.UUID;
-
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.RowSchema;
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
-import org.apache.hadoop.mapred.JobConf;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntime;
-import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntimeFactory;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-
-@SuppressWarnings("deprecation")
-public class HivePushRuntimeFactory implements IPushRuntimeFactory {
-
- private static final long serialVersionUID = 1L;
-
- private final RecordDescriptor inputRecordDesc;
- private transient JobConf conf;
- private final FileSinkDesc fileSink;
- private final RowSchema outSchema;
- private final Schema schema;
-
- /**
- * the content of the configuration
- */
- private String confContent;
-
- public HivePushRuntimeFactory(RecordDescriptor inputRecordDesc,
- JobConf conf, FileSinkOperator fsp, Schema sch) {
- this.inputRecordDesc = inputRecordDesc;
- this.conf = conf;
- this.fileSink = fsp.getConf();
- outSchema = fsp.getSchema();
- this.schema = sch;
-
- writeConfContent();
- }
-
- @Override
- public String toString() {
- return "file write";
- }
-
- @Override
- public IPushRuntime createPushRuntime(IHyracksTaskContext context)
- throws AlgebricksException {
- if (conf == null)
- readConfContent();
-
- return new HiveFileWritePushRuntime(context, inputRecordDesc, conf,
- fileSink, outSchema, schema);
- }
-
- private void readConfContent() {
- File dir = new File("hadoop-conf-tmp");
- if (!dir.exists()) {
- dir.mkdir();
- }
-
- String fileName = "hadoop-conf-tmp/" + UUID.randomUUID()
- + System.currentTimeMillis() + ".xml";
- try {
- PrintWriter out = new PrintWriter((new OutputStreamWriter(
- new FileOutputStream(new File(fileName)))));
- out.write(confContent);
- out.close();
- conf = new JobConf(fileName);
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
- private void writeConfContent() {
- File dir = new File("hadoop-conf-tmp");
- if (!dir.exists()) {
- dir.mkdir();
- }
-
- String fileName = "hadoop-conf-tmp/" + UUID.randomUUID()
- + System.currentTimeMillis() + ".xml";
- try {
- DataOutputStream out = new DataOutputStream(new FileOutputStream(
- new File(fileName)));
- conf.writeXml(out);
- out.close();
-
- DataInputStream in = new DataInputStream(new FileInputStream(
- fileName));
- StringBuffer buffer = new StringBuffer();
- String line;
- while ((line = in.readLine()) != null) {
- buffer.append(line + "\n");
- }
- in.close();
- confContent = buffer.toString();
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryComparatorFactoryProvider.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryComparatorFactoryProvider.java
deleted file mode 100644
index 5a2e98c..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryComparatorFactoryProvider.java
+++ /dev/null
@@ -1,77 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.provider;
-
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveByteBinaryAscComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveByteBinaryDescComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveDoubleBinaryAscComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveDoubleBinaryDescComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveFloatBinaryAscComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveFloatBinaryDescComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveIntegerBinaryAscComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveIntegerBinaryDescComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveLongBinaryAscComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveLongBinaryDescComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveShortBinaryAscComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveShortBinaryDescComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveStringBinaryAscComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveStringBinaryDescComparatorFactory;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.NotImplementedException;
-import edu.uci.ics.hyracks.algebricks.data.IBinaryComparatorFactoryProvider;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveBinaryComparatorFactoryProvider implements
- IBinaryComparatorFactoryProvider {
-
- public static final HiveBinaryComparatorFactoryProvider INSTANCE = new HiveBinaryComparatorFactoryProvider();
-
- private HiveBinaryComparatorFactoryProvider() {
- }
-
- @Override
- public IBinaryComparatorFactory getBinaryComparatorFactory(Object type,
- boolean ascending) throws AlgebricksException {
- if (type.equals(TypeInfoFactory.intTypeInfo)) {
- if (ascending)
- return HiveIntegerBinaryAscComparatorFactory.INSTANCE;
- else
- return HiveIntegerBinaryDescComparatorFactory.INSTANCE;
-
- } else if (type.equals(TypeInfoFactory.longTypeInfo)) {
- if (ascending)
- return HiveLongBinaryAscComparatorFactory.INSTANCE;
- else
- return HiveLongBinaryDescComparatorFactory.INSTANCE;
-
- } else if (type.equals(TypeInfoFactory.floatTypeInfo)) {
- if (ascending)
- return HiveFloatBinaryAscComparatorFactory.INSTANCE;
- else
- return HiveFloatBinaryDescComparatorFactory.INSTANCE;
-
- } else if (type.equals(TypeInfoFactory.doubleTypeInfo)) {
- if (ascending)
- return HiveDoubleBinaryAscComparatorFactory.INSTANCE;
- else
- return HiveDoubleBinaryDescComparatorFactory.INSTANCE;
- } else if (type.equals(TypeInfoFactory.shortTypeInfo)) {
- if (ascending)
- return HiveShortBinaryAscComparatorFactory.INSTANCE;
- else
- return HiveShortBinaryDescComparatorFactory.INSTANCE;
- } else if (type.equals(TypeInfoFactory.stringTypeInfo)) {
- if (ascending)
- return HiveStringBinaryAscComparatorFactory.INSTANCE;
- else
- return HiveStringBinaryDescComparatorFactory.INSTANCE;
- } else if (type.equals(TypeInfoFactory.byteTypeInfo)
- || type.equals(TypeInfoFactory.booleanTypeInfo)) {
- if (ascending)
- return HiveByteBinaryAscComparatorFactory.INSTANCE;
- else
- return HiveByteBinaryDescComparatorFactory.INSTANCE;
- } else
- throw new NotImplementedException();
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFactoryProvider.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFactoryProvider.java
deleted file mode 100644
index 371d45b..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFactoryProvider.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.provider;
-
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveDoubleBinaryHashFunctionFactory;
-import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveIntegerBinaryHashFunctionFactory;
-import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveLongBinaryHashFunctionFactory;
-import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveRawBinaryHashFunctionFactory;
-import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveStingBinaryHashFunctionFactory;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.data.IBinaryHashFunctionFactoryProvider;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
-
-public class HiveBinaryHashFunctionFactoryProvider implements
- IBinaryHashFunctionFactoryProvider {
-
- public static final HiveBinaryHashFunctionFactoryProvider INSTANCE = new HiveBinaryHashFunctionFactoryProvider();
-
- private HiveBinaryHashFunctionFactoryProvider() {
- }
-
- @Override
- public IBinaryHashFunctionFactory getBinaryHashFunctionFactory(Object type)
- throws AlgebricksException {
- if (type.equals(TypeInfoFactory.intTypeInfo)) {
- return HiveIntegerBinaryHashFunctionFactory.INSTANCE;
- } else if (type.equals(TypeInfoFactory.longTypeInfo)) {
- return HiveLongBinaryHashFunctionFactory.INSTANCE;
- } else if (type.equals(TypeInfoFactory.stringTypeInfo)) {
- return HiveStingBinaryHashFunctionFactory.INSTANCE;
- } else if (type.equals(TypeInfoFactory.doubleTypeInfo)) {
- return HiveDoubleBinaryHashFunctionFactory.INSTANCE;
- } else {
- return HiveRawBinaryHashFunctionFactory.INSTANCE;
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveNormalizedKeyComputerFactoryProvider.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveNormalizedKeyComputerFactoryProvider.java
deleted file mode 100644
index 9e3a8ae..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveNormalizedKeyComputerFactoryProvider.java
+++ /dev/null
@@ -1,53 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.provider;
-
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveDoubleAscNormalizedKeyComputerFactory;
-import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveDoubleDescNormalizedKeyComputerFactory;
-import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveIntegerAscNormalizedKeyComputerFactory;
-import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveIntegerDescNormalizedKeyComputerFactory;
-import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveLongAscNormalizedKeyComputerFactory;
-import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveLongDescNormalizedKeyComputerFactory;
-import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveStringAscNormalizedKeyComputerFactory;
-import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveStringDescNormalizedKeyComputerFactory;
-import edu.uci.ics.hyracks.algebricks.data.INormalizedKeyComputerFactoryProvider;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-
-public class HiveNormalizedKeyComputerFactoryProvider implements
- INormalizedKeyComputerFactoryProvider {
-
- public static final HiveNormalizedKeyComputerFactoryProvider INSTANCE = new HiveNormalizedKeyComputerFactoryProvider();
-
- private HiveNormalizedKeyComputerFactoryProvider() {
- }
-
- @Override
- public INormalizedKeyComputerFactory getNormalizedKeyComputerFactory(
- Object type, boolean ascending) {
- if (ascending) {
- if (type.equals(TypeInfoFactory.stringTypeInfo)) {
- return new HiveStringAscNormalizedKeyComputerFactory();
- } else if (type.equals(TypeInfoFactory.intTypeInfo)) {
- return new HiveIntegerAscNormalizedKeyComputerFactory();
- } else if (type.equals(TypeInfoFactory.longTypeInfo)) {
- return new HiveLongAscNormalizedKeyComputerFactory();
- } else if (type.equals(TypeInfoFactory.doubleTypeInfo)) {
- return new HiveDoubleAscNormalizedKeyComputerFactory();
- } else {
- return null;
- }
- } else {
- if (type.equals(TypeInfoFactory.stringTypeInfo)) {
- return new HiveStringDescNormalizedKeyComputerFactory();
- } else if (type.equals(TypeInfoFactory.intTypeInfo)) {
- return new HiveIntegerDescNormalizedKeyComputerFactory();
- } else if (type.equals(TypeInfoFactory.longTypeInfo)) {
- return new HiveLongDescNormalizedKeyComputerFactory();
- } else if (type.equals(TypeInfoFactory.doubleTypeInfo)) {
- return new HiveDoubleDescNormalizedKeyComputerFactory();
- } else {
- return null;
- }
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveSerializerDeserializerProvider.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveSerializerDeserializerProvider.java
deleted file mode 100644
index 7938de8..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveSerializerDeserializerProvider.java
+++ /dev/null
@@ -1,23 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.provider;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.data.ISerializerDeserializerProvider;
-import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
-
-public class HiveSerializerDeserializerProvider implements
- ISerializerDeserializerProvider {
-
- public static final HiveSerializerDeserializerProvider INSTANCE = new HiveSerializerDeserializerProvider();
-
- private HiveSerializerDeserializerProvider() {
- }
-
- @SuppressWarnings("rawtypes")
- @Override
- public ISerializerDeserializer getSerializerDeserializer(Object type)
- throws AlgebricksException {
- // return ARecordSerializerDeserializer.SCHEMALESS_INSTANCE;
- return null;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveTypeTraitProvider.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveTypeTraitProvider.java
deleted file mode 100644
index 2059128..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveTypeTraitProvider.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.provider;
-
-import java.io.Serializable;
-
-import edu.uci.ics.hyracks.algebricks.data.ITypeTraitProvider;
-import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
-
-public class HiveTypeTraitProvider implements ITypeTraitProvider, Serializable {
- private static final long serialVersionUID = 1L;
- public static HiveTypeTraitProvider INSTANCE = new HiveTypeTraitProvider();
-
- private HiveTypeTraitProvider() {
-
- }
-
- @Override
- public ITypeTraits getTypeTrait(Object arg0) {
- return new ITypeTraits() {
- private static final long serialVersionUID = 1L;
-
- @Override
- public int getFixedLength() {
- return -1;
- }
-
- @Override
- public boolean isFixedLength() {
- return false;
- }
-
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyArray.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyArray.java
deleted file mode 100644
index 821c03d..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyArray.java
+++ /dev/null
@@ -1,236 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.RecordInfo;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyListObjectInspector;
-
-/**
- * LazyArray is serialized as follows: start A b b b b b b end bytes[] ->
- * |--------|---|---|---|---| ... |---|---|
- *
- * Section A is the null-bytes. Suppose the list has N elements, then there are
- * (N+7)/8 bytes used as null-bytes. Each bit corresponds to an element and it
- * indicates whether that element is null (0) or not null (1).
- *
- * After A, all b(s) represent the elements of the list. Each of them is again a
- * LazyObject.
- *
- */
-
-public class LazyArray extends LazyNonPrimitive<LazyListObjectInspector> {
-
- /**
- * Whether the data is already parsed or not.
- */
- boolean parsed = false;
- /**
- * The length of the array. Only valid when the data is parsed.
- */
- int arraySize = 0;
-
- /**
- * The start positions and lengths of array elements. Only valid when the
- * data is parsed.
- */
- int[] elementStart;
- int[] elementLength;
-
- /**
- * Whether an element is initialized or not.
- */
- boolean[] elementInited;
-
- /**
- * Whether an element is null or not. A length of 0 does not mean that the
- * field is null; in particular, a 0-length string is not null.
- */
- boolean[] elementIsNull;
-
- /**
- * The elements of the array. Note that we call arrayElements[i].init(bytes,
- * begin, length) only when that element is accessed.
- */
- @SuppressWarnings("rawtypes")
- LazyObject[] arrayElements;
-
- /**
- * Construct a LazyArray object with the ObjectInspector.
- *
- * @param oi
- * the oi representing the type of this LazyArray
- */
- protected LazyArray(LazyListObjectInspector oi) {
- super(oi);
- }
-
- /**
- * Set the row data for this LazyArray.
- *
- * @see LazyObject#init(ByteArrayRef, int, int)
- */
- @Override
- public void init(byte[] bytes, int start, int length) {
- super.init(bytes, start, length);
- parsed = false;
- }
-
- /**
- * Enlarge the size of arrays storing information for the elements inside
- * the array.
- */
- private void adjustArraySize(int newSize) {
- if (elementStart == null || elementStart.length < newSize) {
- elementStart = new int[newSize];
- elementLength = new int[newSize];
- elementInited = new boolean[newSize];
- elementIsNull = new boolean[newSize];
- arrayElements = new LazyObject[newSize];
- }
- }
-
- VInt vInt = new LazyUtils.VInt();
- RecordInfo recordInfo = new LazyUtils.RecordInfo();
-
- /**
- * Parse the bytes and fill elementStart, elementLength, elementInited and
- * elementIsNull.
- */
- private void parse() {
-
- // get the vInt that represents the array size
- LazyUtils.readVInt(bytes, start, vInt);
- arraySize = vInt.value;
- if (0 == arraySize) {
- parsed = true;
- return;
- }
-
- // adjust arrays
- adjustArraySize(arraySize);
- // find out the null-bytes
- int arryByteStart = start + vInt.length;
- int nullByteCur = arryByteStart;
- int nullByteEnd = arryByteStart + (arraySize + 7) / 8;
- // the beginning of the real elements
- int lastElementByteEnd = nullByteEnd;
- // the list element object inspector
- ObjectInspector listEleObjectInspector = ((ListObjectInspector) oi)
- .getListElementObjectInspector();
- // parsing elements one by one
- for (int i = 0; i < arraySize; i++) {
- elementIsNull[i] = true;
- if ((bytes[nullByteCur] & (1 << (i % 8))) != 0) {
- elementIsNull[i] = false;
- LazyUtils.checkObjectByteInfo(listEleObjectInspector, bytes,
- lastElementByteEnd, recordInfo);
- elementStart[i] = lastElementByteEnd + recordInfo.elementOffset;
- elementLength[i] = recordInfo.elementSize;
- lastElementByteEnd = elementStart[i] + elementLength[i];
- }
- // move onto the next null byte
- if (7 == (i % 8)) {
- nullByteCur++;
- }
- }
-
- Arrays.fill(elementInited, 0, arraySize, false);
- parsed = true;
- }
-
- /**
- * Returns the actual primitive object at the index position inside the
- * array represented by this LazyObject.
- */
- public Object getListElementObject(int index) {
- if (!parsed) {
- parse();
- }
- if (index < 0 || index >= arraySize) {
- return null;
- }
- return uncheckedGetElement(index);
- }
-
- /**
- * Get the element without checking out-of-bound index.
- *
- * @param index
- * index to the array element
- */
- private Object uncheckedGetElement(int index) {
-
- if (elementIsNull[index]) {
- return null;
- } else {
- if (!elementInited[index]) {
- elementInited[index] = true;
- if (arrayElements[index] == null) {
- arrayElements[index] = LazyFactory.createLazyObject((oi)
- .getListElementObjectInspector());
- }
- arrayElements[index].init(bytes, elementStart[index],
- elementLength[index]);
- }
- }
- return arrayElements[index].getObject();
- }
-
- /**
- * Returns the array size.
- */
- public int getListLength() {
- if (!parsed) {
- parse();
- }
- return arraySize;
- }
-
- /**
- * cachedList is reused every time getList is called. Different
- * LazyArray instances cannot share the same cachedList.
- */
- ArrayList<Object> cachedList;
-
- /**
- * Returns the List of actual primitive objects. Returns null for null
- * array.
- */
- public List<Object> getList() {
- if (!parsed) {
- parse();
- }
- if (cachedList == null) {
- cachedList = new ArrayList<Object>(arraySize);
- } else {
- cachedList.clear();
- }
- for (int index = 0; index < arraySize; index++) {
- cachedList.add(uncheckedGetElement(index));
- }
- return cachedList;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyBoolean.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyBoolean.java
deleted file mode 100644
index 83b6254..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyBoolean.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.io.BooleanWritable;
-
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyBooleanObjectInspector;
-
-/**
- * LazyObject for storing a value of boolean.
- *
- * <p>
- * Part of the code is adapted from Apache Harmony Project.
- *
- * As with the specification, this implementation relied on code laid out in <a
- * href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's
- * Delight, (Addison Wesley, 2002)</a> as well as <a
- * href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
- * </p>
- *
- */
-public class LazyBoolean extends
- LazyPrimitive<LazyBooleanObjectInspector, BooleanWritable> {
-
- public LazyBoolean(LazyBooleanObjectInspector oi) {
- super(oi);
- data = new BooleanWritable();
- }
-
- public LazyBoolean(LazyBoolean copy) {
- super(copy);
- data = new BooleanWritable(copy.data.get());
- }
-
- @Override
- public void init(byte[] bytes, int start, int length) {
- if (length == 0) {
- isNull = true;
- return;
- } else
- isNull = false;
-
- // a temporary hack
- assert (1 == length);
- byte val = bytes[start];
- if (val == 0) {
- data.set(false);
- } else if (val == 1) {
- data.set(true);
- }
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyByte.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyByte.java
deleted file mode 100644
index 264015b..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyByte.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.io.ByteWritable;
-
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyByteObjectInspector;
-
-/**
- * LazyObject for storing a value of Byte.
- *
- * <p>
- * Part of the code is adapted from Apache Harmony Project.
- *
- * As with the specification, this implementation relied on code laid out in <a
- * href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's
- * Delight, (Addison Wesley, 2002)</a> as well as <a
- * href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
- * </p>
- *
- */
-public class LazyByte extends
- LazyPrimitive<LazyByteObjectInspector, ByteWritable> {
-
- public LazyByte(LazyByteObjectInspector oi) {
- super(oi);
- data = new ByteWritable();
- }
-
- public LazyByte(LazyByte copy) {
- super(copy);
- data = new ByteWritable(copy.data.get());
- }
-
- @Override
- public void init(byte[] bytes, int start, int length) {
- if (length == 0) {
- isNull = true;
- return;
- } else
- isNull = false;
-
- assert (1 == length);
- data.set(bytes[start]);
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyColumnar.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyColumnar.java
deleted file mode 100644
index a25ae49..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyColumnar.java
+++ /dev/null
@@ -1,211 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyColumnarObjectInspector;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-/**
- * LazyObject for storing a struct. The fields of a struct can be primitive or
- * non-primitive.
- *
- * LazyStruct does not deal with the case of a NULL struct. That is handled by
- * the parent LazyObject.
- */
-@SuppressWarnings("rawtypes")
-public class LazyColumnar extends LazyNonPrimitive<LazyColumnarObjectInspector> {
-
- /**
- * IFrameTupleReference: the backend of the struct
- */
- IFrameTupleReference tuple;
-
- /**
- * Whether the data is already parsed or not.
- */
- boolean reset;
-
- /**
- * The fields of the struct.
- */
- LazyObject[] fields;
-
- /**
- * Whether init() has been called on the field or not.
- */
- boolean[] fieldVisited;
-
- /**
- * whether this is the first-time initialization
- */
- boolean start = true;
-
- /**
- * Construct a LazyColumnar object with the ObjectInspector.
- */
- public LazyColumnar(LazyColumnarObjectInspector oi) {
- super(oi);
- }
-
- /**
- * Set the row data for this LazyColumnar.
- *
- * @see LazyObject#init(ByteArrayRef, int, int)
- */
- @Override
- public void init(byte[] bytes, int start, int length) {
- super.init(bytes, start, length);
- reset = false;
- }
-
- /**
- * Parse the byte[] and fill each field.
- */
- private void parse() {
-
- if (start) {
- // initialize field array and reusable objects
- List<? extends StructField> fieldRefs = ((StructObjectInspector) oi)
- .getAllStructFieldRefs();
-
- fields = new LazyObject[fieldRefs.size()];
- for (int i = 0; i < fields.length; i++) {
- fields[i] = LazyFactory.createLazyObject(fieldRefs.get(i)
- .getFieldObjectInspector());
- }
- fieldVisited = new boolean[fields.length];
- start = false;
- }
-
- Arrays.fill(fieldVisited, false);
- reset = true;
- }
-
- /**
- * Get one field out of the struct.
- *
- * If the field is a primitive field, return the actual object. Otherwise
- * return the LazyObject. This is because PrimitiveObjectInspector does not
- * have control over the object used by the user - the user simply uses the
- * Object directly instead of going through
- * PrimitiveObjectInspector.get(Object).
- *
- * @param fieldID
- * The field ID
- * @return The field as a LazyObject
- */
- public Object getField(int fieldID) {
- if (!reset) {
- parse();
- }
- return uncheckedGetField(fieldID);
- }
-
- /**
- * Get the field out of the row without checking parsed. This is called by
- * both getField and getFieldsAsList.
- *
- * @param fieldID
- * The id of the field starting from 0.
- * @param nullSequence
- * The sequence representing NULL value.
- * @return The value of the field
- */
- private Object uncheckedGetField(int fieldID) {
- // get the buffer
- byte[] buffer = tuple.getFieldData(fieldID);
- // get the offset of the field
- int s1 = tuple.getFieldStart(fieldID);
- int l1 = tuple.getFieldLength(fieldID);
-
- if (!fieldVisited[fieldID]) {
- fieldVisited[fieldID] = true;
- fields[fieldID].init(buffer, s1, l1);
- }
- // if (fields[fieldID].getObject() == null) {
- // throw new IllegalStateException("illegal field " + fieldID);
- // }
- return fields[fieldID].getObject();
- }
-
- ArrayList<Object> cachedList;
-
- /**
- * Get the values of the fields as an ArrayList.
- *
- * @return The values of the fields as an ArrayList.
- */
- public ArrayList<Object> getFieldsAsList() {
- if (!reset) {
- parse();
- }
- if (cachedList == null) {
- cachedList = new ArrayList<Object>();
- } else {
- cachedList.clear();
- }
- for (int i = 0; i < fields.length; i++) {
- cachedList.add(uncheckedGetField(i));
- }
- return cachedList;
- }
-
- @Override
- public Object getObject() {
- return this;
- }
-
- protected boolean getParsed() {
- return reset;
- }
-
- protected void setParsed(boolean parsed) {
- this.reset = parsed;
- }
-
- protected LazyObject[] getFields() {
- return fields;
- }
-
- protected void setFields(LazyObject[] fields) {
- this.fields = fields;
- }
-
- protected boolean[] getFieldInited() {
- return fieldVisited;
- }
-
- protected void setFieldInited(boolean[] fieldInited) {
- this.fieldVisited = fieldInited;
- }
-
- /**
- * Rebind a frame tuple reference (IFrameTupleReference) to the struct.
- */
- public void init(IFrameTupleReference r) {
- this.tuple = r;
- reset = false;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFactory.java
deleted file mode 100644
index e7593e4..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFactory.java
+++ /dev/null
@@ -1,97 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyColumnarObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyListObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyMapObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyStructObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyBooleanObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyByteObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyDoubleObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyFloatObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyIntObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyLongObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyShortObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyStringObjectInspector;
-
-/**
- * LazyFactory.
- *
- */
-public final class LazyFactory {
-
- /**
- * Create a lazy binary primitive class given the type name.
- */
- public static LazyPrimitive<?, ?> createLazyPrimitiveClass(
- PrimitiveObjectInspector oi) {
- PrimitiveCategory p = oi.getPrimitiveCategory();
- switch (p) {
- case BOOLEAN:
- return new LazyBoolean((LazyBooleanObjectInspector) oi);
- case BYTE:
- return new LazyByte((LazyByteObjectInspector) oi);
- case SHORT:
- return new LazyShort((LazyShortObjectInspector) oi);
- case INT:
- return new LazyInteger((LazyIntObjectInspector) oi);
- case LONG:
- return new LazyLong((LazyLongObjectInspector) oi);
- case FLOAT:
- return new LazyFloat((LazyFloatObjectInspector) oi);
- case DOUBLE:
- return new LazyDouble((LazyDoubleObjectInspector) oi);
- case STRING:
- return new LazyString((LazyStringObjectInspector) oi);
- default:
- throw new RuntimeException("Internal error: no LazyObject for " + p);
- }
- }
-
- /**
- * Create a hierarchical LazyObject based on the given typeInfo.
- */
- public static LazyObject<? extends ObjectInspector> createLazyObject(
- ObjectInspector oi) {
- ObjectInspector.Category c = oi.getCategory();
- switch (c) {
- case PRIMITIVE:
- return createLazyPrimitiveClass((PrimitiveObjectInspector) oi);
- case MAP:
- return new LazyMap((LazyMapObjectInspector) oi);
- case LIST:
- return new LazyArray((LazyListObjectInspector) oi);
- case STRUCT: // check whether it is a top-level struct
- if (oi instanceof LazyStructObjectInspector)
- return new LazyStruct((LazyStructObjectInspector) oi);
- else
- return new LazyColumnar((LazyColumnarObjectInspector) oi);
- default:
- throw new RuntimeException("Hive LazySerDe Internal error.");
- }
- }
-
- private LazyFactory() {
- // prevent instantiation
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyInteger.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyInteger.java
deleted file mode 100644
index c908c40..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyInteger.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.io.IntWritable;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyIntObjectInspector;
-
-/**
- * LazyObject for storing a value of Integer.
- *
- * <p>
- * Part of the code is adapted from Apache Harmony Project.
- *
- * As with the specification, this implementation relied on code laid out in <a
- * href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's
- * Delight, (Addison Wesley, 2002)</a> as well as <a
- * href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
- * </p>
- *
- */
-public class LazyInteger extends
- LazyPrimitive<LazyIntObjectInspector, IntWritable> {
-
- public LazyInteger(LazyIntObjectInspector oi) {
- super(oi);
- data = new IntWritable();
- }
-
- public LazyInteger(LazyInteger copy) {
- super(copy);
- data = new IntWritable(copy.data.get());
- }
-
- /**
- * The reusable vInt for decoding the integer.
- */
- VInt vInt = new LazyUtils.VInt();
-
- @Override
- public void init(byte[] bytes, int start, int length) {
- if (length == 0) {
- isNull = true;
- return;
- } else
- isNull = false;
-
- LazyUtils.readVInt(bytes, start, vInt);
- assert (length == vInt.length);
- if (length != vInt.length)
- throw new IllegalStateException(
- "parse int: length mismatch, expected " + vInt.length
- + " but get " + length);
- data.set(vInt.value);
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyLong.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyLong.java
deleted file mode 100644
index 38097e6..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyLong.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.io.LongWritable;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyLongObjectInspector;
-
-/**
- * LazyObject for storing a value of Long.
- *
- * <p>
- * Part of the code is adapted from Apache Harmony Project.
- *
- * As with the specification, this implementation relied on code laid out in <a
- * href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's
- * Delight, (Addison Wesley, 2002)</a> as well as <a
- * href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
- * </p>
- *
- */
-public class LazyLong extends
- LazyPrimitive<LazyLongObjectInspector, LongWritable> {
-
- public LazyLong(LazyLongObjectInspector oi) {
- super(oi);
- data = new LongWritable();
- }
-
- public LazyLong(LazyLong copy) {
- super(copy);
- data = new LongWritable(copy.data.get());
- }
-
- /**
- * The reusable vLong for decoding the long.
- */
- VLong vLong = new LazyUtils.VLong();
-
- @Override
- public void init(byte[] bytes, int start, int length) {
- if (length == 0) {
- isNull = true;
- return;
- } else
- isNull = false;
-
- LazyUtils.readVLong(bytes, start, vLong);
- assert (length == vLong.length);
- if (length != vLong.length)
- throw new IllegalStateException("parse long: length mismatch");
- data.set(vLong.value);
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyMap.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyMap.java
deleted file mode 100644
index 56bc41b..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyMap.java
+++ /dev/null
@@ -1,337 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import java.util.Arrays;
-import java.util.LinkedHashMap;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
-import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.RecordInfo;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyMapObjectInspector;
-
-/**
- * LazyMap is serialized as follows: start A b c b c b c end bytes[] ->
- * |--------|---|---|---|---| ... |---|---|
- *
- * Section A is the null-bytes. Suppose the map has N key-value pairs, then
- * there are (N*2+7)/8 bytes used as null-bytes. Each bit corresponds to a key
- * or a value and it indicates whether that key or value is null (0) or not null
- * (1).
- *
- * After A, all the bytes are actual serialized data of the map, which are
- * key-value pairs. b represent the keys and c represent the values. Each of
- * them is again a LazyObject.
- *
- */
-
-@SuppressWarnings("rawtypes")
-public class LazyMap extends LazyNonPrimitive<LazyMapObjectInspector> {
-
- private static Log LOG = LogFactory.getLog(LazyMap.class.getName());
-
- /**
- * Whether the data is already parsed or not.
- */
- boolean parsed;
-
- /**
- * The size of the map. Only valid when the data is parsed. -1 when the map
- * is NULL.
- */
- int mapSize = 0;
-
- /**
- * The beginning position and length of key[i] and value[i]. Only valid when
- * the data is parsed.
- */
- int[] keyStart;
- int[] keyLength;
- int[] valueStart;
- int[] valueLength;
- /**
- * Whether valueObjects[i]/keyObjects[i] is initialized or not.
- */
- boolean[] keyInited;
- boolean[] valueInited;
-
- /**
- * Whether valueObjects[i]/keyObjects[i] is null or not. This cannot be
- * inferred from the length of the object. In particular, a 0-length string
- * is not null.
- */
- boolean[] keyIsNull;
- boolean[] valueIsNull;
-
- /**
- * The keys are stored in an array of LazyPrimitives.
- */
- LazyPrimitive<?, ?>[] keyObjects;
- /**
- * The values are stored in an array of LazyObjects. value[index] starts
- * from keyEnd[index] + 1 and ends before keyStart[index+1] - 1.
- */
- LazyObject[] valueObjects;
-
- protected LazyMap(LazyMapObjectInspector oi) {
- super(oi);
- }
-
- /**
- * Set the row data for this LazyMap.
- *
- * @see LazyObject#init(ByteArrayRef, int, int)
- */
- @Override
- public void init(byte[] bytes, int start, int length) {
- super.init(bytes, start, length);
- parsed = false;
- }
-
- /**
- * Adjust the size of the arrays: keyStart, keyLength, valueStart, valueLength,
- * keyInited, keyIsNull, valueInited, valueIsNull.
- */
- protected void adjustArraySize(int newSize) {
- if (keyStart == null || keyStart.length < newSize) {
- keyStart = new int[newSize];
- keyLength = new int[newSize];
- valueStart = new int[newSize];
- valueLength = new int[newSize];
- keyInited = new boolean[newSize];
- keyIsNull = new boolean[newSize];
- valueInited = new boolean[newSize];
- valueIsNull = new boolean[newSize];
- keyObjects = new LazyPrimitive<?, ?>[newSize];
- valueObjects = new LazyObject[newSize];
- }
- }
-
- boolean nullMapKey = false;
- VInt vInt = new LazyUtils.VInt();
- RecordInfo recordInfo = new LazyUtils.RecordInfo();
-
- /**
- * Parse the byte[] and fill keyStart, keyLength, keyIsNull, valueStart,
- * valueLength and valueIsNull.
- */
- private void parse() {
-
- // get the VInt that represents the map size
- LazyUtils.readVInt(bytes, start, vInt);
- mapSize = vInt.value;
- if (0 == mapSize) {
- parsed = true;
- return;
- }
-
- // adjust arrays
- adjustArraySize(mapSize);
-
- // find out the null-bytes
- int mapByteStart = start + vInt.length;
- int nullByteCur = mapByteStart;
- int nullByteEnd = mapByteStart + (mapSize * 2 + 7) / 8;
- int lastElementByteEnd = nullByteEnd;
-
- // parsing the keys and values one by one
- for (int i = 0; i < mapSize; i++) {
- // parse a key
- keyIsNull[i] = true;
- if ((bytes[nullByteCur] & (1 << ((i * 2) % 8))) != 0) {
- keyIsNull[i] = false;
- LazyUtils.checkObjectByteInfo(
- ((MapObjectInspector) oi).getMapKeyObjectInspector(),
- bytes, lastElementByteEnd, recordInfo);
- keyStart[i] = lastElementByteEnd + recordInfo.elementOffset;
- keyLength[i] = recordInfo.elementSize;
- lastElementByteEnd = keyStart[i] + keyLength[i];
- } else if (!nullMapKey) {
- nullMapKey = true;
- LOG.warn("Null map key encountered! Ignoring similar problems.");
- }
-
- // parse a value
- valueIsNull[i] = true;
- if ((bytes[nullByteCur] & (1 << ((i * 2 + 1) % 8))) != 0) {
- valueIsNull[i] = false;
- LazyUtils.checkObjectByteInfo(
- ((MapObjectInspector) oi).getMapValueObjectInspector(),
- bytes, lastElementByteEnd, recordInfo);
- valueStart[i] = lastElementByteEnd + recordInfo.elementOffset;
- valueLength[i] = recordInfo.elementSize;
- lastElementByteEnd = valueStart[i] + valueLength[i];
- }
-
- // move onto the next null byte
- if (3 == (i % 4)) {
- nullByteCur++;
- }
- }
-
- Arrays.fill(keyInited, 0, mapSize, false);
- Arrays.fill(valueInited, 0, mapSize, false);
- parsed = true;
- }
-
- /**
- * Get the value object with the index without checking parsed.
- *
- * @param index
- * The index into the array starting from 0
- */
- private LazyObject uncheckedGetValue(int index) {
- if (valueIsNull[index]) {
- return null;
- }
- if (!valueInited[index]) {
- valueInited[index] = true;
- if (valueObjects[index] == null) {
- valueObjects[index] = LazyFactory
- .createLazyObject(((MapObjectInspector) oi)
- .getMapValueObjectInspector());
- }
- valueObjects[index].init(bytes, valueStart[index],
- valueLength[index]);
- }
- return valueObjects[index];
- }
-
- /**
- * Get the value in the map for the key.
- *
- * If there are multiple matches (which is possible in the serialized
- * format), only the first one is returned.
- *
- * The most efficient way to get the value for the key is to serialize the
- * key and then try to find it in the array. We do linear search because in
- * most cases, user only wants to get one or two values out of the map, and
- * the cost of building up a HashMap is substantially higher.
- *
- * @param key
- * The key object that we are looking for.
- * @return The corresponding value object, or NULL if not found
- */
- public Object getMapValueElement(Object key) {
- if (!parsed) {
- parse();
- }
- // search for the key
- for (int i = 0; i < mapSize; i++) {
- LazyPrimitive<?, ?> lazyKeyI = uncheckedGetKey(i);
- if (lazyKeyI == null) {
- continue;
- }
- // getWritableObject() converts the LazyPrimitive to the
- // actual primitive writable object so that it can be
- // compared with the search key.
- Object keyI = lazyKeyI.getWritableObject();
- if (keyI == null) {
- continue;
- }
- if (keyI.equals(key)) {
- // Got a match, return the value
- LazyObject v = uncheckedGetValue(i);
- return v == null ? v : v.getObject();
- }
- }
- return null;
- }
-
- /**
- * Get the key object with the index without checking parsed.
- *
- * @param index
- * The index into the array starting from 0
- */
- private LazyPrimitive<?, ?> uncheckedGetKey(int index) {
- if (keyIsNull[index]) {
- return null;
- }
- if (!keyInited[index]) {
- keyInited[index] = true;
- if (keyObjects[index] == null) {
- // Keys are always primitive
- keyObjects[index] = LazyFactory
- .createLazyPrimitiveClass((PrimitiveObjectInspector) ((MapObjectInspector) oi)
- .getMapKeyObjectInspector());
- }
- keyObjects[index].init(bytes, keyStart[index], keyLength[index]);
- }
- return keyObjects[index];
- }
-
- /**
- * cachedMap is reused for different calls to getMap(). But each LazyMap has
- * a separate cachedMap so we won't overwrite the data by accident.
- */
- LinkedHashMap<Object, Object> cachedMap;
-
- /**
- * Return the map object representing this LazyMap. Note that the keyObjects
- * will be Writable primitive objects.
- *
- * @return the map object
- */
- public Map<Object, Object> getMap() {
- if (!parsed) {
- parse();
- }
- if (cachedMap == null) {
- // Use LinkedHashMap to provide deterministic order
- cachedMap = new LinkedHashMap<Object, Object>();
- } else {
- cachedMap.clear();
- }
-
- // go through each element of the map
- for (int i = 0; i < mapSize; i++) {
- LazyPrimitive<?, ?> lazyKey = uncheckedGetKey(i);
- if (lazyKey == null) {
- continue;
- }
- Object key = lazyKey.getObject();
- // do not overwrite if there are duplicate keys
- if (key != null && !cachedMap.containsKey(key)) {
- LazyObject lazyValue = uncheckedGetValue(i);
- Object value = (lazyValue == null ? null : lazyValue
- .getObject());
- cachedMap.put(key, value);
- }
- }
- return cachedMap;
- }
-
- /**
- * Get the size of the map represented by this LazyMap.
- *
- * @return The size of the map
- */
- public int getMapSize() {
- if (!parsed) {
- parse();
- }
- return mapSize;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyNonPrimitive.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyNonPrimitive.java
deleted file mode 100644
index b151f2d..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyNonPrimitive.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-/**
- * LazyNonPrimitive stores a non-primitive Object (array, map, or struct) in a LazyObject.
- */
-public abstract class LazyNonPrimitive<OI extends ObjectInspector> extends
- LazyObject<OI> {
-
- protected byte[] bytes;
- protected int start;
- protected int length;
-
- /**
- * Create a LazyNonPrimitive object with the specified ObjectInspector.
- *
- * @param oi
- * The ObjectInspector would have to have a hierarchy of
- * LazyObjectInspectors with the leaf nodes being
- * WritableObjectInspectors. It is used both for accessing the
- * type hierarchy of the complex object and for getting meta
- * information (separator, nullSequence, etc.) when parsing the
- * lazy object.
- */
- protected LazyNonPrimitive(OI oi) {
- super(oi);
- bytes = null;
- start = 0;
- length = 0;
- }
-
- @Override
- public void init(byte[] bytes, int start, int length) {
- if (bytes == null) {
- throw new RuntimeException("bytes cannot be null!");
- }
- this.bytes = bytes;
- this.start = start;
- this.length = length;
- assert start >= 0;
- assert start + length <= bytes.length;
- }
-
- @Override
- public Object getObject() {
- return this;
- }
-
- @Override
- public int hashCode() {
- return LazyUtils.hashBytes(bytes, start, length);
- }
-
- @Override
- public void init(IFrameTupleReference tuple) {
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyObject.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyObject.java
deleted file mode 100644
index 9aaaa88..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyObject.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-/**
- * LazyObject stores an object in a range of bytes in a byte[].
- *
- * A LazyObject can represent any primitive object or hierarchical object like
- * array, map or struct.
- */
-public abstract class LazyObject<OI extends ObjectInspector> {
-
- OI oi;
-
- /**
- * Create a LazyObject.
- *
- * @param oi
- * Derived classes can access meta information about this Lazy
- * Object (e.g., separator, nullSequence, escaper) from it.
- */
- protected LazyObject(OI oi) {
- this.oi = oi;
- }
-
- /**
- * Set the data for this LazyObject. The same byte[] may be reused across
- * multiple rows; only the range [start, start + length) belongs to this
- * object.
- *
- * @param bytes
- * The byte[] containing the serialized data.
- * @param start
- * The start position inside the bytes.
- * @param length
- * The length of the data, starting from "start"
- * @see ByteArrayRef
- */
- public abstract void init(byte[] bytes, int start, int length);
-
- public abstract void init(IFrameTupleReference tuple);
-
- /**
- * If the LazyObject is a primitive Object, then deserialize it and return
- * the actual primitive Object. Otherwise (array, map, struct), return this.
- */
- public abstract Object getObject();
-
- @Override
- public abstract int hashCode();
-
- protected OI getInspector() {
- return oi;
- }
-
- protected void setInspector(OI oi) {
- this.oi = oi;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyPrimitive.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyPrimitive.java
deleted file mode 100644
index 888e5b2..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyPrimitive.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.io.Writable;
-
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-/**
- * LazyPrimitive stores a primitive Object in a LazyObject.
- */
-public abstract class LazyPrimitive<OI extends ObjectInspector, T extends Writable>
- extends LazyObject<OI> {
-
- LazyPrimitive(OI oi) {
- super(oi);
- }
-
- LazyPrimitive(LazyPrimitive<OI, T> copy) {
- super(copy.oi);
- isNull = copy.isNull;
- }
-
- T data;
- boolean isNull = false;
-
- /**
- * Returns the primitive object represented by this LazyObject. This is
- * useful because it can make sure we have "null" for null objects.
- */
- @Override
- public Object getObject() {
- return isNull ? null : this;
- }
-
- public T getWritableObject() {
- return isNull ? null : data;
- }
-
- @Override
- public String toString() {
- return isNull ? "null" : data.toString();
- }
-
- @Override
- public int hashCode() {
- return isNull ? 0 : data.hashCode();
- }
-
- @Override
- public void init(IFrameTupleReference tuple) {
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java
deleted file mode 100644
index 4d0dff6..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java
+++ /dev/null
@@ -1,477 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.serde.Constants;
-import org.apache.hadoop.hive.serde2.ByteStream;
-import org.apache.hadoop.hive.serde2.ByteStream.Output;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-
-/**
- * The LazySerDe class combines the lazy property of the LazySimpleSerDe class
- * and the binary property of the BinarySortable class. Lazy means a field is
- * not deserialized until it is required. Binary means a field is serialized in
- * a compact binary format.
- */
-public class LazySerDe implements SerDe {
-
- public static final Log LOG = LogFactory.getLog(LazySerDe.class.getName());
-
- public LazySerDe() {
- }
-
- List<String> columnNames;
- List<TypeInfo> columnTypes;
-
- TypeInfo rowTypeInfo;
- ObjectInspector cachedObjectInspector;
-
- // The object for storing row data
- LazyColumnar cachedLazyStruct;
-
- /**
- * Initialize the SerDe with configuration and table information.
- */
- @Override
- public void initialize(Configuration conf, Properties tbl)
- throws SerDeException {
- // Get column names and types
- String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
- String columnTypeProperty = tbl
- .getProperty(Constants.LIST_COLUMN_TYPES);
- if (columnNameProperty.length() == 0) {
- columnNames = new ArrayList<String>();
- } else {
- columnNames = Arrays.asList(columnNameProperty.split(","));
- }
- if (columnTypeProperty.length() == 0) {
- columnTypes = new ArrayList<TypeInfo>();
- } else {
- columnTypes = TypeInfoUtils
- .getTypeInfosFromTypeString(columnTypeProperty);
- }
- assert (columnNames.size() == columnTypes.size());
- // Create row related objects
- rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames,
- columnTypes);
- // Create the object inspector and the lazy binary struct object
- cachedObjectInspector = LazyUtils.getLazyObjectInspectorFromTypeInfo(
- rowTypeInfo, true);
- cachedLazyStruct = (LazyColumnar) LazyFactory
- .createLazyObject(cachedObjectInspector);
- // output debug info
- LOG.debug("LazySerDe initialized with: columnNames=" + columnNames
- + " columnTypes=" + columnTypes);
- }
-
- /**
- * Returns the ObjectInspector for the row.
- */
- @Override
- public ObjectInspector getObjectInspector() throws SerDeException {
- return cachedObjectInspector;
- }
-
- /**
- * Returns the Writable Class after serialization.
- */
- @Override
- public Class<? extends Writable> getSerializedClass() {
- return BytesWritable.class;
- }
-
- // The wrapper for byte array
- ByteArrayRef byteArrayRef;
-
- /**
- * Deserialize a table record to a Lazy struct.
- */
- @SuppressWarnings("deprecation")
- @Override
- public Object deserialize(Writable field) throws SerDeException {
- if (byteArrayRef == null) {
- byteArrayRef = new ByteArrayRef();
- }
- if (field instanceof BytesWritable) {
- BytesWritable b = (BytesWritable) field;
- if (b.getSize() == 0) {
- return null;
- }
- // For backward-compatibility with hadoop 0.17
- byteArrayRef.setData(b.get());
- cachedLazyStruct.init(byteArrayRef.getData(), 0, b.getSize());
- } else if (field instanceof Text) {
- Text t = (Text) field;
- if (t.getLength() == 0) {
- return null;
- }
- byteArrayRef.setData(t.getBytes());
- cachedLazyStruct.init(byteArrayRef.getData(), 0, t.getLength());
- } else {
- throw new SerDeException(getClass().toString()
- + ": expects either BytesWritable or Text object!");
- }
- return cachedLazyStruct;
- }
-
- /**
- * The reusable output buffer and serialize byte buffer.
- */
- BytesWritable serializeBytesWritable = new BytesWritable();
- ByteStream.Output serializeByteStream = new ByteStream.Output();
-
- /**
- * Serialize an object to a byte buffer in a binary compact way.
- */
- @Override
- public Writable serialize(Object obj, ObjectInspector objInspector)
- throws SerDeException {
- // check whether the object is a struct record
- serializeByteStream.reset();
-
- if (objInspector.getCategory() != Category.STRUCT) {
- // serialize the primitive object
- serialize(serializeByteStream, obj, objInspector);
- } else {
- // serialize the row as a struct
- serializeStruct(serializeByteStream, obj,
- (StructObjectInspector) objInspector);
- }
- // return the serialized bytes
- serializeBytesWritable.set(serializeByteStream.getData(), 0,
- serializeByteStream.getCount());
- return serializeBytesWritable;
- }
-
- boolean nullMapKey = false;
-
- /**
- * Serialize a struct object without writing the byte size. This function is
- * shared by both row serialization and struct serialization.
- *
- * @param byteStream
- * the byte stream storing the serialization data
- * @param obj
- * the struct object to serialize
- * @param soi
- * the struct object inspector
- */
- private void serializeStruct(Output byteStream, Object obj,
- StructObjectInspector soi) {
- // do nothing for null struct
- if (null == obj) {
- return;
- }
- /*
- * Interleave serializing one null byte and 8 struct fields in each
- * round, in order to support data deserialization with different table
- * schemas
- */
- List<? extends StructField> fields = soi.getAllStructFieldRefs();
- int size = fields.size();
- int lasti = 0;
- byte nullByte = 0;
- for (int i = 0; i < size; i++) {
- // set bit to 1 if a field is not null
- if (null != soi.getStructFieldData(obj, fields.get(i))) {
- nullByte |= 1 << (i % 8);
- }
- // write the null byte after every eight fields, or at the last
- // field, and then serialize the corresponding group of up to
- // eight struct fields at the same time
- if (7 == i % 8 || i == size - 1) {
- serializeByteStream.write(nullByte);
- for (int j = lasti; j <= i; j++) {
- serialize(serializeByteStream, soi.getStructFieldData(obj,
- fields.get(j)), fields.get(j)
- .getFieldObjectInspector());
- }
- lasti = i + 1;
- nullByte = 0;
- }
- }
- }
-
- /**
- * A recursive function that serializes an object to a byte buffer based on
- * its object inspector.
- *
- * @param byteStream
- * the byte stream storing the serialization data
- * @param obj
- * the object to serialize
- * @param objInspector
- * the object inspector
- */
- private void serialize(Output byteStream, Object obj,
- ObjectInspector objInspector) {
-
- // do nothing for null object
- if (null == obj) {
- return;
- }
-
- switch (objInspector.getCategory()) {
- case PRIMITIVE: {
- PrimitiveObjectInspector poi = (PrimitiveObjectInspector) objInspector;
- switch (poi.getPrimitiveCategory()) {
- case VOID: {
- return;
- }
- case BOOLEAN: {
- boolean v = ((BooleanObjectInspector) poi).get(obj);
- byteStream.write((byte) (v ? 1 : 0));
- return;
- }
- case BYTE: {
- ByteObjectInspector boi = (ByteObjectInspector) poi;
- byte v = boi.get(obj);
- byteStream.write(v);
- return;
- }
- case SHORT: {
- ShortObjectInspector spoi = (ShortObjectInspector) poi;
- short v = spoi.get(obj);
- byteStream.write((byte) (v >> 8));
- byteStream.write((byte) (v));
- return;
- }
- case INT: {
- IntObjectInspector ioi = (IntObjectInspector) poi;
- int v = ioi.get(obj);
- LazyUtils.writeVInt(byteStream, v);
- return;
- }
- case LONG: {
- LongObjectInspector loi = (LongObjectInspector) poi;
- long v = loi.get(obj);
- LazyUtils.writeVLong(byteStream, v);
- return;
- }
- case FLOAT: {
- FloatObjectInspector foi = (FloatObjectInspector) poi;
- int v = Float.floatToIntBits(foi.get(obj));
- byteStream.write((byte) (v >> 24));
- byteStream.write((byte) (v >> 16));
- byteStream.write((byte) (v >> 8));
- byteStream.write((byte) (v));
- return;
- }
- case DOUBLE: {
- DoubleObjectInspector doi = (DoubleObjectInspector) poi;
- long v = Double.doubleToLongBits(doi.get(obj));
- byteStream.write((byte) (v >> 56));
- byteStream.write((byte) (v >> 48));
- byteStream.write((byte) (v >> 40));
- byteStream.write((byte) (v >> 32));
- byteStream.write((byte) (v >> 24));
- byteStream.write((byte) (v >> 16));
- byteStream.write((byte) (v >> 8));
- byteStream.write((byte) (v));
- return;
- }
- case STRING: {
- StringObjectInspector soi = (StringObjectInspector) poi;
- Text t = soi.getPrimitiveWritableObject(obj);
- /* write the byte size of the string as a vint */
- int length = t.getLength();
- LazyUtils.writeVInt(byteStream, length);
- /* write string itself */
- byte[] data = t.getBytes();
- byteStream.write(data, 0, length);
- return;
- }
- default: {
- throw new RuntimeException("Unrecognized type: "
- + poi.getPrimitiveCategory());
- }
- }
- }
- case LIST: {
- ListObjectInspector loi = (ListObjectInspector) objInspector;
- ObjectInspector eoi = loi.getListElementObjectInspector();
-
- // 1/ reserve space for the byte size of the list,
- // which is an integer and takes four bytes
- int byteSizeStart = byteStream.getCount();
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- int listStart = byteStream.getCount();
-
- // 2/ write the size of the list as a VInt
- int size = loi.getListLength(obj);
- LazyUtils.writeVInt(byteStream, size);
-
- // 3/ write the null bytes
- byte nullByte = 0;
- for (int eid = 0; eid < size; eid++) {
- // set the bit to 1 if an element is not null
- if (null != loi.getListElement(obj, eid)) {
- nullByte |= 1 << (eid % 8);
- }
- // store the byte every eight elements or
- // if this is the last element
- if (7 == eid % 8 || eid == size - 1) {
- byteStream.write(nullByte);
- nullByte = 0;
- }
- }
-
- // 4/ write element by element from the list
- for (int eid = 0; eid < size; eid++) {
- serialize(byteStream, loi.getListElement(obj, eid), eoi);
- }
-
- // 5/ update the list byte size
- int listEnd = byteStream.getCount();
- int listSize = listEnd - listStart;
- byte[] bytes = byteStream.getData();
- bytes[byteSizeStart] = (byte) (listSize >> 24);
- bytes[byteSizeStart + 1] = (byte) (listSize >> 16);
- bytes[byteSizeStart + 2] = (byte) (listSize >> 8);
- bytes[byteSizeStart + 3] = (byte) (listSize);
-
- return;
- }
- case MAP: {
- MapObjectInspector moi = (MapObjectInspector) objInspector;
- ObjectInspector koi = moi.getMapKeyObjectInspector();
- ObjectInspector voi = moi.getMapValueObjectInspector();
- Map<?, ?> map = moi.getMap(obj);
-
- // 1/ reserve space for the byte size of the map,
- // which is an integer and takes four bytes
- int byteSizeStart = byteStream.getCount();
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- int mapStart = byteStream.getCount();
-
- // 2/ write the size of the map which is a VInt
- int size = map.size();
- LazyUtils.writeVInt(byteStream, size);
-
- // 3/ write the null bytes
- int b = 0;
- byte nullByte = 0;
- for (Map.Entry<?, ?> entry : map.entrySet()) {
- // set the bit to 1 if a key is not null
- if (null != entry.getKey()) {
- nullByte |= 1 << (b % 8);
- } else if (!nullMapKey) {
- nullMapKey = true;
- LOG.warn("Null map key encountered! Ignoring similar problems.");
- }
- b++;
- // set the bit to 1 if a value is not null
- if (null != entry.getValue()) {
- nullByte |= 1 << (b % 8);
- }
- b++;
- // write the byte to stream every 4 key-value pairs
- // or if this is the last key-value pair
- if (0 == b % 8 || b == size * 2) {
- byteStream.write(nullByte);
- nullByte = 0;
- }
- }
-
- // 4/ write key-value pairs one by one
- for (Map.Entry<?, ?> entry : map.entrySet()) {
- serialize(byteStream, entry.getKey(), koi);
- serialize(byteStream, entry.getValue(), voi);
- }
-
- // 5/ update the byte size of the map
- int mapEnd = byteStream.getCount();
- int mapSize = mapEnd - mapStart;
- byte[] bytes = byteStream.getData();
- bytes[byteSizeStart] = (byte) (mapSize >> 24);
- bytes[byteSizeStart + 1] = (byte) (mapSize >> 16);
- bytes[byteSizeStart + 2] = (byte) (mapSize >> 8);
- bytes[byteSizeStart + 3] = (byte) (mapSize);
-
- return;
- }
- case STRUCT: {
- // 1/ reserve space for the byte size of the struct,
- // which is an integer and takes four bytes
- int byteSizeStart = byteStream.getCount();
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- int structStart = byteStream.getCount();
-
- // 2/ serialize the struct
- serializeStruct(byteStream, obj,
- (StructObjectInspector) objInspector);
-
- // 3/ update the byte size of the struct
- int structEnd = byteStream.getCount();
- int structSize = structEnd - structStart;
- byte[] bytes = byteStream.getData();
- bytes[byteSizeStart] = (byte) (structSize >> 24);
- bytes[byteSizeStart + 1] = (byte) (structSize >> 16);
- bytes[byteSizeStart + 2] = (byte) (structSize >> 8);
- bytes[byteSizeStart + 3] = (byte) (structSize);
-
- return;
- }
- default: {
- throw new RuntimeException("Unrecognized type: "
- + objInspector.getCategory());
- }
- }
- }
-}
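
For readers following the removed SerDe, the core of serializeStruct is the interleaving of one null byte with each group of up to eight fields: bit (i % 8) of the null byte is set when field i is non-null, and the group's field bytes follow immediately. The standalone sketch below is illustrative only (the class name and the toy toString() field encoding are not part of the deleted code; the real SerDe dispatches on each field's ObjectInspector), but it shows exactly that bit-packing.

// Minimal sketch of the null-byte interleaving used by serializeStruct.
import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;

public class NullByteInterleavingSketch {
    public static byte[] serializeRow(Object[] fields) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        int lasti = 0;
        byte nullByte = 0;
        for (int i = 0; i < fields.length; i++) {
            if (fields[i] != null) {
                nullByte |= 1 << (i % 8);        // bit set => field is present
            }
            // flush the null byte and its group of fields every eight fields
            // or at the last field
            if (7 == i % 8 || i == fields.length - 1) {
                out.write(nullByte);
                for (int j = lasti; j <= i; j++) {
                    if (fields[j] != null) {
                        // toy field encoding: UTF-8 bytes of toString(); the real
                        // SerDe serializes each field via its ObjectInspector
                        byte[] data = fields[j].toString().getBytes(StandardCharsets.UTF_8);
                        out.write(data, 0, data.length);
                    }
                }
                lasti = i + 1;
                nullByte = 0;
            }
        }
        return out.toByteArray();
    }

    public static void main(String[] args) {
        byte[] bytes = serializeRow(new Object[] { "a", null, "c" });
        // bits 0 and 2 are set -> null byte 0b00000101 == 5; total length is 3
        System.out.printf("null byte = %d, total length = %d%n", bytes[0], bytes.length);
    }
}
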
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyShort.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyShort.java
deleted file mode 100644
index 7484b72..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyShort.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.hive.serde2.io.ShortWritable;
-
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyShortObjectInspector;
-
-/**
- * LazyObject for storing a value of Short.
- *
- * <p>
- * Part of the code is adapted from Apache Harmony Project.
- *
- * As with the specification, this implementation relied on code laid out in <a
- * href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's
- * Delight, (Addison Wesley, 2002)</a> as well as <a
- * href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
- * </p>
- *
- */
-public class LazyShort extends
- LazyPrimitive<LazyShortObjectInspector, ShortWritable> {
-
- public LazyShort(LazyShortObjectInspector oi) {
- super(oi);
- data = new ShortWritable();
- }
-
- public LazyShort(LazyShort copy) {
- super(copy);
- data = new ShortWritable(copy.data.get());
- }
-
- @Override
- public void init(byte[] bytes, int start, int length) {
- if (length == 0) {
- isNull = true;
- return;
- } else
- isNull = false;
-
- assert (2 == length);
- data.set(LazyUtils.byteArrayToShort(bytes, start));
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyString.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyString.java
deleted file mode 100644
index c13533b..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyString.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.io.Text;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyStringObjectInspector;
-
-/**
- * LazyObject for storing a value of String.
- */
-public class LazyString extends LazyPrimitive<LazyStringObjectInspector, Text> {
-
- public LazyString(LazyStringObjectInspector oi) {
- super(oi);
- data = new Text();
- }
-
- public LazyString(LazyString copy) {
- super(copy);
- data = new Text(copy.data);
- }
-
- VInt vInt = new LazyUtils.VInt();
-
- @Override
- public void init(byte[] bytes, int start, int length) {
- if (length == 0) {
- isNull = true;
- return;
- } else
- isNull = false;
-
- // get the byte length of the string
- LazyUtils.readVInt(bytes, start, vInt);
- if (vInt.value + vInt.length != length)
- throw new IllegalStateException(
- "parse string: length mismatch, expected "
- + (vInt.value + vInt.length) + " but got " + length);
- assert (length - vInt.length > -1);
- data.set(bytes, start + vInt.length, length - vInt.length);
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyStruct.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyStruct.java
deleted file mode 100644
index 61cc335..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyStruct.java
+++ /dev/null
@@ -1,244 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.RecordInfo;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyStructObjectInspector;
-
-/**
- * LazyStruct is serialized as follows: start A B A B A B end bytes[] ->
- * |-----|---------|--- ... ---|-----|---------|
- *
- * Section A is one null-byte, corresponding to eight struct fields in Section
- * B. Each bit indicates whether the corresponding field is null (0) or not null
- * (1). Each field is a LazyObject.
- *
- * Following B, there is another section A and B. This pattern repeats until
- * all struct fields are serialized.
- */
-public class LazyStruct extends LazyNonPrimitive<LazyStructObjectInspector> {
-
- private static Log LOG = LogFactory.getLog(LazyStruct.class.getName());
-
- /**
- * Whether the data is already parsed or not.
- */
- boolean parsed;
-
- /**
- * The fields of the struct.
- */
- @SuppressWarnings("rawtypes")
- LazyObject[] fields;
-
- /**
- * Whether a field is initialized or not.
- */
- boolean[] fieldInited;
-
- /**
- * Whether a field is null or not. A length of 0 does not mean the field is
- * null. In particular, a 0-length string is not null.
- */
- boolean[] fieldIsNull;
-
- /**
- * The start positions and lengths of struct fields. Only valid when the
- * data is parsed.
- */
- int[] fieldStart;
- int[] fieldLength;
-
- /**
- * Construct a LazyStruct object with an ObjectInspector.
- */
- protected LazyStruct(LazyStructObjectInspector oi) {
- super(oi);
- }
-
- @Override
- public void init(byte[] bytes, int start, int length) {
- super.init(bytes, start, length);
- parsed = false;
- }
-
- RecordInfo recordInfo = new LazyUtils.RecordInfo();
- boolean missingFieldWarned = false;
- boolean extraFieldWarned = false;
-
- /**
- * Parse the byte[] and fill fieldStart, fieldLength, fieldInited and
- * fieldIsNull.
- */
- private void parse() {
-
- List<? extends StructField> fieldRefs = ((StructObjectInspector) oi)
- .getAllStructFieldRefs();
-
- if (fields == null) {
- fields = new LazyObject[fieldRefs.size()];
- for (int i = 0; i < fields.length; i++) {
- ObjectInspector insp = fieldRefs.get(i)
- .getFieldObjectInspector();
- fields[i] = insp == null ? null : LazyFactory
- .createLazyObject(insp);
- }
- fieldInited = new boolean[fields.length];
- fieldIsNull = new boolean[fields.length];
- fieldStart = new int[fields.length];
- fieldLength = new int[fields.length];
- }
-
- /**
- * Please note that one null byte is followed by eight fields, then another
- * null byte and more fields, and so on.
- */
-
- int fieldId = 0;
- int structByteEnd = start + length;
-
- byte nullByte = bytes[start];
- int lastFieldByteEnd = start + 1;
- // Go through all bytes in the byte[]
- for (int i = 0; i < fields.length; i++) {
- fieldIsNull[i] = true;
- if ((nullByte & (1 << (i % 8))) != 0) {
- fieldIsNull[i] = false;
- LazyUtils.checkObjectByteInfo(fieldRefs.get(i)
- .getFieldObjectInspector(), bytes, lastFieldByteEnd,
- recordInfo);
- fieldStart[i] = lastFieldByteEnd + recordInfo.elementOffset;
- fieldLength[i] = recordInfo.elementSize;
- lastFieldByteEnd = fieldStart[i] + fieldLength[i];
- }
-
- // count how many fields are there
- if (lastFieldByteEnd <= structByteEnd) {
- fieldId++;
- }
- // next byte is a null byte if there are more bytes to go
- if (7 == (i % 8)) {
- if (lastFieldByteEnd < structByteEnd) {
- nullByte = bytes[lastFieldByteEnd];
- lastFieldByteEnd++;
- } else {
- // otherwise all null afterwards
- nullByte = 0;
- lastFieldByteEnd++;
- }
- }
- }
-
- // Extra bytes at the end?
- if (!extraFieldWarned && lastFieldByteEnd < structByteEnd) {
- extraFieldWarned = true;
- LOG.warn("Extra bytes detected at the end of the row! Ignoring similar "
- + "problems.");
- }
-
- // Missing fields?
- if (!missingFieldWarned && lastFieldByteEnd > structByteEnd) {
- missingFieldWarned = true;
- LOG.warn("Missing fields! Expected " + fields.length
- + " fields but " + "only got " + fieldId
- + "! Ignoring similar problems.");
- }
-
- Arrays.fill(fieldInited, false);
- parsed = true;
- }
-
- /**
- * Get one field out of the struct.
- *
- * If the field is a primitive field, return the actual object. Otherwise
- * return the LazyObject. This is because PrimitiveObjectInspector does not
- * have control over the object used by the user - the user simply uses the
- * Object directly instead of going through
- * PrimitiveObjectInspector.get(Object).
- *
- * @param fieldID
- * The field ID
- * @return The field as a LazyObject
- */
- public Object getField(int fieldID) {
- if (!parsed) {
- parse();
- }
- return uncheckedGetField(fieldID);
- }
-
- /**
- * Get the field out of the row without checking parsed. This is called by
- * both getField and getFieldsAsList.
- *
- * @param fieldID
- * The id of the field starting from 0.
- * @return The value of the field
- */
- private Object uncheckedGetField(int fieldID) {
- // Return null right away for null fields, so we never touch the
- // byte[].
- if (fieldIsNull[fieldID]) {
- return null;
- }
- if (!fieldInited[fieldID]) {
- fieldInited[fieldID] = true;
- fields[fieldID].init(bytes, fieldStart[fieldID],
- fieldLength[fieldID]);
- }
- return fields[fieldID].getObject();
- }
-
- ArrayList<Object> cachedList;
-
- /**
- * Get the values of the fields as an ArrayList.
- *
- * @return The values of the fields as an ArrayList.
- */
- public ArrayList<Object> getFieldsAsList() {
- if (!parsed) {
- parse();
- }
- if (cachedList == null) {
- cachedList = new ArrayList<Object>();
- } else {
- cachedList.clear();
- }
- for (int i = 0; i < fields.length; i++) {
- cachedList.add(uncheckedGetField(i));
- }
- return cachedList;
- }
-
- @Override
- public Object getObject() {
- return this;
- }
-}
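
To see the same layout from the reading side, here is a hypothetical decoder for the null-byte format, simplified to rows whose fields are all two-byte big-endian shorts (the width the deleted serializer uses for SHORT). The class and method names are illustrative; the real LazyStruct instead asks checkObjectByteInfo for each field's offset and size based on its ObjectInspector.

// Decode-side sketch of the null-byte layout for fixed two-byte SHORT fields.
public class FixedWidthStructParseSketch {
    /** Parse fieldCount big-endian shorts laid out in the null-byte format. */
    public static Short[] parseShorts(byte[] bytes, int start, int fieldCount) {
        Short[] result = new Short[fieldCount];          // null stays null
        byte nullByte = bytes[start];
        int pos = start + 1;                             // first byte is the null byte
        for (int i = 0; i < fieldCount; i++) {
            if ((nullByte & (1 << (i % 8))) != 0) {
                result[i] = (short) (((bytes[pos] & 0xFF) << 8) | (bytes[pos + 1] & 0xFF));
                pos += 2;                                // null fields occupy no bytes
            }
            if (7 == i % 8 && i < fieldCount - 1) {
                nullByte = bytes[pos++];                 // next group's null byte
            }
        }
        return result;
    }

    public static void main(String[] args) {
        // null byte 0b00000101 -> fields 0 and 2 present; each value is big-endian
        byte[] row = { 0x05, 0x00, 0x2A, 0x01, 0x00 };
        Short[] fields = parseShorts(row, 0, 3);
        System.out.println(java.util.Arrays.toString(fields)); // prints [42, null, 256]
    }
}
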
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyUtils.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyUtils.java
deleted file mode 100644
index 2d0406c..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyUtils.java
+++ /dev/null
@@ -1,529 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
-
-import org.apache.hadoop.hive.serde2.ByteStream.Output;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.WritableUtils;
-
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyObjectInspectorFactory;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-
-/**
- * LazyUtils.
- *
- */
-public final class LazyUtils {
-
- /**
- * Convert the byte array to an int starting from the given offset. Adapted
- * from code by aeden on DZone Snippets.
- *
- * @param b
- * the byte array
- * @param offset
- * the array offset
- * @return the integer
- */
- public static int byteArrayToInt(byte[] b, int offset) {
- int value = 0;
- for (int i = 0; i < 4; i++) {
- int shift = (4 - 1 - i) * 8;
- value += (b[i + offset] & 0x000000FF) << shift;
- }
- return value;
- }
-
- /**
- * Convert the byte array to a long starting from the given offset.
- *
- * @param b
- * the byte array
- * @param offset
- * the array offset
- * @return the long
- */
- public static long byteArrayToLong(byte[] b, int offset) {
- long value = 0;
- for (int i = 0; i < 8; i++) {
- int shift = (8 - 1 - i) * 8;
- value += ((long) (b[i + offset] & 0x00000000000000FF)) << shift;
- }
- return value;
- }
-
- /**
- * Convert the byte array to a short starting from the given offset.
- *
- * @param b
- * the byte array
- * @param offset
- * the array offset
- * @return the short
- */
- public static short byteArrayToShort(byte[] b, int offset) {
- short value = 0;
- value += (b[offset] & 0x000000FF) << 8;
- value += (b[offset + 1] & 0x000000FF);
- return value;
- }
-
- /**
- * A record is the unit in which data is serialized. It has two parts: the
- * first stores the size of the element and the second stores the element
- * itself:  size | element
- *          |----|-------------------------|
- *
- * A RecordInfo stores two pieces of information about a record: the size of
- * the "size" part (the element offset) and the size of the element part (the
- * element size).
- */
- public static class RecordInfo {
- public RecordInfo() {
- elementOffset = 0;
- elementSize = 0;
- }
-
- public byte elementOffset;
- public int elementSize;
-
- @Override
- public String toString() {
- return "(" + elementOffset + ", " + elementSize + ")";
- }
- }
-
- static VInt vInt = new LazyUtils.VInt();
-
- /**
- * Check a particular field and set its size and offset in bytes based on
- * the field type and the byte array.
- *
- * For void, boolean, byte, short, float and double there is no offset and
- * the size is fixed. For int and long the size is the length of the leading
- * vint. For string, a vint prefix stores the size, so the offset is the vint
- * length. For map, list and struct, the first four bytes (relative to the
- * field's offset in the byte array) store the size, so the offset is 4 and
- * the size is read from those four bytes.
- *
- * @param objectInspector
- * object inspector of the field
- * @param bytes
- * byte array storing the table row
- * @param offset
- * offset of this field
- * @param recordInfo
- * the RecordInfo object to fill in and return
- */
- public static void checkObjectByteInfo(ObjectInspector objectInspector,
- byte[] bytes, int offset, RecordInfo recordInfo) {
- Category category = objectInspector.getCategory();
- switch (category) {
- case PRIMITIVE:
- PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector) objectInspector)
- .getPrimitiveCategory();
- switch (primitiveCategory) {
- case VOID:
- recordInfo.elementOffset = 0;
- recordInfo.elementSize = 0;
- break;
- case BOOLEAN:
- case BYTE:
- recordInfo.elementOffset = 0;
- recordInfo.elementSize = 1;
- break;
- case SHORT:
- recordInfo.elementOffset = 0;
- recordInfo.elementSize = 2;
- break;
- case FLOAT:
- recordInfo.elementOffset = 0;
- recordInfo.elementSize = 4;
- break;
- case DOUBLE:
- recordInfo.elementOffset = 0;
- recordInfo.elementSize = 8;
- break;
- case INT:
- recordInfo.elementOffset = 0;
- recordInfo.elementSize = WritableUtils
- .decodeVIntSize(bytes[offset]);
- break;
- case LONG:
- recordInfo.elementOffset = 0;
- recordInfo.elementSize = WritableUtils
- .decodeVIntSize(bytes[offset]);
- break;
- case STRING:
- // using vint instead of 4 bytes
- LazyUtils.readVInt(bytes, offset, vInt);
- recordInfo.elementOffset = vInt.length;
- recordInfo.elementSize = vInt.value;
- break;
- default: {
- throw new RuntimeException("Unrecognized primitive type: "
- + primitiveCategory);
- }
- }
- break;
- case LIST:
- case MAP:
- case STRUCT:
- recordInfo.elementOffset = 4;
- recordInfo.elementSize = LazyUtils.byteArrayToInt(bytes, offset);
- break;
- default: {
- throw new RuntimeException("Unrecognized non-primitive type: "
- + category);
- }
- }
- }
-
- /**
- * A zero-compressed encoded long.
- */
- public static class VLong {
- public VLong() {
- value = 0;
- length = 0;
- }
-
- public long value;
- public byte length;
- };
-
- /**
- * Reads a zero-compressed encoded long from a byte array and returns it.
- *
- * @param bytes
- * the byte array
- * @param offset
- * offset of the array to read from
- * @param vlong
- * storing the deserialized long and its size in byte
- */
- public static void readVLong(byte[] bytes, int offset, VLong vlong) {
- byte firstByte = bytes[offset];
- vlong.length = (byte) WritableUtils.decodeVIntSize(firstByte);
- if (vlong.length == 1) {
- vlong.value = firstByte;
- return;
- }
- long i = 0;
- for (int idx = 0; idx < vlong.length - 1; idx++) {
- byte b = bytes[offset + 1 + idx];
- i = i << 8;
- i = i | (b & 0xFF);
- }
- vlong.value = (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1L) : i);
- }
-
- /**
- * A zero-compressed encoded integer.
- */
- public static class VInt implements Serializable {
- private static final long serialVersionUID = 1L;
-
- public VInt() {
- value = 0;
- length = 0;
- }
-
- public int value;
- public byte length;
- };
-
- /**
- * Reads a zero-compressed encoded int from a byte array and returns it.
- *
- * @param bytes
- * the byte array
- * @param offset
- * offset of the array to read from
- * @param vInt
- * storing the deserialized int and its size in byte
- */
- public static void readVInt(byte[] bytes, int offset, VInt vInt) {
- byte firstByte = bytes[offset];
- vInt.length = (byte) WritableUtils.decodeVIntSize(firstByte);
- if (vInt.length == 1) {
- vInt.value = firstByte;
- return;
- }
- int i = 0;
- for (int idx = 0; idx < vInt.length - 1; idx++) {
- byte b = bytes[offset + 1 + idx];
- i = i << 8;
- i = i | (b & 0xFF);
- }
- vInt.value = (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1) : i);
- }
-
- /**
- * Writes a zero-compressed encoded int to a byte array.
- *
- * @param byteStream
- * the byte array/stream
- * @param i
- * the int
- */
- public static void writeVInt(Output byteStream, int i) {
- writeVLong(byteStream, i);
- }
-
- /**
- * Write a zero-compressed encoded long to a byte array.
- *
- * @param byteStream
- * the byte array/stream
- * @param l
- * the long
- */
- public static void writeVLong(Output byteStream, long l) {
- if (l >= -112 && l <= 127) {
- byteStream.write((byte) l);
- return;
- }
-
- int len = -112;
- if (l < 0) {
- l ^= -1L; // take one's complement
- len = -120;
- }
-
- long tmp = l;
- while (tmp != 0) {
- tmp = tmp >> 8;
- len--;
- }
-
- byteStream.write((byte) len);
-
- len = (len < -120) ? -(len + 120) : -(len + 112);
-
- for (int idx = len; idx != 0; idx--) {
- int shiftbits = (idx - 1) * 8;
- long mask = 0xFFL << shiftbits;
- byteStream.write((byte) ((l & mask) >> shiftbits));
- }
- }
-
- static Map<TypeInfo, ObjectInspector> cachedLazyObjectInspector = new ConcurrentHashMap<TypeInfo, ObjectInspector>();
-
- /**
- * Returns the lazy binary object inspector that can be used to inspect a
- * lazy binary object of the given typeInfo.
- *
- * For primitive types, we use the standard writable object inspector.
- */
- public static ObjectInspector getLazyObjectInspectorFromTypeInfo(
- TypeInfo typeInfo, boolean topLevel) {
- if (typeInfo == null)
- throw new IllegalStateException("illegal type null ");
- ObjectInspector result = cachedLazyObjectInspector.get(typeInfo);
- if (result == null) {
- switch (typeInfo.getCategory()) {
- case PRIMITIVE: {
- result = PrimitiveObjectInspectorFactory
- .getPrimitiveLazyObjectInspector(((PrimitiveTypeInfo) typeInfo)
- .getPrimitiveCategory());
- break;
- }
- case LIST: {
- ObjectInspector elementObjectInspector = getLazyObjectInspectorFromTypeInfo(
- ((ListTypeInfo) typeInfo).getListElementTypeInfo(),
- false);
- result = LazyObjectInspectorFactory
- .getLazyListObjectInspector(elementObjectInspector);
- break;
- }
- case MAP: {
- MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
- ObjectInspector keyObjectInspector = getLazyObjectInspectorFromTypeInfo(
- mapTypeInfo.getMapKeyTypeInfo(), false);
- ObjectInspector valueObjectInspector = getLazyObjectInspectorFromTypeInfo(
- mapTypeInfo.getMapValueTypeInfo(), false);
- result = LazyObjectInspectorFactory.getLazyMapObjectInspector(
- keyObjectInspector, valueObjectInspector);
- break;
- }
- case STRUCT: {
- StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
- List<String> fieldNames = structTypeInfo
- .getAllStructFieldNames();
- List<TypeInfo> fieldTypeInfos = structTypeInfo
- .getAllStructFieldTypeInfos();
- List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(
- fieldTypeInfos.size());
-
- for (int i = 0; i < fieldTypeInfos.size(); i++) {
- fieldObjectInspectors
- .add(getLazyObjectInspectorFromTypeInfo(
- fieldTypeInfos.get(i), false));
- }
-
- // if it is top level then create columnar
- if (topLevel)
- result = LazyObjectInspectorFactory
- .getLazyColumnarObjectInspector(fieldNames,
- fieldObjectInspectors);
- // if it is not top level then create struct
- else
- result = LazyObjectInspectorFactory
- .getLazyStructObjectInspector(fieldNames,
- fieldObjectInspectors);
-
- break;
- }
- default: {
- result = null;
- }
- }
- cachedLazyObjectInspector.put(typeInfo, result);
- }
- return result;
- }
-
- /**
- * Get the top-level lazy object inspector.
- *
- * @param fieldNames
- * @param fieldTypeInfos
- * @return the top-level columnar object inspector
- */
- public static ObjectInspector getLazyObjectInspector(
- List<String> fieldNames, List<TypeInfo> fieldTypeInfos) {
- List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(
- fieldTypeInfos.size());
- for (int i = 0; i < fieldTypeInfos.size(); i++) {
- fieldObjectInspectors.add(getLazyObjectInspectorFromTypeInfo(
- fieldTypeInfos.get(i), false));
- }
-
- return LazyObjectInspectorFactory.getLazyColumnarObjectInspector(
- fieldNames, fieldObjectInspectors);
- }
-
- private LazyUtils() {
- // prevent instantiation
- }
-
- /**
- * Returns -1 if the first byte sequence is lexicographically less than the
- * second; returns +1 if the second byte sequence is lexicographically less
- * than the first; otherwise return 0.
- */
- public static int compare(byte[] b1, int start1, int length1, byte[] b2,
- int start2, int length2) {
-
- int min = Math.min(length1, length2);
-
- for (int i = 0; i < min; i++) {
- if (b1[start1 + i] == b2[start2 + i]) {
- continue;
- }
- if (b1[start1 + i] < b2[start2 + i]) {
- return -1;
- } else {
- return 1;
- }
- }
-
- if (length1 < length2) {
- return -1;
- }
- if (length1 > length2) {
- return 1;
- }
- return 0;
- }
-
- public static int hashBytes(byte[] data, int start, int len) {
- int hash = 1;
- for (int i = start; i < len; i++) {
- hash = (31 * hash) + data[i];
- }
- return hash;
- }
-
- /**
- * Writes a zero-compressed encoded int to a byte array.
- *
- * @param byteStream
- * the byte array/stream
- * @param i
- * the int
- */
- public static void writeVInt(DataOutput byteStream, int i)
- throws IOException {
- writeVLong(byteStream, i);
- }
-
- /**
- * Write a zero-compressed encoded long to a byte array.
- *
- * @param byteStream
- * the byte array/stream
- * @param l
- * the long
- */
- public static void writeVLong(DataOutput byteStream, long l)
- throws IOException {
- if (l >= -112 && l <= 127) {
- byteStream.write((byte) l);
- return;
- }
-
- int len = -112;
- if (l < 0) {
- l ^= -1L; // take one's complement
- len = -120;
- }
-
- long tmp = l;
- while (tmp != 0) {
- tmp = tmp >> 8;
- len--;
- }
-
- byteStream.write((byte) len);
-
- len = (len < -120) ? -(len + 120) : -(len + 112);
-
- for (int idx = len; idx != 0; idx--) {
- int shiftbits = (idx - 1) * 8;
- long mask = 0xFFL << shiftbits;
- byteStream.write((byte) ((l & mask) >> shiftbits));
- }
- }
-}
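
The vint/vlong helpers above use Hadoop's zero-compressed encoding: values in [-112, 127] take one byte, otherwise a length-marker byte is followed by the magnitude's big-endian bytes, with negative values one's-complemented first. The self-contained round trip below is a sketch for checking the marker-byte arithmetic in isolation; the writer mirrors the deleted writeVLong, and the reader is an assumed, simplified equivalent of WritableUtils-style decoding, not a drop-in for the deleted readVLong signature.

// Round-trip sketch of the zero-compressed VLong encoding.
import java.io.ByteArrayOutputStream;

public class VLongSketch {
    public static void writeVLong(ByteArrayOutputStream out, long l) {
        if (l >= -112 && l <= 127) {
            out.write((byte) l);                  // single-byte fast path
            return;
        }
        int len = -112;
        if (l < 0) {
            l ^= -1L;                             // one's complement -> non-negative magnitude
            len = -120;
        }
        long tmp = l;
        while (tmp != 0) {                        // one decrement per magnitude byte
            tmp >>= 8;
            len--;
        }
        out.write((byte) len);                    // marker byte encodes sign and byte count
        len = (len < -120) ? -(len + 120) : -(len + 112);
        for (int idx = len; idx != 0; idx--) {
            int shiftbits = (idx - 1) * 8;
            out.write((byte) ((l >> shiftbits) & 0xFF));
        }
    }

    public static long readVLong(byte[] bytes, int offset) {
        byte first = bytes[offset];
        if (first >= -112) {
            return first;                         // single-byte value
        }
        boolean negative = first < -120;
        int len = negative ? -(first + 120) : -(first + 112);
        long value = 0;
        for (int i = 0; i < len; i++) {
            value = (value << 8) | (bytes[offset + 1 + i] & 0xFF);
        }
        return negative ? (value ^ -1L) : value;  // undo the one's complement
    }

    public static void main(String[] args) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        long[] samples = { 0, 127, -112, 300, -300, 1L << 40 };
        for (long v : samples) {
            out.reset();
            writeVLong(out, v);
            byte[] bytes = out.toByteArray();
            System.out.printf("%d -> %d byte(s) -> %d%n", v, bytes.length, readVLong(bytes, 0));
        }
    }
}
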
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyColumnarObjectInspector.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyColumnarObjectInspector.java
deleted file mode 100644
index b20f185..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyColumnarObjectInspector.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy.objectinspector;
-
-import java.io.Serializable;
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyColumnar;
-
-/**
- * ObjectInspector for LazyColumnar.
- *
- * @see LazyColumnar
- */
-public class LazyColumnarObjectInspector extends StandardStructObjectInspector
- implements Serializable {
-
- private static final long serialVersionUID = 1L;
-
- public LazyColumnarObjectInspector(List<String> structFieldNames,
- List<ObjectInspector> structFieldObjectInspectors) {
- super(structFieldNames, structFieldObjectInspectors);
- }
-
- public LazyColumnarObjectInspector(List<StructField> fields) {
- super(fields);
- }
-
- @Override
- public Object getStructFieldData(Object data, StructField fieldRef) {
- if (data == null) {
- return null;
- }
- LazyColumnar struct = (LazyColumnar) data;
- MyField f = (MyField) fieldRef;
-
- int fieldID = f.getFieldID();
- assert (fieldID >= 0 && fieldID < fields.size());
-
- Object column = struct.getField(fieldID);
- return column;
- }
-
- @Override
- public List<Object> getStructFieldsDataAsList(Object data) {
- if (data == null) {
- return null;
- }
- LazyColumnar struct = (LazyColumnar) data;
- return struct.getFieldsAsList();
- }
-
- public String toString() {
- String str = "";
- for (MyField f : fields) {
- str += f.getFieldName() + ":"
- + f.getFieldObjectInspector().getTypeName() + " ";
- }
- return str;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyObjectInspectorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyObjectInspectorFactory.java
deleted file mode 100644
index 439b130..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyObjectInspectorFactory.java
+++ /dev/null
@@ -1,93 +0,0 @@
-package edu.uci.ics.hivesterix.serde.lazy.objectinspector;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.ConcurrentHashMap;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-
-/**
- * ObjectInspectorFactory is the primary way to create new ObjectInspector
- * instances.
- *
- * SerDe classes should call the static functions in this library to create an
- * ObjectInspector to return to the caller of SerDe2.getObjectInspector().
- *
- * The reason for having caches here is that ObjectInspectors do not have
- * internal state, so ObjectInspectors with the same construction parameters
- * should result in exactly the same ObjectInspector instance.
- */
-
-public final class LazyObjectInspectorFactory {
-
- static ConcurrentHashMap<ArrayList<Object>, LazyColumnarObjectInspector> cachedLazyColumnarObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyColumnarObjectInspector>();
-
- static ConcurrentHashMap<ArrayList<Object>, LazyStructObjectInspector> cachedLazyStructObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyStructObjectInspector>();
-
- static ConcurrentHashMap<ArrayList<Object>, LazyListObjectInspector> cachedLazyListObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyListObjectInspector>();
-
- static ConcurrentHashMap<ArrayList<Object>, LazyMapObjectInspector> cachedLazyMapObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyMapObjectInspector>();
-
- public static LazyColumnarObjectInspector getLazyColumnarObjectInspector(
- List<String> structFieldNames,
- List<ObjectInspector> structFieldObjectInspectors) {
- ArrayList<Object> signature = new ArrayList<Object>();
- signature.add(structFieldNames);
- signature.add(structFieldObjectInspectors);
- LazyColumnarObjectInspector result = cachedLazyColumnarObjectInspector
- .get(signature);
- if (result == null) {
- result = new LazyColumnarObjectInspector(structFieldNames,
- structFieldObjectInspectors);
- cachedLazyColumnarObjectInspector.put(signature, result);
- }
- return result;
- }
-
- public static LazyStructObjectInspector getLazyStructObjectInspector(
- List<String> structFieldNames,
- List<ObjectInspector> structFieldObjectInspectors) {
- ArrayList<Object> signature = new ArrayList<Object>();
- signature.add(structFieldNames);
- signature.add(structFieldObjectInspectors);
- LazyStructObjectInspector result = cachedLazyStructObjectInspector
- .get(signature);
- if (result == null) {
- result = new LazyStructObjectInspector(structFieldNames,
- structFieldObjectInspectors);
- cachedLazyStructObjectInspector.put(signature, result);
- }
- return result;
- }
-
- public static LazyListObjectInspector getLazyListObjectInspector(
- ObjectInspector listElementInspector) {
- ArrayList<Object> signature = new ArrayList<Object>();
- signature.add(listElementInspector);
- LazyListObjectInspector result = cachedLazyListObjectInspector
- .get(signature);
- if (result == null) {
- result = new LazyListObjectInspector(listElementInspector);
- cachedLazyListObjectInspector.put(signature, result);
- }
- return result;
- }
-
- public static LazyMapObjectInspector getLazyMapObjectInspector(
- ObjectInspector keyInspector, ObjectInspector valueInspector) {
- ArrayList<Object> signature = new ArrayList<Object>();
- signature.add(keyInspector);
- signature.add(valueInspector);
- LazyMapObjectInspector result = cachedLazyMapObjectInspector
- .get(signature);
- if (result == null) {
- result = new LazyMapObjectInspector(keyInspector, valueInspector);
- cachedLazyMapObjectInspector.put(signature, result);
- }
- return result;
- }
-
- private LazyObjectInspectorFactory() {
- // prevent instantiation
- }
-}
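
The factory above caches inspectors keyed by a list of their construction arguments, relying on the fact that inspectors are stateless. A distilled, hypothetical version of that pattern is sketched below; the names are illustrative, and it uses computeIfAbsent where the original uses an explicit get/put pair.

// Sketch of the cache-by-construction-signature pattern.
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Supplier;

public class SignatureCacheSketch {
    private static final ConcurrentHashMap<List<Object>, Object> CACHE = new ConcurrentHashMap<>();

    @SuppressWarnings("unchecked")
    static <T> T getOrCreate(Supplier<T> factory, Object... constructorArgs) {
        // the constructor arguments themselves form the cache key
        List<Object> signature = Arrays.asList(constructorArgs);
        return (T) CACHE.computeIfAbsent(signature, key -> factory.get());
    }

    public static void main(String[] args) {
        // new String(...) deliberately creates distinct objects so identity shows caching
        String a = getOrCreate(() -> new String("inspector"), "names", "inspectors");
        String b = getOrCreate(() -> new String("inspector"), "names", "inspectors");
        System.out.println(a == b); // true: same construction parameters, same cached instance
    }
}
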
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyStructObjectInspector.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyStructObjectInspector.java
deleted file mode 100644
index 1a50233..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyStructObjectInspector.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy.objectinspector;
-
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyStruct;
-
-/**
- * ObjectInspector for LazyStruct.
- *
- * @see LazyStruct
- */
-public class LazyStructObjectInspector extends StandardStructObjectInspector {
-
- protected LazyStructObjectInspector(List<String> structFieldNames,
- List<ObjectInspector> structFieldObjectInspectors) {
- super(structFieldNames, structFieldObjectInspectors);
- }
-
- protected LazyStructObjectInspector(List<StructField> fields) {
- super(fields);
- }
-
- @Override
- public Object getStructFieldData(Object data, StructField fieldRef) {
- if (data == null) {
- return null;
- }
- LazyStruct struct = (LazyStruct) data;
- MyField f = (MyField) fieldRef;
-
- int fieldID = f.getFieldID();
- assert (fieldID >= 0 && fieldID < fields.size());
-
- return struct.getField(fieldID);
- }
-
- @Override
- public List<Object> getStructFieldsDataAsList(Object data) {
- if (data == null) {
- return null;
- }
- LazyStruct struct = (LazyStruct) data;
- return struct.getFieldsAsList();
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
deleted file mode 100644
index 134dc5a..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
-
-import java.util.ArrayList;
-import java.util.concurrent.ConcurrentHashMap;
-
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-
-/**
- * LazyPrimitiveObjectInspectorFactory is the primary way to create new
- * ObjectInspector instances.
- *
- * SerDe classes should call the static functions in this library to create an
- * ObjectInspector to return to the caller of SerDe2.getObjectInspector().
- *
- * The reason for having caches here is that ObjectInspectors do not have
- * internal state, so ObjectInspectors with the same construction parameters
- * should result in exactly the same ObjectInspector instance.
- */
-public final class LazyPrimitiveObjectInspectorFactory {
-
- public static final LazyBooleanObjectInspector LAZY_BOOLEAN_OBJECT_INSPECTOR = new LazyBooleanObjectInspector();
- public static final LazyByteObjectInspector LAZY_BYTE_OBJECT_INSPECTOR = new LazyByteObjectInspector();
- public static final LazyShortObjectInspector LAZY_SHORT_OBJECT_INSPECTOR = new LazyShortObjectInspector();
- public static final LazyIntObjectInspector LAZY_INT_OBJECT_INSPECTOR = new LazyIntObjectInspector();
- public static final LazyLongObjectInspector LAZY_LONG_OBJECT_INSPECTOR = new LazyLongObjectInspector();
- public static final LazyFloatObjectInspector LAZY_FLOAT_OBJECT_INSPECTOR = new LazyFloatObjectInspector();
- public static final LazyDoubleObjectInspector LAZY_DOUBLE_OBJECT_INSPECTOR = new LazyDoubleObjectInspector();
- public static final LazyVoidObjectInspector LAZY_VOID_OBJECT_INSPECTOR = new LazyVoidObjectInspector();
-
- static ConcurrentHashMap<ArrayList<Object>, LazyStringObjectInspector> cachedLazyStringObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyStringObjectInspector>();
-
- public static LazyStringObjectInspector getLazyStringObjectInspector(
- boolean escaped, byte escapeChar) {
- ArrayList<Object> signature = new ArrayList<Object>();
- signature.add(Boolean.valueOf(escaped));
- signature.add(Byte.valueOf(escapeChar));
- LazyStringObjectInspector result = cachedLazyStringObjectInspector
- .get(signature);
- if (result == null) {
- result = new LazyStringObjectInspector(escaped, escapeChar);
- cachedLazyStringObjectInspector.put(signature, result);
- }
- return result;
- }
-
- public static AbstractPrimitiveLazyObjectInspector<?> getLazyObjectInspector(
- PrimitiveCategory primitiveCategory, boolean escaped,
- byte escapeChar) {
-
- switch (primitiveCategory) {
- case BOOLEAN:
- return LAZY_BOOLEAN_OBJECT_INSPECTOR;
- case BYTE:
- return LAZY_BYTE_OBJECT_INSPECTOR;
- case SHORT:
- return LAZY_SHORT_OBJECT_INSPECTOR;
- case INT:
- return LAZY_INT_OBJECT_INSPECTOR;
- case LONG:
- return LAZY_LONG_OBJECT_INSPECTOR;
- case FLOAT:
- return LAZY_FLOAT_OBJECT_INSPECTOR;
- case DOUBLE:
- return LAZY_DOUBLE_OBJECT_INSPECTOR;
- case STRING:
- return getLazyStringObjectInspector(escaped, escapeChar);
- case VOID:
- return LAZY_VOID_OBJECT_INSPECTOR;
- default:
- throw new RuntimeException(
- "Internal error: Cannot find ObjectInspector " + " for "
- + primitiveCategory);
- }
- }
-
- private LazyPrimitiveObjectInspectorFactory() {
- // prevent instantiation
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyStringObjectInspector.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyStringObjectInspector.java
deleted file mode 100644
index 5832f34..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyStringObjectInspector.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
-
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
-import org.apache.hadoop.io.Text;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyString;
-
-/**
- * A LazyStringObjectInspector inspects a LazyString object and exposes its Text value.
- */
-public class LazyStringObjectInspector extends
- AbstractPrimitiveLazyObjectInspector<Text> implements
- StringObjectInspector {
-
- boolean escaped;
- byte escapeChar;
-
- LazyStringObjectInspector(boolean escaped, byte escapeChar) {
- super(PrimitiveObjectInspectorUtils.stringTypeEntry);
- this.escaped = escaped;
- this.escapeChar = escapeChar;
- }
-
- @Override
- public Object copyObject(Object o) {
- return o == null ? null : new LazyString((LazyString) o);
- }
-
- @Override
- public Text getPrimitiveWritableObject(Object o) {
- return o == null ? null : ((LazyString) o).getWritableObject();
- }
-
- @Override
- public String getPrimitiveJavaObject(Object o) {
- return o == null ? null : ((LazyString) o).getWritableObject()
- .toString();
- }
-
- public boolean isEscaped() {
- return escaped;
- }
-
- public byte getEscapeChar() {
- return escapeChar;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/PrimitiveObjectInspectorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/PrimitiveObjectInspectorFactory.java
deleted file mode 100644
index e70bdb9..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/PrimitiveObjectInspectorFactory.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
-
-import java.util.HashMap;
-
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-
-/**
- * PrimitiveObjectInspectorFactory is the primary way to create new
- * PrimitiveObjectInspector instances.
- *
- * The reason for having caches here is that ObjectInspectors do not have
- * internal state, so ObjectInspectors with the same construction parameters
- * should result in exactly the same ObjectInspector instance.
- */
-public final class PrimitiveObjectInspectorFactory {
-
- public static final LazyBooleanObjectInspector LazyBooleanObjectInspector = new LazyBooleanObjectInspector();
- public static final LazyByteObjectInspector LazyByteObjectInspector = new LazyByteObjectInspector();
- public static final LazyShortObjectInspector LazyShortObjectInspector = new LazyShortObjectInspector();
- public static final LazyIntObjectInspector LazyIntObjectInspector = new LazyIntObjectInspector();
- public static final LazyLongObjectInspector LazyLongObjectInspector = new LazyLongObjectInspector();
- public static final LazyFloatObjectInspector LazyFloatObjectInspector = new LazyFloatObjectInspector();
- public static final LazyDoubleObjectInspector LazyDoubleObjectInspector = new LazyDoubleObjectInspector();
- public static final LazyStringObjectInspector LazyStringObjectInspector = new LazyStringObjectInspector(
- false, (byte) '\\');
- public static final LazyVoidObjectInspector LazyVoidObjectInspector = new LazyVoidObjectInspector();
-
- private static HashMap<PrimitiveCategory, AbstractPrimitiveLazyObjectInspector<?>> cachedPrimitiveLazyInspectorCache = new HashMap<PrimitiveCategory, AbstractPrimitiveLazyObjectInspector<?>>();
-
- static {
- cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.BOOLEAN,
- LazyBooleanObjectInspector);
- cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.BYTE,
- LazyByteObjectInspector);
- cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.SHORT,
- LazyShortObjectInspector);
- cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.INT,
- LazyIntObjectInspector);
- cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.LONG,
- LazyLongObjectInspector);
- cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.FLOAT,
- LazyFloatObjectInspector);
- cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.DOUBLE,
- LazyDoubleObjectInspector);
- cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.STRING,
- LazyStringObjectInspector);
- cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.VOID,
- LazyVoidObjectInspector);
- }
-
- /**
- * Returns the cached AbstractPrimitiveLazyObjectInspector for the given
- * PrimitiveCategory.
- *
- * @param primitiveCategory the primitive category to look up
- */
- public static AbstractPrimitiveLazyObjectInspector<?> getPrimitiveLazyObjectInspector(
- PrimitiveCategory primitiveCategory) {
- AbstractPrimitiveLazyObjectInspector<?> result = cachedPrimitiveLazyInspectorCache
- .get(primitiveCategory);
- if (result == null) {
- throw new RuntimeException(
- "Internal error: Cannot find ObjectInspector " + " for "
- + primitiveCategory);
- }
- return result;
- }
-
- private PrimitiveObjectInspectorFactory() {
- // prevent instantiation
- }
-}
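
Note on the factory deleted above: because the lazy ObjectInspectors carry no per-row state, the class keeps one shared instance per PrimitiveCategory in a cache and hands the same instance to every caller. The sketch below illustrates that stateless-singleton cache pattern in isolation; all names in it (InspectorCacheSketch, Category, Inspector) are hypothetical stand-ins rather than hivesterix or Hive types.

    import java.util.EnumMap;
    import java.util.Map;

    public final class InspectorCacheSketch {

        public enum Category { BOOLEAN, INT, LONG, STRING }

        /** A stateless inspector; a single instance per category can be shared safely. */
        public static final class Inspector {
            private final Category category;

            Inspector(Category category) {
                this.category = category;
            }

            public Category getCategory() {
                return category;
            }
        }

        private static final Map<Category, Inspector> CACHE =
                new EnumMap<Category, Inspector>(Category.class);

        static {
            // Populate the cache once; every lookup returns the same shared instance.
            for (Category c : Category.values()) {
                CACHE.put(c, new Inspector(c));
            }
        }

        public static Inspector get(Category category) {
            Inspector result = CACHE.get(category);
            if (result == null) {
                throw new IllegalStateException("No inspector registered for " + category);
            }
            return result;
        }

        private InspectorCacheSketch() {
            // prevent instantiation
        }
    }
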
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/parser/IHiveParser.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/parser/IHiveParser.java
deleted file mode 100644
index aeea68f..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/parser/IHiveParser.java
+++ /dev/null
@@ -1,17 +0,0 @@
-package edu.uci.ics.hivesterix.serde.parser;
-
-import java.io.IOException;
-
-import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-
-public interface IHiveParser {
- /**
- * Parse one Hive row into the given tuple builder.
- *
- * @param data the raw row bytes
- * @param start the offset of the row within data
- * @param length the length of the row in bytes
- * @param tb the tuple builder that receives the binary fields
- */
- public void parse(byte[] data, int start, int length, ArrayTupleBuilder tb)
- throws IOException;
-}
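
The deleted IHiveParser contract takes a raw byte range rather than a materialized row object, which lets a caller walk a buffer of rows without extra copies. A hedged caller-side sketch of that usage follows; RowParser and parseAll are hypothetical stand-ins for illustration only, not the hivesterix interface.

    import java.io.IOException;

    public class RowParserDriverSketch {

        /** Stand-in mirroring the shape of a byte-range row-parsing contract. */
        public interface RowParser {
            void parse(byte[] data, int start, int length) throws IOException;
        }

        /** Feed each newline-delimited row of the buffer to the parser as a byte range. */
        public static void parseAll(byte[] buffer, RowParser parser) throws IOException {
            int rowStart = 0;
            for (int i = 0; i < buffer.length; i++) {
                if (buffer[i] == '\n') {
                    // Hand over one row, excluding the trailing newline.
                    parser.parse(buffer, rowStart, i - rowStart);
                    rowStart = i + 1;
                }
            }
            if (rowStart < buffer.length) {
                // Last row without a trailing newline.
                parser.parse(buffer, rowStart, buffer.length - rowStart);
            }
        }
    }
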
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/parser/TextToBinaryTupleParser.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/parser/TextToBinaryTupleParser.java
deleted file mode 100644
index 3aeb058..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/parser/TextToBinaryTupleParser.java
+++ /dev/null
@@ -1,184 +0,0 @@
-package edu.uci.ics.hivesterix.serde.parser;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.lazy.LazyInteger;
-import org.apache.hadoop.hive.serde2.lazy.LazyLong;
-import org.apache.hadoop.hive.serde2.lazy.LazyShort;
-import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.io.Text;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.NotImplementedException;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-
-public class TextToBinaryTupleParser implements IHiveParser {
- private int[] invertedIndex;
- private int[] fieldEnds;
- private int lastNecessaryFieldIndex;
- private LazySimpleStructObjectInspector inputObjectInspector;
- private List<? extends StructField> fieldRefs;
-
- public TextToBinaryTupleParser(int[] outputColumnsOffset,
- ObjectInspector structInspector) {
- int size = 0;
- for (int i = 0; i < outputColumnsOffset.length; i++)
- if (outputColumnsOffset[i] >= 0)
- size++;
- invertedIndex = new int[size];
- for (int i = 0; i < outputColumnsOffset.length; i++)
- if (outputColumnsOffset[i] >= 0) {
- invertedIndex[outputColumnsOffset[i]] = i;
- lastNecessaryFieldIndex = i;
- }
- fieldEnds = new int[outputColumnsOffset.length];
- for (int i = 0; i < fieldEnds.length; i++)
- fieldEnds[i] = 0;
- inputObjectInspector = (LazySimpleStructObjectInspector) structInspector;
- fieldRefs = inputObjectInspector.getAllStructFieldRefs();
- }
-
- @Override
- public void parse(byte[] bytes, int start, int length, ArrayTupleBuilder tb)
- throws IOException {
- byte separator = inputObjectInspector.getSeparator();
- boolean lastColumnTakesRest = inputObjectInspector
- .getLastColumnTakesRest();
- boolean isEscaped = inputObjectInspector.isEscaped();
- byte escapeChar = inputObjectInspector.getEscapeChar();
- DataOutput output = tb.getDataOutput();
-
- int structByteEnd = start + length - 1;
- int fieldId = 0;
- int fieldByteEnd = start;
-
- // Go through all bytes in the byte[]
- while (fieldByteEnd <= structByteEnd
- && fieldId <= lastNecessaryFieldIndex) {
- if (fieldByteEnd == structByteEnd
- || bytes[fieldByteEnd] == separator) {
- // Reached the end of a field?
- if (lastColumnTakesRest && fieldId == fieldEnds.length - 1) {
- fieldByteEnd = structByteEnd;
- }
- fieldEnds[fieldId] = fieldByteEnd;
- if (fieldId == fieldEnds.length - 1
- || fieldByteEnd == structByteEnd) {
- // for the case of null fields
- for (int i = fieldId; i < fieldEnds.length; i++) {
- fieldEnds[i] = fieldByteEnd;
- }
- break;
- }
- fieldByteEnd++;
- fieldId++;
- } else {
- if (isEscaped && bytes[fieldByteEnd] == escapeChar
- && fieldByteEnd + 1 < structByteEnd) {
- // ignore the char after escape_char
- fieldByteEnd += 2;
- } else {
- fieldByteEnd++;
- }
- }
- }
-
- for (int i = 0; i < invertedIndex.length; i++) {
- int index = invertedIndex[i];
- StructField fieldRef = fieldRefs.get(index);
- ObjectInspector inspector = fieldRef.getFieldObjectInspector();
- Category category = inspector.getCategory();
- int fieldStart = index == 0 ? 0 : fieldEnds[index - 1] + 1;
- int fieldEnd = fieldEnds[index];
- if (bytes[fieldEnd] == separator)
- fieldEnd--;
- int fieldLen = fieldEnd - fieldStart + 1;
- switch (category) {
- case PRIMITIVE:
- PrimitiveObjectInspector poi = (PrimitiveObjectInspector) inspector;
- switch (poi.getPrimitiveCategory()) {
- case VOID: {
- break;
- }
- case BOOLEAN: {
- output.write(bytes[fieldStart]);
- break;
- }
- case BYTE: {
- output.write(bytes[fieldStart]);
- break;
- }
- case SHORT: {
- short v = LazyShort.parseShort(bytes, fieldStart, fieldLen);
- output.write((byte) (v >> 8));
- output.write((byte) (v));
- break;
- }
- case INT: {
- int v = LazyInteger.parseInt(bytes, fieldStart, fieldLen);
- LazyUtils.writeVInt(output, v);
- break;
- }
- case LONG: {
- long v = LazyLong.parseLong(bytes, fieldStart, fieldLen);
- LazyUtils.writeVLong(output, v);
- break;
- }
- case FLOAT: {
- float value = Float.parseFloat(Text.decode(bytes,
- fieldStart, fieldLen));
- int v = Float.floatToIntBits(value);
- output.write((byte) (v >> 24));
- output.write((byte) (v >> 16));
- output.write((byte) (v >> 8));
- output.write((byte) (v));
- break;
- }
- case DOUBLE: {
- try {
- double value = Double.parseDouble(Text.decode(bytes,
- fieldStart, fieldLen));
- long v = Double.doubleToLongBits(value);
- output.write((byte) (v >> 56));
- output.write((byte) (v >> 48));
- output.write((byte) (v >> 40));
- output.write((byte) (v >> 32));
- output.write((byte) (v >> 24));
- output.write((byte) (v >> 16));
- output.write((byte) (v >> 8));
- output.write((byte) (v));
- } catch (NumberFormatException e) {
- throw e;
- }
- break;
- }
- case STRING: {
- LazyUtils.writeVInt(output, fieldLen);
- output.write(bytes, fieldStart, fieldLen);
- break;
- }
- default: {
- throw new RuntimeException("Unrecognized type: "
- + poi.getPrimitiveCategory());
- }
- }
- break;
- case STRUCT:
- throw new NotImplementedException("Unsupported type: struct");
- case LIST:
- throw new NotImplementedException("Unsupported type: list");
- case MAP:
- throw new NotImplementedException("Unsupported type: map");
- case UNION:
- throw new NotImplementedException("Unsupported type: union");
- }
- tb.addFieldEndOffset();
- }
- }
-}
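
The parser deleted above works in two passes: it first scans the row once to record the end offset of every delimited field (honoring an optional escape character), and only then converts the needed fields into their binary forms. A simplified, hypothetical sketch of that first pass is shown below; it leaves out the lastColumnTakesRest and null-field handling of the original and uses stand-in names throughout.

    public class FieldEndScanSketch {

        public static int[] scanFieldEnds(byte[] bytes, int start, int length,
                byte separator, boolean isEscaped, byte escapeChar, int numFields) {
            int[] fieldEnds = new int[numFields];
            int structEnd = start + length - 1;
            int fieldId = 0;
            int pos = start;
            while (pos <= structEnd && fieldId < numFields) {
                if (pos == structEnd || bytes[pos] == separator) {
                    // Reached the end of a field (or the end of the row).
                    fieldEnds[fieldId] = pos;
                    fieldId++;
                    pos++;
                } else if (isEscaped && bytes[pos] == escapeChar && pos + 1 < structEnd) {
                    pos += 2; // skip the byte after the escape character
                } else {
                    pos++;
                }
            }
            // Any remaining fields are empty: point them at the row end.
            for (int i = fieldId; i < numFields; i++) {
                fieldEnds[i] = structEnd;
            }
            return fieldEnds;
        }

        public static void main(String[] args) {
            byte[] row = "1|alice|3.5".getBytes();
            int[] ends = scanFieldEnds(row, 0, row.length, (byte) '|', false, (byte) 0, 3);
            // Each entry is the offset of the field's last byte or its trailing separator.
            System.out.println(java.util.Arrays.toString(ends)); // [1, 7, 10]
        }
    }
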
diff --git a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/Driver.java b/hivesterix/src/main/java/org/apache/hadoop/hive/ql/Driver.java
deleted file mode 100644
index 57e2cc0..0000000
--- a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/Driver.java
+++ /dev/null
@@ -1,1441 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql;
-
-import java.io.DataInput;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Queue;
-import java.util.Set;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.JavaUtils;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.MetaStoreUtils;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.Schema;
-import org.apache.hadoop.hive.ql.exec.ConditionalTask;
-import org.apache.hadoop.hive.ql.exec.ExecDriver;
-import org.apache.hadoop.hive.ql.exec.FetchTask;
-import org.apache.hadoop.hive.ql.exec.MapRedTask;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.StatsTask;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.Task;
-import org.apache.hadoop.hive.ql.exec.TaskFactory;
-import org.apache.hadoop.hive.ql.exec.TaskResult;
-import org.apache.hadoop.hive.ql.exec.TaskRunner;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.history.HiveHistory.Keys;
-import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
-import org.apache.hadoop.hive.ql.hooks.Hook;
-import org.apache.hadoop.hive.ql.hooks.HookContext;
-import org.apache.hadoop.hive.ql.hooks.PostExecute;
-import org.apache.hadoop.hive.ql.hooks.PreExecute;
-import org.apache.hadoop.hive.ql.hooks.ReadEntity;
-import org.apache.hadoop.hive.ql.hooks.WriteEntity;
-import org.apache.hadoop.hive.ql.lockmgr.HiveLock;
-import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager;
-import org.apache.hadoop.hive.ql.lockmgr.HiveLockManagerCtx;
-import org.apache.hadoop.hive.ql.lockmgr.HiveLockMode;
-import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj;
-import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject;
-import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData;
-import org.apache.hadoop.hive.ql.lockmgr.LockException;
-import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
-import org.apache.hadoop.hive.ql.metadata.DummyPartition;
-import org.apache.hadoop.hive.ql.metadata.Hive;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.HiveUtils;
-import org.apache.hadoop.hive.ql.metadata.Partition;
-import org.apache.hadoop.hive.ql.metadata.Table;
-import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
-import org.apache.hadoop.hive.ql.parse.ASTNode;
-import org.apache.hadoop.hive.ql.parse.AbstractSemanticAnalyzerHook;
-import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
-import org.apache.hadoop.hive.ql.parse.ErrorMsg;
-import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
-import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl;
-import org.apache.hadoop.hive.ql.parse.ParseContext;
-import org.apache.hadoop.hive.ql.parse.ParseDriver;
-import org.apache.hadoop.hive.ql.parse.ParseException;
-import org.apache.hadoop.hive.ql.parse.ParseUtils;
-import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
-import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
-import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.parse.VariableSubstitution;
-import org.apache.hadoop.hive.ql.plan.ConditionalResolver;
-import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles;
-import org.apache.hadoop.hive.ql.plan.HiveOperation;
-import org.apache.hadoop.hive.ql.plan.TableDesc;
-import org.apache.hadoop.hive.ql.processors.CommandProcessor;
-import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
-import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
-import org.apache.hadoop.hive.serde2.ByteStream;
-import org.apache.hadoop.hive.shims.ShimLoader;
-import org.apache.hadoop.mapred.ClusterStatus;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.util.ReflectionUtils;
-
-import edu.uci.ics.hivesterix.runtime.exec.HyracksExecutionEngine;
-import edu.uci.ics.hivesterix.runtime.exec.IExecutionEngine;
-
-@SuppressWarnings({ "deprecation", "unused" })
-public class Driver implements CommandProcessor {
-
- static final private Log LOG = LogFactory.getLog(Driver.class.getName());
- static final private LogHelper console = new LogHelper(LOG);
-
- // hive-sterix
- private IExecutionEngine engine;
- private boolean hivesterix = false;
-
- private int maxRows = 100;
- ByteStream.Output bos = new ByteStream.Output();
-
- private HiveConf conf;
- private DataInput resStream;
- private Context ctx;
- private QueryPlan plan;
- private Schema schema;
- private HiveLockManager hiveLockMgr;
-
- private String errorMessage;
- private String SQLState;
-
- // A limit on the number of threads that can be launched
- private int maxthreads;
- private final int sleeptime = 2000;
-
- protected int tryCount = Integer.MAX_VALUE;
-
- private int checkLockManager() {
- boolean supportConcurrency = conf
- .getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
- if (supportConcurrency && (hiveLockMgr == null)) {
- try {
- setLockManager();
- } catch (SemanticException e) {
- errorMessage = "FAILED: Error in semantic analysis: "
- + e.getMessage();
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(
- errorMessage,
- "\n"
- + org.apache.hadoop.util.StringUtils
- .stringifyException(e));
- return (12);
- }
- }
- return (0);
- }
-
- private void setLockManager() throws SemanticException {
- boolean supportConcurrency = conf
- .getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
- if (supportConcurrency) {
- String lockMgr = conf.getVar(HiveConf.ConfVars.HIVE_LOCK_MANAGER);
- if ((lockMgr == null) || (lockMgr.isEmpty())) {
- throw new SemanticException(
- ErrorMsg.LOCKMGR_NOT_SPECIFIED.getMsg());
- }
-
- try {
- hiveLockMgr = (HiveLockManager) ReflectionUtils.newInstance(
- conf.getClassByName(lockMgr), conf);
- hiveLockMgr.setContext(new HiveLockManagerCtx(conf));
- } catch (Exception e) {
- throw new SemanticException(
- ErrorMsg.LOCKMGR_NOT_INITIALIZED.getMsg()
- + e.getMessage());
- }
- }
- }
-
- public void init() {
- Operator.resetId();
- }
-
- /**
- * Return the status information about the Map-Reduce cluster
- */
- public ClusterStatus getClusterStatus() throws Exception {
- ClusterStatus cs;
- try {
- JobConf job = new JobConf(conf, ExecDriver.class);
- JobClient jc = new JobClient(job);
- cs = jc.getClusterStatus();
- } catch (Exception e) {
- e.printStackTrace();
- throw e;
- }
- LOG.info("Returning cluster status: " + cs.toString());
- return cs;
- }
-
- public Schema getSchema() {
- return schema;
- }
-
- /**
- * Get a Schema with fields represented with native Hive types
- */
- public static Schema getSchema(BaseSemanticAnalyzer sem, HiveConf conf) {
- Schema schema = null;
-
- // If we have a plan, prefer its logical result schema if it's
- // available; otherwise, try digging out a fetch task; failing that,
- // give up.
- if (sem == null) {
- // can't get any info without a plan
- } else if (sem.getResultSchema() != null) {
- List<FieldSchema> lst = sem.getResultSchema();
- schema = new Schema(lst, null);
- } else if (sem.getFetchTask() != null) {
- FetchTask ft = sem.getFetchTask();
- TableDesc td = ft.getTblDesc();
- // Partitioned tables don't have a tableDesc set on the FetchTask.
- // Instead they have a list of PartitionDesc objects, each with a
- // table desc. Let's try to fetch the desc for the first partition
- // and use its deserializer.
- if (td == null && ft.getWork() != null
- && ft.getWork().getPartDesc() != null) {
- if (ft.getWork().getPartDesc().size() > 0) {
- td = ft.getWork().getPartDesc().get(0).getTableDesc();
- }
- }
-
- if (td == null) {
- LOG.info("No returning schema.");
- } else {
- String tableName = "result";
- List<FieldSchema> lst = null;
- try {
- lst = MetaStoreUtils.getFieldsFromDeserializer(tableName,
- td.getDeserializer());
- } catch (Exception e) {
- LOG.warn("Error getting schema: "
- + org.apache.hadoop.util.StringUtils
- .stringifyException(e));
- }
- if (lst != null) {
- schema = new Schema(lst, null);
- }
- }
- }
- if (schema == null) {
- schema = new Schema();
- }
- LOG.info("Returning Hive schema: " + schema);
- return schema;
- }
-
- /**
- * Get a Schema with fields represented with Thrift DDL types
- */
- public Schema getThriftSchema() throws Exception {
- Schema schema;
- try {
- schema = getSchema();
- if (schema != null) {
- List<FieldSchema> lst = schema.getFieldSchemas();
- // Go over the schema and convert type to thrift type
- if (lst != null) {
- for (FieldSchema f : lst) {
- f.setType(MetaStoreUtils.typeToThriftType(f.getType()));
- }
- }
- }
- } catch (Exception e) {
- e.printStackTrace();
- throw e;
- }
- LOG.info("Returning Thrift schema: " + schema);
- return schema;
- }
-
- /**
- * Return the maximum number of rows returned by getResults
- */
- public int getMaxRows() {
- return maxRows;
- }
-
- /**
- * Set the maximum number of rows returned by getResults
- */
- public void setMaxRows(int maxRows) {
- this.maxRows = maxRows;
- }
-
- public boolean hasReduceTasks(List<Task<? extends Serializable>> tasks) {
- if (tasks == null) {
- return false;
- }
-
- boolean hasReduce = false;
- for (Task<? extends Serializable> task : tasks) {
- if (task.hasReduce()) {
- return true;
- }
-
- hasReduce = (hasReduce || hasReduceTasks(task.getChildTasks()));
- }
- return hasReduce;
- }
-
- /**
- * for backwards compatibility with current tests
- */
- public Driver(HiveConf conf) {
- this.conf = conf;
-
- // hivesterix
- engine = new HyracksExecutionEngine(conf);
- }
-
- public Driver() {
- if (SessionState.get() != null) {
- conf = SessionState.get().getConf();
- }
-
- // hivesterix
- engine = new HyracksExecutionEngine(conf);
- }
-
- // hivesterix: plan printer
- public Driver(HiveConf conf, PrintWriter planPrinter) {
- this.conf = conf;
- engine = new HyracksExecutionEngine(conf, planPrinter);
- }
-
- public void clear() {
- this.hivesterix = false;
- }
-
- /**
- * Compile a new query. Any currently-planned query associated with this
- * Driver is discarded.
- *
- * @param command
- * The SQL query to compile.
- */
- public int compile(String command) {
- if (plan != null) {
- close();
- plan = null;
- }
-
- TaskFactory.resetId();
-
- try {
- command = new VariableSubstitution().substitute(conf, command);
- ctx = new Context(conf);
-
- ParseDriver pd = new ParseDriver();
- ASTNode tree = pd.parse(command, ctx);
- tree = ParseUtils.findRootNonNullToken(tree);
-
- BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, tree);
- List<AbstractSemanticAnalyzerHook> saHooks = getSemanticAnalyzerHooks();
-
- // Do semantic analysis and plan generation
- if (saHooks != null) {
- HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl();
- hookCtx.setConf(conf);
- for (AbstractSemanticAnalyzerHook hook : saHooks) {
- tree = hook.preAnalyze(hookCtx, tree);
- }
- sem.analyze(tree, ctx);
- for (AbstractSemanticAnalyzerHook hook : saHooks) {
- hook.postAnalyze(hookCtx, sem.getRootTasks());
- }
- } else {
- sem.analyze(tree, ctx);
- }
-
- LOG.info("Semantic Analysis Completed");
-
- // validate the plan
- sem.validate();
-
- plan = new QueryPlan(command, sem);
- // initialize FetchTask right here
- if (plan.getFetchTask() != null) {
- plan.getFetchTask().initialize(conf, plan, null);
- }
-
- // get the output schema
- schema = getSchema(sem, conf);
-
- // test Only - serialize the query plan and deserialize it
- if (sem instanceof SemanticAnalyzer
- && command.toLowerCase().indexOf("create") < 0) {
-
- Thread.currentThread().setContextClassLoader(
- this.getClass().getClassLoader());
-
- String queryPlanFileName = ctx.getLocalScratchDir(true)
- + Path.SEPARATOR_CHAR + "queryplan.xml";
- LOG.info("query plan = " + queryPlanFileName);
- queryPlanFileName = new Path(queryPlanFileName).toUri()
- .getPath();
-
- // serialize the queryPlan
- FileOutputStream fos = new FileOutputStream(queryPlanFileName);
- Utilities.serializeQueryPlan(plan, fos);
- fos.close();
-
- // deserialize the queryPlan
- FileInputStream fis = new FileInputStream(queryPlanFileName);
- QueryPlan newPlan = Utilities.deserializeQueryPlan(fis, conf);
- fis.close();
-
- // Use the deserialized plan
- plan = newPlan;
- }
-
- // initialize FetchTask right here
- if (plan.getFetchTask() != null) {
- plan.getFetchTask().initialize(conf, plan, null);
- }
-
- // do the authorization check
- if (HiveConf.getBoolVar(conf,
- HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
- try {
- // doAuthorization(sem);
- } catch (AuthorizationException authExp) {
- console.printError("Authorization failed:"
- + authExp.getMessage()
- + ". Use show grant to get more details.");
- return 403;
- }
- }
-
- // hyracks run
- if (sem instanceof SemanticAnalyzer
- && command.toLowerCase().indexOf("create") < 0) {
- hivesterix = true;
- return engine.compileJob(sem.getRootTasks());
- }
-
- return 0;
- } catch (SemanticException e) {
- errorMessage = "FAILED: Error in semantic analysis: "
- + e.getMessage();
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(errorMessage, "\n"
- + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (10);
- } catch (ParseException e) {
- errorMessage = "FAILED: Parse Error: " + e.getMessage();
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(errorMessage, "\n"
- + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (11);
- } catch (Exception e) {
- errorMessage = "FAILED: Hive Internal Error: "
- + Utilities.getNameMessage(e);
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(errorMessage + "\n"
- + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (12);
- }
- }
-
- private void doAuthorization(BaseSemanticAnalyzer sem)
- throws HiveException, AuthorizationException {
- HashSet<ReadEntity> inputs = sem.getInputs();
- HashSet<WriteEntity> outputs = sem.getOutputs();
- SessionState ss = SessionState.get();
- HiveOperation op = ss.getHiveOperation();
- Hive db = sem.getDb();
- if (op != null) {
- if (op.equals(HiveOperation.CREATETABLE_AS_SELECT)
- || op.equals(HiveOperation.CREATETABLE)) {
- ss.getAuthorizer().authorize(
- db.getDatabase(db.getCurrentDatabase()),
- null,
- HiveOperation.CREATETABLE_AS_SELECT
- .getOutputRequiredPrivileges());
- } else {
- // if (op.equals(HiveOperation.IMPORT)) {
- // ImportSemanticAnalyzer isa = (ImportSemanticAnalyzer) sem;
- // if (!isa.existsTable()) {
- ss.getAuthorizer().authorize(
- db.getDatabase(db.getCurrentDatabase()),
- null,
- HiveOperation.CREATETABLE_AS_SELECT
- .getOutputRequiredPrivileges());
- // }
- // }
- }
- if (outputs != null && outputs.size() > 0) {
- for (WriteEntity write : outputs) {
-
- if (write.getType() == WriteEntity.Type.PARTITION) {
- Partition part = db.getPartition(write.getTable(),
- write.getPartition().getSpec(), false);
- if (part != null) {
- ss.getAuthorizer().authorize(write.getPartition(),
- null, op.getOutputRequiredPrivileges());
- continue;
- }
- }
-
- if (write.getTable() != null) {
- ss.getAuthorizer().authorize(write.getTable(), null,
- op.getOutputRequiredPrivileges());
- }
- }
-
- }
- }
-
- if (inputs != null && inputs.size() > 0) {
-
- Map<Table, List<String>> tab2Cols = new HashMap<Table, List<String>>();
- Map<Partition, List<String>> part2Cols = new HashMap<Partition, List<String>>();
-
- Map<String, Boolean> tableUsePartLevelAuth = new HashMap<String, Boolean>();
- for (ReadEntity read : inputs) {
- if (read.getPartition() != null) {
- Table tbl = read.getTable();
- String tblName = tbl.getTableName();
- if (tableUsePartLevelAuth.get(tblName) == null) {
- boolean usePartLevelPriv = (tbl.getParameters().get(
- "PARTITION_LEVEL_PRIVILEGE") != null && ("TRUE"
- .equalsIgnoreCase(tbl.getParameters().get(
- "PARTITION_LEVEL_PRIVILEGE"))));
- if (usePartLevelPriv) {
- tableUsePartLevelAuth.put(tblName, Boolean.TRUE);
- } else {
- tableUsePartLevelAuth.put(tblName, Boolean.FALSE);
- }
- }
- }
- }
-
- if (op.equals(HiveOperation.CREATETABLE_AS_SELECT)
- || op.equals(HiveOperation.QUERY)) {
- SemanticAnalyzer querySem = (SemanticAnalyzer) sem;
- ParseContext parseCtx = querySem.getParseContext();
- Map<TableScanOperator, Table> tsoTopMap = parseCtx
- .getTopToTable();
-
- for (Map.Entry<String, Operator<? extends Serializable>> topOpMap : querySem
- .getParseContext().getTopOps().entrySet()) {
- Operator<? extends Serializable> topOp = topOpMap
- .getValue();
- if (topOp instanceof TableScanOperator
- && tsoTopMap.containsKey(topOp)) {
- TableScanOperator tableScanOp = (TableScanOperator) topOp;
- Table tbl = tsoTopMap.get(tableScanOp);
- List<Integer> neededColumnIds = tableScanOp
- .getNeededColumnIDs();
- List<FieldSchema> columns = tbl.getCols();
- List<String> cols = new ArrayList<String>();
- if (neededColumnIds != null
- && neededColumnIds.size() > 0) {
- for (int i = 0; i < neededColumnIds.size(); i++) {
- cols.add(columns.get(neededColumnIds.get(i))
- .getName());
- }
- } else {
- for (int i = 0; i < columns.size(); i++) {
- cols.add(columns.get(i).getName());
- }
- }
- if (tbl.isPartitioned()
- && tableUsePartLevelAuth
- .get(tbl.getTableName())) {
- String alias_id = topOpMap.getKey();
- PrunedPartitionList partsList = PartitionPruner
- .prune(parseCtx.getTopToTable().get(topOp),
- parseCtx.getOpToPartPruner().get(
- topOp), parseCtx.getConf(),
- alias_id,
- parseCtx.getPrunedPartitions());
- Set<Partition> parts = new HashSet<Partition>();
- parts.addAll(partsList.getConfirmedPartns());
- parts.addAll(partsList.getUnknownPartns());
- for (Partition part : parts) {
- List<String> existingCols = part2Cols.get(part);
- if (existingCols == null) {
- existingCols = new ArrayList<String>();
- }
- existingCols.addAll(cols);
- part2Cols.put(part, existingCols);
- }
- } else {
- List<String> existingCols = tab2Cols.get(tbl);
- if (existingCols == null) {
- existingCols = new ArrayList<String>();
- }
- existingCols.addAll(cols);
- tab2Cols.put(tbl, existingCols);
- }
- }
- }
- }
-
- // cache the results for table authorization
- Set<String> tableAuthChecked = new HashSet<String>();
- for (ReadEntity read : inputs) {
- Table tbl = null;
- if (read.getPartition() != null) {
- tbl = read.getPartition().getTable();
- // use partition level authorization
- if (tableUsePartLevelAuth.get(tbl.getTableName())) {
- List<String> cols = part2Cols.get(read.getPartition());
- if (cols != null && cols.size() > 0) {
- ss.getAuthorizer().authorize(
- read.getPartition().getTable(),
- read.getPartition(), cols,
- op.getInputRequiredPrivileges(), null);
- } else {
- ss.getAuthorizer().authorize(read.getPartition(),
- op.getInputRequiredPrivileges(), null);
- }
- continue;
- }
- } else if (read.getTable() != null) {
- tbl = read.getTable();
- }
-
- // If we reach here, a table-level authorization check is needed, and
- // the table authorization may have already happened because of other
- // partitions.
- if (tbl != null
- && !tableAuthChecked.contains(tbl.getTableName())) {
- List<String> cols = tab2Cols.get(tbl);
- if (cols != null && cols.size() > 0) {
- ss.getAuthorizer().authorize(tbl, null, cols,
- op.getInputRequiredPrivileges(), null);
- } else {
- ss.getAuthorizer().authorize(tbl,
- op.getInputRequiredPrivileges(), null);
- }
- tableAuthChecked.add(tbl.getTableName());
- }
- }
-
- }
- }
-
- /**
- * @return The current query plan associated with this Driver, if any.
- */
- public QueryPlan getPlan() {
- return plan;
- }
-
- /**
- * @param t
- * The table to be locked
- * @param p
- * The partition to be locked
- * @param mode
- * The mode of the lock (SHARED/EXCLUSIVE) Get the list of
- * objects to be locked. If a partition needs to be locked (in
- * any mode), all its parents should also be locked in SHARED
- * mode.
- **/
- private List<HiveLockObj> getLockObjects(Table t, Partition p,
- HiveLockMode mode) throws SemanticException {
- List<HiveLockObj> locks = new LinkedList<HiveLockObj>();
-
- HiveLockObjectData lockData = new HiveLockObjectData(plan.getQueryId(),
- String.valueOf(System.currentTimeMillis()), "IMPLICIT");
-
- if (t != null) {
- locks.add(new HiveLockObj(new HiveLockObject(t, lockData), mode));
- mode = HiveLockMode.SHARED;
- locks.add(new HiveLockObj(new HiveLockObject(t.getDbName(),
- lockData), mode));
- return locks;
- }
-
- if (p != null) {
- if (!(p instanceof DummyPartition)) {
- locks.add(new HiveLockObj(new HiveLockObject(p, lockData), mode));
- }
-
- // All the parents are locked in shared mode
- mode = HiveLockMode.SHARED;
-
- // For dummy partitions, only partition name is needed
- String name = p.getName();
-
- if (p instanceof DummyPartition) {
- name = p.getName().split("@")[2];
- }
-
- String partName = name;
- String partialName = "";
- String[] partns = name.split("/");
- int len = p instanceof DummyPartition ? partns.length
- : partns.length - 1;
- for (int idx = 0; idx < len; idx++) {
- String partn = partns[idx];
- partialName += partn;
- try {
- locks.add(new HiveLockObj(new HiveLockObject(
- new DummyPartition(p.getTable(), p.getTable()
- .getDbName()
- + "/"
- + p.getTable().getTableName()
- + "/"
- + partialName), lockData), mode));
- partialName += "/";
- } catch (HiveException e) {
- throw new SemanticException(e.getMessage());
- }
- }
-
- locks.add(new HiveLockObj(
- new HiveLockObject(p.getTable(), lockData), mode));
- locks.add(new HiveLockObj(new HiveLockObject(p.getTable()
- .getDbName(), lockData), mode));
- }
- return locks;
- }
-
- /**
- * Acquire the read and write locks needed by the statement. The list of
- * objects to be locked is obtained from the inputs and outputs populated
- * by the compiler. The lock acquisition scheme is simple: if all the
- * locks cannot be obtained, error out. Deadlock is avoided by making sure
- * the locks are acquired in lexicographic order.
- **/
- public int acquireReadWriteLocks() {
- try {
- int sleepTime = conf
- .getIntVar(HiveConf.ConfVars.HIVE_LOCK_SLEEP_BETWEEN_RETRIES) * 1000;
- int numRetries = conf
- .getIntVar(HiveConf.ConfVars.HIVE_LOCK_NUMRETRIES);
-
- boolean supportConcurrency = conf
- .getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
- if (!supportConcurrency) {
- return 0;
- }
-
- List<HiveLockObj> lockObjects = new ArrayList<HiveLockObj>();
-
- // Sort all the inputs and outputs. If a lock needs to be acquired on
- // any partition, a read lock needs to be acquired on all of its
- // parents as well.
- for (ReadEntity input : plan.getInputs()) {
- if (input.getType() == ReadEntity.Type.TABLE) {
- lockObjects.addAll(getLockObjects(input.getTable(), null,
- HiveLockMode.SHARED));
- } else {
- lockObjects.addAll(getLockObjects(null,
- input.getPartition(), HiveLockMode.SHARED));
- }
- }
-
- for (WriteEntity output : plan.getOutputs()) {
- if (output.getTyp() == WriteEntity.Type.TABLE) {
- lockObjects.addAll(getLockObjects(output.getTable(), null,
- output.isComplete() ? HiveLockMode.EXCLUSIVE
- : HiveLockMode.SHARED));
- } else if (output.getTyp() == WriteEntity.Type.PARTITION) {
- lockObjects.addAll(getLockObjects(null,
- output.getPartition(), HiveLockMode.EXCLUSIVE));
- }
- // In case of dynamic queries, it is possible to have incomplete
- // dummy partitions
- else if (output.getTyp() == WriteEntity.Type.DUMMYPARTITION) {
- lockObjects.addAll(getLockObjects(null,
- output.getPartition(), HiveLockMode.SHARED));
- }
- }
-
- if (lockObjects.isEmpty() && !ctx.isNeedLockMgr()) {
- return 0;
- }
-
- int ret = checkLockManager();
- if (ret != 0) {
- return ret;
- }
-
- HiveLockObjectData lockData = new HiveLockObjectData(
- plan.getQueryId(), String.valueOf(System
- .currentTimeMillis()), "IMPLICIT");
-
- // Lock the database also
- try {
- Hive db = Hive.get(conf);
- lockObjects.add(new HiveLockObj(new HiveLockObject(db
- .getCurrentDatabase(), lockData), HiveLockMode.SHARED));
- } catch (HiveException e) {
- throw new SemanticException(e.getMessage());
- }
-
- ctx.setHiveLockMgr(hiveLockMgr);
- List<HiveLock> hiveLocks = null;
-
- int tryNum = 1;
- do {
-
- // ctx.getHiveLockMgr();
- // hiveLocks = ctx.getHiveLockMgr().lock(lockObjects, false);
-
- if (hiveLocks != null) {
- break;
- }
-
- tryNum++;
- try {
- Thread.sleep(sleepTime);
- } catch (InterruptedException e) {
- }
- } while (tryNum < numRetries);
-
- if (hiveLocks == null) {
- throw new SemanticException(
- ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg());
- } else {
- ctx.setHiveLocks(hiveLocks);
- }
-
- return (0);
- } catch (SemanticException e) {
- errorMessage = "FAILED: Error in acquiring locks: "
- + e.getMessage();
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(errorMessage, "\n"
- + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (10);
- } catch (Exception e) {
- errorMessage = "FAILED: Error in acquiring locks: "
- + e.getMessage();
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(errorMessage, "\n"
- + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (10);
- }
- }
-
- /**
- * Release all the locks acquired implicitly by the statement. Note that the
- * locks acquired with 'keepAlive' set to True are not released.
- **/
- private void releaseLocks() {
- if (ctx != null && ctx.getHiveLockMgr() != null) {
- try {
- ctx.getHiveLockMgr().close();
- ctx.setHiveLocks(null);
- } catch (LockException e) {
- }
- }
- }
-
- /**
- * Release all the specified locks. If some of the locks have already
- * been released, ignore them.
- *
- * @param hiveLocks list of hive locks to be released
- **/
- private void releaseLocks(List<HiveLock> hiveLocks) {
- if (hiveLocks != null) {
- ctx.getHiveLockMgr().releaseLocks(hiveLocks);
- }
- ctx.setHiveLocks(null);
- }
-
- public CommandProcessorResponse run(String command) {
- errorMessage = null;
- SQLState = null;
-
- int ret = compile(command);
- if (ret != 0) {
- // releaseLocks(ctx.getHiveLocks());
- return new CommandProcessorResponse(ret, errorMessage, SQLState);
- }
-
- // ret = acquireReadWriteLocks();
- if (ret != 0) {
- // releaseLocks(ctx.getHiveLocks());
- return new CommandProcessorResponse(ret, errorMessage, SQLState);
- }
-
- ret = execute();
- if (ret != 0) {
- // releaseLocks(ctx.getHiveLocks());
- return new CommandProcessorResponse(ret, errorMessage, SQLState);
- }
-
- // releaseLocks(ctx.getHiveLocks());
- return new CommandProcessorResponse(ret);
- }
-
- private List<AbstractSemanticAnalyzerHook> getSemanticAnalyzerHooks()
- throws Exception {
- ArrayList<AbstractSemanticAnalyzerHook> saHooks = new ArrayList<AbstractSemanticAnalyzerHook>();
- String pestr = conf.getVar(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK);
- if (pestr == null) {
- return saHooks;
- }
- pestr = pestr.trim();
- if (pestr.equals("")) {
- return saHooks;
- }
-
- String[] peClasses = pestr.split(",");
-
- for (String peClass : peClasses) {
- try {
- AbstractSemanticAnalyzerHook hook = HiveUtils
- .getSemanticAnalyzerHook(conf, peClass);
- saHooks.add(hook);
- } catch (HiveException e) {
- console.printError("Pre Exec Hook Class not found:"
- + e.getMessage());
- throw e;
- }
- }
-
- return saHooks;
- }
-
- private List<Hook> getPreExecHooks() throws Exception {
- ArrayList<Hook> pehooks = new ArrayList<Hook>();
- String pestr = conf.getVar(HiveConf.ConfVars.PREEXECHOOKS);
- pestr = pestr.trim();
- if (pestr.equals("")) {
- return pehooks;
- }
-
- String[] peClasses = pestr.split(",");
-
- for (String peClass : peClasses) {
- try {
- pehooks.add((Hook) Class.forName(peClass.trim(), true,
- JavaUtils.getClassLoader()).newInstance());
- } catch (ClassNotFoundException e) {
- console.printError("Pre Exec Hook Class not found:"
- + e.getMessage());
- throw e;
- }
- }
-
- return pehooks;
- }
-
- private List<Hook> getPostExecHooks() throws Exception {
- ArrayList<Hook> pehooks = new ArrayList<Hook>();
- String pestr = conf.getVar(HiveConf.ConfVars.POSTEXECHOOKS);
- pestr = pestr.trim();
- if (pestr.equals("")) {
- return pehooks;
- }
-
- String[] peClasses = pestr.split(",");
-
- for (String peClass : peClasses) {
- try {
- pehooks.add((Hook) Class.forName(peClass.trim(), true,
- JavaUtils.getClassLoader()).newInstance());
- } catch (ClassNotFoundException e) {
- console.printError("Post Exec Hook Class not found:"
- + e.getMessage());
- throw e;
- }
- }
-
- return pehooks;
- }
-
- public int execute() {
- // execute hivesterix plan
- if (hivesterix) {
- hivesterix = false;
- int ret = engine.executeJob();
- if (ret != 0)
- return ret;
- }
-
- boolean noName = StringUtils.isEmpty(conf
- .getVar(HiveConf.ConfVars.HADOOPJOBNAME));
- int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
-
- String queryId = plan.getQueryId();
- String queryStr = plan.getQueryStr();
-
- conf.setVar(HiveConf.ConfVars.HIVEQUERYID, queryId);
- conf.setVar(HiveConf.ConfVars.HIVEQUERYSTRING, queryStr);
- maxthreads = HiveConf.getIntVar(conf,
- HiveConf.ConfVars.EXECPARALLETHREADNUMBER);
-
- try {
- LOG.info("Starting command: " + queryStr);
-
- plan.setStarted();
-
- if (SessionState.get() != null) {
- SessionState
- .get()
- .getHiveHistory()
- .startQuery(queryStr,
- conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
- SessionState.get().getHiveHistory().logPlanProgress(plan);
- }
- resStream = null;
-
- HookContext hookContext = new HookContext(plan, conf);
-
- for (Hook peh : getPreExecHooks()) {
- if (peh instanceof ExecuteWithHookContext) {
- ((ExecuteWithHookContext) peh).run(hookContext);
- } else if (peh instanceof PreExecute) {
- ((PreExecute) peh).run(SessionState.get(),
- plan.getInputs(), plan.getOutputs(), ShimLoader
- .getHadoopShims().getUGIForConf(conf));
- }
- }
-
- int jobs = Utilities.getMRTasks(plan.getRootTasks()).size();
- if (jobs > 0) {
- console.printInfo("Total MapReduce jobs = " + jobs);
- }
- if (SessionState.get() != null) {
- SessionState
- .get()
- .getHiveHistory()
- .setQueryProperty(queryId, Keys.QUERY_NUM_TASKS,
- String.valueOf(jobs));
- SessionState.get().getHiveHistory()
- .setIdToTableMap(plan.getIdToTableNameMap());
- }
- String jobname = Utilities.abbreviate(queryStr, maxlen - 6);
-
- // A runtime that launches runnable tasks as separate threads through
- // TaskRunners. As soon as a task is runnable, it is put in a queue.
- // At any time, at most maxthreads tasks can be running. The main
- // thread polls the TaskRunners to check whether they have finished.
-
- Queue<Task<? extends Serializable>> runnable = new LinkedList<Task<? extends Serializable>>();
- Map<TaskResult, TaskRunner> running = new HashMap<TaskResult, TaskRunner>();
-
- DriverContext driverCxt = new DriverContext(runnable, ctx);
-
- // Add root Tasks to runnable
-
- for (Task<? extends Serializable> tsk : plan.getRootTasks()) {
- driverCxt.addToRunnable(tsk);
- }
-
- // Loop while you either have tasks running, or tasks queued up
-
- while (running.size() != 0 || runnable.peek() != null) {
- // Launch up to maxthreads tasks
- while (runnable.peek() != null && running.size() < maxthreads) {
- Task<? extends Serializable> tsk = runnable.remove();
- console.printInfo("executing task " + tsk.getName());
- launchTask(tsk, queryId, noName, running, jobname, jobs,
- driverCxt);
- }
-
- // poll the Tasks to see which one completed
- TaskResult tskRes = pollTasks(running.keySet());
- TaskRunner tskRun = running.remove(tskRes);
- Task<? extends Serializable> tsk = tskRun.getTask();
- hookContext.addCompleteTask(tskRun);
-
- int exitVal = tskRes.getExitVal();
- if (exitVal != 0) {
- Task<? extends Serializable> backupTask = tsk
- .getAndInitBackupTask();
- if (backupTask != null) {
- errorMessage = "FAILED: Execution Error, return code "
- + exitVal + " from " + tsk.getClass().getName();
- console.printError(errorMessage);
-
- errorMessage = "ATTEMPT: Execute BackupTask: "
- + backupTask.getClass().getName();
- console.printError(errorMessage);
-
- // add backup task to runnable
- if (DriverContext.isLaunchable(backupTask)) {
- driverCxt.addToRunnable(backupTask);
- }
- continue;
-
- } else {
- // TODO: This error messaging is not very informative.
- // Fix that.
- errorMessage = "FAILED: Execution Error, return code "
- + exitVal + " from " + tsk.getClass().getName();
- SQLState = "08S01";
- console.printError(errorMessage);
- if (running.size() != 0) {
- taskCleanup();
- }
- // In case we decided to run everything in local mode, restore the
- // jobtracker setting to its initial value.
- ctx.restoreOriginalTracker();
- return 9;
- }
- }
-
- if (SessionState.get() != null) {
- SessionState
- .get()
- .getHiveHistory()
- .setTaskProperty(queryId, tsk.getId(),
- Keys.TASK_RET_CODE, String.valueOf(exitVal));
- SessionState.get().getHiveHistory().endTask(queryId, tsk);
- }
-
- if (tsk.getChildTasks() != null) {
- for (Task<? extends Serializable> child : tsk
- .getChildTasks()) {
- // hivesterix: don't check launchable condition
- // if (DriverContext.isLaunchable(child)) {
- driverCxt.addToRunnable(child);
- // }
- }
- }
- }
-
- // In case we decided to run everything in local mode, restore the
- // jobtracker setting to its initial value.
- ctx.restoreOriginalTracker();
-
- // Remove incomplete outputs. Some incomplete outputs may be added at
- // the beginning, e.g. for dynamic partitions; remove them here.
- HashSet<WriteEntity> remOutputs = new HashSet<WriteEntity>();
- for (WriteEntity output : plan.getOutputs()) {
- if (!output.isComplete()) {
- remOutputs.add(output);
- }
- }
-
- for (WriteEntity output : remOutputs) {
- plan.getOutputs().remove(output);
- }
-
- // Get all the post execution hooks and execute them.
- for (Hook peh : getPostExecHooks()) {
- if (peh instanceof ExecuteWithHookContext) {
- ((ExecuteWithHookContext) peh).run(hookContext);
- } else if (peh instanceof PostExecute) {
- ((PostExecute) peh)
- .run(SessionState.get(),
- plan.getInputs(),
- plan.getOutputs(),
- (SessionState.get() != null ? SessionState
- .get().getLineageState()
- .getLineageInfo() : null),
- ShimLoader.getHadoopShims().getUGIForConf(
- conf));
- }
- }
-
- if (SessionState.get() != null) {
- SessionState
- .get()
- .getHiveHistory()
- .setQueryProperty(queryId, Keys.QUERY_RET_CODE,
- String.valueOf(0));
- SessionState.get().getHiveHistory().printRowCount(queryId);
- }
- } catch (Exception e) {
- if (SessionState.get() != null) {
- SessionState
- .get()
- .getHiveHistory()
- .setQueryProperty(queryId, Keys.QUERY_RET_CODE,
- String.valueOf(12));
- }
- // TODO: do better with handling types of Exception here
- errorMessage = "FAILED: Hive Internal Error: "
- + Utilities.getNameMessage(e);
- SQLState = "08S01";
- console.printError(errorMessage + "\n"
- + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (12);
- } finally {
- if (SessionState.get() != null) {
- SessionState.get().getHiveHistory().endQuery(queryId);
- }
- if (noName) {
- conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, "");
- }
- }
- plan.setDone();
-
- if (SessionState.get() != null) {
- try {
- SessionState.get().getHiveHistory().logPlanProgress(plan);
- } catch (Exception e) {
- }
- }
- console.printInfo("OK");
-
- return (0);
- }
-
- /**
- * Launches a new task
- *
- * @param tsk
- * task being launched
- * @param queryId
- * Id of the query containing the task
- * @param noName
- * whether the Hadoop job name has not been set
- * @param running
- * map from taskresults to taskrunners
- * @param jobname
- * name of the task, if it is a map-reduce job
- * @param jobs
- * number of map-reduce jobs
- * @param cxt
- * the driver context that tracks the number of the last map-reduce job launched
- */
-
- public void launchTask(Task<? extends Serializable> tsk, String queryId,
- boolean noName, Map<TaskResult, TaskRunner> running,
- String jobname, int jobs, DriverContext cxt) {
-
- if (SessionState.get() != null) {
- SessionState.get().getHiveHistory()
- .startTask(queryId, tsk, tsk.getClass().getName());
- }
- if (tsk.isMapRedTask() && !(tsk instanceof ConditionalTask)) {
- if (noName) {
- conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, jobname + "("
- + tsk.getId() + ")");
- }
- cxt.incCurJobNo(1);
- console.printInfo("Launching Job " + cxt.getCurJobNo() + " out of "
- + jobs);
- }
- tsk.initialize(conf, plan, cxt);
- TaskResult tskRes = new TaskResult();
- TaskRunner tskRun = new TaskRunner(tsk, tskRes);
-
- // HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXECPARALLEL) &&
- // Launch Task: hivesterix tweak
- if (tsk instanceof MapRedTask || tsk instanceof StatsTask) {
- // Launch it in the parallel mode, as a separate thread only for MR
- // tasks
- tskRes.setRunning(false);
- tskRes.setExitVal(0);
- } else if (tsk instanceof ConditionalTask) {
- ConditionalTask condTask = (ConditionalTask) tsk;
- ConditionalResolver crs = condTask.getResolver();
- if (crs instanceof ConditionalResolverMergeFiles) {
- tskRes.setRunning(false);
- tskRes.setExitVal(0);
-
- List<Task<? extends Serializable>> children = condTask
- .getListTasks();
- for (Task<? extends Serializable> child : children)
- if (child instanceof MapRedTask)
- cxt.addToRunnable(child);
- }
- } else {
- tskRun.runSequential();
- }
- running.put(tskRes, tskRun);
- return;
- }
-
- /**
- * Cleans up remaining tasks in case of failure
- */
-
- public void taskCleanup() {
- // The currently existing Shutdown hooks will be automatically called,
- // killing the map-reduce processes.
- // The non MR processes will be killed as well.
- System.exit(9);
- }
-
- /**
- * Polls running tasks to see if a task has ended.
- *
- * @param results
- * Set of result objects for running tasks
- * @return The result object for any completed/failed task
- */
-
- public TaskResult pollTasks(Set<TaskResult> results) {
- Iterator<TaskResult> resultIterator = results.iterator();
- while (true) {
- while (resultIterator.hasNext()) {
- TaskResult tskRes = resultIterator.next();
- if (!tskRes.isRunning()) {
- return tskRes;
- }
- }
-
- // Nothing was found in this pass; sleep for sleeptime milliseconds
- // and restart the scan.
- try {
- Thread.sleep(sleeptime);
- } catch (InterruptedException ie) {
- // Do Nothing
- ;
- }
- resultIterator = results.iterator();
- }
- }
-
- public boolean getResults(ArrayList<String> res) throws IOException {
- if (plan != null && plan.getFetchTask() != null) {
- FetchTask ft = plan.getFetchTask();
- ft.setMaxRows(maxRows);
- return ft.fetch(res);
- }
-
- if (resStream == null) {
- resStream = ctx.getStream();
- }
- if (resStream == null) {
- return false;
- }
-
- int numRows = 0;
- String row = null;
-
- while (numRows < maxRows) {
- if (resStream == null) {
- if (numRows > 0) {
- return true;
- } else {
- return false;
- }
- }
-
- bos.reset();
- Utilities.StreamStatus ss;
- try {
- ss = Utilities.readColumn(resStream, bos);
- if (bos.getCount() > 0) {
- row = new String(bos.getData(), 0, bos.getCount(), "UTF-8");
- } else if (ss == Utilities.StreamStatus.TERMINATED) {
- row = new String();
- }
-
- if (row != null) {
- numRows++;
- res.add(row);
- }
- } catch (IOException e) {
- console.printError("FAILED: Unexpected IO exception : "
- + e.getMessage());
- res = null;
- return false;
- }
-
- if (ss == Utilities.StreamStatus.EOF) {
- resStream = ctx.getStream();
- }
- }
- return true;
- }
-
- public int close() {
- try {
- if (plan != null) {
- FetchTask fetchTask = plan.getFetchTask();
- if (null != fetchTask) {
- try {
- fetchTask.clearFetch();
- } catch (Exception e) {
- LOG.debug(" Exception while clearing the Fetch task ",
- e);
- }
- }
- }
- if (ctx != null) {
- ctx.clear();
- }
- if (null != resStream) {
- try {
- ((FSDataInputStream) resStream).close();
- } catch (Exception e) {
- LOG.debug(" Exception while closing the resStream ", e);
- }
- }
- } catch (Exception e) {
- console.printError("FAILED: Hive Internal Error: "
- + Utilities.getNameMessage(e) + "\n"
- + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return 13;
- }
-
- return 0;
- }
-
- public void destroy() {
- releaseLocks();
- }
-
- public org.apache.hadoop.hive.ql.plan.api.Query getQueryPlan()
- throws IOException {
- return plan.getQueryPlan();
- }
-
- public int getTryCount() {
- return tryCount;
- }
-
- public void setTryCount(int tryCount) {
- this.tryCount = tryCount;
- }
-}
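
The execute() method removed above drives a small scheduler: root tasks go into a runnable queue, up to maxthreads of them run concurrently via TaskRunners, the main thread polls for a finished task, aborts on a non-zero exit code, and otherwise enqueues the finished task's children. The self-contained sketch below captures that loop with plain threads under the assumption maxThreads >= 1; Task, runAll, and the 200 ms back-off are hypothetical stand-ins, not the Hive driver API.

    import java.util.ArrayDeque;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.Queue;

    public class TaskSchedulerSketch {

        public interface Task {
            int run();                  // returns an exit code
            List<Task> getChildTasks(); // may be null
        }

        public static int runAll(List<Task> rootTasks, int maxThreads) {
            Queue<Task> runnable = new ArrayDeque<Task>(rootTasks);
            Map<Task, Thread> running = new HashMap<Task, Thread>();
            final Map<Task, Integer> exitCodes = new HashMap<Task, Integer>();

            while (!running.isEmpty() || !runnable.isEmpty()) {
                // Launch up to maxThreads tasks, each on its own thread.
                while (!runnable.isEmpty() && running.size() < maxThreads) {
                    final Task task = runnable.remove();
                    Thread t = new Thread(new Runnable() {
                        public void run() {
                            int code = task.run();
                            synchronized (exitCodes) {
                                exitCodes.put(task, code);
                            }
                        }
                    });
                    running.put(task, t);
                    t.start();
                }

                // Poll until some running task has recorded an exit code.
                Task done = null;
                while (done == null) {
                    synchronized (exitCodes) {
                        for (Task t : running.keySet()) {
                            if (exitCodes.containsKey(t)) {
                                done = t;
                                break;
                            }
                        }
                    }
                    if (done == null) {
                        try {
                            Thread.sleep(200); // back off before re-polling
                        } catch (InterruptedException ignored) {
                        }
                    }
                }
                running.remove(done);

                int exitVal;
                synchronized (exitCodes) {
                    exitVal = exitCodes.get(done);
                }
                if (exitVal != 0) {
                    return exitVal; // fail fast, mirroring the driver's behaviour
                }
                if (done.getChildTasks() != null) {
                    runnable.addAll(done.getChildTasks());
                }
            }
            return 0;
        }
    }
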
diff --git a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java b/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
deleted file mode 100644
index b174432..0000000
--- a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
+++ /dev/null
@@ -1,253 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.udf.generic;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.util.StringUtils;
-
-import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
-import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
-
-/**
- * GenericUDAFAverage.
- *
- */
-@Description(name = "avg", value = "_FUNC_(x) - Returns the mean of a set of numbers")
-public class GenericUDAFAverage extends AbstractGenericUDAFResolver {
-
- static final Log LOG = LogFactory
- .getLog(GenericUDAFAverage.class.getName());
-
- @Override
- public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
- throws SemanticException {
- if (parameters.length != 1) {
- throw new UDFArgumentTypeException(parameters.length - 1,
- "Exactly one argument is expected.");
- }
-
- if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
- throw new UDFArgumentTypeException(0,
- "Only primitive type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
- switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- case FLOAT:
- case DOUBLE:
- case STRING:
- return new GenericUDAFAverageEvaluator();
- case BOOLEAN:
- default:
- throw new UDFArgumentTypeException(0,
- "Only numeric or string type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
- }
-
- /**
- * GenericUDAFAverageEvaluator.
- *
- */
- public static class GenericUDAFAverageEvaluator extends
- GenericUDAFEvaluator {
-
- // For PARTIAL1 and COMPLETE
- PrimitiveObjectInspector inputOI;
-
- // For PARTIAL2 and FINAL
- StructObjectInspector soi;
- StructField countField;
- StructField sumField;
- LongObjectInspector countFieldOI;
- DoubleObjectInspector sumFieldOI;
-
- // For PARTIAL1 and PARTIAL2
- Object[] partialResult;
-
- // For FINAL and COMPLETE
- DoubleWritable result;
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters)
- throws HiveException {
- assert (parameters.length == 1);
- super.init(m, parameters);
-
- // init input
- if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
- inputOI = (PrimitiveObjectInspector) parameters[0];
- } else {
- soi = (StructObjectInspector) parameters[0];
- countField = soi.getStructFieldRef("count");
- sumField = soi.getStructFieldRef("sum");
- countFieldOI = (LongObjectInspector) countField
- .getFieldObjectInspector();
- sumFieldOI = (DoubleObjectInspector) sumField
- .getFieldObjectInspector();
- }
-
- // init output
- if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
- // The output of a partial aggregation is a struct containing
- // a "long" count and a "double" sum.
-
- ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
- foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
- ArrayList<String> fname = new ArrayList<String>();
- fname.add("count");
- fname.add("sum");
- partialResult = new Object[2];
- partialResult[0] = new LongWritable(0);
- partialResult[1] = new DoubleWritable(0);
- return ObjectInspectorFactory.getStandardStructObjectInspector(
- fname, foi);
-
- } else {
- result = new DoubleWritable(0);
- return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
- }
- }
-
- static class AverageAgg implements SerializableBuffer {
- long count;
- double sum;
-
- @Override
- public void deSerializeAggBuffer(byte[] data, int start, int len) {
- count = BufferSerDeUtil.getLong(data, start);
- start += 8;
- sum = BufferSerDeUtil.getDouble(data, start);
- }
-
- @Override
- public void serializeAggBuffer(byte[] data, int start, int len) {
- BufferSerDeUtil.writeLong(count, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(sum, data, start);
- }
-
- @Override
- public void serializeAggBuffer(DataOutput output)
- throws IOException {
- output.writeLong(count);
- output.writeDouble(sum);
- }
- };
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException {
- AverageAgg result = new AverageAgg();
- reset(result);
- return result;
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException {
- AverageAgg myagg = (AverageAgg) agg;
- myagg.count = 0;
- myagg.sum = 0;
- }
-
- boolean warned = false;
-
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters)
- throws HiveException {
- assert (parameters.length == 1);
- Object p = parameters[0];
- if (p != null) {
- AverageAgg myagg = (AverageAgg) agg;
- try {
- double v = PrimitiveObjectInspectorUtils.getDouble(p,
- inputOI);
- myagg.count++;
- myagg.sum += v;
- } catch (NumberFormatException e) {
- if (!warned) {
- warned = true;
- LOG.warn(getClass().getSimpleName() + " "
- + StringUtils.stringifyException(e));
- LOG.warn(getClass().getSimpleName()
- + " ignoring similar exceptions.");
- }
- }
- }
- }
-
- @Override
- public Object terminatePartial(AggregationBuffer agg)
- throws HiveException {
- AverageAgg myagg = (AverageAgg) agg;
- ((LongWritable) partialResult[0]).set(myagg.count);
- ((DoubleWritable) partialResult[1]).set(myagg.sum);
- return partialResult;
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial)
- throws HiveException {
- if (partial != null) {
- AverageAgg myagg = (AverageAgg) agg;
- Object partialCount = soi.getStructFieldData(partial,
- countField);
- Object partialSum = soi.getStructFieldData(partial, sumField);
- myagg.count += countFieldOI.get(partialCount);
- myagg.sum += sumFieldOI.get(partialSum);
- }
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException {
- AverageAgg myagg = (AverageAgg) agg;
- if (myagg.count == 0) {
- return null;
- } else {
- result.set(myagg.sum / myagg.count);
- return result;
- }
- }
- }
-
-}
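
The file removed above keeps a (long count, double sum) pair as its partial state and divides only at terminate time, so partials from different partitions merge by plain addition. For reference, a minimal standalone sketch of that flow with no Hive or Hivesterix dependencies; the class and method names below are illustrative, not taken from the deleted file.

// Minimal sketch of the (count, sum) average aggregation implemented by the deleted
// GenericUDAFAverageEvaluator: iterate() accumulates into a buffer, merge() adds
// partial counts and sums, terminate() divides and returns null for an empty group.
public final class AverageSketch {

    // Stand-in for the AverageAgg aggregation buffer (a long count and a double sum).
    static final class Agg {
        long count;
        double sum;
    }

    static void iterate(Agg agg, Double value) {
        if (value != null) {          // NULL inputs are ignored, as in the UDAF
            agg.count++;
            agg.sum += value;
        }
    }

    static void merge(Agg into, Agg partial) {
        into.count += partial.count;  // partials combine by simple addition
        into.sum += partial.sum;
    }

    static Double terminate(Agg agg) {
        return agg.count == 0 ? null : agg.sum / agg.count;
    }

    public static void main(String[] args) {
        // Two "map-side" partials over disjoint slices of the input.
        Double[] sliceA = { 1.0, 2.0, null, 3.0 };
        Double[] sliceB = { 10.0, 20.0 };

        Agg a = new Agg();
        Agg b = new Agg();
        for (Double v : sliceA) {
            iterate(a, v);
        }
        for (Double v : sliceB) {
            iterate(b, v);
        }

        Agg global = new Agg();
        merge(global, a);
        merge(global, b);

        System.out.println("avg = " + terminate(global));   // (1+2+3+10+20)/5 = 7.2
    }
}

Keeping the division out of the partial state is what makes the PARTIAL1/PARTIAL2 outputs mergeable in any order.
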
diff --git a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java b/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
deleted file mode 100644
index 716faac..0000000
--- a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
+++ /dev/null
@@ -1,428 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.udf.generic;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.LongWritable;
-
-import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
-import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
-
-/**
- * Compute the Pearson correlation coefficient corr(x, y), using the following
- * stable one-pass method, based on: "Formulas for Robust, One-Pass Parallel
- * Computation of Covariances and Arbitrary-Order Statistical Moments", Philippe
- * Pebay, Sandia Labs and
- * "The Art of Computer Programming, volume 2: Seminumerical Algorithms", Donald
- * Knuth.
- *
- * Incremental:
- *   n : <count>
- *   mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg>
- *   my_n = my_(n-1) + [y_n - my_(n-1)]/n : <yavg>
- *   c_n = c_(n-1) + (x_n - mx_(n-1))*(y_n - my_n) : <covariance * n>
- *   vx_n = vx_(n-1) + (x_n - mx_n)(x_n - mx_(n-1)) : <variance * n>
- *   vy_n = vy_(n-1) + (y_n - my_n)(y_n - my_(n-1)) : <variance * n>
- *
- * Merge:
- *   c_(A,B) = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
- *   vx_(A,B) = vx_A + vx_B + (mx_A - mx_B)*(mx_A - mx_B)*n_A*n_B/(n_A+n_B)
- *   vy_(A,B) = vy_A + vy_B + (my_A - my_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
- *
- */
-@Description(name = "corr", value = "_FUNC_(x,y) - Returns the Pearson coefficient of correlation\n"
- + "between a set of number pairs", extended = "The function takes as arguments any pair of numeric types and returns a double.\n"
- + "Any pair with a NULL is ignored. If the function is applied to an empty set or\n"
- + "a singleton set, NULL will be returned. Otherwise, it computes the following:\n"
- + " COVAR_POP(x,y)/(STDDEV_POP(x)*STDDEV_POP(y))\n"
- + "where neither x nor y is null,\n"
- + "COVAR_POP is the population covariance,\n"
- + "and STDDEV_POP is the population standard deviation.")
-public class GenericUDAFCorrelation extends AbstractGenericUDAFResolver {
-
- static final Log LOG = LogFactory.getLog(GenericUDAFCorrelation.class
- .getName());
-
- @Override
- public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
- throws SemanticException {
- if (parameters.length != 2) {
- throw new UDFArgumentTypeException(parameters.length - 1,
- "Exactly two arguments are expected.");
- }
-
- if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
- throw new UDFArgumentTypeException(0,
- "Only primitive type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
-
- if (parameters[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
- throw new UDFArgumentTypeException(1,
- "Only primitive type arguments are accepted but "
- + parameters[1].getTypeName() + " is passed.");
- }
-
- switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- case FLOAT:
- case DOUBLE:
- switch (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) {
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- case FLOAT:
- case DOUBLE:
- return new GenericUDAFCorrelationEvaluator();
- case STRING:
- case BOOLEAN:
- default:
- throw new UDFArgumentTypeException(1,
- "Only numeric type arguments are accepted but "
- + parameters[1].getTypeName() + " is passed.");
- }
- case STRING:
- case BOOLEAN:
- default:
- throw new UDFArgumentTypeException(0,
- "Only numeric type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
- }
-
- /**
- * Evaluate the Pearson correlation coefficient using a stable one-pass
- * algorithm, based on work by Philippe Pébay and Donald Knuth.
- *
- * Incremental:
- *   n : <count>
- *   mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg>
- *   my_n = my_(n-1) + [y_n - my_(n-1)]/n : <yavg>
- *   c_n = c_(n-1) + (x_n - mx_(n-1))*(y_n - my_n) : <covariance * n>
- *   vx_n = vx_(n-1) + (x_n - mx_n)(x_n - mx_(n-1)) : <variance * n>
- *   vy_n = vy_(n-1) + (y_n - my_n)(y_n - my_(n-1)) : <variance * n>
- *
- * Merge:
- *   c_X = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/n_X
- *   vx_(A,B) = vx_A + vx_B + (mx_A - mx_B)*(mx_A - mx_B)*n_A*n_B/(n_A+n_B)
- *   vy_(A,B) = vy_A + vy_B + (my_A - my_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
- *
- */
- public static class GenericUDAFCorrelationEvaluator extends
- GenericUDAFEvaluator {
-
- // For PARTIAL1 and COMPLETE
- private PrimitiveObjectInspector xInputOI;
- private PrimitiveObjectInspector yInputOI;
-
- // For PARTIAL2 and FINAL
- private StructObjectInspector soi;
- private StructField countField;
- private StructField xavgField;
- private StructField yavgField;
- private StructField xvarField;
- private StructField yvarField;
- private StructField covarField;
- private LongObjectInspector countFieldOI;
- private DoubleObjectInspector xavgFieldOI;
- private DoubleObjectInspector yavgFieldOI;
- private DoubleObjectInspector xvarFieldOI;
- private DoubleObjectInspector yvarFieldOI;
- private DoubleObjectInspector covarFieldOI;
-
- // For PARTIAL1 and PARTIAL2
- private Object[] partialResult;
-
- // For FINAL and COMPLETE
- private DoubleWritable result;
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters)
- throws HiveException {
- super.init(m, parameters);
-
- // init input
- if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
- assert (parameters.length == 2);
- xInputOI = (PrimitiveObjectInspector) parameters[0];
- yInputOI = (PrimitiveObjectInspector) parameters[1];
- } else {
- assert (parameters.length == 1);
- soi = (StructObjectInspector) parameters[0];
-
- countField = soi.getStructFieldRef("count");
- xavgField = soi.getStructFieldRef("xavg");
- yavgField = soi.getStructFieldRef("yavg");
- xvarField = soi.getStructFieldRef("xvar");
- yvarField = soi.getStructFieldRef("yvar");
- covarField = soi.getStructFieldRef("covar");
-
- countFieldOI = (LongObjectInspector) countField
- .getFieldObjectInspector();
- xavgFieldOI = (DoubleObjectInspector) xavgField
- .getFieldObjectInspector();
- yavgFieldOI = (DoubleObjectInspector) yavgField
- .getFieldObjectInspector();
- xvarFieldOI = (DoubleObjectInspector) xvarField
- .getFieldObjectInspector();
- yvarFieldOI = (DoubleObjectInspector) yvarField
- .getFieldObjectInspector();
- covarFieldOI = (DoubleObjectInspector) covarField
- .getFieldObjectInspector();
- }
-
- // init output
- if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
- // The output of a partial aggregation is a struct containing
- // a long count, two double averages, two double variances,
- // and a double covariance.
-
- ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
-
- foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
-
- ArrayList<String> fname = new ArrayList<String>();
- fname.add("count");
- fname.add("xavg");
- fname.add("yavg");
- fname.add("xvar");
- fname.add("yvar");
- fname.add("covar");
-
- partialResult = new Object[6];
- partialResult[0] = new LongWritable(0);
- partialResult[1] = new DoubleWritable(0);
- partialResult[2] = new DoubleWritable(0);
- partialResult[3] = new DoubleWritable(0);
- partialResult[4] = new DoubleWritable(0);
- partialResult[5] = new DoubleWritable(0);
-
- return ObjectInspectorFactory.getStandardStructObjectInspector(
- fname, foi);
-
- } else {
- setResult(new DoubleWritable(0));
- return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
- }
- }
-
- static class StdAgg implements SerializableBuffer {
- long count; // number n of elements
- double xavg; // average of x elements
- double yavg; // average of y elements
- double xvar; // n times the variance of x elements
- double yvar; // n times the variance of y elements
- double covar; // n times the covariance
-
- @Override
- public void deSerializeAggBuffer(byte[] data, int start, int len) {
- count = BufferSerDeUtil.getLong(data, start);
- start += 8;
- xavg = BufferSerDeUtil.getDouble(data, start);
- start += 8;
- yavg = BufferSerDeUtil.getDouble(data, start);
- start += 8;
- xvar = BufferSerDeUtil.getDouble(data, start);
- start += 8;
- yvar = BufferSerDeUtil.getDouble(data, start);
- start += 8;
- covar = BufferSerDeUtil.getDouble(data, start);
- }
-
- @Override
- public void serializeAggBuffer(byte[] data, int start, int len) {
- BufferSerDeUtil.writeLong(count, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(xavg, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(yavg, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(xvar, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(yvar, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(covar, data, start);
- }
-
- @Override
- public void serializeAggBuffer(DataOutput output)
- throws IOException {
- output.writeLong(count);
- output.writeDouble(xavg);
- output.writeDouble(yavg);
- output.writeDouble(xvar);
- output.writeDouble(yvar);
- output.writeDouble(covar);
- }
- };
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException {
- StdAgg result = new StdAgg();
- reset(result);
- return result;
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException {
- StdAgg myagg = (StdAgg) agg;
- myagg.count = 0;
- myagg.xavg = 0;
- myagg.yavg = 0;
- myagg.xvar = 0;
- myagg.yvar = 0;
- myagg.covar = 0;
- }
-
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters)
- throws HiveException {
- assert (parameters.length == 2);
- Object px = parameters[0];
- Object py = parameters[1];
- if (px != null && py != null) {
- StdAgg myagg = (StdAgg) agg;
- double vx = PrimitiveObjectInspectorUtils.getDouble(px,
- xInputOI);
- double vy = PrimitiveObjectInspectorUtils.getDouble(py,
- yInputOI);
- double xavgOld = myagg.xavg;
- double yavgOld = myagg.yavg;
- myagg.count++;
- myagg.xavg += (vx - xavgOld) / myagg.count;
- myagg.yavg += (vy - yavgOld) / myagg.count;
- if (myagg.count > 1) {
- myagg.covar += (vx - xavgOld) * (vy - myagg.yavg);
- myagg.xvar += (vx - xavgOld) * (vx - myagg.xavg);
- myagg.yvar += (vy - yavgOld) * (vy - myagg.yavg);
- }
- }
- }
-
- @Override
- public Object terminatePartial(AggregationBuffer agg)
- throws HiveException {
- StdAgg myagg = (StdAgg) agg;
- ((LongWritable) partialResult[0]).set(myagg.count);
- ((DoubleWritable) partialResult[1]).set(myagg.xavg);
- ((DoubleWritable) partialResult[2]).set(myagg.yavg);
- ((DoubleWritable) partialResult[3]).set(myagg.xvar);
- ((DoubleWritable) partialResult[4]).set(myagg.yvar);
- ((DoubleWritable) partialResult[5]).set(myagg.covar);
- return partialResult;
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial)
- throws HiveException {
- if (partial != null) {
- StdAgg myagg = (StdAgg) agg;
-
- Object partialCount = soi.getStructFieldData(partial,
- countField);
- Object partialXAvg = soi.getStructFieldData(partial, xavgField);
- Object partialYAvg = soi.getStructFieldData(partial, yavgField);
- Object partialXVar = soi.getStructFieldData(partial, xvarField);
- Object partialYVar = soi.getStructFieldData(partial, yvarField);
- Object partialCovar = soi.getStructFieldData(partial,
- covarField);
-
- long nA = myagg.count;
- long nB = countFieldOI.get(partialCount);
-
- if (nA == 0) {
- // Just copy the information since there is nothing so far
- myagg.count = countFieldOI.get(partialCount);
- myagg.xavg = xavgFieldOI.get(partialXAvg);
- myagg.yavg = yavgFieldOI.get(partialYAvg);
- myagg.xvar = xvarFieldOI.get(partialXVar);
- myagg.yvar = yvarFieldOI.get(partialYVar);
- myagg.covar = covarFieldOI.get(partialCovar);
- }
-
- if (nA != 0 && nB != 0) {
- // Merge the two partials
- double xavgA = myagg.xavg;
- double yavgA = myagg.yavg;
- double xavgB = xavgFieldOI.get(partialXAvg);
- double yavgB = yavgFieldOI.get(partialYAvg);
- double xvarB = xvarFieldOI.get(partialXVar);
- double yvarB = yvarFieldOI.get(partialYVar);
- double covarB = covarFieldOI.get(partialCovar);
-
- myagg.count += nB;
- myagg.xavg = (xavgA * nA + xavgB * nB) / myagg.count;
- myagg.yavg = (yavgA * nA + yavgB * nB) / myagg.count;
- myagg.xvar += xvarB + (xavgA - xavgB) * (xavgA - xavgB)
- * myagg.count;
- myagg.yvar += yvarB + (yavgA - yavgB) * (yavgA - yavgB)
- * myagg.count;
- myagg.covar += covarB + (xavgA - xavgB) * (yavgA - yavgB)
- * ((double) (nA * nB) / myagg.count);
- }
- }
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException {
- StdAgg myagg = (StdAgg) agg;
-
- if (myagg.count < 2) { // SQL standard - return null for zero or one
- // pair
- return null;
- } else {
- getResult().set(
- myagg.covar / java.lang.Math.sqrt(myagg.xvar)
- / java.lang.Math.sqrt(myagg.yvar));
- return getResult();
- }
- }
-
- public void setResult(DoubleWritable result) {
- this.result = result;
- }
-
- public DoubleWritable getResult() {
- return result;
- }
- }
-
-}
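
The class comment above gives the Pebay/Knuth one-pass update and merge rules. Note that the deleted merge() scales the (mx_A - mx_B)^2 and (my_A - my_B)^2 terms by the merged count, whereas the comment's formula uses n_A*n_B/(n_A+n_B); the standalone sketch below follows the comment's formulas. The names Agg, iterate, merge and corr are illustrative, not part of the deleted file.

// Standalone sketch of the one-pass correlation update and merge described in the
// class comment of the deleted GenericUDAFCorrelation. The merge here uses the
// comment's nA*nB/(nA+nB) scaling for the variance terms.
public final class CorrelationSketch {

    static final class Agg {
        long n;
        double xavg, yavg;   // running means
        double xvar, yvar;   // n * variance of x and y
        double covar;        // n * covariance
    }

    static void iterate(Agg a, double x, double y) {
        double xavgOld = a.xavg, yavgOld = a.yavg;
        a.n++;
        a.xavg += (x - xavgOld) / a.n;
        a.yavg += (y - yavgOld) / a.n;
        if (a.n > 1) {
            a.covar += (x - xavgOld) * (y - a.yavg);
            a.xvar += (x - xavgOld) * (x - a.xavg);
            a.yvar += (y - yavgOld) * (y - a.yavg);
        }
    }

    static Agg merge(Agg A, Agg B) {
        Agg m = new Agg();
        m.n = A.n + B.n;
        m.xavg = (A.xavg * A.n + B.xavg * B.n) / m.n;
        m.yavg = (A.yavg * A.n + B.yavg * B.n) / m.n;
        double k = (double) A.n * B.n / m.n;          // nA*nB/(nA+nB)
        m.xvar = A.xvar + B.xvar + (A.xavg - B.xavg) * (A.xavg - B.xavg) * k;
        m.yvar = A.yvar + B.yvar + (A.yavg - B.yavg) * (A.yavg - B.yavg) * k;
        m.covar = A.covar + B.covar + (A.xavg - B.xavg) * (A.yavg - B.yavg) * k;
        return m;
    }

    static Double corr(Agg a) {
        if (a.n < 2) {
            return null;                              // SQL standard: NULL for 0 or 1 pairs
        }
        return a.covar / Math.sqrt(a.xvar) / Math.sqrt(a.yvar);
    }

    public static void main(String[] args) {
        double[][] left = { { 1, 2 }, { 2, 4 }, { 3, 5 } };
        double[][] right = { { 4, 9 }, { 5, 10 } };
        Agg a = new Agg();
        Agg b = new Agg();
        for (double[] p : left) {
            iterate(a, p[0], p[1]);
        }
        for (double[] p : right) {
            iterate(b, p[0], p[1]);
        }
        System.out.println("corr = " + corr(merge(a, b)));   // ~0.979 for this data
    }
}
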
diff --git a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java b/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
deleted file mode 100644
index 4160d5b..0000000
--- a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
+++ /dev/null
@@ -1,182 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.udf.generic;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.LongWritable;
-
-import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
-import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
-
-/**
- * This class implements the COUNT aggregation function as in SQL.
- */
-@Description(name = "count", value = "_FUNC_(*) - Returns the total number of retrieved rows, including "
- + "rows containing NULL values.\n"
-
- + "_FUNC_(expr) - Returns the number of rows for which the supplied "
- + "expression is non-NULL.\n"
-
- + "_FUNC_(DISTINCT expr[, expr...]) - Returns the number of rows for "
- + "which the supplied expression(s) are unique and non-NULL.")
-public class GenericUDAFCount implements GenericUDAFResolver2 {
-
- @Override
- public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
- throws SemanticException {
- // This method implementation is preserved for backward compatibility.
- return new GenericUDAFCountEvaluator();
- }
-
- @Override
- public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo paramInfo)
- throws SemanticException {
-
- TypeInfo[] parameters = paramInfo.getParameters();
-
- if (parameters.length == 0) {
- if (!paramInfo.isAllColumns()) {
- throw new UDFArgumentException("Argument expected");
- }
- assert !paramInfo.isDistinct() : "DISTINCT not supported with *";
- } else {
- if (parameters.length > 1 && !paramInfo.isDistinct()) {
- throw new UDFArgumentException(
- "DISTINCT keyword must be specified");
- }
- assert !paramInfo.isAllColumns() : "* not supported in expression list";
- }
-
- return new GenericUDAFCountEvaluator().setCountAllColumns(paramInfo
- .isAllColumns());
- }
-
- /**
- * GenericUDAFCountEvaluator.
- *
- */
- public static class GenericUDAFCountEvaluator extends GenericUDAFEvaluator {
- private boolean countAllColumns = false;
- private LongObjectInspector partialCountAggOI;
- private LongWritable result;
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters)
- throws HiveException {
- super.init(m, parameters);
- partialCountAggOI = PrimitiveObjectInspectorFactory.writableLongObjectInspector;
- result = new LongWritable(0);
- return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
- }
-
- private GenericUDAFCountEvaluator setCountAllColumns(
- boolean countAllCols) {
- countAllColumns = countAllCols;
- return this;
- }
-
- /** class for storing count value. */
- static class CountAgg implements SerializableBuffer {
- long value;
-
- @Override
- public void deSerializeAggBuffer(byte[] data, int start, int len) {
- value = BufferSerDeUtil.getLong(data, start);
- }
-
- @Override
- public void serializeAggBuffer(byte[] data, int start, int len) {
- BufferSerDeUtil.writeLong(value, data, start);
- }
-
- @Override
- public void serializeAggBuffer(DataOutput output)
- throws IOException {
- output.writeLong(value);
- }
- }
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException {
- CountAgg buffer = new CountAgg();
- reset(buffer);
- return buffer;
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException {
- ((CountAgg) agg).value = 0;
- }
-
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters)
- throws HiveException {
- // parameters == null means the input table/split is empty
- if (parameters == null) {
- return;
- }
- if (countAllColumns) {
- assert parameters.length == 0;
- ((CountAgg) agg).value++;
- } else {
- assert parameters.length > 0;
- boolean countThisRow = true;
- for (Object nextParam : parameters) {
- if (nextParam == null) {
- countThisRow = false;
- break;
- }
- }
- if (countThisRow) {
- ((CountAgg) agg).value++;
- }
- }
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial)
- throws HiveException {
- if (partial != null) {
- long p = partialCountAggOI.get(partial);
- ((CountAgg) agg).value += p;
- }
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException {
- result.set(((CountAgg) agg).value);
- return result;
- }
-
- @Override
- public Object terminatePartial(AggregationBuffer agg)
- throws HiveException {
- return terminate(agg);
- }
- }
-}
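
Each deleted evaluator pairs its Hive AggregationBuffer with the Hivesterix SerializableBuffer interface so the aggregation state can be written into, and restored from, a fixed-width byte span; CountAgg is the smallest case, a single 8-byte long. Below is a hedged sketch of that round trip using java.nio.ByteBuffer as a stand-in for BufferSerDeUtil, whose byte order and internals are not visible in this diff; the big-endian encoding here is an assumption.

import java.io.ByteArrayOutputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;

// Sketch of the fixed-offset byte round trip the deleted CountAgg performs via
// BufferSerDeUtil: the count is written as an 8-byte long at a given offset and
// read back from the same offset; the DataOutput variant streams the same bytes.
public final class CountAggSketch {

    long value;

    void serializeAggBuffer(byte[] data, int start, int len) {
        ByteBuffer.wrap(data, start, len).putLong(value);
    }

    void deSerializeAggBuffer(byte[] data, int start, int len) {
        value = ByteBuffer.wrap(data, start, len).getLong();
    }

    void serializeAggBuffer(DataOutput output) throws IOException {
        output.writeLong(value);               // same 8 bytes, streamed instead of offset-based
    }

    public static void main(String[] args) throws IOException {
        CountAggSketch agg = new CountAggSketch();
        agg.value = 42;

        byte[] frame = new byte[16];
        agg.serializeAggBuffer(frame, 4, 8);   // write at offset 4 inside a larger frame

        CountAggSketch copy = new CountAggSketch();
        copy.deSerializeAggBuffer(frame, 4, 8);
        System.out.println("restored count = " + copy.value);   // 42

        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        agg.serializeAggBuffer(new DataOutputStream(bos));
        System.out.println("streamed bytes = " + bos.size());   // 8
    }
}
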
diff --git a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java b/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java
deleted file mode 100644
index 11d9dc3..0000000
--- a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java
+++ /dev/null
@@ -1,372 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.udf.generic;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.LongWritable;
-
-import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
-import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
-
-/**
- * Compute the covariance covar_pop(x, y), using the following one-pass method
- * (ref. "Formulas for Robust, One-Pass Parallel Computation of Covariances and
- * Arbitrary-Order Statistical Moments", Philippe Pebay, Sandia Labs):
- *
- * Incremental:
- *   n : <count>
- *   mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg>
- *   my_n = my_(n-1) + [y_n - my_(n-1)]/n : <yavg>
- *   c_n = c_(n-1) + (x_n - mx_(n-1))*(y_n - my_n) : <covariance * n>
- *
- * Merge: c_X = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/n_X
- *
- */
-@Description(name = "covariance,covar_pop", value = "_FUNC_(x,y) - Returns the population covariance of a set of number pairs", extended = "The function takes as arguments any pair of numeric types and returns a double.\n"
- + "Any pair with a NULL is ignored. If the function is applied to an empty set, NULL\n"
- + "will be returned. Otherwise, it computes the following:\n"
- + " (SUM(x*y)-SUM(x)*SUM(y)/COUNT(x,y))/COUNT(x,y)\n"
- + "where neither x nor y is null.")
-public class GenericUDAFCovariance extends AbstractGenericUDAFResolver {
-
- static final Log LOG = LogFactory.getLog(GenericUDAFCovariance.class
- .getName());
-
- @Override
- public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
- throws SemanticException {
- if (parameters.length != 2) {
- throw new UDFArgumentTypeException(parameters.length - 1,
- "Exactly two arguments are expected.");
- }
-
- if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
- throw new UDFArgumentTypeException(0,
- "Only primitive type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
-
- if (parameters[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
- throw new UDFArgumentTypeException(1,
- "Only primitive type arguments are accepted but "
- + parameters[1].getTypeName() + " is passed.");
- }
-
- switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- case FLOAT:
- case DOUBLE:
- switch (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) {
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- case FLOAT:
- case DOUBLE:
- return new GenericUDAFCovarianceEvaluator();
- case STRING:
- case BOOLEAN:
- default:
- throw new UDFArgumentTypeException(1,
- "Only numeric or string type arguments are accepted but "
- + parameters[1].getTypeName() + " is passed.");
- }
- case STRING:
- case BOOLEAN:
- default:
- throw new UDFArgumentTypeException(0,
- "Only numeric or string type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
- }
-
- /**
- * Evaluate the variance using the algorithm described in
- * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance,
- * presumably by Pébay, Philippe (2008), in "Formulas for Robust, One-Pass
- * Parallel Computation of Covariances and Arbitrary-Order Statistical
- * Moments", Technical Report SAND2008-6212, Sandia National Laboratories,
- * http://infoserve.sandia.gov/sand_doc/2008/086212.pdf
- *
- * Incremental:
- *   n : <count>
- *   mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg>
- *   my_n = my_(n-1) + [y_n - my_(n-1)]/n : <yavg>
- *   c_n = c_(n-1) + (x_n - mx_(n-1))*(y_n - my_n) : <covariance * n>
- *
- * Merge: c_X = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/n_X
- *
- * This one-pass algorithm is stable.
- *
- */
- public static class GenericUDAFCovarianceEvaluator extends
- GenericUDAFEvaluator {
-
- // For PARTIAL1 and COMPLETE
- private PrimitiveObjectInspector xInputOI;
- private PrimitiveObjectInspector yInputOI;
-
- // For PARTIAL2 and FINAL
- private StructObjectInspector soi;
- private StructField countField;
- private StructField xavgField;
- private StructField yavgField;
- private StructField covarField;
- private LongObjectInspector countFieldOI;
- private DoubleObjectInspector xavgFieldOI;
- private DoubleObjectInspector yavgFieldOI;
- private DoubleObjectInspector covarFieldOI;
-
- // For PARTIAL1 and PARTIAL2
- private Object[] partialResult;
-
- // For FINAL and COMPLETE
- private DoubleWritable result;
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters)
- throws HiveException {
- super.init(m, parameters);
-
- // init input
- if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
- assert (parameters.length == 2);
- xInputOI = (PrimitiveObjectInspector) parameters[0];
- yInputOI = (PrimitiveObjectInspector) parameters[1];
- } else {
- assert (parameters.length == 1);
- soi = (StructObjectInspector) parameters[0];
-
- countField = soi.getStructFieldRef("count");
- xavgField = soi.getStructFieldRef("xavg");
- yavgField = soi.getStructFieldRef("yavg");
- covarField = soi.getStructFieldRef("covar");
-
- countFieldOI = (LongObjectInspector) countField
- .getFieldObjectInspector();
- xavgFieldOI = (DoubleObjectInspector) xavgField
- .getFieldObjectInspector();
- yavgFieldOI = (DoubleObjectInspector) yavgField
- .getFieldObjectInspector();
- covarFieldOI = (DoubleObjectInspector) covarField
- .getFieldObjectInspector();
- }
-
- // init output
- if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
- // The output of a partial aggregation is a struct containing
- // a long count, two double averages, and a double covariance.
-
- ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
-
- foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
-
- ArrayList<String> fname = new ArrayList<String>();
- fname.add("count");
- fname.add("xavg");
- fname.add("yavg");
- fname.add("covar");
-
- partialResult = new Object[4];
- partialResult[0] = new LongWritable(0);
- partialResult[1] = new DoubleWritable(0);
- partialResult[2] = new DoubleWritable(0);
- partialResult[3] = new DoubleWritable(0);
-
- return ObjectInspectorFactory.getStandardStructObjectInspector(
- fname, foi);
-
- } else {
- setResult(new DoubleWritable(0));
- return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
- }
- }
-
- static class StdAgg implements SerializableBuffer {
- long count; // number n of elements
- double xavg; // average of x elements
- double yavg; // average of y elements
- double covar; // n times the covariance
-
- @Override
- public void deSerializeAggBuffer(byte[] data, int start, int len) {
- count = BufferSerDeUtil.getLong(data, start);
- start += 8;
- xavg = BufferSerDeUtil.getDouble(data, start);
- start += 8;
- yavg = BufferSerDeUtil.getDouble(data, start);
- start += 8;
- covar = BufferSerDeUtil.getDouble(data, start);
- }
-
- @Override
- public void serializeAggBuffer(byte[] data, int start, int len) {
- BufferSerDeUtil.writeLong(count, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(xavg, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(yavg, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(covar, data, start);
- }
-
- @Override
- public void serializeAggBuffer(DataOutput output)
- throws IOException {
- output.writeLong(count);
- output.writeDouble(xavg);
- output.writeDouble(yavg);
- output.writeDouble(covar);
- }
- };
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException {
- StdAgg result = new StdAgg();
- reset(result);
- return result;
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException {
- StdAgg myagg = (StdAgg) agg;
- myagg.count = 0;
- myagg.xavg = 0;
- myagg.yavg = 0;
- myagg.covar = 0;
- }
-
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters)
- throws HiveException {
- assert (parameters.length == 2);
- Object px = parameters[0];
- Object py = parameters[1];
- if (px != null && py != null) {
- StdAgg myagg = (StdAgg) agg;
- double vx = PrimitiveObjectInspectorUtils.getDouble(px,
- xInputOI);
- double vy = PrimitiveObjectInspectorUtils.getDouble(py,
- yInputOI);
- myagg.count++;
- myagg.yavg = myagg.yavg + (vy - myagg.yavg) / myagg.count;
- if (myagg.count > 1) {
- myagg.covar += (vx - myagg.xavg) * (vy - myagg.yavg);
- }
- myagg.xavg = myagg.xavg + (vx - myagg.xavg) / myagg.count;
- }
- }
-
- @Override
- public Object terminatePartial(AggregationBuffer agg)
- throws HiveException {
- StdAgg myagg = (StdAgg) agg;
- ((LongWritable) partialResult[0]).set(myagg.count);
- ((DoubleWritable) partialResult[1]).set(myagg.xavg);
- ((DoubleWritable) partialResult[2]).set(myagg.yavg);
- ((DoubleWritable) partialResult[3]).set(myagg.covar);
- return partialResult;
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial)
- throws HiveException {
- if (partial != null) {
- StdAgg myagg = (StdAgg) agg;
-
- Object partialCount = soi.getStructFieldData(partial,
- countField);
- Object partialXAvg = soi.getStructFieldData(partial, xavgField);
- Object partialYAvg = soi.getStructFieldData(partial, yavgField);
- Object partialCovar = soi.getStructFieldData(partial,
- covarField);
-
- long nA = myagg.count;
- long nB = countFieldOI.get(partialCount);
-
- if (nA == 0) {
- // Just copy the information since there is nothing so far
- myagg.count = countFieldOI.get(partialCount);
- myagg.xavg = xavgFieldOI.get(partialXAvg);
- myagg.yavg = yavgFieldOI.get(partialYAvg);
- myagg.covar = covarFieldOI.get(partialCovar);
- }
-
- if (nA != 0 && nB != 0) {
- // Merge the two partials
- double xavgA = myagg.xavg;
- double yavgA = myagg.yavg;
- double xavgB = xavgFieldOI.get(partialXAvg);
- double yavgB = yavgFieldOI.get(partialYAvg);
- double covarB = covarFieldOI.get(partialCovar);
-
- myagg.count += nB;
- myagg.xavg = (xavgA * nA + xavgB * nB) / myagg.count;
- myagg.yavg = (yavgA * nA + yavgB * nB) / myagg.count;
- myagg.covar += covarB + (xavgA - xavgB) * (yavgA - yavgB)
- * ((double) (nA * nB) / myagg.count);
- }
- }
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException {
- StdAgg myagg = (StdAgg) agg;
-
- if (myagg.count == 0) { // SQL standard - return null for zero
- // elements
- return null;
- } else {
- getResult().set(myagg.covar / (myagg.count));
- return getResult();
- }
- }
-
- public void setResult(DoubleWritable result) {
- this.result = result;
- }
-
- public DoubleWritable getResult() {
- return result;
- }
- }
-
-}
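
The covariance evaluator above keeps (count, xavg, yavg, n*covariance) and merges partials with c_X = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/n_X. A standalone sketch that checks this one-pass merge against a direct two-pass covar_pop; the names below are illustrative, not taken from the deleted file.

// Sketch checking the update and merge rules of the deleted GenericUDAFCovarianceEvaluator
// against a direct two-pass population covariance over the same pairs.
public final class CovarianceSketch {

    static final class Agg {
        long n;
        double xavg, yavg;
        double covar;          // n times the covariance

        void iterate(double x, double y) {
            n++;
            yavg += (y - yavg) / n;
            if (n > 1) {
                covar += (x - xavg) * (y - yavg);   // uses the old xavg and the new yavg
            }
            xavg += (x - xavg) / n;
        }

        void merge(Agg other) {
            long nA = n, nB = other.n;
            if (nA == 0) {
                n = other.n; xavg = other.xavg; yavg = other.yavg; covar = other.covar;
                return;
            }
            if (nB == 0) {
                return;
            }
            double xavgA = xavg, yavgA = yavg;
            n += nB;
            xavg = (xavgA * nA + other.xavg * nB) / n;
            yavg = (yavgA * nA + other.yavg * nB) / n;
            covar += other.covar
                    + (xavgA - other.xavg) * (yavgA - other.yavg) * ((double) (nA * nB) / n);
        }

        double covarPop() {
            return covar / n;   // caller must ensure n > 0 (the UDAF returns NULL otherwise)
        }
    }

    public static void main(String[] args) {
        double[] x = { 1, 2, 3, 4, 6 };
        double[] y = { 2, 1, 4, 3, 8 };

        // One-pass: two partials merged.
        Agg a = new Agg();
        Agg b = new Agg();
        for (int i = 0; i < 3; i++) a.iterate(x[i], y[i]);
        for (int i = 3; i < 5; i++) b.iterate(x[i], y[i]);
        a.merge(b);

        // Two-pass reference.
        double mx = 0, my = 0;
        for (int i = 0; i < x.length; i++) { mx += x[i]; my += y[i]; }
        mx /= x.length;
        my /= y.length;
        double ref = 0;
        for (int i = 0; i < x.length; i++) ref += (x[i] - mx) * (y[i] - my);
        ref /= x.length;

        System.out.println("one-pass covar_pop = " + a.covarPop());   // 3.68
        System.out.println("two-pass covar_pop = " + ref);            // 3.68
    }
}
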
diff --git a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java b/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
deleted file mode 100644
index 0323531..0000000
--- a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
+++ /dev/null
@@ -1,294 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.udf.generic;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.util.StringUtils;
-
-import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
-import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
-
-/**
- * GenericUDAFSum.
- *
- */
-@Description(name = "sum", value = "_FUNC_(x) - Returns the sum of a set of numbers")
-public class GenericUDAFSum extends AbstractGenericUDAFResolver {
-
- static final Log LOG = LogFactory.getLog(GenericUDAFSum.class.getName());
-
- @Override
- public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
- throws SemanticException {
- if (parameters.length != 1) {
- throw new UDFArgumentTypeException(parameters.length - 1,
- "Exactly one argument is expected.");
- }
-
- if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
- throw new UDFArgumentTypeException(0,
- "Only primitive type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
- switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- return new GenericUDAFSumLong();
- case FLOAT:
- case DOUBLE:
- case STRING:
- return new GenericUDAFSumDouble();
- case BOOLEAN:
- default:
- throw new UDFArgumentTypeException(0,
- "Only numeric or string type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
- }
-
- /**
- * GenericUDAFSumDouble.
- *
- */
- public static class GenericUDAFSumDouble extends GenericUDAFEvaluator {
- private PrimitiveObjectInspector inputOI;
- private DoubleWritable result;
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters)
- throws HiveException {
- assert (parameters.length == 1);
- super.init(m, parameters);
- result = new DoubleWritable(0);
- inputOI = (PrimitiveObjectInspector) parameters[0];
- return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
- }
-
- /** class for storing double sum value. */
- static class SumDoubleAgg implements SerializableBuffer {
- boolean empty;
- double sum;
-
- @Override
- public void deSerializeAggBuffer(byte[] data, int start, int len) {
- empty = BufferSerDeUtil.getBoolean(data, start);
- start += 1;
- sum = BufferSerDeUtil.getDouble(data, start);
- }
-
- @Override
- public void serializeAggBuffer(byte[] data, int start, int len) {
- BufferSerDeUtil.writeBoolean(empty, data, start);
- start += 1;
- BufferSerDeUtil.writeDouble(sum, data, start);
- }
-
- @Override
- public void serializeAggBuffer(DataOutput output)
- throws IOException {
- output.writeBoolean(empty);
- output.writeDouble(sum);
- }
- }
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException {
- SumDoubleAgg result = new SumDoubleAgg();
- reset(result);
- return result;
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException {
- SumDoubleAgg myagg = (SumDoubleAgg) agg;
- myagg.empty = true;
- myagg.sum = 0;
- }
-
- boolean warned = false;
-
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters)
- throws HiveException {
- assert (parameters.length == 1);
- try {
- merge(agg, parameters[0]);
- } catch (NumberFormatException e) {
- if (!warned) {
- warned = true;
- LOG.warn(getClass().getSimpleName() + " "
- + StringUtils.stringifyException(e));
- LOG.warn(getClass().getSimpleName()
- + " ignoring similar exceptions.");
- }
- }
- }
-
- @Override
- public Object terminatePartial(AggregationBuffer agg)
- throws HiveException {
- return terminate(agg);
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial)
- throws HiveException {
- if (partial != null) {
- SumDoubleAgg myagg = (SumDoubleAgg) agg;
- myagg.empty = false;
- myagg.sum += PrimitiveObjectInspectorUtils.getDouble(partial,
- inputOI);
- }
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException {
- SumDoubleAgg myagg = (SumDoubleAgg) agg;
- if (myagg.empty) {
- return null;
- }
- result.set(myagg.sum);
- return result;
- }
-
- }
-
- /**
- * GenericUDAFSumLong.
- *
- */
- public static class GenericUDAFSumLong extends GenericUDAFEvaluator {
- private PrimitiveObjectInspector inputOI;
- private LongWritable result;
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters)
- throws HiveException {
- assert (parameters.length == 1);
- super.init(m, parameters);
- result = new LongWritable(0);
- inputOI = (PrimitiveObjectInspector) parameters[0];
- return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
- }
-
- /** class for storing long sum value. */
- static class SumLongAgg implements SerializableBuffer {
- boolean empty;
- long sum;
-
- @Override
- public void deSerializeAggBuffer(byte[] data, int start, int len) {
- empty = BufferSerDeUtil.getBoolean(data, start);
- start += 1;
- sum = BufferSerDeUtil.getLong(data, start);
- }
-
- @Override
- public void serializeAggBuffer(byte[] data, int start, int len) {
- BufferSerDeUtil.writeBoolean(empty, data, start);
- start += 1;
- BufferSerDeUtil.writeLong(sum, data, start);
- }
-
- @Override
- public void serializeAggBuffer(DataOutput output)
- throws IOException {
- output.writeBoolean(empty);
- output.writeLong(sum);
- }
- }
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException {
- SumLongAgg result = new SumLongAgg();
- reset(result);
- return result;
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException {
- SumLongAgg myagg = (SumLongAgg) agg;
- myagg.empty = true;
- myagg.sum = 0;
- }
-
- private boolean warned = false;
-
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters)
- throws HiveException {
- assert (parameters.length == 1);
- try {
- merge(agg, parameters[0]);
- } catch (NumberFormatException e) {
- if (!warned) {
- warned = true;
- LOG.warn(getClass().getSimpleName() + " "
- + StringUtils.stringifyException(e));
- }
- }
- }
-
- @Override
- public Object terminatePartial(AggregationBuffer agg)
- throws HiveException {
- return terminate(agg);
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial)
- throws HiveException {
- if (partial != null) {
- SumLongAgg myagg = (SumLongAgg) agg;
- myagg.sum += PrimitiveObjectInspectorUtils.getLong(partial,
- inputOI);
- myagg.empty = false;
- }
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException {
- SumLongAgg myagg = (SumLongAgg) agg;
- if (myagg.empty) {
- return null;
- }
- result.set(myagg.sum);
- return result;
- }
-
- }
-
-}
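
The deleted sum resolver dispatches on the primitive category: BYTE/SHORT/INT/LONG accumulate into a long, FLOAT/DOUBLE/STRING into a double, and string rows that fail to parse are logged once and then skipped rather than failing the query. A rough standalone sketch of that behaviour, where Double.parseDouble stands in for PrimitiveObjectInspectorUtils.getDouble (which raises NumberFormatException in the same situation); class and method names are illustrative only.

// Sketch of the type dispatch in the deleted GenericUDAFSum: integral inputs sum into
// a long, floating-point and string inputs sum into a double, and unparsable string
// rows are warned about once and skipped.
public final class SumSketch {

    static Long sumLongs(Long... values) {
        boolean empty = true;
        long sum = 0;
        for (Long v : values) {
            if (v != null) {
                empty = false;
                sum += v;
            }
        }
        return empty ? null : sum;          // SQL: SUM over no rows is NULL
    }

    static Double sumAsDoubles(String... values) {
        boolean empty = true;
        boolean warned = false;
        double sum = 0;
        for (String v : values) {
            if (v == null) {
                continue;
            }
            try {
                sum += Double.parseDouble(v);
                empty = false;
            } catch (NumberFormatException e) {
                if (!warned) {               // warn once, then silently skip similar rows
                    warned = true;
                    System.err.println("skipping unparsable value: " + v);
                }
            }
        }
        return empty ? null : sum;
    }

    public static void main(String[] args) {
        System.out.println(sumLongs(1L, 2L, null, 3L));            // 6
        System.out.println(sumAsDoubles("1.5", "oops", "2.5"));    // 4.0
        System.out.println(sumLongs());                            // null
    }
}
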
diff --git a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java b/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
deleted file mode 100644
index 4c16f5a..0000000
--- a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
+++ /dev/null
@@ -1,331 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.udf.generic;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.util.StringUtils;
-
-import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
-import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
-
-/**
- * Compute the variance. This class is extended by: GenericUDAFVarianceSample
- * GenericUDAFStd GenericUDAFStdSample
- *
- */
-@Description(name = "variance,var_pop", value = "_FUNC_(x) - Returns the variance of a set of numbers")
-public class GenericUDAFVariance extends AbstractGenericUDAFResolver {
-
- static final Log LOG = LogFactory.getLog(GenericUDAFVariance.class
- .getName());
-
- @Override
- public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
- throws SemanticException {
- if (parameters.length != 1) {
- throw new UDFArgumentTypeException(parameters.length - 1,
- "Exactly one argument is expected.");
- }
-
- if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
- throw new UDFArgumentTypeException(0,
- "Only primitive type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
- switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- case FLOAT:
- case DOUBLE:
- case STRING:
- return new GenericUDAFVarianceEvaluator();
- case BOOLEAN:
- default:
- throw new UDFArgumentTypeException(0,
- "Only numeric or string type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
- }
-
- /**
- * Evaluate the variance using the algorithm described by Chan, Golub, and
- * LeVeque in
- * "Algorithms for computing the sample variance: analysis and recommendations"
- * The American Statistician, 37 (1983) pp. 242--247.
- *
- * variance = variance1 + variance2 + n/(m*(m+n)) * pow(((m/n)*t1 - t2),2)
- *
- * where:
- *   - variance is sum[(x-avg)^2] (this is actually n times the variance) and is updated at every step
- *   - n is the count of elements in chunk1
- *   - m is the count of elements in chunk2
- *   - t1 is the sum of elements in chunk1, t2 is the sum of elements in chunk2.
- *
- * This algorithm was proven to be numerically stable by J.L. Barlow in
- * "Error analysis of a pairwise summation algorithm to compute sample variance"
- * Numer. Math, 58 (1991) pp. 583--590
- *
- */
- public static class GenericUDAFVarianceEvaluator extends
- GenericUDAFEvaluator {
-
- // For PARTIAL1 and COMPLETE
- private PrimitiveObjectInspector inputOI;
-
- // For PARTIAL2 and FINAL
- private StructObjectInspector soi;
- private StructField countField;
- private StructField sumField;
- private StructField varianceField;
- private LongObjectInspector countFieldOI;
- private DoubleObjectInspector sumFieldOI;
-
- // For PARTIAL1 and PARTIAL2
- private Object[] partialResult;
-
- // For FINAL and COMPLETE
- private DoubleWritable result;
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters)
- throws HiveException {
- assert (parameters.length == 1);
- super.init(m, parameters);
-
- // init input
- if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
- inputOI = (PrimitiveObjectInspector) parameters[0];
- } else {
- soi = (StructObjectInspector) parameters[0];
-
- countField = soi.getStructFieldRef("count");
- sumField = soi.getStructFieldRef("sum");
- varianceField = soi.getStructFieldRef("variance");
-
- countFieldOI = (LongObjectInspector) countField
- .getFieldObjectInspector();
- sumFieldOI = (DoubleObjectInspector) sumField
- .getFieldObjectInspector();
- }
-
- // init output
- if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
- // The output of a partial aggregation is a struct containing
- // a long count and doubles sum and variance.
-
- ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
-
- foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
-
- ArrayList<String> fname = new ArrayList<String>();
- fname.add("count");
- fname.add("sum");
- fname.add("variance");
-
- partialResult = new Object[3];
- partialResult[0] = new LongWritable(0);
- partialResult[1] = new DoubleWritable(0);
- partialResult[2] = new DoubleWritable(0);
-
- return ObjectInspectorFactory.getStandardStructObjectInspector(
- fname, foi);
-
- } else {
- setResult(new DoubleWritable(0));
- return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
- }
- }
-
- static class StdAgg implements SerializableBuffer {
- long count; // number of elements
- double sum; // sum of elements
- double variance; // sum[x-avg^2] (this is actually n times the
- // variance)
-
- @Override
- public void deSerializeAggBuffer(byte[] data, int start, int len) {
- count = BufferSerDeUtil.getLong(data, start);
- start += 8;
- sum = BufferSerDeUtil.getDouble(data, start);
- start += 8;
- variance = BufferSerDeUtil.getDouble(data, start);
- }
-
- @Override
- public void serializeAggBuffer(byte[] data, int start, int len) {
- BufferSerDeUtil.writeLong(count, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(sum, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(variance, data, start);
- }
-
- @Override
- public void serializeAggBuffer(DataOutput output)
- throws IOException {
- output.writeLong(count);
- output.writeDouble(sum);
- output.writeDouble(variance);
- }
- };
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException {
- StdAgg result = new StdAgg();
- reset(result);
- return result;
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException {
- StdAgg myagg = (StdAgg) agg;
- myagg.count = 0;
- myagg.sum = 0;
- myagg.variance = 0;
- }
-
- private boolean warned = false;
-
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters)
- throws HiveException {
- assert (parameters.length == 1);
- Object p = parameters[0];
- if (p != null) {
- StdAgg myagg = (StdAgg) agg;
- try {
- double v = PrimitiveObjectInspectorUtils.getDouble(p,
- inputOI);
- myagg.count++;
- myagg.sum += v;
- if (myagg.count > 1) {
- double t = myagg.count * v - myagg.sum;
- myagg.variance += (t * t)
- / ((double) myagg.count * (myagg.count - 1));
- }
- } catch (NumberFormatException e) {
- if (!warned) {
- warned = true;
- LOG.warn(getClass().getSimpleName() + " "
- + StringUtils.stringifyException(e));
- LOG.warn(getClass().getSimpleName()
- + " ignoring similar exceptions.");
- }
- }
- }
- }
-
- @Override
- public Object terminatePartial(AggregationBuffer agg)
- throws HiveException {
- StdAgg myagg = (StdAgg) agg;
- ((LongWritable) partialResult[0]).set(myagg.count);
- ((DoubleWritable) partialResult[1]).set(myagg.sum);
- ((DoubleWritable) partialResult[2]).set(myagg.variance);
- return partialResult;
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial)
- throws HiveException {
- if (partial != null) {
- StdAgg myagg = (StdAgg) agg;
-
- Object partialCount = soi.getStructFieldData(partial,
- countField);
- Object partialSum = soi.getStructFieldData(partial, sumField);
- Object partialVariance = soi.getStructFieldData(partial,
- varianceField);
-
- long n = myagg.count;
- long m = countFieldOI.get(partialCount);
-
- if (n == 0) {
- // Just copy the information since there is nothing so far
- myagg.variance = sumFieldOI.get(partialVariance);
- myagg.count = countFieldOI.get(partialCount);
- myagg.sum = sumFieldOI.get(partialSum);
- }
-
- if (m != 0 && n != 0) {
- // Merge the two partials
-
- double a = myagg.sum;
- double b = sumFieldOI.get(partialSum);
-
- myagg.count += m;
- myagg.sum += b;
- double t = (m / (double) n) * a - b;
- myagg.variance += sumFieldOI.get(partialVariance)
- + ((n / (double) m) / ((double) n + m)) * t * t;
- }
- }
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException {
- StdAgg myagg = (StdAgg) agg;
-
- if (myagg.count == 0) { // SQL standard - return null for zero
- // elements
- return null;
- } else {
- if (myagg.count > 1) {
- getResult().set(myagg.variance / (myagg.count));
- } else { // for one element the variance is always 0
- getResult().set(0);
- }
- return getResult();
- }
- }
-
- public void setResult(DoubleWritable result) {
- this.result = result;
- }
-
- public DoubleWritable getResult() {
- return result;
- }
- }
-
-}
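
The variance evaluator above carries (count, sum, n*variance) and merges partials with the Chan-Golub-LeVeque rule quoted in its class comment. A standalone sketch that applies the same iterate() and merge() arithmetic and checks the result against a direct two-pass var_pop; the names below are illustrative, not taken from the deleted file.

// Sketch of the merge used by the deleted GenericUDAFVarianceEvaluator: two partials
// (n, t1, v1) and (m, t2, v2) combine as
//   v = v1 + v2 + n/(m*(n+m)) * ((m/n)*t1 - t2)^2,
// where v is n times the population variance and t is the running sum.
public final class VarianceSketch {

    static final class Agg {
        long count;
        double sum;
        double variance;   // n times the population variance

        void iterate(double v) {
            count++;
            sum += v;
            if (count > 1) {
                double t = count * v - sum;
                variance += (t * t) / ((double) count * (count - 1));
            }
        }

        void merge(Agg other) {
            long n = count, m = other.count;
            if (n == 0) {
                count = other.count; sum = other.sum; variance = other.variance;
                return;
            }
            if (m == 0) {
                return;
            }
            double t = (m / (double) n) * sum - other.sum;   // (m/n)*t1 - t2, using the old sum
            count += m;
            sum += other.sum;
            variance += other.variance + ((n / (double) m) / ((double) n + m)) * t * t;
        }

        double varPop() {
            return count > 1 ? variance / count : 0;   // the UDAF returns NULL for count == 0
        }
    }

    public static void main(String[] args) {
        double[] data = { 2, 4, 4, 4, 5, 5, 7, 9 };

        Agg a = new Agg();
        Agg b = new Agg();
        for (int i = 0; i < 4; i++) a.iterate(data[i]);
        for (int i = 4; i < data.length; i++) b.iterate(data[i]);
        a.merge(b);

        double mean = 0;
        for (double v : data) mean += v;
        mean /= data.length;
        double ref = 0;
        for (double v : data) ref += (v - mean) * (v - mean);
        ref /= data.length;

        System.out.println("merged var_pop = " + a.varPop());   // 4.0
        System.out.println("two-pass var_pop = " + ref);        // 4.0
    }
}
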
diff --git a/hivesterix/src/main/scripts/run.cmd b/hivesterix/src/main/scripts/run.cmd
deleted file mode 100755
index b8eb4a0..0000000
--- a/hivesterix/src/main/scripts/run.cmd
+++ /dev/null
@@ -1,63 +0,0 @@
-@ECHO OFF
-SETLOCAL
-
-:: Licensed to the Apache Software Foundation (ASF) under one or more
-:: contributor license agreements. See the NOTICE file distributed with
-:: this work for additional information regarding copyright ownership.
-:: The ASF licenses this file to You under the Apache License, Version 2.0
-:: (the "License"); you may not use this file except in compliance with
-:: the License. You may obtain a copy of the License at
-::
-:: http://www.apache.org/licenses/LICENSE-2.0
-::
-:: Unless required by applicable law or agreed to in writing, software
-:: distributed under the License is distributed on an "AS IS" BASIS,
-:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-:: See the License for the specific language governing permissions and
-:: limitations under the License.
-
-:: JAVA classpath
-:: Use the local variable CLASSPATH to add custom entries (e.g. JDBC drivers) to
-:: the classpath. Separate multiple paths with ":". Enclose the value
-:: in double quotes. Adding additional files or locations on separate
-:: lines makes things clearer.
-:: Note: If running under cygwin use "/cygdrive/c/..." for "C:/..."
-:: Example:
-::
-:: Set the CLASSPATH to a jar file and a directory. Note that
-:: "classes dir" is a directory of class files with a space in the name.
-::
-:: CLASSPATH="usr/local/Product1/lib/product.jar"
-:: CLASSPATH="${CLASSPATH}:../MyProject/classes dir"
-::
-SET CLASSPATH="@classpath@"
-
-:: JVM parameters
-:: If you want to modify the default parameters (e.g. maximum heap size -Xmx)
-:: for the Java virtual machine set the local variable JVM_PARAMETERS below
-:: Example:
-:: JVM_PARAMETERS=-Xms100M -Xmx200M
-::
-:: Below are the JVM parameters needed to do remote debugging using Intellij
-:: IDEA. Uncomment and then do: JVM_PARAMETERS="$IDEA_REMOTE_DEBUG_PARAMS"
-:: IDEA_REMOTE_DEBUG_PARAMS="-Xdebug -Xnoagent -Djava.compiler=NONE -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=5005"
-::
-:: JVM_PARAMETERS=
-
-:: ---------------------------------------------------------------------------
-:: Default configuration. Do not modify below this line.
-:: ---------------------------------------------------------------------------
-:: Application specific parameters
-
-SET MAIN_CLASS=@main.class@
-SET JVM_PARAMS=@jvm.params@
-SET PROGRAM_PARAMS=@program.params@
-
-:: Try to find java virtual machine
-IF NOT DEFINED JAVA (
- IF NOT DEFINED JAVA_HOME SET JAVA="java.exe"
- IF DEFINED JAVA_HOME SET JAVA="%JAVA_HOME%\bin\java.exe"
-)
-
-:: Run program
-%JAVA% %JVM_PARAMS% %JVM_PARAMETERS% -classpath %CLASSPATH% %MAIN_CLASS% %PROGRAM_PARAMS% %*
diff --git a/hivesterix/src/main/scripts/run.sh b/hivesterix/src/main/scripts/run.sh
deleted file mode 100755
index a998626..0000000
--- a/hivesterix/src/main/scripts/run.sh
+++ /dev/null
@@ -1,81 +0,0 @@
-#!/bin/sh
-# JAVA classpath
-# Use the local variable CLASSPATH to add custom entries (e.g. JDBC drivers) to
-# the classpath. Separate multiple paths with ":". Enclose the value
-# in double quotes. Adding additional files or locations on separate
-# lines makes things clearer.
-# Note: If running under Cygwin, use "/cygdrive/c/..." for "C:/..."
-# Example:
-#
-# Set the CLASSPATH to a jar file and a directory. Note that
-# "classes dir" is a directory of class files with a space in the name.
-#
-# CLASSPATH="usr/local/Product1/lib/product.jar"
-# CLASSPATH="${CLASSPATH}:../MyProject/classes dir"
-#
-CLASSPATH="@classpath@"
-
-# JVM parameters
-# If you want to modify the default parameters (e.g. maximum heap size -Xmx)
-# for the Java virtual machine set the local variable JVM_PARAMETERS below
-# Example:
-# JVM_PARAMETERS=-Xms100M -Xmx200M
-#
-# Below are the JVM parameters needed to do remote debugging using Intellij
-# IDEA. Uncomment and then do: JVM_PARAMETERS="$IDEA_REMOTE_DEBUG_PARAMS"
-# IDEA_REMOTE_DEBUG_PARAMS="-Xdebug -Xnoagent -Djava.compiler=NONE -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=5005"
-#
-# JVM_PARAMETERS=
-
-#run with shared memory setup
-#if [ -n "${RUN_SHARED_MEM}"]; then
-# JVM_PARAMETERS="${JVM_PARAMETERS} -Xdebug -Xnoagent -Djava.compiler=NONE -Xrunjdwp:transport=dt_shmem,server=n,address=javadebug,suspend=y"
-#fi
-
-# ---------------------------------------------------------------------------
-# Default configuration. Do not modify below this line.
-# ---------------------------------------------------------------------------
-# Application specific parameters
-
-MAIN_CLASS="@main.class@"
-JVM_PARAMS="@jvm.params@"
-PROGRAM_PARAMS="@program.params@"
-
-# Cygwin support. $cygwin _must_ be set to either true or false.
-case "`uname`" in
- CYGWIN*) cygwin=true ;;
- *) cygwin=false ;;
-esac
-
-# For Cygwin, ensure paths are in UNIX format before anything is touched
-if $cygwin; then
- [ -n "$JAVA_HOME" ] &&
- JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
- [ -n "$CLASSPATH" ] &&
- CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
-fi
-
-# Try to find java virtual machine
-if [ -z "${JAVA}" ]; then
- if [ -z "${JAVA_HOME}" ]; then
- JAVA=java
- else
- JAVA=${JAVA_HOME}/bin/java
- fi
-fi
-
-# Try to find directory where this script is located
-COMMAND="${PWD}/$0"
-if [ ! -f "${COMMAND}" ]; then
- COMMAND="$0"
-fi
-BASEDIR=`expr "${COMMAND}" : '\(.*\)/\.*'`
-
-# For Cygwin, switch paths to Windows format before running java
-if $cygwin; then
-# JAVA=`cygpath --path --windows "$JAVA"`
- CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
-fi
-
-# Run program
-${JAVA} ${JVM_PARAMS} ${JVM_PARAMETERS} -classpath "${CLASSPATH}" ${MAIN_CLASS} ${PROGRAM_PARAMS} $*
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestCase.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestCase.java
deleted file mode 100644
index a69a3f2..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestCase.java
+++ /dev/null
@@ -1,144 +0,0 @@
-package edu.uci.ics.hivesterix.perf;
-
-import java.io.File;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.Driver;
-import org.junit.Test;
-
-import edu.uci.ics.hivesterix.perf.base.AbstractPerfTestCase;
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-
-public class PerfTestCase extends AbstractPerfTestCase {
- private File resultFile;
- private FileSystem dfs;
-
- PerfTestCase(File queryFile, File resultFile) {
- super("testRuntimeFunction", queryFile);
- this.queryFile = queryFile;
- this.resultFile = resultFile;
- }
-
- @Test
- public void testRuntimeFunction() throws Exception {
- StringBuilder queryString = new StringBuilder();
- readFileToString(queryFile, queryString);
- String[] queries = queryString.toString().split(";");
- StringWriter sw = new StringWriter();
-
- HiveConf hconf = ConfUtil.getHiveConf();
- Driver driver = new Driver(hconf, new PrintWriter(sw));
- driver.init();
-
- dfs = FileSystem.get(ConfUtil.getJobConf());
-
- int i = 0;
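-        // Execute every statement except the last split element, which is the text
-        // following the final ';' in the query file (typically empty).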
- for (String query : queries) {
- if (i == queries.length - 1)
- break;
- driver.run(query);
- driver.clear();
- i++;
- }
-
- String warehouse = hconf.get("hive.metastore.warehouse.dir");
- String tableName = removeExt(resultFile.getName());
- String directory = warehouse + "/" + tableName + "/";
- String localDirectory = "tmp";
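-        // Copy the query's output table from the warehouse directory (falling back
-        // to the local file system when it is not on HDFS) into a local tmp folder,
-        // skipping Hadoop's .crc checksum files, then concatenate it for comparison.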
-
- FileStatus[] files = dfs.listStatus(new Path(directory));
- FileSystem lfs = null;
- if (files == null) {
- lfs = FileSystem.getLocal(ConfUtil.getJobConf());
- files = lfs.listStatus(new Path(directory));
- }
-
- File resultDirectory = new File(localDirectory + "/" + tableName);
- deleteDir(resultDirectory);
- resultDirectory.mkdir();
-
- for (FileStatus fs : files) {
- Path src = fs.getPath();
- if (src.getName().indexOf("crc") >= 0)
- continue;
-
- String destStr = localDirectory + "/" + tableName + "/"
- + src.getName();
- Path dest = new Path(destStr);
- if (lfs != null) {
- lfs.copyToLocalFile(src, dest);
- dfs.copyFromLocalFile(dest, new Path(directory));
- } else
- dfs.copyToLocalFile(src, dest);
- }
-
- File[] rFiles = resultDirectory.listFiles();
- StringBuilder sb = new StringBuilder();
- for (File r : rFiles) {
- if (r.getName().indexOf("crc") >= 0)
- continue;
- readFileToString(r, sb);
- }
- deleteDir(resultDirectory);
-
- StringBuilder buf = new StringBuilder();
- readFileToString(resultFile, buf);
- if (!equal(buf, sb)) {
- throw new Exception("Result for " + queryFile + " changed:\n"
- + sw.toString());
- }
- }
-
- private void deleteDir(File resultDirectory) {
- if (resultDirectory.exists()) {
- File[] rFiles = resultDirectory.listFiles();
- for (File r : rFiles)
- r.delete();
- resultDirectory.delete();
- }
- }
-
- private boolean equal(StringBuilder sb1, StringBuilder sb2) {
- String s1 = sb1.toString();
- String s2 = sb2.toString();
- String[] rowsOne = s1.split("\n");
- String[] rowsTwo = s2.split("\n");
-
- if (rowsOne.length != rowsTwo.length)
- return false;
-
- for (int i = 0; i < rowsOne.length; i++) {
- String row1 = rowsOne[i];
- String row2 = rowsTwo[i];
-
- if (row1.equals(row2))
- continue;
-
- String[] fields1 = row1.split("");
- String[] fields2 = row2.split("");
-
- for (int j = 0; j < fields1.length; j++) {
- if (fields1[j].equals(fields2[j])) {
- continue;
- } else if (fields1[j].indexOf('.') < 0) {
- return false;
- } else {
- Float float1 = Float.parseFloat(fields1[j]);
- Float float2 = Float.parseFloat(fields2[j]);
-
- if (Math.abs(float1 - float2) == 0)
- continue;
- else
- return false;
- }
- }
- }
-
- return true;
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuite.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuite.java
deleted file mode 100644
index 8d8178f..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuite.java
+++ /dev/null
@@ -1,74 +0,0 @@
-package edu.uci.ics.hivesterix.perf;
-
-import java.io.File;
-import java.util.List;
-
-import junit.framework.Test;
-import junit.framework.TestResult;
-import edu.uci.ics.hivesterix.perf.base.AbstractPerfTestSuiteClass;
-
-public class PerfTestSuite extends AbstractPerfTestSuiteClass {
-
- private static final String PATH_TO_QUERIES = "src/test/resources/perf/queries/";
- private static final String PATH_TO_RESULTS = "src/test/resources/perf/results/";
- private static final String PATH_TO_IGNORES = "src/test/resources/perf/ignore.txt";
-
- private static final String FILE_EXTENSION_OF_RESULTS = "result";
-
- public static Test suite() throws Exception {
- List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
- File testData = new File(PATH_TO_QUERIES);
- File[] queries = testData.listFiles();
- PerfTestSuite testSuite = new PerfTestSuite();
-
- // set hdfs and hyracks cluster, and load test data to hdfs
- try {
- testSuite.setup();
- testSuite.loadData();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
-
- for (File qFile : queries) {
- if (isIgnored(qFile.getName(), ignores))
- continue;
-
- if (qFile.isFile()) {
- String resultFileName = hiveExtToResExt(qFile.getName());
- File rFile = new File(PATH_TO_RESULTS + resultFileName);
- testSuite.addTest(new PerfTestCase(qFile, rFile));
- }
- }
- return testSuite;
- }
-
- private static String hiveExtToResExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
- }
-
- /**
- * Runs the tests and collects their result in a TestResult.
- */
- @Override
- public void run(TestResult result) {
-
- int testCount = countTestCases();
- for (int i = 0; i < testCount; i++) {
- Test each = this.testAt(i);
- if (result.shouldStop())
- break;
- runTest(each, result);
- }
-
- // cleanup hdfs and hyracks cluster
- try {
- cleanup();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
- }
-
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteCaseGenerator.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteCaseGenerator.java
deleted file mode 100644
index 258db22..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteCaseGenerator.java
+++ /dev/null
@@ -1,101 +0,0 @@
-package edu.uci.ics.hivesterix.perf;
-
-import java.io.File;
-
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.Driver;
-import org.junit.Test;
-
-import edu.uci.ics.hivesterix.perf.base.AbstractPerfTestCase;
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-
-public class PerfTestSuiteCaseGenerator extends AbstractPerfTestCase {
- private File resultFile;
- private FileSystem dfs;
-
- PerfTestSuiteCaseGenerator(File queryFile, File resultFile) {
- super("testRuntimeFunction", queryFile);
- this.queryFile = queryFile;
- this.resultFile = resultFile;
- }
-
- @Test
- public void testRuntimeFunction() throws Exception {
- StringBuilder queryString = new StringBuilder();
- readFileToString(queryFile, queryString);
- String[] queries = queryString.toString().split(";");
-
- HiveConf hconf = ConfUtil.getHiveConf();
- Driver driver = new Driver(hconf);
- driver.init();
-
- dfs = FileSystem.get(ConfUtil.getJobConf());
-
- long startTime = System.currentTimeMillis();
- int i = 0;
- for (String query : queries) {
- if (i == queries.length - 1)
- break;
- driver.run(query);
- // driver.clear();
- i++;
- }
- long endTime = System.currentTimeMillis();
- System.out.println(resultFile.getName() + " execution time "
- + (endTime - startTime));
-
- String warehouse = hconf.get("hive.metastore.warehouse.dir");
- String tableName = removeExt(resultFile.getName());
- String directory = warehouse + "/" + tableName + "/";
- String localDirectory = "tmp";
-
- FileStatus[] files = dfs.listStatus(new Path(directory));
- FileSystem lfs = null;
- if (files == null) {
- lfs = FileSystem.getLocal(ConfUtil.getJobConf());
- files = lfs.listStatus(new Path(directory));
- }
-
- File resultDirectory = new File(localDirectory + "/" + tableName);
- deleteDir(resultDirectory);
- resultDirectory.mkdir();
-
- for (FileStatus fs : files) {
- Path src = fs.getPath();
- if (src.getName().indexOf("crc") >= 0)
- continue;
-
- String destStr = localDirectory + "/" + tableName + "/"
- + src.getName();
- Path dest = new Path(destStr);
- if (lfs != null) {
- lfs.copyToLocalFile(src, dest);
- dfs.copyFromLocalFile(dest, new Path(directory));
- } else
- dfs.copyToLocalFile(src, dest);
- }
-
- File[] rFiles = resultDirectory.listFiles();
- StringBuilder sb = new StringBuilder();
- for (File r : rFiles) {
- if (r.getName().indexOf("crc") >= 0)
- continue;
- readFileToString(r, sb);
- }
- deleteDir(resultDirectory);
-
- writeStringToFile(resultFile, sb);
- }
-
- private void deleteDir(File resultDirectory) {
- if (resultDirectory.exists()) {
- File[] rFiles = resultDirectory.listFiles();
- for (File r : rFiles)
- r.delete();
- resultDirectory.delete();
- }
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteGenerator.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteGenerator.java
deleted file mode 100644
index 0a27ca2..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteGenerator.java
+++ /dev/null
@@ -1,74 +0,0 @@
-package edu.uci.ics.hivesterix.perf;
-
-import java.io.File;
-import java.util.List;
-
-import junit.framework.Test;
-import junit.framework.TestResult;
-import edu.uci.ics.hivesterix.perf.base.AbstractPerfTestSuiteClass;
-
-public class PerfTestSuiteGenerator extends AbstractPerfTestSuiteClass {
-
- private static final String PATH_TO_QUERIES = "src/test/resources/perf/queries/";
- private static final String PATH_TO_RESULTS = "src/test/resources/perf/results/";
- private static final String PATH_TO_IGNORES = "src/test/resources/perf/ignore.txt";
-
- private static final String FILE_EXTENSION_OF_RESULTS = "result";
-
- public static Test suite() throws Exception {
- List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
- File testData = new File(PATH_TO_QUERIES);
- File[] queries = testData.listFiles();
- PerfTestSuiteGenerator testSuite = new PerfTestSuiteGenerator();
-
- // set hdfs and hyracks cluster, and load test data to hdfs
- try {
- testSuite.setup();
- testSuite.loadData();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
-
- for (File qFile : queries) {
- if (isIgnored(qFile.getName(), ignores))
- continue;
-
- if (qFile.isFile() && qFile.getName().startsWith("q18_")) {
- String resultFileName = hiveExtToResExt(qFile.getName());
- File rFile = new File(PATH_TO_RESULTS + resultFileName);
- testSuite.addTest(new PerfTestSuiteCaseGenerator(qFile, rFile));
- }
- }
- return testSuite;
- }
-
- private static String hiveExtToResExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
- }
-
- /**
- * Runs the tests and collects their result in a TestResult.
- */
- @Override
- public void run(TestResult result) {
-
- int testCount = countTestCases();
- for (int i = 0; i < testCount; i++) {
- Test each = this.testAt(i);
- if (result.shouldStop())
- break;
- runTest(each, result);
- }
-
- // cleanup hdfs and hyracks cluster
- try {
- cleanup();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
- }
-
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestCase.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestCase.java
deleted file mode 100644
index f55d6a1..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestCase.java
+++ /dev/null
@@ -1,52 +0,0 @@
-package edu.uci.ics.hivesterix.perf.base;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-
-import junit.framework.TestCase;
-
-public class AbstractPerfTestCase extends TestCase {
- protected File queryFile;
-
- public AbstractPerfTestCase(String testName, File queryFile) {
- super(testName);
- }
-
- protected static void readFileToString(File file, StringBuilder buf)
- throws Exception {
- BufferedReader result = new BufferedReader(new FileReader(file));
- while (true) {
- String s = result.readLine();
- if (s == null) {
- break;
- } else {
- buf.append(s);
- buf.append('\n');
- }
- }
- result.close();
- }
-
- protected static void writeStringToFile(File file, StringWriter buf)
- throws Exception {
- PrintWriter result = new PrintWriter(new FileWriter(file));
- result.print(buf);
- result.close();
- }
-
- protected static void writeStringToFile(File file, StringBuilder buf)
- throws Exception {
- PrintWriter result = new PrintWriter(new FileWriter(file));
- result.print(buf);
- result.close();
- }
-
- protected static String removeExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot);
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestSuiteClass.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestSuiteClass.java
deleted file mode 100644
index d90756e..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestSuiteClass.java
+++ /dev/null
@@ -1,206 +0,0 @@
-package edu.uci.ics.hivesterix.perf.base;
-
-import java.io.BufferedReader;
-import java.io.FileNotFoundException;
-import java.io.FileReader;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-import junit.framework.TestSuite;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MiniMRCluster;
-
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hyracks.api.client.HyracksConnection;
-import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
-import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
-import edu.uci.ics.hyracks.control.common.controllers.CCConfig;
-import edu.uci.ics.hyracks.control.common.controllers.NCConfig;
-import edu.uci.ics.hyracks.control.nc.NodeControllerService;
-
-@SuppressWarnings("deprecation")
-public abstract class AbstractPerfTestSuiteClass extends TestSuite {
-
- private static final String PATH_TO_HADOOP_CONF = "src/test/resources/perf/hadoop/conf";
- private static final String PATH_TO_HIVE_CONF = "src/test/resources/perf/hive/conf/hive-default.xml";
- private static final String PATH_TO_DATA = "src/test/resources/perf/data/";
-
- private MiniDFSCluster dfsCluster;
- private MiniMRCluster mrCluster;
-
- private JobConf conf = new JobConf();
- protected FileSystem dfs;
-
- private int numberOfNC = 2;
- private ClusterControllerService cc;
- private Map<String, NodeControllerService> ncs = new HashMap<String, NodeControllerService>();
-
- /**
- * setup cluster
- *
- * @throws IOException
- */
- protected void setup() throws Exception {
- setupHdfs();
- setupHyracks();
- }
-
- private void setupHdfs() throws IOException {
- conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
- conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
- conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
- HiveConf hconf = new HiveConf(SessionState.class);
- hconf.addResource(new Path(PATH_TO_HIVE_CONF));
-
- FileSystem lfs = FileSystem.getLocal(new Configuration());
- lfs.delete(new Path("build"), true);
- lfs.delete(new Path("metastore_db"), true);
-
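-        // Bring up an in-process MiniDFSCluster and MiniMRCluster, then point the
-        // Hive metastore warehouse at the mini-DFS under /tmp/hivesterix.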
- System.setProperty("hadoop.log.dir", "logs");
- dfsCluster = new MiniDFSCluster(hconf, numberOfNC, true, null);
- dfs = dfsCluster.getFileSystem();
-
- mrCluster = new MiniMRCluster(2, dfs.getUri().toString(), 1);
- hconf.setVar(HiveConf.ConfVars.HADOOPJT,
- "localhost:" + mrCluster.getJobTrackerPort());
- hconf.setInt("mapred.min.split.size", 1342177280);
-
- conf = new JobConf(hconf);
- ConfUtil.setJobConf(conf);
-
- String fsName = conf.get("fs.default.name");
- hconf.set("hive.metastore.warehouse.dir",
- fsName.concat("/tmp/hivesterix"));
- String warehouse = hconf.get("hive.metastore.warehouse.dir");
- dfs.mkdirs(new Path(warehouse));
- ConfUtil.setHiveConf(hconf);
- }
-
- private void setupHyracks() throws Exception {
- // read hive conf
- HiveConf hconf = new HiveConf(SessionState.class);
- hconf.addResource(new Path(PATH_TO_HIVE_CONF));
- SessionState.start(hconf);
- String ipAddress = hconf.get("hive.hyracks.host");
- int clientPort = Integer.parseInt(hconf.get("hive.hyracks.port"));
- int clusterPort = clientPort;
- // start hyracks cc
- CCConfig ccConfig = new CCConfig();
- ccConfig.clientNetIpAddress = ipAddress;
- ccConfig.clientNetPort = clientPort;
- ccConfig.clusterNetPort = clusterPort;
- ccConfig.profileDumpPeriod = 1000;
- ccConfig.heartbeatPeriod = 200000000;
- ccConfig.maxHeartbeatLapsePeriods = 200000000;
- cc = new ClusterControllerService(ccConfig);
- cc.start();
-
- // start hyracks nc
- for (int i = 0; i < numberOfNC; i++) {
- NCConfig ncConfig = new NCConfig();
- ncConfig.ccHost = ipAddress;
- ncConfig.clusterNetIPAddress = ipAddress;
- ncConfig.ccPort = clientPort;
- ncConfig.dataIPAddress = "127.0.0.1";
- ncConfig.nodeId = "nc" + i;
- NodeControllerService nc = new NodeControllerService(ncConfig);
- nc.start();
- ncs.put(ncConfig.nodeId, nc);
- }
-
- IHyracksClientConnection hcc = new HyracksConnection(ccConfig.clientNetIpAddress, clientPort);
- }
-
- protected void makeDir(String path) throws IOException {
- dfs.mkdirs(new Path(path));
- }
-
- protected void loadFiles(String src, String dest) throws IOException {
- dfs.copyFromLocalFile(new Path(src), new Path(dest));
- }
-
- protected void cleanup() throws Exception {
- cleanupHdfs();
- cleanupHyracks();
- }
-
- /**
- * cleanup hdfs cluster
- */
- private void cleanupHdfs() throws IOException {
- dfs.delete(new Path("/"), true);
- FileSystem.closeAll();
- dfsCluster.shutdown();
- }
-
- /**
- * cleanup hyracks cluster
- */
- private void cleanupHyracks() throws Exception {
- Iterator<NodeControllerService> iterator = ncs.values().iterator();
- while (iterator.hasNext()) {
- NodeControllerService nc = iterator.next();
- nc.stop();
- }
- cc.stop();
- }
-
- protected static List<String> getIgnoreList(String ignorePath)
- throws FileNotFoundException, IOException {
- BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
- String s = null;
- List<String> ignores = new ArrayList<String>();
- while ((s = reader.readLine()) != null) {
- ignores.add(s);
- }
- reader.close();
- return ignores;
- }
-
- protected static boolean isIgnored(String q, List<String> ignoreList) {
- for (String ignore : ignoreList) {
- if (ignore.equals(q)) {
- return true;
- }
- }
- return false;
- }
-
- protected void loadData() throws IOException {
-
- makeDir("/tpch");
- makeDir("/tpch/customer");
- makeDir("/tpch/lineitem");
- makeDir("/tpch/orders");
- makeDir("/tpch/part");
- makeDir("/tpch/partsupp");
- makeDir("/tpch/supplier");
- makeDir("/tpch/nation");
- makeDir("/tpch/region");
-
- makeDir("/jarod");
-
- loadFiles(PATH_TO_DATA + "customer.tbl", "/tpch/customer/");
- loadFiles(PATH_TO_DATA + "lineitem.tbl", "/tpch/lineitem/");
- loadFiles(PATH_TO_DATA + "orders.tbl", "/tpch/orders/");
- loadFiles(PATH_TO_DATA + "part.tbl", "/tpch/part/");
- loadFiles(PATH_TO_DATA + "partsupp.tbl", "/tpch/partsupp/");
- loadFiles(PATH_TO_DATA + "supplier.tbl", "/tpch/supplier/");
- loadFiles(PATH_TO_DATA + "nation.tbl", "/tpch/nation/");
- loadFiles(PATH_TO_DATA + "region.tbl", "/tpch/region/");
-
- loadFiles(PATH_TO_DATA + "ext-gby.tbl", "/jarod/");
- }
-
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractHivesterixTestCase.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractHivesterixTestCase.java
deleted file mode 100644
index 560cef7..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractHivesterixTestCase.java
+++ /dev/null
@@ -1,52 +0,0 @@
-package edu.uci.ics.hivesterix.test.base;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-
-import junit.framework.TestCase;
-
-public class AbstractHivesterixTestCase extends TestCase {
- protected File queryFile;
-
- public AbstractHivesterixTestCase(String testName, File queryFile) {
- super(testName);
- }
-
- protected static void readFileToString(File file, StringBuilder buf)
- throws Exception {
- BufferedReader result = new BufferedReader(new FileReader(file));
- while (true) {
- String s = result.readLine();
- if (s == null) {
- break;
- } else {
- buf.append(s);
- buf.append('\n');
- }
- }
- result.close();
- }
-
- protected static void writeStringToFile(File file, StringWriter buf)
- throws Exception {
- PrintWriter result = new PrintWriter(new FileWriter(file));
- result.print(buf);
- result.close();
- }
-
- protected static void writeStringToFile(File file, StringBuilder buf)
- throws Exception {
- PrintWriter result = new PrintWriter(new FileWriter(file));
- result.print(buf);
- result.close();
- }
-
- protected static String removeExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot);
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractTestSuiteClass.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractTestSuiteClass.java
deleted file mode 100644
index c03e0a3..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractTestSuiteClass.java
+++ /dev/null
@@ -1,214 +0,0 @@
-package edu.uci.ics.hivesterix.test.base;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileReader;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-import junit.framework.TestSuite;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MiniMRCluster;
-
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hyracks.api.client.HyracksConnection;
-import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
-import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
-import edu.uci.ics.hyracks.control.common.controllers.CCConfig;
-import edu.uci.ics.hyracks.control.common.controllers.NCConfig;
-import edu.uci.ics.hyracks.control.nc.NodeControllerService;
-
-@SuppressWarnings("deprecation")
-public abstract class AbstractTestSuiteClass extends TestSuite {
-
- private static final String PATH_TO_HADOOP_CONF = "src/test/resources/runtimefunctionts/hadoop/conf";
- private static final String PATH_TO_HIVE_CONF = "src/test/resources/runtimefunctionts/hive/conf/hive-default.xml";
-
- private static final String PATH_TO_CLUSTER_CONF = "src/test/resources/runtimefunctionts/hive/conf/topology.xml";
- private static final String PATH_TO_DATA = "src/test/resources/runtimefunctionts/data/";
-
- private MiniDFSCluster dfsCluster;
- private MiniMRCluster mrCluster;
-
- private JobConf conf = new JobConf();
- protected FileSystem dfs;
-
- private int numberOfNC = 2;
- private ClusterControllerService cc;
- private Map<String, NodeControllerService> ncs = new HashMap<String, NodeControllerService>();
-
- /**
- * setup cluster
- *
- * @throws IOException
- */
- protected void setup() throws Exception {
- setupHdfs();
- setupHyracks();
- }
-
- private void setupHdfs() throws IOException {
- conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
- conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
- conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
- HiveConf hconf = new HiveConf(SessionState.class);
- hconf.addResource(new Path(PATH_TO_HIVE_CONF));
-
- FileSystem lfs = FileSystem.getLocal(new Configuration());
- lfs.delete(new Path("build"), true);
- lfs.delete(new Path("metastore_db"), true);
-
- System.setProperty("hadoop.log.dir", "logs");
- dfsCluster = new MiniDFSCluster(hconf, numberOfNC, true, null);
- dfs = dfsCluster.getFileSystem();
-
- mrCluster = new MiniMRCluster(2, dfs.getUri().toString(), 1);
- hconf.setVar(HiveConf.ConfVars.HADOOPJT,
- "localhost:" + mrCluster.getJobTrackerPort());
-
- conf = new JobConf(hconf);
- ConfUtil.setJobConf(conf);
-
- String fsName = conf.get("fs.default.name");
- hconf.set("hive.metastore.warehouse.dir",
- fsName.concat("/tmp/hivesterix"));
- String warehouse = hconf.get("hive.metastore.warehouse.dir");
- dfs.mkdirs(new Path(warehouse));
- ConfUtil.setHiveConf(hconf);
- }
-
- private void setupHyracks() throws Exception {
- // read hive conf
- HiveConf hconf = new HiveConf(SessionState.class);
- hconf.addResource(new Path(PATH_TO_HIVE_CONF));
- SessionState.start(hconf);
- String ipAddress = hconf.get("hive.hyracks.host");
- int clientPort = Integer.parseInt(hconf.get("hive.hyracks.port"));
- int netPort = clientPort + 1;
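-        // Start an embedded Hyracks cluster: one ClusterControllerService and
-        // numberOfNC NodeControllerServices, bound to the host/ports configured
-        // in hive-default.xml.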
-
- // start hyracks cc
- CCConfig ccConfig = new CCConfig();
- ccConfig.clientNetIpAddress = ipAddress;
- ccConfig.clientNetPort = clientPort;
- ccConfig.clusterNetPort = netPort;
- ccConfig.profileDumpPeriod = 1000;
- ccConfig.heartbeatPeriod = 200000000;
- ccConfig.maxHeartbeatLapsePeriods = 200000000;
- ccConfig.clusterTopologyDefinition = new File(PATH_TO_CLUSTER_CONF);
- cc = new ClusterControllerService(ccConfig);
- cc.start();
-
- // start hyracks nc
- for (int i = 0; i < numberOfNC; i++) {
- NCConfig ncConfig = new NCConfig();
- ncConfig.ccHost = ipAddress;
- ncConfig.clusterNetIPAddress = ipAddress;
- ncConfig.ccPort = netPort;
- ncConfig.dataIPAddress = "127.0.0.1";
- ncConfig.nodeId = "nc" + i;
- NodeControllerService nc = new NodeControllerService(ncConfig);
- nc.start();
- ncs.put(ncConfig.nodeId, nc);
- }
-
- IHyracksClientConnection hcc = new HyracksConnection(ccConfig.clientNetIpAddress, clientPort);
- }
-
- protected void makeDir(String path) throws IOException {
- dfs.mkdirs(new Path(path));
- }
-
- protected void loadFiles(String src, String dest) throws IOException {
- dfs.copyFromLocalFile(new Path(src), new Path(dest));
- }
-
- protected void cleanup() throws Exception {
- cleanupHdfs();
- cleanupHyracks();
- }
-
- /**
- * cleanup hdfs cluster
- */
- private void cleanupHdfs() throws IOException {
- dfs.delete(new Path("/"), true);
- FileSystem.closeAll();
- dfsCluster.shutdown();
- }
-
- /**
- * cleanup hyracks cluster
- */
- private void cleanupHyracks() throws Exception {
- Iterator<NodeControllerService> iterator = ncs.values().iterator();
- while (iterator.hasNext()) {
- NodeControllerService nc = iterator.next();
- nc.stop();
- }
- cc.stop();
- }
-
- protected static List<String> getIgnoreList(String ignorePath)
- throws FileNotFoundException, IOException {
- BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
- String s = null;
- List<String> ignores = new ArrayList<String>();
- while ((s = reader.readLine()) != null) {
- ignores.add(s);
- }
- reader.close();
- return ignores;
- }
-
- protected static boolean isIgnored(String q, List<String> ignoreList) {
- for (String ignore : ignoreList) {
- if (q.indexOf(ignore) >= 0) {
- return true;
- }
- }
- return false;
- }
-
- protected void loadData() throws IOException {
-
- makeDir("/tpch");
- makeDir("/tpch/customer");
- makeDir("/tpch/lineitem");
- makeDir("/tpch/orders");
- makeDir("/tpch/part");
- makeDir("/tpch/partsupp");
- makeDir("/tpch/supplier");
- makeDir("/tpch/nation");
- makeDir("/tpch/region");
-
- makeDir("/test");
- makeDir("/test/joinsrc1");
- makeDir("/test/joinsrc2");
-
- loadFiles(PATH_TO_DATA + "customer.tbl", "/tpch/customer/");
- loadFiles(PATH_TO_DATA + "lineitem.tbl", "/tpch/lineitem/");
- loadFiles(PATH_TO_DATA + "orders.tbl", "/tpch/orders/");
- loadFiles(PATH_TO_DATA + "part.tbl", "/tpch/part/");
- loadFiles(PATH_TO_DATA + "partsupp.tbl", "/tpch/partsupp/");
- loadFiles(PATH_TO_DATA + "supplier.tbl", "/tpch/supplier/");
- loadFiles(PATH_TO_DATA + "nation.tbl", "/tpch/nation/");
- loadFiles(PATH_TO_DATA + "region.tbl", "/tpch/region/");
-
- loadFiles(PATH_TO_DATA + "large_card_join_src.tbl", "/test/joinsrc1/");
- loadFiles(PATH_TO_DATA + "large_card_join_src_small.tbl",
- "/test/joinsrc2/");
- }
-
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/datagen/RecordBalance.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/datagen/RecordBalance.java
deleted file mode 100644
index 800d6be..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/datagen/RecordBalance.java
+++ /dev/null
@@ -1,82 +0,0 @@
-package edu.uci.ics.hivesterix.test.datagen;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.TextInputFormat;
-
-@SuppressWarnings("deprecation")
-public class RecordBalance {
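-    // Identity map/reduce job that rewrites each TPC-H table using the number of
-    // reduce tasks given in args[0], spreading records evenly across that many files.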
-
-    private static String confPath = System.getenv("HADOOP_HOME");
- private static Path[] inputPaths = { new Path("/tpch/100x/customer"),
- new Path("/tpch/100x/nation"), new Path("/tpch/100x/region"),
- new Path("/tpch/100x/lineitem"), new Path("/tpch/100x/orders"),
- new Path("/tpch/100x/part"), new Path("/tpch/100x/partsupp"),
- new Path("/tpch/100x/supplier") };
-
- private static Path[] outputPaths = { new Path("/tpch/100/customer"),
- new Path("/tpch/100/nation"), new Path("/tpch/100/region"),
- new Path("/tpch/100/lineitem"), new Path("/tpch/100/orders"),
- new Path("/tpch/100/part"), new Path("/tpch/100/partsupp"),
- new Path("/tpch/100/supplier") };
-
- public static class MapRecordOnly extends MapReduceBase implements
- Mapper<LongWritable, Text, LongWritable, Text> {
-
- public void map(LongWritable id, Text inputValue,
- OutputCollector<LongWritable, Text> output, Reporter reporter)
- throws IOException {
- output.collect(id, inputValue);
- }
- }
-
- public static class ReduceRecordOnly extends MapReduceBase implements
- Reducer<LongWritable, Text, NullWritable, Text> {
-
- NullWritable key = NullWritable.get();
-
- public void reduce(LongWritable inputKey, Iterator<Text> inputValue,
- OutputCollector<NullWritable, Text> output, Reporter reporter)
- throws IOException {
- while (inputValue.hasNext())
- output.collect(key, inputValue.next());
- }
- }
-
- public static void main(String[] args) throws IOException {
-
- for (int i = 0; i < inputPaths.length; i++) {
- JobConf job = new JobConf(RecordBalance.class);
- job.addResource(new Path(confPath + "/core-site.xml"));
- job.addResource(new Path(confPath + "/mapred-site.xml"));
- job.addResource(new Path(confPath + "/hdfs-site.xml"));
-
- job.setJobName(RecordBalance.class.getSimpleName());
- job.setMapperClass(MapRecordOnly.class);
- job.setReducerClass(ReduceRecordOnly.class);
- job.setMapOutputKeyClass(LongWritable.class);
- job.setMapOutputValueClass(Text.class);
-
- job.setInputFormat(TextInputFormat.class);
- FileInputFormat.setInputPaths(job, inputPaths[i]);
- FileOutputFormat.setOutputPath(job, outputPaths[i]);
- job.setNumReduceTasks(Integer.parseInt(args[0]));
-
- JobClient.runJob(job);
- }
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/legacy/LegacyTestCase.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/legacy/LegacyTestCase.java
deleted file mode 100644
index 9591c32..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/legacy/LegacyTestCase.java
+++ /dev/null
@@ -1,144 +0,0 @@
-package edu.uci.ics.hivesterix.test.legacy;
-
-import java.io.File;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.Driver;
-import org.junit.Test;
-
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
-
-public class LegacyTestCase extends AbstractHivesterixTestCase {
- private File resultFile;
- private FileSystem dfs;
-
- public LegacyTestCase(File queryFile, File resultFile) {
- super("legacy", queryFile);
- this.queryFile = queryFile;
- this.resultFile = resultFile;
- }
-
- @Test
- public void testRuntimeFunction() throws Exception {
- StringBuilder queryString = new StringBuilder();
- readFileToString(queryFile, queryString);
- String[] queries = queryString.toString().split(";");
- StringWriter sw = new StringWriter();
-
- HiveConf hconf = ConfUtil.getHiveConf();
- Driver driver = new Driver(hconf, new PrintWriter(sw));
- driver.init();
-
- dfs = FileSystem.get(ConfUtil.getJobConf());
-
- int i = 0;
- for (String query : queries) {
- if (i == queries.length - 1)
- break;
- driver.run(query);
- driver.clear();
- i++;
- }
-
- String warehouse = hconf.get("hive.metastore.warehouse.dir");
- String tableName = removeExt(resultFile.getName());
- String directory = warehouse + "/" + tableName + "/";
- String localDirectory = "tmp";
-
- FileStatus[] files = dfs.listStatus(new Path(directory));
- FileSystem lfs = null;
- if (files == null) {
- lfs = FileSystem.getLocal(ConfUtil.getJobConf());
- files = lfs.listStatus(new Path(directory));
- }
-
- File resultDirectory = new File(localDirectory + "/" + tableName);
- deleteDir(resultDirectory);
- resultDirectory.mkdir();
-
- for (FileStatus fs : files) {
- Path src = fs.getPath();
- if (src.getName().indexOf("crc") >= 0)
- continue;
-
- String destStr = localDirectory + "/" + tableName + "/"
- + src.getName();
- Path dest = new Path(destStr);
- if (lfs != null) {
- lfs.copyToLocalFile(src, dest);
- dfs.copyFromLocalFile(dest, new Path(directory));
- } else
- dfs.copyToLocalFile(src, dest);
- }
-
- File[] rFiles = resultDirectory.listFiles();
- StringBuilder sb = new StringBuilder();
- for (File r : rFiles) {
- if (r.getName().indexOf("crc") >= 0)
- continue;
- readFileToString(r, sb);
- }
- deleteDir(resultDirectory);
-
- StringBuilder buf = new StringBuilder();
- readFileToString(resultFile, buf);
- if (!equal(buf, sb)) {
- throw new Exception("Result for " + queryFile + " changed:\n"
- + sw.toString());
- }
- }
-
- private void deleteDir(File resultDirectory) {
- if (resultDirectory.exists()) {
- File[] rFiles = resultDirectory.listFiles();
- for (File r : rFiles)
- r.delete();
- resultDirectory.delete();
- }
- }
-
- private boolean equal(StringBuilder sb1, StringBuilder sb2) {
- String s1 = sb1.toString();
- String s2 = sb2.toString();
- String[] rowsOne = s1.split("\n");
- String[] rowsTwo = s2.split("\n");
-
- if (rowsOne.length != rowsTwo.length)
- return false;
-
- for (int i = 0; i < rowsOne.length; i++) {
- String row1 = rowsOne[i];
- String row2 = rowsTwo[i];
-
- if (row1.equals(row2))
- continue;
-
- String[] fields1 = row1.split("");
- String[] fields2 = row2.split("");
-
- for (int j = 0; j < fields1.length; j++) {
- if (fields1[j].equals(fields2[j])) {
- continue;
- } else if (fields1[j].indexOf('.') < 0) {
- return false;
- } else {
- Float float1 = Float.parseFloat(fields1[j]);
- Float float2 = Float.parseFloat(fields2[j]);
-
- if (Math.abs(float1 - float2) == 0)
- continue;
- else
- return false;
- }
- }
- }
-
- return true;
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestCase.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestCase.java
deleted file mode 100644
index db13676..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestCase.java
+++ /dev/null
@@ -1,57 +0,0 @@
-package edu.uci.ics.hivesterix.test.optimizer;
-
-import java.io.File;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.Driver;
-import org.junit.Test;
-
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
-
-public class OptimizerTestCase extends AbstractHivesterixTestCase {
- private File resultFile;
-
- OptimizerTestCase(File queryFile, File resultFile) {
- super("testOptimizer", queryFile);
- this.queryFile = queryFile;
- this.resultFile = resultFile;
- }
-
- @Test
- public void testOptimizer() throws Exception {
- StringBuilder queryString = new StringBuilder();
- readFileToString(queryFile, queryString);
- String[] queries = queryString.toString().split(";");
- StringWriter sw = new StringWriter();
-
- HiveConf hconf = ConfUtil.getHiveConf();
- Driver driver = new Driver(hconf, new PrintWriter(sw));
- driver.init();
-
- int i = 0;
- for (String query : queries) {
- if (i == queries.length - 1)
- break;
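-            // DDL and config statements (create/drop/set) are executed, while plain
-            // queries are only compiled so that the printed plan captured in sw can
-            // be compared against the expected .plan file.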
- if (query.toLowerCase().indexOf("create") >= 0
- || query.toLowerCase().indexOf("drop") >= 0
- || query.toLowerCase().indexOf("set") >= 0
- || query.toLowerCase().startsWith("\n\ncreate")
- || query.toLowerCase().startsWith("\n\ndrop")
- || query.toLowerCase().startsWith("\n\nset"))
- driver.run(query);
- else
- driver.compile(query);
- driver.clear();
- i++;
- }
- StringBuilder buf = new StringBuilder();
- readFileToString(resultFile, buf);
- if (!buf.toString().equals(sw.toString())) {
- throw new Exception("Result for " + queryFile + " changed:\n"
- + sw.toString());
- }
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuitGenerator.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuitGenerator.java
deleted file mode 100644
index 217f67d..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuitGenerator.java
+++ /dev/null
@@ -1,77 +0,0 @@
-package edu.uci.ics.hivesterix.test.optimizer;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.util.List;
-
-import junit.framework.Test;
-import junit.framework.TestResult;
-import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
-
-public class OptimizerTestSuitGenerator extends AbstractTestSuiteClass {
- private static final String PATH_TO_QUERIES = "src/test/resources/optimizerts/queries/";
- private static final String PATH_TO_RESULTS = "src/test/resources/optimizerts/results/";
- private static final String PATH_TO_IGNORES = "src/test/resources/optimizerts/ignore.txt";
-
- private static final String FILE_EXTENSION_OF_RESULTS = "plan";
-
- public static Test suite() throws UnsupportedEncodingException,
- FileNotFoundException, IOException {
- List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
- File testData = new File(PATH_TO_QUERIES);
- File[] queries = testData.listFiles();
- OptimizerTestSuitGenerator testSuite = new OptimizerTestSuitGenerator();
- // set hdfs and hyracks cluster, and load test data to hdfs
- try {
- testSuite.setup();
- testSuite.loadData();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
-
- for (File qFile : queries) {
- if (isIgnored(qFile.getName(), ignores))
- continue;
-
- if (qFile.isFile()) {
- String resultFileName = aqlExtToResExt(qFile.getName());
- File rFile = new File(PATH_TO_RESULTS + resultFileName);
- testSuite.addTest(new OptimizerTestSuiteCaseGenerator(qFile,
- rFile));
- }
- }
- return testSuite;
- }
-
- private static String aqlExtToResExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
- }
-
- /**
- * Runs the tests and collects their result in a TestResult.
- */
- @Override
- public void run(TestResult result) {
-
- int testCount = countTestCases();
- for (int i = 0; i < testCount; i++) {
- Test each = this.testAt(i);
- if (result.shouldStop())
- break;
- runTest(each, result);
- }
-
- // cleanup hdfs and hyracks cluster
- try {
- cleanup();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
- }
-
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuite.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuite.java
deleted file mode 100644
index e3a4a4e..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuite.java
+++ /dev/null
@@ -1,54 +0,0 @@
-package edu.uci.ics.hivesterix.test.optimizer;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.util.List;
-
-import junit.framework.Test;
-import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
-
-public class OptimizerTestSuite extends AbstractTestSuiteClass {
-
- private static final String PATH_TO_QUERIES = "src/test/resources/optimizerts/queries/";
- private static final String PATH_TO_RESULTS = "src/test/resources/optimizerts/results/";
- private static final String PATH_TO_IGNORES = "src/test/resources/optimizerts/ignore.txt";
-
- private static final String FILE_EXTENSION_OF_RESULTS = "plan";
-
- public static Test suite() throws UnsupportedEncodingException,
- FileNotFoundException, IOException {
- List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
- File testData = new File(PATH_TO_QUERIES);
- File[] queries = testData.listFiles();
- OptimizerTestSuite testSuite = new OptimizerTestSuite();
-
- // set hdfs and hyracks cluster, and load test data to hdfs
- try {
- testSuite.setup();
- testSuite.loadData();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
-
- for (File qFile : queries) {
- if (isIgnored(qFile.getName(), ignores))
- continue;
-
- if (qFile.isFile() && qFile.getName().startsWith("h11_")) {
- String resultFileName = hiveExtToResExt(qFile.getName());
- File rFile = new File(PATH_TO_RESULTS + resultFileName);
- testSuite.addTest(new OptimizerTestCase(qFile, rFile));
- }
- }
- return testSuite;
- }
-
- private static String hiveExtToResExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
- }
-
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuiteCaseGenerator.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuiteCaseGenerator.java
deleted file mode 100644
index a86dc29..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuiteCaseGenerator.java
+++ /dev/null
@@ -1,53 +0,0 @@
-package edu.uci.ics.hivesterix.test.optimizer;
-
-import java.io.File;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.Driver;
-import org.junit.Test;
-
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
-
-public class OptimizerTestSuiteCaseGenerator extends AbstractHivesterixTestCase {
- private File resultFile;
-
- OptimizerTestSuiteCaseGenerator(File queryFile, File resultFile) {
- super("testOptimizer", queryFile);
- this.queryFile = queryFile;
- this.resultFile = resultFile;
- }
-
- @Test
- public void testOptimizer() throws Exception {
- StringBuilder queryString = new StringBuilder();
- readFileToString(queryFile, queryString);
- String[] queries = queryString.toString().split(";");
- StringWriter sw = new StringWriter();
-
- HiveConf hconf = ConfUtil.getHiveConf();
- Driver driver = new Driver(hconf, new PrintWriter(sw));
- driver.init();
-
- int i = 0;
- for (String query : queries) {
- if (i == queries.length - 1)
- break;
- if (query.toLowerCase().indexOf("create") >= 0
- || query.toLowerCase().indexOf("drop") >= 0
- || query.toLowerCase().indexOf("set") >= 0
- || query.toLowerCase().startsWith("\n\ncreate")
- || query.toLowerCase().startsWith("\n\ndrop")
- || query.toLowerCase().startsWith("\n\nset"))
- driver.run(query);
- else
- driver.compile(query);
- driver.clear();
- i++;
- }
- sw.close();
- writeStringToFile(resultFile, sw);
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestCase.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestCase.java
deleted file mode 100644
index 078de9a..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestCase.java
+++ /dev/null
@@ -1,152 +0,0 @@
-package edu.uci.ics.hivesterix.test.runtimefunction;
-
-import java.io.File;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.Driver;
-import org.junit.Test;
-
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
-
-public class RuntimeFunctionTestCase extends AbstractHivesterixTestCase {
- private File resultFile;
- private FileSystem dfs;
-
- RuntimeFunctionTestCase(File queryFile, File resultFile) {
- super("testRuntimeFunction", queryFile);
- this.queryFile = queryFile;
- this.resultFile = resultFile;
- }
-
- @Test
- public void testRuntimeFunction() throws Exception {
- StringBuilder queryString = new StringBuilder();
- readFileToString(queryFile, queryString);
- String[] queries = queryString.toString().split(";");
- StringWriter sw = new StringWriter();
-
- HiveConf hconf = ConfUtil.getHiveConf();
- Driver driver = new Driver(hconf, new PrintWriter(sw));
- driver.init();
- // Driver driver = new Driver(hconf);
-
- dfs = FileSystem.get(ConfUtil.getJobConf());
-
- int i = 0;
- for (String query : queries) {
- if (i == queries.length - 1)
- break;
- driver.run(query);
- driver.clear();
- i++;
- }
-
- String warehouse = hconf.get("hive.metastore.warehouse.dir");
- String tableName = removeExt(resultFile.getName());
- String directory = warehouse + "/" + tableName + "/";
- String localDirectory = "tmp";
-
- FileStatus[] files = dfs.listStatus(new Path(directory));
- FileSystem lfs = null;
- if (files == null) {
- lfs = FileSystem.getLocal(ConfUtil.getJobConf());
- files = lfs.listStatus(new Path(directory));
- }
-
- File resultDirectory = new File(localDirectory + "/" + tableName);
- deleteDir(resultDirectory);
- resultDirectory.mkdir();
-
- for (FileStatus fs : files) {
- Path src = fs.getPath();
- if (src.getName().indexOf("crc") >= 0)
- continue;
-
- String destStr = localDirectory + "/" + tableName + "/"
- + src.getName();
- Path dest = new Path(destStr);
- if (lfs != null) {
- lfs.copyToLocalFile(src, dest);
- dfs.copyFromLocalFile(dest, new Path(directory));
- } else
- dfs.copyToLocalFile(src, dest);
- }
-
- File[] rFiles = resultDirectory.listFiles();
- StringBuilder sb = new StringBuilder();
- for (File r : rFiles) {
- if (r.getName().indexOf("crc") >= 0)
- continue;
- readFileToString(r, sb);
- }
-
- StringBuilder buf = new StringBuilder();
- readFileToString(resultFile, buf);
- StringBuffer errorMsg = new StringBuffer();
- if (!equal(buf, sb, errorMsg)) {
- throw new Exception("Result for " + queryFile + " changed:\n"
- + errorMsg.toString());
- }
- deleteDir(resultDirectory);
- }
-
- private void deleteDir(File resultDirectory) {
- if (resultDirectory.exists()) {
- File[] rFiles = resultDirectory.listFiles();
- for (File r : rFiles)
- r.delete();
- resultDirectory.delete();
- }
- }
-
- private boolean equal(StringBuilder sb1, StringBuilder sb2,
- StringBuffer errorMsg) {
- String s1 = sb1.toString();
- String s2 = sb2.toString();
- String[] rowsOne = s1.split("\n");
- String[] rowsTwo = s2.split("\n");
-
- if (rowsOne.length != rowsTwo.length)
- return false;
-
- for (int i = 0; i < rowsOne.length; i++) {
- String row1 = rowsOne[i];
- String row2 = rowsTwo[i];
-
- if (row1.equals(row2))
- continue;
-
- String[] fields1 = row1.split("");
- String[] fields2 = row2.split("");
-
- for (int j = 0; j < fields1.length; j++) {
- if (fields1[j].equals(fields2[j])) {
- continue;
- } else if (fields1[j].indexOf('.') < 0) {
- errorMsg.append("line " + i + " column " + j + ": "
- + fields2[j] + " expected " + fields1[j]);
- return false;
- } else {
- Float float1 = Float.parseFloat(fields1[j]);
- Float float2 = Float.parseFloat(fields2[j]);
-
- if (Math.abs(float1 - float2) == 0)
- continue;
- else {
- errorMsg.append("line " + i + " column " + j + ": "
- + fields2[j] + " expected " + fields1[j]);
- return false;
- }
- }
- }
- }
-
- return true;
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java
deleted file mode 100644
index 2093b1d..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java
+++ /dev/null
@@ -1,74 +0,0 @@
-package edu.uci.ics.hivesterix.test.runtimefunction;
-
-import java.io.File;
-import java.util.List;
-
-import junit.framework.Test;
-import junit.framework.TestResult;
-import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
-
-public class RuntimeFunctionTestSuite extends AbstractTestSuiteClass {
-
- private static final String PATH_TO_QUERIES = "src/test/resources/runtimefunctionts/queries/";
- private static final String PATH_TO_RESULTS = "src/test/resources/runtimefunctionts/results/";
- private static final String PATH_TO_IGNORES = "src/test/resources/runtimefunctionts/ignore.txt";
-
- private static final String FILE_EXTENSION_OF_RESULTS = "result";
-
- public static Test suite() throws Exception {
- List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
- File testData = new File(PATH_TO_QUERIES);
- File[] queries = testData.listFiles();
- RuntimeFunctionTestSuite testSuite = new RuntimeFunctionTestSuite();
-
- // set hdfs and hyracks cluster, and load test data to hdfs
- try {
- testSuite.setup();
- testSuite.loadData();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
-
- for (File qFile : queries) {
- if (isIgnored(qFile.getName(), ignores))
- continue;
-
- if (qFile.isFile()) {
- String resultFileName = hiveExtToResExt(qFile.getName());
- File rFile = new File(PATH_TO_RESULTS + resultFileName);
- testSuite.addTest(new RuntimeFunctionTestCase(qFile, rFile));
- }
- }
- return testSuite;
- }
-
- private static String hiveExtToResExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
- }
-
- /**
- * Runs the tests and collects their result in a TestResult.
- */
- @Override
- public void run(TestResult result) {
-
- int testCount = countTestCases();
- for (int i = 0; i < testCount; i++) {
- Test each = this.testAt(i);
- if (result.shouldStop())
- break;
- runTest(each, result);
- }
-
- // cleanup hdfs and hyracks cluster
- try {
- cleanup();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
- }
-
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteCaseGenerator.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteCaseGenerator.java
deleted file mode 100644
index 1b45b41..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteCaseGenerator.java
+++ /dev/null
@@ -1,101 +0,0 @@
-package edu.uci.ics.hivesterix.test.runtimefunction;
-
-import java.io.File;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.Driver;
-import org.junit.Test;
-
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
-
-public class RuntimeFunctionTestSuiteCaseGenerator extends
- AbstractHivesterixTestCase {
- private File resultFile;
- private FileSystem dfs;
-
- RuntimeFunctionTestSuiteCaseGenerator(File queryFile, File resultFile) {
- super("testRuntimeFunction", queryFile);
- this.queryFile = queryFile;
- this.resultFile = resultFile;
- }
-
- @Test
- public void testRuntimeFunction() throws Exception {
- StringBuilder queryString = new StringBuilder();
- readFileToString(queryFile, queryString);
- String[] queries = queryString.toString().split(";");
- StringWriter sw = new StringWriter();
-
- HiveConf hconf = ConfUtil.getHiveConf();
- Driver driver = new Driver(hconf, new PrintWriter(sw));
- driver.init();
-
- dfs = FileSystem.get(ConfUtil.getJobConf());
-
- int i = 0;
- for (String query : queries) {
- if (i == queries.length - 1)
- break;
- driver.run(query);
- driver.clear();
- i++;
- }
-
- String warehouse = hconf.get("hive.metastore.warehouse.dir");
- String tableName = removeExt(resultFile.getName());
- String directory = warehouse + "/" + tableName + "/";
- String localDirectory = "tmp";
-
- FileStatus[] files = dfs.listStatus(new Path(directory));
- FileSystem lfs = null;
- if (files == null) {
- lfs = FileSystem.getLocal(ConfUtil.getJobConf());
- files = lfs.listStatus(new Path(directory));
- }
-
- File resultDirectory = new File(localDirectory + "/" + tableName);
- deleteDir(resultDirectory);
- resultDirectory.mkdir();
-
- for (FileStatus fs : files) {
- Path src = fs.getPath();
- if (src.getName().indexOf("crc") >= 0)
- continue;
-
- String destStr = localDirectory + "/" + tableName + "/"
- + src.getName();
- Path dest = new Path(destStr);
- if (lfs != null) {
- lfs.copyToLocalFile(src, dest);
- dfs.copyFromLocalFile(dest, new Path(directory));
- } else
- dfs.copyToLocalFile(src, dest);
- }
-
- File[] rFiles = resultDirectory.listFiles();
- StringBuilder sb = new StringBuilder();
- for (File r : rFiles) {
- if (r.getName().indexOf("crc") >= 0)
- continue;
- readFileToString(r, sb);
- }
- deleteDir(resultDirectory);
-
- writeStringToFile(resultFile, sb);
- }
-
- private void deleteDir(File resultDirectory) {
- if (resultDirectory.exists()) {
- File[] rFiles = resultDirectory.listFiles();
- for (File r : rFiles)
- r.delete();
- resultDirectory.delete();
- }
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteGenerator.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteGenerator.java
deleted file mode 100644
index a67f475..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteGenerator.java
+++ /dev/null
@@ -1,75 +0,0 @@
-package edu.uci.ics.hivesterix.test.runtimefunction;
-
-import java.io.File;
-import java.util.List;
-
-import junit.framework.Test;
-import junit.framework.TestResult;
-import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
-
-public class RuntimeFunctionTestSuiteGenerator extends AbstractTestSuiteClass {
-
- private static final String PATH_TO_QUERIES = "src/test/resources/runtimefunctionts/queries/";
- private static final String PATH_TO_RESULTS = "src/test/resources/runtimefunctionts/results/";
- private static final String PATH_TO_IGNORES = "src/test/resources/runtimefunctionts/ignore.txt";
-
- private static final String FILE_EXTENSION_OF_RESULTS = "result";
-
- public static Test suite() throws Exception {
- List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
- File testData = new File(PATH_TO_QUERIES);
- File[] queries = testData.listFiles();
- RuntimeFunctionTestSuiteGenerator testSuite = new RuntimeFunctionTestSuiteGenerator();
-
- // set hdfs and hyracks cluster, and load test data to hdfs
- try {
- testSuite.setup();
- testSuite.loadData();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
-
- for (File qFile : queries) {
- if (isIgnored(qFile.getName(), ignores))
- continue;
-
- if (qFile.isFile() && qFile.getName().startsWith("q16_")) {
- String resultFileName = hiveExtToResExt(qFile.getName());
- File rFile = new File(PATH_TO_RESULTS + resultFileName);
- testSuite.addTest(new RuntimeFunctionTestSuiteCaseGenerator(
- qFile, rFile));
- }
- }
- return testSuite;
- }
-
- private static String hiveExtToResExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
- }
-
- /**
- * Runs the tests and collects their result in a TestResult.
- */
- @Override
- public void run(TestResult result) {
-
- int testCount = countTestCases();
- for (int i = 0; i < testCount; i++) {
- Test each = this.testAt(i);
- if (result.shouldStop())
- break;
- runTest(each, result);
- }
-
- // cleanup hdfs and hyracks cluster
- try {
- cleanup();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
- }
-
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/serde/SerDeTest.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/serde/SerDeTest.java
deleted file mode 100644
index b5db432..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/serde/SerDeTest.java
+++ /dev/null
@@ -1,232 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.test.serde;
-
-import java.util.List;
-import java.util.Properties;
-
-import junit.framework.TestCase;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.serde.Constants;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.io.ByteWritable;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.io.ShortWritable;
-import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive;
-import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
-
-/**
- * TestLazySimpleSerDe.
- *
- */
-@SuppressWarnings({ "deprecation", "rawtypes" })
-public class SerDeTest extends TestCase {
-
- /**
- * Test the LazySimpleSerDe class.
- */
- public void testLazySimpleSerDe() throws Throwable {
- try {
- // Create the SerDe
- LazySimpleSerDe serDe = new LazySimpleSerDe();
- Configuration conf = new Configuration();
- Properties tbl = createProperties();
- serDe.initialize(conf, tbl);
-
- LazySerDe outputSerde = new LazySerDe();
- outputSerde.initialize(conf, tbl);
-
- // Data
- String s = "123\t456\t789\t1000\t5.3\thive and hadoop\t1\tqf";
-
- byte[] bytes = s.getBytes();
- Writable bytesWritable = new BytesWritable(bytes);
-
- // Test
- // deserializeAndSerialize(serDe, t, s, expectedFieldsData);
- Object row = serDe.deserialize(bytesWritable); // test my serde
- StructObjectInspector simpleInspector = (StructObjectInspector) serDe
- .getObjectInspector();
- List<Object> fields = simpleInspector
- .getStructFieldsDataAsList(row);
- List<? extends StructField> fieldRefs = simpleInspector
- .getAllStructFieldRefs();
-
- int i = 0;
- for (Object field : fields) {
- BytesWritable fieldWritable = (BytesWritable) outputSerde
- .serialize(field, fieldRefs.get(i)
- .getFieldObjectInspector());
- System.out.print(fieldWritable.getSize() + "|");
- i++;
- }
-
- // Writable output = outputSerde.serialize(row, serDe
- // .getObjectInspector());
- // System.out.println(output);
- //
- // Object row2 = outputSerde.deserialize(output);
- // Writable output2 = serDe.serialize(row2, outputSerde
- // .getObjectInspector());
- // System.out.println(output2);
-
- // System.out.println(output);
- // deserializeAndSerialize(outputSerde, t, s, expectedFieldsData);
-
- } catch (Throwable e) {
- e.printStackTrace();
- throw e;
- }
- }
-
- private void deserializeAndSerialize(SerDe serDe, Text t, String s,
- Object[] expectedFieldsData) throws SerDeException {
- // Get the row structure
- StructObjectInspector oi = (StructObjectInspector) serDe
- .getObjectInspector();
- List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
- assertEquals(8, fieldRefs.size());
-
- // Deserialize
- Object row = serDe.deserialize(t);
- for (int i = 0; i < fieldRefs.size(); i++) {
- Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
- if (fieldData != null) {
- fieldData = ((LazyPrimitive) fieldData).getWritableObject();
- }
- assertEquals("Field " + i, expectedFieldsData[i], fieldData);
- }
- // Serialize
- assertEquals(Text.class, serDe.getSerializedClass());
- Text serializedText = (Text) serDe.serialize(row, oi);
- assertEquals("Serialized data", s, serializedText.toString());
- }
-
- private Properties createProperties() {
- Properties tbl = new Properties();
-
- // Set the configuration parameters
- tbl.setProperty(Constants.SERIALIZATION_FORMAT, "9");
- tbl.setProperty("columns",
- "abyte,ashort,aint,along,adouble,astring,anullint,anullstring");
- tbl.setProperty("columns.types",
- "tinyint:smallint:int:bigint:double:string:int:string");
- tbl.setProperty(Constants.SERIALIZATION_NULL_FORMAT, "NULL");
- return tbl;
- }
-
- /**
- * Test the LazySimpleSerDe class with LastColumnTakesRest option.
- */
- public void testLazySimpleSerDeLastColumnTakesRest() throws Throwable {
- try {
- // Create the SerDe
- LazySimpleSerDe serDe = new LazySimpleSerDe();
- Configuration conf = new Configuration();
- Properties tbl = createProperties();
- tbl.setProperty(Constants.SERIALIZATION_LAST_COLUMN_TAKES_REST,
- "true");
- serDe.initialize(conf, tbl);
-
- // Data
- Text t = new Text(
- "123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t");
- String s = "123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\ta\tb\t";
- Object[] expectedFieldsData = { new ByteWritable((byte) 123),
- new ShortWritable((short) 456), new IntWritable(789),
- new LongWritable(1000), new DoubleWritable(5.3),
- new Text("hive and hadoop"), null, new Text("a\tb\t") };
-
- // Test
- deserializeAndSerialize(serDe, t, s, expectedFieldsData);
-
- } catch (Throwable e) {
- e.printStackTrace();
- throw e;
- }
- }
-
- /**
- * Test the LazySimpleSerDe class with extra columns.
- */
- public void testLazySimpleSerDeExtraColumns() throws Throwable {
- try {
- // Create the SerDe
- LazySimpleSerDe serDe = new LazySimpleSerDe();
- Configuration conf = new Configuration();
- Properties tbl = createProperties();
- serDe.initialize(conf, tbl);
-
- // Data
- Text t = new Text(
- "123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t");
- String s = "123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\ta";
- Object[] expectedFieldsData = { new ByteWritable((byte) 123),
- new ShortWritable((short) 456), new IntWritable(789),
- new LongWritable(1000), new DoubleWritable(5.3),
- new Text("hive and hadoop"), null, new Text("a") };
-
- // Test
- deserializeAndSerialize(serDe, t, s, expectedFieldsData);
-
- } catch (Throwable e) {
- e.printStackTrace();
- throw e;
- }
- }
-
- /**
- * Test the LazySimpleSerDe class with missing columns.
- */
- public void testLazySimpleSerDeMissingColumns() throws Throwable {
- try {
- // Create the SerDe
- LazySimpleSerDe serDe = new LazySimpleSerDe();
- Configuration conf = new Configuration();
- Properties tbl = createProperties();
- serDe.initialize(conf, tbl);
-
- // Data
- Text t = new Text("123\t456\t789\t1000\t5.3\t");
- String s = "123\t456\t789\t1000\t5.3\t\tNULL\tNULL";
- Object[] expectedFieldsData = { new ByteWritable((byte) 123),
- new ShortWritable((short) 456), new IntWritable(789),
- new LongWritable(1000), new DoubleWritable(5.3),
- new Text(""), null, null };
-
- // Test
- deserializeAndSerialize(serDe, t, s, expectedFieldsData);
-
- } catch (Throwable e) {
- e.printStackTrace();
- throw e;
- }
- }
-
-}
diff --git a/hyracks/hyracks-api/pom.xml b/hyracks/hyracks-api/pom.xml
index 72f0d8b..6807f76 100644
--- a/hyracks/hyracks-api/pom.xml
+++ b/hyracks/hyracks-api/pom.xml
@@ -15,8 +15,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/channels/IInputChannel.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/channels/IInputChannel.java
index a8f2fda..a4f0b29 100644
--- a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/channels/IInputChannel.java
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/channels/IInputChannel.java
@@ -16,7 +16,7 @@
import java.nio.ByteBuffer;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.context.IHyracksCommonContext;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
public interface IInputChannel {
@@ -30,7 +30,7 @@
public void recycleBuffer(ByteBuffer buffer);
- public void open(IHyracksTaskContext ctx) throws HyracksDataException;
+ public void open(IHyracksCommonContext ctx) throws HyracksDataException;
public void close() throws HyracksDataException;
}
\ No newline at end of file
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksClientInterfaceFunctions.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksClientInterfaceFunctions.java
index 910eb92..88df49f 100644
--- a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksClientInterfaceFunctions.java
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksClientInterfaceFunctions.java
@@ -17,6 +17,8 @@
import java.io.Serializable;
import java.util.EnumSet;
+import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.job.JobFlag;
import edu.uci.ics.hyracks.api.job.JobId;
@@ -27,6 +29,10 @@
CREATE_JOB,
GET_JOB_STATUS,
START_JOB,
+ GET_DATASET_DIRECTORY_SERIVICE_INFO,
+ GET_DATASET_RESULT_STATUS,
+ GET_DATASET_RECORD_DESCRIPTOR,
+ GET_DATASET_RESULT_LOCATIONS,
WAIT_FOR_COMPLETION,
GET_NODE_CONTROLLERS_INFO
}
@@ -90,6 +96,74 @@
}
}
+ public static class GetDatasetDirectoryServiceInfoFunction extends Function {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public FunctionId getFunctionId() {
+ return FunctionId.GET_DATASET_DIRECTORY_SERIVICE_INFO;
+ }
+ }
+
+ public static class GetDatasetResultStatusFunction extends Function {
+ private static final long serialVersionUID = 1L;
+
+ private final JobId jobId;
+
+ private final ResultSetId rsId;
+
+ public GetDatasetResultStatusFunction(JobId jobId, ResultSetId rsId) {
+ this.jobId = jobId;
+ this.rsId = rsId;
+ }
+
+ @Override
+ public FunctionId getFunctionId() {
+ return FunctionId.GET_DATASET_RESULT_STATUS;
+ }
+
+ public JobId getJobId() {
+ return jobId;
+ }
+
+ public ResultSetId getResultSetId() {
+ return rsId;
+ }
+ }
+
+ public static class GetDatasetResultLocationsFunction extends Function {
+ private static final long serialVersionUID = 1L;
+
+ private final JobId jobId;
+
+ private final ResultSetId rsId;
+
+ private final DatasetDirectoryRecord[] knownRecords;
+
+ public GetDatasetResultLocationsFunction(JobId jobId, ResultSetId rsId, DatasetDirectoryRecord[] knownRecords) {
+ this.jobId = jobId;
+ this.rsId = rsId;
+ this.knownRecords = knownRecords;
+ }
+
+ @Override
+ public FunctionId getFunctionId() {
+ return FunctionId.GET_DATASET_RESULT_LOCATIONS;
+ }
+
+ public JobId getJobId() {
+ return jobId;
+ }
+
+ public ResultSetId getResultSetId() {
+ return rsId;
+ }
+
+ public DatasetDirectoryRecord[] getKnownRecords() {
+ return knownRecords;
+ }
+ }
+
public static class WaitForCompletionFunction extends Function {
private static final long serialVersionUID = 1L;
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksClientInterfaceRemoteProxy.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksClientInterfaceRemoteProxy.java
index b85998a..033fc02 100644
--- a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksClientInterfaceRemoteProxy.java
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksClientInterfaceRemoteProxy.java
@@ -17,6 +17,7 @@
import java.util.EnumSet;
import java.util.Map;
+import edu.uci.ics.hyracks.api.comm.NetworkAddress;
import edu.uci.ics.hyracks.api.job.JobFlag;
import edu.uci.ics.hyracks.api.job.JobId;
import edu.uci.ics.hyracks.api.job.JobStatus;
@@ -55,6 +56,12 @@
}
@Override
+ public NetworkAddress getDatasetDirectoryServiceInfo() throws Exception {
+ HyracksClientInterfaceFunctions.GetDatasetDirectoryServiceInfoFunction gddsf = new HyracksClientInterfaceFunctions.GetDatasetDirectoryServiceInfoFunction();
+ return (NetworkAddress) rpci.call(ipcHandle, gddsf);
+ }
+
+ @Override
public void waitForCompletion(JobId jobId) throws Exception {
HyracksClientInterfaceFunctions.WaitForCompletionFunction wfcf = new HyracksClientInterfaceFunctions.WaitForCompletionFunction(
jobId);
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksConnection.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksConnection.java
index 2defa45..8274416 100644
--- a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksConnection.java
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/HyracksConnection.java
@@ -19,6 +19,7 @@
import java.util.Map;
import edu.uci.ics.hyracks.api.client.impl.JobSpecificationActivityClusterGraphGeneratorFactory;
+import edu.uci.ics.hyracks.api.comm.NetworkAddress;
import edu.uci.ics.hyracks.api.job.IActivityClusterGraphGeneratorFactory;
import edu.uci.ics.hyracks.api.job.JobFlag;
import edu.uci.ics.hyracks.api.job.JobId;
@@ -88,6 +89,10 @@
return hci.startJob(JavaSerializationUtils.serialize(acggf), jobFlags);
}
+ public NetworkAddress getDatasetDirectoryServiceInfo() throws Exception {
+ return hci.getDatasetDirectoryServiceInfo();
+ }
+
@Override
public void waitForCompletion(JobId jobId) throws Exception {
hci.waitForCompletion(jobId);
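
As an illustration of the new client entry point, here is a minimal sketch of querying the dataset directory service address through a HyracksConnection. The cluster-controller host and port used below are hypothetical placeholders, and the class name is invented for the example.

    import edu.uci.ics.hyracks.api.client.HyracksConnection;
    import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
    import edu.uci.ics.hyracks.api.comm.NetworkAddress;

    public class DatasetDirectoryInfoExample {
        public static void main(String[] args) throws Exception {
            // Hypothetical CC location; substitute the values of your deployment.
            IHyracksClientConnection hcc = new HyracksConnection("localhost", 1098);
            // Added in this change: ask the CC where the dataset directory service listens.
            NetworkAddress addr = hcc.getDatasetDirectoryServiceInfo();
            System.out.println("Dataset directory service at: " + addr);
        }
    }
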
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/IHyracksClientConnection.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/IHyracksClientConnection.java
index fcfa722..fe4094f 100644
--- a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/IHyracksClientConnection.java
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/IHyracksClientConnection.java
@@ -17,6 +17,7 @@
import java.util.EnumSet;
import java.util.Map;
+import edu.uci.ics.hyracks.api.comm.NetworkAddress;
import edu.uci.ics.hyracks.api.job.IActivityClusterGraphGeneratorFactory;
import edu.uci.ics.hyracks.api.job.JobFlag;
import edu.uci.ics.hyracks.api.job.JobId;
@@ -78,6 +79,14 @@
public JobId startJob(IActivityClusterGraphGeneratorFactory acggf, EnumSet<JobFlag> jobFlags) throws Exception;
/**
+ * Gets the IP address and port of the DatasetDirectoryService, wrapped in a NetworkAddress.
+ *
+ * @return {@link NetworkAddress}
+ * @throws Exception
+ */
+ public NetworkAddress getDatasetDirectoryServiceInfo() throws Exception;
+
+ /**
* Waits until the specified job has completed, either successfully or has
* encountered a permanent failure.
*
@@ -101,4 +110,4 @@
* @throws Exception
*/
public ClusterTopology getClusterTopology() throws Exception;
-}
\ No newline at end of file
+}
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/IHyracksClientInterface.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/IHyracksClientInterface.java
index 33ce29a..6fdf638 100644
--- a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/IHyracksClientInterface.java
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/IHyracksClientInterface.java
@@ -17,6 +17,7 @@
import java.util.EnumSet;
import java.util.Map;
+import edu.uci.ics.hyracks.api.comm.NetworkAddress;
import edu.uci.ics.hyracks.api.job.JobFlag;
import edu.uci.ics.hyracks.api.job.JobId;
import edu.uci.ics.hyracks.api.job.JobStatus;
@@ -29,6 +30,8 @@
public JobId startJob(byte[] acggfBytes, EnumSet<JobFlag> jobFlags) throws Exception;
+ public NetworkAddress getDatasetDirectoryServiceInfo() throws Exception;
+
public void waitForCompletion(JobId jobId) throws Exception;
public Map<String, NodeControllerInfo> getNodeControllersInfo() throws Exception;
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/NodeControllerInfo.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/NodeControllerInfo.java
index fd9218a..73b5488 100644
--- a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/NodeControllerInfo.java
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/NodeControllerInfo.java
@@ -27,10 +27,14 @@
private final NetworkAddress netAddress;
- public NodeControllerInfo(String nodeId, NodeStatus status, NetworkAddress netAddress) {
+ private final NetworkAddress datasetNetworkAddress;
+
+ public NodeControllerInfo(String nodeId, NodeStatus status, NetworkAddress netAddress,
+ NetworkAddress datasetNetworkAddress) {
this.nodeId = nodeId;
this.status = status;
this.netAddress = netAddress;
+ this.datasetNetworkAddress = datasetNetworkAddress;
}
public String getNodeId() {
@@ -44,4 +48,8 @@
public NetworkAddress getNetworkAddress() {
return netAddress;
}
+
+ public NetworkAddress getDatasetNetworkAddress() {
+ return datasetNetworkAddress;
+ }
}
\ No newline at end of file
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/impl/JobSpecificationActivityClusterGraphGeneratorFactory.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/impl/JobSpecificationActivityClusterGraphGeneratorFactory.java
index dd45825..3431c40 100644
--- a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/impl/JobSpecificationActivityClusterGraphGeneratorFactory.java
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/client/impl/JobSpecificationActivityClusterGraphGeneratorFactory.java
@@ -17,6 +17,7 @@
import edu.uci.ics.hyracks.api.job.JobFlag;
import edu.uci.ics.hyracks.api.job.JobId;
import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.api.rewriter.ActivityClusterGraphRewriter;
public class JobSpecificationActivityClusterGraphGeneratorFactory implements IActivityClusterGraphGeneratorFactory {
private static final long serialVersionUID = 1L;
@@ -78,6 +79,8 @@
return new IActivityClusterGraphGenerator() {
@Override
public ActivityClusterGraph initialize() {
+ ActivityClusterGraphRewriter rewriter = new ActivityClusterGraphRewriter();
+ rewriter.rewrite(acg);
return acg;
}
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/context/IHyracksTaskContext.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/context/IHyracksTaskContext.java
index e964d66..a2ee977 100644
--- a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/context/IHyracksTaskContext.java
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/context/IHyracksTaskContext.java
@@ -15,6 +15,7 @@
package edu.uci.ics.hyracks.api.context;
import edu.uci.ics.hyracks.api.dataflow.TaskAttemptId;
+import edu.uci.ics.hyracks.api.dataset.IDatasetPartitionManager;
import edu.uci.ics.hyracks.api.io.IWorkspaceFileFactory;
import edu.uci.ics.hyracks.api.job.IOperatorEnvironment;
import edu.uci.ics.hyracks.api.job.profiling.counters.ICounterContext;
@@ -28,5 +29,7 @@
public ICounterContext getCounterContext();
+ public IDatasetPartitionManager getDatasetPartitionManager();
+
public void sendApplicationMessageToCC(byte[] message, String nodeId) throws Exception;
}
\ No newline at end of file
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataflow/value/IResultSerializer.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataflow/value/IResultSerializer.java
new file mode 100644
index 0000000..ba2ff9a
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataflow/value/IResultSerializer.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.api.dataflow.value;
+
+import java.io.Serializable;
+
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
+public interface IResultSerializer extends Serializable {
+ /**
+ * Initializes the serializer.
+ */
+ public void init() throws HyracksDataException;
+
+ /**
+ * Serializes the tuple at the given index and appends it to the provided output stream.
+ *
+ * @param tAccess
+ * - A frame tuple accessor object that contains the original data to be serialized
+ * @param tIdx
+ * - Index of the tuple that should be serialized.
+ * @return true if the tuple was appended successfully, else false.
+ */
+ public boolean appendTuple(IFrameTupleAccessor tAccess, int tIdx) throws HyracksDataException;
+}
\ No newline at end of file
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataflow/value/IResultSerializerFactory.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataflow/value/IResultSerializerFactory.java
new file mode 100644
index 0000000..1fbf00f
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataflow/value/IResultSerializerFactory.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.api.dataflow.value;
+
+import java.io.PrintStream;
+import java.io.Serializable;
+
+public interface IResultSerializerFactory extends Serializable {
+ /**
+ * Creates a result serializer that appends serialized results to the given print stream.
+ *
+ * @param recordDesc
+ * - The record descriptor describing the tuples to be serialized.
+ * @param printStream
+ * - A print stream object to which the serialized results will be written.
+ * @return A new instance of {@link IResultSerializer}.
+ */
+ public IResultSerializer createResultSerializer(RecordDescriptor recordDesc, PrintStream printStream);
+}
\ No newline at end of file
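
For context, here is a minimal sketch of what an implementation of these two serializer interfaces could look like. It only prints the byte length of each field; a real serializer would use the RecordDescriptor to deserialize and print field values. The class name and the output format are illustrative assumptions, not part of this change.

    import java.io.PrintStream;

    import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
    import edu.uci.ics.hyracks.api.dataflow.value.IResultSerializer;
    import edu.uci.ics.hyracks.api.dataflow.value.IResultSerializerFactory;
    import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
    import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;

    public class FieldLengthResultSerializerFactory implements IResultSerializerFactory {
        private static final long serialVersionUID = 1L;

        @Override
        public IResultSerializer createResultSerializer(final RecordDescriptor recordDesc,
                final PrintStream printStream) {
            return new IResultSerializer() {
                private static final long serialVersionUID = 1L;

                @Override
                public void init() throws HyracksDataException {
                    // Nothing to set up for this sketch.
                }

                @Override
                public boolean appendTuple(IFrameTupleAccessor tAccess, int tIdx) throws HyracksDataException {
                    // Print the byte extent of each field of the tuple; a real serializer
                    // would deserialize the fields via recordDesc and print their values.
                    int fieldCount = tAccess.getFieldCount();
                    for (int i = 0; i < fieldCount; i++) {
                        int start = tAccess.getFieldStartOffset(tIdx, i);
                        int end = tAccess.getFieldEndOffset(tIdx, i);
                        printStream.print((end - start) + (i == fieldCount - 1 ? "" : "|"));
                    }
                    printStream.println();
                    return true;
                }
            };
        }
    }
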
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataflow/value/JSONSerializable.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataflow/value/JSONSerializable.java
new file mode 100644
index 0000000..1eca502
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataflow/value/JSONSerializable.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.api.dataflow.value;
+
+import org.json.JSONException;
+import org.json.JSONObject;
+
+public interface JSONSerializable {
+ /**
+ * Returns the JSON representation of the object.
+ *
+ * @return A new JSONObject instance representing this Java object.
+ */
+ public JSONObject toJSON() throws JSONException;
+}
\ No newline at end of file
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/DatasetDirectoryRecord.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/DatasetDirectoryRecord.java
new file mode 100644
index 0000000..6316bba
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/DatasetDirectoryRecord.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.api.dataset;
+
+import java.io.Serializable;
+
+import edu.uci.ics.hyracks.api.comm.NetworkAddress;
+
+public class DatasetDirectoryRecord implements Serializable {
+ public enum Status {
+ IDLE,
+ RUNNING,
+ SUCCESS,
+ FAILED
+ }
+
+ private static final long serialVersionUID = 1L;
+
+ private NetworkAddress address;
+
+ private boolean readEOS;
+
+ private Status status;
+
+ public DatasetDirectoryRecord() {
+ this.address = null;
+ this.readEOS = false;
+ this.status = Status.IDLE;
+ }
+
+ public void setNetworkAddress(NetworkAddress address) {
+ this.address = address;
+ }
+
+ public NetworkAddress getNetworkAddress() {
+ return address;
+ }
+
+ public void readEOS() {
+ this.readEOS = true;
+ }
+
+ public boolean hasReachedReadEOS() {
+ return readEOS;
+ }
+
+ public void start() {
+ status = Status.RUNNING;
+ }
+
+ public void writeEOS() {
+ status = Status.SUCCESS;
+ }
+
+ public void fail() {
+ status = Status.FAILED;
+ }
+
+ public Status getStatus() {
+ return status;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o == this) {
+ return true;
+ }
+ if (!(o instanceof DatasetDirectoryRecord)) {
+ return false;
+ }
+ return address.equals(((DatasetDirectoryRecord) o).address);
+ }
+}
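
A small sketch of the status lifecycle a DatasetDirectoryRecord goes through; in practice the directory service, not client code, drives these transitions. The class name is illustrative only.

    import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord;
    import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord.Status;

    public class DatasetDirectoryRecordLifecycle {
        public static void main(String[] args) {
            DatasetDirectoryRecord record = new DatasetDirectoryRecord(); // Status.IDLE, no address yet
            record.start();    // partition began producing: Status.RUNNING
            record.writeEOS(); // producer finished writing: Status.SUCCESS
            record.readEOS();  // a reader has drained this partition
            System.out.println(record.getStatus() == Status.SUCCESS); // true
            System.out.println(record.hasReachedReadEOS());           // true
        }
    }
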
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IDatasetDirectoryService.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IDatasetDirectoryService.java
new file mode 100644
index 0000000..5266333
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IDatasetDirectoryService.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.api.dataset;
+
+import edu.uci.ics.hyracks.api.comm.NetworkAddress;
+import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord.Status;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.job.JobId;
+
+public interface IDatasetDirectoryService {
+ public void registerResultPartitionLocation(JobId jobId, ResultSetId rsId, boolean orderedResult, int partition,
+ int nPartitions, NetworkAddress networkAddress);
+
+ public void reportResultPartitionWriteCompletion(JobId jobId, ResultSetId rsId, int partition);
+
+ public void reportResultPartitionFailure(JobId jobId, ResultSetId rsId, int partition);
+
+ public Status getResultStatus(JobId jobId, ResultSetId rsId) throws HyracksDataException;
+
+ public DatasetDirectoryRecord[] getResultPartitionLocations(JobId jobId, ResultSetId rsId,
+ DatasetDirectoryRecord[] knownLocations) throws HyracksDataException;
+}
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IDatasetInputChannelMonitor.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IDatasetInputChannelMonitor.java
new file mode 100644
index 0000000..65ba1c7
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IDatasetInputChannelMonitor.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.api.dataset;
+
+import edu.uci.ics.hyracks.api.channels.IInputChannelMonitor;
+
+public interface IDatasetInputChannelMonitor extends IInputChannelMonitor {
+ public boolean eosReached();
+
+ public boolean failed();
+
+ public int getNFramesAvailable();
+
+ public void notifyFrameRead();
+}
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IDatasetPartitionManager.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IDatasetPartitionManager.java
new file mode 100644
index 0000000..ae38c7f
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IDatasetPartitionManager.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.api.dataset;
+
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.io.IWorkspaceFileFactory;
+import edu.uci.ics.hyracks.api.job.JobId;
+
+public interface IDatasetPartitionManager {
+ public IFrameWriter createDatasetPartitionWriter(IHyracksTaskContext ctx, ResultSetId rsId, boolean orderedResult,
+ int partition, int nPartitions) throws HyracksException;
+
+ public void reportPartitionWriteCompletion(JobId jobId, ResultSetId resultSetId, int partition)
+ throws HyracksException;
+
+ public void reportPartitionFailure(JobId jobId, ResultSetId resultSetId, int partition) throws HyracksException;
+
+ public void initializeDatasetPartitionReader(JobId jobId, int partition, IFrameWriter noc) throws HyracksException;
+
+ public IWorkspaceFileFactory getFileFactory();
+
+ public void close();
+}
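
A hedged sketch of how a task-side runtime could push a frame of results through this manager, using only the calls introduced in this change (createDatasetPartitionWriter here and getDatasetPartitionManager on IHyracksTaskContext). Failure handling via IFrameWriter.fail() is omitted for brevity, and the helper class is hypothetical.

    import java.nio.ByteBuffer;

    import edu.uci.ics.hyracks.api.comm.IFrameWriter;
    import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
    import edu.uci.ics.hyracks.api.dataset.IDatasetPartitionManager;
    import edu.uci.ics.hyracks.api.dataset.ResultSetId;
    import edu.uci.ics.hyracks.api.exceptions.HyracksException;

    public class ResultWriterSketch {
        // Pushes a single, already populated frame of results for one partition.
        public static void writeOneFrame(IHyracksTaskContext ctx, ResultSetId rsId, int partition,
                int nPartitions, ByteBuffer frame) throws HyracksException {
            IDatasetPartitionManager dpm = ctx.getDatasetPartitionManager();
            IFrameWriter writer = dpm.createDatasetPartitionWriter(ctx, rsId, false, partition, nPartitions);
            writer.open();
            try {
                writer.nextFrame(frame);
            } finally {
                writer.close();
            }
        }
    }
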
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IDatasetPartitionReader.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IDatasetPartitionReader.java
new file mode 100644
index 0000000..8f5ed64
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IDatasetPartitionReader.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.api.dataset;
+
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+
+public interface IDatasetPartitionReader {
+ public void writeTo(IFrameWriter writer);
+}
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IDatasetPartitionWriter.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IDatasetPartitionWriter.java
new file mode 100644
index 0000000..42dc157
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IDatasetPartitionWriter.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.api.dataset;
+
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
+public interface IDatasetPartitionWriter extends IFrameWriter {
+ public Page returnPage() throws HyracksDataException;
+}
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IHyracksDataset.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IHyracksDataset.java
new file mode 100644
index 0000000..4a7a6b0
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IHyracksDataset.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.api.dataset;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.job.JobId;
+
+public interface IHyracksDataset {
+ public IHyracksDatasetReader createReader(JobId jobId, ResultSetId resultSetId) throws HyracksDataException;
+}
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IHyracksDatasetDirectoryServiceConnection.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IHyracksDatasetDirectoryServiceConnection.java
new file mode 100644
index 0000000..d49d5cd
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IHyracksDatasetDirectoryServiceConnection.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.api.dataset;
+
+import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord.Status;
+import edu.uci.ics.hyracks.api.job.JobId;
+
+public interface IHyracksDatasetDirectoryServiceConnection {
+ /**
+ * Gets the result status for the given result set.
+ *
+ * @param jobId
+ * ID of the job
+ * @param rsId
+ * ID of the result set
+ * @return {@link Status}
+ * @throws Exception
+ */
+ public Status getDatasetResultStatus(JobId jobId, ResultSetId rsId) throws Exception;
+
+ /**
+ * Gets the IP addresses and ports of the partitions generating the result, one record per location.
+ *
+ * @param jobId
+ * ID of the job
+ * @param rsId
+ * ID of the result set
+ * @param knownRecords
+ * Locations that are already known to the client
+ * @return an array of {@link DatasetDirectoryRecord}
+ * @throws Exception
+ */
+ public DatasetDirectoryRecord[] getDatasetResultLocations(JobId jobId, ResultSetId rsId,
+ DatasetDirectoryRecord[] knownRecords) throws Exception;
+}
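
A minimal client-side sketch of using this connection interface: poll the result status until it settles, then fetch the partition locations. The polling interval and the choice to pass null as knownRecords on the first call are illustrative assumptions.

    import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord;
    import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord.Status;
    import edu.uci.ics.hyracks.api.dataset.IHyracksDatasetDirectoryServiceConnection;
    import edu.uci.ics.hyracks.api.dataset.ResultSetId;
    import edu.uci.ics.hyracks.api.job.JobId;

    public class ResultPollingSketch {
        // Polls until the result set either succeeds or fails, then returns its partition locations.
        public static DatasetDirectoryRecord[] waitForLocations(IHyracksDatasetDirectoryServiceConnection ddsc,
                JobId jobId, ResultSetId rsId) throws Exception {
            while (true) {
                Status status = ddsc.getDatasetResultStatus(jobId, rsId);
                if (status == Status.SUCCESS) {
                    break;
                }
                if (status == Status.FAILED) {
                    throw new IllegalStateException("Result set " + rsId + " of job " + jobId + " failed");
                }
                Thread.sleep(100); // simple fixed backoff; a real client would cap retries
            }
            // Nothing has been fetched yet, so there are no known records to exclude.
            return ddsc.getDatasetResultLocations(jobId, rsId, null);
        }
    }
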
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IHyracksDatasetDirectoryServiceInterface.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IHyracksDatasetDirectoryServiceInterface.java
new file mode 100644
index 0000000..ba21a84
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IHyracksDatasetDirectoryServiceInterface.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.api.dataset;
+
+import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord.Status;
+import edu.uci.ics.hyracks.api.job.JobId;
+
+public interface IHyracksDatasetDirectoryServiceInterface {
+ /**
+ * Gets the result status for the given result set.
+ *
+ * @param jobId
+ * ID of the job
+ * @param rsId
+ * ID of the result set
+ * @return {@link Status}
+ * @throws Exception
+ */
+ public Status getDatasetResultStatus(JobId jobId, ResultSetId rsId) throws Exception;
+
+ /**
+ * Gets the IP addresses and ports of the partitions generating the result, one record per location.
+ *
+ * @param jobId
+ * ID of the job
+ * @param rsId
+ * ID of the result set
+ * @param knownRecords
+ * Locations from the dataset directory that are already known to the client
+ * @return an array of {@link DatasetDirectoryRecord}
+ * @throws Exception
+ */
+ public DatasetDirectoryRecord[] getDatasetResultLocations(JobId jobId, ResultSetId rsId,
+ DatasetDirectoryRecord[] knownRecords) throws Exception;
+}
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IHyracksDatasetReader.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IHyracksDatasetReader.java
new file mode 100644
index 0000000..b928a49
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/IHyracksDatasetReader.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.api.dataset;
+
+import java.nio.ByteBuffer;
+
+import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord.Status;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
+public interface IHyracksDatasetReader {
+ public Status getResultStatus();
+
+ public int read(ByteBuffer buffer) throws HyracksDataException;
+}
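
A sketch of a read loop over this reader interface. It assumes read() fills the buffer with result frames and returns a non-positive value once the result set is exhausted; that return convention is an assumption made for illustration, as is the helper class itself.

    import java.nio.ByteBuffer;

    import edu.uci.ics.hyracks.api.dataset.IHyracksDataset;
    import edu.uci.ics.hyracks.api.dataset.IHyracksDatasetReader;
    import edu.uci.ics.hyracks.api.dataset.ResultSetId;
    import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
    import edu.uci.ics.hyracks.api.job.JobId;

    public class DatasetReadLoopSketch {
        // Drains all frames of one result set and returns the total number of bytes read.
        public static long drain(IHyracksDataset dataset, JobId jobId, ResultSetId rsId, int frameSize)
                throws HyracksDataException {
            IHyracksDatasetReader reader = dataset.createReader(jobId, rsId);
            ByteBuffer frame = ByteBuffer.allocate(frameSize);
            long total = 0;
            int n;
            while ((n = reader.read(frame)) > 0) {
                total += n;
                // ... hand the frame to a result serializer/deserializer here ...
                frame.clear();
            }
            return total;
        }
    }
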
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/Page.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/Page.java
new file mode 100644
index 0000000..7275dfd
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/Page.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.api.dataset;
+
+import java.nio.ByteBuffer;
+
+public class Page {
+ private final ByteBuffer buffer;
+
+ public Page(ByteBuffer buffer) {
+ this.buffer = buffer;
+ }
+
+ public ByteBuffer getBuffer() {
+ return buffer;
+ }
+
+ public ByteBuffer clear() {
+ return (ByteBuffer) buffer.clear();
+ }
+}
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/ResultSetId.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/ResultSetId.java
new file mode 100644
index 0000000..ae38ef3
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataset/ResultSetId.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.api.dataset;
+
+import java.io.Serializable;
+
+public class ResultSetId implements Serializable {
+ private static final long serialVersionUID = 1L;
+
+ private final long id;
+
+ public ResultSetId(long id) {
+ this.id = id;
+ }
+
+ public long getId() {
+ return id;
+ }
+
+ @Override
+ public int hashCode() {
+ return (int) id;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o == this) {
+ return true;
+ }
+ if (!(o instanceof ResultSetId)) {
+ return false;
+ }
+ return ((ResultSetId) o).id == id;
+ }
+
+ @Override
+ public String toString() {
+ return "RSID:" + id;
+ }
+}
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/job/ActivityCluster.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/job/ActivityCluster.java
index 6698ff7..9fb2b08 100644
--- a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/job/ActivityCluster.java
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/job/ActivityCluster.java
@@ -33,7 +33,7 @@
import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicyAssignmentPolicy;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-public final class ActivityCluster implements Serializable {
+public class ActivityCluster implements Serializable {
private static final long serialVersionUID = 1L;
private final ActivityClusterGraph acg;
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/job/JobSpecification.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/job/JobSpecification.java
index 7c523f1..1fdff0f 100644
--- a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/job/JobSpecification.java
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/job/JobSpecification.java
@@ -34,12 +34,15 @@
import edu.uci.ics.hyracks.api.dataflow.OperatorDescriptorId;
import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicyAssignmentPolicy;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
public class JobSpecification implements Serializable, IOperatorDescriptorRegistry, IConnectorDescriptorRegistry {
private static final long serialVersionUID = 1L;
private final List<OperatorDescriptorId> roots;
+ private final List<ResultSetId> resultSetIds;
+
private final Map<OperatorDescriptorId, IOperatorDescriptor> opMap;
private final Map<ConnectorDescriptorId, IConnectorDescriptor> connMap;
@@ -72,6 +75,7 @@
public JobSpecification() {
roots = new ArrayList<OperatorDescriptorId>();
+ resultSetIds = new ArrayList<ResultSetId>();
opMap = new HashMap<OperatorDescriptorId, IOperatorDescriptor>();
connMap = new HashMap<ConnectorDescriptorId, IConnectorDescriptor>();
opInputMap = new HashMap<OperatorDescriptorId, List<IConnectorDescriptor>>();
@@ -104,6 +108,10 @@
roots.add(op.getOperatorId());
}
+ public void addResultSetId(ResultSetId rsId) {
+ resultSetIds.add(rsId);
+ }
+
public void connect(IConnectorDescriptor conn, IOperatorDescriptor producerOp, int producerPort,
IOperatorDescriptor consumerOp, int consumerPort) {
insertIntoIndexedMap(opInputMap, consumerOp.getOperatorId(), consumerPort, conn);
@@ -208,6 +216,10 @@
return roots;
}
+ public List<ResultSetId> getResultSetIds() {
+ return resultSetIds;
+ }
+
public IConnectorPolicyAssignmentPolicy getConnectorPolicyAssignmentPolicy() {
return connectorPolicyAssignmentPolicy;
}
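
A short sketch of how a job registers a result set with the new API; the id value 1 is arbitrary, the client simply has to use the same ResultSetId when reading the result back. The method and class names are illustrative.

    import edu.uci.ics.hyracks.api.dataset.ResultSetId;
    import edu.uci.ics.hyracks.api.job.JobSpecification;

    public class ResultSetIdRegistration {
        public static JobSpecification buildSpec() {
            JobSpecification spec = new JobSpecification();
            // Register the result set the job will distribute.
            ResultSetId rsId = new ResultSetId(1);
            spec.addResultSetId(rsId);
            // ... add operators, connectors, and roots as usual ...
            assert spec.getResultSetIds().contains(rsId);
            return spec;
        }
    }
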
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/partitions/ResultSetPartitionId.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/partitions/ResultSetPartitionId.java
new file mode 100644
index 0000000..148a8a2
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/partitions/ResultSetPartitionId.java
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.api.partitions;
+
+import java.io.Serializable;
+
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
+import edu.uci.ics.hyracks.api.job.JobId;
+
+public final class ResultSetPartitionId implements Serializable {
+ private static final long serialVersionUID = 1L;
+
+ private final JobId jobId;
+
+ private final ResultSetId resultSetId;
+
+ private final int partition;
+
+ public ResultSetPartitionId(JobId jobId, ResultSetId resultSetId, int partition) {
+ this.jobId = jobId;
+ this.resultSetId = resultSetId;
+ this.partition = partition;
+ }
+
+ public JobId getJobId() {
+ return jobId;
+ }
+
+ public ResultSetId getResultSetId() {
+ return resultSetId;
+ }
+
+ public int getPartition() {
+ return partition;
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + ((resultSetId == null) ? 0 : resultSetId.hashCode());
+ result = prime * result + ((jobId == null) ? 0 : jobId.hashCode());
+ result = prime * result + partition;
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+ ResultSetPartitionId other = (ResultSetPartitionId) obj;
+ if (resultSetId == null) {
+ if (other.resultSetId != null)
+ return false;
+ } else if (!resultSetId.equals(other.resultSetId))
+ return false;
+ if (jobId == null) {
+ if (other.jobId != null)
+ return false;
+ } else if (!jobId.equals(other.jobId))
+ return false;
+ if (partition != other.partition)
+ return false;
+ return true;
+ }
+
+ @Override
+ public String toString() {
+ return jobId.toString() + ":" + resultSetId + ":" + partition;
+ }
+}
\ No newline at end of file
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/rewriter/ActivityClusterGraphRewriter.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/rewriter/ActivityClusterGraphRewriter.java
new file mode 100644
index 0000000..c6761e9
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/rewriter/ActivityClusterGraphRewriter.java
@@ -0,0 +1,381 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.api.rewriter;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Queue;
+import java.util.Set;
+
+import org.apache.commons.lang3.tuple.Pair;
+
+import edu.uci.ics.hyracks.api.dataflow.ActivityId;
+import edu.uci.ics.hyracks.api.dataflow.ConnectorDescriptorId;
+import edu.uci.ics.hyracks.api.dataflow.IActivity;
+import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.job.ActivityCluster;
+import edu.uci.ics.hyracks.api.job.ActivityClusterGraph;
+import edu.uci.ics.hyracks.api.job.ActivityClusterId;
+import edu.uci.ics.hyracks.api.rewriter.runtime.SuperActivity;
+
+/**
+ * This class rewrites the ActivityClusterGraph to eliminate
+ * all one-to-one connections and merge one-to-one connected
+ * DAGs into super activities.
+ * <p>
+ * Each super activity internally maintains a DAG and executes it at runtime.
+ *
+ * @author yingyib
+ */
+public class ActivityClusterGraphRewriter {
+ private static String ONE_TO_ONE_CONNECTOR = "edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor";
+
+ /**
+ * Rewrite an activity cluster graph to eliminate
+ * all one-to-one connections and merge one-to-one connected
+ * DAGs into super activities.
+ *
+ * @param acg
+ * the activity cluster graph
+ */
+ public void rewrite(ActivityClusterGraph acg) {
+ acg.getActivityMap().clear();
+ acg.getConnectorMap().clear();
+ Map<IActivity, SuperActivity> invertedActivitySuperActivityMap = new HashMap<IActivity, SuperActivity>();
+ for (Entry<ActivityClusterId, ActivityCluster> entry : acg.getActivityClusterMap().entrySet()) {
+ rewriteIntraActivityCluster(entry.getValue(), invertedActivitySuperActivityMap);
+ }
+ for (Entry<ActivityClusterId, ActivityCluster> entry : acg.getActivityClusterMap().entrySet()) {
+ rewriteInterActivityCluster(entry.getValue(), invertedActivitySuperActivityMap);
+ }
+ invertedActivitySuperActivityMap.clear();
+ }
+
+ /**
+ * Rewrite the blocking relationships among activity clusters
+ *
+ * @param ac
+ * the activity cluster to be rewritten
+ */
+ private void rewriteInterActivityCluster(ActivityCluster ac,
+ Map<IActivity, SuperActivity> invertedActivitySuperActivityMap) {
+ Map<ActivityId, Set<ActivityId>> blocked2BlockerMap = ac.getBlocked2BlockerMap();
+ Map<ActivityId, ActivityId> invertedAid2SuperAidMap = new HashMap<ActivityId, ActivityId>();
+ for (Entry<IActivity, SuperActivity> entry : invertedActivitySuperActivityMap.entrySet()) {
+ invertedAid2SuperAidMap.put(entry.getKey().getActivityId(), entry.getValue().getActivityId());
+ }
+ Map<ActivityId, Set<ActivityId>> replacedBlocked2BlockerMap = new HashMap<ActivityId, Set<ActivityId>>();
+ for (Entry<ActivityId, Set<ActivityId>> entry : blocked2BlockerMap.entrySet()) {
+ ActivityId blocked = entry.getKey();
+ ActivityId replacedBlocked = invertedAid2SuperAidMap.get(blocked);
+ Set<ActivityId> blockers = entry.getValue();
+ Set<ActivityId> replacedBlockers = null;
+ if (blockers != null) {
+ replacedBlockers = new HashSet<ActivityId>();
+ for (ActivityId blocker : blockers) {
+ replacedBlockers.add(invertedAid2SuperAidMap.get(blocker));
+ ActivityCluster dependingAc = ac.getActivityClusterGraph().getActivityMap()
+ .get(invertedAid2SuperAidMap.get(blocker));
+ if (!ac.getDependencies().contains(dependingAc)) {
+ ac.getDependencies().add(dependingAc);
+ }
+ }
+ }
+ if (replacedBlockers != null) {
+ Set<ActivityId> existingBlockers = replacedBlocked2BlockerMap.get(replacedBlocked);
+ if (existingBlockers == null) {
+ replacedBlocked2BlockerMap.put(replacedBlocked, replacedBlockers);
+ } else {
+ existingBlockers.addAll(replacedBlockers);
+ replacedBlocked2BlockerMap.put(replacedBlocked, existingBlockers);
+ }
+ }
+ }
+ blocked2BlockerMap.clear();
+ blocked2BlockerMap.putAll(replacedBlocked2BlockerMap);
+ }
+
+ /**
+ * Rewrite an activity cluster internally
+ *
+ * @param ac
+ * the activity cluster to be rewritten
+ */
+ private void rewriteIntraActivityCluster(ActivityCluster ac,
+ Map<IActivity, SuperActivity> invertedActivitySuperActivityMap) {
+ Map<ActivityId, IActivity> activities = ac.getActivityMap();
+ Map<ActivityId, List<IConnectorDescriptor>> activityInputMap = ac.getActivityInputMap();
+ Map<ActivityId, List<IConnectorDescriptor>> activityOutputMap = ac.getActivityOutputMap();
+ Map<ConnectorDescriptorId, Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>>> connectorActivityMap = ac
+ .getConnectorActivityMap();
+ ActivityClusterGraph acg = ac.getActivityClusterGraph();
+ Map<ActivityId, IActivity> startActivities = new HashMap<ActivityId, IActivity>();
+ Map<ActivityId, SuperActivity> superActivities = new HashMap<ActivityId, SuperActivity>();
+ Map<ActivityId, Queue<IActivity>> toBeExpendedMap = new HashMap<ActivityId, Queue<IActivity>>();
+
+ /**
+ * Build the initial super activities
+ */
+ for (Entry<ActivityId, IActivity> entry : activities.entrySet()) {
+ ActivityId activityId = entry.getKey();
+ IActivity activity = entry.getValue();
+ if (activityInputMap.get(activityId) == null) {
+ startActivities.put(activityId, activity);
+ /**
+ * use the start activity's id as the id of the super activity
+ */
+ createNewSuperActivity(ac, superActivities, toBeExpendedMap, invertedActivitySuperActivityMap,
+ activityId, activity);
+ }
+ }
+
+ /**
+ * Expand one-to-one connected activity clusters in BFS order.
+ * After the while-loop, the original activities are partitioned
+ * into equivalence classes, one per super activity.
+ */
+ Map<ActivityId, SuperActivity> clonedSuperActivities = new HashMap<ActivityId, SuperActivity>();
+ while (toBeExpendedMap.size() > 0) {
+ clonedSuperActivities.clear();
+ clonedSuperActivities.putAll(superActivities);
+ for (Entry<ActivityId, SuperActivity> entry : clonedSuperActivities.entrySet()) {
+ ActivityId superActivityId = entry.getKey();
+ SuperActivity superActivity = entry.getValue();
+
+ /**
+ * for the case where the super activity has already been swallowed
+ */
+ if (superActivities.get(superActivityId) == null) {
+ continue;
+ }
+
+ /**
+ * expand the super activity
+ */
+ Queue<IActivity> toBeExpended = toBeExpendedMap.get(superActivityId);
+ if (toBeExpended == null) {
+ /**
+ * Nothing to expand
+ */
+ continue;
+ }
+ IActivity expendingActivity = toBeExpended.poll();
+ List<IConnectorDescriptor> outputConnectors = activityOutputMap.get(expendingActivity.getActivityId());
+ if (outputConnectors != null) {
+ for (IConnectorDescriptor outputConn : outputConnectors) {
+ Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>> endPoints = connectorActivityMap
+ .get(outputConn.getConnectorId());
+ IActivity newActivity = endPoints.getRight().getLeft();
+ SuperActivity existingSuperActivity = invertedActivitySuperActivityMap.get(newActivity);
+ if (outputConn.getClass().getName().contains(ONE_TO_ONE_CONNECTOR)) {
+ /**
+ * expand the super activity along a one-to-one outbound connection
+ */
+ if (existingSuperActivity == null) {
+ superActivity.addActivity(newActivity);
+ toBeExpended.add(newActivity);
+ invertedActivitySuperActivityMap.put(newActivity, superActivity);
+ } else {
+ /**
+ * the two activities are already in the same super activity
+ */
+ if (existingSuperActivity == superActivity) {
+ continue;
+ }
+ /**
+ * swallow an existing super activity
+ */
+ swallowExistingSuperActivity(superActivities, toBeExpendedMap,
+ invertedActivitySuperActivityMap, superActivity, superActivityId,
+ existingSuperActivity);
+ }
+ } else {
+ if (existingSuperActivity == null) {
+ /**
+ * create a new super activity
+ */
+ createNewSuperActivity(ac, superActivities, toBeExpendedMap,
+ invertedActivitySuperActivityMap, newActivity.getActivityId(), newActivity);
+ }
+ }
+ }
+ }
+
+ /**
+ * remove the to-be-expanded queue if it is empty
+ */
+ if (toBeExpended.size() == 0) {
+ toBeExpendedMap.remove(superActivityId);
+ }
+ }
+ }
+
+ Map<ConnectorDescriptorId, IConnectorDescriptor> connMap = ac.getConnectorMap();
+ Map<ConnectorDescriptorId, RecordDescriptor> connRecordDesc = ac.getConnectorRecordDescriptorMap();
+ Map<SuperActivity, Integer> superActivityProducerPort = new HashMap<SuperActivity, Integer>();
+ Map<SuperActivity, Integer> superActivityConsumerPort = new HashMap<SuperActivity, Integer>();
+ for (Entry<ActivityId, SuperActivity> entry : superActivities.entrySet()) {
+ superActivityProducerPort.put(entry.getValue(), 0);
+ superActivityConsumerPort.put(entry.getValue(), 0);
+ }
+
+ /**
+ * create a new activity cluster to replace the old activity cluster
+ */
+ ActivityCluster newActivityCluster = new ActivityCluster(acg, ac.getId());
+ newActivityCluster.setConnectorPolicyAssignmentPolicy(ac.getConnectorPolicyAssignmentPolicy());
+ for (Entry<ActivityId, SuperActivity> entry : superActivities.entrySet()) {
+ newActivityCluster.addActivity(entry.getValue());
+ acg.getActivityMap().put(entry.getKey(), newActivityCluster);
+ }
+
+ /**
+ * Set up connectors: either inside a super activity or among super activities
+ */
+ for (Entry<ConnectorDescriptorId, Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>>> entry : connectorActivityMap
+ .entrySet()) {
+ ConnectorDescriptorId connectorId = entry.getKey();
+ Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>> endPoints = entry.getValue();
+ IActivity producerActivity = endPoints.getLeft().getLeft();
+ IActivity consumerActivity = endPoints.getRight().getLeft();
+ int producerPort = endPoints.getLeft().getRight();
+ int consumerPort = endPoints.getRight().getRight();
+ RecordDescriptor recordDescriptor = connRecordDesc.get(connectorId);
+ IConnectorDescriptor conn = connMap.get(connectorId);
+ if (conn.getClass().getName().contains(ONE_TO_ONE_CONNECTOR)) {
+ /**
+ * connection edge between inner activities
+ */
+ SuperActivity residingSuperActivity = invertedActivitySuperActivityMap.get(producerActivity);
+ residingSuperActivity.connect(conn, producerActivity, producerPort, consumerActivity, consumerPort,
+ recordDescriptor);
+ } else {
+ /**
+ * connection edge between super activities
+ */
+ SuperActivity producerSuperActivity = invertedActivitySuperActivityMap.get(producerActivity);
+ SuperActivity consumerSuperActivity = invertedActivitySuperActivityMap.get(consumerActivity);
+ int producerSAPort = superActivityProducerPort.get(producerSuperActivity);
+ int consumerSAPort = superActivityConsumerPort.get(consumerSuperActivity);
+ newActivityCluster.addConnector(conn);
+ newActivityCluster.connect(conn, producerSuperActivity, producerSAPort, consumerSuperActivity,
+ consumerSAPort, recordDescriptor);
+
+ /**
+ * bridge the port
+ */
+ producerSuperActivity.setClusterOutputIndex(producerSAPort, producerActivity.getActivityId(),
+ producerPort);
+ consumerSuperActivity.setClusterInputIndex(consumerSAPort, consumerActivity.getActivityId(),
+ consumerPort);
+ acg.getConnectorMap().put(connectorId, newActivityCluster);
+
+ /**
+ * increase the port numbers for the producer and the consumer
+ */
+ superActivityProducerPort.put(producerSuperActivity, ++producerSAPort);
+ superActivityConsumerPort.put(consumerSuperActivity, ++consumerSAPort);
+ }
+ }
+
+ /**
+ * Set up the roots of the new activity cluster
+ */
+ for (Entry<ActivityId, SuperActivity> entry : superActivities.entrySet()) {
+ List<IConnectorDescriptor> connIds = newActivityCluster.getActivityOutputMap().get(entry.getKey());
+ if (connIds == null || connIds.size() == 0) {
+ newActivityCluster.addRoot(entry.getValue());
+ }
+ }
+
+ /**
+ * set up the blocked2Blocker mapping, which will be updated in the rewriteInterActivityCluster call
+ */
+ newActivityCluster.getBlocked2BlockerMap().putAll(ac.getBlocked2BlockerMap());
+
+ /**
+ * replace the old activity cluster with the new activity cluster
+ */
+ acg.getActivityClusterMap().put(ac.getId(), newActivityCluster);
+ }
+
+ /**
+ * Create a new super activity
+ *
+ * @param acg
+ * the activity cluster
+ * @param superActivities
+ * the map from activity id to current super activities
+ * @param toBeExpendedMap
+ * the map from an existing super activity to its BFS expansion queue of the original activities
+ * @param invertedActivitySuperActivityMap
+ * the map from the original activities to their hosted super activities
+ * @param activityId
+ * the activity id for the new super activity, which is the first added activity's id in the super activity
+ * @param activity
+ * the first activity added to the new super activity
+ */
+ private void createNewSuperActivity(ActivityCluster acg, Map<ActivityId, SuperActivity> superActivities,
+ Map<ActivityId, Queue<IActivity>> toBeExpendedMap,
+ Map<IActivity, SuperActivity> invertedActivitySuperActivityMap, ActivityId activityId, IActivity activity) {
+ SuperActivity superActivity = new SuperActivity(acg.getActivityClusterGraph(), acg.getId(), activityId);
+ superActivities.put(activityId, superActivity);
+ superActivity.addActivity(activity);
+ Queue<IActivity> toBeExpended = new LinkedList<IActivity>();
+ toBeExpended.add(activity);
+ toBeExpendedMap.put(activityId, toBeExpended);
+ invertedActivitySuperActivityMap.put(activity, superActivity);
+ }
+
+ /**
+ * One super activity swallows another existing super activity.
+ *
+ * @param superActivities
+ * the map from activity id to current super activities
+ * @param toBeExpendedMap
+ * the map from an existing super activity to its BFS expansion queue of the original activities
+ * @param invertedActivitySuperActivityMap
+ * the map from the original activities to their hosted super activities
+ * @param superActivity
+ * the "swallowing" super activity
+ * @param superActivityId
+ * the activity id for the "swallowing" super activity, which is also the first added activity's id in the super activity
+ * @param existingSuperActivity
+ * an existing super activity which is to be swallowed by the "swallowing" super activity
+ */
+ private void swallowExistingSuperActivity(Map<ActivityId, SuperActivity> superActivities,
+ Map<ActivityId, Queue<IActivity>> toBeExpendedMap,
+ Map<IActivity, SuperActivity> invertedActivitySuperActivityMap, SuperActivity superActivity,
+ ActivityId superActivityId, SuperActivity existingSuperActivity) {
+ ActivityId existingSuperActivityId = existingSuperActivity.getActivityId();
+ superActivities.remove(existingSuperActivityId);
+ for (Entry<ActivityId, IActivity> existingEntry : existingSuperActivity.getActivityMap().entrySet()) {
+ IActivity existingActivity = existingEntry.getValue();
+ superActivity.addActivity(existingActivity);
+ invertedActivitySuperActivityMap.put(existingActivity, superActivity);
+ }
+ Queue<IActivity> tbeQueue = toBeExpendedMap.get(superActivityId);
+ Queue<IActivity> existingTbeQueque = toBeExpendedMap.remove(existingSuperActivityId);
+ if (existingTbeQueque != null) {
+ tbeQueue.addAll(existingTbeQueque);
+ }
+ }
+}
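
As a usage sketch (assumed, not part of this patch): the rewriter mutates the graph in place, so a caller holding an ActivityClusterGraph from job compilation only needs a single call before scheduling.

    import edu.uci.ics.hyracks.api.job.ActivityClusterGraph;
    import edu.uci.ics.hyracks.api.rewriter.ActivityClusterGraphRewriter;

    public class RewriteSketch {
        public static void collapseOneToOneChains(ActivityClusterGraph acg) {
            // Merges every chain of one-to-one connected activities into a single SuperActivity,
            // rewriting the activity clusters and their blocking relationships in place.
            new ActivityClusterGraphRewriter().rewrite(acg);
        }
    }
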
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/rewriter/OneToOneConnectedActivityCluster.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/rewriter/OneToOneConnectedActivityCluster.java
new file mode 100644
index 0000000..07b7ffc
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/rewriter/OneToOneConnectedActivityCluster.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.api.rewriter;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.lang3.tuple.Pair;
+
+import edu.uci.ics.hyracks.api.dataflow.ActivityId;
+import edu.uci.ics.hyracks.api.job.ActivityCluster;
+import edu.uci.ics.hyracks.api.job.ActivityClusterGraph;
+import edu.uci.ics.hyracks.api.job.ActivityClusterId;
+
+/**
+ * All the connectors in a OneToOneConnectedActivityCluster are OneToOneConnectorDescriptors.
+ *
+ * @author yingyib
+ */
+public class OneToOneConnectedActivityCluster extends ActivityCluster {
+
+ private static final long serialVersionUID = 1L;
+
+ protected final Map<Integer, Pair<ActivityId, Integer>> clusterInputIndexMap = new HashMap<Integer, Pair<ActivityId, Integer>>();
+ protected final Map<Integer, Pair<ActivityId, Integer>> clusterOutputIndexMap = new HashMap<Integer, Pair<ActivityId, Integer>>();
+ protected final Map<Pair<ActivityId, Integer>, Integer> invertedClusterOutputIndexMap = new HashMap<Pair<ActivityId, Integer>, Integer>();
+ protected final Map<Pair<ActivityId, Integer>, Integer> invertedClusterInputIndexMap = new HashMap<Pair<ActivityId, Integer>, Integer>();
+
+ public OneToOneConnectedActivityCluster(ActivityClusterGraph acg, ActivityClusterId id) {
+ super(acg, id);
+ }
+
+ /**
+ * Set up the mapping of the cluster's output channel to an internal activity and its output channel
+ *
+ * @param clusterOutputIndex
+ * the output channel index for the cluster
+ * @param activityId
+ * the id of the internal activity which produces the corresponding output
+ * @param activityOutputIndex
+ * the output channel index of the internal activity which corresponds to the output channel of the cluster of activities
+ */
+ public void setClusterOutputIndex(int clusterOutputIndex, ActivityId activityId, int activityOutputIndex) {
+ clusterOutputIndexMap.put(clusterOutputIndex, Pair.of(activityId, activityOutputIndex));
+ invertedClusterOutputIndexMap.put(Pair.of(activityId, activityOutputIndex), clusterOutputIndex);
+ }
+
+ /**
+ * Get the internal activity and its output channel for a cluster output channel
+ *
+ * @param clusterOutputIndex
+ * the output channel index for the cluster
+ * @return a pair containing the activity id of the corresponding internal activity and the output channel index
+ */
+ public Pair<ActivityId, Integer> getActivityIdOutputIndex(int clusterOutputIndex) {
+ return clusterOutputIndexMap.get(clusterOutputIndex);
+ }
+
+ /**
+ * Set up the mapping of the cluster's input channel to an internal activity and its input channel
+ *
+ * @param clusterInputIndex
+ * the input channel index for the cluster
+ * @param activityId
+ * the id of the internal activity which consumes the corresponding input
+ * @param activityInputIndex
+ * the input channel index of the internal activity which corresponds to the input channel of the cluster of activities
+ */
+ public void setClusterInputIndex(int clusterInputIndex, ActivityId activityId, int activityInputIndex) {
+ clusterInputIndexMap.put(clusterInputIndex, Pair.of(activityId, activityInputIndex));
+ invertedClusterInputIndexMap.put(Pair.of(activityId, activityInputIndex), clusterInputIndex);
+ }
+
+ /**
+ * Get the internal activity and its input channel for a cluster input channel
+ *
+ * @param clusterInputIndex
+ * the input channel index for the cluster
+ * @return a pair containing the activity id of the corresponding internal activity and the input channel index
+ */
+ public Pair<ActivityId, Integer> getActivityIdInputIndex(int clusterInputIndex) {
+ return clusterInputIndexMap.get(clusterInputIndex);
+ }
+
+ /**
+ * Get the cluster input channel of an input-boundary activity and its input channel
+ *
+ * @param activityInputChannel
+ * the input-boundary activity and its input channel
+ * @return the cluster input channel
+ */
+ public int getClusterInputIndex(Pair<ActivityId, Integer> activityInputChannel) {
+ Integer channel = invertedClusterInputIndexMap.get(activityInputChannel);
+ return channel == null ? -1 : channel;
+ }
+
+ /**
+ * Get the cluster output channel of an output-boundary activity and its output channel
+ *
+ * @param activityOutputChannel
+ * the output-boundary activity and its output channel
+ * @return the cluster output channel
+ */
+ public int getClusterOutputIndex(Pair<ActivityId, Integer> activityOutputChannel) {
+ Integer channel = invertedClusterOutputIndexMap.get(activityOutputChannel);
+ return channel == null ? -1 : channel;
+ }
+
+}
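
The four maps above bridge cluster-level channel indexes to the channels of internal activities. A small hypothetical sketch of that bridging, using only the setters and getters defined above (the ActivityId and the cluster instance are assumed to come from the rewriter):

    import org.apache.commons.lang3.tuple.Pair;

    import edu.uci.ics.hyracks.api.dataflow.ActivityId;
    import edu.uci.ics.hyracks.api.rewriter.OneToOneConnectedActivityCluster;

    public class PortBridgingSketch {
        public static void bridgeOutput(OneToOneConnectedActivityCluster cluster, ActivityId producer) {
            // Declare that cluster output 0 is produced by output channel 1 of the internal producer.
            cluster.setClusterOutputIndex(0, producer, 1);

            // Forward lookup: which internal activity/channel feeds cluster output 0?
            Pair<ActivityId, Integer> internal = cluster.getActivityIdOutputIndex(0);
            // Inverse lookup: which cluster output does (producer, channel 1) map to? (-1 if unmapped)
            int clusterOutput = cluster.getClusterOutputIndex(Pair.of(producer, 1));
            System.out.println(internal + " -> cluster output " + clusterOutput);
        }
    }
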
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/rewriter/runtime/SuperActivity.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/rewriter/runtime/SuperActivity.java
new file mode 100644
index 0000000..734ff85
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/rewriter/runtime/SuperActivity.java
@@ -0,0 +1,177 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.api.rewriter.runtime;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.commons.lang3.tuple.Pair;
+
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.ActivityId;
+import edu.uci.ics.hyracks.api.dataflow.IActivity;
+import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
+import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.job.ActivityCluster;
+import edu.uci.ics.hyracks.api.job.ActivityClusterGraph;
+import edu.uci.ics.hyracks.api.job.ActivityClusterId;
+import edu.uci.ics.hyracks.api.rewriter.OneToOneConnectedActivityCluster;
+
+/**
+ * This class can be used to execute a DAG of activities inside which
+ * there are only one-to-one connectors.
+ *
+ * @author yingyib
+ */
+public class SuperActivity extends OneToOneConnectedActivityCluster implements IActivity {
+ private static final long serialVersionUID = 1L;
+ private final ActivityId activityId;
+
+ public SuperActivity(ActivityClusterGraph acg, ActivityClusterId id, ActivityId activityId) {
+ super(acg, id);
+ this.activityId = activityId;
+ }
+
+ @Override
+ public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
+ final IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions)
+ throws HyracksDataException {
+ final Map<ActivityId, IActivity> startActivities = new HashMap<ActivityId, IActivity>();
+ Map<ActivityId, IActivity> activities = getActivityMap();
+ for (Entry<ActivityId, IActivity> entry : activities.entrySet()) {
+ /**
+ * extract start activities
+ */
+ List<IConnectorDescriptor> conns = getActivityInputMap().get(entry.getKey());
+ if (conns == null || conns.size() == 0) {
+ startActivities.put(entry.getKey(), entry.getValue());
+ }
+ }
+
+ /**
+ * wrap a RecordDescriptorProvider for the super activity
+ */
+ IRecordDescriptorProvider wrappedRecDescProvider = new IRecordDescriptorProvider() {
+
+ @Override
+ public RecordDescriptor getInputRecordDescriptor(ActivityId aid, int inputIndex) {
+ if (startActivities.get(aid) != null) {
+ /**
+ * if the activity is a start (input boundary) activity
+ */
+ int superActivityInputChannel = SuperActivity.this.getClusterInputIndex(Pair.of(aid, inputIndex));
+ if (superActivityInputChannel >= 0) {
+ return recordDescProvider.getInputRecordDescriptor(activityId, superActivityInputChannel);
+ }
+ }
+ if (SuperActivity.this.getActivityMap().get(aid) != null) {
+ /**
+ * if the activity is an internal activity of the super activity
+ */
+ IConnectorDescriptor conn = getActivityInputMap().get(aid).get(inputIndex);
+ return getConnectorRecordDescriptorMap().get(conn.getConnectorId());
+ }
+
+ /**
+ * the following is for the case where the activity is in other SuperActivities
+ */
+ ActivityClusterGraph acg = SuperActivity.this.getActivityClusterGraph();
+ for (Entry<ActivityClusterId, ActivityCluster> entry : acg.getActivityClusterMap().entrySet()) {
+ ActivityCluster ac = entry.getValue();
+ for (Entry<ActivityId, IActivity> saEntry : ac.getActivityMap().entrySet()) {
+ SuperActivity sa = (SuperActivity) saEntry.getValue();
+ if (sa.getActivityMap().get(aid) != null) {
+ List<IConnectorDescriptor> conns = sa.getActivityInputMap().get(aid);
+ if (conns != null && conns.size() >= inputIndex) {
+ IConnectorDescriptor conn = conns.get(inputIndex);
+ return sa.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
+ } else {
+ int superActivityInputChannel = sa.getClusterInputIndex(Pair.of(aid, inputIndex));
+ if (superActivityInputChannel >= 0) {
+ return recordDescProvider.getInputRecordDescriptor(sa.getActivityId(),
+ superActivityInputChannel);
+ }
+ }
+ }
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public RecordDescriptor getOutputRecordDescriptor(ActivityId aid, int outputIndex) {
+ /**
+ * if the activity is an output-boundary activity
+ */
+ int superActivityOutputChannel = SuperActivity.this.getClusterOutputIndex(Pair.of(aid, outputIndex));
+ if (superActivityOutputChannel >= 0) {
+ return recordDescProvider.getOutputRecordDescriptor(activityId, superActivityOutputChannel);
+ }
+
+ if (SuperActivity.this.getActivityMap().get(aid) != null) {
+ /**
+ * if the activity is an internal activity of the super activity
+ */
+ IConnectorDescriptor conn = getActivityOutputMap().get(aid).get(outputIndex);
+ return getConnectorRecordDescriptorMap().get(conn.getConnectorId());
+ }
+
+ /**
+ * the following is for the case where the activity is in other SuperActivities
+ */
+ ActivityClusterGraph acg = SuperActivity.this.getActivityClusterGraph();
+ for (Entry<ActivityClusterId, ActivityCluster> entry : acg.getActivityClusterMap().entrySet()) {
+ ActivityCluster ac = entry.getValue();
+ for (Entry<ActivityId, IActivity> saEntry : ac.getActivityMap().entrySet()) {
+ SuperActivity sa = (SuperActivity) saEntry.getValue();
+ if (sa.getActivityMap().get(aid) != null) {
+ List<IConnectorDescriptor> conns = sa.getActivityOutputMap().get(aid);
+ if (conns != null && conns.size() >= outputIndex) {
+ IConnectorDescriptor conn = conns.get(outputIndex);
+ return sa.getConnectorRecordDescriptorMap().get(conn.getConnectorId());
+ } else {
+ superActivityOutputChannel = sa.getClusterOutputIndex(Pair.of(aid, outputIndex));
+ if (superActivityOutputChannel >= 0) {
+ return recordDescProvider.getOutputRecordDescriptor(sa.getActivityId(),
+ superActivityOutputChannel);
+ }
+ }
+ }
+ }
+ }
+ return null;
+ }
+
+ };
+ return new SuperActivityOperatorNodePushable(this, startActivities, ctx, wrappedRecDescProvider, partition,
+ nPartitions);
+ }
+
+ @Override
+ public ActivityId getActivityId() {
+ return activityId;
+ }
+
+ @Override
+ public String toString() {
+ return getActivityMap().values().toString();
+ }
+}
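
Because SuperActivity implements IActivity, the scheduler can instantiate it exactly like a plain activity; a hypothetical sketch (ctx, provider, and the partition numbers are assumed to be supplied by the task runtime):

    import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
    import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
    import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
    import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
    import edu.uci.ics.hyracks.api.rewriter.runtime.SuperActivity;

    public class SuperActivityInstantiationSketch {
        public static IOperatorNodePushable instantiate(SuperActivity superActivity, IHyracksTaskContext ctx,
                IRecordDescriptorProvider provider, int partition, int nPartitions) throws HyracksDataException {
            // Returns a SuperActivityOperatorNodePushable that drives the merged DAG in one thread,
            // translating boundary channel indexes through the wrapped record descriptor provider.
            return superActivity.createPushRuntime(ctx, provider, partition, nPartitions);
        }
    }
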
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/rewriter/runtime/SuperActivityOperatorNodePushable.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/rewriter/runtime/SuperActivityOperatorNodePushable.java
new file mode 100644
index 0000000..7d50fa0
--- /dev/null
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/rewriter/runtime/SuperActivityOperatorNodePushable.java
@@ -0,0 +1,195 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.api.rewriter.runtime;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Queue;
+
+import org.apache.commons.lang3.tuple.Pair;
+
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.ActivityId;
+import edu.uci.ics.hyracks.api.dataflow.IActivity;
+import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
+import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
+/**
+ * The runtime of a SuperActivity, which internally executes a DAG of one-to-one
+ * connected activities in a single thread.
+ *
+ * @author yingyib
+ */
+public class SuperActivityOperatorNodePushable implements IOperatorNodePushable {
+ private final Map<ActivityId, IOperatorNodePushable> operatorNodePushables = new HashMap<ActivityId, IOperatorNodePushable>();
+ private final List<IOperatorNodePushable> operatprNodePushablesBFSOrder = new ArrayList<IOperatorNodePushable>();
+ private final Map<ActivityId, IActivity> startActivities;
+ private final SuperActivity parent;
+ private final IHyracksTaskContext ctx;
+ private final IRecordDescriptorProvider recordDescProvider;
+ private final int partition;
+ private final int nPartitions;
+ private int inputArity = 0;
+
+ public SuperActivityOperatorNodePushable(SuperActivity parent, Map<ActivityId, IActivity> startActivities,
+ IHyracksTaskContext ctx, IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) {
+ this.parent = parent;
+ this.startActivities = startActivities;
+ this.ctx = ctx;
+ this.recordDescProvider = recordDescProvider;
+ this.partition = partition;
+ this.nPartitions = nPartitions;
+
+ /**
+ * initialize the writer-relationship for the internal DAG of operator
+ * node pushables
+ */
+ try {
+ init();
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ @Override
+ public void initialize() throws HyracksDataException {
+ /**
+ * initialize operator node pushables in the BFS order
+ */
+ for (IOperatorNodePushable op : operatprNodePushablesBFSOrder) {
+ op.initialize();
+ }
+ }
+
+ public void init() throws HyracksDataException {
+ Map<ActivityId, IOperatorNodePushable> startOperatorNodePushables = new HashMap<ActivityId, IOperatorNodePushable>();
+ Queue<Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>>> childQueue = new LinkedList<Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>>>();
+ List<IConnectorDescriptor> outputConnectors = null;
+
+ /**
+ * Set up the source operators
+ */
+ for (Entry<ActivityId, IActivity> entry : startActivities.entrySet()) {
+ IOperatorNodePushable opPushable = entry.getValue().createPushRuntime(ctx, recordDescProvider, partition,
+ nPartitions);
+ startOperatorNodePushables.put(entry.getKey(), opPushable);
+ operatprNodePushablesBFSOrder.add(opPushable);
+ operatorNodePushables.put(entry.getKey(), opPushable);
+ inputArity += opPushable.getInputArity();
+ outputConnectors = parent.getActivityOutputMap().get(entry.getKey());
+ if (outputConnectors != null) {
+ for (IConnectorDescriptor conn : outputConnectors) {
+ childQueue.add(parent.getConnectorActivityMap().get(conn.getConnectorId()));
+ }
+ }
+ }
+
+ /**
+ * Use BFS (breadth-first search) to construct the runtime execution
+ * DAG.
+ */
+ while (childQueue.size() > 0) {
+ /**
+ * expand the executing activities further downstream
+ */
+ if (outputConnectors != null && outputConnectors.size() > 0) {
+ for (IConnectorDescriptor conn : outputConnectors) {
+ if (conn != null) {
+ childQueue.add(parent.getConnectorActivityMap().get(conn.getConnectorId()));
+ }
+ }
+ }
+
+ /**
+ * construct the source to destination information
+ */
+ Pair<Pair<IActivity, Integer>, Pair<IActivity, Integer>> channel = childQueue.poll();
+ ActivityId sourceId = channel.getLeft().getLeft().getActivityId();
+ int outputChannel = channel.getLeft().getRight();
+ ActivityId destId = channel.getRight().getLeft().getActivityId();
+ int inputChannel = channel.getRight().getRight();
+ IOperatorNodePushable sourceOp = operatorNodePushables.get(sourceId);
+ IOperatorNodePushable destOp = operatorNodePushables.get(destId);
+ if (destOp == null) {
+ destOp = channel.getRight().getLeft()
+ .createPushRuntime(ctx, recordDescProvider, partition, nPartitions);
+ operatprNodePushablesBFSOrder.add(destOp);
+ operatorNodePushables.put(destId, destOp);
+ }
+
+ /**
+ * construct the dataflow connection from a producer to a consumer
+ */
+ sourceOp.setOutputFrameWriter(outputChannel, destOp.getInputFrameWriter(inputChannel),
+ recordDescProvider.getInputRecordDescriptor(destId, inputChannel));
+
+ /**
+ * traverse to the child of the current activity
+ */
+ outputConnectors = parent.getActivityOutputMap().get(destId);
+ }
+ }
+
+ @Override
+ public void deinitialize() throws HyracksDataException {
+ /**
+ * de-initialize operator node pushables
+ */
+ for (IOperatorNodePushable op : operatprNodePushablesBFSOrder) {
+ op.deinitialize();
+ }
+ }
+
+ @Override
+ public int getInputArity() {
+ return inputArity;
+ }
+
+ @Override
+ public void setOutputFrameWriter(int clusterOutputIndex, IFrameWriter writer, RecordDescriptor recordDesc) {
+ /**
+ * set the right output frame writer
+ */
+ Pair<ActivityId, Integer> activityIdOutputIndex = parent.getActivityIdOutputIndex(clusterOutputIndex);
+ IOperatorNodePushable opPushable = operatorNodePushables.get(activityIdOutputIndex.getLeft());
+ opPushable.setOutputFrameWriter(activityIdOutputIndex.getRight(), writer, recordDesc);
+ }
+
+ @Override
+ public IFrameWriter getInputFrameWriter(final int index) {
+ /**
+ * get the right IFrameWriter from the cluster input index
+ */
+ Pair<ActivityId, Integer> activityIdInputIndex = parent.getActivityIdInputIndex(index);
+ IOperatorNodePushable operatorNodePushable = operatorNodePushables.get(activityIdInputIndex.getLeft());
+ IFrameWriter writer = operatorNodePushable.getInputFrameWriter(activityIdInputIndex.getRight());
+ return writer;
+ }
+
+ @Override
+ public String getDisplayName() {
+ return "Super Activity " + parent.getActivityMap().values().toString();
+ }
+
+}
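
The expected lifecycle for this pushable mirrors any other IOperatorNodePushable; a minimal sketch under the assumption that the writer and record descriptor for cluster output 0 come from the task's output setup:

    import edu.uci.ics.hyracks.api.comm.IFrameWriter;
    import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
    import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
    import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;

    public class PushableLifecycleSketch {
        public static void drive(IOperatorNodePushable op, IFrameWriter writer, RecordDescriptor recordDesc)
                throws HyracksDataException {
            op.setOutputFrameWriter(0, writer, recordDesc); // bridge cluster output 0 to the downstream writer
            op.initialize();                                // initializes the internal DAG in BFS order
            op.deinitialize();                              // tears the internal pushables down in the same order
        }
    }
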
diff --git a/hyracks/hyracks-client/pom.xml b/hyracks/hyracks-client/pom.xml
new file mode 100644
index 0000000..854a009
--- /dev/null
+++ b/hyracks/hyracks-client/pom.xml
@@ -0,0 +1,46 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>hyracks-client</artifactId>
+ <name>hyracks-client</name>
+ <parent>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ </parent>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ <dependencies>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-api</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-net</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-comm</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-dataflow-common</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/dataset/DatasetClientContext.java b/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/dataset/DatasetClientContext.java
new file mode 100644
index 0000000..8be4a8c
--- /dev/null
+++ b/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/dataset/DatasetClientContext.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.client.dataset;
+
+import java.nio.ByteBuffer;
+
+import edu.uci.ics.hyracks.api.context.IHyracksCommonContext;
+import edu.uci.ics.hyracks.api.io.IIOManager;
+
+public class DatasetClientContext implements IHyracksCommonContext {
+ private final int frameSize;
+
+ public DatasetClientContext(int frameSize) {
+ this.frameSize = frameSize;
+ }
+
+ @Override
+ public int getFrameSize() {
+ return frameSize;
+ }
+
+ @Override
+ public IIOManager getIOManager() {
+ return null;
+ }
+
+ @Override
+ public ByteBuffer allocateFrame() {
+ return ByteBuffer.allocate(frameSize);
+ }
+
+}
diff --git a/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/dataset/HyracksDataset.java b/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/dataset/HyracksDataset.java
new file mode 100644
index 0000000..6866e46
--- /dev/null
+++ b/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/dataset/HyracksDataset.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.client.dataset;
+
+import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
+import edu.uci.ics.hyracks.api.comm.NetworkAddress;
+import edu.uci.ics.hyracks.api.dataset.IHyracksDataset;
+import edu.uci.ics.hyracks.api.dataset.IHyracksDatasetDirectoryServiceConnection;
+import edu.uci.ics.hyracks.api.dataset.IHyracksDatasetReader;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.job.JobId;
+import edu.uci.ics.hyracks.client.net.ClientNetworkManager;
+
+public class HyracksDataset implements IHyracksDataset {
+ private final IHyracksDatasetDirectoryServiceConnection datasetDirectoryServiceConnection;
+
+ private final ClientNetworkManager netManager;
+
+ private final DatasetClientContext datasetClientCtx;
+
+ public HyracksDataset(IHyracksClientConnection hcc, int frameSize, int nReaders) throws Exception {
+ NetworkAddress ddsAddress = hcc.getDatasetDirectoryServiceInfo();
+ datasetDirectoryServiceConnection = new HyracksDatasetDirectoryServiceConnection(new String(
+ ddsAddress.getIpAddress()), ddsAddress.getPort());
+
+ netManager = new ClientNetworkManager(nReaders);
+ netManager.start();
+
+ datasetClientCtx = new DatasetClientContext(frameSize);
+ }
+
+ @Override
+ public IHyracksDatasetReader createReader(JobId jobId, ResultSetId resultSetId) throws HyracksDataException {
+ IHyracksDatasetReader reader = null;
+ try {
+ reader = new HyracksDatasetReader(datasetDirectoryServiceConnection, netManager, datasetClientCtx, jobId,
+ resultSetId);
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ return reader;
+ }
+}
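
Taken together with the reader added below, the intended client-side usage is roughly the following sketch; the connection, job id, result set id, and frame size are assumptions supplied by the application:

    import java.nio.ByteBuffer;

    import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
    import edu.uci.ics.hyracks.api.dataset.IHyracksDataset;
    import edu.uci.ics.hyracks.api.dataset.IHyracksDatasetReader;
    import edu.uci.ics.hyracks.api.dataset.ResultSetId;
    import edu.uci.ics.hyracks.api.job.JobId;
    import edu.uci.ics.hyracks.client.dataset.HyracksDataset;

    public class ResultReadSketch {
        private static final int FRAME_SIZE = 32768; // assumed to match the cluster's frame size

        public static void readAll(IHyracksClientConnection hcc, JobId jobId, ResultSetId rsId) throws Exception {
            IHyracksDataset dataset = new HyracksDataset(hcc, FRAME_SIZE, 1);
            IHyracksDatasetReader reader = dataset.createReader(jobId, rsId);
            ByteBuffer buffer = ByteBuffer.allocate(FRAME_SIZE);
            while (reader.read(buffer) > 0) {
                // Each successful read leaves one frame of serialized tuples in 'buffer';
                // deserialization is application-specific and omitted here.
                buffer.clear();
            }
        }
    }
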
diff --git a/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/dataset/HyracksDatasetDirectoryServiceConnection.java b/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/dataset/HyracksDatasetDirectoryServiceConnection.java
new file mode 100644
index 0000000..095fd7d
--- /dev/null
+++ b/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/dataset/HyracksDatasetDirectoryServiceConnection.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.client.dataset;
+
+import java.net.InetSocketAddress;
+
+import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord;
+import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord.Status;
+import edu.uci.ics.hyracks.api.dataset.IHyracksDatasetDirectoryServiceConnection;
+import edu.uci.ics.hyracks.api.dataset.IHyracksDatasetDirectoryServiceInterface;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
+import edu.uci.ics.hyracks.api.job.JobId;
+import edu.uci.ics.hyracks.ipc.api.IIPCHandle;
+import edu.uci.ics.hyracks.ipc.api.RPCInterface;
+import edu.uci.ics.hyracks.ipc.impl.IPCSystem;
+import edu.uci.ics.hyracks.ipc.impl.JavaSerializationBasedPayloadSerializerDeserializer;
+
+//TODO(madhusudancs): Should this implementation be moved to edu.uci.ics.hyracks.client?
+public class HyracksDatasetDirectoryServiceConnection implements IHyracksDatasetDirectoryServiceConnection {
+ private final IPCSystem ipc;
+ private final IHyracksDatasetDirectoryServiceInterface ddsi;
+
+ public HyracksDatasetDirectoryServiceConnection(String ddsHost, int ddsPort) throws Exception {
+ RPCInterface rpci = new RPCInterface();
+ ipc = new IPCSystem(new InetSocketAddress(0), rpci, new JavaSerializationBasedPayloadSerializerDeserializer());
+ ipc.start();
+ IIPCHandle ddsIpchandle = ipc.getHandle(new InetSocketAddress(ddsHost, ddsPort));
+ this.ddsi = new HyracksDatasetDirectoryServiceInterfaceRemoteProxy(ddsIpchandle, rpci);
+ }
+
+ @Override
+ public Status getDatasetResultStatus(JobId jobId, ResultSetId rsId) throws Exception {
+ return ddsi.getDatasetResultStatus(jobId, rsId);
+ }
+
+ @Override
+ public DatasetDirectoryRecord[] getDatasetResultLocations(JobId jobId, ResultSetId rsId,
+ DatasetDirectoryRecord[] knownRecords) throws Exception {
+ return ddsi.getDatasetResultLocations(jobId, rsId, knownRecords);
+ }
+}
diff --git a/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/dataset/HyracksDatasetDirectoryServiceInterfaceRemoteProxy.java b/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/dataset/HyracksDatasetDirectoryServiceInterfaceRemoteProxy.java
new file mode 100644
index 0000000..47cdf97
--- /dev/null
+++ b/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/dataset/HyracksDatasetDirectoryServiceInterfaceRemoteProxy.java
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.client.dataset;
+
+import edu.uci.ics.hyracks.api.client.HyracksClientInterfaceFunctions;
+import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord;
+import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord.Status;
+import edu.uci.ics.hyracks.api.dataset.IHyracksDatasetDirectoryServiceInterface;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
+import edu.uci.ics.hyracks.api.job.JobId;
+import edu.uci.ics.hyracks.ipc.api.IIPCHandle;
+import edu.uci.ics.hyracks.ipc.api.RPCInterface;
+
+//TODO(madhusudancs): Should this implementation be moved to edu.uci.ics.hyracks.client?
+public class HyracksDatasetDirectoryServiceInterfaceRemoteProxy implements IHyracksDatasetDirectoryServiceInterface {
+ private final IIPCHandle ipcHandle;
+
+ private final RPCInterface rpci;
+
+ public HyracksDatasetDirectoryServiceInterfaceRemoteProxy(IIPCHandle ipcHandle, RPCInterface rpci) {
+ this.ipcHandle = ipcHandle;
+ this.rpci = rpci;
+ }
+
+ @Override
+ public Status getDatasetResultStatus(JobId jobId, ResultSetId rsId) throws Exception {
+ HyracksClientInterfaceFunctions.GetDatasetResultStatusFunction gdrlf = new HyracksClientInterfaceFunctions.GetDatasetResultStatusFunction(
+ jobId, rsId);
+ return (Status) rpci.call(ipcHandle, gdrlf);
+ }
+
+ @Override
+ public DatasetDirectoryRecord[] getDatasetResultLocations(JobId jobId, ResultSetId rsId,
+ DatasetDirectoryRecord[] knownRecords) throws Exception {
+ HyracksClientInterfaceFunctions.GetDatasetResultLocationsFunction gdrlf = new HyracksClientInterfaceFunctions.GetDatasetResultLocationsFunction(
+ jobId, rsId, knownRecords);
+ return (DatasetDirectoryRecord[]) rpci.call(ipcHandle, gdrlf);
+ }
+}
diff --git a/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/dataset/HyracksDatasetReader.java b/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/dataset/HyracksDatasetReader.java
new file mode 100644
index 0000000..78bcf20
--- /dev/null
+++ b/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/dataset/HyracksDatasetReader.java
@@ -0,0 +1,255 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.client.dataset;
+
+import java.net.InetAddress;
+import java.net.InetSocketAddress;
+import java.net.SocketAddress;
+import java.net.UnknownHostException;
+import java.nio.ByteBuffer;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.logging.Logger;
+
+import edu.uci.ics.hyracks.api.channels.IInputChannel;
+import edu.uci.ics.hyracks.api.comm.NetworkAddress;
+import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord;
+import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord.Status;
+import edu.uci.ics.hyracks.api.dataset.IDatasetInputChannelMonitor;
+import edu.uci.ics.hyracks.api.dataset.IHyracksDatasetDirectoryServiceConnection;
+import edu.uci.ics.hyracks.api.dataset.IHyracksDatasetReader;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.job.JobId;
+import edu.uci.ics.hyracks.client.net.ClientNetworkManager;
+import edu.uci.ics.hyracks.comm.channels.DatasetNetworkInputChannel;
+
+// TODO(madhusudancs): Should this implementation be moved to edu.uci.ics.hyracks.client?
+public class HyracksDatasetReader implements IHyracksDatasetReader {
+ private static final Logger LOGGER = Logger.getLogger(HyracksDatasetReader.class.getName());
+
+ private final IHyracksDatasetDirectoryServiceConnection datasetDirectoryServiceConnection;
+
+ private final ClientNetworkManager netManager;
+
+ private final DatasetClientContext datasetClientCtx;
+
+ private JobId jobId;
+
+ private ResultSetId resultSetId;
+
+ private DatasetDirectoryRecord[] knownRecords;
+
+ private IDatasetInputChannelMonitor[] monitors;
+
+ private int lastReadPartition;
+
+ private IDatasetInputChannelMonitor lastMonitor;
+
+ private DatasetNetworkInputChannel resultChannel;
+
+ private static int NUM_READ_BUFFERS = 1;
+
+ public HyracksDatasetReader(IHyracksDatasetDirectoryServiceConnection datasetDirectoryServiceConnection,
+ ClientNetworkManager netManager, DatasetClientContext datasetClientCtx, JobId jobId, ResultSetId resultSetId)
+ throws Exception {
+ this.datasetDirectoryServiceConnection = datasetDirectoryServiceConnection;
+ this.netManager = netManager;
+ this.datasetClientCtx = datasetClientCtx;
+ this.jobId = jobId;
+ this.resultSetId = resultSetId;
+ knownRecords = null;
+ monitors = null;
+ lastReadPartition = -1;
+ lastMonitor = null;
+ resultChannel = null;
+ }
+
+ @Override
+ public Status getResultStatus() {
+ Status status = null;
+ try {
+ status = datasetDirectoryServiceConnection.getDatasetResultStatus(jobId, resultSetId);
+ } catch (Exception e) {
+ // TODO(madhusudancs): Decide what to do in case of error
+ }
+ return status;
+ }
+
+ @Override
+ public int read(ByteBuffer buffer) throws HyracksDataException {
+ ByteBuffer readBuffer;
+ int readSize = 0;
+
+ if (lastReadPartition == -1) {
+ while (knownRecords == null || knownRecords[0] == null) {
+ try {
+ knownRecords = datasetDirectoryServiceConnection.getDatasetResultLocations(jobId, resultSetId,
+ knownRecords);
+ lastReadPartition = 0;
+ resultChannel = new DatasetNetworkInputChannel(netManager,
+ getSocketAddress(knownRecords[lastReadPartition]), jobId, lastReadPartition,
+ NUM_READ_BUFFERS);
+ lastMonitor = getMonitor(lastReadPartition);
+ resultChannel.open(datasetClientCtx);
+ resultChannel.registerMonitor(lastMonitor);
+ } catch (HyracksException e) {
+ throw new HyracksDataException(e);
+ } catch (UnknownHostException e) {
+ throw new HyracksDataException(e);
+ } catch (Exception e) {
+ // Do nothing here.
+ }
+ }
+ }
+
+ while (readSize <= 0 && !((lastReadPartition == knownRecords.length - 1) && (lastMonitor.eosReached()))) {
+ synchronized (lastMonitor) {
+ while (lastMonitor.getNFramesAvailable() <= 0 && !lastMonitor.eosReached()) {
+ try {
+ lastMonitor.wait();
+ } catch (InterruptedException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+ }
+
+ if (lastMonitor.getNFramesAvailable() <= 0 && lastMonitor.eosReached()) {
+ knownRecords[lastReadPartition].readEOS();
+ if ((lastReadPartition == knownRecords.length - 1)) {
+ break;
+ } else {
+ try {
+ lastReadPartition++;
+ while (knownRecords[lastReadPartition] == null) {
+ try {
+ knownRecords = datasetDirectoryServiceConnection.getDatasetResultLocations(jobId,
+ resultSetId, knownRecords);
+ } catch (Exception e) {
+ // Do nothing here.
+ }
+ }
+
+ resultChannel = new DatasetNetworkInputChannel(netManager,
+ getSocketAddress(knownRecords[lastReadPartition]), jobId, lastReadPartition,
+ NUM_READ_BUFFERS);
+ lastMonitor = getMonitor(lastReadPartition);
+ resultChannel.open(datasetClientCtx);
+ resultChannel.registerMonitor(lastMonitor);
+ } catch (HyracksException e) {
+ throw new HyracksDataException(e);
+ } catch (UnknownHostException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+ } else {
+ readBuffer = resultChannel.getNextBuffer();
+ lastMonitor.notifyFrameRead();
+ if (readBuffer != null) {
+ buffer.put(readBuffer);
+ buffer.flip();
+ readSize = buffer.limit();
+ resultChannel.recycleBuffer(readBuffer);
+ }
+ }
+ }
+
+ return readSize;
+ }
+
+ private boolean nullExists(DatasetDirectoryRecord[] locations) {
+ if (locations == null) {
+ return true;
+ }
+ for (int i = 0; i < locations.length; i++) {
+ if (locations[i] == null) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private SocketAddress getSocketAddress(DatasetDirectoryRecord addr) throws UnknownHostException {
+ NetworkAddress netAddr = addr.getNetworkAddress();
+ return new InetSocketAddress(InetAddress.getByAddress(netAddr.getIpAddress()), netAddr.getPort());
+ }
+
+ private IDatasetInputChannelMonitor getMonitor(int partition) throws HyracksException {
+ if (knownRecords == null || knownRecords[partition] == null) {
+ throw new HyracksException("Accessing monitors before the obtaining the corresponding addresses.");
+ }
+ if (monitors == null) {
+ monitors = new DatasetInputChannelMonitor[knownRecords.length];
+ }
+ if (monitors[partition] == null) {
+ monitors[partition] = new DatasetInputChannelMonitor();
+ }
+ return monitors[partition];
+ }
+
+ private class DatasetInputChannelMonitor implements IDatasetInputChannelMonitor {
+ private final AtomicInteger nAvailableFrames;
+
+ private final AtomicBoolean eos;
+
+ private final AtomicBoolean failed;
+
+ public DatasetInputChannelMonitor() {
+ nAvailableFrames = new AtomicInteger(0);
+ eos = new AtomicBoolean(false);
+ failed = new AtomicBoolean(false);
+ }
+
+ @Override
+ public synchronized void notifyFailure(IInputChannel channel) {
+ failed.set(true);
+ notifyAll();
+ }
+
+ @Override
+ public synchronized void notifyDataAvailability(IInputChannel channel, int nFrames) {
+ nAvailableFrames.addAndGet(nFrames);
+ notifyAll();
+ }
+
+ @Override
+ public synchronized void notifyEndOfStream(IInputChannel channel) {
+ eos.set(true);
+ notifyAll();
+ }
+
+ @Override
+ public synchronized boolean eosReached() {
+ return eos.get();
+ }
+
+ @Override
+ public synchronized boolean failed() {
+ return failed.get();
+ }
+
+ @Override
+ public synchronized int getNFramesAvailable() {
+ return nAvailableFrames.get();
+ }
+
+ @Override
+ public synchronized void notifyFrameRead() {
+ nAvailableFrames.decrementAndGet();
+ }
+
+ }
+}
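
The read() loop above blocks until frames arrive or the last partition reaches end-of-stream; getResultStatus() can be polled beforehand. A hypothetical polling sketch (the sleep interval is an assumption, and a null status simply means the directory service does not yet know about the result set or the lookup failed):

    import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord.Status;
    import edu.uci.ics.hyracks.api.dataset.IHyracksDatasetReader;

    public class ResultStatusPollSketch {
        public static Status waitForKnownStatus(IHyracksDatasetReader reader) throws InterruptedException {
            // Poll until the directory service reports some status for the result set.
            Status status = reader.getResultStatus();
            while (status == null) {
                Thread.sleep(100);
                status = reader.getResultStatus();
            }
            return status;
        }
    }
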
diff --git a/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/net/ClientNetworkManager.java b/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/net/ClientNetworkManager.java
new file mode 100644
index 0000000..7aef8b9
--- /dev/null
+++ b/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/net/ClientNetworkManager.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.client.net;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.net.SocketAddress;
+
+import edu.uci.ics.hyracks.comm.channels.IChannelConnectionFactory;
+import edu.uci.ics.hyracks.net.exceptions.NetException;
+import edu.uci.ics.hyracks.net.protocols.muxdemux.ChannelControlBlock;
+import edu.uci.ics.hyracks.net.protocols.muxdemux.MultiplexedConnection;
+import edu.uci.ics.hyracks.net.protocols.muxdemux.MuxDemux;
+import edu.uci.ics.hyracks.net.protocols.muxdemux.MuxDemuxPerformanceCounters;
+
+public class ClientNetworkManager implements IChannelConnectionFactory {
+ private static final int MAX_CONNECTION_ATTEMPTS = 5;
+
+ private final MuxDemux md;
+
+ public ClientNetworkManager(int nThreads) throws IOException {
+ /* This is a connect-only socket that does not listen for any incoming connections, so pass null for
+ * localAddress and listener.
+ */
+ md = new MuxDemux(null, null, nThreads, MAX_CONNECTION_ATTEMPTS);
+ }
+
+ public void start() throws IOException {
+ md.start();
+ }
+
+ public void stop() {
+
+ }
+
+ public ChannelControlBlock connect(SocketAddress remoteAddress) throws InterruptedException, NetException {
+ MultiplexedConnection mConn = md.connect((InetSocketAddress) remoteAddress);
+ return mConn.openChannel();
+ }
+
+ public MuxDemuxPerformanceCounters getPerformanceCounters() {
+ return md.getPerformanceCounters();
+ }
+}
\ No newline at end of file
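A minimal usage sketch for the ClientNetworkManager added above, assuming the hyracks-net classes are on the classpath; the remote address and port are placeholders (in practice they come from a DatasetDirectoryRecord), and error handling is elided:

import java.net.InetSocketAddress;

import edu.uci.ics.hyracks.client.net.ClientNetworkManager;
import edu.uci.ics.hyracks.net.protocols.muxdemux.ChannelControlBlock;

public class ClientNetworkManagerExample {
    public static void main(String[] args) throws Exception {
        // One network thread is enough for a simple client-side reader.
        ClientNetworkManager netManager = new ClientNetworkManager(1);
        netManager.start();
        try {
            // Placeholder address; a real client resolves this from the dataset directory service.
            ChannelControlBlock ccb = netManager.connect(new InetSocketAddress("127.0.0.1", 14001));
            System.out.println("Opened channel: " + ccb);
        } finally {
            netManager.stop();
        }
    }
}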
diff --git a/hyracks/hyracks-comm/pom.xml b/hyracks/hyracks-comm/pom.xml
new file mode 100644
index 0000000..c3583699
--- /dev/null
+++ b/hyracks/hyracks-comm/pom.xml
@@ -0,0 +1,36 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>hyracks-comm</artifactId>
+ <name>hyracks-comm</name>
+ <parent>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ </parent>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ <dependencies>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-api</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-net</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/net/NetworkInputChannel.java b/hyracks/hyracks-comm/src/main/java/edu/uci/ics/hyracks/comm/channels/DatasetNetworkInputChannel.java
similarity index 72%
copy from hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/net/NetworkInputChannel.java
copy to hyracks/hyracks-comm/src/main/java/edu/uci/ics/hyracks/comm/channels/DatasetNetworkInputChannel.java
index 1d5af84..fac2949 100644
--- a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/net/NetworkInputChannel.java
+++ b/hyracks/hyracks-comm/src/main/java/edu/uci/ics/hyracks/comm/channels/DatasetNetworkInputChannel.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hyracks.control.nc.net;
+package edu.uci.ics.hyracks.comm.channels;
import java.net.SocketAddress;
import java.nio.ByteBuffer;
@@ -23,21 +23,25 @@
import edu.uci.ics.hyracks.api.channels.IInputChannel;
import edu.uci.ics.hyracks.api.channels.IInputChannelMonitor;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.context.IHyracksCommonContext;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.partitions.PartitionId;
+import edu.uci.ics.hyracks.api.job.JobId;
import edu.uci.ics.hyracks.net.buffers.IBufferAcceptor;
import edu.uci.ics.hyracks.net.buffers.ICloseableBufferAcceptor;
import edu.uci.ics.hyracks.net.protocols.muxdemux.ChannelControlBlock;
-public class NetworkInputChannel implements IInputChannel {
- private static final Logger LOGGER = Logger.getLogger(NetworkInputChannel.class.getName());
+public class DatasetNetworkInputChannel implements IInputChannel {
+ private static final Logger LOGGER = Logger.getLogger(DatasetNetworkInputChannel.class.getName());
- private final NetworkManager netManager;
+ static final int INITIAL_MESSAGE_SIZE = 20;
+
+ private final IChannelConnectionFactory netManager;
private final SocketAddress remoteAddress;
- private final PartitionId partitionId;
+ private final JobId jobId;
+
+ private final int partition;
private final Queue<ByteBuffer> fullQueue;
@@ -49,11 +53,12 @@
private Object attachment;
- public NetworkInputChannel(NetworkManager netManager, SocketAddress remoteAddress, PartitionId partitionId,
- int nBuffers) {
+ public DatasetNetworkInputChannel(IChannelConnectionFactory netManager, SocketAddress remoteAddress, JobId jobId,
+ int partition, int nBuffers) {
this.netManager = netManager;
this.remoteAddress = remoteAddress;
- this.partitionId = partitionId;
+ this.jobId = jobId;
+ this.partition = partition;
fullQueue = new ArrayDeque<ByteBuffer>(nBuffers);
this.nBuffers = nBuffers;
}
@@ -85,7 +90,7 @@
}
@Override
- public void open(IHyracksTaskContext ctx) throws HyracksDataException {
+ public void open(IHyracksCommonContext ctx) throws HyracksDataException {
try {
ccb = netManager.connect(remoteAddress);
} catch (Exception e) {
@@ -96,14 +101,13 @@
for (int i = 0; i < nBuffers; ++i) {
ccb.getReadInterface().getEmptyBufferAcceptor().accept(ctx.allocateFrame());
}
- ByteBuffer writeBuffer = ByteBuffer.allocate(NetworkManager.INITIAL_MESSAGE_SIZE);
- writeBuffer.putLong(partitionId.getJobId().getId());
- writeBuffer.putInt(partitionId.getConnectorDescriptorId().getId());
- writeBuffer.putInt(partitionId.getSenderIndex());
- writeBuffer.putInt(partitionId.getReceiverIndex());
+ ByteBuffer writeBuffer = ByteBuffer.allocate(INITIAL_MESSAGE_SIZE);
+ writeBuffer.putLong(jobId.getId());
+ writeBuffer.putInt(partition);
writeBuffer.flip();
if (LOGGER.isLoggable(Level.FINE)) {
- LOGGER.fine("Sending partition request: " + partitionId + " on channel: " + ccb);
+ LOGGER.fine("Sending partition request for JobId: " + jobId + " partition: " + partition + " on channel: "
+ + ccb);
}
ccb.getWriteInterface().getFullBufferAcceptor().accept(writeBuffer);
ccb.getWriteInterface().getFullBufferAcceptor().close();
@@ -118,17 +122,17 @@
@Override
public void accept(ByteBuffer buffer) {
fullQueue.add(buffer);
- monitor.notifyDataAvailability(NetworkInputChannel.this, 1);
+ monitor.notifyDataAvailability(DatasetNetworkInputChannel.this, 1);
}
@Override
public void close() {
- monitor.notifyEndOfStream(NetworkInputChannel.this);
+ monitor.notifyEndOfStream(DatasetNetworkInputChannel.this);
}
@Override
public void error(int ecode) {
- monitor.notifyFailure(NetworkInputChannel.this);
+ monitor.notifyFailure(DatasetNetworkInputChannel.this);
}
}
diff --git a/hyracks/hyracks-comm/src/main/java/edu/uci/ics/hyracks/comm/channels/IChannelConnectionFactory.java b/hyracks/hyracks-comm/src/main/java/edu/uci/ics/hyracks/comm/channels/IChannelConnectionFactory.java
new file mode 100644
index 0000000..33179ba
--- /dev/null
+++ b/hyracks/hyracks-comm/src/main/java/edu/uci/ics/hyracks/comm/channels/IChannelConnectionFactory.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.comm.channels;
+
+import java.net.SocketAddress;
+
+import edu.uci.ics.hyracks.net.exceptions.NetException;
+import edu.uci.ics.hyracks.net.protocols.muxdemux.ChannelControlBlock;
+
+public interface IChannelConnectionFactory {
+ public ChannelControlBlock connect(SocketAddress remoteAddress) throws InterruptedException, NetException;
+}
diff --git a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/net/NetworkInputChannel.java b/hyracks/hyracks-comm/src/main/java/edu/uci/ics/hyracks/comm/channels/NetworkInputChannel.java
similarity index 89%
rename from hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/net/NetworkInputChannel.java
rename to hyracks/hyracks-comm/src/main/java/edu/uci/ics/hyracks/comm/channels/NetworkInputChannel.java
index 1d5af84..aa37b16 100644
--- a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/net/NetworkInputChannel.java
+++ b/hyracks/hyracks-comm/src/main/java/edu/uci/ics/hyracks/comm/channels/NetworkInputChannel.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hyracks.control.nc.net;
+package edu.uci.ics.hyracks.comm.channels;
import java.net.SocketAddress;
import java.nio.ByteBuffer;
@@ -23,7 +23,7 @@
import edu.uci.ics.hyracks.api.channels.IInputChannel;
import edu.uci.ics.hyracks.api.channels.IInputChannelMonitor;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.context.IHyracksCommonContext;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.partitions.PartitionId;
import edu.uci.ics.hyracks.net.buffers.IBufferAcceptor;
@@ -33,7 +33,9 @@
public class NetworkInputChannel implements IInputChannel {
private static final Logger LOGGER = Logger.getLogger(NetworkInputChannel.class.getName());
- private final NetworkManager netManager;
+ static final int INITIAL_MESSAGE_SIZE = 20;
+
+ private final IChannelConnectionFactory netManager;
private final SocketAddress remoteAddress;
@@ -49,8 +51,8 @@
private Object attachment;
- public NetworkInputChannel(NetworkManager netManager, SocketAddress remoteAddress, PartitionId partitionId,
- int nBuffers) {
+ public NetworkInputChannel(IChannelConnectionFactory netManager, SocketAddress remoteAddress,
+ PartitionId partitionId, int nBuffers) {
this.netManager = netManager;
this.remoteAddress = remoteAddress;
this.partitionId = partitionId;
@@ -85,7 +87,7 @@
}
@Override
- public void open(IHyracksTaskContext ctx) throws HyracksDataException {
+ public void open(IHyracksCommonContext ctx) throws HyracksDataException {
try {
ccb = netManager.connect(remoteAddress);
} catch (Exception e) {
@@ -96,7 +98,7 @@
for (int i = 0; i < nBuffers; ++i) {
ccb.getReadInterface().getEmptyBufferAcceptor().accept(ctx.allocateFrame());
}
- ByteBuffer writeBuffer = ByteBuffer.allocate(NetworkManager.INITIAL_MESSAGE_SIZE);
+ ByteBuffer writeBuffer = ByteBuffer.allocate(INITIAL_MESSAGE_SIZE);
writeBuffer.putLong(partitionId.getJobId().getId());
writeBuffer.putInt(partitionId.getConnectorDescriptorId().getId());
writeBuffer.putInt(partitionId.getSenderIndex());
diff --git a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/net/NetworkOutputChannel.java b/hyracks/hyracks-comm/src/main/java/edu/uci/ics/hyracks/comm/channels/NetworkOutputChannel.java
similarity index 92%
rename from hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/net/NetworkOutputChannel.java
rename to hyracks/hyracks-comm/src/main/java/edu/uci/ics/hyracks/comm/channels/NetworkOutputChannel.java
index 9024e18..812a2de 100644
--- a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/net/NetworkOutputChannel.java
+++ b/hyracks/hyracks-comm/src/main/java/edu/uci/ics/hyracks/comm/channels/NetworkOutputChannel.java
@@ -12,14 +12,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hyracks.control.nc.net;
+package edu.uci.ics.hyracks.comm.channels;
import java.nio.ByteBuffer;
import java.util.ArrayDeque;
import java.util.Deque;
import edu.uci.ics.hyracks.api.comm.IFrameWriter;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.net.buffers.IBufferAcceptor;
import edu.uci.ics.hyracks.net.protocols.muxdemux.ChannelControlBlock;
@@ -40,9 +39,9 @@
ccb.getWriteInterface().setEmptyBufferAcceptor(new WriteEmptyBufferAcceptor());
}
- public void setTaskContext(IHyracksTaskContext ctx) {
+ public void setFrameSize(int frameSize) {
for (int i = 0; i < nBuffers; ++i) {
- emptyStack.push(ByteBuffer.allocateDirect(ctx.getFrameSize()));
+ emptyStack.push(ByteBuffer.allocateDirect(frameSize));
}
}
@@ -87,7 +86,7 @@
ccb.getWriteInterface().getFullBufferAcceptor().close();
}
- void abort() {
+ public void abort() {
ccb.getWriteInterface().getFullBufferAcceptor().error(1);
synchronized (NetworkOutputChannel.this) {
aborted = true;
diff --git a/hyracks/hyracks-control/hyracks-control-cc/pom.xml b/hyracks/hyracks-control/hyracks-control-cc/pom.xml
index c7eedb3..d644673 100644
--- a/hyracks/hyracks-control/hyracks-control-cc/pom.xml
+++ b/hyracks/hyracks-control/hyracks-control-cc/pom.xml
@@ -15,8 +15,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/ClusterControllerService.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/ClusterControllerService.java
index 137cf05..a8055c9 100644
--- a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/ClusterControllerService.java
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/ClusterControllerService.java
@@ -35,26 +35,37 @@
import edu.uci.ics.hyracks.api.client.ClusterControllerInfo;
import edu.uci.ics.hyracks.api.client.HyracksClientInterfaceFunctions;
import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
+import edu.uci.ics.hyracks.api.comm.NetworkAddress;
import edu.uci.ics.hyracks.api.context.ICCContext;
+import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord;
+import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord.Status;
+import edu.uci.ics.hyracks.api.dataset.IDatasetDirectoryService;
import edu.uci.ics.hyracks.api.job.JobId;
import edu.uci.ics.hyracks.api.job.JobStatus;
import edu.uci.ics.hyracks.api.topology.ClusterTopology;
import edu.uci.ics.hyracks.api.topology.TopologyDefinitionParser;
import edu.uci.ics.hyracks.control.cc.application.CCApplicationContext;
+import edu.uci.ics.hyracks.control.cc.dataset.DatasetDirectoryService;
import edu.uci.ics.hyracks.control.cc.job.JobRun;
import edu.uci.ics.hyracks.control.cc.web.WebServer;
import edu.uci.ics.hyracks.control.cc.work.ApplicationMessageWork;
+import edu.uci.ics.hyracks.control.cc.work.GetDatasetDirectoryServiceInfoWork;
import edu.uci.ics.hyracks.control.cc.work.GetIpAddressNodeNameMapWork;
import edu.uci.ics.hyracks.control.cc.work.GetJobStatusWork;
import edu.uci.ics.hyracks.control.cc.work.GetNodeControllersInfoWork;
+import edu.uci.ics.hyracks.control.cc.work.GetResultPartitionLocationsWork;
+import edu.uci.ics.hyracks.control.cc.work.GetResultStatusWork;
import edu.uci.ics.hyracks.control.cc.work.JobStartWork;
import edu.uci.ics.hyracks.control.cc.work.JobletCleanupNotificationWork;
import edu.uci.ics.hyracks.control.cc.work.NodeHeartbeatWork;
import edu.uci.ics.hyracks.control.cc.work.RegisterNodeWork;
import edu.uci.ics.hyracks.control.cc.work.RegisterPartitionAvailibilityWork;
import edu.uci.ics.hyracks.control.cc.work.RegisterPartitionRequestWork;
+import edu.uci.ics.hyracks.control.cc.work.RegisterResultPartitionLocationWork;
import edu.uci.ics.hyracks.control.cc.work.RemoveDeadNodesWork;
import edu.uci.ics.hyracks.control.cc.work.ReportProfilesWork;
+import edu.uci.ics.hyracks.control.cc.work.ReportResultPartitionFailureWork;
+import edu.uci.ics.hyracks.control.cc.work.ReportResultPartitionWriteCompletionWork;
import edu.uci.ics.hyracks.control.cc.work.TaskCompleteWork;
import edu.uci.ics.hyracks.control.cc.work.TaskFailureWork;
import edu.uci.ics.hyracks.control.cc.work.UnregisterNodeWork;
@@ -111,6 +122,8 @@
private final DeadNodeSweeper sweeper;
+ private final IDatasetDirectoryService datasetDirectoryService;
+
private long jobCounter;
public ClusterControllerService(final CCConfig ccConfig) throws Exception {
@@ -157,6 +170,7 @@
}
};
sweeper = new DeadNodeSweeper();
+ datasetDirectoryService = new DatasetDirectoryService();
jobCounter = 0;
}
@@ -272,6 +286,10 @@
return clusterIPC;
}
+ public NetworkAddress getDatasetDirectoryServiceInfo() {
+ return new NetworkAddress(ccConfig.clientNetIpAddress.getBytes(), ccConfig.clientNetPort);
+ }
+
private class DeadNodeSweeper extends TimerTask {
@Override
public void run() {
@@ -279,6 +297,10 @@
}
}
+ public IDatasetDirectoryService getDatasetDirectoryService() {
+ return datasetDirectoryService;
+ }
+
private class HyracksClientInterfaceIPCI implements IIPCI {
@Override
public void deliverIncomingMessage(IIPCHandle handle, long mid, long rmid, Object payload, Exception exception) {
@@ -308,6 +330,27 @@
return;
}
+ case GET_DATASET_DIRECTORY_SERIVICE_INFO: {
+ workQueue.schedule(new GetDatasetDirectoryServiceInfoWork(ClusterControllerService.this,
+ new IPCResponder<NetworkAddress>(handle, mid)));
+ return;
+ }
+
+ case GET_DATASET_RESULT_STATUS: {
+ HyracksClientInterfaceFunctions.GetDatasetResultStatusFunction gdrlf = (HyracksClientInterfaceFunctions.GetDatasetResultStatusFunction) fn;
+ workQueue.schedule(new GetResultStatusWork(ClusterControllerService.this, gdrlf.getJobId(), gdrlf
+ .getResultSetId(), new IPCResponder<Status>(handle, mid)));
+ return;
+ }
+
+ case GET_DATASET_RESULT_LOCATIONS: {
+ HyracksClientInterfaceFunctions.GetDatasetResultLocationsFunction gdrlf = (HyracksClientInterfaceFunctions.GetDatasetResultLocationsFunction) fn;
+ workQueue.schedule(new GetResultPartitionLocationsWork(ClusterControllerService.this, gdrlf
+ .getJobId(), gdrlf.getResultSetId(), gdrlf.getKnownRecords(),
+ new IPCResponder<DatasetDirectoryRecord[]>(handle, mid)));
+ return;
+ }
+
case WAIT_FOR_COMPLETION: {
HyracksClientInterfaceFunctions.WaitForCompletionFunction wfcf = (HyracksClientInterfaceFunctions.WaitForCompletionFunction) fn;
workQueue.schedule(new WaitForJobCompletionWork(ClusterControllerService.this, wfcf.getJobId(),
@@ -403,6 +446,28 @@
return;
}
+ case REGISTER_RESULT_PARTITION_LOCATION: {
+ CCNCFunctions.RegisterResultPartitionLocationFunction rrplf = (CCNCFunctions.RegisterResultPartitionLocationFunction) fn;
+ workQueue.schedule(new RegisterResultPartitionLocationWork(ClusterControllerService.this, rrplf
+ .getJobId(), rrplf.getResultSetId(), rrplf.getOrderedResult(), rrplf.getPartition(), rrplf
+ .getNPartitions(), rrplf.getNetworkAddress()));
+ return;
+ }
+
+ case REPORT_RESULT_PARTITION_WRITE_COMPLETION: {
+ CCNCFunctions.ReportResultPartitionWriteCompletionFunction rrplf = (CCNCFunctions.ReportResultPartitionWriteCompletionFunction) fn;
+ workQueue.schedule(new ReportResultPartitionWriteCompletionWork(ClusterControllerService.this,
+ rrplf.getJobId(), rrplf.getResultSetId(), rrplf.getPartition()));
+ return;
+ }
+
+ case REPORT_RESULT_PARTITION_FAILURE: {
+ CCNCFunctions.ReportResultPartitionFailureFunction rrplf = (CCNCFunctions.ReportResultPartitionFailureFunction) fn;
+ workQueue.schedule(new ReportResultPartitionFailureWork(ClusterControllerService.this, rrplf
+ .getJobId(), rrplf.getResultSetId(), rrplf.getPartition()));
+ return;
+ }
+
case SEND_APPLICATION_MESSAGE: {
CCNCFunctions.SendApplicationMessageFunction rsf = (CCNCFunctions.SendApplicationMessageFunction) fn;
workQueue.schedule(new ApplicationMessageWork(ClusterControllerService.this, rsf.getMessage(), rsf
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/NodeControllerState.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/NodeControllerState.java
index c17acd0..c96a319 100644
--- a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/NodeControllerState.java
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/NodeControllerState.java
@@ -41,6 +41,8 @@
private final NetworkAddress dataPort;
+ private final NetworkAddress datasetPort;
+
private final Set<JobId> activeJobIds;
private final String osName;
@@ -107,6 +109,14 @@
private final long[] netSignalingBytesWritten;
+ private final long[] datasetNetPayloadBytesRead;
+
+ private final long[] datasetNetPayloadBytesWritten;
+
+ private final long[] datasetNetSignalingBytesRead;
+
+ private final long[] datasetNetSignalingBytesWritten;
+
private final long[] ipcMessagesSent;
private final long[] ipcMessageBytesSent;
@@ -123,6 +133,7 @@
this.nodeController = nodeController;
ncConfig = reg.getNCConfig();
dataPort = reg.getDataPort();
+ datasetPort = reg.getDatasetPort();
activeJobIds = new HashSet<JobId>();
osName = reg.getOSName();
@@ -164,6 +175,10 @@
netPayloadBytesWritten = new long[RRD_SIZE];
netSignalingBytesRead = new long[RRD_SIZE];
netSignalingBytesWritten = new long[RRD_SIZE];
+ datasetNetPayloadBytesRead = new long[RRD_SIZE];
+ datasetNetPayloadBytesWritten = new long[RRD_SIZE];
+ datasetNetSignalingBytesRead = new long[RRD_SIZE];
+ datasetNetSignalingBytesWritten = new long[RRD_SIZE];
ipcMessagesSent = new long[RRD_SIZE];
ipcMessageBytesSent = new long[RRD_SIZE];
ipcMessagesReceived = new long[RRD_SIZE];
@@ -196,6 +211,10 @@
netPayloadBytesWritten[rrdPtr] = hbData.netPayloadBytesWritten;
netSignalingBytesRead[rrdPtr] = hbData.netSignalingBytesRead;
netSignalingBytesWritten[rrdPtr] = hbData.netSignalingBytesWritten;
+ datasetNetPayloadBytesRead[rrdPtr] = hbData.datasetNetPayloadBytesRead;
+ datasetNetPayloadBytesWritten[rrdPtr] = hbData.datasetNetPayloadBytesWritten;
+ datasetNetSignalingBytesRead[rrdPtr] = hbData.datasetNetSignalingBytesRead;
+ datasetNetSignalingBytesWritten[rrdPtr] = hbData.datasetNetSignalingBytesWritten;
ipcMessagesSent[rrdPtr] = hbData.ipcMessagesSent;
ipcMessageBytesSent[rrdPtr] = hbData.ipcMessageBytesSent;
ipcMessagesReceived[rrdPtr] = hbData.ipcMessagesReceived;
@@ -227,6 +246,10 @@
return dataPort;
}
+ public NetworkAddress getDatasetPort() {
+ return datasetPort;
+ }
+
public JSONObject toSummaryJSON() throws JSONException {
JSONObject o = new JSONObject();
o.put("node-id", ncConfig.nodeId);
@@ -271,6 +294,10 @@
o.put("net-payload-bytes-written", netPayloadBytesWritten);
o.put("net-signaling-bytes-read", netSignalingBytesRead);
o.put("net-signaling-bytes-written", netSignalingBytesWritten);
+ o.put("dataset-net-payload-bytes-read", datasetNetPayloadBytesRead);
+ o.put("dataset-net-payload-bytes-written", datasetNetPayloadBytesWritten);
+ o.put("dataset-net-signaling-bytes-read", datasetNetSignalingBytesRead);
+ o.put("dataset-net-signaling-bytes-written", datasetNetSignalingBytesWritten);
o.put("ipc-messages-sent", ipcMessagesSent);
o.put("ipc-message-bytes-sent", ipcMessageBytesSent);
o.put("ipc-messages-received", ipcMessagesReceived);
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/dataset/DatasetDirectoryService.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/dataset/DatasetDirectoryService.java
new file mode 100644
index 0000000..13d0c30
--- /dev/null
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/dataset/DatasetDirectoryService.java
@@ -0,0 +1,241 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.control.cc.dataset;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+import edu.uci.ics.hyracks.api.comm.NetworkAddress;
+import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord;
+import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord.Status;
+import edu.uci.ics.hyracks.api.dataset.IDatasetDirectoryService;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.job.JobId;
+
+/**
+ * TODO(madhusudancs): The potential peril of this global dataset directory service implementation is that a job's
+ * location information is never evicted from memory, so memory usage grows with the number of jobs in the system.
+ * What we should possibly do is add an API call for the client to report that it has received everything it needs
+ * for the job (after it receives all the results). Then we can simply discard the location information for that
+ * job.
+ */
+public class DatasetDirectoryService implements IDatasetDirectoryService {
+ private final Map<JobId, Map<ResultSetId, ResultSetMetaData>> jobResultLocationsMap;
+
+ public DatasetDirectoryService() {
+ jobResultLocationsMap = new HashMap<JobId, Map<ResultSetId, ResultSetMetaData>>();
+ }
+
+ @Override
+ public synchronized void registerResultPartitionLocation(JobId jobId, ResultSetId rsId, boolean orderedResult,
+ int partition, int nPartitions, NetworkAddress networkAddress) {
+ Map<ResultSetId, ResultSetMetaData> rsMap = jobResultLocationsMap.get(jobId);
+ if (rsMap == null) {
+ rsMap = new HashMap<ResultSetId, ResultSetMetaData>();
+ jobResultLocationsMap.put(jobId, rsMap);
+ }
+
+ ResultSetMetaData resultSetMetaData = rsMap.get(rsId);
+ if (resultSetMetaData == null) {
+ resultSetMetaData = new ResultSetMetaData(orderedResult, new DatasetDirectoryRecord[nPartitions]);
+ rsMap.put(rsId, resultSetMetaData);
+ }
+
+ DatasetDirectoryRecord[] records = resultSetMetaData.getRecords();
+ if (records[partition] == null) {
+ records[partition] = new DatasetDirectoryRecord();
+ }
+ records[partition].setNetworkAddress(networkAddress);
+ records[partition].start();
+ notifyAll();
+ }
+
+ @Override
+ public synchronized void reportResultPartitionWriteCompletion(JobId jobId, ResultSetId rsId, int partition) {
+ DatasetDirectoryRecord ddr = getDatasetDirectoryRecord(jobId, rsId, partition);
+ ddr.writeEOS();
+ }
+
+ @Override
+ public synchronized void reportResultPartitionFailure(JobId jobId, ResultSetId rsId, int partition) {
+ DatasetDirectoryRecord ddr = getDatasetDirectoryRecord(jobId, rsId, partition);
+ ddr.fail();
+ }
+
+ @Override
+ public synchronized Status getResultStatus(JobId jobId, ResultSetId rsId) throws HyracksDataException {
+ Map<ResultSetId, ResultSetMetaData> rsMap;
+ while ((rsMap = jobResultLocationsMap.get(jobId)) == null) {
+ try {
+ wait();
+ } catch (InterruptedException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ ResultSetMetaData resultSetMetaData = rsMap.get(rsId);
+ if (resultSetMetaData == null || resultSetMetaData.getRecords() == null) {
+ throw new HyracksDataException("ResultSet locations are uninitialized when they are expected to be initialized.");
+ }
+ DatasetDirectoryRecord[] records = resultSetMetaData.getRecords();
+
+ ArrayList<Status> statuses = new ArrayList<Status>(records.length);
+ for (int i = 0; i < records.length; i++) {
+ statuses.add(records[i].getStatus());
+ }
+
+ // Default status is idle
+ Status status = Status.IDLE;
+ if (statuses.contains(Status.FAILED)) {
+ // If there is even one failed entry, we should return the FAILED status.
+ return Status.FAILED;
+ } else if (statuses.contains(Status.RUNNING)) {
+ // If there are no failed entries and at least one running entry, we should return the RUNNING status.
+ return Status.RUNNING;
+ } else {
+ // Only if each and every partition has reported success do we report SUCCESS as the status.
+ int successCount = 0;
+ for (int i = 0; i < statuses.size(); i++) {
+ if (statuses.get(i) == Status.SUCCESS) {
+ successCount++;
+ }
+ }
+ if (successCount == statuses.size()) {
+ return Status.SUCCESS;
+ }
+ }
+ return status;
+ }
+
+ @Override
+ public synchronized DatasetDirectoryRecord[] getResultPartitionLocations(JobId jobId, ResultSetId rsId,
+ DatasetDirectoryRecord[] knownRecords) throws HyracksDataException {
+ DatasetDirectoryRecord[] newRecords;
+ while ((newRecords = updatedRecords(jobId, rsId, knownRecords)) == null) {
+ try {
+ wait();
+ } catch (InterruptedException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+ return newRecords;
+ }
+
+ public DatasetDirectoryRecord getDatasetDirectoryRecord(JobId jobId, ResultSetId rsId, int partition) {
+ Map<ResultSetId, ResultSetMetaData> rsMap = jobResultLocationsMap.get(jobId);
+ ResultSetMetaData resultSetMetaData = rsMap.get(rsId);
+ DatasetDirectoryRecord[] records = resultSetMetaData.getRecords();
+ return records[partition];
+ }
+
+ /**
+ * Compares the records already known by the client for the given job's result set id with the records that the
+ * dataset directory service knows, and if any newly discovered records exist, returns the whole array with the
+ * new records filled in.
+ * The logic of this method is somewhat involved. Here is how it works.
+ * If the ordering constraint has to be enforced, the method finds the first null record in the known records in
+ * partition order. It always traverses the array from first to last!
+ * If the known records array is null, or its first element is null, but the record for that partition is now
+ * known to the directory service, the method fills in that record and returns the array.
+ * However, if the first null known record is not the first element of the array, then by induction all of the
+ * preceding records are already known to the client and none of the records for the partitions ahead are known
+ * to the client yet. So we check whether the client has reached the end of stream for the partition corresponding
+ * to the record just before the first null known record, i.e. the last known non-null record. If not, we return
+ * null, because we cannot expose any new locations until the client reaches end of stream for the last known record.
+ * If the client has reached the end of stream for the last known non-null record, we check whether the next record
+ * has been discovered by the dataset directory service; if so, we fill it into the records array and return the
+ * array, otherwise we return null.
+ * If ordering is not required, we are free to return any newly discovered records, so we simply check whether the
+ * arrays are equal and, if they are not, return the entire updated array.
+ *
+ * @param jobId
+ * - Id of the job for which the directory records should be retrieved.
+ * @param rsId
+ * - Id of the result set for which the directory records should be retrieved.
+ * @param knownRecords
+ * - An array of directory records that the client is already aware of.
+ * @return
+ * - Returns the updated records array if a newly discovered record can be exposed (respecting the ordering
+ * constraint when required), or null otherwise.
+ * @throws HyracksDataException
+ * TODO(madhusudancs): Think about caching (while still staying stateless) instead of these ugly O(n) iterations
+ * for every check. This already looks very expensive.
+ */
+ private DatasetDirectoryRecord[] updatedRecords(JobId jobId, ResultSetId rsId, DatasetDirectoryRecord[] knownRecords)
+ throws HyracksDataException {
+ Map<ResultSetId, ResultSetMetaData> rsMap = jobResultLocationsMap.get(jobId);
+ if (rsMap == null) {
+ return null;
+ }
+
+ ResultSetMetaData resultSetMetaData = rsMap.get(rsId);
+ if (resultSetMetaData == null || resultSetMetaData.getRecords() == null) {
+ throw new HyracksDataException("ResultSet locations are uninitialized when they are expected to be initialized.");
+ }
+
+ boolean ordered = resultSetMetaData.getOrderedResult();
+ DatasetDirectoryRecord[] records = resultSetMetaData.getRecords();
+ /* If ordering is required, we should expose the dataset directory records only in partition order; otherwise
+ * we can simply check if there are any newly discovered records and send the whole array back if there are.
+ */
+ if (ordered) {
+ // Iterate over the known records and find the first null record, i.e. the one just after the last known non-null record.
+ int i = 0;
+ for (i = 0; i < records.length; i++) {
+ if (knownRecords == null) {
+ if (records[0] != null) {
+ knownRecords = new DatasetDirectoryRecord[records.length];
+ knownRecords[0] = records[0];
+ return knownRecords;
+ }
+ return null;
+ }
+ if (knownRecords[i] == null) {
+ if ((i == 0 || knownRecords[i - 1].hasReachedReadEOS()) && records[i] != null) {
+ knownRecords[i] = records[i];
+ return knownRecords;
+ }
+ return null;
+ }
+ }
+ } else {
+ if (!Arrays.equals(records, knownRecords)) {
+ return records;
+ }
+ }
+ return null;
+ }
+
+ private class ResultSetMetaData {
+ private final boolean ordered;
+
+ private final DatasetDirectoryRecord[] records;
+
+ public ResultSetMetaData(boolean ordered, DatasetDirectoryRecord[] records) {
+ this.ordered = ordered;
+ this.records = records;
+ }
+
+ public boolean getOrderedResult() {
+ return ordered;
+ }
+
+ public DatasetDirectoryRecord[] getRecords() {
+ return records;
+ }
+ }
+}
\ No newline at end of file
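The ordered branch of updatedRecords() above is the trickiest part of DatasetDirectoryService. The following standalone sketch (illustrative only: plain strings stand in for DatasetDirectoryRecord and a boolean array for the read-EOS flag) models the same exposure rule: reveal a location only for the first unknown partition, and only after the client has finished reading the previous one.

import java.util.Arrays;

// Standalone model of the ordered-exposure rule in DatasetDirectoryService.updatedRecords().
public class OrderedExposureSketch {
    /**
     * @param known   locations the client already has (a null array is allowed)
     * @param actual  locations the directory currently knows
     * @param readEOS whether the client finished reading each known partition
     * @return the known array with exactly one newly exposed location, or null if nothing new can be exposed
     */
    static String[] expose(String[] known, String[] actual, boolean[] readEOS) {
        if (known == null) {
            if (actual[0] != null) {
                known = new String[actual.length];
                known[0] = actual[0];
                return known;
            }
            return null;
        }
        for (int i = 0; i < actual.length; i++) {
            if (known[i] == null) {
                if ((i == 0 || readEOS[i - 1]) && actual[i] != null) {
                    known[i] = actual[i];
                    return known;
                }
                return null;
            }
        }
        return null; // the client already knows every partition
    }

    public static void main(String[] args) {
        String[] actual = { "nc1:port", "nc2:port", null };
        String[] known = expose(null, actual, new boolean[3]);
        System.out.println(Arrays.toString(known)); // [nc1:port, null, null]
        // Nothing new is exposed until partition 0 has been fully read.
        System.out.println(Arrays.toString(expose(known, actual, new boolean[] { false, false, false }))); // null
        System.out.println(Arrays.toString(expose(known, actual, new boolean[] { true, false, false })));  // [nc1:port, nc2:port, null]
    }
}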
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/GetDatasetDirectoryServiceInfoWork.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/GetDatasetDirectoryServiceInfoWork.java
new file mode 100644
index 0000000..3ac6acc
--- /dev/null
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/GetDatasetDirectoryServiceInfoWork.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.control.cc.work;
+
+import edu.uci.ics.hyracks.api.comm.NetworkAddress;
+import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
+import edu.uci.ics.hyracks.control.common.work.IResultCallback;
+import edu.uci.ics.hyracks.control.common.work.SynchronizableWork;
+
+public class GetDatasetDirectoryServiceInfoWork extends SynchronizableWork {
+ private final ClusterControllerService ccs;
+
+ private final IResultCallback<NetworkAddress> callback;
+
+ public GetDatasetDirectoryServiceInfoWork(ClusterControllerService ccs, IResultCallback<NetworkAddress> callback) {
+ this.ccs = ccs;
+ this.callback = callback;
+ }
+
+ @Override
+ public void doRun() {
+ try {
+ NetworkAddress addr = ccs.getDatasetDirectoryServiceInfo();
+ callback.setValue(addr);
+ } catch (Exception e) {
+ callback.setException(e);
+ }
+ }
+}
\ No newline at end of file
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/GetNodeControllersInfoWork.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/GetNodeControllersInfoWork.java
index 2f23a2c..a787b9f 100644
--- a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/GetNodeControllersInfoWork.java
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/GetNodeControllersInfoWork.java
@@ -39,7 +39,8 @@
Map<String, NodeControllerInfo> result = new LinkedHashMap<String, NodeControllerInfo>();
Map<String, NodeControllerState> nodeMap = ccs.getNodeMap();
for (Map.Entry<String, NodeControllerState> e : nodeMap.entrySet()) {
- result.put(e.getKey(), new NodeControllerInfo(e.getKey(), NodeStatus.ALIVE, e.getValue().getDataPort()));
+ result.put(e.getKey(), new NodeControllerInfo(e.getKey(), NodeStatus.ALIVE, e.getValue().getDataPort(), e
+ .getValue().getDatasetPort()));
}
callback.setValue(result);
}
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/GetResultPartitionLocationsWork.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/GetResultPartitionLocationsWork.java
new file mode 100644
index 0000000..fd1d418
--- /dev/null
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/GetResultPartitionLocationsWork.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.control.cc.work;
+
+import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord;
+import edu.uci.ics.hyracks.api.dataset.IDatasetDirectoryService;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.job.JobId;
+import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
+import edu.uci.ics.hyracks.control.common.work.IResultCallback;
+import edu.uci.ics.hyracks.control.common.work.SynchronizableWork;
+
+public class GetResultPartitionLocationsWork extends SynchronizableWork {
+ private final ClusterControllerService ccs;
+
+ private final JobId jobId;
+
+ private final ResultSetId rsId;
+
+ private final DatasetDirectoryRecord[] knownRecords;
+
+ private final IResultCallback<DatasetDirectoryRecord[]> callback;
+
+ public GetResultPartitionLocationsWork(ClusterControllerService ccs, JobId jobId, ResultSetId rsId,
+ DatasetDirectoryRecord[] knownRecords, IResultCallback<DatasetDirectoryRecord[]> callback) {
+ this.ccs = ccs;
+ this.jobId = jobId;
+ this.rsId = rsId;
+ this.knownRecords = knownRecords;
+ this.callback = callback;
+ }
+
+ @Override
+ public void doRun() {
+ final IDatasetDirectoryService dds = ccs.getDatasetDirectoryService();
+ ccs.getExecutor().execute(new Runnable() {
+ @Override
+ public void run() {
+ try {
+ DatasetDirectoryRecord[] partitionLocations = dds.getResultPartitionLocations(jobId, rsId,
+ knownRecords);
+ callback.setValue(partitionLocations);
+ } catch (HyracksDataException e) {
+ callback.setException(e);
+ }
+ }
+ });
+ }
+}
\ No newline at end of file
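GetResultPartitionLocationsWork above differs from the other work items in this patch in that the lookup can block (getResultPartitionLocations() waits until new records appear), so it is handed off to the cluster controller's executor rather than run on the work-queue thread. A minimal standalone sketch of that hand-off pattern; the ResultCallback interface below is an illustrative stand-in for IResultCallback:

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class BlockingWorkOffloadSketch {
    // Illustrative stand-in for IResultCallback<T>.
    interface ResultCallback<T> {
        void setValue(T value);

        void setException(Exception e);
    }

    static <T> void scheduleBlockingLookup(final ExecutorService executor, final Callable<T> lookup,
            final ResultCallback<T> callback) {
        // The work item itself returns immediately; only the executor thread blocks.
        executor.execute(new Runnable() {
            @Override
            public void run() {
                try {
                    callback.setValue(lookup.call());
                } catch (Exception e) {
                    callback.setException(e);
                }
            }
        });
    }

    public static void main(String[] args) {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        scheduleBlockingLookup(executor, new Callable<String[]>() {
            @Override
            public String[] call() throws Exception {
                Thread.sleep(100); // simulate waiting for new directory records
                return new String[] { "partition-0@nc1" };
            }
        }, new ResultCallback<String[]>() {
            @Override
            public void setValue(String[] value) {
                System.out.println("locations received: " + value.length);
            }

            @Override
            public void setException(Exception e) {
                e.printStackTrace();
            }
        });
        executor.shutdown();
    }
}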
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/GetResultStatusWork.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/GetResultStatusWork.java
new file mode 100644
index 0000000..d2dadf5
--- /dev/null
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/GetResultStatusWork.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.control.cc.work;
+
+import edu.uci.ics.hyracks.api.dataset.DatasetDirectoryRecord.Status;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.job.JobId;
+import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
+import edu.uci.ics.hyracks.control.common.work.IResultCallback;
+import edu.uci.ics.hyracks.control.common.work.SynchronizableWork;
+
+public class GetResultStatusWork extends SynchronizableWork {
+ private final ClusterControllerService ccs;
+
+ private final JobId jobId;
+
+ private final ResultSetId rsId;
+
+ private final IResultCallback<Status> callback;
+
+ public GetResultStatusWork(ClusterControllerService ccs, JobId jobId, ResultSetId rsId,
+ IResultCallback<Status> callback) {
+ this.ccs = ccs;
+ this.jobId = jobId;
+ this.rsId = rsId;
+ this.callback = callback;
+ }
+
+ @Override
+ public void doRun() {
+ try {
+ Status status = ccs.getDatasetDirectoryService().getResultStatus(jobId, rsId);
+ callback.setValue(status);
+ } catch (HyracksDataException e) {
+ callback.setException(e);
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "JobId@" + jobId + " ResultSetId@" + rsId;
+ }
+}
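GetResultStatusWork simply surfaces DatasetDirectoryService.getResultStatus(), whose aggregation rule is: any FAILED partition makes the result set FAILED; otherwise any RUNNING partition makes it RUNNING; SUCCESS is reported only when every partition has succeeded, and IDLE otherwise. A compact standalone restatement of that rule (the enum below is an illustrative mirror of DatasetDirectoryRecord.Status, not the Hyracks type):

public class ResultStatusAggregationSketch {
    // Illustrative mirror of DatasetDirectoryRecord.Status.
    enum Status {
        IDLE, RUNNING, SUCCESS, FAILED
    }

    static Status aggregate(Status[] partitionStatuses) {
        boolean anyRunning = false;
        boolean allSuccess = true;
        for (Status s : partitionStatuses) {
            if (s == Status.FAILED) {
                return Status.FAILED; // one failure fails the whole result set
            }
            anyRunning |= (s == Status.RUNNING);
            allSuccess &= (s == Status.SUCCESS);
        }
        if (anyRunning) {
            return Status.RUNNING;
        }
        return allSuccess ? Status.SUCCESS : Status.IDLE;
    }

    public static void main(String[] args) {
        System.out.println(aggregate(new Status[] { Status.SUCCESS, Status.RUNNING })); // RUNNING
        System.out.println(aggregate(new Status[] { Status.SUCCESS, Status.SUCCESS })); // SUCCESS
        System.out.println(aggregate(new Status[] { Status.SUCCESS, Status.FAILED }));  // FAILED
    }
}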
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/RegisterResultPartitionLocationWork.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/RegisterResultPartitionLocationWork.java
new file mode 100644
index 0000000..f86e924
--- /dev/null
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/RegisterResultPartitionLocationWork.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.control.cc.work;
+
+import edu.uci.ics.hyracks.api.comm.NetworkAddress;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
+import edu.uci.ics.hyracks.api.job.JobId;
+import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
+import edu.uci.ics.hyracks.control.common.work.AbstractWork;
+
+public class RegisterResultPartitionLocationWork extends AbstractWork {
+ private final ClusterControllerService ccs;
+
+ private final JobId jobId;
+
+ private final ResultSetId rsId;
+
+ private final boolean orderedResult;
+
+ private final int partition;
+
+ private final int nPartitions;
+
+ private final NetworkAddress networkAddress;
+
+ public RegisterResultPartitionLocationWork(ClusterControllerService ccs, JobId jobId, ResultSetId rsId,
+ boolean orderedResult, int partition, int nPartitions, NetworkAddress networkAddress) {
+ this.ccs = ccs;
+ this.jobId = jobId;
+ this.rsId = rsId;
+ this.orderedResult = orderedResult;
+ this.partition = partition;
+ this.nPartitions = nPartitions;
+ this.networkAddress = networkAddress;
+ }
+
+ @Override
+ public void run() {
+ ccs.getDatasetDirectoryService().registerResultPartitionLocation(jobId, rsId, orderedResult, partition,
+ nPartitions, networkAddress);
+ }
+
+ @Override
+ public String toString() {
+ return "JobId@" + jobId + " ResultSetId@" + rsId + " Partition@" + partition + " NPartitions@" + nPartitions
+ + " ResultPartitionLocation@" + networkAddress + " OrderedResult@" + orderedResult;
+ }
+}
\ No newline at end of file
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/ReportResultPartitionFailureWork.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/ReportResultPartitionFailureWork.java
new file mode 100644
index 0000000..4aea41e
--- /dev/null
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/ReportResultPartitionFailureWork.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.control.cc.work;
+
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
+import edu.uci.ics.hyracks.api.job.JobId;
+import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
+import edu.uci.ics.hyracks.control.common.work.AbstractWork;
+
+public class ReportResultPartitionFailureWork extends AbstractWork {
+ private final ClusterControllerService ccs;
+
+ private final JobId jobId;
+
+ private final ResultSetId rsId;
+
+ private final int partition;
+
+ public ReportResultPartitionFailureWork(ClusterControllerService ccs, JobId jobId, ResultSetId rsId, int partition) {
+ this.ccs = ccs;
+ this.jobId = jobId;
+ this.rsId = rsId;
+ this.partition = partition;
+ }
+
+ @Override
+ public void run() {
+ ccs.getDatasetDirectoryService().reportResultPartitionFailure(jobId, rsId, partition);
+ }
+
+ @Override
+ public String toString() {
+ return "JobId@" + jobId + " ResultSetId@" + rsId + " Partition@" + partition;
+ }
+}
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/ReportResultPartitionWriteCompletionWork.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/ReportResultPartitionWriteCompletionWork.java
new file mode 100644
index 0000000..313b730
--- /dev/null
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/ReportResultPartitionWriteCompletionWork.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.control.cc.work;
+
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
+import edu.uci.ics.hyracks.api.job.JobId;
+import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
+import edu.uci.ics.hyracks.control.common.work.AbstractWork;
+
+public class ReportResultPartitionWriteCompletionWork extends AbstractWork {
+ private final ClusterControllerService ccs;
+
+ private final JobId jobId;
+
+ private final ResultSetId rsId;
+
+ private final int partition;
+
+ public ReportResultPartitionWriteCompletionWork(ClusterControllerService ccs, JobId jobId, ResultSetId rsId,
+ int partition) {
+ this.ccs = ccs;
+ this.jobId = jobId;
+ this.rsId = rsId;
+ this.partition = partition;
+ }
+
+ @Override
+ public void run() {
+ ccs.getDatasetDirectoryService().reportResultPartitionWriteCompletion(jobId, rsId, partition);
+ }
+
+ @Override
+ public String toString() {
+ return "JobId@" + jobId + " ResultSetId@" + rsId + " Partition@" + partition;
+ }
+}
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/resources/static/javascript/adminconsole/NodeDetailsPage.js b/hyracks/hyracks-control/hyracks-control-cc/src/main/resources/static/javascript/adminconsole/NodeDetailsPage.js
index ff9d8a0..3fc46ff 100644
--- a/hyracks/hyracks-control/hyracks-control-cc/src/main/resources/static/javascript/adminconsole/NodeDetailsPage.js
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/resources/static/javascript/adminconsole/NodeDetailsPage.js
@@ -58,6 +58,10 @@
var netPayloadBytesWritten = result['net-payload-bytes-written'];
var netSignalingBytesRead = result['net-signaling-bytes-read'];
var netSignalingBytesWritten = result['net-signaling-bytes-written'];
+ var datasetNetPayloadBytesRead = result['dataset-net-payload-bytes-read'];
+ var datasetNetPayloadBytesWritten = result['dataset-net-payload-bytes-written'];
+ var datasetNetSignalingBytesRead = result['dataset-net-signaling-bytes-read'];
+ var datasetNetSignalingBytesWritten = result['dataset-net-signaling-bytes-written'];
var ipcMessagesSent = result['ipc-messages-sent'];
var ipcMessageBytesSent = result['ipc-message-bytes-sent'];
var ipcMessagesReceived = result['ipc-messages-received'];
@@ -117,9 +121,13 @@
}
if (i < sysLoad.length - 1) {
netPayloadReadBWArray.push([ i, computeRate(netPayloadBytesRead, rrdPtr) ]);
+ netPayloadReadBWArray.push([ i, computeRate(datasetNetPayloadBytesRead, rrdPtr) ]);
netPayloadWriteBWArray.push([ i, computeRate(netPayloadBytesWritten, rrdPtr) ]);
+ netPayloadWriteBWArray.push([ i, computeRate(datasetNetPayloadBytesWritten, rrdPtr) ]);
netSignalingReadBWArray.push([ i, computeRate(netSignalingBytesRead, rrdPtr) ]);
+ netSignalingReadBWArray.push([ i, computeRate(datasetNetSignalingBytesRead, rrdPtr) ]);
netSignalingWriteBWArray.push([ i, computeRate(netSignalingBytesWritten, rrdPtr) ]);
+ netSignalingWriteBWArray.push([ i, computeRate(datasetNetSignalingBytesWritten, rrdPtr) ]);
ipcMessageSendRateArray.push([ i, computeRate(ipcMessagesSent, rrdPtr) ]);
ipcMessageBytesSendRateArray.push([ i, computeRate(ipcMessageBytesSent, rrdPtr) ]);
ipcMessageReceiveRateArray.push([ i, computeRate(ipcMessagesReceived, rrdPtr) ]);
diff --git a/hyracks/hyracks-control/hyracks-control-common/pom.xml b/hyracks/hyracks-control/hyracks-control-common/pom.xml
index 096f2e1..ce1298e 100644
--- a/hyracks/hyracks-control/hyracks-control-common/pom.xml
+++ b/hyracks/hyracks-control/hyracks-control-common/pom.xml
@@ -18,8 +18,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/base/IClusterController.java b/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/base/IClusterController.java
index 3099bbb..627dd55 100644
--- a/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/base/IClusterController.java
+++ b/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/base/IClusterController.java
@@ -16,7 +16,9 @@
import java.util.List;
+import edu.uci.ics.hyracks.api.comm.NetworkAddress;
import edu.uci.ics.hyracks.api.dataflow.TaskAttemptId;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.job.JobId;
import edu.uci.ics.hyracks.control.common.controllers.NodeRegistration;
import edu.uci.ics.hyracks.control.common.heartbeat.HeartbeatData;
@@ -47,5 +49,12 @@
public void sendApplicationMessageToCC(byte[] data, String nodeId) throws Exception;
+ public void registerResultPartitionLocation(JobId jobId, ResultSetId rsId, boolean orderedResult, int partition,
+ int nPartitions, NetworkAddress networkAddress) throws Exception;
+
+ public void reportResultPartitionWriteCompletion(JobId jobId, ResultSetId rsId, int partition) throws Exception;
+
+ public void reportResultPartitionFailure(JobId jobId, ResultSetId rsId, int partition) throws Exception;
+
public void getNodeControllerInfos() throws Exception;
}
\ No newline at end of file
diff --git a/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/controllers/NCConfig.java b/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/controllers/NCConfig.java
index 2e7b498..6a9747b 100644
--- a/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/controllers/NCConfig.java
+++ b/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/controllers/NCConfig.java
@@ -39,6 +39,9 @@
@Option(name = "-data-ip-address", usage = "IP Address to bind data listener", required = true)
public String dataIPAddress;
+ @Option(name = "-result-ip-address", usage = "IP Address to bind dataset result distribution listener", required = true)
+ public String datasetIPAddress;
+
@Option(name = "-iodevices", usage = "Comma separated list of IO Device mount points (default: One device in default temp folder)", required = false)
public String ioDevices = System.getProperty("java.io.tmpdir");
@@ -55,6 +58,9 @@
@Option(name = "--", handler = StopOptionHandler.class)
public List<String> appArgs;
+ @Option(name = "-result-manager-memory", usage = "Memory usable for result caching at this Node Controller in bytes (default: -1 auto)")
+ public int resultManagerMemory = -1;
+
public void toCommandLine(List<String> cList) {
cList.add("-cc-host");
cList.add(ccHost);
@@ -66,6 +72,7 @@
cList.add(nodeId);
cList.add("-data-ip-address");
cList.add(dataIPAddress);
+ cList.add("-result-ip-address");
+ cList.add(datasetIPAddress);
cList.add("-iodevices");
cList.add(ioDevices);
cList.add("-net-thread-count");
@@ -82,5 +89,7 @@
cList.add(appArg);
}
}
+ cList.add("-result-manager-memory");
+ cList.add(String.valueOf(resultManagerMemory));
}
}
\ No newline at end of file
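To see the effect of the two new NCConfig options, here is a minimal sketch that serializes a config back to a command line. Only the fields touched in this patch plus placeholder values for the required host and node id are set; the expected output is an assumption based on the option names above:

import java.util.ArrayList;
import java.util.List;

import edu.uci.ics.hyracks.control.common.controllers.NCConfig;

public class NCConfigCommandLineExample {
    public static void main(String[] args) {
        NCConfig config = new NCConfig();
        config.ccHost = "cc-host.example.com";          // placeholder
        config.nodeId = "nc1";                          // placeholder
        config.dataIPAddress = "10.0.0.1";
        config.datasetIPAddress = "10.0.0.1";           // new: dataset result distribution listener
        config.resultManagerMemory = 64 * 1024 * 1024;  // new: 64 MB for result caching
        config.appArgs = new ArrayList<String>();       // avoid relying on null handling for app arguments

        List<String> commandLine = new ArrayList<String>();
        config.toCommandLine(commandLine);
        // Expected to contain "-result-ip-address 10.0.0.1" and "-result-manager-memory 67108864".
        System.out.println(commandLine);
    }
}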
diff --git a/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/controllers/NodeRegistration.java b/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/controllers/NodeRegistration.java
index 91cfecf..a897602 100644
--- a/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/controllers/NodeRegistration.java
+++ b/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/controllers/NodeRegistration.java
@@ -33,6 +33,8 @@
private final NetworkAddress dataPort;
+ private final NetworkAddress datasetPort;
+
private final String osName;
private final String arch;
@@ -60,13 +62,14 @@
private final HeartbeatSchema hbSchema;
public NodeRegistration(InetSocketAddress ncAddress, String nodeId, NCConfig ncConfig, NetworkAddress dataPort,
- String osName, String arch, String osVersion, int nProcessors, String vmName, String vmVersion,
- String vmVendor, String classpath, String libraryPath, String bootClasspath, List<String> inputArguments,
- Map<String, String> systemProperties, HeartbeatSchema hbSchema) {
+ NetworkAddress datasetPort, String osName, String arch, String osVersion, int nProcessors, String vmName,
+ String vmVersion, String vmVendor, String classpath, String libraryPath, String bootClasspath,
+ List<String> inputArguments, Map<String, String> systemProperties, HeartbeatSchema hbSchema) {
this.ncAddress = ncAddress;
this.nodeId = nodeId;
this.ncConfig = ncConfig;
this.dataPort = dataPort;
+ this.datasetPort = datasetPort;
this.osName = osName;
this.arch = arch;
this.osVersion = osVersion;
@@ -98,6 +101,10 @@
return dataPort;
}
+ public NetworkAddress getDatasetPort() {
+ return datasetPort;
+ }
+
public String getOSName() {
return osName;
}
diff --git a/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/heartbeat/HeartbeatData.java b/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/heartbeat/HeartbeatData.java
index 1dba3bc..663c68a 100644
--- a/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/heartbeat/HeartbeatData.java
+++ b/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/heartbeat/HeartbeatData.java
@@ -37,6 +37,10 @@
public long netPayloadBytesWritten;
public long netSignalingBytesRead;
public long netSignalingBytesWritten;
+ public long datasetNetPayloadBytesRead;
+ public long datasetNetPayloadBytesWritten;
+ public long datasetNetSignalingBytesRead;
+ public long datasetNetSignalingBytesWritten;
public long ipcMessagesSent;
public long ipcMessageBytesSent;
public long ipcMessagesReceived;
diff --git a/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/ipc/CCNCFunctions.java b/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/ipc/CCNCFunctions.java
index 8cc4a54..f6ab9ba 100644
--- a/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/ipc/CCNCFunctions.java
+++ b/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/ipc/CCNCFunctions.java
@@ -36,6 +36,7 @@
import edu.uci.ics.hyracks.api.dataflow.TaskAttemptId;
import edu.uci.ics.hyracks.api.dataflow.TaskId;
import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicy;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.job.JobFlag;
import edu.uci.ics.hyracks.api.job.JobId;
import edu.uci.ics.hyracks.api.job.JobStatus;
@@ -67,6 +68,9 @@
REPORT_PROFILE,
REGISTER_PARTITION_PROVIDER,
REGISTER_PARTITION_REQUEST,
+ REGISTER_RESULT_PARTITION_LOCATION,
+ REPORT_RESULT_PARTITION_WRITE_COMPLETION,
+ REPORT_RESULT_PARTITION_FAILURE,
NODE_REGISTRATION_RESULT,
START_TASKS,
@@ -427,6 +431,127 @@
}
}
+ public static class RegisterResultPartitionLocationFunction extends Function {
+ private static final long serialVersionUID = 1L;
+
+ private final JobId jobId;
+
+ private final ResultSetId rsId;
+
+ private final boolean orderedResult;
+
+ private final int partition;
+
+ private final int nPartitions;
+
+ private final NetworkAddress networkAddress;
+
+ public RegisterResultPartitionLocationFunction(JobId jobId, ResultSetId rsId, boolean orderedResult,
+ int partition, int nPartitions, NetworkAddress networkAddress) {
+ this.jobId = jobId;
+ this.rsId = rsId;
+ this.orderedResult = orderedResult;
+ this.partition = partition;
+ this.nPartitions = nPartitions;
+ this.networkAddress = networkAddress;
+ }
+
+ @Override
+ public FunctionId getFunctionId() {
+ return FunctionId.REGISTER_RESULT_PARTITION_LOCATION;
+ }
+
+ public JobId getJobId() {
+ return jobId;
+ }
+
+ public ResultSetId getResultSetId() {
+ return rsId;
+ }
+
+ public boolean getOrderedResult() {
+ return orderedResult;
+ }
+
+ public int getPartition() {
+ return partition;
+ }
+
+ public int getNPartitions() {
+ return nPartitions;
+ }
+
+ public NetworkAddress getNetworkAddress() {
+ return networkAddress;
+ }
+ }
+
+ public static class ReportResultPartitionWriteCompletionFunction extends Function {
+ private static final long serialVersionUID = 1L;
+
+ private final JobId jobId;
+
+ private final ResultSetId rsId;
+
+ private final int partition;
+
+ public ReportResultPartitionWriteCompletionFunction(JobId jobId, ResultSetId rsId, int partition) {
+ this.jobId = jobId;
+ this.rsId = rsId;
+ this.partition = partition;
+ }
+
+ @Override
+ public FunctionId getFunctionId() {
+ return FunctionId.REPORT_RESULT_PARTITION_WRITE_COMPLETION;
+ }
+
+ public JobId getJobId() {
+ return jobId;
+ }
+
+ public ResultSetId getResultSetId() {
+ return rsId;
+ }
+
+ public int getPartition() {
+ return partition;
+ }
+ }
+
+ public static class ReportResultPartitionFailureFunction extends Function {
+ private static final long serialVersionUID = 1L;
+
+ private final JobId jobId;
+
+ private final ResultSetId rsId;
+
+ private final int partition;
+
+ public ReportResultPartitionFailureFunction(JobId jobId, ResultSetId rsId, int partition) {
+ this.jobId = jobId;
+ this.rsId = rsId;
+ this.partition = partition;
+ }
+
+ @Override
+ public FunctionId getFunctionId() {
+ return FunctionId.REPORT_RESULT_PARTITION_FAILURE;
+ }
+
+ public JobId getJobId() {
+ return jobId;
+ }
+
+ public ResultSetId getResultSetId() {
+ return rsId;
+ }
+
+ public int getPartition() {
+ return partition;
+ }
+ }
+
public static class NodeRegistrationResult extends Function {
private static final long serialVersionUID = 1L;
diff --git a/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/ipc/ClusterControllerRemoteProxy.java b/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/ipc/ClusterControllerRemoteProxy.java
index 5fd2302..057a0f4 100644
--- a/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/ipc/ClusterControllerRemoteProxy.java
+++ b/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/ipc/ClusterControllerRemoteProxy.java
@@ -16,7 +16,9 @@
import java.util.List;
+import edu.uci.ics.hyracks.api.comm.NetworkAddress;
import edu.uci.ics.hyracks.api.dataflow.TaskAttemptId;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.job.JobId;
import edu.uci.ics.hyracks.control.common.base.IClusterController;
import edu.uci.ics.hyracks.control.common.controllers.NodeRegistration;
@@ -99,6 +101,27 @@
ipcHandle.send(-1, fn, null);
}
+ public void registerResultPartitionLocation(JobId jobId, ResultSetId rsId, boolean orderedResult, int partition,
+ int nPartitions, NetworkAddress networkAddress) throws Exception {
+ CCNCFunctions.RegisterResultPartitionLocationFunction fn = new CCNCFunctions.RegisterResultPartitionLocationFunction(
+ jobId, rsId, orderedResult, partition, nPartitions, networkAddress);
+ ipcHandle.send(-1, fn, null);
+ }
+
+ @Override
+ public void reportResultPartitionWriteCompletion(JobId jobId, ResultSetId rsId, int partition) throws Exception {
+ CCNCFunctions.ReportResultPartitionWriteCompletionFunction fn = new CCNCFunctions.ReportResultPartitionWriteCompletionFunction(
+ jobId, rsId, partition);
+ ipcHandle.send(-1, fn, null);
+ }
+
+ @Override
+ public void reportResultPartitionFailure(JobId jobId, ResultSetId rsId, int partition) throws Exception {
+ CCNCFunctions.ReportResultPartitionFailureFunction fn = new CCNCFunctions.ReportResultPartitionFailureFunction(
+ jobId, rsId, partition);
+ ipcHandle.send(-1, fn, null);
+ }
+
@Override
public void getNodeControllerInfos() throws Exception {
ipcHandle.send(-1, new CCNCFunctions.GetNodeControllersInfoFunction(), null);
diff --git a/hyracks/hyracks-control/hyracks-control-nc/pom.xml b/hyracks/hyracks-control/hyracks-control-nc/pom.xml
index 874cbab..e163d2b 100644
--- a/hyracks/hyracks-control/hyracks-control-nc/pom.xml
+++ b/hyracks/hyracks-control/hyracks-control-nc/pom.xml
@@ -15,8 +15,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
@@ -34,6 +35,11 @@
<artifactId>hyracks-net</artifactId>
<version>0.2.3-SNAPSHOT</version>
</dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-comm</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ </dependency>
</dependencies>
<reporting>
<plugins>
diff --git a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/NodeControllerService.java b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/NodeControllerService.java
index 53a4d73..10e0dba 100644
--- a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/NodeControllerService.java
+++ b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/NodeControllerService.java
@@ -46,6 +46,7 @@
import edu.uci.ics.hyracks.api.application.INCApplicationEntryPoint;
import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
import edu.uci.ics.hyracks.api.context.IHyracksRootContext;
+import edu.uci.ics.hyracks.api.dataset.IDatasetPartitionManager;
import edu.uci.ics.hyracks.api.io.IODeviceHandle;
import edu.uci.ics.hyracks.api.job.JobId;
import edu.uci.ics.hyracks.control.common.AbstractRemoteService;
@@ -62,7 +63,9 @@
import edu.uci.ics.hyracks.control.common.work.FutureValue;
import edu.uci.ics.hyracks.control.common.work.WorkQueue;
import edu.uci.ics.hyracks.control.nc.application.NCApplicationContext;
+import edu.uci.ics.hyracks.control.nc.dataset.DatasetPartitionManager;
import edu.uci.ics.hyracks.control.nc.io.IOManager;
+import edu.uci.ics.hyracks.control.nc.net.DatasetNetworkManager;
import edu.uci.ics.hyracks.control.nc.net.NetworkManager;
import edu.uci.ics.hyracks.control.nc.partitions.PartitionManager;
import edu.uci.ics.hyracks.control.nc.runtime.RootHyracksContext;
@@ -93,6 +96,10 @@
private final NetworkManager netManager;
+ private final IDatasetPartitionManager datasetPartitionManager;
+
+ private final DatasetNetworkManager datasetNetworkManager;
+
private final WorkQueue queue;
private final Timer timer;
@@ -141,7 +148,11 @@
throw new Exception("id not set");
}
partitionManager = new PartitionManager(this);
- netManager = new NetworkManager(getIpAddress(ncConfig), partitionManager, ncConfig.nNetThreads);
+ netManager = new NetworkManager(getIpAddress(ncConfig.dataIPAddress), partitionManager, ncConfig.nNetThreads);
+
+ datasetPartitionManager = new DatasetPartitionManager(this, executor, ncConfig.resultManagerMemory);
+ datasetNetworkManager = new DatasetNetworkManager(getIpAddress(ncConfig.datasetIPAddress),
+ datasetPartitionManager, ncConfig.nNetThreads);
queue = new WorkQueue();
jobletMap = new Hashtable<JobId, Joblet>();
@@ -212,6 +223,7 @@
startApplication();
+ datasetNetworkManager.start();
IIPCHandle ccIPCHandle = ipc.getHandle(new InetSocketAddress(ncConfig.ccHost, ncConfig.ccPort));
this.ccs = new ClusterControllerRemoteProxy(ccIPCHandle);
HeartbeatSchema.GarbageCollectorInfo[] gcInfos = new HeartbeatSchema.GarbageCollectorInfo[gcMXBeans.size()];
@@ -220,10 +232,11 @@
}
HeartbeatSchema hbSchema = new HeartbeatSchema(gcInfos);
ccs.registerNode(new NodeRegistration(ipc.getSocketAddress(), id, ncConfig, netManager.getNetworkAddress(),
- osMXBean.getName(), osMXBean.getArch(), osMXBean.getVersion(), osMXBean.getAvailableProcessors(),
- runtimeMXBean.getVmName(), runtimeMXBean.getVmVersion(), runtimeMXBean.getVmVendor(), runtimeMXBean
- .getClassPath(), runtimeMXBean.getLibraryPath(), runtimeMXBean.getBootClassPath(),
- runtimeMXBean.getInputArguments(), runtimeMXBean.getSystemProperties(), hbSchema));
+ datasetNetworkManager.getNetworkAddress(), osMXBean.getName(), osMXBean.getArch(), osMXBean
+ .getVersion(), osMXBean.getAvailableProcessors(), runtimeMXBean.getVmName(), runtimeMXBean
+ .getVmVersion(), runtimeMXBean.getVmVendor(), runtimeMXBean.getClassPath(), runtimeMXBean
+ .getLibraryPath(), runtimeMXBean.getBootClassPath(), runtimeMXBean.getInputArguments(),
+ runtimeMXBean.getSystemProperties(), hbSchema));
synchronized (this) {
while (registrationPending) {
@@ -270,8 +283,10 @@
LOGGER.log(Level.INFO, "Stopping NodeControllerService");
executor.shutdownNow();
partitionManager.close();
+ datasetPartitionManager.close();
heartbeatTask.cancel();
netManager.stop();
+ datasetNetworkManager.stop();
queue.stop();
LOGGER.log(Level.INFO, "Stopped NodeControllerService");
}
@@ -292,6 +307,10 @@
return netManager;
}
+ public DatasetNetworkManager getDatasetNetworkManager() {
+ return datasetNetworkManager;
+ }
+
public PartitionManager getPartitionManager() {
return partitionManager;
}
@@ -316,8 +335,7 @@
return queue;
}
- private static InetAddress getIpAddress(NCConfig ncConfig) throws Exception {
- String ipaddrStr = ncConfig.dataIPAddress;
+ private static InetAddress getIpAddress(String ipaddrStr) throws Exception {
ipaddrStr = ipaddrStr.trim();
Pattern pattern = Pattern.compile("(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})\\.(\\d{1,3})");
Matcher m = pattern.matcher(ipaddrStr);
@@ -374,6 +392,12 @@
hbData.netSignalingBytesRead = netPC.getSignalingBytesRead();
hbData.netSignalingBytesWritten = netPC.getSignalingBytesWritten();
+ MuxDemuxPerformanceCounters datasetNetPC = datasetNetworkManager.getPerformanceCounters();
+ hbData.datasetNetPayloadBytesRead = datasetNetPC.getPayloadBytesRead();
+ hbData.datasetNetPayloadBytesWritten = datasetNetPC.getPayloadBytesWritten();
+ hbData.datasetNetSignalingBytesRead = datasetNetPC.getSignalingBytesRead();
+ hbData.datasetNetSignalingBytesWritten = datasetNetPC.getSignalingBytesWritten();
+
IPCPerformanceCounters ipcPC = ipc.getPerformanceCounters();
hbData.ipcMessagesSent = ipcPC.getMessageSentCount();
hbData.ipcMessageBytesSent = ipcPC.getMessageBytesSent();
@@ -468,4 +492,8 @@
public void sendApplicationMessageToCC(byte[] data, String nodeId) throws Exception {
ccs.sendApplicationMessageToCC(data, nodeId);
}
+
+ public IDatasetPartitionManager getDatasetPartitionManager() {
+ return datasetPartitionManager;
+ }
}
\ No newline at end of file
diff --git a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/Task.java b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/Task.java
index 310878f..d6ea111 100644
--- a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/Task.java
+++ b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/Task.java
@@ -34,6 +34,7 @@
import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
import edu.uci.ics.hyracks.api.dataflow.TaskAttemptId;
import edu.uci.ics.hyracks.api.dataflow.state.IStateObject;
+import edu.uci.ics.hyracks.api.dataset.IDatasetPartitionManager;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.exceptions.HyracksException;
import edu.uci.ics.hyracks.api.io.FileReference;
@@ -348,6 +349,11 @@
}
@Override
+ public IDatasetPartitionManager getDatasetPartitionManager() {
+ return ncs.getDatasetPartitionManager();
+ }
+
+ @Override
public void sendApplicationMessageToCC(byte[] message, String nodeId) throws Exception {
this.ncs.sendApplicationMessageToCC(message, nodeId);
}
diff --git a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/dataset/DatasetMemoryManager.java b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/dataset/DatasetMemoryManager.java
new file mode 100644
index 0000000..cecd677
--- /dev/null
+++ b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/dataset/DatasetMemoryManager.java
@@ -0,0 +1,237 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.control.nc.dataset;
+
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+
+import edu.uci.ics.hyracks.api.dataset.IDatasetPartitionWriter;
+import edu.uci.ics.hyracks.api.dataset.Page;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.partitions.ResultSetPartitionId;
+
+public class DatasetMemoryManager {
+ private final Set<Page> availPages;
+
+ private final LeastRecentlyUsedList leastRecentlyUsedList;
+
+ private final Map<ResultSetPartitionId, PartitionNode> resultPartitionNodesMap;
+
+ private final static int FRAME_SIZE = 32768;
+
+ public DatasetMemoryManager(int availableMemory) {
+ availPages = new HashSet<Page>();
+
+ // Keep at least one page for temporarily storing the results.
+ if (availableMemory <= 0)
+ availableMemory = FRAME_SIZE;
+
+ while (availableMemory >= FRAME_SIZE) {
+ /* TODO(madhusudancs): Should we account for this memory usage by using Hyracks' allocateFrame()
+ * instead of allocating directly with ByteBuffer.allocate()?
+ */
+ availPages.add(new Page(ByteBuffer.allocate(FRAME_SIZE)));
+ availableMemory -= FRAME_SIZE;
+ }
+
+ leastRecentlyUsedList = new LeastRecentlyUsedList();
+ resultPartitionNodesMap = new HashMap<ResultSetPartitionId, PartitionNode>();
+ }
+
+ public Page requestPage(ResultSetPartitionId resultSetPartitionId, IDatasetPartitionWriter dpw)
+ throws OutOfMemoryError, HyracksDataException {
+ Page page;
+ if (availPages.isEmpty()) {
+ page = evictPage();
+ } else {
+ page = getAvailablePage();
+ }
+
+ page.clear();
+
+ /*
+ * It is extremely important to update the reference only after obtaining the page. In the case where the
+ * memory manager is allocated only one page of memory, the front of the LRU list must not be created by the
+ * updateReference call before a page has been pushed onto that LRU list element. So we first obtain the page,
+ * then make an updateReference call, which creates a new node in the LRU list, and then add the page to it.
+ */
+ PartitionNode pn = updateReference(resultSetPartitionId, dpw);
+ pn.add(page);
+ return page;
+ }
+
+ public void pageReferenced(ResultSetPartitionId resultSetPartitionId) {
+ // When a page is referenced the dataset partition writer should already be known, so we pass null.
+ updateReference(resultSetPartitionId, null);
+ }
+
+ public int getPageSize() {
+ return FRAME_SIZE;
+ }
+
+ protected void insertPartitionNode(ResultSetPartitionId resultSetPartitionId, PartitionNode pn) {
+ leastRecentlyUsedList.add(pn);
+ resultPartitionNodesMap.put(resultSetPartitionId, pn);
+ }
+
+ protected synchronized PartitionNode updateReference(ResultSetPartitionId resultSetPartitionId,
+ IDatasetPartitionWriter dpw) {
+ PartitionNode pn = null;
+
+ if (!resultPartitionNodesMap.containsKey(resultSetPartitionId)) {
+ if (dpw != null) {
+ pn = new PartitionNode(resultSetPartitionId, dpw);
+ insertPartitionNode(resultSetPartitionId, pn);
+ }
+ return pn;
+ }
+ pn = resultPartitionNodesMap.get(resultSetPartitionId);
+ leastRecentlyUsedList.remove(pn);
+ insertPartitionNode(resultSetPartitionId, pn);
+
+ return pn;
+ }
+
+ protected synchronized Page evictPage() throws HyracksDataException {
+ PartitionNode pn = leastRecentlyUsedList.getFirst();
+ IDatasetPartitionWriter dpw = pn.getDatasetPartitionWriter();
+ Page page = dpw.returnPage();
+
+ /* If the partition holding the pages breaks the contract by not returning a page, or if it has no pages at
+ * all, reclaim every page allocated to it and add them back to the available pages set.
+ */
+ if (page == null) {
+ availPages.addAll(pn);
+ pn.clear();
+ resultPartitionNodesMap.remove(pn.getResultSetPartitionId());
+ leastRecentlyUsedList.remove(pn);
+
+ /* If the dataset partition writer returned a null page, it must have misreported the number of pages it
+ * holds. In that case we evict everything it holds, which puts those pages back in the available set and
+ * guarantees that we have at least one page to allocate back.
+ */
+ page = getAvailablePage();
+ } else {
+ pn.remove(page);
+
+ // If the partition no longer holds any pages, remove it from the linked list and the hash map.
+ if (pn.isEmpty()) {
+ resultPartitionNodesMap.remove(pn.getResultSetPartitionId());
+ leastRecentlyUsedList.remove(pn);
+ }
+ }
+
+ return page;
+ }
+
+ protected synchronized Page getAvailablePage() {
+ Iterator<Page> iter = availPages.iterator();
+ Page page = iter.next();
+ iter.remove();
+ return page;
+ }
+
+ private class LeastRecentlyUsedList {
+ private PartitionNode head;
+
+ private PartitionNode tail;
+
+ public LeastRecentlyUsedList() {
+ head = null;
+ tail = null;
+ }
+
+ public void add(PartitionNode node) {
+ if (head == null) {
+ head = tail = node;
+ return;
+ }
+ tail.setNext(node);
+ node.setPrev(tail);
+ tail = node;
+ }
+
+ public void remove(PartitionNode node) {
+ if ((node == head) && (node == tail)) {
+ head = tail = null;
+ return;
+ } else if (node == head) {
+ head = head.getNext();
+ head.setPrev(null);
+ return;
+ } else if (node == tail) {
+ tail = tail.getPrev();
+ tail.setNext(null);
+ return;
+ } else {
+ PartitionNode prev = node.getPrev();
+ PartitionNode next = node.getNext();
+ prev.setNext(next);
+ next.setPrev(prev);
+ }
+ }
+
+ public PartitionNode getFirst() {
+ return head;
+ }
+ }
+
+ private class PartitionNode extends HashSet<Page> {
+ private static final long serialVersionUID = 1L;
+
+ private final ResultSetPartitionId resultSetPartitionId;
+
+ private final IDatasetPartitionWriter datasetPartitionWriter;
+
+ private PartitionNode prev;
+
+ private PartitionNode next;
+
+ public PartitionNode(ResultSetPartitionId resultSetPartitionId, IDatasetPartitionWriter datasetPartitionWriter) {
+ this.resultSetPartitionId = resultSetPartitionId;
+ this.datasetPartitionWriter = datasetPartitionWriter;
+ prev = null;
+ next = null;
+ }
+
+ public ResultSetPartitionId getResultSetPartitionId() {
+ return resultSetPartitionId;
+ }
+
+ public IDatasetPartitionWriter getDatasetPartitionWriter() {
+ return datasetPartitionWriter;
+ }
+
+ public void setPrev(PartitionNode node) {
+ prev = node;
+ }
+
+ public PartitionNode getPrev() {
+ return prev;
+ }
+
+ public void setNext(PartitionNode node) {
+ next = node;
+ }
+
+ public PartitionNode getNext() {
+ return next;
+ }
+ }
+}
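
For reference, a minimal standalone sketch of the LRU ordering discipline that DatasetMemoryManager implements with its doubly linked PartitionNode list and resultPartitionNodesMap: updateReference() moves a partition to the most-recently-used end, and evictPage() reclaims from the least-recently-used head. The sketch uses java.util.LinkedHashMap in access order; the names LruSketch, touch, and evictOldest are hypothetical and not part of this patch.

import java.util.LinkedHashMap;
import java.util.Map;

public class LruSketch<K, V> {
    // accessOrder = true makes iteration order run from least to most recently used.
    private final LinkedHashMap<K, V> map = new LinkedHashMap<K, V>(16, 0.75f, true);

    // Analogous to updateReference(): move the key to the most-recently-used position.
    public void touch(K key, V value) {
        map.put(key, value);
    }

    // Analogous to evictPage() picking leastRecentlyUsedList.getFirst().
    public Map.Entry<K, V> evictOldest() {
        java.util.Iterator<Map.Entry<K, V>> it = map.entrySet().iterator();
        if (!it.hasNext()) {
            return null;
        }
        Map.Entry<K, V> oldest = it.next();
        it.remove();
        return oldest;
    }

    public static void main(String[] args) {
        LruSketch<String, String> lru = new LruSketch<String, String>();
        lru.touch("partition-0", "page-a");
        lru.touch("partition-1", "page-b");
        lru.touch("partition-0", "page-a"); // partition-0 becomes most recently used
        System.out.println(lru.evictOldest().getKey()); // prints partition-1
    }
}
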
diff --git a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/dataset/DatasetPartitionManager.java b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/dataset/DatasetPartitionManager.java
new file mode 100644
index 0000000..1cad54b
--- /dev/null
+++ b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/dataset/DatasetPartitionManager.java
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.control.nc.dataset;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.Executor;
+
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataset.IDatasetPartitionManager;
+import edu.uci.ics.hyracks.api.dataset.IDatasetPartitionReader;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.io.IWorkspaceFileFactory;
+import edu.uci.ics.hyracks.api.job.JobId;
+import edu.uci.ics.hyracks.control.nc.NodeControllerService;
+import edu.uci.ics.hyracks.control.nc.io.IOManager;
+import edu.uci.ics.hyracks.control.nc.io.WorkspaceFileFactory;
+import edu.uci.ics.hyracks.control.nc.resources.DefaultDeallocatableRegistry;
+
+public class DatasetPartitionManager implements IDatasetPartitionManager {
+ private final NodeControllerService ncs;
+
+ private final Executor executor;
+
+ private final Map<JobId, ResultState[]> partitionResultStateMap;
+
+ private final DefaultDeallocatableRegistry deallocatableRegistry;
+
+ private final IWorkspaceFileFactory fileFactory;
+
+ private final DatasetMemoryManager datasetMemoryManager;
+
+ public DatasetPartitionManager(NodeControllerService ncs, Executor executor, int availableMemory) {
+ this.ncs = ncs;
+ this.executor = executor;
+ partitionResultStateMap = new HashMap<JobId, ResultState[]>();
+ deallocatableRegistry = new DefaultDeallocatableRegistry();
+ fileFactory = new WorkspaceFileFactory(deallocatableRegistry, (IOManager) ncs.getRootContext().getIOManager());
+ datasetMemoryManager = new DatasetMemoryManager(availableMemory);
+ }
+
+ @Override
+ public IFrameWriter createDatasetPartitionWriter(IHyracksTaskContext ctx, ResultSetId rsId, boolean orderedResult,
+ int partition, int nPartitions) throws HyracksException {
+ DatasetPartitionWriter dpw = null;
+ JobId jobId = ctx.getJobletContext().getJobId();
+ try {
+ ncs.getClusterController().registerResultPartitionLocation(jobId, rsId, orderedResult, partition,
+ nPartitions, ncs.getDatasetNetworkManager().getNetworkAddress());
+ dpw = new DatasetPartitionWriter(ctx, this, jobId, rsId, partition, datasetMemoryManager);
+
+ ResultState[] resultStates = partitionResultStateMap.get(jobId);
+ if (resultStates == null) {
+ resultStates = new ResultState[nPartitions];
+ partitionResultStateMap.put(jobId, resultStates);
+ }
+ resultStates[partition] = dpw.getResultState();
+ } catch (Exception e) {
+ throw new HyracksException(e);
+ }
+
+ return dpw;
+ }
+
+ @Override
+ public void reportPartitionWriteCompletion(JobId jobId, ResultSetId rsId, int partition) throws HyracksException {
+ try {
+ ncs.getClusterController().reportResultPartitionWriteCompletion(jobId, rsId, partition);
+ } catch (Exception e) {
+ throw new HyracksException(e);
+ }
+ }
+
+ @Override
+ public void reportPartitionFailure(JobId jobId, ResultSetId rsId, int partition) throws HyracksException {
+ try {
+ ncs.getClusterController().reportResultPartitionFailure(jobId, rsId, partition);
+ } catch (Exception e) {
+ throw new HyracksException(e);
+ }
+ }
+
+ @Override
+ public void initializeDatasetPartitionReader(JobId jobId, int partition, IFrameWriter writer)
+ throws HyracksException {
+ ResultState[] resultStates = partitionResultStateMap.get(jobId);
+
+ if (resultStates == null) {
+ throw new HyracksException("Unknown JobId " + jobId);
+ }
+
+ ResultState resultState = resultStates[partition];
+ if (resultState == null) {
+ throw new HyracksException("No DatasetPartitionWriter for partition " + partition);
+ }
+
+ IDatasetPartitionReader dpr = new DatasetPartitionReader(datasetMemoryManager, executor, resultState);
+ dpr.writeTo(writer);
+ }
+
+ @Override
+ public IWorkspaceFileFactory getFileFactory() {
+ return fileFactory;
+ }
+
+ @Override
+ public void close() {
+ deallocatableRegistry.close();
+ }
+}
diff --git a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/dataset/DatasetPartitionReader.java b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/dataset/DatasetPartitionReader.java
new file mode 100644
index 0000000..296c502
--- /dev/null
+++ b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/dataset/DatasetPartitionReader.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.control.nc.dataset;
+
+import java.nio.ByteBuffer;
+import java.util.concurrent.Executor;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.dataset.IDatasetPartitionReader;
+import edu.uci.ics.hyracks.api.dataset.Page;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.io.IFileHandle;
+import edu.uci.ics.hyracks.api.io.IIOManager;
+import edu.uci.ics.hyracks.comm.channels.NetworkOutputChannel;
+
+public class DatasetPartitionReader implements IDatasetPartitionReader {
+ private static final Logger LOGGER = Logger.getLogger(DatasetPartitionReader.class.getName());
+
+ private final DatasetMemoryManager datasetMemoryManager;
+
+ private final Executor executor;
+
+ private final ResultState resultState;
+
+ private IFileHandle fileHandle;
+
+ public DatasetPartitionReader(DatasetMemoryManager datasetMemoryManager, Executor executor, ResultState resultState) {
+ this.datasetMemoryManager = datasetMemoryManager;
+ this.executor = executor;
+ this.resultState = resultState;
+ }
+
+ private long read(long offset, ByteBuffer buffer) throws HyracksDataException {
+ long readSize = 0;
+ synchronized (resultState) {
+ while (offset >= resultState.getSize() && !resultState.getEOS()) {
+ try {
+ resultState.wait();
+ } catch (InterruptedException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+ }
+
+ if (offset >= resultState.getSize() && resultState.getEOS()) {
+ return readSize;
+ }
+
+ if (offset < resultState.getPersistentSize()) {
+ readSize = resultState.getIOManager().syncRead(fileHandle, offset, buffer);
+ }
+
+ if (readSize < buffer.capacity()) {
+ long localPageOffset = offset - resultState.getPersistentSize();
+ int localPageIndex = (int) (localPageOffset / datasetMemoryManager.getPageSize());
+ int pageOffset = (int) (localPageOffset % datasetMemoryManager.getPageSize());
+ Page page = resultState.getPage(localPageIndex);
+ readSize += buffer.remaining();
+ buffer.put(page.getBuffer().array(), pageOffset, buffer.remaining());
+ }
+
+ datasetMemoryManager.pageReferenced(resultState.getResultSetPartitionId());
+ return readSize;
+ }
+
+ @Override
+ public void writeTo(final IFrameWriter writer) {
+ executor.execute(new Runnable() {
+ @Override
+ public void run() {
+ NetworkOutputChannel channel = (NetworkOutputChannel) writer;
+ channel.setFrameSize(resultState.getFrameSize());
+ try {
+ fileHandle = resultState.getIOManager().open(resultState.getValidFileReference(),
+ IIOManager.FileReadWriteMode.READ_ONLY, IIOManager.FileSyncMode.METADATA_ASYNC_DATA_ASYNC);
+ channel.open();
+ try {
+ long offset = 0;
+ ByteBuffer buffer = ByteBuffer.allocate(resultState.getFrameSize());
+ while (true) {
+ buffer.clear();
+ long size = read(offset, buffer);
+ if (size <= 0) {
+ break;
+ } else if (size < buffer.limit()) {
+ throw new HyracksDataException("Premature end of file - readSize: " + size
+ + " buffer limit: " + buffer.limit());
+ }
+ offset += size;
+ buffer.flip();
+ channel.nextFrame(buffer);
+ }
+ } finally {
+ channel.close();
+ resultState.getIOManager().close(fileHandle);
+ }
+ } catch (InterruptedException e) {
+ throw new RuntimeException(e);
+ } catch (HyracksDataException e) {
+ throw new RuntimeException(e);
+ }
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("result reading successful(" + resultState.getResultSetPartitionId() + ")");
+ }
+ }
+ });
+ }
+}
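
DatasetPartitionReader.read() serves bytes below resultState.getPersistentSize() from the spilled result file and the remainder from in-memory pages, mapping the absolute offset to a page index and a page-local offset. A minimal sketch of that arithmetic, with the sizes hard-coded purely for illustration; OffsetMathSketch is a hypothetical name.

public class OffsetMathSketch {
    public static void main(String[] args) {
        final int pageSize = 32768;        // matches DatasetMemoryManager.FRAME_SIZE
        final long persistentSize = 65536; // assume two pages were already spilled to disk
        final long offset = 100000;        // absolute offset requested by the reader

        if (offset < persistentSize) {
            System.out.println("served by syncRead() from the spilled result file");
        } else {
            long localPageOffset = offset - persistentSize;
            int localPageIndex = (int) (localPageOffset / pageSize); // which in-memory page
            int pageOffset = (int) (localPageOffset % pageSize);     // where inside that page
            System.out.println("page " + localPageIndex + ", byte " + pageOffset); // page 1, byte 1696
        }
    }
}
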
diff --git a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/dataset/DatasetPartitionWriter.java b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/dataset/DatasetPartitionWriter.java
new file mode 100644
index 0000000..f6ae540
--- /dev/null
+++ b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/dataset/DatasetPartitionWriter.java
@@ -0,0 +1,148 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.control.nc.dataset;
+
+import java.nio.ByteBuffer;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataset.IDatasetPartitionManager;
+import edu.uci.ics.hyracks.api.dataset.IDatasetPartitionWriter;
+import edu.uci.ics.hyracks.api.dataset.Page;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.api.io.IFileHandle;
+import edu.uci.ics.hyracks.api.io.IIOManager;
+import edu.uci.ics.hyracks.api.job.JobId;
+import edu.uci.ics.hyracks.api.partitions.ResultSetPartitionId;
+
+public class DatasetPartitionWriter implements IDatasetPartitionWriter {
+ private static final Logger LOGGER = Logger.getLogger(DatasetPartitionWriter.class.getName());
+
+ private static final String FILE_PREFIX = "result_";
+
+ private final IDatasetPartitionManager manager;
+
+ private final JobId jobId;
+
+ private final ResultSetId resultSetId;
+
+ private final int partition;
+
+ private final DatasetMemoryManager datasetMemoryManager;
+
+ private final ResultSetPartitionId resultSetPartitionId;
+
+ private final ResultState resultState;
+
+ private IFileHandle fileHandle;
+
+ public DatasetPartitionWriter(IHyracksTaskContext ctx, IDatasetPartitionManager manager, JobId jobId,
+ ResultSetId rsId, int partition, DatasetMemoryManager datasetMemoryManager) {
+ this.manager = manager;
+ this.jobId = jobId;
+ this.resultSetId = rsId;
+ this.partition = partition;
+ this.datasetMemoryManager = datasetMemoryManager;
+
+ resultSetPartitionId = new ResultSetPartitionId(jobId, rsId, partition);
+ resultState = new ResultState(resultSetPartitionId, ctx.getIOManager(), ctx.getFrameSize());
+ }
+
+ public ResultState getResultState() {
+ return resultState;
+ }
+
+ @Override
+ public void open() throws HyracksDataException {
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("open(" + partition + ")");
+ }
+ String fName = FILE_PREFIX + String.valueOf(partition);
+ FileReference fRef = manager.getFileFactory().createUnmanagedWorkspaceFile(fName);
+ fileHandle = resultState.getIOManager().open(fRef, IIOManager.FileReadWriteMode.READ_WRITE,
+ IIOManager.FileSyncMode.METADATA_ASYNC_DATA_ASYNC);
+ resultState.init(fRef);
+ }
+
+ @Override
+ public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+ int srcOffset = 0;
+ Page destPage = resultState.getLastPage();
+
+ while (srcOffset < buffer.limit()) {
+ if ((destPage == null) || (destPage.getBuffer().remaining() <= 0)) {
+ destPage = datasetMemoryManager.requestPage(resultSetPartitionId, this);
+ resultState.addPage(destPage);
+ }
+ int srcLength = Math.min(buffer.limit() - srcOffset, destPage.getBuffer().remaining());
+ destPage.getBuffer().put(buffer.array(), srcOffset, srcLength);
+ srcOffset += srcLength;
+ resultState.incrementSize(srcLength);
+ }
+
+ synchronized (resultState) {
+ resultState.notifyAll();
+ }
+ }
+
+ @Override
+ public void fail() throws HyracksDataException {
+ try {
+ manager.reportPartitionFailure(jobId, resultSetId, partition);
+ } catch (HyracksException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public void close() throws HyracksDataException {
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("close(" + partition + ")");
+ }
+
+ try {
+ synchronized (resultState) {
+ resultState.setEOS(true);
+ resultState.notifyAll();
+ }
+ manager.reportPartitionWriteCompletion(jobId, resultSetId, partition);
+ } catch (HyracksException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public Page returnPage() throws HyracksDataException {
+ Page page = resultState.removePage(0);
+
+ IIOManager ioManager = resultState.getIOManager();
+
+ // If there are no pages left to give back, close the file handle since nothing more will be written, and return null.
+ if (page == null) {
+ ioManager.close(fileHandle);
+ return null;
+ }
+
+ page.getBuffer().flip();
+
+ long delta = ioManager.syncWrite(fileHandle, resultState.getPersistentSize(), page.getBuffer());
+ resultState.incrementPersistentSize(delta);
+ return page;
+ }
+}
diff --git a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/dataset/ResultState.java b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/dataset/ResultState.java
new file mode 100644
index 0000000..3db3fd9
--- /dev/null
+++ b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/dataset/ResultState.java
@@ -0,0 +1,173 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.control.nc.dataset;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import edu.uci.ics.hyracks.api.dataflow.state.IStateObject;
+import edu.uci.ics.hyracks.api.dataset.Page;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.api.io.IIOManager;
+import edu.uci.ics.hyracks.api.job.JobId;
+import edu.uci.ics.hyracks.api.partitions.ResultSetPartitionId;
+
+public class ResultState implements IStateObject {
+ private final ResultSetPartitionId resultSetPartitionId;
+
+ private final int frameSize;
+
+ private final IIOManager ioManager;
+
+ private final AtomicBoolean eos;
+
+ private final AtomicBoolean readEOS;
+
+ private final List<Page> localPageList;
+
+ private FileReference fileRef;
+
+ private long size;
+
+ private long persistentSize;
+
+ ResultState(ResultSetPartitionId resultSetPartitionId, IIOManager ioManager, int frameSize) {
+ this.resultSetPartitionId = resultSetPartitionId;
+ this.ioManager = ioManager;
+ this.frameSize = frameSize;
+ eos = new AtomicBoolean(false);
+ readEOS = new AtomicBoolean(false);
+ localPageList = new ArrayList<Page>();
+ }
+
+ public synchronized void init(FileReference fileRef) {
+ this.fileRef = fileRef;
+
+ size = 0;
+ persistentSize = 0;
+ notifyAll();
+ }
+
+ public ResultSetPartitionId getResultSetPartitionId() {
+ return resultSetPartitionId;
+ }
+
+ public int getFrameSize() {
+ return frameSize;
+ }
+
+ public IIOManager getIOManager() {
+ return ioManager;
+ }
+
+ public synchronized void incrementSize(long delta) {
+ size += delta;
+ }
+
+ public synchronized long getSize() {
+ return size;
+ }
+
+ public synchronized void incrementPersistentSize(long delta) {
+ persistentSize += delta;
+ }
+
+ public synchronized long getPersistentSize() {
+ return persistentSize;
+ }
+
+ public void setEOS(boolean eos) {
+ this.eos.set(eos);
+ }
+
+ public boolean getEOS() {
+ return eos.get();
+ }
+
+ public boolean getReadEOS() {
+ return readEOS.get();
+ }
+
+ public synchronized void addPage(Page page) {
+ localPageList.add(page);
+ }
+
+ public synchronized Page removePage(int index) {
+ Page page = null;
+ if (!localPageList.isEmpty()) {
+ page = localPageList.remove(index);
+ }
+ return page;
+ }
+
+ public synchronized Page getPage(int index) {
+ Page page = null;
+ if (!localPageList.isEmpty()) {
+ page = localPageList.get(index);
+ }
+ return page;
+ }
+
+ public synchronized Page getLastPage() {
+ Page page = null;
+ if (!localPageList.isEmpty()) {
+ page = localPageList.get(localPageList.size() - 1);
+ }
+ return page;
+ }
+
+ public synchronized Page getFirstPage() {
+ Page page = null;
+ if (!localPageList.isEmpty()) {
+ page = localPageList.get(0);
+ }
+ return page;
+ }
+
+ public synchronized FileReference getValidFileReference() throws InterruptedException {
+ while (fileRef == null)
+ wait();
+ return fileRef;
+ }
+
+ @Override
+ public JobId getJobId() {
+ return resultSetPartitionId.getJobId();
+ }
+
+ @Override
+ public Object getId() {
+ return resultSetPartitionId;
+ }
+
+ @Override
+ public long getMemoryOccupancy() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void toBytes(DataOutput out) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void fromBytes(DataInput in) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+}
\ No newline at end of file
diff --git a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/net/DatasetNetworkManager.java b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/net/DatasetNetworkManager.java
new file mode 100644
index 0000000..5b8b333
--- /dev/null
+++ b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/net/DatasetNetworkManager.java
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.control.nc.net;
+
+import java.io.IOException;
+import java.net.InetAddress;
+import java.net.InetSocketAddress;
+import java.net.SocketAddress;
+import java.nio.ByteBuffer;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import edu.uci.ics.hyracks.api.comm.NetworkAddress;
+import edu.uci.ics.hyracks.api.dataset.IDatasetPartitionManager;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.job.JobId;
+import edu.uci.ics.hyracks.comm.channels.IChannelConnectionFactory;
+import edu.uci.ics.hyracks.comm.channels.NetworkOutputChannel;
+import edu.uci.ics.hyracks.net.buffers.ICloseableBufferAcceptor;
+import edu.uci.ics.hyracks.net.exceptions.NetException;
+import edu.uci.ics.hyracks.net.protocols.muxdemux.ChannelControlBlock;
+import edu.uci.ics.hyracks.net.protocols.muxdemux.IChannelOpenListener;
+import edu.uci.ics.hyracks.net.protocols.muxdemux.MultiplexedConnection;
+import edu.uci.ics.hyracks.net.protocols.muxdemux.MuxDemux;
+import edu.uci.ics.hyracks.net.protocols.muxdemux.MuxDemuxPerformanceCounters;
+
+public class DatasetNetworkManager implements IChannelConnectionFactory {
+ private static final Logger LOGGER = Logger.getLogger(DatasetNetworkManager.class.getName());
+
+ private static final int MAX_CONNECTION_ATTEMPTS = 5;
+
+ static final int INITIAL_MESSAGE_SIZE = 20;
+
+ private final IDatasetPartitionManager partitionManager;
+
+ private final MuxDemux md;
+
+ private NetworkAddress networkAddress;
+
+ public DatasetNetworkManager(InetAddress inetAddress, IDatasetPartitionManager partitionManager, int nThreads)
+ throws IOException {
+ this.partitionManager = partitionManager;
+ md = new MuxDemux(new InetSocketAddress(inetAddress, 0), new ChannelOpenListener(), nThreads,
+ MAX_CONNECTION_ATTEMPTS);
+ }
+
+ public void start() throws IOException {
+ md.start();
+ InetSocketAddress sockAddr = md.getLocalAddress();
+ networkAddress = new NetworkAddress(sockAddr.getAddress().getAddress(), sockAddr.getPort());
+ }
+
+ public NetworkAddress getNetworkAddress() {
+ return networkAddress;
+ }
+
+ public void stop() {
+
+ }
+
+ public ChannelControlBlock connect(SocketAddress remoteAddress) throws InterruptedException, NetException {
+ MultiplexedConnection mConn = md.connect((InetSocketAddress) remoteAddress);
+ return mConn.openChannel();
+ }
+
+ private class ChannelOpenListener implements IChannelOpenListener {
+ @Override
+ public void channelOpened(ChannelControlBlock channel) {
+ channel.getReadInterface().setFullBufferAcceptor(new InitialBufferAcceptor(channel));
+ channel.getReadInterface().getEmptyBufferAcceptor().accept(ByteBuffer.allocate(INITIAL_MESSAGE_SIZE));
+ }
+ }
+
+ private class InitialBufferAcceptor implements ICloseableBufferAcceptor {
+ private final ChannelControlBlock ccb;
+
+ private NetworkOutputChannel noc;
+
+ public InitialBufferAcceptor(ChannelControlBlock ccb) {
+ this.ccb = ccb;
+ }
+
+ @Override
+ public void accept(ByteBuffer buffer) {
+ JobId jobId = new JobId(buffer.getLong());
+ int partition = buffer.getInt();
+ if (LOGGER.isLoggable(Level.FINE)) {
+ LOGGER.fine("Received initial dataset partition read request for JobId: " + jobId + " partition: "
+ + partition + " on channel: " + ccb);
+ }
+ noc = new NetworkOutputChannel(ccb, 1);
+ try {
+ partitionManager.initializeDatasetPartitionReader(jobId, partition, noc);
+ } catch (HyracksException e) {
+ noc.abort();
+ }
+ }
+
+ @Override
+ public void close() {
+
+ }
+
+ @Override
+ public void error(int ecode) {
+ if (noc != null) {
+ noc.abort();
+ }
+ }
+ }
+
+ public MuxDemuxPerformanceCounters getPerformanceCounters() {
+ return md.getPerformanceCounters();
+ }
+}
\ No newline at end of file
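
InitialBufferAcceptor.accept() expects the initial message on a newly opened channel to carry the JobId as a long followed by the partition as an int. A minimal sketch of how a client side could encode such a request buffer; only the two fields decoded above are shown, and InitialRequestSketch is a hypothetical name, not the actual Hyracks client code.

import java.nio.ByteBuffer;

public class InitialRequestSketch {
    public static void main(String[] args) {
        long jobId = 42L;
        int partition = 3;
        ByteBuffer msg = ByteBuffer.allocate(20); // INITIAL_MESSAGE_SIZE
        msg.putLong(jobId);   // decoded via new JobId(buffer.getLong())
        msg.putInt(partition);
        msg.flip();
        System.out.println("JobId: " + msg.getLong() + " partition: " + msg.getInt());
    }
}
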
diff --git a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/net/NetworkManager.java b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/net/NetworkManager.java
index b805595..c8e4e94 100644
--- a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/net/NetworkManager.java
+++ b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/net/NetworkManager.java
@@ -27,6 +27,8 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksException;
import edu.uci.ics.hyracks.api.job.JobId;
import edu.uci.ics.hyracks.api.partitions.PartitionId;
+import edu.uci.ics.hyracks.comm.channels.IChannelConnectionFactory;
+import edu.uci.ics.hyracks.comm.channels.NetworkOutputChannel;
import edu.uci.ics.hyracks.control.nc.partitions.PartitionManager;
import edu.uci.ics.hyracks.net.buffers.ICloseableBufferAcceptor;
import edu.uci.ics.hyracks.net.exceptions.NetException;
@@ -36,7 +38,7 @@
import edu.uci.ics.hyracks.net.protocols.muxdemux.MuxDemux;
import edu.uci.ics.hyracks.net.protocols.muxdemux.MuxDemuxPerformanceCounters;
-public class NetworkManager {
+public class NetworkManager implements IChannelConnectionFactory {
private static final Logger LOGGER = Logger.getLogger(NetworkManager.class.getName());
private static final int MAX_CONNECTION_ATTEMPTS = 5;
diff --git a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/partitions/MaterializedPartitionInputChannel.java b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/partitions/MaterializedPartitionInputChannel.java
index 16e31f7..ba6e6c3 100644
--- a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/partitions/MaterializedPartitionInputChannel.java
+++ b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/partitions/MaterializedPartitionInputChannel.java
@@ -21,7 +21,7 @@
import edu.uci.ics.hyracks.api.channels.IInputChannel;
import edu.uci.ics.hyracks.api.channels.IInputChannelMonitor;
import edu.uci.ics.hyracks.api.comm.IFrameWriter;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.context.IHyracksCommonContext;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.partitions.IPartition;
import edu.uci.ics.hyracks.api.partitions.PartitionId;
@@ -82,7 +82,7 @@
}
@Override
- public void open(IHyracksTaskContext ctx) throws HyracksDataException {
+ public void open(IHyracksCommonContext ctx) throws HyracksDataException {
for (int i = 0; i < nBuffers; ++i) {
emptyQueue.add(ctx.allocateFrame());
}
diff --git a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/partitions/PartitionManager.java b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/partitions/PartitionManager.java
index 45c091a..ea88a75 100644
--- a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/partitions/PartitionManager.java
+++ b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/partitions/PartitionManager.java
@@ -28,12 +28,12 @@
import edu.uci.ics.hyracks.api.job.JobId;
import edu.uci.ics.hyracks.api.partitions.IPartition;
import edu.uci.ics.hyracks.api.partitions.PartitionId;
+import edu.uci.ics.hyracks.comm.channels.NetworkOutputChannel;
import edu.uci.ics.hyracks.control.common.job.PartitionDescriptor;
import edu.uci.ics.hyracks.control.common.job.PartitionState;
import edu.uci.ics.hyracks.control.nc.NodeControllerService;
import edu.uci.ics.hyracks.control.nc.io.IOManager;
import edu.uci.ics.hyracks.control.nc.io.WorkspaceFileFactory;
-import edu.uci.ics.hyracks.control.nc.net.NetworkOutputChannel;
import edu.uci.ics.hyracks.control.nc.resources.DefaultDeallocatableRegistry;
public class PartitionManager {
@@ -98,7 +98,7 @@
List<IPartition> pList = partitionMap.get(partitionId);
if (pList != null && !pList.isEmpty()) {
IPartition partition = pList.get(0);
- writer.setTaskContext(partition.getTaskContext());
+ writer.setFrameSize(partition.getTaskContext().getFrameSize());
partition.writeTo(writer);
if (!partition.isReusable()) {
partitionMap.remove(partitionId);
diff --git a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/work/ReportPartitionAvailabilityWork.java b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/work/ReportPartitionAvailabilityWork.java
index bb9669d..7ed9d11 100644
--- a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/work/ReportPartitionAvailabilityWork.java
+++ b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/work/ReportPartitionAvailabilityWork.java
@@ -22,10 +22,10 @@
import edu.uci.ics.hyracks.api.comm.PartitionChannel;
import edu.uci.ics.hyracks.api.job.JobId;
import edu.uci.ics.hyracks.api.partitions.PartitionId;
+import edu.uci.ics.hyracks.comm.channels.NetworkInputChannel;
import edu.uci.ics.hyracks.control.common.work.AbstractWork;
import edu.uci.ics.hyracks.control.nc.Joblet;
import edu.uci.ics.hyracks.control.nc.NodeControllerService;
-import edu.uci.ics.hyracks.control.nc.net.NetworkInputChannel;
public class ReportPartitionAvailabilityWork extends AbstractWork {
private final NodeControllerService ncs;
diff --git a/hyracks/hyracks-data/hyracks-data-std/pom.xml b/hyracks/hyracks-data/hyracks-data-std/pom.xml
index 3f051f9..8f5f04e 100644
--- a/hyracks/hyracks-data/hyracks-data-std/pom.xml
+++ b/hyracks/hyracks-data/hyracks-data-std/pom.xml
@@ -16,8 +16,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/edu/uci/ics/hyracks/data/std/util/ByteArrayAccessibleOutputStream.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/edu/uci/ics/hyracks/data/std/util/ByteArrayAccessibleOutputStream.java
index 8508287..c0ba163 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/edu/uci/ics/hyracks/data/std/util/ByteArrayAccessibleOutputStream.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/edu/uci/ics/hyracks/data/std/util/ByteArrayAccessibleOutputStream.java
@@ -21,6 +21,14 @@
public class ByteArrayAccessibleOutputStream extends ByteArrayOutputStream {
private static final Logger LOGGER = Logger.getLogger(ByteArrayAccessibleOutputStream.class.getName());
+ public ByteArrayAccessibleOutputStream() {
+ super();
+ }
+
+ public ByteArrayAccessibleOutputStream(int size) {
+ super(size);
+ }
+
public byte[] getByteArray() {
return buf;
}
diff --git a/hyracks/hyracks-dataflow-common/pom.xml b/hyracks/hyracks-dataflow-common/pom.xml
index a0ffb66..1a2950b 100644
--- a/hyracks/hyracks-dataflow-common/pom.xml
+++ b/hyracks/hyracks-dataflow-common/pom.xml
@@ -15,8 +15,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/comm/io/FrameOutputStream.java b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/comm/io/FrameOutputStream.java
new file mode 100644
index 0000000..07f6ba2
--- /dev/null
+++ b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/comm/io/FrameOutputStream.java
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.common.comm.io;
+
+import java.nio.ByteBuffer;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import edu.uci.ics.hyracks.data.std.util.ByteArrayAccessibleOutputStream;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+
+public class FrameOutputStream extends ByteArrayAccessibleOutputStream {
+ private static final Logger LOGGER = Logger.getLogger(FrameOutputStream.class.getName());
+
+ private final FrameTupleAppender frameTupleAppender;
+
+ public FrameOutputStream(int frameSize) {
+ super(frameSize);
+ this.frameTupleAppender = new FrameTupleAppender(frameSize);
+ }
+
+ public void reset(ByteBuffer buffer, boolean clear) {
+ frameTupleAppender.reset(buffer, clear);
+ }
+
+ public int getTupleCount() {
+ int tupleCount = frameTupleAppender.getTupleCount();
+ if (LOGGER.isLoggable(Level.FINEST)) {
+ LOGGER.finest("appendTuple(): tuple count: " + tupleCount);
+ }
+ return tupleCount;
+ }
+
+ public boolean appendTuple() {
+ if (LOGGER.isLoggable(Level.FINEST)) {
+ LOGGER.finest("appendTuple(): tuple size: " + count);
+ }
+ boolean appended = frameTupleAppender.append(buf, 0, count);
+ count = 0;
+ return appended;
+ }
+}
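
FrameOutputStream accumulates serialized bytes and appends them to a target frame as a single tuple through its FrameTupleAppender. A minimal usage sketch, assuming a 32K frame allocated directly (the frame size would normally come from the task context); FrameOutputStreamUsageSketch is a hypothetical name.

import java.nio.ByteBuffer;

import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameOutputStream;

public class FrameOutputStreamUsageSketch {
    public static void main(String[] args) throws Exception {
        int frameSize = 32768;
        ByteBuffer frame = ByteBuffer.allocate(frameSize);

        FrameOutputStream fos = new FrameOutputStream(frameSize);
        fos.reset(frame, true);               // point the internal appender at the frame
        fos.write("hello".getBytes("UTF-8")); // accumulate serialized output
        boolean appended = fos.appendTuple();  // flush the accumulated bytes as one tuple
        System.out.println("appended: " + appended + ", tuples in frame: " + fos.getTupleCount());
    }
}
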
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/comm/io/ResultFrameTupleAccessor.java b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/comm/io/ResultFrameTupleAccessor.java
new file mode 100644
index 0000000..915a436
--- /dev/null
+++ b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/comm/io/ResultFrameTupleAccessor.java
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.common.comm.io;
+
+import java.io.DataInputStream;
+import java.nio.ByteBuffer;
+
+import edu.uci.ics.hyracks.api.comm.FrameHelper;
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
+
+public class ResultFrameTupleAccessor implements IFrameTupleAccessor {
+
+ private final int frameSize;
+ private ByteBuffer buffer;
+
+ public ResultFrameTupleAccessor(int frameSize) {
+ this.frameSize = frameSize;
+ }
+
+ @Override
+ public void reset(ByteBuffer buffer) {
+ this.buffer = buffer;
+ }
+
+ @Override
+ public ByteBuffer getBuffer() {
+ return buffer;
+ }
+
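+ // Frame layout: the tuple count lives at FrameHelper.getTupleCountOffset(frameSize); the end
+ // offset of tuple i is stored 4 * (i + 1) bytes before that slot, and tuple i starts where
+ // tuple i - 1 ends.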
+ @Override
+ public int getTupleCount() {
+ return buffer.getInt(FrameHelper.getTupleCountOffset(frameSize));
+ }
+
+ @Override
+ public int getTupleStartOffset(int tupleIndex) {
+ return tupleIndex == 0 ? 0 : buffer.getInt(FrameHelper.getTupleCountOffset(frameSize) - 4 * tupleIndex);
+ }
+
+ @Override
+ public int getTupleEndOffset(int tupleIndex) {
+ return buffer.getInt(FrameHelper.getTupleCountOffset(frameSize) - 4 * (tupleIndex + 1));
+ }
+
+ @Override
+ public int getFieldStartOffset(int tupleIndex, int fIdx) {
+ return fIdx == 0 ? 0 : buffer.getInt(getTupleStartOffset(tupleIndex) + (fIdx - 1) * 4);
+ }
+
+ @Override
+ public int getFieldEndOffset(int tupleIndex, int fIdx) {
+ return buffer.getInt(getTupleStartOffset(tupleIndex) + fIdx * 4);
+ }
+
+ @Override
+ public int getFieldLength(int tupleIndex, int fIdx) {
+ return getFieldEndOffset(tupleIndex, fIdx) - getFieldStartOffset(tupleIndex, fIdx);
+ }
+
+ @Override
+ public int getFieldSlotsLength() {
+ return getFieldCount() * 4;
+ }
+
+ public void prettyPrint() {
+ ByteBufferInputStream bbis = new ByteBufferInputStream();
+ DataInputStream dis = new DataInputStream(bbis);
+ int tc = getTupleCount();
+ System.err.println("TC: " + tc);
+ for (int i = 0; i < tc; ++i) {
+ System.err.print(i + ":(" + getTupleStartOffset(i) + ", " + getTupleEndOffset(i) + ")[");
+
+ bbis.setByteBuffer(buffer, getTupleStartOffset(i));
+ System.err.print(dis);
+
+ System.err.println("]");
+ }
+ }
+
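+ // Each result record is exposed as a single field; result frames are not split into typed columns.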
+ @Override
+ public int getFieldCount() {
+ return 1;
+ }
+}
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/partition/FieldHashPartitionComputerFamily.java b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/partition/FieldHashPartitionComputerFamily.java
index 51645c4..ec3c2be 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/partition/FieldHashPartitionComputerFamily.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/partition/FieldHashPartitionComputerFamily.java
@@ -52,10 +52,10 @@
h += fh;
}
if (h < 0) {
- h = -h;
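+ // -h overflows for Integer.MIN_VALUE; -(h + 1) maps every negative hash to a non-negative value.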
+ h = -(h+1);
}
return h % nParts;
}
};
}
-}
\ No newline at end of file
+}
diff --git a/hyracks/hyracks-dataflow-hadoop/pom.xml b/hyracks/hyracks-dataflow-hadoop/pom.xml
index f9d5153..f5135f8 100644
--- a/hyracks/hyracks-dataflow-hadoop/pom.xml
+++ b/hyracks/hyracks-dataflow-hadoop/pom.xml
@@ -1,8 +1,6 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
- <groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-hadoop</artifactId>
- <version>0.2.3-SNAPSHOT</version>
<name>hyracks-dataflow-hadoop</name>
<parent>
@@ -18,8 +16,8 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-dataflow-std/pom.xml b/hyracks/hyracks-dataflow-std/pom.xml
index bf27d20..2cf0fdc 100644
--- a/hyracks/hyracks-dataflow-std/pom.xml
+++ b/hyracks/hyracks-dataflow-std/pom.xml
@@ -1,8 +1,6 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
- <groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.3-SNAPSHOT</version>
<name>hyracks-dataflow-std</name>
<parent>
@@ -18,8 +16,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/HashSpillableTableFactory.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/HashSpillableTableFactory.java
index f2b56fa..f86d9fb 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/HashSpillableTableFactory.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/group/HashSpillableTableFactory.java
@@ -256,8 +256,6 @@
outputAppender.reset(outputFrame, true);
- writer.open();
-
if (tPointers == null) {
// Not sorted
for (int i = 0; i < tableSize; ++i) {
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/result/ResultWriterOperatorDescriptor.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/result/ResultWriterOperatorDescriptor.java
new file mode 100644
index 0000000..edca60a
--- /dev/null
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/result/ResultWriterOperatorDescriptor.java
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.dataflow.std.result;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.nio.ByteBuffer;
+
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
+import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.IResultSerializer;
+import edu.uci.ics.hyracks.api.dataflow.value.IResultSerializerFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.IDatasetPartitionManager;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.job.IOperatorDescriptorRegistry;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameOutputStream;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputSinkOperatorNodePushable;
+
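+/**
+ * Sink operator that re-serializes its input frames and hands them to a dataset partition writer,
+ * making the job's results available to clients through the dataset API. Typical wiring, as used
+ * by the integration tests in this change:
+ *
+ *   ResultSetId rsId = new ResultSetId(1);
+ *   spec.addResultSetId(rsId);
+ *   IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ *           resultSerializerFactory);
+ */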
+public class ResultWriterOperatorDescriptor extends AbstractSingleActivityOperatorDescriptor {
+ private static final long serialVersionUID = 1L;
+
+ private final ResultSetId rsId;
+
+ private final boolean ordered;
+
+ private final IResultSerializerFactory resultSerializerFactory;
+
+ public ResultWriterOperatorDescriptor(IOperatorDescriptorRegistry spec, ResultSetId rsId, boolean ordered,
+ IResultSerializerFactory resultSerializerFactory) throws IOException {
+ super(spec, 1, 0);
+ this.rsId = rsId;
+ this.ordered = ordered;
+ this.resultSerializerFactory = resultSerializerFactory;
+ }
+
+ @Override
+ public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+ IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions) {
+ final IDatasetPartitionManager dpm = ctx.getDatasetPartitionManager();
+
+ final ByteBuffer outputBuffer = ctx.allocateFrame();
+
+ final FrameOutputStream frameOutputStream = new FrameOutputStream(ctx.getFrameSize());
+ frameOutputStream.reset(outputBuffer, true);
+ PrintStream printStream = new PrintStream(frameOutputStream);
+
+ final RecordDescriptor outRecordDesc = recordDescProvider.getInputRecordDescriptor(getActivityId(), 0);
+ final IResultSerializer resultSerializer = resultSerializerFactory.createResultSerializer(outRecordDesc,
+ printStream);
+
+ final FrameTupleAccessor frameTupleAccessor = new FrameTupleAccessor(ctx.getFrameSize(), outRecordDesc);
+
+ return new AbstractUnaryInputSinkOperatorNodePushable() {
+ IFrameWriter datasetPartitionWriter;
+
+ @Override
+ public void open() throws HyracksDataException {
+ try {
+ datasetPartitionWriter = dpm.createDatasetPartitionWriter(ctx, rsId, ordered, partition,
+ nPartitions);
+ datasetPartitionWriter.open();
+ resultSerializer.init();
+ } catch (HyracksException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
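+ // Re-serialize each incoming tuple into the output frame; when the frame is full, flush it to
+ // the dataset partition writer and retry the tuple in the cleared frame.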
+ frameTupleAccessor.reset(buffer);
+ for (int tIndex = 0; tIndex < frameTupleAccessor.getTupleCount(); tIndex++) {
+ resultSerializer.appendTuple(frameTupleAccessor, tIndex);
+ if (!frameOutputStream.appendTuple()) {
+ datasetPartitionWriter.nextFrame(outputBuffer);
+ frameOutputStream.reset(outputBuffer, true);
+
+ /* TODO(madhusudancs): This works under the assumption that no single serialized record is
+ * longer than the buffer size.
+ */
+ resultSerializer.appendTuple(frameTupleAccessor, tIndex);
+ frameOutputStream.appendTuple();
+ }
+ }
+ }
+
+ @Override
+ public void fail() throws HyracksDataException {
+ datasetPartitionWriter.fail();
+ }
+
+ @Override
+ public void close() throws HyracksDataException {
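+ // Flush any tuples still buffered in the output frame before closing the partition writer.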
+ if (frameOutputStream.getTupleCount() > 0) {
+ datasetPartitionWriter.nextFrame(outputBuffer);
+ frameOutputStream.reset(outputBuffer, true);
+ }
+ datasetPartitionWriter.close();
+ }
+ };
+ }
+}
\ No newline at end of file
diff --git a/hyracks/hyracks-dist/pom.xml b/hyracks/hyracks-dist/pom.xml
index 4f9fc20..4f782ac 100755
--- a/hyracks/hyracks-dist/pom.xml
+++ b/hyracks/hyracks-dist/pom.xml
@@ -23,8 +23,8 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
</configuration>
</plugin>
<plugin>
diff --git a/hyracks/hyracks-dist/src/main/resources/bin/getip.sh b/hyracks/hyracks-dist/src/main/resources/bin/getip.sh
index e0cdf73..a691c0f 100755
--- a/hyracks/hyracks-dist/src/main/resources/bin/getip.sh
+++ b/hyracks/hyracks-dist/src/main/resources/bin/getip.sh
@@ -6,6 +6,10 @@
then
#Get IP Address
IPADDR=`/sbin/ifconfig eth0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
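+ # Fall back to em1 when eth0 reports no address (e.g. on hosts that name the first interface em1).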
+ if [ "$IPADDR" = "" ]
+ then
+ IPADDR=`/sbin/ifconfig em1 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+ fi
if [ "$IPADDR" = "" ]
then
IPADDR=`/sbin/ifconfig lo | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
diff --git a/hyracks/hyracks-dist/src/main/resources/bin/startcc.sh b/hyracks/hyracks-dist/src/main/resources/bin/startcc.sh
index fe2551d..efb79ce 100755
--- a/hyracks/hyracks-dist/src/main/resources/bin/startcc.sh
+++ b/hyracks/hyracks-dist/src/main/resources/bin/startcc.sh
@@ -22,4 +22,4 @@
#Launch hyracks cc script
chmod -R 755 $HYRACKS_HOME
-$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyrackscc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -max-heartbeat-lapse-periods 999999 -default-max-job-attempts 0 -job-history-size 3 &> $CCLOGS_DIR/cc.log &
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyrackscc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -max-heartbeat-lapse-periods 999999 -default-max-job-attempts 0 -job-history-size 0 &> $CCLOGS_DIR/cc.log &
diff --git a/hyracks/hyracks-documentation/pom.xml b/hyracks/hyracks-documentation/pom.xml
index ed24adb..7aedd57 100644
--- a/hyracks/hyracks-documentation/pom.xml
+++ b/hyracks/hyracks-documentation/pom.xml
@@ -1,8 +1,6 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
- <groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-documentation</artifactId>
- <version>0.2.3-SNAPSHOT</version>
<name>hyracks-documentation</name>
<parent>
diff --git a/hyracks/hyracks-examples/btree-example/btreeclient/pom.xml b/hyracks/hyracks-examples/btree-example/btreeclient/pom.xml
index ba296ac..f941a5b 100644
--- a/hyracks/hyracks-examples/btree-example/btreeclient/pom.xml
+++ b/hyracks/hyracks-examples/btree-example/btreeclient/pom.xml
@@ -37,8 +37,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
<plugin>
diff --git a/hyracks/hyracks-examples/btree-example/btreehelper/pom.xml b/hyracks/hyracks-examples/btree-example/btreehelper/pom.xml
index 8e90606..eb651ce 100644
--- a/hyracks/hyracks-examples/btree-example/btreehelper/pom.xml
+++ b/hyracks/hyracks-examples/btree-example/btreehelper/pom.xml
@@ -41,8 +41,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatclient/pom.xml b/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatclient/pom.xml
index dd96db8..f52536c 100644
--- a/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatclient/pom.xml
+++ b/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatclient/pom.xml
@@ -2,7 +2,6 @@
<modelVersion>4.0.0</modelVersion>
<groupId>edu.uci.ics.hyracks.examples.compat</groupId>
<artifactId>hadoopcompatclient</artifactId>
- <version>0.2.3-SNAPSHOT</version>
<name>hadoopcompatclient</name>
<parent>
@@ -33,8 +32,8 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
</configuration>
</plugin>
<plugin>
diff --git a/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompathelper/pom.xml b/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompathelper/pom.xml
index 1e029be..c397a72 100644
--- a/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompathelper/pom.xml
+++ b/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompathelper/pom.xml
@@ -2,7 +2,6 @@
<modelVersion>4.0.0</modelVersion>
<groupId>edu.uci.ics.hyracks.examples.compat</groupId>
<artifactId>hadoopcompathelper</artifactId>
- <version>0.2.3-SNAPSHOT</version>
<name>hadoopcompathelper</name>
<parent>
@@ -32,8 +31,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-examples/hadoop-compat-example/pom.xml b/hyracks/hyracks-examples/hadoop-compat-example/pom.xml
index 0614034..103d762 100644
--- a/hyracks/hyracks-examples/hadoop-compat-example/pom.xml
+++ b/hyracks/hyracks-examples/hadoop-compat-example/pom.xml
@@ -2,7 +2,6 @@
<modelVersion>4.0.0</modelVersion>
<groupId>edu.uci.ics.hyracks.examples</groupId>
<artifactId>hadoop-compat-example</artifactId>
- <version>0.2.3-SNAPSHOT</version>
<packaging>pom</packaging>
<name>hadoop-compat-example</name>
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/pom.xml b/hyracks/hyracks-examples/hyracks-integration-tests/pom.xml
index e31af75..5e7b5c9 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/pom.xml
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/pom.xml
@@ -16,8 +16,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
@@ -31,6 +32,13 @@
<scope>test</scope>
</dependency>
<dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-client</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
<version>0.2.3-SNAPSHOT</version>
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreePrimaryIndexScanOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreePrimaryIndexScanOperatorTest.java
index 8482083..33ddca2 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreePrimaryIndexScanOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreePrimaryIndexScanOperatorTest.java
@@ -30,6 +30,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
@@ -44,8 +45,8 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeDataflowHelperFactory;
import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor;
@@ -60,6 +61,7 @@
import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder;
import edu.uci.ics.hyracks.test.support.TestStorageManagerInterface;
import edu.uci.ics.hyracks.tests.integration.AbstractIntegrationTest;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class BTreePrimaryIndexScanOperatorTest extends AbstractIntegrationTest {
static {
@@ -114,7 +116,7 @@
spec.addRoot(primaryCreateOp);
runTest(spec);
}
-
+
public void loadPrimaryIndexTest() throws Exception {
JobSpecification spec = new JobSpecification();
@@ -143,8 +145,9 @@
int[] fieldPermutation = { 0, 1, 2, 4, 5, 7 };
TreeIndexBulkLoadOperatorDescriptor primaryBtreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec,
- storageManager, indexRegistryProvider, primaryBtreeSplitProvider, primaryTypeTraits, primaryComparatorFactories, fieldPermutation, 0.7f,
- dataflowHelperFactory, NoOpOperationCallbackProvider.INSTANCE);
+ storageManager, indexRegistryProvider, primaryBtreeSplitProvider, primaryTypeTraits,
+ primaryComparatorFactories, fieldPermutation, 0.7f, dataflowHelperFactory,
+ NoOpOperationCallbackProvider.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeBulkLoad, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
@@ -179,13 +182,16 @@
int[] highKeyFields = null; // + infinity
BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc,
- storageManager, indexRegistryProvider, primaryBtreeSplitProvider, primaryTypeTraits, primaryComparatorFactories, lowKeyFields,
- highKeyFields, true, true, dataflowHelperFactory, false, NoOpOperationCallbackProvider.INSTANCE);
+ storageManager, indexRegistryProvider, primaryBtreeSplitProvider, primaryTypeTraits,
+ primaryComparatorFactories, lowKeyFields, highKeyFields, true, true, dataflowHelperFactory, false,
+ NoOpOperationCallbackProvider.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryBtreeSearchOp, 0);
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreePrimaryIndexSearchOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreePrimaryIndexSearchOperatorTest.java
index 82fecbe..acd3027 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreePrimaryIndexSearchOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreePrimaryIndexSearchOperatorTest.java
@@ -30,6 +30,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
@@ -44,8 +45,8 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeDataflowHelperFactory;
import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor;
@@ -60,6 +61,7 @@
import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder;
import edu.uci.ics.hyracks.test.support.TestStorageManagerInterface;
import edu.uci.ics.hyracks.tests.integration.AbstractIntegrationTest;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class BTreePrimaryIndexSearchOperatorTest extends AbstractIntegrationTest {
static {
@@ -114,7 +116,7 @@
spec.addRoot(primaryCreateOp);
runTest(spec);
}
-
+
public void loadPrimaryIndexTest() throws Exception {
JobSpecification spec = new JobSpecification();
@@ -143,8 +145,9 @@
int[] fieldPermutation = { 0, 1, 2, 4, 5, 7 };
TreeIndexBulkLoadOperatorDescriptor primaryBtreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec,
- storageManager, indexRegistryProvider, primaryBtreeSplitProvider, primaryTypeTraits, primaryComparatorFactories, fieldPermutation, 0.7f,
- dataflowHelperFactory, NoOpOperationCallbackProvider.INSTANCE);
+ storageManager, indexRegistryProvider, primaryBtreeSplitProvider, primaryTypeTraits,
+ primaryComparatorFactories, fieldPermutation, 0.7f, dataflowHelperFactory,
+ NoOpOperationCallbackProvider.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeBulkLoad, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
@@ -184,13 +187,16 @@
int[] highKeyFields = { 1 };
BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc,
- storageManager, indexRegistryProvider, primaryBtreeSplitProvider, primaryTypeTraits, primaryComparatorFactories, lowKeyFields,
- highKeyFields, true, true, dataflowHelperFactory, false, NoOpOperationCallbackProvider.INSTANCE);
+ storageManager, indexRegistryProvider, primaryBtreeSplitProvider, primaryTypeTraits,
+ primaryComparatorFactories, lowKeyFields, highKeyFields, true, true, dataflowHelperFactory, false,
+ NoOpOperationCallbackProvider.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryBtreeSearchOp, 0);
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreePrimaryIndexStatsOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreePrimaryIndexStatsOperatorTest.java
index e63ce11..ca03b16 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreePrimaryIndexStatsOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreePrimaryIndexStatsOperatorTest.java
@@ -29,6 +29,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
@@ -42,7 +43,7 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeDataflowHelperFactory;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
@@ -57,6 +58,7 @@
import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder;
import edu.uci.ics.hyracks.test.support.TestStorageManagerInterface;
import edu.uci.ics.hyracks.tests.integration.AbstractIntegrationTest;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class BTreePrimaryIndexStatsOperatorTest extends AbstractIntegrationTest {
static {
@@ -82,7 +84,6 @@
private IFileSplitProvider primaryBtreeSplitProvider = new ConstantFileSplitProvider(
new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(primaryFileName))) });
-
@Before
public void setup() throws Exception {
// field, type and key declarations for primary index
@@ -107,7 +108,7 @@
spec.addRoot(primaryCreateOp);
runTest(spec);
}
-
+
public void loadPrimaryIndexTest() throws Exception {
JobSpecification spec = new JobSpecification();
@@ -136,8 +137,9 @@
int[] fieldPermutation = { 0, 1, 2, 4, 5, 7 };
TreeIndexBulkLoadOperatorDescriptor primaryBtreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec,
- storageManager, indexRegistryProvider, primaryBtreeSplitProvider, primaryTypeTraits, primaryComparatorFactories, fieldPermutation, 0.7f,
- dataflowHelperFactory, NoOpOperationCallbackProvider.INSTANCE);
+ storageManager, indexRegistryProvider, primaryBtreeSplitProvider, primaryTypeTraits,
+ primaryComparatorFactories, fieldPermutation, 0.7f, dataflowHelperFactory,
+ NoOpOperationCallbackProvider.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeBulkLoad, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
@@ -153,12 +155,15 @@
JobSpecification spec = new JobSpecification();
TreeIndexStatsOperatorDescriptor primaryStatsOp = new TreeIndexStatsOperatorDescriptor(spec, storageManager,
- indexRegistryProvider, primaryBtreeSplitProvider,
- primaryTypeTraits, primaryComparatorFactories, dataflowHelperFactory, NoOpOperationCallbackProvider.INSTANCE);
+ indexRegistryProvider, primaryBtreeSplitProvider, primaryTypeTraits, primaryComparatorFactories,
+ dataflowHelperFactory, NoOpOperationCallbackProvider.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryStatsOp, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), primaryStatsOp, 0, printer, 0);
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreeSecondaryIndexInsertOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreeSecondaryIndexInsertOperatorTest.java
index 3c87ae3..34a1cd3 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreeSecondaryIndexInsertOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreeSecondaryIndexInsertOperatorTest.java
@@ -30,6 +30,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
@@ -44,9 +45,9 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.misc.NullSinkOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeDataflowHelperFactory;
import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor;
@@ -63,6 +64,7 @@
import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder;
import edu.uci.ics.hyracks.test.support.TestStorageManagerInterface;
import edu.uci.ics.hyracks.tests.integration.AbstractIntegrationTest;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class BTreeSecondaryIndexInsertOperatorTest extends AbstractIntegrationTest {
static {
@@ -339,9 +341,11 @@
primaryHighKeyFields, true, true, dataflowHelperFactory, false, NoOpOperationCallbackProvider.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, secondaryBtreeSearchOp, 0);
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreeSecondaryIndexSearchOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreeSecondaryIndexSearchOperatorTest.java
index 1304f12..c9ee118 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreeSecondaryIndexSearchOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/btree/BTreeSecondaryIndexSearchOperatorTest.java
@@ -30,6 +30,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
@@ -44,8 +45,8 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeDataflowHelperFactory;
import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor;
@@ -60,6 +61,7 @@
import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder;
import edu.uci.ics.hyracks.test.support.TestStorageManagerInterface;
import edu.uci.ics.hyracks.tests.integration.AbstractIntegrationTest;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class BTreeSecondaryIndexSearchOperatorTest extends AbstractIntegrationTest {
static {
@@ -137,7 +139,7 @@
spec.addRoot(primaryCreateOp);
runTest(spec);
}
-
+
public void loadPrimaryIndexTest() throws Exception {
JobSpecification spec = new JobSpecification();
@@ -166,8 +168,9 @@
int[] fieldPermutation = { 0, 1, 2, 4, 5, 7 };
TreeIndexBulkLoadOperatorDescriptor primaryBtreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec,
- storageManager, indexRegistryProvider, primaryBtreeSplitProvider, primaryTypeTraits, primaryComparatorFactories, fieldPermutation, 0.7f,
- dataflowHelperFactory, NoOpOperationCallbackProvider.INSTANCE);
+ storageManager, indexRegistryProvider, primaryBtreeSplitProvider, primaryTypeTraits,
+ primaryComparatorFactories, fieldPermutation, 0.7f, dataflowHelperFactory,
+ NoOpOperationCallbackProvider.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeBulkLoad, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
@@ -180,14 +183,14 @@
public void createSecondaryIndex() throws Exception {
JobSpecification spec = new JobSpecification();
- TreeIndexCreateOperatorDescriptor secondaryCreateOp = new TreeIndexCreateOperatorDescriptor(spec, storageManager,
- indexRegistryProvider, secondaryBtreeSplitProvider, secondaryTypeTraits, secondaryComparatorFactories,
- dataflowHelperFactory, NoOpOperationCallbackProvider.INSTANCE);
+ TreeIndexCreateOperatorDescriptor secondaryCreateOp = new TreeIndexCreateOperatorDescriptor(spec,
+ storageManager, indexRegistryProvider, secondaryBtreeSplitProvider, secondaryTypeTraits,
+ secondaryComparatorFactories, dataflowHelperFactory, NoOpOperationCallbackProvider.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryCreateOp, NC1_ID);
spec.addRoot(secondaryCreateOp);
runTest(spec);
}
-
+
public void loadSecondaryIndexTest() throws Exception {
JobSpecification spec = new JobSpecification();
@@ -212,8 +215,9 @@
// scan primary index
BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc,
- storageManager, indexRegistryProvider, primaryBtreeSplitProvider, primaryTypeTraits, primaryComparatorFactories, lowKeyFields,
- highKeyFields, true, true, dataflowHelperFactory, false, NoOpOperationCallbackProvider.INSTANCE);
+ storageManager, indexRegistryProvider, primaryBtreeSplitProvider, primaryTypeTraits,
+ primaryComparatorFactories, lowKeyFields, highKeyFields, true, true, dataflowHelperFactory, false,
+ NoOpOperationCallbackProvider.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
// sort based on secondary keys
@@ -225,8 +229,9 @@
// load secondary index
int[] fieldPermutation = { 3, 0 };
TreeIndexBulkLoadOperatorDescriptor secondaryBtreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec,
- storageManager, indexRegistryProvider, secondaryBtreeSplitProvider, secondaryTypeTraits, secondaryComparatorFactories, fieldPermutation, 0.7f,
- dataflowHelperFactory, NoOpOperationCallbackProvider.INSTANCE);
+ storageManager, indexRegistryProvider, secondaryBtreeSplitProvider, secondaryTypeTraits,
+ secondaryComparatorFactories, fieldPermutation, 0.7f, dataflowHelperFactory,
+ NoOpOperationCallbackProvider.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryBtreeBulkLoad, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryBtreeSearchOp, 0);
@@ -268,8 +273,8 @@
// search secondary index
BTreeSearchOperatorDescriptor secondaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec,
secondaryRecDesc, storageManager, indexRegistryProvider, secondaryBtreeSplitProvider,
- secondaryTypeTraits, secondaryComparatorFactories, secondaryLowKeyFields, secondaryHighKeyFields, true, true,
- dataflowHelperFactory, false, NoOpOperationCallbackProvider.INSTANCE);
+ secondaryTypeTraits, secondaryComparatorFactories, secondaryLowKeyFields, secondaryHighKeyFields, true,
+ true, dataflowHelperFactory, false, NoOpOperationCallbackProvider.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryBtreeSearchOp, NC1_ID);
int[] primaryLowKeyFields = { 1 }; // second field from the tuples
@@ -279,13 +284,16 @@
// search primary index
BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc,
- storageManager, indexRegistryProvider, primaryBtreeSplitProvider, primaryTypeTraits, primaryComparatorFactories, primaryLowKeyFields,
- primaryHighKeyFields, true, true, dataflowHelperFactory, false, NoOpOperationCallbackProvider.INSTANCE);
+ storageManager, indexRegistryProvider, primaryBtreeSplitProvider, primaryTypeTraits,
+ primaryComparatorFactories, primaryLowKeyFields, primaryHighKeyFields, true, true,
+ dataflowHelperFactory, false, NoOpOperationCallbackProvider.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, secondaryBtreeSearchOp, 0);
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/AbstractIntegrationTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/AbstractIntegrationTest.java
index ce2eb83..acaa94a 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/AbstractIntegrationTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/AbstractIntegrationTest.java
@@ -14,29 +14,39 @@
*/
package edu.uci.ics.hyracks.tests.integration;
+import java.io.BufferedReader;
import java.io.File;
+import java.io.FileReader;
import java.io.IOException;
+import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
-import org.apache.commons.io.FileUtils;
import org.junit.AfterClass;
+import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.rules.TemporaryFolder;
import edu.uci.ics.hyracks.api.client.HyracksConnection;
import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.dataset.IHyracksDataset;
+import edu.uci.ics.hyracks.api.dataset.IHyracksDatasetReader;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.job.JobFlag;
import edu.uci.ics.hyracks.api.job.JobId;
import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.client.dataset.HyracksDataset;
import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
import edu.uci.ics.hyracks.control.common.controllers.CCConfig;
import edu.uci.ics.hyracks.control.common.controllers.NCConfig;
import edu.uci.ics.hyracks.control.nc.NodeControllerService;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ResultFrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
public abstract class AbstractIntegrationTest {
private static final Logger LOGGER = Logger.getLogger(AbstractIntegrationTest.class.getName());
@@ -80,6 +90,7 @@
ncConfig1.ccPort = 39001;
ncConfig1.clusterNetIPAddress = "127.0.0.1";
ncConfig1.dataIPAddress = "127.0.0.1";
+ ncConfig1.datasetIPAddress = "127.0.0.1";
ncConfig1.nodeId = NC1_ID;
nc1 = new NodeControllerService(ncConfig1);
nc1.start();
@@ -89,6 +100,7 @@
ncConfig2.ccPort = 39001;
ncConfig2.clusterNetIPAddress = "127.0.0.1";
ncConfig2.dataIPAddress = "127.0.0.1";
+ ncConfig2.datasetIPAddress = "127.0.0.1";
ncConfig2.nodeId = NC2_ID;
nc2 = new NodeControllerService(ncConfig2);
nc2.start();
@@ -106,7 +118,7 @@
cc.stop();
}
- protected void runTest(JobSpecification spec) throws Exception {
+ protected JobId executeTest(JobSpecification spec) throws Exception {
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info(spec.toJSON().toString(2));
}
@@ -114,25 +126,72 @@
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info(jobId.toString());
}
- hcc.waitForCompletion(jobId);
- dumpOutputFiles();
+ return jobId;
}
- private void dumpOutputFiles() {
- if (LOGGER.isLoggable(Level.INFO)) {
- for (File f : outputFiles) {
- if (f.exists() && f.isFile()) {
- try {
- LOGGER.info("Reading file: " + f.getAbsolutePath() + " in test: " + getClass().getName());
- String data = FileUtils.readFileToString(f);
- LOGGER.info(data);
- } catch (IOException e) {
- LOGGER.info("Error reading file: " + f.getAbsolutePath());
- LOGGER.info(e.getMessage());
- }
+ protected void runTest(JobSpecification spec) throws Exception {
+ JobId jobId = executeTest(spec);
+ hcc.waitForCompletion(jobId);
+ }
+
+ protected List<String> readResults(JobSpecification spec, JobId jobId, ResultSetId resultSetId) throws Exception {
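+ // Read the given result set through the dataset API, decoding every tuple of every frame into
+ // a String record.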
+ int nReaders = 1;
+ ByteBuffer resultBuffer = ByteBuffer.allocate(spec.getFrameSize());
+ resultBuffer.clear();
+
+ IFrameTupleAccessor frameTupleAccessor = new ResultFrameTupleAccessor(spec.getFrameSize());
+
+ IHyracksDataset hyracksDataset = new HyracksDataset(hcc, spec.getFrameSize(), nReaders);
+ IHyracksDatasetReader reader = hyracksDataset.createReader(jobId, resultSetId);
+
+ List<String> resultRecords = new ArrayList<String>();
+ ByteBufferInputStream bbis = new ByteBufferInputStream();
+
+ int readSize = reader.read(resultBuffer);
+
+ while (readSize > 0) {
+
+ try {
+ frameTupleAccessor.reset(resultBuffer);
+ for (int tIndex = 0; tIndex < frameTupleAccessor.getTupleCount(); tIndex++) {
+ int start = frameTupleAccessor.getTupleStartOffset(tIndex);
+ int length = frameTupleAccessor.getTupleEndOffset(tIndex) - start;
+ bbis.setByteBuffer(resultBuffer, start);
+ byte[] recordBytes = new byte[length];
+ bbis.read(recordBytes, 0, length);
+ resultRecords.add(new String(recordBytes, 0, length));
}
+ } finally {
+ bbis.close();
}
+
+ resultBuffer.clear();
+ readSize = reader.read(resultBuffer);
}
+ return resultRecords;
+ }
+
+ protected boolean runTestAndCompareResults(JobSpecification spec, String[] expectedFileNames) throws Exception {
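+ // Execute the job and compare each result set, line by line, against the corresponding
+ // expected-results file.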
+ JobId jobId = executeTest(spec);
+
+ List<String> results;
+ for (int i = 0; i < expectedFileNames.length; i++) {
+ results = readResults(spec, jobId, spec.getResultSetIds().get(i));
+ BufferedReader expectedFile = new BufferedReader(new FileReader(expectedFileNames[i]));
+
+ String expectedLine, actualLine;
+ int j = 0;
+ while ((expectedLine = expectedFile.readLine()) != null) {
+ actualLine = results.get(j).trim();
+ Assert.assertEquals(expectedLine, actualLine);
+ j++;
+ }
+ Assert.assertEquals(j, results.size());
+ expectedFile.close();
+ }
+
+ hcc.waitForCompletion(jobId);
+ return true;
}
protected File createTempFile() throws IOException {
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/AbstractMultiNCIntegrationTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/AbstractMultiNCIntegrationTest.java
index 279e5f9..d97b7db 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/AbstractMultiNCIntegrationTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/AbstractMultiNCIntegrationTest.java
@@ -16,6 +16,7 @@
import java.io.File;
import java.io.IOException;
+import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
@@ -23,6 +24,7 @@
import java.util.logging.Logger;
import org.apache.commons.io.FileUtils;
+import org.json.JSONArray;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Rule;
@@ -30,13 +32,20 @@
import edu.uci.ics.hyracks.api.client.HyracksConnection;
import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.dataset.IHyracksDataset;
+import edu.uci.ics.hyracks.api.dataset.IHyracksDatasetReader;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.job.JobFlag;
import edu.uci.ics.hyracks.api.job.JobId;
import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.client.dataset.HyracksDataset;
import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
import edu.uci.ics.hyracks.control.common.controllers.CCConfig;
import edu.uci.ics.hyracks.control.common.controllers.NCConfig;
import edu.uci.ics.hyracks.control.nc.NodeControllerService;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ResultFrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
public abstract class AbstractMultiNCIntegrationTest {
@@ -84,6 +93,7 @@
ncConfig.ccPort = 39001;
ncConfig.clusterNetIPAddress = "127.0.0.1";
ncConfig.dataIPAddress = "127.0.0.1";
+ ncConfig.datasetIPAddress = "127.0.0.1";
ncConfig.nodeId = ASTERIX_IDS[i];
asterixNCs[i] = new NodeControllerService(ncConfig);
asterixNCs[i].start();
@@ -111,6 +121,46 @@
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info(jobId.toString());
}
+
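+ // Drain the job's first result set through the dataset API before waiting for job completion.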
+ int nReaders = 1;
+
+ ByteBuffer resultBuffer = ByteBuffer.allocate(spec.getFrameSize());
+ resultBuffer.clear();
+
+ IFrameTupleAccessor frameTupleAccessor = new ResultFrameTupleAccessor(spec.getFrameSize());
+
+ IHyracksDataset hyracksDataset = new HyracksDataset(hcc, spec.getFrameSize(), nReaders);
+ IHyracksDatasetReader reader = hyracksDataset.createReader(jobId, spec.getResultSetIds().get(0));
+
+ JSONArray resultRecords = new JSONArray();
+ ByteBufferInputStream bbis = new ByteBufferInputStream();
+
+ int readSize = reader.read(resultBuffer);
+
+ while (readSize > 0) {
+
+ try {
+ frameTupleAccessor.reset(resultBuffer);
+ for (int tIndex = 0; tIndex < frameTupleAccessor.getTupleCount(); tIndex++) {
+ int start = frameTupleAccessor.getTupleStartOffset(tIndex);
+ int length = frameTupleAccessor.getTupleEndOffset(tIndex) - start;
+ bbis.setByteBuffer(resultBuffer, start);
+ byte[] recordBytes = new byte[length];
+ bbis.read(recordBytes, 0, length);
+ resultRecords.put(new String(recordBytes, 0, length));
+ }
+ } finally {
+ try {
+ bbis.close();
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ resultBuffer.clear();
+ readSize = reader.read(resultBuffer);
+ }
+
hcc.waitForCompletion(jobId);
dumpOutputFiles();
}
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/AggregationTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/AggregationTest.java
index 93e1e9b..06751e3 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/AggregationTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/AggregationTest.java
@@ -25,8 +25,8 @@
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.io.FileReference;
-import edu.uci.ics.hyracks.api.job.IOperatorDescriptorRegistry;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory;
@@ -49,7 +49,6 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
import edu.uci.ics.hyracks.dataflow.std.file.ITupleParserFactory;
-import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.group.HashSpillableTableFactory;
import edu.uci.ics.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory;
import edu.uci.ics.hyracks.dataflow.std.group.aggregators.AvgFieldGroupAggregatorFactory;
@@ -62,60 +61,42 @@
import edu.uci.ics.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.group.hash.HashGroupOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
/**
*
*/
public class AggregationTest extends AbstractIntegrationTest {
- final IFileSplitProvider splitProvider = new ConstantFileSplitProvider(
- new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
- "data/tpch0.001/lineitem.tbl"))) });
+ final IFileSplitProvider splitProvider = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC2_ID,
+ new FileReference(new File("data/tpch0.001/lineitem.tbl"))) });
- final RecordDescriptor desc = new RecordDescriptor(
- new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- FloatSerializerDeserializer.INSTANCE,
- FloatSerializerDeserializer.INSTANCE,
- FloatSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ final RecordDescriptor desc = new RecordDescriptor(new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
+ FloatSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
+ UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
- final ITupleParserFactory tupleParserFactory = new DelimitedDataTupleParserFactory(
- new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
- IntegerParserFactory.INSTANCE,
- IntegerParserFactory.INSTANCE,
- IntegerParserFactory.INSTANCE,
- IntegerParserFactory.INSTANCE, FloatParserFactory.INSTANCE,
- FloatParserFactory.INSTANCE, FloatParserFactory.INSTANCE,
- UTF8StringParserFactory.INSTANCE,
- UTF8StringParserFactory.INSTANCE,
- UTF8StringParserFactory.INSTANCE,
- UTF8StringParserFactory.INSTANCE,
- UTF8StringParserFactory.INSTANCE,
- UTF8StringParserFactory.INSTANCE,
- UTF8StringParserFactory.INSTANCE,
- UTF8StringParserFactory.INSTANCE, }, '|');
+ final ITupleParserFactory tupleParserFactory = new DelimitedDataTupleParserFactory(new IValueParserFactory[] {
+ UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE, IntegerParserFactory.INSTANCE,
+ IntegerParserFactory.INSTANCE, IntegerParserFactory.INSTANCE, FloatParserFactory.INSTANCE,
+ FloatParserFactory.INSTANCE, FloatParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
+ UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
+ UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
+ UTF8StringParserFactory.INSTANCE, }, '|');
- private AbstractSingleActivityOperatorDescriptor getPrinter(
- IOperatorDescriptorRegistry spec, String prefix) throws IOException {
+ private AbstractSingleActivityOperatorDescriptor getPrinter(JobSpecification spec, String prefix)
+ throws IOException {
- AbstractSingleActivityOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(
- spec, new ConstantFileSplitProvider(new FileSplit[] {
- new FileSplit(NC1_ID, createTempFile()
- .getAbsolutePath()),
- new FileSplit(NC2_ID, createTempFile()
- .getAbsolutePath()) }), "\t");
+ ResultSetId rsId = new ResultSetId(1);
+ AbstractSingleActivityOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
+ spec.addResultSetId(rsId);
return printer;
}
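
The reworked getPrinter(...) above is the heart of this change: instead of dumping each
test's output into tab-separated temp files on NC1/NC2 via PlainFileWriterOperatorDescriptor,
the tests now register a ResultSetId on the job and attach a ResultWriterOperatorDescriptor
to it, so output flows through Hyracks' result-distribution path. A minimal sketch of that
pattern, using only the calls visible in this patch (the result set id value and the boolean
flag are copied from the test code; how a client later fetches the result set is not shown
here), would be:

    // Sketch: publish an operator's output as a named result set instead of a file.
    JobSpecification spec = new JobSpecification();
    ResultSetId rsId = new ResultSetId(1);          // id the consumer will ask the cluster for
    AbstractSingleActivityOperatorDescriptor writer = new ResultWriterOperatorDescriptor(
            spec, rsId, true,                       // flag value as used in getPrinter(...) above
            ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
    spec.addResultSetId(rsId);                      // advertise the result set on the job spec
    // Connect the producing operator to 'writer' and run the job as usual; the imports are
    // the same ones added to AggregationTest at the top of this hunk.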
@@ -124,54 +105,38 @@
public void singleKeySumInmemGroupTest() throws Exception {
JobSpecification spec = new JobSpecification();
- FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
- spec, splitProvider, tupleParserFactory, desc);
+ FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
+ desc);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,
- csvScanner, NC2_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
- RecordDescriptor outputRec = new RecordDescriptor(
- new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- FloatSerializerDeserializer.INSTANCE });
+ RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 0 };
int tableSize = 8;
- HashGroupOperatorDescriptor grouper = new HashGroupOperatorDescriptor(
- spec,
- keyFields,
- new FieldHashPartitionComputerFactory(
- keyFields,
+ HashGroupOperatorDescriptor grouper = new HashGroupOperatorDescriptor(spec, keyFields,
+ new FieldHashPartitionComputerFactory(keyFields,
new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
.of(UTF8StringPointable.FACTORY) }),
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) },
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, true),
- new IntSumFieldAggregatorFactory(3, true),
- new FloatSumFieldAggregatorFactory(5, true) }),
- outputRec, tableSize);
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+ new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
+ new IntSumFieldAggregatorFactory(1, true), new IntSumFieldAggregatorFactory(3, true),
+ new FloatSumFieldAggregatorFactory(5, true) }), outputRec, tableSize);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
- IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(
- spec,
- new FieldHashPartitionComputerFactory(
- keyFields,
+ IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
+ new FieldHashPartitionComputerFactory(keyFields,
new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, grouper, 0);
- AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec,
- "singleKeySumInmemGroupTest");
+ AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeySumInmemGroupTest");
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, grouper, 0, printer, 0);
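
Every test below wires the same pipeline around its grouper: the lineitem scan is
hash-repartitioned on the grouping key(s) so that equal keys meet in a single grouper
partition, and the grouped stream is handed one-to-one to the result writer from
getPrinter(...). A condensed, commented sketch of that wiring (variable names as in the
tests; the comments describe the intent inferred from the connector types):

    // 1) M-to-N hash exchange: co-locate rows with equal grouping keys at one grouper partition.
    IConnectorDescriptor scanToGrouper = new MToNPartitioningConnectorDescriptor(spec,
            new FieldHashPartitionComputerFactory(keyFields,
                    new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
                            .of(UTF8StringPointable.FACTORY) }));
    spec.connect(scanToGrouper, csvScanner, 0, grouper, 0);

    // 2) One-to-one edge: each grouper partition feeds its local result writer directly.
    IConnectorDescriptor grouperToPrinter = new OneToOneConnectorDescriptor(spec);
    spec.connect(grouperToPrinter, grouper, 0, printer, 0);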
@@ -184,49 +149,34 @@
public void singleKeySumPreClusterGroupTest() throws Exception {
JobSpecification spec = new JobSpecification();
- FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
- spec, splitProvider, tupleParserFactory, desc);
+ FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
+ desc);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,
- csvScanner, NC2_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
- RecordDescriptor outputRec = new RecordDescriptor(
- new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- FloatSerializerDeserializer.INSTANCE});
+ RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 0 };
- PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(
- spec,
- keyFields,
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) },
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, true),
- new IntSumFieldAggregatorFactory(3, true),
- new FloatSumFieldAggregatorFactory(5, true)}),
- outputRec);
+ PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+ new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
+ new IntSumFieldAggregatorFactory(1, true), new IntSumFieldAggregatorFactory(3, true),
+ new FloatSumFieldAggregatorFactory(5, true) }), outputRec);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
- IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(
- spec,
- new FieldHashPartitionComputerFactory(
- keyFields,
+ IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
+ new FieldHashPartitionComputerFactory(keyFields,
new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, grouper, 0);
- AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec,
- "singleKeySumInmemGroupTest");
+ AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeySumPreClusterGroupTest");
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, grouper, 0, printer, 0);
@@ -239,64 +189,43 @@
public void singleKeySumExtGroupTest() throws Exception {
JobSpecification spec = new JobSpecification();
- FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
- spec, splitProvider, tupleParserFactory, desc);
+ FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
+ desc);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,
- csvScanner, NC2_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
- RecordDescriptor outputRec = new RecordDescriptor(
- new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- FloatSerializerDeserializer.INSTANCE});
+ RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 0 };
int frameLimits = 4;
int tableSize = 8;
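+ // The first MultiFieldsAggregatorFactory passed below aggregates the raw input tuples
+ // (sums over input fields 1, 3 and 5); the second merges spilled partial results, so its
+ // field indices (1, 2, 3) refer to positions within the partial-result records.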
- ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(
- spec,
- keyFields,
- frameLimits,
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) },
- new UTF8StringNormalizedKeyComputerFactory(),
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, false),
+ ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, keyFields, frameLimits,
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+ new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory(
+ new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false),
new IntSumFieldAggregatorFactory(3, false),
- new FloatSumFieldAggregatorFactory(5, false)}),
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, false),
+ new FloatSumFieldAggregatorFactory(5, false) }), new MultiFieldsAggregatorFactory(
+ new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false),
new IntSumFieldAggregatorFactory(2, false),
- new FloatSumFieldAggregatorFactory(3, false)}),
- outputRec,
- new HashSpillableTableFactory(
- new FieldHashPartitionComputerFactory(
- keyFields,
- new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY) }),
- tableSize), true);
+ new FloatSumFieldAggregatorFactory(3, false) }), outputRec,
+ new HashSpillableTableFactory(new FieldHashPartitionComputerFactory(keyFields,
+ new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
+ .of(UTF8StringPointable.FACTORY) }), tableSize), true);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
- IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(
- spec,
- new FieldHashPartitionComputerFactory(
- keyFields,
+ IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
+ new FieldHashPartitionComputerFactory(keyFields,
new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, grouper, 0);
- AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec,
- "singleKeySumExtGroupTest");
+ AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeySumExtGroupTest");
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, grouper, 0, printer, 0);
@@ -309,54 +238,38 @@
public void singleKeyAvgInmemGroupTest() throws Exception {
JobSpecification spec = new JobSpecification();
- FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
- spec, splitProvider, tupleParserFactory, desc);
+ FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
+ desc);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,
- csvScanner, NC2_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
- RecordDescriptor outputRec = new RecordDescriptor(
- new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- FloatSerializerDeserializer.INSTANCE });
+ RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 0 };
int tableSize = 8;
- HashGroupOperatorDescriptor grouper = new HashGroupOperatorDescriptor(
- spec,
- keyFields,
- new FieldHashPartitionComputerFactory(
- keyFields,
+ HashGroupOperatorDescriptor grouper = new HashGroupOperatorDescriptor(spec, keyFields,
+ new FieldHashPartitionComputerFactory(keyFields,
new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
.of(UTF8StringPointable.FACTORY) }),
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) },
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, true),
- new CountFieldAggregatorFactory(true),
- new AvgFieldGroupAggregatorFactory(1, true) }),
- outputRec, tableSize);
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+ new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
+ new IntSumFieldAggregatorFactory(1, true), new CountFieldAggregatorFactory(true),
+ new AvgFieldGroupAggregatorFactory(1, true) }), outputRec, tableSize);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
- IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(
- spec,
- new FieldHashPartitionComputerFactory(
- keyFields,
+ IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
+ new FieldHashPartitionComputerFactory(keyFields,
new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, grouper, 0);
- AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec,
- "singleKeyAvgInmemGroupTest");
+ AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyAvgInmemGroupTest");
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, grouper, 0, printer, 0);
@@ -369,49 +282,34 @@
public void singleKeyAvgPreClusterGroupTest() throws Exception {
JobSpecification spec = new JobSpecification();
- FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
- spec, splitProvider, tupleParserFactory, desc);
+ FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
+ desc);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,
- csvScanner, NC2_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
- RecordDescriptor outputRec = new RecordDescriptor(
- new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- FloatSerializerDeserializer.INSTANCE });
+ RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 0 };
- PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(
- spec,
- keyFields,
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) },
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, true),
- new CountFieldAggregatorFactory(true),
- new AvgFieldGroupAggregatorFactory(1, true) }),
- outputRec);
+ PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+ new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
+ new IntSumFieldAggregatorFactory(1, true), new CountFieldAggregatorFactory(true),
+ new AvgFieldGroupAggregatorFactory(1, true) }), outputRec);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
- IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(
- spec,
- new FieldHashPartitionComputerFactory(
- keyFields,
+ IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
+ new FieldHashPartitionComputerFactory(keyFields,
new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, grouper, 0);
- AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec,
- "singleKeyAvgInmemGroupTest");
+ AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyAvgPreClusterGroupTest");
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, grouper, 0, printer, 0);
@@ -424,64 +322,43 @@
public void singleKeyAvgExtGroupTest() throws Exception {
JobSpecification spec = new JobSpecification();
- FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
- spec, splitProvider, tupleParserFactory, desc);
+ FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
+ desc);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,
- csvScanner, NC2_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
- RecordDescriptor outputRec = new RecordDescriptor(
- new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- FloatSerializerDeserializer.INSTANCE });
+ RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 0 };
int frameLimits = 4;
int tableSize = 8;
- ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(
- spec,
- keyFields,
- frameLimits,
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) },
+ ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, keyFields, frameLimits,
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
new UTF8StringNormalizedKeyComputerFactory(),
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, false),
- new CountFieldAggregatorFactory(false),
- new AvgFieldGroupAggregatorFactory(1, false) }),
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, false),
+ new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
+ new IntSumFieldAggregatorFactory(1, false), new CountFieldAggregatorFactory(false),
+ new AvgFieldGroupAggregatorFactory(1, false) }), new MultiFieldsAggregatorFactory(
+ new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false),
new IntSumFieldAggregatorFactory(2, false),
- new AvgFieldMergeAggregatorFactory(3, false) }),
- outputRec,
- new HashSpillableTableFactory(
- new FieldHashPartitionComputerFactory(
- keyFields,
- new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY) }),
- tableSize), true);
+ new AvgFieldMergeAggregatorFactory(3, false) }), outputRec,
+ new HashSpillableTableFactory(new FieldHashPartitionComputerFactory(keyFields,
+ new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
+ .of(UTF8StringPointable.FACTORY) }), tableSize), true);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
- IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(
- spec,
- new FieldHashPartitionComputerFactory(
- keyFields,
+ IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
+ new FieldHashPartitionComputerFactory(keyFields,
new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, grouper, 0);
- AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec,
- "singleKeyAvgExtGroupTest");
+ AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyAvgExtGroupTest");
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, grouper, 0, printer, 0);
@@ -494,52 +371,38 @@
public void singleKeyMinMaxStringInmemGroupTest() throws Exception {
JobSpecification spec = new JobSpecification();
- FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
- spec, splitProvider, tupleParserFactory, desc);
+ FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
+ desc);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,
- csvScanner, NC2_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
- RecordDescriptor outputRec = new RecordDescriptor(
- new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ UTF8StringSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 0 };
int tableSize = 8;
- HashGroupOperatorDescriptor grouper = new HashGroupOperatorDescriptor(
- spec,
- keyFields,
- new FieldHashPartitionComputerFactory(
- keyFields,
+ HashGroupOperatorDescriptor grouper = new HashGroupOperatorDescriptor(spec, keyFields,
+ new FieldHashPartitionComputerFactory(keyFields,
new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
.of(UTF8StringPointable.FACTORY) }),
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) },
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, true),
- new MinMaxStringFieldAggregatorFactory(15,
- true, false) }), outputRec, tableSize);
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+ new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
+ new IntSumFieldAggregatorFactory(1, true),
+ new MinMaxStringFieldAggregatorFactory(15, true, false) }), outputRec, tableSize);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
- IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(
- spec,
- new FieldHashPartitionComputerFactory(
- keyFields,
+ IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
+ new FieldHashPartitionComputerFactory(keyFields,
new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, grouper, 0);
- AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec,
- "singleKeyAvgInmemGroupTest");
+ AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyMinMaxStringInmemGroupTest");
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, grouper, 0, printer, 0);
@@ -552,47 +415,34 @@
public void singleKeyMinMaxStringPreClusterGroupTest() throws Exception {
JobSpecification spec = new JobSpecification();
- FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
- spec, splitProvider, tupleParserFactory, desc);
+ FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
+ desc);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,
- csvScanner, NC2_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
- RecordDescriptor outputRec = new RecordDescriptor(
- new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ UTF8StringSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 0 };
- PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(
- spec,
- keyFields,
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) },
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, true),
- new MinMaxStringFieldAggregatorFactory(15,
- true, false) }), outputRec);
+ PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+ new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
+ new IntSumFieldAggregatorFactory(1, true),
+ new MinMaxStringFieldAggregatorFactory(15, true, false) }), outputRec);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
- IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(
- spec,
- new FieldHashPartitionComputerFactory(
- keyFields,
+ IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
+ new FieldHashPartitionComputerFactory(keyFields,
new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, grouper, 0);
- AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec,
- "singleKeyAvgInmemGroupTest");
+ AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyMinMaxStringPreClusterGroupTest");
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, grouper, 0, printer, 0);
@@ -605,63 +455,42 @@
public void singleKeyMinMaxStringExtGroupTest() throws Exception {
JobSpecification spec = new JobSpecification();
- FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
- spec, splitProvider, tupleParserFactory, desc);
+ FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
+ desc);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,
- csvScanner, NC2_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
- RecordDescriptor outputRec = new RecordDescriptor(
- new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ UTF8StringSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 0 };
int frameLimits = 4;
int tableSize = 8;
- ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(
- spec,
- keyFields,
- frameLimits,
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) },
- new UTF8StringNormalizedKeyComputerFactory(),
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, false),
- new MinMaxStringFieldAggregatorFactory(15,
- true, true) }),
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, false),
- new MinMaxStringFieldAggregatorFactory(2, true,
- true) }),
- outputRec,
- new HashSpillableTableFactory(
- new FieldHashPartitionComputerFactory(
- keyFields,
- new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY) }),
- tableSize), true);
+ ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, keyFields, frameLimits,
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+ new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory(
+ new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false),
+ new MinMaxStringFieldAggregatorFactory(15, true, true) }),
+ new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
+ new IntSumFieldAggregatorFactory(1, false),
+ new MinMaxStringFieldAggregatorFactory(2, true, true) }), outputRec,
+ new HashSpillableTableFactory(new FieldHashPartitionComputerFactory(keyFields,
+ new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
+ .of(UTF8StringPointable.FACTORY) }), tableSize), true);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
- IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(
- spec,
- new FieldHashPartitionComputerFactory(
- keyFields,
+ IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
+ new FieldHashPartitionComputerFactory(keyFields,
new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, grouper, 0);
- AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec,
- "singleKeyAvgExtGroupTest");
+ AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "singleKeyMinMaxStringExtGroupTest");
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, grouper, 0, printer, 0);
@@ -674,58 +503,39 @@
public void multiKeySumInmemGroupTest() throws Exception {
JobSpecification spec = new JobSpecification();
- FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
- spec, splitProvider, tupleParserFactory, desc);
+ FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
+ desc);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,
- csvScanner, NC2_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
- RecordDescriptor outputRec = new RecordDescriptor(
- new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE });
+ RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 8, 0 };
int tableSize = 8;
- HashGroupOperatorDescriptor grouper = new HashGroupOperatorDescriptor(
- spec, keyFields, new FieldHashPartitionComputerFactory(
- keyFields,
- new IBinaryHashFunctionFactory[] {
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY) }),
- new IBinaryComparatorFactory[] {
- PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) },
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, true),
- new IntSumFieldAggregatorFactory(3, true) }),
+ HashGroupOperatorDescriptor grouper = new HashGroupOperatorDescriptor(spec, keyFields,
+ new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] {
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }),
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+ new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
+ new IntSumFieldAggregatorFactory(1, true), new IntSumFieldAggregatorFactory(3, true) }),
outputRec, tableSize);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
- IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(
- spec, new FieldHashPartitionComputerFactory(keyFields,
- new IBinaryHashFunctionFactory[] {
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY) }));
+ IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
+ new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] {
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, grouper, 0);
- AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec,
- "multiKeySumInmemGroupTest");
+ AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeySumInmemGroupTest");
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, grouper, 0, printer, 0);
@@ -738,51 +548,35 @@
public void multiKeySumPreClusterGroupTest() throws Exception {
JobSpecification spec = new JobSpecification();
- FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
- spec, splitProvider, tupleParserFactory, desc);
+ FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
+ desc);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,
- csvScanner, NC2_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
- RecordDescriptor outputRec = new RecordDescriptor(
- new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE });
+ RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 8, 0 };
- PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(
- spec, keyFields,
- new IBinaryComparatorFactory[] {
- PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) },
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, true),
- new IntSumFieldAggregatorFactory(3, true) }),
+ PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+ new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
+ new IntSumFieldAggregatorFactory(1, true), new IntSumFieldAggregatorFactory(3, true) }),
outputRec);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
- IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(
- spec, new FieldHashPartitionComputerFactory(keyFields,
- new IBinaryHashFunctionFactory[] {
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY) }));
+ IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
+ new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] {
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, grouper, 0);
- AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec,
- "multiKeySumInmemGroupTest");
+ AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeySumPreClusterGroupTest");
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, grouper, 0, printer, 0);
@@ -795,69 +589,43 @@
public void multiKeySumExtGroupTest() throws Exception {
JobSpecification spec = new JobSpecification();
- FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
- spec, splitProvider, tupleParserFactory, desc);
+ FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
+ desc);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,
- csvScanner, NC2_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
- RecordDescriptor outputRec = new RecordDescriptor(
- new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE });
+ RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 8, 0 };
int frameLimits = 4;
int tableSize = 8;
- ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(
- spec,
- keyFields,
- frameLimits,
- new IBinaryComparatorFactory[] {
- PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) },
- new UTF8StringNormalizedKeyComputerFactory(),
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, false),
- new IntSumFieldAggregatorFactory(3, false) }),
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(2, false),
- new IntSumFieldAggregatorFactory(3, false) }),
- outputRec,
- new HashSpillableTableFactory(
- new FieldHashPartitionComputerFactory(
- keyFields,
- new IBinaryHashFunctionFactory[] {
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY) }),
- tableSize), true);
-
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper,
- NC2_ID, NC1_ID);
-
- IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(
- spec, new FieldHashPartitionComputerFactory(keyFields,
+ ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, keyFields, frameLimits,
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+ new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory(
+ new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false),
+ new IntSumFieldAggregatorFactory(3, false) }), new MultiFieldsAggregatorFactory(
+ new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(2, false),
+ new IntSumFieldAggregatorFactory(3, false) }), outputRec,
+ new HashSpillableTableFactory(new FieldHashPartitionComputerFactory(keyFields,
new IBinaryHashFunctionFactory[] {
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY) }));
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }), tableSize), true);
+
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
+
+ IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
+ new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] {
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, grouper, 0);
- AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec,
- "multiKeySumExtGroupTest");
+ AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeySumExtGroupTest");
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, grouper, 0, printer, 0);
@@ -870,60 +638,40 @@
public void multiKeyAvgInmemGroupTest() throws Exception {
JobSpecification spec = new JobSpecification();
- FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
- spec, splitProvider, tupleParserFactory, desc);
+ FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
+ desc);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,
- csvScanner, NC2_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
- RecordDescriptor outputRec = new RecordDescriptor(
- new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- FloatSerializerDeserializer.INSTANCE });
+ RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ FloatSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 8, 0 };
int tableSize = 8;
- HashGroupOperatorDescriptor grouper = new HashGroupOperatorDescriptor(
- spec, keyFields, new FieldHashPartitionComputerFactory(
- keyFields,
- new IBinaryHashFunctionFactory[] {
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY) }),
- new IBinaryComparatorFactory[] {
- PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) },
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, true),
- new CountFieldAggregatorFactory(true),
- new AvgFieldGroupAggregatorFactory(1, true) }),
- outputRec, tableSize);
+ HashGroupOperatorDescriptor grouper = new HashGroupOperatorDescriptor(spec, keyFields,
+ new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] {
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }),
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+ new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
+ new IntSumFieldAggregatorFactory(1, true), new CountFieldAggregatorFactory(true),
+ new AvgFieldGroupAggregatorFactory(1, true) }), outputRec, tableSize);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
- IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(
- spec, new FieldHashPartitionComputerFactory(keyFields,
- new IBinaryHashFunctionFactory[] {
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY) }));
+ IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
+ new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] {
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, grouper, 0);
- AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec,
- "multiKeyAvgInmemGroupTest");
+ AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeyAvgInmemGroupTest");
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, grouper, 0, printer, 0);
@@ -936,53 +684,36 @@
public void multiKeyAvgPreClusterGroupTest() throws Exception {
JobSpecification spec = new JobSpecification();
- FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
- spec, splitProvider, tupleParserFactory, desc);
+ FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
+ desc);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,
- csvScanner, NC2_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
- RecordDescriptor outputRec = new RecordDescriptor(
- new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- FloatSerializerDeserializer.INSTANCE });
+ RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ FloatSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 8, 0 };
- PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(
- spec, keyFields,
- new IBinaryComparatorFactory[] {
- PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) },
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, true),
- new CountFieldAggregatorFactory(true),
- new AvgFieldGroupAggregatorFactory(1, true) }),
- outputRec);
+ PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+ new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
+ new IntSumFieldAggregatorFactory(1, true), new CountFieldAggregatorFactory(true),
+ new AvgFieldGroupAggregatorFactory(1, true) }), outputRec);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
- IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(
- spec, new FieldHashPartitionComputerFactory(keyFields,
- new IBinaryHashFunctionFactory[] {
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY) }));
+ IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
+ new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] {
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, grouper, 0);
- AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec,
- "multiKeyAvgInmemGroupTest");
+ AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeyAvgPreClusterGroupTest");
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, grouper, 0, printer, 0);
@@ -995,72 +726,46 @@
public void multiKeyAvgExtGroupTest() throws Exception {
JobSpecification spec = new JobSpecification();
- FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
- spec, splitProvider, tupleParserFactory, desc);
+ FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
+ desc);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,
- csvScanner, NC2_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
- RecordDescriptor outputRec = new RecordDescriptor(
- new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- FloatSerializerDeserializer.INSTANCE });
+ RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ FloatSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 8, 0 };
int frameLimits = 4;
int tableSize = 8;
- ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(
- spec,
- keyFields,
- frameLimits,
- new IBinaryComparatorFactory[] {
- PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) },
+ ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, keyFields, frameLimits,
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
new UTF8StringNormalizedKeyComputerFactory(),
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, false),
- new CountFieldAggregatorFactory(false),
- new AvgFieldGroupAggregatorFactory(1, false) }),
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(2, false),
+ new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
+ new IntSumFieldAggregatorFactory(1, false), new CountFieldAggregatorFactory(false),
+ new AvgFieldGroupAggregatorFactory(1, false) }), new MultiFieldsAggregatorFactory(
+ new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(2, false),
new IntSumFieldAggregatorFactory(3, false),
- new AvgFieldMergeAggregatorFactory(4, false) }),
- outputRec,
- new HashSpillableTableFactory(
- new FieldHashPartitionComputerFactory(
- keyFields,
- new IBinaryHashFunctionFactory[] {
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY) }),
- tableSize), true);
-
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper,
- NC2_ID, NC1_ID);
-
- IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(
- spec, new FieldHashPartitionComputerFactory(keyFields,
+ new AvgFieldMergeAggregatorFactory(4, false) }), outputRec,
+ new HashSpillableTableFactory(new FieldHashPartitionComputerFactory(keyFields,
new IBinaryHashFunctionFactory[] {
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY) }));
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }), tableSize), true);
+
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
+
+ IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
+ new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] {
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, grouper, 0);
- AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec,
- "multiKeyAvgExtGroupTest");
+ AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeyAvgExtGroupTest");
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, grouper, 0, printer, 0);
@@ -1073,58 +778,39 @@
public void multiKeyMinMaxStringInmemGroupTest() throws Exception {
JobSpecification spec = new JobSpecification();
- FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
- spec, splitProvider, tupleParserFactory, desc);
+ FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
+ desc);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,
- csvScanner, NC2_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
- RecordDescriptor outputRec = new RecordDescriptor(
- new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 8, 0 };
int tableSize = 8;
- HashGroupOperatorDescriptor grouper = new HashGroupOperatorDescriptor(
- spec, keyFields, new FieldHashPartitionComputerFactory(
- keyFields,
- new IBinaryHashFunctionFactory[] {
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY) }),
- new IBinaryComparatorFactory[] {
- PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) },
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, true),
- new MinMaxStringFieldAggregatorFactory(15,
- true, false) }), outputRec, tableSize);
+ HashGroupOperatorDescriptor grouper = new HashGroupOperatorDescriptor(spec, keyFields,
+ new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] {
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }),
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+ new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
+ new IntSumFieldAggregatorFactory(1, true),
+ new MinMaxStringFieldAggregatorFactory(15, true, false) }), outputRec, tableSize);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
- IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(
- spec, new FieldHashPartitionComputerFactory(keyFields,
- new IBinaryHashFunctionFactory[] {
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY) }));
+ IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
+ new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] {
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, grouper, 0);
- AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec,
- "multiKeyMinMaxStringInmemGroupTest");
+ AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeyMinMaxStringInmemGroupTest");
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, grouper, 0, printer, 0);
@@ -1137,51 +823,35 @@
public void multiKeyMinMaxStringPreClusterGroupTest() throws Exception {
JobSpecification spec = new JobSpecification();
- FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
- spec, splitProvider, tupleParserFactory, desc);
+ FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
+ desc);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,
- csvScanner, NC2_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
- RecordDescriptor outputRec = new RecordDescriptor(
- new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 8, 0 };
- PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(
- spec, keyFields,
- new IBinaryComparatorFactory[] {
- PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) },
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, true),
- new MinMaxStringFieldAggregatorFactory(15,
- true, false) }), outputRec);
+ PreclusteredGroupOperatorDescriptor grouper = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+ new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
+ new IntSumFieldAggregatorFactory(1, true),
+ new MinMaxStringFieldAggregatorFactory(15, true, false) }), outputRec);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
- IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(
- spec, new FieldHashPartitionComputerFactory(keyFields,
- new IBinaryHashFunctionFactory[] {
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY) }));
+ IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
+ new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] {
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, grouper, 0);
- AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec,
- "multiKeyMinMaxStringPreClusterGroupTest");
+ AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeyMinMaxStringPreClusterGroupTest");
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, grouper, 0, printer, 0);
@@ -1194,71 +864,44 @@
public void multiKeyMinMaxStringExtGroupTest() throws Exception {
JobSpecification spec = new JobSpecification();
- FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
- spec, splitProvider, tupleParserFactory, desc);
+ FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(spec, splitProvider, tupleParserFactory,
+ desc);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,
- csvScanner, NC2_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
- RecordDescriptor outputRec = new RecordDescriptor(
- new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
+ UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 8, 0 };
int frameLimits = 4;
int tableSize = 8;
- ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(
- spec,
- keyFields,
- frameLimits,
- new IBinaryComparatorFactory[] {
- PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryComparatorFactory
- .of(UTF8StringPointable.FACTORY) },
- new UTF8StringNormalizedKeyComputerFactory(),
- new MultiFieldsAggregatorFactory(
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(1, false),
- new MinMaxStringFieldAggregatorFactory(15,
- true, true) }),
- new MultiFieldsAggregatorFactory(new int[] { 0, 1 },
- new IFieldAggregateDescriptorFactory[] {
- new IntSumFieldAggregatorFactory(2, false),
- new MinMaxStringFieldAggregatorFactory(3, true,
- true) }),
- outputRec,
- new HashSpillableTableFactory(
- new FieldHashPartitionComputerFactory(
- keyFields,
- new IBinaryHashFunctionFactory[] {
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY) }),
- tableSize), true);
-
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper,
- NC2_ID, NC1_ID);
-
- IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(
- spec, new FieldHashPartitionComputerFactory(keyFields,
+ ExternalGroupOperatorDescriptor grouper = new ExternalGroupOperatorDescriptor(spec, keyFields, frameLimits,
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
+ new UTF8StringNormalizedKeyComputerFactory(), new MultiFieldsAggregatorFactory(
+ new IFieldAggregateDescriptorFactory[] { new IntSumFieldAggregatorFactory(1, false),
+ new MinMaxStringFieldAggregatorFactory(15, true, true) }),
+ new MultiFieldsAggregatorFactory(new int[] { 0, 1 }, new IFieldAggregateDescriptorFactory[] {
+ new IntSumFieldAggregatorFactory(2, false),
+ new MinMaxStringFieldAggregatorFactory(3, true, true) }), outputRec,
+ new HashSpillableTableFactory(new FieldHashPartitionComputerFactory(keyFields,
new IBinaryHashFunctionFactory[] {
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY),
- PointableBinaryHashFunctionFactory
- .of(UTF8StringPointable.FACTORY) }));
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }), tableSize), true);
+
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, grouper, NC2_ID, NC1_ID);
+
+ IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
+ new FieldHashPartitionComputerFactory(keyFields, new IBinaryHashFunctionFactory[] {
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
+ PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) }));
spec.connect(conn1, csvScanner, 0, grouper, 0);
- AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec,
- "multiKeyMinMaxStringExtGroupTest");
+ AbstractSingleActivityOperatorDescriptor printer = getPrinter(spec, "multiKeyMinMaxStringExtGroupTest");
- PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,
- NC2_ID, NC1_ID);
+ PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
IConnectorDescriptor conn2 = new OneToOneConnectorDescriptor(spec);
spec.connect(conn2, grouper, 0, printer, 0);
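
Note (not part of the patch): the reformatted group-by hunks above all wire the external aggregator the same way — one MultiFieldsAggregatorFactory for the in-memory partial pass, a second one for merging spilled runs, and a HashSpillableTableFactory hashed on the group keys. The sketch below condenses that wiring using only constructors that appear in this patch; the helper name buildAvgGrouper and its parameter list are hypothetical, and imports are assumed to match the ones the test file already declares.

    // Illustrative sketch only -- condenses the grouper construction shown in the hunks above.
    // buildAvgGrouper is a hypothetical helper; imports are assumed to be those already in the test file.
    private static ExternalGroupOperatorDescriptor buildAvgGrouper(JobSpecification spec, int[] keyFields,
            int frameLimits, int tableSize, RecordDescriptor outputRec) {
        IBinaryHashFunctionFactory[] hashes = new IBinaryHashFunctionFactory[] {
                PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY),
                PointableBinaryHashFunctionFactory.of(UTF8StringPointable.FACTORY) };
        IBinaryComparatorFactory[] comparators = new IBinaryComparatorFactory[] {
                PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY),
                PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) };
        return new ExternalGroupOperatorDescriptor(spec, keyFields, frameLimits, comparators,
                new UTF8StringNormalizedKeyComputerFactory(),
                // partial aggregators run against the raw scan output
                new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
                        new IntSumFieldAggregatorFactory(1, false), new CountFieldAggregatorFactory(false),
                        new AvgFieldGroupAggregatorFactory(1, false) }),
                // merge aggregators run against the partially aggregated fields of spilled runs
                new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] {
                        new IntSumFieldAggregatorFactory(2, false), new IntSumFieldAggregatorFactory(3, false),
                        new AvgFieldMergeAggregatorFactory(4, false) }),
                outputRec,
                new HashSpillableTableFactory(new FieldHashPartitionComputerFactory(keyFields, hashes), tableSize),
                true);
    }
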
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/CountOfCountsTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/CountOfCountsTest.java
index aea6126..5008991 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/CountOfCountsTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/CountOfCountsTest.java
@@ -25,6 +25,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
@@ -44,13 +45,14 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory;
import edu.uci.ics.hyracks.dataflow.std.group.aggregators.CountFieldAggregatorFactory;
import edu.uci.ics.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory;
import edu.uci.ics.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.sort.InMemorySortOperatorDescriptor;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class CountOfCountsTest extends AbstractIntegrationTest {
@Test
@@ -76,12 +78,10 @@
RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] {
UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
- PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(
- spec,
- new int[] { 0 },
+ PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 },
new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
- new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }),
- desc2);
+ new MultiFieldsAggregatorFactory(
+ new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), desc2);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group, NC1_ID);
InMemorySortOperatorDescriptor sorter2 = new InMemorySortOperatorDescriptor(spec, new int[] { 1 },
@@ -91,13 +91,15 @@
RecordDescriptor desc3 = new RecordDescriptor(new ISerializerDeserializer[] {
IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
PreclusteredGroupOperatorDescriptor group2 = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 1 },
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, new MultiFieldsAggregatorFactory(
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
+ new MultiFieldsAggregatorFactory(
new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), desc3);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group2, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
+ spec.addResultSetId(rsId);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
@@ -148,12 +150,10 @@
RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] {
UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
- PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(
- spec,
- new int[] { 0 },
+ PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 },
new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
- new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }),
- desc2);
+ new MultiFieldsAggregatorFactory(
+ new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), desc2);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
InMemorySortOperatorDescriptor sorter2 = new InMemorySortOperatorDescriptor(spec, new int[] { 1 },
@@ -163,13 +163,16 @@
RecordDescriptor desc3 = new RecordDescriptor(new ISerializerDeserializer[] {
IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
PreclusteredGroupOperatorDescriptor group2 = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 1 },
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, new MultiFieldsAggregatorFactory(
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
+ new MultiFieldsAggregatorFactory(
new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), desc3);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group2, NC1_ID, NC2_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
+ spec.addResultSetId(rsId);
+
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
@@ -220,12 +223,10 @@
RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] {
UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
- PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(
- spec,
- new int[] { 0 },
+ PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 },
new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
- new MultiFieldsAggregatorFactory(new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }),
- desc2);
+ new MultiFieldsAggregatorFactory(
+ new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), desc2);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
InMemorySortOperatorDescriptor sorter2 = new InMemorySortOperatorDescriptor(spec, new int[] { 1 },
@@ -235,13 +236,16 @@
RecordDescriptor desc3 = new RecordDescriptor(new ISerializerDeserializer[] {
IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
PreclusteredGroupOperatorDescriptor group2 = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 1 },
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) }, new MultiFieldsAggregatorFactory(
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
+ new MultiFieldsAggregatorFactory(
new IFieldAggregateDescriptorFactory[] { new CountFieldAggregatorFactory(true) }), desc3);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, group2, NC1_ID, NC2_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
+ spec.addResultSetId(rsId);
+
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/LocalityAwareConnectorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/LocalityAwareConnectorTest.java
index 0d5a627..93ed2c7 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/LocalityAwareConnectorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/LocalityAwareConnectorTest.java
@@ -26,8 +26,8 @@
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.io.FileReference;
-import edu.uci.ics.hyracks.api.job.IOperatorDescriptorRegistry;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory;
@@ -52,12 +52,13 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
import edu.uci.ics.hyracks.dataflow.std.file.ITupleParserFactory;
-import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.group.IFieldAggregateDescriptorFactory;
import edu.uci.ics.hyracks.dataflow.std.group.aggregators.FloatSumFieldAggregatorFactory;
import edu.uci.ics.hyracks.dataflow.std.group.aggregators.IntSumFieldAggregatorFactory;
import edu.uci.ics.hyracks.dataflow.std.group.aggregators.MultiFieldsAggregatorFactory;
import edu.uci.ics.hyracks.dataflow.std.group.hash.HashGroupOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class LocalityAwareConnectorTest extends AbstractMultiNCIntegrationTest {
@@ -200,13 +201,13 @@
runTest(spec);
}
- private AbstractSingleActivityOperatorDescriptor getPrinter(IOperatorDescriptorRegistry spec, String prefix)
+ private AbstractSingleActivityOperatorDescriptor getPrinter(JobSpecification spec, String prefix)
throws IOException {
- AbstractSingleActivityOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec,
- new ConstantFileSplitProvider(new FileSplit[] {
- new FileSplit("asterix-005", createTempFile().getAbsolutePath()),
- new FileSplit("asterix-006", createTempFile().getAbsolutePath()) }), "\t");
+ ResultSetId rsId = new ResultSetId(1);
+ AbstractSingleActivityOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
+ spec.addResultSetId(rsId);
return printer;
}
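
Note (not part of the patch): every test file touched in this change repeats the same sink substitution — a PlainFileWriterOperatorDescriptor over a ConstantFileSplitProvider is replaced by a ResultWriterOperatorDescriptor registered under a ResultSetId on the JobSpecification. A minimal sketch of that recurring pattern, built only from calls that appear in this patch, follows; createResultSink is a hypothetical helper, the role of the boolean flag is not documented here and is simply passed through, and imports are assumed to match the ones each test file now declares.

    // Illustrative sketch only -- the recurring sink replacement used throughout this patch.
    // createResultSink is a hypothetical helper; 'orderedFlag' carries whatever value each test passes
    // (true or false in the hunks above); its semantics are not spelled out in this patch.
    private static IOperatorDescriptor createResultSink(JobSpecification spec, boolean orderedFlag,
            String... locations) {
        ResultSetId rsId = new ResultSetId(1);
        spec.addResultSetId(rsId); // register the result set on the job specification
        IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, orderedFlag,
                ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, locations);
        return printer;
    }
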
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/OptimizedSortMergeTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/OptimizedSortMergeTest.java
index 1ee4400..ec9be32 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/OptimizedSortMergeTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/OptimizedSortMergeTest.java
@@ -24,6 +24,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
@@ -40,9 +41,10 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.misc.LimitOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.sort.OptimizedExternalSortOperatorDescriptor;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class OptimizedSortMergeTest extends AbstractIntegrationTest {
@@ -75,9 +77,11 @@
PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, ordersDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
@@ -128,9 +132,11 @@
LimitOperatorDescriptor filter = new LimitOperatorDescriptor(spec, ordersDesc, outputLimit);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, filter, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/ScanPrintTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/ScanPrintTest.java
index 9355110..961f780 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/ScanPrintTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/ScanPrintTest.java
@@ -24,6 +24,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory;
@@ -42,7 +43,8 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class ScanPrintTest extends AbstractIntegrationTest {
@Test
@@ -63,10 +65,11 @@
desc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] {
- new FileSplit(NC2_ID, createTempFile().getAbsolutePath()),
- new FileSplit(NC1_ID, createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
+ spec.addResultSetId(rsId);
+
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
IConnectorDescriptor conn = new OneToOneConnectorDescriptor(spec);
@@ -98,9 +101,11 @@
UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
+ spec.addResultSetId(rsId);
+
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
@@ -135,9 +140,11 @@
UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
+ spec.addResultSetId(rsId);
+
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor conn1 = new MToNPartitioningConnectorDescriptor(spec,
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/SortMergeTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/SortMergeTest.java
index 2c3fddf..0da93f2 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/SortMergeTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/SortMergeTest.java
@@ -24,6 +24,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
@@ -40,9 +41,10 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.sort.InMemorySortOperatorDescriptor;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class SortMergeTest extends AbstractIntegrationTest {
@Test
@@ -73,9 +75,11 @@
ordersDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
@@ -118,9 +122,11 @@
PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) }, ordersDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), ordScanner, 0, sorter, 0);
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/SplitOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/SplitOperatorTest.java
index 2b32142..6040748 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/SplitOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/SplitOperatorTest.java
@@ -26,6 +26,7 @@
import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
import edu.uci.ics.hyracks.dataflow.common.data.parsers.IValueParserFactory;
@@ -35,8 +36,9 @@
import edu.uci.ics.hyracks.dataflow.std.file.DelimitedDataTupleParserFactory;
import edu.uci.ics.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
-import edu.uci.ics.hyracks.dataflow.std.file.LineFileWriteOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.misc.SplitOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class SplitOperatorTest extends AbstractIntegrationTest {
@@ -50,6 +52,8 @@
Assert.assertEquals(lineA, lineB);
}
Assert.assertNull(fileB.readLine());
+ fileA.close();
+ fileB.close();
}
@Test
@@ -83,8 +87,11 @@
IOperatorDescriptor outputOp[] = new IOperatorDescriptor[outputFile.length];
for (int i = 0; i < outputArity; i++) {
- outputOp[i] = new LineFileWriteOperatorDescriptor(spec, new FileSplit[] { new FileSplit(NC1_ID,
- outputFile[i].getAbsolutePath()) });
+ ResultSetId rsId = new ResultSetId(i);
+ spec.addResultSetId(rsId);
+
+ outputOp[i] = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, outputOp[i], locations);
}
@@ -96,10 +103,10 @@
for (int i = 0; i < outputArity; i++) {
spec.addRoot(outputOp[i]);
}
- runTest(spec);
-
+ String[] expectedResultsFileNames = new String[outputArity];
for (int i = 0; i < outputArity; i++) {
- compareFiles(inputFileName, outputFile[i].getAbsolutePath());
+ expectedResultsFileNames[i] = inputFileName;
}
+ runTestAndCompareResults(spec, expectedResultsFileNames);
}
}
\ No newline at end of file
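
Note (not part of the patch): SplitOperatorTest above also changes how results are verified, dropping the manual compareFiles loop in favor of runTestAndCompareResults with one ResultSetId per output branch. The condensed sketch below uses only calls visible in the patch; outputArity, locations, and inputFileName stand in for values the test already defines, and runTestAndCompareResults (from the shared integration-test base class) is assumed to take one expected file per result set, in order.

    // Illustrative sketch only -- one ResultWriterOperatorDescriptor per split output,
    // verified via runTestAndCompareResults instead of manual file comparison.
    IOperatorDescriptor[] outputOp = new IOperatorDescriptor[outputArity];
    String[] expectedResultsFileNames = new String[outputArity];
    for (int i = 0; i < outputArity; i++) {
        ResultSetId rsId = new ResultSetId(i);
        spec.addResultSetId(rsId);
        outputOp[i] = new ResultWriterOperatorDescriptor(spec, rsId, true,
                ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, outputOp[i], locations);
        spec.addRoot(outputOp[i]);
        expectedResultsFileNames[i] = inputFileName; // each output branch is expected to reproduce the input
    }
    runTestAndCompareResults(spec, expectedResultsFileNames);
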
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/TPCHCustomerOrderHashJoinTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/TPCHCustomerOrderHashJoinTest.java
index 622942b..b5eb850 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/TPCHCustomerOrderHashJoinTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/TPCHCustomerOrderHashJoinTest.java
@@ -14,9 +14,7 @@
*/
package edu.uci.ics.hyracks.tests.integration;
-import java.io.DataOutput;
import java.io.File;
-import java.io.IOException;
import org.junit.Test;
@@ -25,11 +23,10 @@
import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.INullWriter;
import edu.uci.ics.hyracks.api.dataflow.value.INullWriterFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
@@ -47,12 +44,13 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.join.GraceHashJoinOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.join.HybridHashJoinOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.join.InMemoryHashJoinOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.misc.MaterializingOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
import edu.uci.ics.hyracks.tests.util.NoopNullWriterFactory;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class TPCHCustomerOrderHashJoinTest extends AbstractIntegrationTest {
@@ -127,9 +125,11 @@
custOrderJoinDesc, 128);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
@@ -208,9 +208,11 @@
custOrderJoinDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
@@ -289,9 +291,11 @@
custOrderJoinDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
@@ -371,9 +375,11 @@
custOrderJoinDesc, true, nullWriterFactories, 128);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
@@ -457,9 +463,11 @@
custOrderJoinDesc, true, nullWriterFactories);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
@@ -543,9 +551,11 @@
custOrderJoinDesc, true, nullWriterFactories);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
@@ -622,9 +632,11 @@
custOrderJoinDesc, 128);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID, NC2_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor ordJoinConn = new MToNPartitioningConnectorDescriptor(spec,
@@ -711,9 +723,11 @@
custOrderJoinDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID, NC2_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor ordJoinConn = new MToNPartitioningConnectorDescriptor(spec,
@@ -800,9 +814,11 @@
custOrderJoinDesc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID, NC2_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor ordJoinConn = new MToNPartitioningConnectorDescriptor(spec,
@@ -885,9 +901,11 @@
custOrderJoinDesc, 128);
PartitionConstraintHelper.addPartitionCountConstraint(spec, join, 2);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor ordJoinConn = new MToNPartitioningConnectorDescriptor(spec,
@@ -976,9 +994,11 @@
custOrderJoinDesc, 128);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID, NC2_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor ordPartConn = new MToNPartitioningConnectorDescriptor(spec,
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/TPCHCustomerOrderNestedLoopJoinTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/TPCHCustomerOrderNestedLoopJoinTest.java
index 9233e39..99f2d18 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/TPCHCustomerOrderNestedLoopJoinTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/TPCHCustomerOrderNestedLoopJoinTest.java
@@ -15,7 +15,9 @@
package edu.uci.ics.hyracks.tests.integration;
import java.io.File;
+
import org.junit.Test;
+
import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
import edu.uci.ics.hyracks.api.constraints.PartitionConstraintHelper;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
@@ -28,6 +30,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.ITuplePairComparator;
import edu.uci.ics.hyracks.api.dataflow.value.ITuplePairComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
@@ -42,9 +45,10 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.join.NestedLoopJoinOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
import edu.uci.ics.hyracks.tests.util.NoopNullWriterFactory;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class TPCHCustomerOrderNestedLoopJoinTest extends AbstractIntegrationTest {
private static class JoinComparatorFactory implements ITuplePairComparatorFactory {
@@ -169,9 +173,11 @@
null);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
@@ -244,9 +250,11 @@
null);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID, NC2_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
@@ -319,9 +327,11 @@
null);
PartitionConstraintHelper.addPartitionCountConstraint(spec, join, 2);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
@@ -399,9 +409,11 @@
nullWriterFactories);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID, NC2_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/UnionTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/UnionTest.java
index 37b55b8..5b323e6 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/UnionTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/UnionTest.java
@@ -22,6 +22,7 @@
import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
@@ -33,8 +34,9 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.union.UnionAllOperatorDescriptor;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class UnionTest extends AbstractIntegrationTest {
@Test
@@ -65,10 +67,11 @@
UnionAllOperatorDescriptor unionAll = new UnionAllOperatorDescriptor(spec, 2, desc);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, unionAll, NC2_ID, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] {
- new FileSplit(NC2_ID, createTempFile().getAbsolutePath()),
- new FileSplit(NC1_ID, createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, false,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC2_ID, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), csvScanner01, 0, unionAll, 0);
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/invertedindex/BinaryTokenizerOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/invertedindex/BinaryTokenizerOperatorTest.java
index 836e72e..600b54b 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/invertedindex/BinaryTokenizerOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/invertedindex/BinaryTokenizerOperatorTest.java
@@ -8,6 +8,7 @@
import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
@@ -21,13 +22,14 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.invertedindex.dataflow.BinaryTokenizerOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.DelimitedUTF8StringBinaryTokenizerFactory;
import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IBinaryTokenizerFactory;
import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.ITokenFactory;
import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.UTF8WordTokenFactory;
import edu.uci.ics.hyracks.tests.integration.AbstractIntegrationTest;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class BinaryTokenizerOperatorTest extends AbstractIntegrationTest {
@@ -58,9 +60,11 @@
tokenizerRecDesc, tokenizerFactory, tokenFields, keyFields);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, binaryTokenizer, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), dblpTitleScanner, 0, binaryTokenizer, 0);
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/invertedindex/InvertedIndexOperatorsTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/invertedindex/InvertedIndexOperatorsTest.java
index 2206a26..b5a4df8 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/invertedindex/InvertedIndexOperatorsTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/invertedindex/InvertedIndexOperatorsTest.java
@@ -8,6 +8,7 @@
import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
@@ -21,13 +22,14 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.invertedindex.dataflow.BinaryTokenizerOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.DelimitedUTF8StringBinaryTokenizerFactory;
import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IBinaryTokenizerFactory;
import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.ITokenFactory;
import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.UTF8WordTokenFactory;
import edu.uci.ics.hyracks.tests.integration.AbstractIntegrationTest;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class InvertedIndexOperatorsTest extends AbstractIntegrationTest {
@@ -58,9 +60,11 @@
tokenizerRecDesc, tokenizerFactory, tokenFields, projFields);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, binaryTokenizer, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), dblpTitleScanner, 0, binaryTokenizer, 0);
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/invertedindex/WordInvertedIndexTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/invertedindex/WordInvertedIndexTest.java
index d8fd48e..d1071a3 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/invertedindex/WordInvertedIndexTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/invertedindex/WordInvertedIndexTest.java
@@ -30,6 +30,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.job.JobSpecification;
@@ -48,8 +49,8 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeDataflowHelperFactory;
import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor;
@@ -74,6 +75,7 @@
import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder;
import edu.uci.ics.hyracks.test.support.TestStorageManagerInterface;
import edu.uci.ics.hyracks.tests.integration.AbstractIntegrationTest;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class WordInvertedIndexTest extends AbstractIntegrationTest {
static {
@@ -87,9 +89,12 @@
private final static SimpleDateFormat simpleDateFormat = new SimpleDateFormat("ddMMyy-hhmmssSS");
private final static String sep = System.getProperty("file.separator");
private final static String dateString = simpleDateFormat.format(new Date());
- private final static String primaryFileName = System.getProperty("java.io.tmpdir") + sep + "primaryBtree" + dateString;
- private final static String btreeFileName = System.getProperty("java.io.tmpdir") + sep + "invIndexBtree" + dateString;
- private final static String invListsFileName = System.getProperty("java.io.tmpdir") + sep + "invIndexLists" + dateString;
+ private final static String primaryFileName = System.getProperty("java.io.tmpdir") + sep + "primaryBtree"
+ + dateString;
+ private final static String btreeFileName = System.getProperty("java.io.tmpdir") + sep + "invIndexBtree"
+ + dateString;
+ private final static String invListsFileName = System.getProperty("java.io.tmpdir") + sep + "invIndexLists"
+ + dateString;
private IFileSplitProvider primaryFileSplitProvider = new ConstantFileSplitProvider(
new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(primaryFileName))) });
@@ -155,7 +160,7 @@
spec.addRoot(primaryCreateOp);
runTest(spec);
}
-
+
@Test
public void testConjunctiveSearcher() throws Exception {
IInvertedIndexSearchModifierFactory conjunctiveSearchModifierFactory = new ConjunctiveSearchModifierFactory();
@@ -180,8 +185,9 @@
private IOperatorDescriptor createPrimaryBulkLoadOp(JobSpecification spec) {
int[] fieldPermutation = { 0, 1 };
TreeIndexBulkLoadOperatorDescriptor primaryBtreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec,
- storageManager, indexRegistryProvider, primaryFileSplitProvider, primaryTypeTraits, primaryComparatorFactories, fieldPermutation, 0.7f,
- btreeDataflowHelperFactory, NoOpOperationCallbackProvider.INSTANCE);
+ storageManager, indexRegistryProvider, primaryFileSplitProvider, primaryTypeTraits,
+ primaryComparatorFactories, fieldPermutation, 0.7f, btreeDataflowHelperFactory,
+ NoOpOperationCallbackProvider.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeBulkLoad, NC1_ID);
return primaryBtreeBulkLoad;
}
@@ -206,8 +212,9 @@
int[] lowKeyFields = null; // - infinity
int[] highKeyFields = null; // + infinity
BTreeSearchOperatorDescriptor primaryBtreeSearchOp = new BTreeSearchOperatorDescriptor(spec, primaryRecDesc,
- storageManager, indexRegistryProvider, primaryFileSplitProvider, primaryTypeTraits, primaryComparatorFactories, lowKeyFields,
- highKeyFields, true, true, btreeDataflowHelperFactory, false, NoOpOperationCallbackProvider.INSTANCE);
+ storageManager, indexRegistryProvider, primaryFileSplitProvider, primaryTypeTraits,
+ primaryComparatorFactories, lowKeyFields, highKeyFields, true, true, btreeDataflowHelperFactory, false,
+ NoOpOperationCallbackProvider.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryBtreeSearchOp, NC1_ID);
return primaryBtreeSearchOp;
}
@@ -227,9 +234,12 @@
JobSpecification spec = new JobSpecification();
IOperatorDescriptor keyProviderOp = createScanKeyProviderOp(spec);
IOperatorDescriptor primaryScanOp = createPrimaryScanOp(spec);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryScanOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, printer, 0);
spec.addRoot(printer);
@@ -265,15 +275,14 @@
public void createInvertedIndex() throws Exception {
JobSpecification spec = new JobSpecification();
InvertedIndexCreateOperatorDescriptor invIndexCreateOp = new InvertedIndexCreateOperatorDescriptor(spec,
- storageManager, btreeFileSplitProvider, invListsFileSplitProvider,
- indexRegistryProvider, tokenTypeTraits, tokenComparatorFactories, invListsTypeTraits,
- invListsComparatorFactories, tokenizerFactory, btreeDataflowHelperFactory,
- NoOpOperationCallbackProvider.INSTANCE);
+ storageManager, btreeFileSplitProvider, invListsFileSplitProvider, indexRegistryProvider,
+ tokenTypeTraits, tokenComparatorFactories, invListsTypeTraits, invListsComparatorFactories,
+ tokenizerFactory, btreeDataflowHelperFactory, NoOpOperationCallbackProvider.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, invIndexCreateOp, NC1_ID);
spec.addRoot(invIndexCreateOp);
runTest(spec);
}
-
+
public void loadInvertedIndex() throws Exception {
JobSpecification spec = new JobSpecification();
IOperatorDescriptor keyProviderOp = createScanKeyProviderOp(spec);
@@ -325,20 +334,23 @@
JobSpecification spec = new JobSpecification();
IOperatorDescriptor queryProviderOp = createQueryProviderOp(spec, queryString);
IOperatorDescriptor invIndexSearchOp = createInvertedIndexSearchOp(spec, searchModifierFactory);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
spec.connect(new OneToOneConnectorDescriptor(spec), queryProviderOp, 0, invIndexSearchOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), invIndexSearchOp, 0, printer, 0);
spec.addRoot(printer);
runTest(spec);
}
-
+
@AfterClass
public static void cleanup() throws Exception {
- File primary = new File(primaryFileName);
- File btree = new File(btreeFileName);
- File invLists = new File(invListsFileName);
+ File primary = new File(primaryFileName);
+ File btree = new File(btreeFileName);
+ File invLists = new File(invListsFileName);
primary.deleteOnExit();
btree.deleteOnExit();
invLists.deleteOnExit();
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/rtree/RTreePrimaryIndexSearchOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/rtree/RTreePrimaryIndexSearchOperatorTest.java
index 6625148..92b6e14 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/rtree/RTreePrimaryIndexSearchOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/rtree/RTreePrimaryIndexSearchOperatorTest.java
@@ -30,6 +30,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
@@ -47,8 +48,8 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.api.IPrimitiveValueProviderFactory;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
@@ -64,6 +65,7 @@
import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder;
import edu.uci.ics.hyracks.test.support.TestStorageManagerInterface;
import edu.uci.ics.hyracks.tests.integration.AbstractIntegrationTest;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class RTreePrimaryIndexSearchOperatorTest extends AbstractIntegrationTest {
static {
@@ -190,9 +192,11 @@
dataflowHelperFactory, false, NoOpOperationCallbackProvider.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryRTreeSearchOp, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryRTreeSearchOp, 0);
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/rtree/RTreePrimaryIndexStatsOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/rtree/RTreePrimaryIndexStatsOperatorTest.java
index ef2950e..2a00394 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/rtree/RTreePrimaryIndexStatsOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/rtree/RTreePrimaryIndexStatsOperatorTest.java
@@ -29,6 +29,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
@@ -45,6 +46,7 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.api.IPrimitiveValueProviderFactory;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndex;
@@ -64,6 +66,7 @@
import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder;
import edu.uci.ics.hyracks.test.support.TestStorageManagerInterface;
import edu.uci.ics.hyracks.tests.integration.AbstractIntegrationTest;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class RTreePrimaryIndexStatsOperatorTest extends AbstractIntegrationTest {
static {
@@ -175,9 +178,11 @@
primaryTypeTraits, primaryComparatorFactories, dataflowHelperFactory, NoOpOperationCallbackProvider.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, primaryStatsOp, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), primaryStatsOp, 0, printer, 0);
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/rtree/RTreeSecondaryIndexSearchOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/rtree/RTreeSecondaryIndexSearchOperatorTest.java
index 030afcf..1d86037 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/rtree/RTreeSecondaryIndexSearchOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/rtree/RTreeSecondaryIndexSearchOperatorTest.java
@@ -30,6 +30,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
@@ -47,8 +48,8 @@
import edu.uci.ics.hyracks.dataflow.std.file.FileScanOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeDataflowHelperFactory;
import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor;
@@ -67,6 +68,7 @@
import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder;
import edu.uci.ics.hyracks.test.support.TestStorageManagerInterface;
import edu.uci.ics.hyracks.tests.integration.AbstractIntegrationTest;
+import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;
public class RTreeSecondaryIndexSearchOperatorTest extends AbstractIntegrationTest {
static {
@@ -297,9 +299,11 @@
secondaryTypeTraits, secondaryComparatorFactories, keyFields, dataflowHelperFactory, false, NoOpOperationCallbackProvider.INSTANCE);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, secondaryRTreeSearchOp, NC1_ID);
- IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
- createTempFile().getAbsolutePath()) });
- IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+ ResultSetId rsId = new ResultSetId(1);
+ spec.addResultSetId(rsId);
+
+ IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
+ ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, secondaryRTreeSearchOp, 0);
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/util/ResultSerializerFactoryProvider.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/util/ResultSerializerFactoryProvider.java
new file mode 100644
index 0000000..19c4475
--- /dev/null
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/util/ResultSerializerFactoryProvider.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.tests.util;
+
+import java.io.DataInputStream;
+import java.io.PrintStream;
+import java.io.Serializable;
+
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.dataflow.value.IResultSerializer;
+import edu.uci.ics.hyracks.api.dataflow.value.IResultSerializerFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
+
+public class ResultSerializerFactoryProvider implements Serializable {
+ private static final long serialVersionUID = 1L;
+
+ public static final ResultSerializerFactoryProvider INSTANCE = new ResultSerializerFactoryProvider();
+
+ private ResultSerializerFactoryProvider() {
+ }
+
+ public IResultSerializerFactory getResultSerializerFactoryProvider() {
+ return new IResultSerializerFactory() {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public IResultSerializer createResultSerializer(final RecordDescriptor recordDesc,
+ final PrintStream printStream) {
+ return new IResultSerializer() {
+ private static final long serialVersionUID = 1L;
+
+ ByteBufferInputStream bbis = new ByteBufferInputStream();
+ DataInputStream di = new DataInputStream(bbis);
+
+ @Override
+ public void init() throws HyracksDataException {
+
+ }
+
+ @Override
+ public boolean appendTuple(IFrameTupleAccessor tAccess, int tIdx) throws HyracksDataException {
+ int start = tAccess.getTupleStartOffset(tIdx) + tAccess.getFieldSlotsLength();
+
+ bbis.setByteBuffer(tAccess.getBuffer(), start);
+
+ Object[] record = new Object[recordDesc.getFieldCount()];
+ for (int i = 0; i < record.length; ++i) {
+ Object instance = recordDesc.getFields()[i].deserialize(di);
+ if (i == 0) {
+ printStream.print(String.valueOf(instance));
+ } else {
+ printStream.print(", " + String.valueOf(instance));
+ }
+ }
+ printStream.println();
+ return true;
+ }
+ };
+ }
+ };
+ }
+}
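The new provider above is only half of the pattern; every test touched in this change also swaps its PlainFileWriterOperatorDescriptor for a ResultWriterOperatorDescriptor keyed by a ResultSetId. The following is a minimal sketch of that repeated wiring, with `spec`, `sourceOp`, and `nodeId` standing in for whatever each individual test already provides; it illustrates the pattern and is not code taken from this change.

import edu.uci.ics.hyracks.api.constraints.PartitionConstraintHelper;
import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
import edu.uci.ics.hyracks.api.dataset.ResultSetId;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.result.ResultWriterOperatorDescriptor;
import edu.uci.ics.hyracks.tests.util.ResultSerializerFactoryProvider;

public class ResultWiringSketch {
    // Attaches a result writer to `sourceOp` the same way the tests above do.
    public static void attachResultWriter(JobSpecification spec, IOperatorDescriptor sourceOp, String nodeId)
            throws Exception {
        // Register the result set with the job so the client can fetch it later.
        ResultSetId rsId = new ResultSetId(1);
        spec.addResultSetId(rsId);

        // The boolean flag mirrors the tests above (false in UnionTest, true elsewhere);
        // the factory serializes each tuple as comma-separated fields.
        IOperatorDescriptor printer = new ResultWriterOperatorDescriptor(spec, rsId, true,
                ResultSerializerFactoryProvider.INSTANCE.getResultSerializerFactoryProvider());
        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, nodeId);

        spec.connect(new OneToOneConnectorDescriptor(spec), sourceOp, 0, printer, 0);
        spec.addRoot(printer);
    }
}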
diff --git a/hyracks/hyracks-examples/pom.xml b/hyracks/hyracks-examples/pom.xml
index 8ce8108..551e2be 100644
--- a/hyracks/hyracks-examples/pom.xml
+++ b/hyracks/hyracks-examples/pom.xml
@@ -1,8 +1,6 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
- <groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-examples</artifactId>
- <version>0.2.3-SNAPSHOT</version>
<packaging>pom</packaging>
<name>hyracks-examples</name>
diff --git a/hyracks/hyracks-examples/text-example/pom.xml b/hyracks/hyracks-examples/text-example/pom.xml
index 367e0a5..469fd0e 100644
--- a/hyracks/hyracks-examples/text-example/pom.xml
+++ b/hyracks/hyracks-examples/text-example/pom.xml
@@ -2,7 +2,6 @@
<modelVersion>4.0.0</modelVersion>
<groupId>edu.uci.ics.hyracks.examples</groupId>
<artifactId>text-example</artifactId>
- <version>0.2.3-SNAPSHOT</version>
<packaging>pom</packaging>
<name>text-example</name>
diff --git a/hyracks/hyracks-examples/text-example/textclient/pom.xml b/hyracks/hyracks-examples/text-example/textclient/pom.xml
index 901f1fb2..4aace73 100644
--- a/hyracks/hyracks-examples/text-example/textclient/pom.xml
+++ b/hyracks/hyracks-examples/text-example/textclient/pom.xml
@@ -2,7 +2,6 @@
<modelVersion>4.0.0</modelVersion>
<groupId>edu.uci.ics.hyracks.examples.text</groupId>
<artifactId>textclient</artifactId>
- <version>0.2.3-SNAPSHOT</version>
<name>textclient</name>
<parent>
@@ -33,8 +32,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
<plugin>
diff --git a/hyracks/hyracks-examples/text-example/texthelper/pom.xml b/hyracks/hyracks-examples/text-example/texthelper/pom.xml
index 8e32c8c..bcb280c 100644
--- a/hyracks/hyracks-examples/text-example/texthelper/pom.xml
+++ b/hyracks/hyracks-examples/text-example/texthelper/pom.xml
@@ -36,8 +36,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-examples/tpch-example/pom.xml b/hyracks/hyracks-examples/tpch-example/pom.xml
index 93514de..7d676dc 100644
--- a/hyracks/hyracks-examples/tpch-example/pom.xml
+++ b/hyracks/hyracks-examples/tpch-example/pom.xml
@@ -2,7 +2,6 @@
<modelVersion>4.0.0</modelVersion>
<groupId>edu.uci.ics.hyracks.examples</groupId>
<artifactId>tpch-example</artifactId>
- <version>0.2.3-SNAPSHOT</version>
<packaging>pom</packaging>
<name>tpch-example</name>
diff --git a/hyracks/hyracks-examples/tpch-example/tpchclient/pom.xml b/hyracks/hyracks-examples/tpch-example/tpchclient/pom.xml
index 6b3f603..4e0d9f0 100644
--- a/hyracks/hyracks-examples/tpch-example/tpchclient/pom.xml
+++ b/hyracks/hyracks-examples/tpch-example/tpchclient/pom.xml
@@ -29,8 +29,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
<plugin>
diff --git a/hyracks/hyracks-hadoop-compat/pom.xml b/hyracks/hyracks-hadoop-compat/pom.xml
index 3426293..87aaaa7 100644
--- a/hyracks/hyracks-hadoop-compat/pom.xml
+++ b/hyracks/hyracks-hadoop-compat/pom.xml
@@ -18,8 +18,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
<plugin>
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/pom.xml b/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/pom.xml
index e74d610..9092655 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/pom.xml
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/pom.xml
@@ -18,8 +18,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
<plugin>
@@ -63,6 +64,10 @@
<profile>
<activation>
<activeByDefault>false</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>1.0.4</value>
+ </property>
</activation>
<id>hadoop-1.0.4</id>
<dependencies>
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/src/main/java/edu/uci/ics/hyracks/hdfs/ContextFactory.java b/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/src/main/java/edu/uci/ics/hyracks/hdfs/ContextFactory.java
index a2b16c6..16ce76b 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/src/main/java/edu/uci/ics/hyracks/hdfs/ContextFactory.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/src/main/java/edu/uci/ics/hyracks/hdfs/ContextFactory.java
@@ -1,7 +1,8 @@
package edu.uci.ics.hyracks.hdfs;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
@@ -14,12 +15,25 @@
public class ContextFactory {
@SuppressWarnings({ "unchecked", "rawtypes" })
- public TaskAttemptContext createContext(Configuration conf, InputSplit split) throws HyracksDataException {
+ public TaskAttemptContext createContext(Configuration conf, TaskAttemptID tid) throws HyracksDataException {
try {
- return new Mapper().new Context(conf, new TaskAttemptID(), null, null, null, null, split);
+ return new Mapper().new Context(conf, tid, null, null, null, null, null);
} catch (Exception e) {
throw new HyracksDataException(e);
}
}
+ public TaskAttemptContext createContext(Configuration conf, int partition) throws HyracksDataException {
+ try {
+ TaskAttemptID tid = new TaskAttemptID("", 0, true, partition, 0);
+ return new TaskAttemptContext(conf, tid);
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ public JobContext createJobContext(Configuration conf) {
+ return new JobContext(conf, new JobID("0", 0));
+ }
+
}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/pom.xml b/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/pom.xml
index 27a1e33..8b7ecf0 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/pom.xml
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/pom.xml
@@ -17,8 +17,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
<plugin>
@@ -40,6 +41,10 @@
<profile>
<activation>
<activeByDefault>true</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>0.23.1</value>
+ </property>
</activation>
<id>hadoop-0.23.1</id>
<dependencies>
@@ -77,6 +82,10 @@
<id>hadoop-0.23.6</id>
<activation>
<activeByDefault>false</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>0.23.6</value>
+ </property>
</activation>
<dependencies>
<dependency>
@@ -109,6 +118,86 @@
</dependency>
</dependencies>
</profile>
+ <profile>
+ <activation>
+ <activeByDefault>false</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>cdh-4.2</value>
+ </property>
+ </activation>
+ <id>cdh-4.2</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <version>2.0.0-cdh4.2.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-core</artifactId>
+ <version>2.0.0-cdh4.2.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <version>2.0.0-cdh4.2.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ <version>2.0.0-cdh4.2.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+ </profile>
+ <profile>
+ <activation>
+ <activeByDefault>false</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>cdh-4.1</value>
+ </property>
+ </activation>
+ <id>cdh-4.1</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <version>2.0.0-cdh4.1.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-core</artifactId>
+ <version>2.0.0-cdh4.1.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <version>2.0.0-cdh4.1.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ <version>2.0.0-cdh4.1.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+ </profile>
</profiles>
<dependencies>
@@ -120,4 +209,11 @@
<scope>compile</scope>
</dependency>
</dependencies>
+
+ <repositories>
+ <repository>
+ <id>cloudera</id>
+ <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
+ </repository>
+ </repositories>
</project>
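With the activation properties added above, the Hadoop flavor can be selected at build time by passing the matching property value, e.g. `mvn clean package -Dhadoop=cdh-4.2` or `-Dhadoop=0.23.6` (assuming a standard Maven invocation); omitting the property keeps the profile marked activeByDefault.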
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/src/main/java/edu/uci/ics/hyracks/hdfs/ContextFactory.java b/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/src/main/java/edu/uci/ics/hyracks/hdfs/ContextFactory.java
index 60ae5d3..ddcce64 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/src/main/java/edu/uci/ics/hyracks/hdfs/ContextFactory.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/src/main/java/edu/uci/ics/hyracks/hdfs/ContextFactory.java
@@ -1,9 +1,12 @@
package edu.uci.ics.hyracks.hdfs;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.TaskType;
+import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
@@ -13,12 +16,25 @@
*/
public class ContextFactory {
- public TaskAttemptContext createContext(Configuration conf, InputSplit split) throws HyracksDataException {
+ public TaskAttemptContext createContext(Configuration conf, TaskAttemptID tid) throws HyracksDataException {
try {
- return new TaskAttemptContextImpl(conf, new TaskAttemptID());
+ return new TaskAttemptContextImpl(conf, tid);
} catch (Exception e) {
throw new HyracksDataException(e);
}
}
+ public TaskAttemptContext createContext(Configuration conf, int partition) throws HyracksDataException {
+ try {
+ TaskAttemptID tid = new TaskAttemptID("", 0, TaskType.REDUCE, partition, 0);
+ return new TaskAttemptContextImpl(conf, tid);
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ public JobContext createJobContext(Configuration conf) {
+ return new JobContextImpl(conf, new JobID("0", 0));
+ }
+
}
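Both ContextFactory variants now expose the same surface (createContext taking either a TaskAttemptID or a partition number, plus createJobContext), so code in hyracks-hdfs-core can stay Hadoop-version agnostic. A minimal caller sketch, assuming only the methods shown above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.hdfs.ContextFactory;

public class ContextFactoryUsageSketch {
    private final ContextFactory contextFactory = new ContextFactory();

    // Builds a per-partition task attempt context without touching version-specific classes.
    public TaskAttemptContext contextFor(Configuration conf, int partition) throws HyracksDataException {
        return contextFactory.createContext(conf, partition);
    }

    // Builds a job-level context the same way on 0.20.2, 0.23.x, and the CDH builds.
    public JobContext jobContext(Configuration conf) {
        return contextFactory.createJobContext(conf);
    }
}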
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/pom.xml b/hyracks/hyracks-hdfs/hyracks-hdfs-core/pom.xml
index fccfec4..a28c698a 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/pom.xml
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/pom.xml
@@ -18,8 +18,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
<plugin>
@@ -75,6 +76,10 @@
<profile>
<activation>
<activeByDefault>false</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>1.0.4</value>
+ </property>
</activation>
<id>hadoop-1.0.4</id>
<dependencies>
@@ -90,6 +95,10 @@
<profile>
<activation>
<activeByDefault>false</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>0.23.1</value>
+ </property>
</activation>
<id>hadoop-0.23.1</id>
<dependencies>
@@ -105,6 +114,10 @@
<profile>
<activation>
<activeByDefault>false</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>0.23.6</value>
+ </property>
</activation>
<id>hadoop-0.23.6</id>
<dependencies>
@@ -117,6 +130,44 @@
</dependency>
</dependencies>
</profile>
+ <profile>
+ <activation>
+ <activeByDefault>false</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>cdh-4.1</value>
+ </property>
+ </activation>
+ <id>cdh-4.1</id>
+ <dependencies>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-hdfs-0.23.1</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+ </profile>
+ <profile>
+ <activation>
+ <activeByDefault>false</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>cdh-4.2</value>
+ </property>
+ </activation>
+ <id>cdh-4.2</id>
+ <dependencies>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-hdfs-0.23.1</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+ </profile>
</profiles>
<dependencies>
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParser.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParser.java
index 5923e1e..5d35ec5 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParser.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParser.java
@@ -29,12 +29,24 @@
public interface IKeyValueParser<K, V> {
/**
+ * Initialize the key value parser.
+ *
+ * @param writer
+ * The hyracks writer for outputting data.
+ * @throws HyracksDataException
+ */
+ public void open(IFrameWriter writer) throws HyracksDataException;
+
+ /**
* Parse a key-value pair returned by HDFS record reader to a tuple.
* When the parser's internal buffer is full, it can flush the buffer to the writer.
*
* @param key
+ * The key returned from Hadoop's InputReader.
* @param value
+ * The value returned from Hadoop's InputReader.
* @param writer
+ * The hyracks writer for outputting data.
* @throws HyracksDataException
*/
public void parse(K key, V value, IFrameWriter writer) throws HyracksDataException;
@@ -44,7 +56,8 @@
* This method is called in the close() of HDFSReadOperatorDescriptor.
*
* @param writer
+ * The hyracks writer for outputting data.
* @throws HyracksDataException
*/
- public void flush(IFrameWriter writer) throws HyracksDataException;
+ public void close(IFrameWriter writer) throws HyracksDataException;
}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParserFactory.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParserFactory.java
index 6e943ad..7d6f868 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParserFactory.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParserFactory.java
@@ -18,6 +18,7 @@
import java.io.Serializable;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
/**
* Users need to implement this interface to use the HDFSReadOperatorDescriptor.
@@ -36,6 +37,6 @@
* the IHyracksTaskContext
* @return a key-value parser instance.
*/
- public IKeyValueParser<K, V> createKeyValueParser(IHyracksTaskContext ctx);
+ public IKeyValueParser<K, V> createKeyValueParser(IHyracksTaskContext ctx) throws HyracksDataException;
}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/INcCollection.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/INcCollection.java
new file mode 100644
index 0000000..c51c1dd
--- /dev/null
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/INcCollection.java
@@ -0,0 +1,11 @@
+package edu.uci.ics.hyracks.hdfs.api;
+
+import org.apache.hadoop.mapred.InputSplit;
+
+@SuppressWarnings("deprecation")
+public interface INcCollection {
+
+ public String findNearestAvailableSlot(InputSplit split);
+
+ public int numAvailableSlots();
+}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/INcCollectionBuilder.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/INcCollectionBuilder.java
new file mode 100644
index 0000000..ef3ff23
--- /dev/null
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/INcCollectionBuilder.java
@@ -0,0 +1,18 @@
+package edu.uci.ics.hyracks.hdfs.api;
+
+import java.util.List;
+import java.util.Map;
+
+import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
+
+/**
+ * Builds an INcCollection over the cluster's node controllers for HDFS split scheduling.
+ *
+ * @author yingyib
+ */
+public interface INcCollectionBuilder {
+
+ public INcCollection build(Map<String, NodeControllerInfo> ncNameToNcInfos,
+ Map<String, List<String>> ipToNcMapping, Map<String, Integer> ncNameToIndex, String[] NCs, int[] workloads,
+ int slotLimit);
+}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriter.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriter.java
index 25b9523..8e85627 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriter.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriter.java
@@ -26,6 +26,15 @@
public interface ITupleWriter {
/**
+ * Initialize the tuple writer.
+ *
+ * @param output
+ * The channel for output data.
+ * @throws HyracksDataException
+ */
+ public void open(DataOutput output) throws HyracksDataException;
+
+ /**
* Write the tuple to the DataOutput.
*
* @param output
@@ -36,4 +45,13 @@
*/
public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException;
+ /**
+ * Close the writer.
+ *
+ * @param output
+ * The channel for output data.
+ * @throws HyracksDataException
+ */
+ public void close(DataOutput output) throws HyracksDataException;
+
}
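With open() and close() added alongside write(), an ITupleWriter can now emit per-file prologue and epilogue bytes around the tuples it writes. The TextTupleWriterFactory change further down leaves both hooks empty; the hypothetical writer below is only an illustration of what the lifecycle permits.

import java.io.DataOutput;
import java.io.IOException;

import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;

public class FramedTupleWriterSketch implements ITupleWriter {
    @Override
    public void open(DataOutput output) throws HyracksDataException {
        try {
            output.write("# begin partition\n".getBytes()); // per-file prologue
        } catch (IOException e) {
            throw new HyracksDataException(e);
        }
    }

    @Override
    public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
        try {
            // Write the first field's raw bytes followed by a newline; a real writer
            // would format every field (see TextTupleWriterFactory below).
            output.write(tuple.getFieldData(0), tuple.getFieldStart(0), tuple.getFieldLength(0));
            output.write('\n');
        } catch (IOException e) {
            throw new HyracksDataException(e);
        }
    }

    @Override
    public void close(DataOutput output) throws HyracksDataException {
        try {
            output.write("# end partition\n".getBytes()); // per-file epilogue
        } catch (IOException e) {
            throw new HyracksDataException(e);
        }
    }
}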
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriterFactory.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriterFactory.java
index 839de8f..9a025c2 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriterFactory.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriterFactory.java
@@ -17,14 +17,19 @@
import java.io.Serializable;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
/**
* Users need to implement this interface to use the HDFSWriteOperatorDescriptor.
*/
public interface ITupleWriterFactory extends Serializable {
/**
+ * @param ctx
+ * the IHyracksTaskContext
* @return a tuple writer instance
*/
- public ITupleWriter getTupleWriter();
+ public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException;
}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSReadOperatorDescriptor.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSReadOperatorDescriptor.java
index e924650..f49688b 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSReadOperatorDescriptor.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSReadOperatorDescriptor.java
@@ -102,6 +102,7 @@
JobConf conf = confFactory.getConf();
IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
writer.open();
+ parser.open(writer);
InputFormat inputFormat = conf.getInputFormat();
for (int i = 0; i < inputSplits.length; i++) {
/**
@@ -131,7 +132,7 @@
}
}
}
- parser.flush(writer);
+ parser.close(writer);
writer.close();
} catch (Exception e) {
throw new HyracksDataException(e);
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSWriteOperatorDescriptor.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSWriteOperatorDescriptor.java
index ff97a29..3ce6b2a 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSWriteOperatorDescriptor.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSWriteOperatorDescriptor.java
@@ -89,10 +89,11 @@
String outputDirPath = FileOutputFormat.getOutputPath(conf).toString();
String fileName = outputDirPath + File.separator + "part-" + partition;
- tupleWriter = tupleWriterFactory.getTupleWriter();
+ tupleWriter = tupleWriterFactory.getTupleWriter(ctx);
try {
FileSystem dfs = FileSystem.get(conf);
dos = dfs.create(new Path(fileName), true);
+ tupleWriter.open(dos);
} catch (Exception e) {
throw new HyracksDataException(e);
}
@@ -116,6 +117,7 @@
@Override
public void close() throws HyracksDataException {
try {
+ tupleWriter.close(dos);
dos.close();
} catch (Exception e) {
throw new HyracksDataException(e);
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/InputSplitsFactory.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/InputSplitsFactory.java
index 9cc9ebc..147e872 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/InputSplitsFactory.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/InputSplitsFactory.java
@@ -23,6 +23,7 @@
import java.io.Serializable;
import java.lang.reflect.Constructor;
+import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
@@ -38,6 +39,8 @@
splitBytes = splitsToBytes(splits);
if (splits.length > 0) {
splitClassName = splits[0].getClass().getName();
+ } else {
+ splitClassName = FileSplit.class.getName();
}
}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextKeyValueParserFactory.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextKeyValueParserFactory.java
index c691f5d..9574bb4 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextKeyValueParserFactory.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextKeyValueParserFactory.java
@@ -43,6 +43,11 @@
return new IKeyValueParser<LongWritable, Text>() {
@Override
+ public void open(IFrameWriter writer) {
+
+ }
+
+ @Override
public void parse(LongWritable key, Text value, IFrameWriter writer) throws HyracksDataException {
tb.reset();
tb.addField(value.getBytes(), 0, value.getLength());
@@ -56,7 +61,7 @@
}
@Override
- public void flush(IFrameWriter writer) throws HyracksDataException {
+ public void close(IFrameWriter writer) throws HyracksDataException {
FrameUtils.flushFrame(buffer, writer);
}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextTupleWriterFactory.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextTupleWriterFactory.java
index d26721d..0da14e5 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextTupleWriterFactory.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextTupleWriterFactory.java
@@ -17,6 +17,7 @@
import java.io.DataOutput;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;
@@ -26,9 +27,14 @@
private static final long serialVersionUID = 1L;
@Override
- public ITupleWriter getTupleWriter() {
+ public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) {
return new ITupleWriter() {
- byte newLine = "\n".getBytes()[0];
+ private byte newLine = "\n".getBytes()[0];
+
+ @Override
+ public void open(DataOutput output) {
+
+ }
@Override
public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
@@ -43,6 +49,11 @@
}
}
+ @Override
+ public void close(DataOutput output) {
+
+ }
+
};
}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/IPProximityNcCollectionBuilder.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/IPProximityNcCollectionBuilder.java
new file mode 100644
index 0000000..320b48b
--- /dev/null
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/IPProximityNcCollectionBuilder.java
@@ -0,0 +1,121 @@
+package edu.uci.ics.hyracks.hdfs.scheduler;
+
+import java.net.InetAddress;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.TreeMap;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.mapred.InputSplit;
+
+import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
+import edu.uci.ics.hyracks.hdfs.api.INcCollection;
+import edu.uci.ics.hyracks.hdfs.api.INcCollectionBuilder;
+
+@SuppressWarnings("deprecation")
+public class IPProximityNcCollectionBuilder implements INcCollectionBuilder {
+
+ @Override
+ public INcCollection build(Map<String, NodeControllerInfo> ncNameToNcInfos,
+ final Map<String, List<String>> ipToNcMapping, final Map<String, Integer> ncNameToIndex, String[] NCs,
+ final int[] workloads, final int slotLimit) {
+ final TreeMap<BytesWritable, IntWritable> availableIpsToSlots = new TreeMap<BytesWritable, IntWritable>();
+ for (int i = 0; i < workloads.length; i++) {
+ if (workloads[i] < slotLimit) {
+ BytesWritable ip = new BytesWritable(ncNameToNcInfos.get(NCs[i]).getNetworkAddress().getIpAddress());
+ IntWritable availableSlot = availableIpsToSlots.get(ip);
+ if (availableSlot == null) {
+ availableSlot = new IntWritable(slotLimit - workloads[i]);
+ availableIpsToSlots.put(ip, availableSlot);
+ } else {
+ availableSlot.set(slotLimit - workloads[i] + availableSlot.get());
+ }
+ }
+ }
+ return new INcCollection() {
+
+ @Override
+ public String findNearestAvailableSlot(InputSplit split) {
+ try {
+ String[] locs = split.getLocations();
+ int minDistance = Integer.MAX_VALUE;
+ BytesWritable currentCandidateIp = null;
+ if (locs != null && locs.length > 0) {
+ for (int j = 0; j < locs.length; j++) {
+ /**
+ * get all the IP addresses from the name
+ */
+ InetAddress[] allIps = InetAddress.getAllByName(locs[j]);
+ for (InetAddress ip : allIps) {
+ BytesWritable splitIp = new BytesWritable(ip.getAddress());
+ /**
+ * if the node controller exists
+ */
+ BytesWritable candidateNcIp = availableIpsToSlots.floorKey(splitIp);
+ if (candidateNcIp == null) {
+ candidateNcIp = availableIpsToSlots.ceilingKey(splitIp);
+ }
+ if (candidateNcIp != null) {
+ if (availableIpsToSlots.get(candidateNcIp).get() > 0) {
+ byte[] candidateIP = candidateNcIp.getBytes();
+ byte[] splitIP = splitIp.getBytes();
+ int candidateInt = candidateIP[0] << 24 | (candidateIP[1] & 0xFF) << 16
+ | (candidateIP[2] & 0xFF) << 8 | (candidateIP[3] & 0xFF);
+ int splitInt = splitIP[0] << 24 | (splitIP[1] & 0xFF) << 16
+ | (splitIP[2] & 0xFF) << 8 | (splitIP[3] & 0xFF);
+ int distance = Math.abs(candidateInt - splitInt);
+ if (minDistance > distance) {
+ minDistance = distance;
+ currentCandidateIp = candidateNcIp;
+ }
+ }
+ }
+ }
+ }
+ } else {
+ for (Entry<BytesWritable, IntWritable> entry : availableIpsToSlots.entrySet()) {
+ if (entry.getValue().get() > 0) {
+ currentCandidateIp = entry.getKey();
+ break;
+ }
+ }
+ }
+
+ if (currentCandidateIp != null) {
+ /**
+ * Update the entry of the selected IP
+ */
+ IntWritable availableSlot = availableIpsToSlots.get(currentCandidateIp);
+ availableSlot.set(availableSlot.get() - 1);
+ if (availableSlot.get() == 0) {
+ availableIpsToSlots.remove(currentCandidateIp);
+ }
+ /**
+ * Update the entry of the selected NC
+ */
+ List<String> dataLocations = ipToNcMapping.get(InetAddress.getByAddress(
+ currentCandidateIp.getBytes()).getHostAddress());
+ for (String nc : dataLocations) {
+ int ncIndex = ncNameToIndex.get(nc);
+ if (workloads[ncIndex] < slotLimit) {
+ return nc;
+ }
+ }
+ }
+ /** not scheduled */
+ return null;
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ @Override
+ public int numAvailableSlots() {
+ return availableIpsToSlots.size();
+ }
+
+ };
+ }
+}
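The builder above packs IPv4 addresses into signed 32-bit integers and uses the absolute numeric difference as its proximity measure, so addresses in the same subnet compare as close while addresses in distant ranges compare as far. A small worked example of that packing (illustrative, not part of the change):

public class IpDistanceSketch {
    // Same packing as the candidate/split comparison in the builder above.
    static int pack(byte[] ip) {
        return ip[0] << 24 | (ip[1] & 0xFF) << 16 | (ip[2] & 0xFF) << 8 | (ip[3] & 0xFF);
    }

    public static void main(String[] args) {
        byte[] split = { 10, 0, 0, 5 }; // split replica location
        byte[] nc = { 10, 0, 0, 9 };    // candidate node controller
        // Prints 4: addresses in the same subnet end up numerically close.
        System.out.println(Math.abs(pack(split) - pack(nc)));
    }
}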
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/RackAwareNcCollectionBuilder.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/RackAwareNcCollectionBuilder.java
new file mode 100644
index 0000000..5371c84
--- /dev/null
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/RackAwareNcCollectionBuilder.java
@@ -0,0 +1,167 @@
+package edu.uci.ics.hyracks.hdfs.scheduler;
+
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.TreeMap;
+
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.mapred.InputSplit;
+
+import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
+import edu.uci.ics.hyracks.api.topology.ClusterTopology;
+import edu.uci.ics.hyracks.hdfs.api.INcCollection;
+import edu.uci.ics.hyracks.hdfs.api.INcCollectionBuilder;
+
+@SuppressWarnings("deprecation")
+public class RackAwareNcCollectionBuilder implements INcCollectionBuilder {
+ private ClusterTopology topology;
+
+ public RackAwareNcCollectionBuilder(ClusterTopology topology) {
+ this.topology = topology;
+ }
+
+ @Override
+ public INcCollection build(Map<String, NodeControllerInfo> ncNameToNcInfos,
+ final Map<String, List<String>> ipToNcMapping, final Map<String, Integer> ncNameToIndex, String[] NCs,
+ final int[] workloads, final int slotLimit) {
+ try {
+ final Map<List<Integer>, String> pathToNCs = new HashMap<List<Integer>, String>();
+ for (int i = 0; i < NCs.length; i++) {
+ List<Integer> path = new ArrayList<Integer>();
+ String ipAddress = InetAddress.getByAddress(
+ ncNameToNcInfos.get(NCs[i]).getNetworkAddress().getIpAddress()).getHostAddress();
+ topology.lookupNetworkTerminal(ipAddress, path);
+ pathToNCs.put(path, NCs[i]);
+ }
+
+ final TreeMap<List<Integer>, IntWritable> availableIpsToSlots = new TreeMap<List<Integer>, IntWritable>(
+ new Comparator<List<Integer>>() {
+
+ @Override
+ public int compare(List<Integer> l1, List<Integer> l2) {
+ int commonLength = Math.min(l1.size(), l2.size());
+ for (int i = 0; i < commonLength; i++) {
+ Integer value1 = l1.get(i);
+ Integer value2 = l2.get(i);
+ int cmp = value1 > value2 ? 1 : (value1 < value2 ? -1 : 0);
+ if (cmp != 0) {
+ return cmp;
+ }
+ }
+ return l1.size() > l2.size() ? 1 : (l1.size() < l2.size() ? -1 : 0);
+ }
+
+ });
+ for (int i = 0; i < workloads.length; i++) {
+ if (workloads[i] < slotLimit) {
+ List<Integer> path = new ArrayList<Integer>();
+ String ipAddress = InetAddress.getByAddress(
+ ncNameToNcInfos.get(NCs[i]).getNetworkAddress().getIpAddress()).getHostAddress();
+ topology.lookupNetworkTerminal(ipAddress, path);
+ IntWritable availableSlot = availableIpsToSlots.get(path);
+ if (availableSlot == null) {
+ availableSlot = new IntWritable(slotLimit - workloads[i]);
+ availableIpsToSlots.put(path, availableSlot);
+ } else {
+ availableSlot.set(slotLimit - workloads[i] + availableSlot.get());
+ }
+ }
+ }
+ return new INcCollection() {
+
+ @Override
+ public String findNearestAvailableSlot(InputSplit split) {
+ try {
+ String[] locs = split.getLocations();
+ int minDistance = Integer.MAX_VALUE;
+ List<Integer> currentCandidatePath = null;
+ if (locs != null && locs.length > 0) {
+ for (int j = 0; j < locs.length; j++) {
+ /**
+ * get all the IP addresses from the name
+ */
+ InetAddress[] allIps = InetAddress.getAllByName(locs[j]);
+ for (InetAddress ip : allIps) {
+ List<Integer> splitPath = new ArrayList<Integer>();
+ boolean inCluster = topology.lookupNetworkTerminal(ip.getHostAddress(), splitPath);
+ if (!inCluster) {
+ continue;
+ }
+ /**
+                             * find the nearest candidate path among the available NCs
+ */
+ List<Integer> candidatePath = availableIpsToSlots.floorKey(splitPath);
+ if (candidatePath == null) {
+ candidatePath = availableIpsToSlots.ceilingKey(splitPath);
+ }
+ if (candidatePath != null) {
+ if (availableIpsToSlots.get(candidatePath).get() > 0) {
+ int distance = distance(splitPath, candidatePath);
+ if (minDistance > distance) {
+ minDistance = distance;
+ currentCandidatePath = candidatePath;
+ }
+ }
+
+ }
+ }
+ }
+ } else {
+ for (Entry<List<Integer>, IntWritable> entry : availableIpsToSlots.entrySet()) {
+ if (entry.getValue().get() > 0) {
+ currentCandidatePath = entry.getKey();
+ break;
+ }
+ }
+ }
+
+                    if (currentCandidatePath != null && currentCandidatePath.size() > 0) {
+ /**
+ * Update the entry of the selected IP
+ */
+ IntWritable availableSlot = availableIpsToSlots.get(currentCandidatePath);
+ availableSlot.set(availableSlot.get() - 1);
+ if (availableSlot.get() == 0) {
+ availableIpsToSlots.remove(currentCandidatePath);
+ }
+ /**
+ * Update the entry of the selected NC
+ */
+ String candidateNc = pathToNCs.get(currentCandidatePath);
+ int ncIndex = ncNameToIndex.get(candidateNc);
+ if (workloads[ncIndex] < slotLimit) {
+ return candidateNc;
+ }
+ }
+ /** not scheduled */
+ return null;
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ @Override
+ public int numAvailableSlots() {
+ return availableIpsToSlots.size();
+ }
+
+ private int distance(List<Integer> splitPath, List<Integer> candidatePath) {
+ int commonLength = Math.min(splitPath.size(), candidatePath.size());
+ int distance = 0;
+ for (int i = 0; i < commonLength; i++) {
+ distance = distance * 10 + Math.abs(splitPath.get(i) - candidatePath.get(i));
+ }
+ return distance;
+ }
+ };
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+}
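Editor's note: the builder above keys a TreeMap on topology paths (lists of switch indices) and uses floorKey/ceilingKey to pick the closest available node controller to a split replica. Below is a minimal, self-contained sketch of that lookup idea, using hypothetical two-level paths of the form (rack, position) instead of a parsed ClusterTopology; it is an illustration, not part of the patch.

import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.TreeMap;

public class NearestPathSketch {
    public static void main(String[] args) {
        // Same lexicographic ordering over topology paths as the comparator above.
        Comparator<List<Integer>> pathOrder = new Comparator<List<Integer>>() {
            @Override
            public int compare(List<Integer> l1, List<Integer> l2) {
                int common = Math.min(l1.size(), l2.size());
                for (int i = 0; i < common; i++) {
                    int cmp = Integer.compare(l1.get(i), l2.get(i));
                    if (cmp != 0) {
                        return cmp;
                    }
                }
                return Integer.compare(l1.size(), l2.size());
            }
        };

        // Hypothetical available NC paths of the form (rack index, position in rack).
        TreeMap<List<Integer>, String> availablePathsToNc = new TreeMap<List<Integer>, String>(pathOrder);
        availablePathsToNc.put(Arrays.asList(0, 0), "nc1");
        availablePathsToNc.put(Arrays.asList(0, 3), "nc5");
        availablePathsToNc.put(Arrays.asList(2, 1), "nc7");

        // A split replica at path (0, 2): floorKey/ceilingKey return the closest
        // keys in path order, which is how the builder picks a nearby NC.
        List<Integer> splitPath = Arrays.asList(0, 2);
        List<Integer> candidate = availablePathsToNc.floorKey(splitPath);
        if (candidate == null) {
            candidate = availablePathsToNc.ceilingKey(splitPath);
        }
        System.out.println("nearest available NC: " + availablePathsToNc.get(candidate)); // nc1
    }
}

In the real builder the paths come from ClusterTopology.lookupNetworkTerminal and the map values are slot counters, but the nearest-key search works the same way.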
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/Scheduler.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/Scheduler.java
index e7309d4..6d31855 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/Scheduler.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/Scheduler.java
@@ -17,13 +17,18 @@
import java.io.IOException;
import java.net.InetAddress;
+import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.PriorityQueue;
import java.util.Random;
+import java.util.logging.Logger;
+import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.InputSplit;
import edu.uci.ics.hyracks.api.client.HyracksConnection;
@@ -31,13 +36,17 @@
import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.topology.ClusterTopology;
+import edu.uci.ics.hyracks.hdfs.api.INcCollection;
+import edu.uci.ics.hyracks.hdfs.api.INcCollectionBuilder;
/**
- * The scheduler conduct data-local scheduling for data reading on HDFS.
- * This class works for Hadoop old API.
+ * The scheduler conducts data-local scheduling for data reading on HDFS. This
+ * class works for the Hadoop old API.
*/
@SuppressWarnings("deprecation")
public class Scheduler {
+ private static final Logger LOGGER = Logger.getLogger(Scheduler.class.getName());
/** a list of NCs */
private String[] NCs;
@@ -48,8 +57,16 @@
/** a map from the NC name to the index */
private Map<String, Integer> ncNameToIndex = new HashMap<String, Integer>();
+ /** a map from NC name to the NodeControllerInfo */
+ private Map<String, NodeControllerInfo> ncNameToNcInfos;
+
/**
- * The constructor of the scheduler
+ * the nc collection builder
+ */
+ private INcCollectionBuilder ncCollectionBuilder;
+
+ /**
+ * The constructor of the scheduler.
*
* @param ncNameToNcInfos
* @throws HyracksException
@@ -57,113 +74,130 @@
public Scheduler(String ipAddress, int port) throws HyracksException {
try {
IHyracksClientConnection hcc = new HyracksConnection(ipAddress, port);
- Map<String, NodeControllerInfo> ncNameToNcInfos = hcc.getNodeControllerInfos();
+ this.ncNameToNcInfos = hcc.getNodeControllerInfos();
+ ClusterTopology topology = hcc.getClusterTopology();
+ this.ncCollectionBuilder = topology == null ? new IPProximityNcCollectionBuilder()
+ : new RackAwareNcCollectionBuilder(topology);
loadIPAddressToNCMap(ncNameToNcInfos);
} catch (Exception e) {
throw new HyracksException(e);
}
}
+ /**
+ * The constructor of the scheduler.
+ *
+ * @param ncNameToNcInfos
+ * @throws HyracksException
+ */
+ public Scheduler(String ipAddress, int port, INcCollectionBuilder ncCollectionBuilder) throws HyracksException {
+ try {
+ IHyracksClientConnection hcc = new HyracksConnection(ipAddress, port);
+ this.ncNameToNcInfos = hcc.getNodeControllerInfos();
+ this.ncCollectionBuilder = ncCollectionBuilder;
+ loadIPAddressToNCMap(ncNameToNcInfos);
+ } catch (Exception e) {
+ throw new HyracksException(e);
+ }
+ }
+
+ /**
+ * The constructor of the scheduler.
+ *
+ * @param ncNameToNcInfos
+ * the mapping from nc names to nc infos
+ * @throws HyracksException
+ */
public Scheduler(Map<String, NodeControllerInfo> ncNameToNcInfos) throws HyracksException {
+ this.ncNameToNcInfos = ncNameToNcInfos;
+ this.ncCollectionBuilder = new IPProximityNcCollectionBuilder();
loadIPAddressToNCMap(ncNameToNcInfos);
}
/**
- * Set location constraints for a file scan operator with a list of file splits
+ * The constructor of the scheduler.
+ *
+ * @param ncNameToNcInfos
+ * the mapping from nc names to nc infos
+ * @param topology
+     *            the hyracks cluster topology
+ * @throws HyracksException
+ */
+ public Scheduler(Map<String, NodeControllerInfo> ncNameToNcInfos, ClusterTopology topology) throws HyracksException {
+ this(ncNameToNcInfos);
+ this.ncCollectionBuilder = topology == null ? new IPProximityNcCollectionBuilder()
+ : new RackAwareNcCollectionBuilder(topology);
+ }
+
+ /**
+ * The constructor of the scheduler.
+ *
+ * @param ncNameToNcInfos
+ * the mapping from nc names to nc infos
+ * @throws HyracksException
+ */
+ public Scheduler(Map<String, NodeControllerInfo> ncNameToNcInfos, INcCollectionBuilder ncCollectionBuilder)
+ throws HyracksException {
+ this.ncNameToNcInfos = ncNameToNcInfos;
+ this.ncCollectionBuilder = ncCollectionBuilder;
+ loadIPAddressToNCMap(ncNameToNcInfos);
+ }
+
+ /**
+ * Set location constraints for a file scan operator with a list of file
+     * splits. It guarantees that the maximum number of slots a machine can get
+     * is at most one more than the minimum number of slots a machine can get.
*
* @throws HyracksDataException
*/
public String[] getLocationConstraints(InputSplit[] splits) throws HyracksException {
- int[] capacity = new int[NCs.length];
- Arrays.fill(capacity, 0);
+ int[] workloads = new int[NCs.length];
+ Arrays.fill(workloads, 0);
String[] locations = new String[splits.length];
- int slots = splits.length % capacity.length == 0 ? (splits.length / capacity.length) : (splits.length
- / capacity.length + 1);
+ Map<String, IntWritable> locationToNumOfSplits = new HashMap<String, IntWritable>();
+ /**
+ * upper bound number of slots that a machine can get
+ */
+ int upperBoundSlots = splits.length % workloads.length == 0 ? (splits.length / workloads.length)
+ : (splits.length / workloads.length + 1);
+ /**
+ * lower bound number of slots that a machine can get
+ */
+ int lowerBoundSlots = splits.length % workloads.length == 0 ? upperBoundSlots : upperBoundSlots - 1;
try {
Random random = new Random(System.currentTimeMillis());
boolean scheduled[] = new boolean[splits.length];
Arrays.fill(scheduled, false);
-
- for (int i = 0; i < splits.length; i++) {
- /**
- * get the location of all the splits
- */
- String[] loc = splits[i].getLocations();
- if (loc.length > 0) {
- for (int j = 0; j < loc.length; j++) {
- /**
- * get all the IP addresses from the name
- */
- InetAddress[] allIps = InetAddress.getAllByName(loc[j]);
- /**
- * iterate overa all ips
- */
- for (InetAddress ip : allIps) {
- /**
- * if the node controller exists
- */
- if (ipToNcMapping.get(ip.getHostAddress()) != null) {
- /**
- * set the ncs
- */
- List<String> dataLocations = ipToNcMapping.get(ip.getHostAddress());
- int arrayPos = random.nextInt(dataLocations.size());
- String nc = dataLocations.get(arrayPos);
- int pos = ncNameToIndex.get(nc);
- /**
- * check if the node is already full
- */
- if (capacity[pos] < slots) {
- locations[i] = nc;
- capacity[pos]++;
- scheduled[i] = true;
- }
- }
- }
-
- /**
- * break the loop for data-locations if the schedule has already been found
- */
- if (scheduled[i] == true) {
- break;
- }
- }
- }
- }
-
/**
- * find the lowest index the current available NCs
+ * scan the splits and build the popularity map
+             * give machines with fewer local splits higher scheduling priority
*/
- int currentAvailableNC = 0;
- for (int i = 0; i < capacity.length; i++) {
- if (capacity[i] < slots) {
- currentAvailableNC = i;
- break;
- }
- }
-
+ buildPopularityMap(splits, locationToNumOfSplits);
/**
- * schedule no-local file reads
+ * push data-local lower-bounds slots to each machine
*/
- for (int i = 0; i < splits.length; i++) {
- // if there is no data-local NC choice, choose a random one
- if (!scheduled[i]) {
- locations[i] = NCs[currentAvailableNC];
- capacity[currentAvailableNC]++;
- scheduled[i] = true;
+ scheduleLocalSlots(splits, workloads, locations, lowerBoundSlots, random, scheduled, locationToNumOfSplits);
+ /**
+ * push data-local upper-bounds slots to each machine
+ */
+ scheduleLocalSlots(splits, workloads, locations, upperBoundSlots, random, scheduled, locationToNumOfSplits);
- /**
- * move the available NC cursor to the next one
- */
- for (int j = currentAvailableNC; j < capacity.length; j++) {
- if (capacity[j] < slots) {
- currentAvailableNC = j;
- break;
- }
- }
+ int dataLocalCount = 0;
+ for (int i = 0; i < scheduled.length; i++) {
+                if (scheduled[i]) {
+ dataLocalCount++;
}
}
+ LOGGER.info("Data local rate: " + ((float) dataLocalCount / (float) (scheduled.length)));
+ /**
+ * push non-data-local lower-bounds slots to each machine
+ */
+ scheduleNonLocalSlots(splits, workloads, locations, lowerBoundSlots, scheduled);
+ /**
+ * push non-data-local upper-bounds slots to each machine
+ */
+ scheduleNonLocalSlots(splits, workloads, locations, upperBoundSlots, scheduled);
return locations;
} catch (IOException e) {
throw new HyracksException(e);
@@ -171,6 +205,159 @@
}
/**
+ * Schedule non-local slots to each machine
+ *
+ * @param splits
+ * The HDFS file splits.
+ * @param workloads
+ * The current capacity of each machine.
+ * @param locations
+ * The result schedule.
+ * @param slotLimit
+ * The maximum slots of each machine.
+ * @param scheduled
+ * Indicate which slot is scheduled.
+ */
+ private void scheduleNonLocalSlots(InputSplit[] splits, int[] workloads, String[] locations, int slotLimit,
+ boolean[] scheduled) throws IOException, UnknownHostException {
+ /**
+ * build the map from available ips to the number of available slots
+ */
+ INcCollection ncCollection = this.ncCollectionBuilder.build(ncNameToNcInfos, ipToNcMapping, ncNameToIndex, NCs,
+ workloads, slotLimit);
+ if (ncCollection.numAvailableSlots() == 0) {
+ return;
+ }
+ /**
+         * schedule non-local file reads
+ */
+ for (int i = 0; i < splits.length; i++) {
+ /** if there is no data-local NC choice, choose a random one */
+ if (!scheduled[i]) {
+ InputSplit split = splits[i];
+ String selectedNcName = ncCollection.findNearestAvailableSlot(split);
+ if (selectedNcName != null) {
+ int ncIndex = ncNameToIndex.get(selectedNcName);
+ workloads[ncIndex]++;
+ scheduled[i] = true;
+ locations[i] = selectedNcName;
+ }
+ }
+ }
+ }
+
+ /**
+ * Schedule data-local slots to each machine.
+ *
+ * @param splits
+ * The HDFS file splits.
+ * @param workloads
+ * The current capacity of each machine.
+ * @param locations
+ * The result schedule.
+ * @param slots
+ * The maximum slots of each machine.
+ * @param random
+ * The random generator.
+ * @param scheduled
+ * Indicate which slot is scheduled.
+ * @throws IOException
+ * @throws UnknownHostException
+ */
+ private void scheduleLocalSlots(InputSplit[] splits, int[] workloads, String[] locations, int slots, Random random,
+ boolean[] scheduled, final Map<String, IntWritable> locationToNumSplits) throws IOException,
+ UnknownHostException {
+        /** scheduling candidates are ordered inversely by popularity (the least popular location first) */
+        PriorityQueue<String> scheduleCandidates = new PriorityQueue<String>(3, new Comparator<String>() {
+
+ @Override
+ public int compare(String s1, String s2) {
+ return locationToNumSplits.get(s1).compareTo(locationToNumSplits.get(s2));
+ }
+
+ });
+ for (int i = 0; i < splits.length; i++) {
+ if (scheduled[i]) {
+ continue;
+ }
+ /**
+ * get the location of all the splits
+ */
+ String[] locs = splits[i].getLocations();
+ if (locs.length > 0) {
+                scheduleCandidates.clear();
+ for (int j = 0; j < locs.length; j++) {
+                    scheduleCandidates.add(locs[j]);
+ }
+
+                for (String candidate : scheduleCandidates) {
+ /**
+ * get all the IP addresses from the name
+ */
+ InetAddress[] allIps = InetAddress.getAllByName(candidate);
+ /**
+                     * iterate over all ips
+ */
+ for (InetAddress ip : allIps) {
+ /**
+ * if the node controller exists
+ */
+ if (ipToNcMapping.get(ip.getHostAddress()) != null) {
+ /**
+ * set the ncs
+ */
+ List<String> dataLocations = ipToNcMapping.get(ip.getHostAddress());
+ int arrayPos = random.nextInt(dataLocations.size());
+ String nc = dataLocations.get(arrayPos);
+ int pos = ncNameToIndex.get(nc);
+ /**
+ * check if the node is already full
+ */
+ if (workloads[pos] < slots) {
+ locations[i] = nc;
+ workloads[pos]++;
+ scheduled[i] = true;
+ break;
+ }
+ }
+ }
+ /**
+ * break the loop for data-locations if the schedule has
+ * already been found
+ */
+                    if (scheduled[i]) {
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Scan the splits once and build a popularity map
+ *
+ * @param splits
+ * the split array
+ * @param locationToNumOfSplits
+ * the map to be built
+ * @throws IOException
+ */
+ private void buildPopularityMap(InputSplit[] splits, Map<String, IntWritable> locationToNumOfSplits)
+ throws IOException {
+ for (InputSplit split : splits) {
+ String[] locations = split.getLocations();
+ for (String loc : locations) {
+ IntWritable locCount = locationToNumOfSplits.get(loc);
+ if (locCount == null) {
+ locCount = new IntWritable(0);
+ locationToNumOfSplits.put(loc, locCount);
+ }
+ locCount.set(locCount.get() + 1);
+ }
+ }
+ }
+
+ /**
* Load the IP-address-to-NC map from the NCNameToNCInfoMap
*
* @param ncNameToNcInfos
@@ -179,6 +366,8 @@
private void loadIPAddressToNCMap(Map<String, NodeControllerInfo> ncNameToNcInfos) throws HyracksException {
try {
NCs = new String[ncNameToNcInfos.size()];
+ ipToNcMapping.clear();
+ ncNameToIndex.clear();
int i = 0;
/**
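Editor's note: the rewritten getLocationConstraints above runs two data-local passes (capped at the lower bound, then the upper bound) followed by two non-local passes over the same bounds. The bounds themselves are plain ceiling/floor arithmetic over the split and NC counts; the sketch below illustrates that arithmetic with assumed counts taken from the odd-sized test case.

public class SlotBoundsSketch {
    public static void main(String[] args) {
        // Assumed counts: 13 splits over 6 node controllers, as in testSchedulerSmallerHDFSOdd.
        int splits = 13;
        int ncs = 6;

        // Upper bound is ceil(splits / ncs); lower bound is one less unless it divides evenly.
        int upperBoundSlots = splits % ncs == 0 ? splits / ncs : splits / ncs + 1;
        int lowerBoundSlots = splits % ncs == 0 ? upperBoundSlots : upperBoundSlots - 1;

        System.out.println("upper=" + upperBoundSlots + ", lower=" + lowerBoundSlots); // upper=3, lower=2
    }
}

Because every machine ends up with either the lower or the upper bound of splits, the maximum load exceeds the minimum load by at most one, which is the guarantee stated in the method's javadoc.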
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSReadOperatorDescriptor.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSReadOperatorDescriptor.java
index 90f5603..9e9abdf 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSReadOperatorDescriptor.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSReadOperatorDescriptor.java
@@ -139,8 +139,7 @@
/**
* read the split
*/
- TaskAttemptContext context = ctxFactory.createContext(job.getConfiguration(),
- inputSplits.get(i));
+ TaskAttemptContext context = ctxFactory.createContext(job.getConfiguration(), i);
RecordReader reader = inputFormat.createRecordReader(inputSplits.get(i), context);
reader.initialize(inputSplits.get(i), context);
while (reader.nextKeyValue() == true) {
@@ -148,7 +147,7 @@
}
}
}
- parser.flush(writer);
+ parser.close(writer);
writer.close();
} catch (Exception e) {
throw new HyracksDataException(e);
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSWriteOperatorDescriptor.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSWriteOperatorDescriptor.java
index 390a7b5..c1c227c 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSWriteOperatorDescriptor.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSWriteOperatorDescriptor.java
@@ -39,8 +39,8 @@
import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
/**
- * The HDFS file write operator using the Hadoop new API.
- * To use this operator, a user need to provide an ITupleWriterFactory.
+ * The HDFS file write operator using the Hadoop new API. To use this operator,
+ * a user needs to provide an ITupleWriterFactory.
*/
public class HDFSWriteOperatorDescriptor extends AbstractSingleActivityOperatorDescriptor {
@@ -88,10 +88,11 @@
String outputPath = FileOutputFormat.getOutputPath(conf).toString();
String fileName = outputPath + File.separator + "part-" + partition;
- tupleWriter = tupleWriterFactory.getTupleWriter();
+ tupleWriter = tupleWriterFactory.getTupleWriter(ctx);
try {
FileSystem dfs = FileSystem.get(conf.getConfiguration());
dos = dfs.create(new Path(fileName), true);
+ tupleWriter.open(dos);
} catch (Exception e) {
throw new HyracksDataException(e);
}
@@ -115,6 +116,7 @@
@Override
public void close() throws HyracksDataException {
try {
+ tupleWriter.close(dos);
dos.close();
} catch (Exception e) {
throw new HyracksDataException(e);
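Editor's note: the write-operator change threads the tuple writer through an explicit open/append/close lifecycle around the HDFS output stream: getTupleWriter(ctx) in open(), tupleWriter.open(dos) right after the stream is created, and tupleWriter.close(dos) before dos.close(). The sketch below shows that lifecycle with a hypothetical stand-in interface, since ITupleWriter's exact signatures are not shown in this diff.

import java.io.DataOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;

public class WriterLifecycleSketch {
    // Hypothetical stand-in for the ITupleWriter lifecycle used by the operator:
    // open the sink once, append tuples per frame, close before the stream is closed.
    interface SimpleTupleWriter {
        void open(DataOutputStream out) throws IOException;
        void append(DataOutputStream out, String tuple) throws IOException;
        void close(DataOutputStream out) throws IOException;
    }

    public static void main(String[] args) throws IOException {
        SimpleTupleWriter writer = new SimpleTupleWriter() {
            public void open(DataOutputStream out) throws IOException {
                out.writeUTF("header"); // e.g. emit a format header once
            }
            public void append(DataOutputStream out, String tuple) throws IOException {
                out.writeUTF(tuple);
            }
            public void close(DataOutputStream out) throws IOException {
                out.writeUTF("footer"); // e.g. flush any buffered state
            }
        };
        DataOutputStream dos = new DataOutputStream(new FileOutputStream("part-0"));
        try {
            writer.open(dos);              // mirrors tupleWriter.open(dos) in open()
            writer.append(dos, "tuple-1"); // mirrors the per-frame writes in nextFrame()
            writer.close(dos);             // mirrors tupleWriter.close(dos) in close()
        } finally {
            dos.close();
        }
    }
}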
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/scheduler/Scheduler.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/scheduler/Scheduler.java
index 3445d68..cb97ca1 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/scheduler/Scheduler.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/scheduler/Scheduler.java
@@ -15,18 +15,11 @@
package edu.uci.ics.hyracks.hdfs2.scheduler;
-import java.net.InetAddress;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.Random;
import org.apache.hadoop.mapreduce.InputSplit;
-import edu.uci.ics.hyracks.api.client.HyracksConnection;
-import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.exceptions.HyracksException;
@@ -35,16 +28,10 @@
* The scheduler conduct data-local scheduling for data reading on HDFS.
* This class works for Hadoop new API.
*/
+@SuppressWarnings("deprecation")
public class Scheduler {
- /** a list of NCs */
- private String[] NCs;
-
- /** a map from ip to NCs */
- private Map<String, List<String>> ipToNcMapping = new HashMap<String, List<String>>();
-
- /** a map from the NC name to the index */
- private Map<String, Integer> ncNameToIndex = new HashMap<String, Integer>();
+ private edu.uci.ics.hyracks.hdfs.scheduler.Scheduler scheduler;
/**
* The constructor of the scheduler
@@ -53,17 +40,18 @@
* @throws HyracksException
*/
public Scheduler(String ipAddress, int port) throws HyracksException {
- try {
- IHyracksClientConnection hcc = new HyracksConnection(ipAddress, port);
- Map<String, NodeControllerInfo> ncNameToNcInfos = hcc.getNodeControllerInfos();
- loadIPAddressToNCMap(ncNameToNcInfos);
- } catch (Exception e) {
- throw new HyracksException(e);
- }
+ scheduler = new edu.uci.ics.hyracks.hdfs.scheduler.Scheduler(ipAddress, port);
}
+ /**
+ * The constructor of the scheduler.
+ *
+ * @param ncNameToNcInfos
+ * the mapping from nc names to nc infos
+ * @throws HyracksException
+ */
public Scheduler(Map<String, NodeControllerInfo> ncNameToNcInfos) throws HyracksException {
- loadIPAddressToNCMap(ncNameToNcInfos);
+ scheduler = new edu.uci.ics.hyracks.hdfs.scheduler.Scheduler(ncNameToNcInfos);
}
/**
@@ -72,135 +60,11 @@
* @throws HyracksDataException
*/
public String[] getLocationConstraints(List<InputSplit> splits) throws HyracksException {
- int[] capacity = new int[NCs.length];
- Arrays.fill(capacity, 0);
- String[] locations = new String[splits.size()];
- int slots = splits.size() % capacity.length == 0 ? (splits.size() / capacity.length) : (splits.size()
- / capacity.length + 1);
-
try {
- Random random = new Random(System.currentTimeMillis());
- boolean scheduled[] = new boolean[splits.size()];
- Arrays.fill(scheduled, false);
-
- for (int i = 0; i < splits.size(); i++) {
- /**
- * get the location of all the splits
- */
- String[] loc = splits.get(i).getLocations();
- if (loc.length > 0) {
- for (int j = 0; j < loc.length; j++) {
- /**
- * get all the IP addresses from the name
- */
- InetAddress[] allIps = InetAddress.getAllByName(loc[j]);
- /**
- * iterate overa all ips
- */
- for (InetAddress ip : allIps) {
- /**
- * if the node controller exists
- */
- if (ipToNcMapping.get(ip.getHostAddress()) != null) {
- /**
- * set the ncs
- */
- List<String> dataLocations = ipToNcMapping.get(ip.getHostAddress());
- int arrayPos = random.nextInt(dataLocations.size());
- String nc = dataLocations.get(arrayPos);
- int pos = ncNameToIndex.get(nc);
- /**
- * check if the node is already full
- */
- if (capacity[pos] < slots) {
- locations[i] = nc;
- capacity[pos]++;
- scheduled[i] = true;
- }
- }
- }
-
- /**
- * break the loop for data-locations if the schedule has already been found
- */
- if (scheduled[i] == true) {
- break;
- }
- }
- }
- }
-
- /**
- * find the lowest index the current available NCs
- */
- int currentAvailableNC = 0;
- for (int i = 0; i < capacity.length; i++) {
- if (capacity[i] < slots) {
- currentAvailableNC = i;
- break;
- }
- }
-
- /**
- * schedule no-local file reads
- */
- for (int i = 0; i < splits.size(); i++) {
- // if there is no data-local NC choice, choose a random one
- if (!scheduled[i]) {
- locations[i] = NCs[currentAvailableNC];
- capacity[currentAvailableNC]++;
- scheduled[i] = true;
-
- /**
- * move the available NC cursor to the next one
- */
- for (int j = currentAvailableNC; j < capacity.length; j++) {
- if (capacity[j] < slots) {
- currentAvailableNC = j;
- break;
- }
- }
- }
- }
- return locations;
- } catch (Exception e) {
- throw new HyracksException(e);
- }
- }
-
- /**
- * Load the IP-address-to-NC map from the NCNameToNCInfoMap
- *
- * @param ncNameToNcInfos
- * @throws HyracksException
- */
- private void loadIPAddressToNCMap(Map<String, NodeControllerInfo> ncNameToNcInfos) throws HyracksException {
- try {
- NCs = new String[ncNameToNcInfos.size()];
- int i = 0;
-
- /**
- * build the IP address to NC map
- */
- for (Map.Entry<String, NodeControllerInfo> entry : ncNameToNcInfos.entrySet()) {
- String ipAddr = InetAddress.getByAddress(entry.getValue().getNetworkAddress().getIpAddress())
- .getHostAddress();
- List<String> matchedNCs = ipToNcMapping.get(ipAddr);
- if (matchedNCs == null) {
- matchedNCs = new ArrayList<String>();
- ipToNcMapping.put(ipAddr, matchedNCs);
- }
- matchedNCs.add(entry.getKey());
- NCs[i] = entry.getKey();
- i++;
- }
-
- /**
- * set up the NC name to index mapping
- */
- for (i = 0; i < NCs.length; i++) {
- ncNameToIndex.put(NCs[i], i);
- }
+ org.apache.hadoop.mapred.InputSplit[] inputSplits = new org.apache.hadoop.mapred.InputSplit[splits.size()];
+ for (int i = 0; i < inputSplits.length; i++)
+ inputSplits[i] = new WrappedFileSplit(splits.get(i).getLocations(), splits.get(i).getLength());
+ return scheduler.getLocationConstraints(inputSplits);
} catch (Exception e) {
throw new HyracksException(e);
}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/scheduler/WrappedFileSplit.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/scheduler/WrappedFileSplit.java
new file mode 100644
index 0000000..1deb469
--- /dev/null
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/scheduler/WrappedFileSplit.java
@@ -0,0 +1,51 @@
+package edu.uci.ics.hyracks.hdfs2.scheduler;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.mapred.InputSplit;
+
+/**
+ * A wrapped implementation of InputSplit that allows the new-API scheduler
+ * to reuse the old-API scheduler.
+ */
+@SuppressWarnings("deprecation")
+public class WrappedFileSplit implements InputSplit {
+
+ private String[] locations;
+ private long length;
+
+ public WrappedFileSplit(String[] locations, long length) {
+ this.locations = locations;
+ this.length = length;
+ }
+
+ @Override
+ public void readFields(DataInput input) throws IOException {
+ int len = input.readInt();
+ locations = new String[len];
+ for (int i = 0; i < len; i++)
+ locations[i] = input.readUTF();
+ length = input.readLong();
+ }
+
+ @Override
+ public void write(DataOutput output) throws IOException {
+        output.writeInt(locations.length); // must match readInt() in readFields()
+ for (int i = 0; i < locations.length; i++)
+ output.writeUTF(locations[i]);
+ output.writeLong(length);
+ }
+
+ @Override
+ public long getLength() throws IOException {
+ return length;
+ }
+
+ @Override
+ public String[] getLocations() throws IOException {
+ return locations;
+ }
+
+}
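Editor's note: WrappedFileSplit's write and readFields must agree on the wire format, namely an int location count, the UTF-encoded locations, then a long length (hence the writeInt above). A quick round-trip sketch using the class as defined in this patch:

import java.io.*;

import edu.uci.ics.hyracks.hdfs2.scheduler.WrappedFileSplit;

public class WrappedFileSplitRoundTrip {
    public static void main(String[] args) throws IOException {
        WrappedFileSplit original = new WrappedFileSplit(
                new String[] { "10.0.0.1", "10.0.0.2" }, 1024L);

        // Serialize with the Writable-style write(DataOutput) method.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance via readFields(DataInput).
        WrappedFileSplit copy = new WrappedFileSplit(new String[0], 0);
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(copy.getLocations().length + " locations, length=" + copy.getLength());
    }
}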
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs/scheduler/SchedulerTest.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs/scheduler/SchedulerTest.java
index 4b8a278..90967a0 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs/scheduler/SchedulerTest.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs/scheduler/SchedulerTest.java
@@ -15,6 +15,8 @@
package edu.uci.ics.hyracks.hdfs.scheduler;
+import java.io.FileReader;
+import java.io.IOException;
import java.net.InetAddress;
import java.util.HashMap;
import java.util.Map;
@@ -25,13 +27,28 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
import edu.uci.ics.hyracks.api.client.NodeStatus;
import edu.uci.ics.hyracks.api.comm.NetworkAddress;
+import edu.uci.ics.hyracks.api.topology.ClusterTopology;
+import edu.uci.ics.hyracks.api.topology.TopologyDefinitionParser;
@SuppressWarnings("deprecation")
public class SchedulerTest extends TestCase {
+ private static String TOPOLOGY_PATH = "src/test/resources/topology.xml";
+
+ private ClusterTopology parseTopology() throws IOException, SAXException {
+ FileReader fr = new FileReader(TOPOLOGY_PATH);
+ InputSource in = new InputSource(fr);
+ try {
+ return TopologyDefinitionParser.parse(in);
+ } finally {
+ fr.close();
+ }
+ }
/**
* Test the scheduler for the case when the Hyracks cluster is the HDFS cluster
@@ -41,17 +58,23 @@
public void testSchedulerSimple() throws Exception {
Map<String, NodeControllerInfo> ncNameToNcInfos = new HashMap<String, NodeControllerInfo>();
ncNameToNcInfos.put("nc1", new NodeControllerInfo("nc1", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.1").getAddress(), 5099)));
+ .getByName("10.0.0.1").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.1")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc2", new NodeControllerInfo("nc2", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.2").getAddress(), 5099)));
+ .getByName("10.0.0.2").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.2")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc3", new NodeControllerInfo("nc3", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.3").getAddress(), 5099)));
+ .getByName("10.0.0.3").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.3")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc4", new NodeControllerInfo("nc4", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.4").getAddress(), 5099)));
+ .getByName("10.0.0.4").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.4")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc5", new NodeControllerInfo("nc5", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.5").getAddress(), 5099)));
+ .getByName("10.0.0.5").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.5")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc6", new NodeControllerInfo("nc6", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.6").getAddress(), 5099)));
+ .getByName("10.0.0.6").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.6")
+ .getAddress(), 5098)));
InputSplit[] fileSplits = new InputSplit[6];
fileSplits[0] = new FileSplit(new Path("part-1"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" });
@@ -61,11 +84,17 @@
fileSplits[4] = new FileSplit(new Path("part-5"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" });
fileSplits[5] = new FileSplit(new Path("part-6"), 0, 0, new String[] { "10.0.0.2", "10.0.0.3", "10.0.0.5" });
+ String[] expectedResults = new String[] { "nc1", "nc4", "nc6", "nc2", "nc3", "nc5" };
+
Scheduler scheduler = new Scheduler(ncNameToNcInfos);
String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
+ for (int i = 0; i < locationConstraints.length; i++) {
+ Assert.assertEquals(locationConstraints[i], expectedResults[i]);
+ }
- String[] expectedResults = new String[] { "nc1", "nc3", "nc4", "nc2", "nc5", "nc6" };
-
+ ClusterTopology topology = parseTopology();
+ scheduler = new Scheduler(ncNameToNcInfos, topology);
+ locationConstraints = scheduler.getLocationConstraints(fileSplits);
for (int i = 0; i < locationConstraints.length; i++) {
Assert.assertEquals(locationConstraints[i], expectedResults[i]);
}
@@ -79,17 +108,23 @@
public void testSchedulerLargerHDFS() throws Exception {
Map<String, NodeControllerInfo> ncNameToNcInfos = new HashMap<String, NodeControllerInfo>();
ncNameToNcInfos.put("nc1", new NodeControllerInfo("nc1", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.1").getAddress(), 5099)));
+ .getByName("10.0.0.1").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.1")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc2", new NodeControllerInfo("nc2", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.2").getAddress(), 5099)));
+ .getByName("10.0.0.2").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.2")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc3", new NodeControllerInfo("nc3", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.3").getAddress(), 5099)));
+ .getByName("10.0.0.3").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.3")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc4", new NodeControllerInfo("nc4", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.4").getAddress(), 5099)));
- ncNameToNcInfos.put("nc5", new NodeControllerInfo("nc5", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.5").getAddress(), 5099)));
- ncNameToNcInfos.put("nc6", new NodeControllerInfo("nc6", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.6").getAddress(), 5099)));
+ .getByName("10.0.0.4").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.4")
+ .getAddress(), 5098)));
+ ncNameToNcInfos.put("nc7", new NodeControllerInfo("nc7", NodeStatus.ALIVE, new NetworkAddress(InetAddress
+ .getByName("10.0.0.7").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.5")
+ .getAddress(), 5098)));
+ ncNameToNcInfos.put("nc12", new NodeControllerInfo("nc12", NodeStatus.ALIVE, new NetworkAddress(InetAddress
+ .getByName("10.0.0.12").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.5")
+ .getAddress(), 5098)));
InputSplit[] fileSplits = new InputSplit[12];
fileSplits[0] = new FileSplit(new Path("part-1"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" });
@@ -100,17 +135,25 @@
fileSplits[5] = new FileSplit(new Path("part-6"), 0, 0, new String[] { "10.0.0.2", "10.0.0.3", "10.0.0.5" });
fileSplits[6] = new FileSplit(new Path("part-7"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" });
fileSplits[7] = new FileSplit(new Path("part-8"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" });
- fileSplits[8] = new FileSplit(new Path("part-9"), 0, 0, new String[] { "10.0.0.4", "10.0.0.5", "10.0.0.6" });
+ fileSplits[8] = new FileSplit(new Path("part-12"), 0, 0, new String[] { "10.0.0.14", "10.0.0.11", "10.0.0.13" });
fileSplits[9] = new FileSplit(new Path("part-10"), 0, 0, new String[] { "10.0.0.2", "10.0.0.1", "10.0.0.6" });
fileSplits[10] = new FileSplit(new Path("part-11"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.7" });
- fileSplits[11] = new FileSplit(new Path("part-12"), 0, 0, new String[] { "10.0.0.2", "10.0.0.3", "10.0.0.5" });
+ fileSplits[11] = new FileSplit(new Path("part-9"), 0, 0, new String[] { "10.0.0.4", "10.0.0.5", "10.0.0.6" });
Scheduler scheduler = new Scheduler(ncNameToNcInfos);
String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
- String[] expectedResults = new String[] { "nc1", "nc3", "nc4", "nc2", "nc3", "nc2", "nc1", "nc4", "nc5", "nc6",
- "nc6", "nc5" };
+ String[] expectedResults = new String[] { "nc1", "nc4", "nc4", "nc1", "nc3", "nc2", "nc2", "nc3", "nc12",
+ "nc7", "nc7", "nc12" };
+ for (int i = 0; i < locationConstraints.length; i++) {
+ Assert.assertEquals(locationConstraints[i], expectedResults[i]);
+ }
+ expectedResults = new String[] { "nc1", "nc4", "nc4", "nc1", "nc3", "nc2", "nc2", "nc3", "nc7", "nc12", "nc7",
+ "nc12" };
+ ClusterTopology topology = parseTopology();
+ scheduler = new Scheduler(ncNameToNcInfos, topology);
+ locationConstraints = scheduler.getLocationConstraints(fileSplits);
for (int i = 0; i < locationConstraints.length; i++) {
Assert.assertEquals(locationConstraints[i], expectedResults[i]);
}
@@ -124,17 +167,23 @@
public void testSchedulerSmallerHDFS() throws Exception {
Map<String, NodeControllerInfo> ncNameToNcInfos = new HashMap<String, NodeControllerInfo>();
ncNameToNcInfos.put("nc1", new NodeControllerInfo("nc1", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.1").getAddress(), 5099)));
+ .getByName("10.0.0.1").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.1")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc2", new NodeControllerInfo("nc2", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.2").getAddress(), 5099)));
+ .getByName("10.0.0.2").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.2")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc3", new NodeControllerInfo("nc3", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.3").getAddress(), 5099)));
+ .getByName("10.0.0.3").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.3")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc4", new NodeControllerInfo("nc4", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.4").getAddress(), 5099)));
+ .getByName("10.0.0.4").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.4")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc5", new NodeControllerInfo("nc5", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.5").getAddress(), 5099)));
+ .getByName("10.0.0.5").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.5")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc6", new NodeControllerInfo("nc6", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.6").getAddress(), 5099)));
+ .getByName("10.0.0.6").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.6")
+ .getAddress(), 5098)));
InputSplit[] fileSplits = new InputSplit[12];
fileSplits[0] = new FileSplit(new Path("part-1"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" });
@@ -150,12 +199,19 @@
fileSplits[10] = new FileSplit(new Path("part-11"), 0, 0, new String[] { "10.0.0.3", "10.0.0.4", "10.0.0.5" });
fileSplits[11] = new FileSplit(new Path("part-12"), 0, 0, new String[] { "10.0.0.2", "10.0.0.3", "10.0.0.5" });
+ String[] expectedResults = new String[] { "nc1", "nc4", "nc4", "nc1", "nc3", "nc2", "nc2", "nc3", "nc5", "nc6",
+ "nc5", "nc6" };
+
Scheduler scheduler = new Scheduler(ncNameToNcInfos);
String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
- String[] expectedResults = new String[] { "nc1", "nc3", "nc4", "nc2", "nc3", "nc2", "nc1", "nc4", "nc5", "nc6",
- "nc5", "nc6" };
+ for (int i = 0; i < locationConstraints.length; i++) {
+ Assert.assertEquals(locationConstraints[i], expectedResults[i]);
+ }
+ ClusterTopology topology = parseTopology();
+ scheduler = new Scheduler(ncNameToNcInfos, topology);
+ locationConstraints = scheduler.getLocationConstraints(fileSplits);
for (int i = 0; i < locationConstraints.length; i++) {
Assert.assertEquals(locationConstraints[i], expectedResults[i]);
}
@@ -169,17 +225,23 @@
public void testSchedulerSmallerHDFSOdd() throws Exception {
Map<String, NodeControllerInfo> ncNameToNcInfos = new HashMap<String, NodeControllerInfo>();
ncNameToNcInfos.put("nc1", new NodeControllerInfo("nc1", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.1").getAddress(), 5099)));
+ .getByName("10.0.0.1").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.1")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc2", new NodeControllerInfo("nc2", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.2").getAddress(), 5099)));
+ .getByName("10.0.0.2").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.2")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc3", new NodeControllerInfo("nc3", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.3").getAddress(), 5099)));
+ .getByName("10.0.0.3").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.3")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc4", new NodeControllerInfo("nc4", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.4").getAddress(), 5099)));
+ .getByName("10.0.0.4").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.4")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc5", new NodeControllerInfo("nc5", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.5").getAddress(), 5099)));
+ .getByName("10.0.0.5").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.5")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc6", new NodeControllerInfo("nc6", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.6").getAddress(), 5099)));
+ .getByName("10.0.0.6").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.6")
+ .getAddress(), 5098)));
InputSplit[] fileSplits = new InputSplit[13];
fileSplits[0] = new FileSplit(new Path("part-1"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" });
@@ -196,15 +258,23 @@
fileSplits[11] = new FileSplit(new Path("part-12"), 0, 0, new String[] { "10.0.0.2", "10.0.0.3", "10.0.0.5" });
fileSplits[12] = new FileSplit(new Path("part-13"), 0, 0, new String[] { "10.0.0.2", "10.0.0.4", "10.0.0.5" });
+ String[] expectedResults = new String[] { "nc1", "nc4", "nc4", "nc1", "nc3", "nc2", "nc2", "nc3", "nc5", "nc1",
+ "nc5", "nc2", "nc4" };
+
Scheduler scheduler = new Scheduler(ncNameToNcInfos);
String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
- String[] expectedResults = new String[] { "nc1", "nc3", "nc4", "nc2", "nc3", "nc2", "nc1", "nc3", "nc4", "nc2",
- "nc4", "nc5", "nc5" };
-
for (int i = 0; i < locationConstraints.length; i++) {
Assert.assertEquals(locationConstraints[i], expectedResults[i]);
}
+
+ ClusterTopology topology = parseTopology();
+ scheduler = new Scheduler(ncNameToNcInfos, topology);
+ locationConstraints = scheduler.getLocationConstraints(fileSplits);
+ for (int i = 0; i < locationConstraints.length; i++) {
+ Assert.assertEquals(locationConstraints[i], expectedResults[i]);
+ }
+
}
}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs/utils/HyracksUtils.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs/utils/HyracksUtils.java
index 30dbb80..bdff2fd 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs/utils/HyracksUtils.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs/utils/HyracksUtils.java
@@ -64,6 +64,7 @@
ncConfig1.clusterNetIPAddress = "localhost";
ncConfig1.ccPort = TEST_HYRACKS_CC_PORT;
ncConfig1.dataIPAddress = "127.0.0.1";
+ ncConfig1.datasetIPAddress = "127.0.0.1";
ncConfig1.nodeId = NC1_ID;
nc1 = new NodeControllerService(ncConfig1);
nc1.start();
@@ -73,6 +74,7 @@
ncConfig2.clusterNetIPAddress = "localhost";
ncConfig2.ccPort = TEST_HYRACKS_CC_PORT;
ncConfig2.dataIPAddress = "127.0.0.1";
+ ncConfig2.datasetIPAddress = "127.0.0.1";
ncConfig2.nodeId = NC2_ID;
nc2 = new NodeControllerService(ncConfig2);
nc2.start();
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs2/scheduler/SchedulerTest.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs2/scheduler/SchedulerTest.java
index ea2af13..442aeae0 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs2/scheduler/SchedulerTest.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs2/scheduler/SchedulerTest.java
@@ -34,7 +34,6 @@
/**
* Test case for the new HDFS API scheduler
- *
*/
public class SchedulerTest extends TestCase {
@@ -46,17 +45,23 @@
public void testSchedulerSimple() throws Exception {
Map<String, NodeControllerInfo> ncNameToNcInfos = new HashMap<String, NodeControllerInfo>();
ncNameToNcInfos.put("nc1", new NodeControllerInfo("nc1", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.1").getAddress(), 5099)));
+ .getByName("10.0.0.1").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.1")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc2", new NodeControllerInfo("nc2", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.2").getAddress(), 5099)));
+ .getByName("10.0.0.2").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.2")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc3", new NodeControllerInfo("nc3", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.3").getAddress(), 5099)));
+ .getByName("10.0.0.3").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.3")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc4", new NodeControllerInfo("nc4", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.4").getAddress(), 5099)));
+ .getByName("10.0.0.4").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.4")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc5", new NodeControllerInfo("nc5", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.5").getAddress(), 5099)));
+ .getByName("10.0.0.5").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.5")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc6", new NodeControllerInfo("nc6", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.6").getAddress(), 5099)));
+ .getByName("10.0.0.6").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.6")
+ .getAddress(), 5098)));
List<InputSplit> fileSplits = new ArrayList<InputSplit>();
fileSplits.add(new FileSplit(new Path("part-1"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" }));
@@ -69,7 +74,7 @@
Scheduler scheduler = new Scheduler(ncNameToNcInfos);
String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
- String[] expectedResults = new String[] { "nc1", "nc3", "nc4", "nc2", "nc5", "nc6" };
+ String[] expectedResults = new String[] { "nc1", "nc4", "nc6", "nc2", "nc3", "nc5" };
for (int i = 0; i < locationConstraints.length; i++) {
Assert.assertEquals(locationConstraints[i], expectedResults[i]);
@@ -84,17 +89,23 @@
public void testSchedulerLargerHDFS() throws Exception {
Map<String, NodeControllerInfo> ncNameToNcInfos = new HashMap<String, NodeControllerInfo>();
ncNameToNcInfos.put("nc1", new NodeControllerInfo("nc1", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.1").getAddress(), 5099)));
+ .getByName("10.0.0.1").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.1")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc2", new NodeControllerInfo("nc2", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.2").getAddress(), 5099)));
+ .getByName("10.0.0.2").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.2")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc3", new NodeControllerInfo("nc3", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.3").getAddress(), 5099)));
+ .getByName("10.0.0.3").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.3")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc4", new NodeControllerInfo("nc4", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.4").getAddress(), 5099)));
+ .getByName("10.0.0.4").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.4")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc5", new NodeControllerInfo("nc5", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.5").getAddress(), 5099)));
+ .getByName("10.0.0.5").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.5")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc6", new NodeControllerInfo("nc6", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.6").getAddress(), 5099)));
+ .getByName("10.0.0.6").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.6")
+ .getAddress(), 5098)));
List<InputSplit> fileSplits = new ArrayList<InputSplit>();
fileSplits.add(new FileSplit(new Path("part-1"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" }));
@@ -113,8 +124,8 @@
Scheduler scheduler = new Scheduler(ncNameToNcInfos);
String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
- String[] expectedResults = new String[] { "nc1", "nc3", "nc4", "nc2", "nc3", "nc2", "nc1", "nc4", "nc5", "nc6",
- "nc6", "nc5" };
+ String[] expectedResults = new String[] { "nc1", "nc4", "nc6", "nc1", "nc4", "nc2", "nc2", "nc3", "nc6", "nc5",
+ "nc3", "nc5" };
for (int i = 0; i < locationConstraints.length; i++) {
Assert.assertEquals(locationConstraints[i], expectedResults[i]);
@@ -129,17 +140,23 @@
public void testSchedulerSmallerHDFS() throws Exception {
Map<String, NodeControllerInfo> ncNameToNcInfos = new HashMap<String, NodeControllerInfo>();
ncNameToNcInfos.put("nc1", new NodeControllerInfo("nc1", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.1").getAddress(), 5099)));
+ .getByName("10.0.0.1").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.1")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc2", new NodeControllerInfo("nc2", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.2").getAddress(), 5099)));
+ .getByName("10.0.0.2").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.2")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc3", new NodeControllerInfo("nc3", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.3").getAddress(), 5099)));
+ .getByName("10.0.0.3").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.3")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc4", new NodeControllerInfo("nc4", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.4").getAddress(), 5099)));
+ .getByName("10.0.0.4").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.4")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc5", new NodeControllerInfo("nc5", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.5").getAddress(), 5099)));
+ .getByName("10.0.0.5").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.5")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc6", new NodeControllerInfo("nc6", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.6").getAddress(), 5099)));
+ .getByName("10.0.0.6").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.6")
+ .getAddress(), 5098)));
List<InputSplit> fileSplits = new ArrayList<InputSplit>();
fileSplits.add(new FileSplit(new Path("part-1"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" }));
@@ -158,7 +175,7 @@
Scheduler scheduler = new Scheduler(ncNameToNcInfos);
String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
- String[] expectedResults = new String[] { "nc1", "nc3", "nc4", "nc2", "nc3", "nc2", "nc1", "nc4", "nc5", "nc6",
+ String[] expectedResults = new String[] { "nc1", "nc4", "nc4", "nc1", "nc3", "nc2", "nc2", "nc3", "nc5", "nc6",
"nc5", "nc6" };
for (int i = 0; i < locationConstraints.length; i++) {
@@ -174,17 +191,23 @@
public void testSchedulerSmallerHDFSOdd() throws Exception {
Map<String, NodeControllerInfo> ncNameToNcInfos = new HashMap<String, NodeControllerInfo>();
ncNameToNcInfos.put("nc1", new NodeControllerInfo("nc1", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.1").getAddress(), 5099)));
+ .getByName("10.0.0.1").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.1")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc2", new NodeControllerInfo("nc2", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.2").getAddress(), 5099)));
+ .getByName("10.0.0.2").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.2")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc3", new NodeControllerInfo("nc3", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.3").getAddress(), 5099)));
+ .getByName("10.0.0.3").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.3")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc4", new NodeControllerInfo("nc4", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.4").getAddress(), 5099)));
+ .getByName("10.0.0.4").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.4")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc5", new NodeControllerInfo("nc5", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.5").getAddress(), 5099)));
+ .getByName("10.0.0.5").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.5")
+ .getAddress(), 5098)));
ncNameToNcInfos.put("nc6", new NodeControllerInfo("nc6", NodeStatus.ALIVE, new NetworkAddress(InetAddress
- .getByName("10.0.0.6").getAddress(), 5099)));
+ .getByName("10.0.0.6").getAddress(), 5099), new NetworkAddress(InetAddress.getByName("10.0.0.6")
+ .getAddress(), 5098)));
List<InputSplit> fileSplits = new ArrayList<InputSplit>();
fileSplits.add(new FileSplit(new Path("part-1"), 0, 0, new String[] { "10.0.0.1", "10.0.0.2", "10.0.0.3" }));
@@ -204,8 +227,8 @@
Scheduler scheduler = new Scheduler(ncNameToNcInfos);
String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
- String[] expectedResults = new String[] { "nc1", "nc3", "nc4", "nc2", "nc3", "nc2", "nc1", "nc3", "nc4", "nc2",
- "nc4", "nc5", "nc5" };
+ String[] expectedResults = new String[] { "nc1", "nc4", "nc4", "nc1", "nc3", "nc2", "nc2", "nc3", "nc5", "nc1",
+ "nc5", "nc2", "nc4" };
for (int i = 0; i < locationConstraints.length; i++) {
Assert.assertEquals(locationConstraints[i], expectedResults[i]);
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/resources/topology.xml b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/resources/topology.xml
new file mode 100644
index 0000000..3a0ac7e
--- /dev/null
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/resources/topology.xml
@@ -0,0 +1,32 @@
+<cluster-topology>
+ <network-switch name="all">
+ <network-switch name="rack1">
+ <terminal name="10.0.0.1" />
+ <terminal name="10.0.0.5" />
+ <terminal name="10.0.0.9" />
+ <terminal name="10.0.0.13" />
+ <terminal name="10.0.0.17" />
+ </network-switch>
+ <network-switch name="rack2">
+ <terminal name="10.0.0.2" />
+ <terminal name="10.0.0.6" />
+ <terminal name="10.0.0.10" />
+ <terminal name="10.0.0.14" />
+ <terminal name="10.0.0.18" />
+ </network-switch>
+ <network-switch name="rack3">
+ <terminal name="10.0.0.3" />
+ <terminal name="10.0.0.7" />
+ <terminal name="10.0.0.11" />
+ <terminal name="10.0.0.15" />
+ <terminal name="10.0.0.19" />
+ </network-switch>
+ <network-switch name="rack4">
+ <terminal name="10.0.0.4" />
+ <terminal name="10.0.0.8" />
+ <terminal name="10.0.0.12" />
+ <terminal name="10.0.0.16" />
+ <terminal name="10.0.0.20" />
+ </network-switch>
+ </network-switch>
+</cluster-topology>
\ No newline at end of file
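Editor's note: the test topology groups terminals into four racks under a single root switch, and RackAwareNcCollectionBuilder turns each terminal into a numeric path via ClusterTopology.lookupNetworkTerminal. The sketch below resolves a terminal's path with the same parse helper SchedulerTest uses; the exact path values depend on the parser's switch ordering.

import java.io.FileReader;
import java.util.ArrayList;
import java.util.List;

import org.xml.sax.InputSource;

import edu.uci.ics.hyracks.api.topology.ClusterTopology;
import edu.uci.ics.hyracks.api.topology.TopologyDefinitionParser;

public class TopologyLookupSketch {
    public static void main(String[] args) throws Exception {
        // Parse the test topology the same way SchedulerTest.parseTopology() does.
        FileReader fr = new FileReader("src/test/resources/topology.xml");
        ClusterTopology topology;
        try {
            topology = TopologyDefinitionParser.parse(new InputSource(fr));
        } finally {
            fr.close();
        }

        // Resolve a terminal to its switch path; 10.0.0.12 sits under rack4,
        // so its path shares no rack-level prefix with 10.0.0.1 under rack1.
        List<Integer> path = new ArrayList<Integer>();
        boolean inCluster = topology.lookupNetworkTerminal("10.0.0.12", path);
        System.out.println("in cluster: " + inCluster + ", path: " + path);
    }
}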
diff --git a/hyracks/hyracks-ipc/pom.xml b/hyracks/hyracks-ipc/pom.xml
index cb0de08..6f5e09f 100644
--- a/hyracks/hyracks-ipc/pom.xml
+++ b/hyracks/hyracks-ipc/pom.xml
@@ -15,8 +15,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-maven-plugins/hyracks-virtualcluster-maven-plugin/pom.xml b/hyracks/hyracks-maven-plugins/hyracks-virtualcluster-maven-plugin/pom.xml
index 50c05da..a8fc29e 100644
--- a/hyracks/hyracks-maven-plugins/hyracks-virtualcluster-maven-plugin/pom.xml
+++ b/hyracks/hyracks-maven-plugins/hyracks-virtualcluster-maven-plugin/pom.xml
@@ -17,8 +17,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-maven-plugins/hyracks-virtualcluster-maven-plugin/src/main/java/edu/uci/ics/hyracks/maven/plugin/HyracksNCStartMojo.java b/hyracks/hyracks-maven-plugins/hyracks-virtualcluster-maven-plugin/src/main/java/edu/uci/ics/hyracks/maven/plugin/HyracksNCStartMojo.java
index 47de024..fc06a68 100644
--- a/hyracks/hyracks-maven-plugins/hyracks-virtualcluster-maven-plugin/src/main/java/edu/uci/ics/hyracks/maven/plugin/HyracksNCStartMojo.java
+++ b/hyracks/hyracks-maven-plugins/hyracks-virtualcluster-maven-plugin/src/main/java/edu/uci/ics/hyracks/maven/plugin/HyracksNCStartMojo.java
@@ -55,6 +55,7 @@
cmdLineBuffer.append(" -data-ip-address ").append(dataIpAddress);
cmdLineBuffer.append(" -node-id ").append(nodeId);
cmdLineBuffer.append(" -cluster-net-ip-address 127.0.0.1");
+ cmdLineBuffer.append(" -result-ip-address 127.0.0.1");
if (ccPort != 0) {
cmdLineBuffer.append(" -cc-port ").append(ccPort);
}
diff --git a/hyracks/hyracks-net/pom.xml b/hyracks/hyracks-net/pom.xml
index a079306..fb486df 100644
--- a/hyracks/hyracks-net/pom.xml
+++ b/hyracks/hyracks-net/pom.xml
@@ -15,8 +15,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-net/src/main/java/edu/uci/ics/hyracks/net/protocols/muxdemux/MuxDemux.java b/hyracks/hyracks-net/src/main/java/edu/uci/ics/hyracks/net/protocols/muxdemux/MuxDemux.java
index c719bc4..e4df6b9 100644
--- a/hyracks/hyracks-net/src/main/java/edu/uci/ics/hyracks/net/protocols/muxdemux/MuxDemux.java
+++ b/hyracks/hyracks-net/src/main/java/edu/uci/ics/hyracks/net/protocols/muxdemux/MuxDemux.java
@@ -48,9 +48,9 @@
* Constructor.
*
* @param localAddress
- * - TCP/IP socket address to listen on
+ * - TCP/IP socket address to listen on. Null for non-listening unidirectional sockets
* @param listener
- * - Callback interface to report channel events
+ * - Callback interface to report channel events. Null for non-listening unidirectional sockets
* @param nThreads
* - Number of threads to use for data transfer
* @param maxConnectionAttempts
diff --git a/hyracks/hyracks-net/src/main/java/edu/uci/ics/hyracks/net/protocols/tcp/TCPEndpoint.java b/hyracks/hyracks-net/src/main/java/edu/uci/ics/hyracks/net/protocols/tcp/TCPEndpoint.java
index d13a17e..a9061e1 100644
--- a/hyracks/hyracks-net/src/main/java/edu/uci/ics/hyracks/net/protocols/tcp/TCPEndpoint.java
+++ b/hyracks/hyracks-net/src/main/java/edu/uci/ics/hyracks/net/protocols/tcp/TCPEndpoint.java
@@ -45,15 +45,23 @@
}
public void start(InetSocketAddress localAddress) throws IOException {
- serverSocketChannel = ServerSocketChannel.open();
- ServerSocket serverSocket = serverSocketChannel.socket();
- serverSocket.bind(localAddress);
- this.localAddress = (InetSocketAddress) serverSocket.getLocalSocketAddress();
+ // Set up a server socket listening channel only if the TCPEndpoint is a listening endpoint.
+ if (localAddress != null) {
+ serverSocketChannel = ServerSocketChannel.open();
+ ServerSocket serverSocket = serverSocketChannel.socket();
+ serverSocket.bind(localAddress);
+ this.localAddress = (InetSocketAddress) serverSocket.getLocalSocketAddress();
+ }
+
ioThreads = new IOThread[nThreads];
for (int i = 0; i < ioThreads.length; ++i) {
ioThreads[i] = new IOThread();
}
- ioThreads[0].registerServerSocket(serverSocketChannel);
+
+ if (localAddress != null) {
+ ioThreads[0].registerServerSocket(serverSocketChannel);
+ }
+
for (int i = 0; i < ioThreads.length; ++i) {
ioThreads[i].start();
}
diff --git a/hyracks/hyracks-server/pom.xml b/hyracks/hyracks-server/pom.xml
index 6c6640e..e0fc40a 100644
--- a/hyracks/hyracks-server/pom.xml
+++ b/hyracks/hyracks-server/pom.xml
@@ -15,8 +15,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
<plugin>
diff --git a/hyracks/hyracks-storage-am-btree/pom.xml b/hyracks/hyracks-storage-am-btree/pom.xml
index 118d46d..f251d51 100644
--- a/hyracks/hyracks-storage-am-btree/pom.xml
+++ b/hyracks/hyracks-storage-am-btree/pom.xml
@@ -18,8 +18,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-storage-am-common/pom.xml b/hyracks/hyracks-storage-am-common/pom.xml
index 0b32733..dbc4f41d 100644
--- a/hyracks/hyracks-storage-am-common/pom.xml
+++ b/hyracks/hyracks-storage-am-common/pom.xml
@@ -18,8 +18,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-storage-am-invertedindex/pom.xml b/hyracks/hyracks-storage-am-invertedindex/pom.xml
index a647e9d..5fe2d96 100644
--- a/hyracks/hyracks-storage-am-invertedindex/pom.xml
+++ b/hyracks/hyracks-storage-am-invertedindex/pom.xml
@@ -18,8 +18,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-storage-am-rtree/pom.xml b/hyracks/hyracks-storage-am-rtree/pom.xml
index 61620ec..6c2d734 100644
--- a/hyracks/hyracks-storage-am-rtree/pom.xml
+++ b/hyracks/hyracks-storage-am-rtree/pom.xml
@@ -18,8 +18,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-storage-common/pom.xml b/hyracks/hyracks-storage-common/pom.xml
index 289171a..3360097 100644
--- a/hyracks/hyracks-storage-common/pom.xml
+++ b/hyracks/hyracks-storage-common/pom.xml
@@ -18,8 +18,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-test-support/pom.xml b/hyracks/hyracks-test-support/pom.xml
index 25a5378..89233c9 100644
--- a/hyracks/hyracks-test-support/pom.xml
+++ b/hyracks/hyracks-test-support/pom.xml
@@ -18,8 +18,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/test/support/TestTaskContext.java b/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/test/support/TestTaskContext.java
index c122b25..0ca93b2 100644
--- a/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/test/support/TestTaskContext.java
+++ b/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/test/support/TestTaskContext.java
@@ -20,6 +20,7 @@
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.TaskAttemptId;
import edu.uci.ics.hyracks.api.dataflow.state.IStateObject;
+import edu.uci.ics.hyracks.api.dataset.IDatasetPartitionManager;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.exceptions.HyracksException;
import edu.uci.ics.hyracks.api.io.FileReference;
@@ -101,6 +102,11 @@
}
@Override
+ public IDatasetPartitionManager getDatasetPartitionManager() {
+ return null;
+ }
+
+ @Override
public void sendApplicationMessageToCC(byte[] message, String nodeId) throws Exception {
// TODO Auto-generated method stub
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-btree-test/pom.xml b/hyracks/hyracks-tests/hyracks-storage-am-btree-test/pom.xml
index 7b03a71..d0bb883 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-btree-test/pom.xml
+++ b/hyracks/hyracks-tests/hyracks-storage-am-btree-test/pom.xml
@@ -18,8 +18,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-invertedindex-test/pom.xml b/hyracks/hyracks-tests/hyracks-storage-am-invertedindex-test/pom.xml
index 2cf6ce2..59c8c46 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-invertedindex-test/pom.xml
+++ b/hyracks/hyracks-tests/hyracks-storage-am-invertedindex-test/pom.xml
@@ -18,9 +18,10 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
<encoding>UTF-8</encoding>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-rtree-test/pom.xml b/hyracks/hyracks-tests/hyracks-storage-am-rtree-test/pom.xml
index ea86042..7b1a3f3 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-rtree-test/pom.xml
+++ b/hyracks/hyracks-tests/hyracks-storage-am-rtree-test/pom.xml
@@ -18,8 +18,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-tests/hyracks-storage-common-test/pom.xml b/hyracks/hyracks-tests/hyracks-storage-common-test/pom.xml
index bd10e13..8e429f9 100644
--- a/hyracks/hyracks-tests/hyracks-storage-common-test/pom.xml
+++ b/hyracks/hyracks-tests/hyracks-storage-common-test/pom.xml
@@ -18,8 +18,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/hyracks-yarn/hyracks-yarn-am/pom.xml b/hyracks/hyracks-yarn/hyracks-yarn-am/pom.xml
index 9e453a6..d33ddc5 100644
--- a/hyracks/hyracks-yarn/hyracks-yarn-am/pom.xml
+++ b/hyracks/hyracks-yarn/hyracks-yarn-am/pom.xml
@@ -14,8 +14,8 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
</configuration>
</plugin>
<plugin>
diff --git a/hyracks/hyracks-yarn/hyracks-yarn-client/pom.xml b/hyracks/hyracks-yarn/hyracks-yarn-client/pom.xml
index 08935a7..649aa6c 100644
--- a/hyracks/hyracks-yarn/hyracks-yarn-client/pom.xml
+++ b/hyracks/hyracks-yarn/hyracks-yarn-client/pom.xml
@@ -14,8 +14,8 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
</configuration>
</plugin>
<plugin>
diff --git a/hyracks/hyracks-yarn/hyracks-yarn-common/pom.xml b/hyracks/hyracks-yarn/hyracks-yarn-common/pom.xml
index 3aaf4a2..fe210fd 100644
--- a/hyracks/hyracks-yarn/hyracks-yarn-common/pom.xml
+++ b/hyracks/hyracks-yarn/hyracks-yarn-common/pom.xml
@@ -14,8 +14,8 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
</configuration>
</plugin>
</plugins>
diff --git a/hyracks/pom.xml b/hyracks/pom.xml
index 6c3859c..9c324df 100644
--- a/hyracks/pom.xml
+++ b/hyracks/pom.xml
@@ -73,6 +73,8 @@
<modules>
<module>hyracks-ipc</module>
<module>hyracks-api</module>
+ <module>hyracks-comm</module>
+ <module>hyracks-client</module>
<module>hyracks-dataflow-common</module>
<module>hyracks-dataflow-std</module>
<module>hyracks-dataflow-hadoop</module>
diff --git a/pregelix/pregelix-api/pom.xml b/pregelix/pregelix-api/pom.xml
index f580752..10efa59 100644
--- a/pregelix/pregelix-api/pom.xml
+++ b/pregelix/pregelix-api/pom.xml
@@ -21,8 +21,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
<plugin>
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/Vertex.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/Vertex.java
index a8cd3db..3a98fd9 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/Vertex.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/Vertex.java
@@ -73,6 +73,8 @@
private boolean updated = false;
/** has outgoing messages */
private boolean hasMessage = false;
+ /** whether this vertex has created a new live (non-halted) vertex */
+ private boolean createdNewLiveVertex = false;
/**
* use object pool for re-using objects
@@ -258,6 +260,7 @@
halt = in.readBoolean();
updated = false;
hasMessage = false;
+ createdNewLiveVertex = false;
}
@Override
@@ -369,6 +372,13 @@
}
/**
+ * Pregelix internal use only
+ */
+ public boolean createdNewLiveVertex() {
+ return this.createdNewLiveVertex;
+ }
+
+ /**
* sort the edges
*/
@SuppressWarnings("unchecked")
@@ -449,7 +459,8 @@
* @param vertex
* the vertex
*/
- public final void addVertex(I vertexId, V vertex) {
+ public final void addVertex(I vertexId, Vertex vertex) {
+ createdNewLiveVertex |= !vertex.isHalted();
delegate.addVertex(vertexId, vertex);
}
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/VertexDelegate.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/VertexDelegate.java
index d949bc5..4b153c1 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/VertexDelegate.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/VertexDelegate.java
@@ -105,7 +105,7 @@
this.vertexId = vertexId;
}
- public final void addVertex(I vertexId, V vertex) {
+ public final void addVertex(I vertexId, Vertex vertex) {
try {
insertTb.reset();
DataOutput outputInsert = insertTb.getDataOutput();
@@ -114,6 +114,19 @@
vertex.write(outputInsert);
insertTb.addFieldEndOffset();
FrameTupleUtils.flushTuple(appenderInsert, insertTb, insertWriter);
+
+ /**
+ * push an alive record (vertex id plus dummy message list) for the newly added vertex when it is not halted
+ */
+ if (pushAlive && !vertex.isHalted()) {
+ alive.reset();
+ DataOutput outputAlive = alive.getDataOutput();
+ vertexId.write(outputAlive);
+ alive.addFieldEndOffset();
+ dummyMessageList.write(outputAlive);
+ alive.addFieldEndOffset();
+ FrameTupleUtils.flushTuple(appenderAlive, alive, aliveWriter);
+ }
} catch (Exception e) {
throw new IllegalStateException(e);
}
diff --git a/pregelix/pregelix-core/pom.xml b/pregelix/pregelix-core/pom.xml
index 972d0ec..2f167fb 100644
--- a/pregelix/pregelix-core/pom.xml
+++ b/pregelix/pregelix-core/pom.xml
@@ -66,8 +66,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
<plugin>
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenInnerJoin.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenInnerJoin.java
index 727e7fe..9de4c04 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenInnerJoin.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenInnerJoin.java
@@ -246,28 +246,26 @@
ITuplePartitionComputerFactory partionFactory = new VertexIdPartitionComputerFactory(
rdUnnestedMessage.getFields()[0]);
- ITuplePartitionComputerFactory hashPartitionComputerFactory = new MergePartitionComputerFactory();
+ ITuplePartitionComputerFactory unifyingPartitionComputerFactory = new MergePartitionComputerFactory();
/** connect all operators **/
spec.connect(new OneToOneConnectorDescriptor(spec), emptyTupleSource, 0, preSuperStep, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), preSuperStep, 0, scanner, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 0, localSort, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 1,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), scanner, 1,
terminateWriter, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 2,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), scanner, 2,
finalAggregator, 0);
/**
* connect the insert/delete operator
*/
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 3, insertOp,
- 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), scanner, 3, insertOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), insertOp, 0, emptySink3, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 4, deleteOp,
- 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), scanner, 4, deleteOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), deleteOp, 0, emptySink4, 0);
- spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 5, btreeBulkLoad, 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), scanner, 5, btreeBulkLoad, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), localSort, 0, localGby, 0);
spec.connect(new MToNPartitioningMergingConnectorDescriptor(spec, partionFactory, keyFields, sortCmpFactories),
localGby, 0, globalGby, 0);
@@ -282,7 +280,7 @@
spec.addRoot(emptySink3);
spec.addRoot(emptySink4);
- spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
+ spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy(spec));
spec.setFrameSize(frameSize);
return spec;
}
@@ -470,7 +468,7 @@
EmptySinkOperatorDescriptor emptySink4 = new EmptySinkOperatorDescriptor(spec);
ClusterConfig.setLocationConstraint(spec, emptySink4);
- ITuplePartitionComputerFactory hashPartitionComputerFactory = new MergePartitionComputerFactory();
+ ITuplePartitionComputerFactory unifyingPartitionComputerFactory = new MergePartitionComputerFactory();
ITuplePartitionComputerFactory partionFactory = new VertexIdPartitionComputerFactory(
rdUnnestedMessage.getFields()[0]);
/** connect all operators **/
@@ -479,20 +477,20 @@
spec.connect(new OneToOneConnectorDescriptor(spec), materializeRead, 0, setUnion, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), setUnion, 0, join, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), join, 0, localSort, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 1,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), join, 1,
terminateWriter, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 2,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), join, 2,
finalAggregator, 0);
/**
* connect the insert/delete operator
*/
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 3, insertOp, 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), join, 3, insertOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), insertOp, 0, emptySink3, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 4, deleteOp, 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), join, 4, deleteOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), deleteOp, 0, emptySink4, 0);
- spec.connect(new OneToOneConnectorDescriptor(spec), join, 5, btreeBulkLoad, 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), join, 5, btreeBulkLoad, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), localSort, 0, localGby, 0);
spec.connect(new MToNPartitioningMergingConnectorDescriptor(spec, partionFactory, keyFields, sortCmpFactories),
localGby, 0, globalGby, 0);
@@ -506,7 +504,7 @@
spec.addRoot(emptySink3);
spec.addRoot(emptySink4);
- spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
+ spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy(spec));
spec.setFrameSize(frameSize);
return spec;
}
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoin.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoin.java
index 9bad169..91c15b2 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoin.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoin.java
@@ -189,7 +189,7 @@
TerminationStateWriterOperatorDescriptor terminateWriter = new TerminationStateWriterOperatorDescriptor(spec,
configurationFactory, jobId);
PartitionConstraintHelper.addPartitionCountConstraint(spec, terminateWriter, 1);
- ITuplePartitionComputerFactory hashPartitionComputerFactory = new MergePartitionComputerFactory();
+ ITuplePartitionComputerFactory unifyingPartitionComputerFactory = new MergePartitionComputerFactory();
/**
* final aggregate write operator
@@ -233,19 +233,17 @@
spec.connect(new OneToOneConnectorDescriptor(spec), emptyTupleSource, 0, preSuperStep, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), preSuperStep, 0, scanner, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 0, localSort, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 1,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), scanner, 1,
terminateWriter, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 2,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), scanner, 2,
finalAggregator, 0);
/**
* connect the insert/delete operator
*/
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 3, insertOp,
- 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), scanner, 3, insertOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), insertOp, 0, emptySink3, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 4, deleteOp,
- 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), scanner, 4, deleteOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), deleteOp, 0, emptySink4, 0);
/**
@@ -264,7 +262,7 @@
spec.addRoot(emptySink3);
spec.addRoot(emptySink4);
- spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
+ spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy(spec));
spec.setFrameSize(frameSize);
return spec;
}
@@ -432,7 +430,7 @@
EmptySinkOperatorDescriptor emptySink4 = new EmptySinkOperatorDescriptor(spec);
ClusterConfig.setLocationConstraint(spec, emptySink4);
- ITuplePartitionComputerFactory hashPartitionComputerFactory = new MergePartitionComputerFactory();
+ ITuplePartitionComputerFactory unifyingPartitionComputerFactory = new MergePartitionComputerFactory();
ITuplePartitionComputerFactory partionFactory = new VertexIdPartitionComputerFactory(
rdUnnestedMessage.getFields()[0]);
@@ -441,17 +439,17 @@
spec.connect(new OneToOneConnectorDescriptor(spec), preSuperStep, 0, materializeRead, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), materializeRead, 0, join, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), join, 0, localSort, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 1,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), join, 1,
terminateWriter, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 2,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), join, 2,
finalAggregator, 0);
/**
* connect the insert/delete operator
*/
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 3, insertOp, 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), join, 3, insertOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), insertOp, 0, emptySink3, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 4, deleteOp, 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), join, 4, deleteOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), deleteOp, 0, emptySink4, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), localSort, 0, localGby, 0);
@@ -465,7 +463,7 @@
spec.addRoot(finalAggregator);
spec.addRoot(emptySink);
- spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
+ spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy(spec));
spec.setFrameSize(frameSize);
return spec;
}
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSingleSort.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSingleSort.java
index ffdef10..ee1fd0f 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSingleSort.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSingleSort.java
@@ -50,12 +50,12 @@
import edu.uci.ics.pregelix.core.jobgen.clusterconfig.ClusterConfig;
import edu.uci.ics.pregelix.core.runtime.touchpoint.WritableComparingBinaryComparatorFactory;
import edu.uci.ics.pregelix.core.util.DataflowUtils;
+import edu.uci.ics.pregelix.dataflow.ConnectorPolicyAssignmentPolicy;
import edu.uci.ics.pregelix.dataflow.EmptySinkOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.EmptyTupleSourceOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.FinalAggregateOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.MaterializingReadOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.MaterializingWriteOperatorDescriptor;
-import edu.uci.ics.pregelix.dataflow.NonCombinerConnectorPolicyAssignmentPolicy;
import edu.uci.ics.pregelix.dataflow.TerminationStateWriterOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.base.IConfigurationFactory;
import edu.uci.ics.pregelix.dataflow.std.BTreeSearchFunctionUpdateOperatorDescriptor;
@@ -135,7 +135,7 @@
comparatorFactories, JobGenUtil.getForwardScan(iteration), null, null, true, true,
new BTreeDataflowHelperFactory(), inputRdFactory, 5,
new StartComputeUpdateFunctionFactory(confFactory), preHookFactory, null, rdUnnestedMessage, rdDummy,
- rdPartialAggregate);
+ rdPartialAggregate, rdInsert, rdDelete);
ClusterConfig.setLocationConstraint(spec, scanner);
/**
@@ -219,26 +219,24 @@
EmptySinkOperatorDescriptor emptySink4 = new EmptySinkOperatorDescriptor(spec);
ClusterConfig.setLocationConstraint(spec, emptySink4);
- ITuplePartitionComputerFactory hashPartitionComputerFactory = new MergePartitionComputerFactory();
+ ITuplePartitionComputerFactory unifyingPartitionComputerFactory = new MergePartitionComputerFactory();
ITuplePartitionComputerFactory partionFactory = new VertexIdPartitionComputerFactory(
rdUnnestedMessage.getFields()[0]);
/** connect all operators **/
spec.connect(new OneToOneConnectorDescriptor(spec), emptyTupleSource, 0, preSuperStep, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), preSuperStep, 0, scanner, 0);
spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), scanner, 0, globalSort, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 1,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), scanner, 1,
terminateWriter, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 2,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), scanner, 2,
finalAggregator, 0);
/**
* connect the insert/delete operator
*/
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 3, insertOp,
- 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), scanner, 3, insertOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), insertOp, 0, emptySink3, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 4, deleteOp,
- 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), scanner, 4, deleteOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), deleteOp, 0, emptySink4, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), globalSort, 0, globalGby, 0);
@@ -252,7 +250,7 @@
spec.addRoot(emptySink3);
spec.addRoot(emptySink4);
- spec.setConnectorPolicyAssignmentPolicy(new NonCombinerConnectorPolicyAssignmentPolicy());
+ spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy(spec));
spec.setFrameSize(frameSize);
return spec;
}
@@ -329,7 +327,7 @@
leafFrameFactory, typeTraits, comparatorFactories, JobGenUtil.getForwardScan(iteration), keyFields,
keyFields, true, true, new BTreeDataflowHelperFactory(), true, nullWriterFactories, inputRdFactory, 5,
new ComputeUpdateFunctionFactory(confFactory), preHookFactory, null, rdUnnestedMessage, rdDummy,
- rdPartialAggregate);
+ rdPartialAggregate, rdInsert, rdDelete);
ClusterConfig.setLocationConstraint(spec, join);
/**
@@ -383,7 +381,6 @@
FinalAggregateOperatorDescriptor finalAggregator = new FinalAggregateOperatorDescriptor(spec,
configurationFactory, aggRdFactory, jobId);
PartitionConstraintHelper.addPartitionCountConstraint(spec, finalAggregator, 1);
-
int[] fieldPermutation = new int[] { 0, 1 };
TreeIndexInsertUpdateDeleteOperatorDescriptor insertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
@@ -400,7 +397,7 @@
comparatorFactories, fieldPermutation, IndexOp.DELETE, new BTreeDataflowHelperFactory(), null,
NoOpOperationCallbackProvider.INSTANCE);
ClusterConfig.setLocationConstraint(spec, deleteOp);
-
+
/** construct empty sink operator */
EmptySinkOperatorDescriptor emptySink3 = new EmptySinkOperatorDescriptor(spec);
ClusterConfig.setLocationConstraint(spec, emptySink3);
@@ -409,7 +406,7 @@
EmptySinkOperatorDescriptor emptySink4 = new EmptySinkOperatorDescriptor(spec);
ClusterConfig.setLocationConstraint(spec, emptySink4);
- ITuplePartitionComputerFactory hashPartitionComputerFactory = new MergePartitionComputerFactory();
+ ITuplePartitionComputerFactory unifyingPartitionComputerFactory = new MergePartitionComputerFactory();
ITuplePartitionComputerFactory partionFactory = new VertexIdPartitionComputerFactory(
rdUnnestedMessage.getFields()[0]);
@@ -418,20 +415,18 @@
spec.connect(new OneToOneConnectorDescriptor(spec), preSuperStep, 0, materializeRead, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), materializeRead, 0, join, 0);
spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), join, 0, globalSort, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 1,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), join, 1,
terminateWriter, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 2,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), join, 2,
finalAggregator, 0);
/**
* connect the insert/delete operator
*/
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 3, insertOp,
- 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), join, 3, insertOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), insertOp, 0, emptySink3, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 4, deleteOp,
- 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), join, 4, deleteOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), deleteOp, 0, emptySink4, 0);
-
+
spec.connect(new OneToOneConnectorDescriptor(spec), globalSort, 0, globalGby, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), globalGby, 0, materialize, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), materialize, 0, postSuperStep, 0);
@@ -441,7 +436,7 @@
spec.addRoot(finalAggregator);
spec.addRoot(emptySink);
- spec.setConnectorPolicyAssignmentPolicy(new NonCombinerConnectorPolicyAssignmentPolicy());
+ spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy(spec));
spec.setFrameSize(frameSize);
return spec;
}
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSort.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSort.java
index cc12523..628e9ce 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSort.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSort.java
@@ -50,6 +50,7 @@
import edu.uci.ics.pregelix.core.jobgen.clusterconfig.ClusterConfig;
import edu.uci.ics.pregelix.core.runtime.touchpoint.WritableComparingBinaryComparatorFactory;
import edu.uci.ics.pregelix.core.util.DataflowUtils;
+import edu.uci.ics.pregelix.dataflow.ConnectorPolicyAssignmentPolicy;
import edu.uci.ics.pregelix.dataflow.EmptySinkOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.EmptyTupleSourceOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.FinalAggregateOperatorDescriptor;
@@ -131,7 +132,7 @@
comparatorFactories, JobGenUtil.getForwardScan(iteration), null, null, true, true,
new BTreeDataflowHelperFactory(), inputRdFactory, 5,
new StartComputeUpdateFunctionFactory(confFactory), preHookFactory, null, rdUnnestedMessage, rdDummy,
- rdPartialAggregate);
+ rdPartialAggregate, rdInsert, rdDelete);
ClusterConfig.setLocationConstraint(spec, scanner);
/**
@@ -194,7 +195,7 @@
TerminationStateWriterOperatorDescriptor terminateWriter = new TerminationStateWriterOperatorDescriptor(spec,
configurationFactory, jobId);
PartitionConstraintHelper.addPartitionCountConstraint(spec, terminateWriter, 1);
- ITuplePartitionComputerFactory hashPartitionComputerFactory = new MergePartitionComputerFactory();
+ ITuplePartitionComputerFactory unifyingPartitionComputerFactory = new MergePartitionComputerFactory();
/**
* final aggregate write operator
@@ -238,18 +239,16 @@
spec.connect(new OneToOneConnectorDescriptor(spec), emptyTupleSource, 0, preSuperStep, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), preSuperStep, 0, scanner, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 0, localSort, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 1,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), scanner, 1,
terminateWriter, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 2,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), scanner, 2,
finalAggregator, 0);
/**
* connect the insert/delete operator
*/
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 3, insertOp,
- 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), scanner, 3, insertOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), insertOp, 0, emptySink3, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 4, deleteOp,
- 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), scanner, 4, deleteOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), deleteOp, 0, emptySink4, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), localSort, 0, localGby, 0);
@@ -266,6 +265,7 @@
spec.addRoot(emptySink4);
spec.setFrameSize(frameSize);
+ spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy(spec));
return spec;
}
@@ -341,7 +341,7 @@
leafFrameFactory, typeTraits, comparatorFactories, JobGenUtil.getForwardScan(iteration), keyFields,
keyFields, true, true, new BTreeDataflowHelperFactory(), true, nullWriterFactories, inputRdFactory, 5,
new ComputeUpdateFunctionFactory(confFactory), preHookFactory, null, rdUnnestedMessage, rdDummy,
- rdPartialAggregate);
+ rdPartialAggregate, rdInsert, rdDelete);
ClusterConfig.setLocationConstraint(spec, join);
/**
@@ -439,7 +439,7 @@
EmptySinkOperatorDescriptor emptySink4 = new EmptySinkOperatorDescriptor(spec);
ClusterConfig.setLocationConstraint(spec, emptySink4);
- ITuplePartitionComputerFactory hashPartitionComputerFactory = new MergePartitionComputerFactory();
+ ITuplePartitionComputerFactory unifyingPartitionComputerFactory = new MergePartitionComputerFactory();
ITuplePartitionComputerFactory partionFactory = new VertexIdPartitionComputerFactory(
rdUnnestedMessage.getFields()[0]);
@@ -448,16 +448,16 @@
spec.connect(new OneToOneConnectorDescriptor(spec), preSuperStep, 0, materializeRead, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), materializeRead, 0, join, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), join, 0, localSort, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 1,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), join, 1,
terminateWriter, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 2,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), join, 2,
finalAggregator, 0);
/**
* connect the insert/delete operator
*/
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 3, insertOp, 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), join, 3, insertOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), insertOp, 0, emptySink3, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 4, deleteOp, 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), join, 4, deleteOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), deleteOp, 0, emptySink4, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), localSort, 0, localGby, 0);
@@ -474,6 +474,7 @@
spec.addRoot(emptySink4);
spec.setFrameSize(frameSize);
+ spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy(spec));
return spec;
}
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/PregelixHyracksIntegrationUtil.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/PregelixHyracksIntegrationUtil.java
index 98d9612..d099645 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/PregelixHyracksIntegrationUtil.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/PregelixHyracksIntegrationUtil.java
@@ -65,6 +65,7 @@
ncConfig1.clusterNetIPAddress = "localhost";
ncConfig1.ccPort = TEST_HYRACKS_CC_PORT;
ncConfig1.dataIPAddress = "127.0.0.1";
+ ncConfig1.datasetIPAddress = "127.0.0.1";
ncConfig1.nodeId = NC1_ID;
ncConfig1.appNCMainClass = NCApplicationEntryPoint.class.getName();
nc1 = new NodeControllerService(ncConfig1);
@@ -75,6 +76,7 @@
ncConfig2.clusterNetIPAddress = "localhost";
ncConfig2.ccPort = TEST_HYRACKS_CC_PORT;
ncConfig2.dataIPAddress = "127.0.0.1";
+ ncConfig2.datasetIPAddress = "127.0.0.1";
ncConfig2.nodeId = NC2_ID;
ncConfig2.appNCMainClass = NCApplicationEntryPoint.class.getName();
nc2 = new NodeControllerService(ncConfig2);
diff --git a/pregelix/pregelix-core/src/main/resources/scripts/getip.sh b/pregelix/pregelix-core/src/main/resources/scripts/getip.sh
index e0cdf73..a691c0f 100755
--- a/pregelix/pregelix-core/src/main/resources/scripts/getip.sh
+++ b/pregelix/pregelix-core/src/main/resources/scripts/getip.sh
@@ -6,6 +6,10 @@
then
#Get IP Address
IPADDR=`/sbin/ifconfig eth0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+ if [ "$IPADDR" = "" ]
+ then
+ IPADDR=`/sbin/ifconfig em1 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+ fi
if [ "$IPADDR" = "" ]
then
IPADDR=`/sbin/ifconfig lo | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
diff --git a/pregelix/pregelix-core/src/main/resources/scripts/startAllNCs.sh b/pregelix/pregelix-core/src/main/resources/scripts/startAllNCs.sh
index 629bd90..d30da26 100644
--- a/pregelix/pregelix-core/src/main/resources/scripts/startAllNCs.sh
+++ b/pregelix/pregelix-core/src/main/resources/scripts/startAllNCs.sh
@@ -2,5 +2,5 @@
for i in `cat conf/slaves`
do
- ssh $i "cd ${PREGELIX_PATH}; bin/startnc.sh"
+ ssh $i "cd ${PREGELIX_PATH}; export JAVA_HOME=${JAVA_HOME}; bin/startnc.sh"
done
diff --git a/pregelix/pregelix-core/src/main/resources/scripts/startcc.sh b/pregelix/pregelix-core/src/main/resources/scripts/startcc.sh
index fe2551d..efb79ce 100644
--- a/pregelix/pregelix-core/src/main/resources/scripts/startcc.sh
+++ b/pregelix/pregelix-core/src/main/resources/scripts/startcc.sh
@@ -22,4 +22,4 @@
#Launch hyracks cc script
chmod -R 755 $HYRACKS_HOME
-$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyrackscc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -max-heartbeat-lapse-periods 999999 -default-max-job-attempts 0 -job-history-size 3 &> $CCLOGS_DIR/cc.log &
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyrackscc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -max-heartbeat-lapse-periods 999999 -default-max-job-attempts 0 -job-history-size 0 &> $CCLOGS_DIR/cc.log &
diff --git a/pregelix/pregelix-dataflow-std-base/pom.xml b/pregelix/pregelix-dataflow-std-base/pom.xml
index b404d7f..4fda45f 100644
--- a/pregelix/pregelix-dataflow-std-base/pom.xml
+++ b/pregelix/pregelix-dataflow-std-base/pom.xml
@@ -22,8 +22,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
<plugin>
diff --git a/pregelix/pregelix-dataflow-std/pom.xml b/pregelix/pregelix-dataflow-std/pom.xml
index b27d88d..efe1607 100644
--- a/pregelix/pregelix-dataflow-std/pom.xml
+++ b/pregelix/pregelix-dataflow-std/pom.xml
@@ -23,8 +23,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
<plugin>
diff --git a/pregelix/pregelix-dataflow/pom.xml b/pregelix/pregelix-dataflow/pom.xml
index 77a76aa..2d0859b 100644
--- a/pregelix/pregelix-dataflow/pom.xml
+++ b/pregelix/pregelix-dataflow/pom.xml
@@ -23,8 +23,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
<plugin>
diff --git a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/ConnectorPolicyAssignmentPolicy.java b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/ConnectorPolicyAssignmentPolicy.java
index d29afca..ae47ed8 100644
--- a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/ConnectorPolicyAssignmentPolicy.java
+++ b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/ConnectorPolicyAssignmentPolicy.java
@@ -15,25 +15,44 @@
package edu.uci.ics.pregelix.dataflow;
+import org.apache.commons.lang3.tuple.Pair;
+
import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicy;
import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicyAssignmentPolicy;
import edu.uci.ics.hyracks.api.dataflow.connectors.PipeliningConnectorPolicy;
+import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedBlockingConnectorPolicy;
import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedPipeliningConnectorPolicy;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.TreeIndexInsertUpdateDeleteOperatorDescriptor;
public class ConnectorPolicyAssignmentPolicy implements IConnectorPolicyAssignmentPolicy {
private static final long serialVersionUID = 1L;
- private IConnectorPolicy senderSideMaterializePolicy = new SendSideMaterializedPipeliningConnectorPolicy();
+ private IConnectorPolicy senderSideMatPipPolicy = new SendSideMaterializedPipeliningConnectorPolicy();
+ private IConnectorPolicy senderSideMatBlkPolicy = new SendSideMaterializedBlockingConnectorPolicy();
private IConnectorPolicy pipeliningPolicy = new PipeliningConnectorPolicy();
+ private JobSpecification spec;
+
+ public ConnectorPolicyAssignmentPolicy(JobSpecification spec) {
+ this.spec = spec;
+ }
@Override
public IConnectorPolicy getConnectorPolicyAssignment(IConnectorDescriptor c, int nProducers, int nConsumers,
int[] fanouts) {
if (c instanceof MToNPartitioningMergingConnectorDescriptor) {
- return senderSideMaterializePolicy;
+ return senderSideMatPipPolicy;
} else {
- return pipeliningPolicy;
+ Pair<Pair<IOperatorDescriptor, Integer>, Pair<IOperatorDescriptor, Integer>> endPoints = spec
+ .getConnectorOperatorMap().get(c.getConnectorId());
+ IOperatorDescriptor consumer = endPoints.getRight().getLeft();
+ if (consumer instanceof TreeIndexInsertUpdateDeleteOperatorDescriptor) {
+ return senderSideMatBlkPolicy;
+ } else {
+ return pipeliningPolicy;
+ }
}
}
}
diff --git a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/HDFSFileWriteOperatorDescriptor.java b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/HDFSFileWriteOperatorDescriptor.java
index 0133d761..2402748 100644
--- a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/HDFSFileWriteOperatorDescriptor.java
+++ b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/HDFSFileWriteOperatorDescriptor.java
@@ -14,6 +14,8 @@
*/
package edu.uci.ics.pregelix.dataflow;
+import java.io.File;
+import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
@@ -23,9 +25,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
@@ -37,6 +37,7 @@
import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameDeserializer;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputSinkOperatorNodePushable;
+import edu.uci.ics.hyracks.hdfs.ContextFactory;
import edu.uci.ics.pregelix.api.graph.Vertex;
import edu.uci.ics.pregelix.api.io.VertexOutputFormat;
import edu.uci.ics.pregelix.api.io.VertexWriter;
@@ -69,6 +70,7 @@
private TaskAttemptContext context;
private String TEMP_DIR = "_temporary";
private ClassLoader ctxCL;
+ private ContextFactory ctxFactory = new ContextFactory();
@Override
public void open() throws HyracksDataException {
@@ -80,8 +82,7 @@
conf = confFactory.createConfiguration();
VertexOutputFormat outputFormat = BspUtils.createVertexOutputFormat(conf);
- TaskAttemptID tid = new TaskAttemptID("", 0, true, partition, 0);
- context = new TaskAttemptContext(conf, tid);
+ context = ctxFactory.createContext(conf, partition);
try {
vertexWriter = outputFormat.createVertexWriter(context);
} catch (InterruptedException e) {
@@ -127,31 +128,26 @@
private void moveFilesToFinalPath() throws HyracksDataException {
try {
- JobContext job = new JobContext(conf, new JobID("0", 0));
+ JobContext job = ctxFactory.createJobContext(conf);
Path outputPath = FileOutputFormat.getOutputPath(job);
FileSystem dfs = FileSystem.get(conf);
Path filePath = new Path(outputPath, "part-" + new Integer(partition).toString());
- FileStatus[] tempPaths = dfs.listStatus(outputPath, new PathFilter() {
- @Override
- public boolean accept(Path dir) {
- return dir.getName().endsWith(TEMP_DIR);
- }
- });
- Path tempDir = tempPaths[0].getPath();
- FileStatus[] results = dfs.listStatus(tempDir, new PathFilter() {
- @Override
- public boolean accept(Path dir) {
- return dir.getName().indexOf(context.getTaskAttemptID().toString()) >= 0;
- }
- });
- Path srcDir = results[0].getPath();
- if (!dfs.exists(srcDir))
- throw new HyracksDataException("file " + srcDir.toString() + " does not exist!");
-
- FileStatus[] srcFiles = dfs.listStatus(srcDir);
- Path srcFile = srcFiles[0].getPath();
- dfs.delete(filePath, true);
- dfs.rename(srcFile, filePath);
+ FileStatus[] results = findPartitionPaths(outputPath, dfs);
+ if (results.length >= 1) {
+ /**
+ * for Hadoop-0.20.2
+ */
+ renameFile(dfs, filePath, results);
+ } else {
+ /**
+ * for Hadoop-0.23.1
+ */
+ int jobId = job.getJobID().getId();
+ outputPath = new Path(outputPath.toString() + File.separator + TEMP_DIR + File.separator
+ + jobId);
+ results = findPartitionPaths(outputPath, dfs);
+ renameFile(dfs, filePath, results);
+ }
} catch (IOException e) {
throw new HyracksDataException(e);
} finally {
@@ -159,6 +155,36 @@
}
}
+ private FileStatus[] findPartitionPaths(Path outputPath, FileSystem dfs) throws FileNotFoundException,
+ IOException {
+ FileStatus[] tempPaths = dfs.listStatus(outputPath, new PathFilter() {
+ @Override
+ public boolean accept(Path dir) {
+ return dir.getName().endsWith(TEMP_DIR);
+ }
+ });
+ Path tempDir = tempPaths[0].getPath();
+ FileStatus[] results = dfs.listStatus(tempDir, new PathFilter() {
+ @Override
+ public boolean accept(Path dir) {
+ return dir.getName().indexOf(context.getTaskAttemptID().toString()) >= 0;
+ }
+ });
+ return results;
+ }
+
+ private void renameFile(FileSystem dfs, Path filePath, FileStatus[] results) throws IOException,
+ HyracksDataException, FileNotFoundException {
+ Path srcDir = results[0].getPath();
+ if (!dfs.exists(srcDir))
+ throw new HyracksDataException("file " + srcDir.toString() + " does not exist!");
+
+ FileStatus[] srcFiles = dfs.listStatus(srcDir);
+ Path srcFile = srcFiles[0].getPath();
+ dfs.delete(filePath, true);
+ dfs.rename(srcFile, filePath);
+ }
+
};
}
}
diff --git a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/VertexFileScanOperatorDescriptor.java b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/VertexFileScanOperatorDescriptor.java
index a38b19e..0da7baf 100644
--- a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/VertexFileScanOperatorDescriptor.java
+++ b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/VertexFileScanOperatorDescriptor.java
@@ -26,7 +26,6 @@
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
@@ -134,11 +133,11 @@
appender.reset(frame, true);
VertexInputFormat vertexInputFormat = BspUtils.createVertexInputFormat(conf);
- TaskAttemptContext context = new TaskAttemptContext(conf, new TaskAttemptID());
InputSplit split = splits.get(splitId);
+ TaskAttemptContext mapperContext = ctxFactory.createContext(conf, splitId);
- VertexReader vertexReader = vertexInputFormat.createVertexReader(split, context);
- vertexReader.initialize(split, context);
+ VertexReader vertexReader = vertexInputFormat.createVertexReader(split, mapperContext);
+ vertexReader.initialize(split, mapperContext);
Vertex readerVertex = (Vertex) BspUtils.createVertex(conf);
ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldSize);
DataOutput dos = tb.getDataOutput();
@@ -146,7 +145,6 @@
/**
* set context
*/
- TaskAttemptContext mapperContext = ctxFactory.createContext(conf, splits.get(splitId));
Vertex.setContext(mapperContext);
/**
diff --git a/pregelix/pregelix-dist/pom.xml b/pregelix/pregelix-dist/pom.xml
index aa3d541..847e843 100644
--- a/pregelix/pregelix-dist/pom.xml
+++ b/pregelix/pregelix-dist/pom.xml
@@ -19,8 +19,8 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
</configuration>
</plugin>
<plugin>
diff --git a/pregelix/pregelix-example/pom.xml b/pregelix/pregelix-example/pom.xml
index 0cda633..84feb78 100644
--- a/pregelix/pregelix-example/pom.xml
+++ b/pregelix/pregelix-example/pom.xml
@@ -17,8 +17,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
<plugin>
@@ -77,6 +78,7 @@
</configuration>
</plugin>
<plugin>
+ <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-clean-plugin</artifactId>
<version>2.4.1</version>
<configuration>
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/GraphMutationVertex.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/GraphMutationVertex.java
new file mode 100644
index 0000000..e54373f
--- /dev/null
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/GraphMutationVertex.java
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.pregelix.example;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.io.VertexWriter;
+import edu.uci.ics.pregelix.api.io.text.TextVertexOutputFormat;
+import edu.uci.ics.pregelix.api.io.text.TextVertexOutputFormat.TextVertexWriter;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.example.client.Client;
+import edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat;
+import edu.uci.ics.pregelix.example.io.VLongWritable;
+
+/**
+ * Demonstrates the basic graph vertex insert/delete implementation.
+ */
+public class GraphMutationVertex extends Vertex<VLongWritable, DoubleWritable, FloatWritable, DoubleWritable> {
+
+ private VLongWritable vid = new VLongWritable();
+ private GraphMutationVertex newVertex = null;
+
+ @Override
+ public void compute(Iterator<DoubleWritable> msgIterator) {
+ if (Vertex.getSuperstep() == 1) {
+ if (newVertex == null) {
+ newVertex = new GraphMutationVertex();
+ }
+ if (getVertexId().get() % 2 == 0 || getVertexId().get() % 3 == 0) {
+ deleteVertex(getVertexId());
+ } else {
+ vid.set(100 * getVertexId().get());
+ newVertex.setVertexId(vid);
+ newVertex.setVertexValue(getVertexValue());
+ addVertex(vid, newVertex);
+ }
+ voteToHalt();
+ } else {
+ if (getVertexId().get() % 190 == 0) {
+ deleteVertex(getVertexId());
+ }
+ voteToHalt();
+ }
+ }
+
+ /**
+ * Simple VertexWriter that supports {@link GraphMutationVertex}
+ */
+ public static class SimpleGraphMutationVertexWriter extends
+ TextVertexWriter<VLongWritable, DoubleWritable, FloatWritable> {
+ public SimpleGraphMutationVertexWriter(RecordWriter<Text, Text> lineRecordWriter) {
+ super(lineRecordWriter);
+ }
+
+ @Override
+ public void writeVertex(Vertex<VLongWritable, DoubleWritable, FloatWritable, ?> vertex) throws IOException,
+ InterruptedException {
+ getRecordWriter().write(new Text(vertex.getVertexId().toString()),
+ new Text(vertex.getVertexValue().toString()));
+ }
+ }
+
+ @Override
+ public String toString() {
+ return getVertexId() + " " + getVertexValue();
+ }
+
+ /**
+ * Simple VertexOutputFormat that supports {@link GraphMutationVertex}
+ */
+ public static class SimpleGraphMutationVertexOutputFormat extends
+ TextVertexOutputFormat<VLongWritable, DoubleWritable, FloatWritable> {
+
+ @Override
+ public VertexWriter<VLongWritable, DoubleWritable, FloatWritable> createVertexWriter(TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ RecordWriter<Text, Text> recordWriter = textOutputFormat.getRecordWriter(context);
+ return new SimpleGraphMutationVertexWriter(recordWriter);
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ PregelixJob job = new PregelixJob(GraphMutationVertex.class.getSimpleName());
+ job.setVertexClass(GraphMutationVertex.class);
+ job.setVertexInputFormatClass(TextPageRankInputFormat.class);
+ job.setVertexOutputFormatClass(SimpleGraphMutationVertexOutputFormat.class);
+ Client.run(args, job);
+ }
+
+}
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobgen/JobGenerator.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobgen/JobGenerator.java
index c353d84..ca5a1c4 100644
--- a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobgen/JobGenerator.java
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobgen/JobGenerator.java
@@ -25,6 +25,8 @@
import edu.uci.ics.pregelix.api.job.PregelixJob;
import edu.uci.ics.pregelix.example.ConnectedComponentsVertex;
import edu.uci.ics.pregelix.example.ConnectedComponentsVertex.SimpleConnectedComponentsVertexOutputFormat;
+import edu.uci.ics.pregelix.example.GraphMutationVertex;
+import edu.uci.ics.pregelix.example.GraphMutationVertex.SimpleGraphMutationVertexOutputFormat;
import edu.uci.ics.pregelix.example.PageRankVertex;
import edu.uci.ics.pregelix.example.PageRankVertex.SimplePageRankVertexOutputFormat;
import edu.uci.ics.pregelix.example.PageRankVertex.SimulatedPageRankVertexInputFormat;
@@ -217,6 +219,17 @@
job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
}
+ private static void generateGraphMutationJob(String jobName, String outputPath) throws IOException {
+ PregelixJob job = new PregelixJob(jobName);
+ job.setVertexClass(GraphMutationVertex.class);
+ job.setVertexInputFormatClass(TextPageRankInputFormat.class);
+ job.setVertexOutputFormatClass(SimpleGraphMutationVertexOutputFormat.class);
+ FileInputFormat.setInputPaths(job, HDFS_INPUTPATH);
+ FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH));
+ job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 20);
+ job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
+ }
+
private static void genPageRank() throws IOException {
generatePageRankJob("PageRank", outputBase + "PageRank.xml");
generatePageRankJobReal("PageRank", outputBase + "PageRankReal.xml");
@@ -250,6 +263,10 @@
generateMaximalCliqueJob("Maximal Clique", outputBase + "MaximalClique.xml");
}
+ private static void genGraphMutation() throws IOException {
+ generateGraphMutationJob("Graph Mutation", outputBase + "GraphMutation.xml");
+ }
+
public static void main(String[] args) throws IOException {
genPageRank();
genShortestPath();
@@ -257,5 +274,6 @@
genReachibility();
genTriangleCounting();
genMaximalClique();
+ genGraphMutation();
}
}
diff --git a/pregelix/pregelix-example/src/test/resources/expected/GraphMutation.result b/pregelix/pregelix-example/src/test/resources/expected/GraphMutation.result
new file mode 100644
index 0000000..a30166c
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/GraphMutation.result
@@ -0,0 +1,13 @@
+1 0.0
+5 0.0
+7 0.0
+11 0.0
+13 0.0
+17 0.0
+19 0.0
+100 0.0
+500 0.0
+700 0.0
+1100 0.0
+1300 0.0
+1700 0.0
diff --git a/pregelix/pregelix-example/src/test/resources/hadoop/conf/log4j.properties b/pregelix/pregelix-example/src/test/resources/hadoop/conf/log4j.properties
index d5e6004..3335964 100755
--- a/pregelix/pregelix-example/src/test/resources/hadoop/conf/log4j.properties
+++ b/pregelix/pregelix-example/src/test/resources/hadoop/conf/log4j.properties
@@ -76,7 +76,7 @@
# FSNamesystem Audit logging
# All audit events are logged at INFO level
#
-log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
+log4j.logger.org.apache.hadoop=FATAL
# Custom Logging levels
diff --git a/pregelix/pregelix-example/src/test/resources/jobs/GraphMutation.xml b/pregelix/pregelix-example/src/test/resources/jobs/GraphMutation.xml
new file mode 100644
index 0000000..9f51f6d
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/jobs/GraphMutation.xml
@@ -0,0 +1,141 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.input.dir</name><value>file:/webmap</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>fs.checkpoint.dir</name><value>${hadoop.tmp.dir}/dfs/namesecondary</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>mapred.output.dir</name><value>/result</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.job.name</name><value>Graph Mutation</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>pregelix.numVertices</name><value>20</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>fs.default.name</name><value>file:///</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>mapred.reduce.tasks</name><value>1</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.ScriptBasedMapping</value></property>
+<property><name>pregelix.vertexClass</name><value>edu.uci.ics.pregelix.example.GraphMutationVertex</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>pregelix.vertexOutputFormatClass</name><value>edu.uci.ics.pregelix.example.GraphMutationVertex$SimpleGraphMutationVertexOutputFormat</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>pregelix.vertexInputFormatClass</name><value>edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+</configuration>
\ No newline at end of file
diff --git a/pregelix/pregelix-example/src/test/resources/jobs/TriangleCounting.xml b/pregelix/pregelix-example/src/test/resources/jobs/TriangleCounting.xml
index ee2acc1..0f44f4d 100644
--- a/pregelix/pregelix-example/src/test/resources/jobs/TriangleCounting.xml
+++ b/pregelix/pregelix-example/src/test/resources/jobs/TriangleCounting.xml
@@ -121,7 +121,7 @@
<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
<property><name>hadoop.logfile.size</name><value>10000000</value></property>
-<property><name>pregelix.vertexInputFormatClass</name><value>edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat</value></property>
+<property><name>pregelix.vertexInputFormatClass</name><value>edu.uci.ics.pregelix.example.trianglecounting.TextTriangleCountingInputFormat</value></property>
<property><name>pregelix.aggregatorClass</name><value>edu.uci.ics.pregelix.example.trianglecounting.TriangleCountingAggregator</value></property>
<property><name>mapred.job.queue.name</name><value>default</value></property>
<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
diff --git a/pregelix/pregelix-runtime/pom.xml b/pregelix/pregelix-runtime/pom.xml
index a0d25cc..94bda18 100644
--- a/pregelix/pregelix-runtime/pom.xml
+++ b/pregelix/pregelix-runtime/pom.xml
@@ -22,8 +22,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
<plugin>
diff --git a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/ComputeUpdateFunctionFactory.java b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/ComputeUpdateFunctionFactory.java
index a0dca3d..f7958d9 100644
--- a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/ComputeUpdateFunctionFactory.java
+++ b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/ComputeUpdateFunctionFactory.java
@@ -188,7 +188,7 @@
/**
* this partition should not terminate
*/
- if (terminate && (!vertex.isHalted() || vertex.hasMessage()))
+ if (terminate && (!vertex.isHalted() || vertex.hasMessage() || vertex.createdNewLiveVertex()))
terminate = false;
aggregator.step(vertex);
diff --git a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/StartComputeUpdateFunctionFactory.java b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/StartComputeUpdateFunctionFactory.java
index 3d8a355..0cf64a0 100644
--- a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/StartComputeUpdateFunctionFactory.java
+++ b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/StartComputeUpdateFunctionFactory.java
@@ -189,7 +189,7 @@
/**
* this partition should not terminate
*/
- if (terminate && (!vertex.isHalted() || vertex.hasMessage()))
+ if (terminate && (!vertex.isHalted() || vertex.hasMessage() || vertex.createdNewLiveVertex()))
terminate = false;
/**
diff --git a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/touchpoint/RuntimeHookFactory.java b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/touchpoint/RuntimeHookFactory.java
index d968262..c025f85 100644
--- a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/touchpoint/RuntimeHookFactory.java
+++ b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/touchpoint/RuntimeHookFactory.java
@@ -16,6 +16,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
@@ -45,7 +46,7 @@
public void configure(IHyracksTaskContext ctx) throws HyracksDataException {
Configuration conf = confFactory.createConfiguration();
try {
- TaskAttemptContext mapperContext = ctxFactory.createContext(conf, null);
+ TaskAttemptContext mapperContext = ctxFactory.createContext(conf, new TaskAttemptID());
Vertex.setContext(mapperContext);
BspUtils.setDefaultConfiguration(conf);
} catch (Exception e) {