Merge branch 'master' into jarodwen/features/positionvar
diff --git a/asterix-algebra/src/main/java/edu/uci/ics/asterix/algebra/base/LogicalOperatorDeepCopyVisitor.java b/asterix-algebra/src/main/java/edu/uci/ics/asterix/algebra/base/LogicalOperatorDeepCopyVisitor.java
index ba940d6..693a19e 100644
--- a/asterix-algebra/src/main/java/edu/uci/ics/asterix/algebra/base/LogicalOperatorDeepCopyVisitor.java
+++ b/asterix-algebra/src/main/java/edu/uci/ics/asterix/algebra/base/LogicalOperatorDeepCopyVisitor.java
@@ -39,6 +39,7 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExternalDataAccessByRIDOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IndexInsertDeleteOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
@@ -387,6 +388,13 @@
public ILogicalOperator visitUnnestMapOperator(UnnestMapOperator op, ILogicalOperator arg) {
throw new UnsupportedOperationException();
}
+
+ @Override
+ public ILogicalOperator visitExternalDataAccessByRIDOperator(
+ ExternalDataAccessByRIDOperator op, ILogicalOperator arg)
+ throws AlgebricksException {
+ throw new UnsupportedOperationException();
+ }
+
@Override
public ILogicalOperator visitUnnestOperator(UnnestOperator op, ILogicalOperator arg) throws AlgebricksException {
@@ -438,4 +446,5 @@
public Map<LogicalVariable, LogicalVariable> getVariableMapping() {
return outVarMapping;
}
+
}
diff --git a/asterix-algebra/src/main/java/edu/uci/ics/asterix/algebra/operators/physical/CommitRuntime.java b/asterix-algebra/src/main/java/edu/uci/ics/asterix/algebra/operators/physical/CommitRuntime.java
index a7c2fdb..6dd11bd 100644
--- a/asterix-algebra/src/main/java/edu/uci/ics/asterix/algebra/operators/physical/CommitRuntime.java
+++ b/asterix-algebra/src/main/java/edu/uci/ics/asterix/algebra/operators/physical/CommitRuntime.java
@@ -19,10 +19,11 @@
import edu.uci.ics.asterix.common.api.IAsterixAppRuntimeContext;
import edu.uci.ics.asterix.common.exceptions.ACIDException;
-import edu.uci.ics.asterix.common.transactions.DatasetId;
+import edu.uci.ics.asterix.common.transactions.ILogManager;
import edu.uci.ics.asterix.common.transactions.ITransactionContext;
import edu.uci.ics.asterix.common.transactions.ITransactionManager;
import edu.uci.ics.asterix.common.transactions.JobId;
+import edu.uci.ics.asterix.transaction.management.service.logging.LogRecord;
import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntime;
import edu.uci.ics.hyracks.api.comm.IFrameWriter;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
@@ -39,11 +40,13 @@
private final IHyracksTaskContext hyracksTaskCtx;
private final ITransactionManager transactionManager;
+ private final ILogManager logMgr;
private final JobId jobId;
- private final DatasetId datasetId;
+ private final int datasetId;
private final int[] primaryKeyFields;
private final boolean isWriteTransaction;
private final long[] longHashes;
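+ // Log record reused for the entity-commit entry of each committed tuple.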
+ private final LogRecord logRecord;
private ITransactionContext transactionContext;
private RecordDescriptor inputRecordDesc;
@@ -56,12 +59,14 @@
IAsterixAppRuntimeContext runtimeCtx = (IAsterixAppRuntimeContext) ctx.getJobletContext()
.getApplicationContext().getApplicationObject();
this.transactionManager = runtimeCtx.getTransactionSubsystem().getTransactionManager();
+ this.logMgr = runtimeCtx.getTransactionSubsystem().getLogManager();
this.jobId = jobId;
- this.datasetId = new DatasetId(datasetId);
+ this.datasetId = datasetId;
this.primaryKeyFields = primaryKeyFields;
this.frameTupleReference = new FrameTupleReference();
this.isWriteTransaction = isWriteTransaction;
this.longHashes = new long[2];
+ this.logRecord = new LogRecord();
}
@Override
@@ -82,11 +87,9 @@
for (int t = 0; t < nTuple; t++) {
frameTupleReference.reset(frameTupleAccessor, t);
pkHash = computePrimaryKeyHashValue(frameTupleReference, primaryKeyFields);
- try {
- transactionManager.commitTransaction(transactionContext, datasetId, pkHash);
- } catch (ACIDException e) {
- throw new HyracksDataException(e);
- }
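+ // Instead of committing each entity synchronously through the transaction manager,
+ // form an entity-commit log record for this tuple and append it to the transaction log.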
+ logRecord.formEntityCommitLogRecord(transactionContext, datasetId, pkHash, frameTupleReference,
+ primaryKeyFields);
+ logMgr.log(logRecord);
}
}
diff --git a/asterix-algebra/src/main/java/edu/uci/ics/asterix/algebra/operators/physical/ExternalDataAccessByRIDPOperator.java b/asterix-algebra/src/main/java/edu/uci/ics/asterix/algebra/operators/physical/ExternalDataAccessByRIDPOperator.java
new file mode 100644
index 0000000..447555e
--- /dev/null
+++ b/asterix-algebra/src/main/java/edu/uci/ics/asterix/algebra/operators/physical/ExternalDataAccessByRIDPOperator.java
@@ -0,0 +1,110 @@
+package edu.uci.ics.asterix.algebra.operators.physical;
+
+import edu.uci.ics.asterix.metadata.declared.AqlDataSource;
+import edu.uci.ics.asterix.metadata.declared.AqlMetadataProvider;
+import edu.uci.ics.asterix.metadata.declared.AqlSourceId;
+import edu.uci.ics.asterix.metadata.entities.Dataset;
+import edu.uci.ics.asterix.metadata.entities.Index;
+import edu.uci.ics.asterix.om.functions.AsterixBuiltinFunctions;
+import edu.uci.ics.asterix.om.types.ARecordType;
+import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IHyracksJobBuilder;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.PhysicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSourcePropertiesProvider;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractScanOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExternalDataAccessByRIDOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.AbstractScanPOperator;
+import edu.uci.ics.hyracks.algebricks.core.jobgen.impl.JobGenContext;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
+
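+/**
+ * Physical operator that retrieves records of an external dataset given the RIDs
+ * produced by a secondary-index search.
+ */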
+public class ExternalDataAccessByRIDPOperator extends AbstractScanPOperator {
+
+ private AqlSourceId datasetId;
+ private Dataset dataset;
+ private ARecordType recordType;
+ private Index secondaryIndex;
+ public ExternalDataAccessByRIDPOperator(AqlSourceId datasetId, Dataset dataset, ARecordType recordType,
+ Index secondaryIndex) {
+ this.datasetId = datasetId;
+ this.dataset = dataset;
+ this.recordType = recordType;
+ this.secondaryIndex = secondaryIndex;
+ }
+
+ public Dataset getDataset() {
+ return dataset;
+ }
+
+ public void setDataset(Dataset dataset) {
+ this.dataset = dataset;
+ }
+
+ public ARecordType getRecordType() {
+ return recordType;
+ }
+
+ public void setRecordType(ARecordType recordType) {
+ this.recordType = recordType;
+ }
+
+ public AqlSourceId getDatasetId() {
+ return datasetId;
+ }
+
+ public void setDatasetId(AqlSourceId datasetId) {
+ this.datasetId = datasetId;
+ }
+
+ @Override
+ public PhysicalOperatorTag getOperatorTag() {
+ return PhysicalOperatorTag.EXTERNAL_ACCESS_BY_RID;
+ }
+
+ @Override
+ public void computeDeliveredProperties(ILogicalOperator op,
+ IOptimizationContext context) throws AlgebricksException {
+ AqlDataSource ds = new AqlDataSource(datasetId, dataset, recordType);
+ IDataSourcePropertiesProvider dspp = ds.getPropertiesProvider();
+ AbstractScanOperator as = (AbstractScanOperator) op;
+ deliveredProperties = dspp.computePropertiesVector(as.getVariables());
+ }
+
+ @Override
+ public void contributeRuntimeOperator(IHyracksJobBuilder builder,
+ JobGenContext context, ILogicalOperator op,
+ IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas,
+ IOperatorSchema outerPlanSchema) throws AlgebricksException {
+ ExternalDataAccessByRIDOperator edabro = (ExternalDataAccessByRIDOperator) op;
+ ILogicalExpression expr = edabro.getExpressionRef().getValue();
+ if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+ throw new IllegalStateException();
+ }
+ AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
+ FunctionIdentifier funcIdent = funcExpr.getFunctionIdentifier();
+ if (!funcIdent.equals(AsterixBuiltinFunctions.EXTERNAL_ACCESS_BY_RID)) {
+ return;
+ }
+ AqlMetadataProvider metadataProvider = (AqlMetadataProvider) context.getMetadataProvider();
+ Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> externalAccessByRID = metadataProvider.buildExternalDataAccesByRIDRuntime(
+ builder.getJobSpec(), dataset, secondaryIndex);
+ builder.contributeHyracksOperator(edabro, externalAccessByRID.first);
+ builder.contributeAlgebricksPartitionConstraint(externalAccessByRID.first, externalAccessByRID.second);
+ ILogicalOperator srcExchange = edabro.getInputs().get(0).getValue();
+ builder.contributeGraphEdge(srcExchange, 0, edabro, 0);
+ }
+
+ @Override
+ public boolean isMicroOperator() {
+ return false;
+ }
+
+}
\ No newline at end of file
diff --git a/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/AccessMethodUtils.java b/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/AccessMethodUtils.java
index ab0fd79..95d29e3 100644
--- a/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/AccessMethodUtils.java
+++ b/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/AccessMethodUtils.java
@@ -22,8 +22,12 @@
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.commons.lang3.mutable.MutableObject;
+import edu.uci.ics.asterix.algebra.operators.physical.ExternalDataAccessByRIDPOperator;
import edu.uci.ics.asterix.aql.util.FunctionUtils;
+import edu.uci.ics.asterix.common.config.DatasetConfig.DatasetType;
import edu.uci.ics.asterix.common.config.DatasetConfig.IndexType;
+import edu.uci.ics.asterix.metadata.declared.AqlMetadataProvider;
+import edu.uci.ics.asterix.metadata.declared.AqlSourceId;
import edu.uci.ics.asterix.metadata.entities.Dataset;
import edu.uci.ics.asterix.metadata.entities.Index;
import edu.uci.ics.asterix.metadata.utils.DatasetUtils;
@@ -34,6 +38,7 @@
import edu.uci.ics.asterix.om.constants.AsterixConstantValue;
import edu.uci.ics.asterix.om.functions.AsterixBuiltinFunctions;
import edu.uci.ics.asterix.om.types.ARecordType;
+import edu.uci.ics.asterix.om.types.BuiltinType;
import edu.uci.ics.asterix.om.types.IAType;
import edu.uci.ics.asterix.om.util.NonTaggedFormatUtil;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
@@ -52,6 +57,7 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator.ExecutionMode;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExternalDataAccessByRIDOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.OrderOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.OrderOperator.IOrder;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestMapOperator;
@@ -60,290 +66,384 @@
* Static helper functions for rewriting plans using indexes.
*/
public class AccessMethodUtils {
- public static void appendPrimaryIndexTypes(Dataset dataset, IAType itemType, List<Object> target)
- throws IOException {
- ARecordType recordType = (ARecordType) itemType;
- List<String> partitioningKeys = DatasetUtils.getPartitioningKeys(dataset);
- for (String partitioningKey : partitioningKeys) {
- target.add(recordType.getFieldType(partitioningKey));
- }
- target.add(itemType);
- }
+ public static void appendPrimaryIndexTypes(Dataset dataset, IAType itemType, List<Object> target)
+ throws IOException {
+ ARecordType recordType = (ARecordType) itemType;
+ List<String> partitioningKeys = DatasetUtils.getPartitioningKeys(dataset);
+ for (String partitioningKey : partitioningKeys) {
+ target.add(recordType.getFieldType(partitioningKey));
+ }
+ target.add(itemType);
+ }
- public static ConstantExpression createStringConstant(String str) {
- return new ConstantExpression(new AsterixConstantValue(new AString(str)));
- }
+ public static void appendExternalRecTypes(Dataset dataset, IAType itemType, List<Object> target) {
+ target.add(itemType);
+ }
- public static ConstantExpression createInt32Constant(int i) {
- return new ConstantExpression(new AsterixConstantValue(new AInt32(i)));
- }
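+ // The RID of an external record is a string plus an int64, with an optional third
+ // int32 component when DatasetUtils.getExternalRIDSize(dataset) returns 3.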
+ public static void appendExternalRecPrimaryKey(Dataset dataset, List<Object> target) {
+ target.add(BuiltinType.ASTRING);
+ target.add(BuiltinType.AINT64);
+ if (DatasetUtils.getExternalRIDSize(dataset) == 3) {
+ target.add(BuiltinType.AINT32);
+ }
+ }
- public static ConstantExpression createBooleanConstant(boolean b) {
- if (b) {
- return new ConstantExpression(new AsterixConstantValue(ABoolean.TRUE));
- } else {
- return new ConstantExpression(new AsterixConstantValue(ABoolean.FALSE));
- }
- }
+ public static ConstantExpression createStringConstant(String str) {
+ return new ConstantExpression(new AsterixConstantValue(new AString(str)));
+ }
- public static String getStringConstant(Mutable<ILogicalExpression> expr) {
- IAObject obj = ((AsterixConstantValue) ((ConstantExpression) expr.getValue()).getValue()).getObject();
- return ((AString) obj).getStringValue();
- }
+ public static ConstantExpression createInt32Constant(int i) {
+ return new ConstantExpression(new AsterixConstantValue(new AInt32(i)));
+ }
- public static int getInt32Constant(Mutable<ILogicalExpression> expr) {
- IAObject obj = ((AsterixConstantValue) ((ConstantExpression) expr.getValue()).getValue()).getObject();
- return ((AInt32) obj).getIntegerValue();
- }
+ public static ConstantExpression createBooleanConstant(boolean b) {
+ if (b) {
+ return new ConstantExpression(new AsterixConstantValue(ABoolean.TRUE));
+ } else {
+ return new ConstantExpression(new AsterixConstantValue(ABoolean.FALSE));
+ }
+ }
- public static boolean getBooleanConstant(Mutable<ILogicalExpression> expr) {
- IAObject obj = ((AsterixConstantValue) ((ConstantExpression) expr.getValue()).getValue()).getObject();
- return ((ABoolean) obj).getBoolean();
- }
+ public static String getStringConstant(Mutable<ILogicalExpression> expr) {
+ IAObject obj = ((AsterixConstantValue) ((ConstantExpression) expr.getValue()).getValue()).getObject();
+ return ((AString) obj).getStringValue();
+ }
- public static boolean analyzeFuncExprArgsForOneConstAndVar(AbstractFunctionCallExpression funcExpr,
- AccessMethodAnalysisContext analysisCtx) {
- IAlgebricksConstantValue constFilterVal = null;
- LogicalVariable fieldVar = null;
- ILogicalExpression arg1 = funcExpr.getArguments().get(0).getValue();
- ILogicalExpression arg2 = funcExpr.getArguments().get(1).getValue();
- // One of the args must be a constant, and the other arg must be a variable.
- if (arg1.getExpressionTag() == LogicalExpressionTag.CONSTANT
- && arg2.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
- ConstantExpression constExpr = (ConstantExpression) arg1;
- constFilterVal = constExpr.getValue();
- VariableReferenceExpression varExpr = (VariableReferenceExpression) arg2;
- fieldVar = varExpr.getVariableReference();
- } else if (arg1.getExpressionTag() == LogicalExpressionTag.VARIABLE
- && arg2.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
- ConstantExpression constExpr = (ConstantExpression) arg2;
- constFilterVal = constExpr.getValue();
- VariableReferenceExpression varExpr = (VariableReferenceExpression) arg1;
- fieldVar = varExpr.getVariableReference();
- } else {
- return false;
- }
- analysisCtx.matchedFuncExprs.add(new OptimizableFuncExpr(funcExpr, fieldVar, constFilterVal));
- return true;
- }
+ public static int getInt32Constant(Mutable<ILogicalExpression> expr) {
+ IAObject obj = ((AsterixConstantValue) ((ConstantExpression) expr.getValue()).getValue()).getObject();
+ return ((AInt32) obj).getIntegerValue();
+ }
- public static boolean analyzeFuncExprArgsForTwoVars(AbstractFunctionCallExpression funcExpr,
- AccessMethodAnalysisContext analysisCtx) {
- LogicalVariable fieldVar1 = null;
- LogicalVariable fieldVar2 = null;
- ILogicalExpression arg1 = funcExpr.getArguments().get(0).getValue();
- ILogicalExpression arg2 = funcExpr.getArguments().get(1).getValue();
- if (arg1.getExpressionTag() == LogicalExpressionTag.VARIABLE
- && arg2.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
- fieldVar1 = ((VariableReferenceExpression) arg1).getVariableReference();
- fieldVar2 = ((VariableReferenceExpression) arg2).getVariableReference();
- } else {
- return false;
- }
- analysisCtx.matchedFuncExprs.add(new OptimizableFuncExpr(funcExpr,
- new LogicalVariable[] { fieldVar1, fieldVar2 }, null));
- return true;
- }
+ public static boolean getBooleanConstant(Mutable<ILogicalExpression> expr) {
+ IAObject obj = ((AsterixConstantValue) ((ConstantExpression) expr.getValue()).getValue()).getObject();
+ return ((ABoolean) obj).getBoolean();
+ }
- public static int getNumSecondaryKeys(Index index, ARecordType recordType) throws AlgebricksException {
- switch (index.getIndexType()) {
- case BTREE:
- case SINGLE_PARTITION_WORD_INVIX:
- case SINGLE_PARTITION_NGRAM_INVIX:
- case LENGTH_PARTITIONED_WORD_INVIX:
- case LENGTH_PARTITIONED_NGRAM_INVIX: {
- return index.getKeyFieldNames().size();
- }
- case RTREE: {
- Pair<IAType, Boolean> keyPairType = Index.getNonNullableKeyFieldType(index.getKeyFieldNames().get(0),
- recordType);
- IAType keyType = keyPairType.first;
- int numDimensions = NonTaggedFormatUtil.getNumDimensions(keyType.getTypeTag());
- return numDimensions * 2;
- }
- default: {
- throw new AlgebricksException("Unknown index kind: " + index.getIndexType());
- }
- }
- }
+ public static boolean analyzeFuncExprArgsForOneConstAndVar(AbstractFunctionCallExpression funcExpr,
+ AccessMethodAnalysisContext analysisCtx) {
+ IAlgebricksConstantValue constFilterVal = null;
+ LogicalVariable fieldVar = null;
+ ILogicalExpression arg1 = funcExpr.getArguments().get(0).getValue();
+ ILogicalExpression arg2 = funcExpr.getArguments().get(1).getValue();
+ // One of the args must be a constant, and the other arg must be a variable.
+ if (arg1.getExpressionTag() == LogicalExpressionTag.CONSTANT
+ && arg2.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
+ ConstantExpression constExpr = (ConstantExpression) arg1;
+ constFilterVal = constExpr.getValue();
+ VariableReferenceExpression varExpr = (VariableReferenceExpression) arg2;
+ fieldVar = varExpr.getVariableReference();
+ } else if (arg1.getExpressionTag() == LogicalExpressionTag.VARIABLE
+ && arg2.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
+ ConstantExpression constExpr = (ConstantExpression) arg2;
+ constFilterVal = constExpr.getValue();
+ VariableReferenceExpression varExpr = (VariableReferenceExpression) arg1;
+ fieldVar = varExpr.getVariableReference();
+ } else {
+ return false;
+ }
+ analysisCtx.matchedFuncExprs.add(new OptimizableFuncExpr(funcExpr, fieldVar, constFilterVal));
+ return true;
+ }
- /**
- * Appends the types of the fields produced by the given secondary index to dest.
- */
- public static void appendSecondaryIndexTypes(Dataset dataset, ARecordType recordType, Index index,
- boolean primaryKeysOnly, List<Object> dest) throws AlgebricksException {
- if (!primaryKeysOnly) {
- switch (index.getIndexType()) {
- case BTREE:
- case SINGLE_PARTITION_WORD_INVIX:
- case SINGLE_PARTITION_NGRAM_INVIX: {
- for (String sk : index.getKeyFieldNames()) {
- Pair<IAType, Boolean> keyPairType = Index.getNonNullableKeyFieldType(sk, recordType);
- dest.add(keyPairType.first);
- }
- break;
- }
- case RTREE: {
- Pair<IAType, Boolean> keyPairType = Index.getNonNullableKeyFieldType(index.getKeyFieldNames()
- .get(0), recordType);
- IAType keyType = keyPairType.first;
- IAType nestedKeyType = NonTaggedFormatUtil.getNestedSpatialType(keyType.getTypeTag());
- int numKeys = getNumSecondaryKeys(index, recordType);
- for (int i = 0; i < numKeys; i++) {
- dest.add(nestedKeyType);
- }
- break;
- }
- }
- }
- // Primary keys.
- List<String> partitioningKeys = DatasetUtils.getPartitioningKeys(dataset);
- for (String partitioningKey : partitioningKeys) {
- try {
- dest.add(recordType.getFieldType(partitioningKey));
- } catch (IOException e) {
- throw new AlgebricksException(e);
- }
- }
- }
+ public static boolean analyzeFuncExprArgsForTwoVars(AbstractFunctionCallExpression funcExpr,
+ AccessMethodAnalysisContext analysisCtx) {
+ LogicalVariable fieldVar1 = null;
+ LogicalVariable fieldVar2 = null;
+ ILogicalExpression arg1 = funcExpr.getArguments().get(0).getValue();
+ ILogicalExpression arg2 = funcExpr.getArguments().get(1).getValue();
+ if (arg1.getExpressionTag() == LogicalExpressionTag.VARIABLE
+ && arg2.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
+ fieldVar1 = ((VariableReferenceExpression) arg1).getVariableReference();
+ fieldVar2 = ((VariableReferenceExpression) arg2).getVariableReference();
+ } else {
+ return false;
+ }
+ analysisCtx.matchedFuncExprs.add(new OptimizableFuncExpr(funcExpr,
+ new LogicalVariable[] { fieldVar1, fieldVar2 }, null));
+ return true;
+ }
- public static void appendSecondaryIndexOutputVars(Dataset dataset, ARecordType recordType, Index index,
- boolean primaryKeysOnly, IOptimizationContext context, List<LogicalVariable> dest)
- throws AlgebricksException {
- int numPrimaryKeys = DatasetUtils.getPartitioningKeys(dataset).size();
- int numSecondaryKeys = getNumSecondaryKeys(index, recordType);
- int numVars = (primaryKeysOnly) ? numPrimaryKeys : numPrimaryKeys + numSecondaryKeys;
- for (int i = 0; i < numVars; i++) {
- dest.add(context.newVar());
- }
- }
+ public static int getNumSecondaryKeys(Index index, ARecordType recordType) throws AlgebricksException {
+ switch (index.getIndexType()) {
+ case BTREE:
+ case SINGLE_PARTITION_WORD_INVIX:
+ case SINGLE_PARTITION_NGRAM_INVIX:
+ case LENGTH_PARTITIONED_WORD_INVIX:
+ case LENGTH_PARTITIONED_NGRAM_INVIX: {
+ return index.getKeyFieldNames().size();
+ }
+ case RTREE: {
+ Pair<IAType, Boolean> keyPairType = Index.getNonNullableKeyFieldType(index.getKeyFieldNames().get(0),
+ recordType);
+ IAType keyType = keyPairType.first;
+ int numDimensions = NonTaggedFormatUtil.getNumDimensions(keyType.getTypeTag());
+ return numDimensions * 2;
+ }
+ default: {
+ throw new AlgebricksException("Unknown index kind: " + index.getIndexType());
+ }
+ }
+ }
- public static List<LogicalVariable> getPrimaryKeyVarsFromUnnestMap(Dataset dataset, ILogicalOperator unnestMapOp) {
- int numPrimaryKeys = DatasetUtils.getPartitioningKeys(dataset).size();
- List<LogicalVariable> primaryKeyVars = new ArrayList<LogicalVariable>();
- List<LogicalVariable> sourceVars = ((UnnestMapOperator) unnestMapOp).getVariables();
- // Assumes the primary keys are located at the end.
- int start = sourceVars.size() - numPrimaryKeys;
- int stop = sourceVars.size();
- for (int i = start; i < stop; i++) {
- primaryKeyVars.add(sourceVars.get(i));
- }
- return primaryKeyVars;
- }
+ /**
+ * Appends the types of the fields produced by the given secondary index to dest.
+ */
+ public static void appendSecondaryIndexTypes(Dataset dataset, ARecordType recordType, Index index,
+ boolean primaryKeysOnly, List<Object> dest) throws AlgebricksException {
+ if (!primaryKeysOnly) {
+ switch (index.getIndexType()) {
+ case BTREE:
+ case SINGLE_PARTITION_WORD_INVIX:
+ case SINGLE_PARTITION_NGRAM_INVIX: {
+ for (String sk : index.getKeyFieldNames()) {
+ Pair<IAType, Boolean> keyPairType = Index.getNonNullableKeyFieldType(sk, recordType);
+ dest.add(keyPairType.first);
+ }
+ break;
+ }
+ case RTREE: {
+ Pair<IAType, Boolean> keyPairType = Index.getNonNullableKeyFieldType(index.getKeyFieldNames()
+ .get(0), recordType);
+ IAType keyType = keyPairType.first;
+ IAType nestedKeyType = NonTaggedFormatUtil.getNestedSpatialType(keyType.getTypeTag());
+ int numKeys = getNumSecondaryKeys(index, recordType);
+ for (int i = 0; i < numKeys; i++) {
+ dest.add(nestedKeyType);
+ }
+ break;
+ }
+ }
+ }
+ // Primary keys.
+ if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
+ // External datasets: the RID fields serve as the primary key.
+ appendExternalRecPrimaryKey(dataset, dest);
+ } else {
+ List<String> partitioningKeys = DatasetUtils.getPartitioningKeys(dataset);
+ for (String partitioningKey : partitioningKeys) {
+ try {
+ dest.add(recordType.getFieldType(partitioningKey));
+ } catch (IOException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+ }
+ }
- /**
- * Returns the search key expression which feeds a secondary-index search. If we are optimizing a selection query then this method returns
- * the a ConstantExpression from the first constant value in the optimizable function expression.
- * If we are optimizing a join, then this method returns the VariableReferenceExpression that should feed the secondary index probe.
- */
- public static ILogicalExpression createSearchKeyExpr(IOptimizableFuncExpr optFuncExpr,
- OptimizableOperatorSubTree indexSubTree, OptimizableOperatorSubTree probeSubTree) {
- if (probeSubTree == null) {
- // We are optimizing a selection query. Search key is a constant.
- return new ConstantExpression(optFuncExpr.getConstantVal(0));
- } else {
- // We are optimizing a join query. Determine which variable feeds the secondary index.
- if (optFuncExpr.getOperatorSubTree(0) == null || optFuncExpr.getOperatorSubTree(0) == probeSubTree) {
- return new VariableReferenceExpression(optFuncExpr.getLogicalVar(0));
- } else {
- return new VariableReferenceExpression(optFuncExpr.getLogicalVar(1));
- }
- }
- }
+ public static void appendSecondaryIndexOutputVars(Dataset dataset, ARecordType recordType, Index index,
+ boolean primaryKeysOnly, IOptimizationContext context, List<LogicalVariable> dest)
+ throws AlgebricksException {
+ int numPrimaryKeys;
+ if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
+ numPrimaryKeys = DatasetUtils.getExternalRIDSize(dataset);
+ } else {
+ numPrimaryKeys = DatasetUtils.getPartitioningKeys(dataset).size();
+ }
+ int numSecondaryKeys = getNumSecondaryKeys(index, recordType);
+ int numVars = (primaryKeysOnly) ? numPrimaryKeys : numPrimaryKeys + numSecondaryKeys;
+ for (int i = 0; i < numVars; i++) {
+ dest.add(context.newVar());
+ }
+ }
- /**
- * Returns the first expr optimizable by this index.
- */
- public static IOptimizableFuncExpr chooseFirstOptFuncExpr(Index chosenIndex, AccessMethodAnalysisContext analysisCtx) {
- List<Integer> indexExprs = analysisCtx.getIndexExprs(chosenIndex);
- int firstExprIndex = indexExprs.get(0);
- return analysisCtx.matchedFuncExprs.get(firstExprIndex);
- }
+ public static List<LogicalVariable> getPrimaryKeyVarsFromUnnestMap(Dataset dataset, ILogicalOperator unnestMapOp) {
+ int numPrimaryKeys;
+ if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
+ numPrimaryKeys = DatasetUtils.getExternalRIDSize(dataset);
+ } else {
+ numPrimaryKeys = DatasetUtils.getPartitioningKeys(dataset).size();
+ }
+ List<LogicalVariable> primaryKeyVars = new ArrayList<LogicalVariable>();
+ List<LogicalVariable> sourceVars = ((UnnestMapOperator) unnestMapOp).getVariables();
+ // Assumes the primary keys are located at the end.
+ int start = sourceVars.size() - numPrimaryKeys;
+ int stop = sourceVars.size();
+ for (int i = start; i < stop; i++) {
+ primaryKeyVars.add(sourceVars.get(i));
+ }
+ return primaryKeyVars;
+ }
- public static UnnestMapOperator createSecondaryIndexUnnestMap(Dataset dataset, ARecordType recordType, Index index,
- ILogicalOperator inputOp, AccessMethodJobGenParams jobGenParams, IOptimizationContext context,
- boolean outputPrimaryKeysOnly, boolean retainInput) throws AlgebricksException {
- // The job gen parameters are transferred to the actual job gen via the UnnestMapOperator's function arguments.
- ArrayList<Mutable<ILogicalExpression>> secondaryIndexFuncArgs = new ArrayList<Mutable<ILogicalExpression>>();
- jobGenParams.writeToFuncArgs(secondaryIndexFuncArgs);
- // Variables and types coming out of the secondary-index search.
- List<LogicalVariable> secondaryIndexUnnestVars = new ArrayList<LogicalVariable>();
- List<Object> secondaryIndexOutputTypes = new ArrayList<Object>();
- // Append output variables/types generated by the secondary-index search (not forwarded from input).
- appendSecondaryIndexOutputVars(dataset, recordType, index, outputPrimaryKeysOnly, context,
- secondaryIndexUnnestVars);
- appendSecondaryIndexTypes(dataset, recordType, index, outputPrimaryKeysOnly, secondaryIndexOutputTypes);
- // An index search is expressed as an unnest over an index-search function.
- IFunctionInfo secondaryIndexSearch = FunctionUtils.getFunctionInfo(AsterixBuiltinFunctions.INDEX_SEARCH);
- UnnestingFunctionCallExpression secondaryIndexSearchFunc = new UnnestingFunctionCallExpression(
- secondaryIndexSearch, secondaryIndexFuncArgs);
- secondaryIndexSearchFunc.setReturnsUniqueValues(true);
- // This is the operator that jobgen will be looking for. It contains an unnest function that has all necessary arguments to determine
- // which index to use, which variables contain the index-search keys, what is the original dataset, etc.
- UnnestMapOperator secondaryIndexUnnestOp = new UnnestMapOperator(secondaryIndexUnnestVars,
- new MutableObject<ILogicalExpression>(secondaryIndexSearchFunc), secondaryIndexOutputTypes, retainInput);
- secondaryIndexUnnestOp.getInputs().add(new MutableObject<ILogicalOperator>(inputOp));
- context.computeAndSetTypeEnvironmentForOperator(secondaryIndexUnnestOp);
- secondaryIndexUnnestOp.setExecutionMode(ExecutionMode.PARTITIONED);
- return secondaryIndexUnnestOp;
- }
+ /**
+ * Returns the search key expression which feeds a secondary-index search. If we are optimizing a selection query then this method returns
+ * a ConstantExpression from the first constant value in the optimizable function expression.
+ * If we are optimizing a join, then this method returns the VariableReferenceExpression that should feed the secondary index probe.
+ */
+ public static ILogicalExpression createSearchKeyExpr(IOptimizableFuncExpr optFuncExpr,
+ OptimizableOperatorSubTree indexSubTree, OptimizableOperatorSubTree probeSubTree) {
+ if (probeSubTree == null) {
+ // We are optimizing a selection query. Search key is a constant.
+ return new ConstantExpression(optFuncExpr.getConstantVal(0));
+ } else {
+ // We are optimizing a join query. Determine which variable feeds the secondary index.
+ if (optFuncExpr.getOperatorSubTree(0) == null || optFuncExpr.getOperatorSubTree(0) == probeSubTree) {
+ return new VariableReferenceExpression(optFuncExpr.getLogicalVar(0));
+ } else {
+ return new VariableReferenceExpression(optFuncExpr.getLogicalVar(1));
+ }
+ }
+ }
- public static UnnestMapOperator createPrimaryIndexUnnestMap(DataSourceScanOperator dataSourceScan, Dataset dataset,
- ARecordType recordType, ILogicalOperator inputOp, IOptimizationContext context, boolean sortPrimaryKeys,
- boolean retainInput, boolean requiresBroadcast) throws AlgebricksException {
- List<LogicalVariable> primaryKeyVars = AccessMethodUtils.getPrimaryKeyVarsFromUnnestMap(dataset, inputOp);
- // Optionally add a sort on the primary-index keys before searching the primary index.
- OrderOperator order = null;
- if (sortPrimaryKeys) {
- order = new OrderOperator();
- for (LogicalVariable pkVar : primaryKeyVars) {
- Mutable<ILogicalExpression> vRef = new MutableObject<ILogicalExpression>(
- new VariableReferenceExpression(pkVar));
- order.getOrderExpressions().add(
- new Pair<IOrder, Mutable<ILogicalExpression>>(OrderOperator.ASC_ORDER, vRef));
- }
- // The secondary-index search feeds into the sort.
- order.getInputs().add(new MutableObject<ILogicalOperator>(inputOp));
- order.setExecutionMode(ExecutionMode.LOCAL);
- context.computeAndSetTypeEnvironmentForOperator(order);
- }
- // The job gen parameters are transferred to the actual job gen via the UnnestMapOperator's function arguments.
- List<Mutable<ILogicalExpression>> primaryIndexFuncArgs = new ArrayList<Mutable<ILogicalExpression>>();
- BTreeJobGenParams jobGenParams = new BTreeJobGenParams(dataset.getDatasetName(), IndexType.BTREE,
- dataset.getDataverseName(), dataset.getDatasetName(), retainInput, requiresBroadcast);
- // Set low/high inclusive to true for a point lookup.
- jobGenParams.setLowKeyInclusive(true);
- jobGenParams.setHighKeyInclusive(true);
- jobGenParams.setLowKeyVarList(primaryKeyVars, 0, primaryKeyVars.size());
- jobGenParams.setHighKeyVarList(primaryKeyVars, 0, primaryKeyVars.size());
- jobGenParams.writeToFuncArgs(primaryIndexFuncArgs);
- // Variables and types coming out of the primary-index search.
- List<LogicalVariable> primaryIndexUnnestVars = new ArrayList<LogicalVariable>();
- List<Object> primaryIndexOutputTypes = new ArrayList<Object>();
- // Append output variables/types generated by the primary-index search (not forwarded from input).
- primaryIndexUnnestVars.addAll(dataSourceScan.getVariables());
- try {
- appendPrimaryIndexTypes(dataset, recordType, primaryIndexOutputTypes);
- } catch (IOException e) {
- throw new AlgebricksException(e);
- }
- // An index search is expressed as an unnest over an index-search function.
- IFunctionInfo primaryIndexSearch = FunctionUtils.getFunctionInfo(AsterixBuiltinFunctions.INDEX_SEARCH);
- AbstractFunctionCallExpression primaryIndexSearchFunc = new ScalarFunctionCallExpression(primaryIndexSearch,
- primaryIndexFuncArgs);
- // This is the operator that jobgen will be looking for. It contains an unnest function that has all necessary arguments to determine
- // which index to use, which variables contain the index-search keys, what is the original dataset, etc.
- UnnestMapOperator primaryIndexUnnestOp = new UnnestMapOperator(primaryIndexUnnestVars,
- new MutableObject<ILogicalExpression>(primaryIndexSearchFunc), primaryIndexOutputTypes, retainInput);
- // Fed by the order operator or the secondaryIndexUnnestOp.
- if (sortPrimaryKeys) {
- primaryIndexUnnestOp.getInputs().add(new MutableObject<ILogicalOperator>(order));
- } else {
- primaryIndexUnnestOp.getInputs().add(new MutableObject<ILogicalOperator>(inputOp));
- }
- context.computeAndSetTypeEnvironmentForOperator(primaryIndexUnnestOp);
- primaryIndexUnnestOp.setExecutionMode(ExecutionMode.PARTITIONED);
- return primaryIndexUnnestOp;
- }
+ /**
+ * Returns the first expr optimizable by this index.
+ */
+ public static IOptimizableFuncExpr chooseFirstOptFuncExpr(Index chosenIndex, AccessMethodAnalysisContext analysisCtx) {
+ List<Integer> indexExprs = analysisCtx.getIndexExprs(chosenIndex);
+ int firstExprIndex = indexExprs.get(0);
+ return analysisCtx.matchedFuncExprs.get(firstExprIndex);
+ }
+
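+ /**
+ * Appends the size of varList, followed by a reference to each variable, to funcArgs.
+ */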
+ public static void writeVarList(List<LogicalVariable> varList, List<Mutable<ILogicalExpression>> funcArgs) {
+ Mutable<ILogicalExpression> numKeysRef = new MutableObject<ILogicalExpression>(new ConstantExpression(
+ new AsterixConstantValue(new AInt32(varList.size()))));
+ funcArgs.add(numKeysRef);
+ for (LogicalVariable keyVar : varList) {
+ Mutable<ILogicalExpression> keyVarRef = new MutableObject<ILogicalExpression>(
+ new VariableReferenceExpression(keyVar));
+ funcArgs.add(keyVarRef);
+ }
+ }
+
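+ /**
+ * Builds the plan fragment that fetches external-dataset records for the RIDs produced by a
+ * secondary-index search: the RIDs are sorted, then fed into an ExternalDataAccessByRIDOperator.
+ */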
+ public static ExternalDataAccessByRIDOperator createExternalDataAccessByRIDUnnestMap(DataSourceScanOperator dataSourceScan, Dataset dataset,
+ ARecordType recordType, ILogicalOperator inputOp, IOptimizationContext context, Index secondaryIndex) throws AlgebricksException {
+ List<LogicalVariable> primaryKeyVars = AccessMethodUtils.getPrimaryKeyVarsFromUnnestMap(dataset, inputOp);
+
+ // add a sort on the RID fields before fetching external data.
+ OrderOperator order = new OrderOperator();
+ for (LogicalVariable pkVar : primaryKeyVars) {
+ Mutable<ILogicalExpression> vRef = new MutableObject<ILogicalExpression>(
+ new VariableReferenceExpression(pkVar));
+ order.getOrderExpressions().add(
+ new Pair<IOrder, Mutable<ILogicalExpression>>(OrderOperator.ASC_ORDER, vRef));
+ }
+ // The secondary-index search feeds into the sort.
+ order.getInputs().add(new MutableObject<ILogicalOperator>(inputOp));
+ order.setExecutionMode(ExecutionMode.LOCAL);
+ context.computeAndSetTypeEnvironmentForOperator(order);
+
+ List<Mutable<ILogicalExpression>> externalRIDAccessFuncArgs = new ArrayList<Mutable<ILogicalExpression>>();
+ AccessMethodUtils.writeVarList(primaryKeyVars,externalRIDAccessFuncArgs);
+
+ // Variables and types coming out of the external access.
+ List<LogicalVariable> externalAccessByRIDVars = new ArrayList<LogicalVariable>();
+ List<Object> externalAccessOutputTypes = new ArrayList<Object>();
+ // Append output variables/types generated by the data scan (not forwarded from input).
+ externalAccessByRIDVars.addAll(dataSourceScan.getVariables());
+ appendExternalRecTypes(dataset, recordType, externalAccessOutputTypes);
+
+ IFunctionInfo externalAccessByRID = FunctionUtils.getFunctionInfo(AsterixBuiltinFunctions.EXTERNAL_ACCESS_BY_RID);
+ AbstractFunctionCallExpression externalAccessFunc = new ScalarFunctionCallExpression(externalAccessByRID,
+ externalRIDAccessFuncArgs);
+
+ ExternalDataAccessByRIDOperator externalAccessByRIDOp = new ExternalDataAccessByRIDOperator(externalAccessByRIDVars,
+ new MutableObject<ILogicalExpression>(externalAccessFunc), externalAccessOutputTypes);
+ // Fed by the order operator that sorts the RIDs.
+ externalAccessByRIDOp.getInputs().add(new MutableObject<ILogicalOperator>(order));
+
+ context.computeAndSetTypeEnvironmentForOperator(externalAccessByRIDOp);
+ externalAccessByRIDOp.setExecutionMode(ExecutionMode.PARTITIONED);
+
+ //set the physical operator
+ AqlSourceId dataSourceId = new AqlSourceId(dataset.getDataverseName(),
+ dataset.getDatasetName());
+ externalAccessByRIDOp.setPhysicalOperator(new ExternalDataAccessByRIDPOperator(dataSourceId, dataset, recordType, secondaryIndex));
+ return externalAccessByRIDOp;
+ }
+
+ public static UnnestMapOperator createSecondaryIndexUnnestMap(Dataset dataset, ARecordType recordType, Index index,
+ ILogicalOperator inputOp, AccessMethodJobGenParams jobGenParams, IOptimizationContext context,
+ boolean outputPrimaryKeysOnly, boolean retainInput) throws AlgebricksException {
+ // The job gen parameters are transferred to the actual job gen via the UnnestMapOperator's function arguments.
+ ArrayList<Mutable<ILogicalExpression>> secondaryIndexFuncArgs = new ArrayList<Mutable<ILogicalExpression>>();
+ jobGenParams.writeToFuncArgs(secondaryIndexFuncArgs);
+ // Variables and types coming out of the secondary-index search.
+ List<LogicalVariable> secondaryIndexUnnestVars = new ArrayList<LogicalVariable>();
+ List<Object> secondaryIndexOutputTypes = new ArrayList<Object>();
+ // Append output variables/types generated by the secondary-index search (not forwarded from input).
+ appendSecondaryIndexOutputVars(dataset, recordType, index, outputPrimaryKeysOnly, context,
+ secondaryIndexUnnestVars);
+ appendSecondaryIndexTypes(dataset, recordType, index, outputPrimaryKeysOnly, secondaryIndexOutputTypes);
+ // An index search is expressed as an unnest over an index-search function.
+ IFunctionInfo secondaryIndexSearch = FunctionUtils.getFunctionInfo(AsterixBuiltinFunctions.INDEX_SEARCH);
+ UnnestingFunctionCallExpression secondaryIndexSearchFunc = new UnnestingFunctionCallExpression(
+ secondaryIndexSearch, secondaryIndexFuncArgs);
+ secondaryIndexSearchFunc.setReturnsUniqueValues(true);
+ // This is the operator that jobgen will be looking for. It contains an unnest function that has all necessary arguments to determine
+ // which index to use, which variables contain the index-search keys, what is the original dataset, etc.
+ UnnestMapOperator secondaryIndexUnnestOp = new UnnestMapOperator(secondaryIndexUnnestVars,
+ new MutableObject<ILogicalExpression>(secondaryIndexSearchFunc), secondaryIndexOutputTypes, retainInput);
+ secondaryIndexUnnestOp.getInputs().add(new MutableObject<ILogicalOperator>(inputOp));
+ context.computeAndSetTypeEnvironmentForOperator(secondaryIndexUnnestOp);
+ secondaryIndexUnnestOp.setExecutionMode(ExecutionMode.PARTITIONED);
+ return secondaryIndexUnnestOp;
+ }
+
+ public static UnnestMapOperator createPrimaryIndexUnnestMap(DataSourceScanOperator dataSourceScan, Dataset dataset,
+ ARecordType recordType, ILogicalOperator inputOp, IOptimizationContext context, boolean sortPrimaryKeys,
+ boolean retainInput, boolean requiresBroadcast) throws AlgebricksException {
+ List<LogicalVariable> primaryKeyVars = AccessMethodUtils.getPrimaryKeyVarsFromUnnestMap(dataset, inputOp);
+ // Optionally add a sort on the primary-index keys before searching the primary index.
+ OrderOperator order = null;
+ if (sortPrimaryKeys) {
+ order = new OrderOperator();
+ for (LogicalVariable pkVar : primaryKeyVars) {
+ Mutable<ILogicalExpression> vRef = new MutableObject<ILogicalExpression>(
+ new VariableReferenceExpression(pkVar));
+ order.getOrderExpressions().add(
+ new Pair<IOrder, Mutable<ILogicalExpression>>(OrderOperator.ASC_ORDER, vRef));
+ }
+ // The secondary-index search feeds into the sort.
+ order.getInputs().add(new MutableObject<ILogicalOperator>(inputOp));
+ order.setExecutionMode(ExecutionMode.LOCAL);
+ context.computeAndSetTypeEnvironmentForOperator(order);
+ }
+ // The job gen parameters are transferred to the actual job gen via the UnnestMapOperator's function arguments.
+ List<Mutable<ILogicalExpression>> primaryIndexFuncArgs = new ArrayList<Mutable<ILogicalExpression>>();
+ BTreeJobGenParams jobGenParams = new BTreeJobGenParams(dataset.getDatasetName(), IndexType.BTREE,
+ dataset.getDataverseName(), dataset.getDatasetName(), retainInput, requiresBroadcast);
+ // Set low/high inclusive to true for a point lookup.
+ jobGenParams.setLowKeyInclusive(true);
+ jobGenParams.setHighKeyInclusive(true);
+ jobGenParams.setLowKeyVarList(primaryKeyVars, 0, primaryKeyVars.size());
+ jobGenParams.setHighKeyVarList(primaryKeyVars, 0, primaryKeyVars.size());
+ jobGenParams.writeToFuncArgs(primaryIndexFuncArgs);
+ // Variables and types coming out of the primary-index search.
+ List<LogicalVariable> primaryIndexUnnestVars = new ArrayList<LogicalVariable>();
+ List<Object> primaryIndexOutputTypes = new ArrayList<Object>();
+ // Append output variables/types generated by the primary-index search (not forwarded from input).
+ primaryIndexUnnestVars.addAll(dataSourceScan.getVariables());
+ try {
+ appendPrimaryIndexTypes(dataset, recordType, primaryIndexOutputTypes);
+ } catch (IOException e) {
+ throw new AlgebricksException(e);
+ }
+ // An index search is expressed as an unnest over an index-search function.
+ IFunctionInfo primaryIndexSearch = FunctionUtils.getFunctionInfo(AsterixBuiltinFunctions.INDEX_SEARCH);
+ AbstractFunctionCallExpression primaryIndexSearchFunc = new ScalarFunctionCallExpression(primaryIndexSearch,
+ primaryIndexFuncArgs);
+ // This is the operator that jobgen will be looking for. It contains an unnest function that has all necessary arguments to determine
+ // which index to use, which variables contain the index-search keys, what is the original dataset, etc.
+ UnnestMapOperator primaryIndexUnnestOp = new UnnestMapOperator(primaryIndexUnnestVars,
+ new MutableObject<ILogicalExpression>(primaryIndexSearchFunc), primaryIndexOutputTypes, retainInput);
+ // Fed by the order operator or the secondaryIndexUnnestOp.
+ if (sortPrimaryKeys) {
+ primaryIndexUnnestOp.getInputs().add(new MutableObject<ILogicalOperator>(order));
+ } else {
+ primaryIndexUnnestOp.getInputs().add(new MutableObject<ILogicalOperator>(inputOp));
+ }
+ context.computeAndSetTypeEnvironmentForOperator(primaryIndexUnnestOp);
+ primaryIndexUnnestOp.setExecutionMode(ExecutionMode.PARTITIONED);
+ return primaryIndexUnnestOp;
+ }
}
diff --git a/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/BTreeAccessMethod.java b/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/BTreeAccessMethod.java
index ddcf768..ce2a1f7 100644
--- a/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/BTreeAccessMethod.java
+++ b/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/BTreeAccessMethod.java
@@ -27,6 +27,7 @@
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.commons.lang3.mutable.MutableObject;
+import edu.uci.ics.asterix.common.config.DatasetConfig.DatasetType;
import edu.uci.ics.asterix.common.config.DatasetConfig.IndexType;
import edu.uci.ics.asterix.metadata.entities.Dataset;
import edu.uci.ics.asterix.metadata.entities.Index;
@@ -51,6 +52,7 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator.ExecutionMode;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExternalDataAccessByRIDOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestMapOperator;
@@ -416,10 +418,16 @@
// Generate the rest of the upstream plan which feeds the search results into the primary index.
UnnestMapOperator primaryIndexUnnestOp;
boolean isPrimaryIndex = chosenIndex.getIndexName().equals(dataset.getDatasetName());
- if (!isPrimaryIndex) {
+ if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
+ ExternalDataAccessByRIDOperator externalDataAccessOp = AccessMethodUtils.createExternalDataAccessByRIDUnnestMap(
+ dataSourceScan, dataset, recordType, secondaryIndexUnnestOp, context, chosenIndex);
+ indexSubTree.dataSourceScanRef.setValue(externalDataAccessOp);
+ return externalDataAccessOp;
+ } else if (!isPrimaryIndex) {
primaryIndexUnnestOp = AccessMethodUtils.createPrimaryIndexUnnestMap(dataSourceScan, dataset, recordType,
secondaryIndexUnnestOp, context, true, retainInput, false);
-
// Replace the datasource scan with the new plan rooted at
// primaryIndexUnnestMap.
indexSubTree.dataSourceScanRef.setValue(primaryIndexUnnestOp); //kisskys
diff --git a/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/OptimizableOperatorSubTree.java b/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/OptimizableOperatorSubTree.java
index 28aee7a..ea2adad 100644
--- a/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/OptimizableOperatorSubTree.java
+++ b/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/OptimizableOperatorSubTree.java
@@ -114,7 +114,8 @@
if (dataset == null) {
throw new AlgebricksException("No metadata for dataset " + datasetName);
}
- if (dataset.getDatasetType() != DatasetType.INTERNAL && dataset.getDatasetType() != DatasetType.FEED) {
+ if (dataset.getDatasetType() != DatasetType.INTERNAL && dataset.getDatasetType() != DatasetType.FEED
+ && dataset.getDatasetType() != DatasetType.EXTERNAL ) {
return false;
}
// Get the record type for that dataset.
diff --git a/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/RTreeAccessMethod.java b/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/RTreeAccessMethod.java
index b8125aa..8af4ac1 100644
--- a/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/RTreeAccessMethod.java
+++ b/asterix-algebra/src/main/java/edu/uci/ics/asterix/optimizer/rules/am/RTreeAccessMethod.java
@@ -21,6 +21,7 @@
import org.apache.commons.lang3.mutable.MutableObject;
import edu.uci.ics.asterix.aql.util.FunctionUtils;
+import edu.uci.ics.asterix.common.config.DatasetConfig.DatasetType;
import edu.uci.ics.asterix.common.config.DatasetConfig.IndexType;
import edu.uci.ics.asterix.metadata.entities.Dataset;
import edu.uci.ics.asterix.metadata.entities.Index;
@@ -44,6 +45,7 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator.ExecutionMode;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExternalDataAccessByRIDOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestMapOperator;
@@ -196,10 +198,19 @@
UnnestMapOperator secondaryIndexUnnestOp = AccessMethodUtils.createSecondaryIndexUnnestMap(dataset, recordType,
chosenIndex, assignSearchKeys, jobGenParams, context, false, retainInput);
// Generate the rest of the upstream plan which feeds the search results into the primary index.
- UnnestMapOperator primaryIndexUnnestOp = AccessMethodUtils.createPrimaryIndexUnnestMap(dataSourceScan, dataset,
- recordType, secondaryIndexUnnestOp, context, true, retainInput, false);
+ if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
+ ExternalDataAccessByRIDOperator externalDataAccessOp = AccessMethodUtils.createExternalDataAccessByRIDUnnestMap(
+ dataSourceScan, dataset, recordType, secondaryIndexUnnestOp, context, chosenIndex);
+ return externalDataAccessOp;
+ } else {
+ UnnestMapOperator primaryIndexUnnestOp = AccessMethodUtils.createPrimaryIndexUnnestMap(dataSourceScan, dataset,
+ recordType, secondaryIndexUnnestOp, context, true, retainInput, false);
- return primaryIndexUnnestOp;
+ return primaryIndexUnnestOp;
+ }
}
@Override
diff --git a/asterix-app/src/main/java/edu/uci/ics/asterix/aql/translator/AqlTranslator.java b/asterix-app/src/main/java/edu/uci/ics/asterix/aql/translator/AqlTranslator.java
index f22d2fb..9c3a853 100644
--- a/asterix-app/src/main/java/edu/uci/ics/asterix/aql/translator/AqlTranslator.java
+++ b/asterix-app/src/main/java/edu/uci/ics/asterix/aql/translator/AqlTranslator.java
@@ -414,7 +414,8 @@
case EXTERNAL: {
String adapter = ((ExternalDetailsDecl) dd.getDatasetDetailsDecl()).getAdapter();
Map<String, String> properties = ((ExternalDetailsDecl) dd.getDatasetDetailsDecl()).getProperties();
- datasetDetails = new ExternalDatasetDetails(adapter, properties);
+ String ngName = ((ExternalDetailsDecl) dd.getDatasetDetailsDecl()).getNodegroupName().getValue();
+ datasetDetails = new ExternalDatasetDetails(adapter, properties, ngName);
break;
}
case FEED: {
@@ -590,6 +591,18 @@
//#. create the index artifact in NC.
runJob(hcc, spec, true);
+ // If this is an external dataset and external-index optimization is enabled, record its files in the metadata.
+ if (ds.getDatasetType() == DatasetType.EXTERNAL && AqlMetadataProvider.isOptimizeExternalIndexes()) {
+ // Load the file names into the external files metadata index.
+ mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
+ bActiveTxn = true;
+ metadataProvider.setMetadataTxnContext(mdTxnCtx);
+ IndexOperations.addExternalDatasetFilesToMetadata(metadataProvider, ds);
+ MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
+ bActiveTxn = false;
+ }
+
mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
bActiveTxn = true;
metadataProvider.setMetadataTxnContext(mdTxnCtx);
@@ -654,6 +667,8 @@
throw new IllegalStateException("System is inconsistent state: pending index(" + dataverseName
+ "." + datasetName + "." + indexName + ") couldn't be removed from the metadata", e);
}
+
+ // TODO: if this is an external dataset, also remove its external files from the metadata.
}
throw e;
} finally {
@@ -730,7 +745,7 @@
for (int j = 0; j < datasets.size(); j++) {
String datasetName = datasets.get(j).getDatasetName();
DatasetType dsType = datasets.get(j).getDatasetType();
- if (dsType == DatasetType.INTERNAL || dsType == DatasetType.FEED) {
+ if (dsType == DatasetType.INTERNAL || dsType == DatasetType.FEED || dsType == DatasetType.EXTERNAL) {
List<Index> indexes = MetadataManager.INSTANCE.getDatasetIndexes(mdTxnCtx, dataverseName,
datasetName);
@@ -843,7 +858,7 @@
}
}
- if (ds.getDatasetType() == DatasetType.INTERNAL || ds.getDatasetType() == DatasetType.FEED) {
+ if (ds.getDatasetType() == DatasetType.INTERNAL || ds.getDatasetType() == DatasetType.FEED || ds.getDatasetType() == DatasetType.EXTERNAL) {
//#. prepare jobs to drop the datatset and the indexes in NC
List<Index> indexes = MetadataManager.INSTANCE.getDatasetIndexes(mdTxnCtx, dataverseName, datasetName);
@@ -944,7 +959,7 @@
+ dataverseName);
}
- if (ds.getDatasetType() == DatasetType.INTERNAL || ds.getDatasetType() == DatasetType.FEED) {
+ if (ds.getDatasetType() == DatasetType.INTERNAL || ds.getDatasetType() == DatasetType.FEED || ds.getDatasetType() == DatasetType.EXTERNAL) {
indexName = stmtIndexDrop.getIndexName().getValue();
Index index = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataverseName, datasetName, indexName);
if (index == null) {
@@ -982,8 +997,8 @@
//#. finally, delete the existing index
MetadataManager.INSTANCE.dropIndex(mdTxnCtx, dataverseName, datasetName, indexName);
} else {
- throw new AlgebricksException(datasetName
- + " is an external dataset. Indexes are not maintained for external datasets.");
+ // External datasets may now have secondary indexes, so dropping one is no longer an error here.
}
MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
diff --git a/asterix-app/src/main/java/edu/uci/ics/asterix/file/IndexOperations.java b/asterix-app/src/main/java/edu/uci/ics/asterix/file/IndexOperations.java
index af56894..de4d075 100644
--- a/asterix-app/src/main/java/edu/uci/ics/asterix/file/IndexOperations.java
+++ b/asterix-app/src/main/java/edu/uci/ics/asterix/file/IndexOperations.java
@@ -14,14 +14,18 @@
*/
package edu.uci.ics.asterix.file;
+import java.util.ArrayList;
+
import edu.uci.ics.asterix.common.config.AsterixStorageProperties;
import edu.uci.ics.asterix.common.config.OptimizationConfUtil;
import edu.uci.ics.asterix.common.context.AsterixVirtualBufferCacheProvider;
import edu.uci.ics.asterix.common.exceptions.AsterixException;
import edu.uci.ics.asterix.common.ioopcallbacks.LSMBTreeIOOperationCallbackFactory;
import edu.uci.ics.asterix.metadata.MetadataException;
+import edu.uci.ics.asterix.metadata.MetadataManager;
import edu.uci.ics.asterix.metadata.declared.AqlMetadataProvider;
import edu.uci.ics.asterix.metadata.entities.Dataset;
+import edu.uci.ics.asterix.metadata.entities.ExternalFile;
import edu.uci.ics.asterix.om.util.AsterixAppContextInfo;
import edu.uci.ics.asterix.transaction.management.opcallbacks.SecondaryIndexOperationTrackerProvider;
import edu.uci.ics.asterix.transaction.management.service.transaction.AsterixRuntimeComponentsProvider;
@@ -55,6 +59,17 @@
metadataProvider, physicalOptimizationConfig);
return secondaryIndexCreator.buildLoadingJobSpec();
}
+
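+ /**
+ * Retrieves the file list of the given external dataset and adds each file to the metadata.
+ */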
+ public static void addExternalDatasetFilesToMetadata(AqlMetadataProvider metadataProvider,
+ Dataset dataset) throws AlgebricksException, MetadataException {
+ // Get the dataset's file list from the metadata provider.
+ ArrayList<ExternalFile> files = metadataProvider.getExternalDatasetFiles(dataset);
+ // Register each file in the metadata.
+ for (ExternalFile file : files) {
+ MetadataManager.INSTANCE.addExternalFile(metadataProvider.getMetadataTxnContext(), file);
+ }
+ }
public static JobSpecification buildDropSecondaryIndexJobSpec(CompiledIndexDropStatement indexDropStmt,
AqlMetadataProvider metadataProvider, Dataset dataset) throws AlgebricksException, MetadataException {
diff --git a/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryBTreeCreator.java b/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryBTreeCreator.java
index e3832d4..baf16de 100644
--- a/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryBTreeCreator.java
+++ b/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryBTreeCreator.java
@@ -17,20 +17,31 @@
import edu.uci.ics.asterix.common.api.ILocalResourceMetadata;
import edu.uci.ics.asterix.common.config.AsterixStorageProperties;
import edu.uci.ics.asterix.common.config.IAsterixPropertiesProvider;
+import edu.uci.ics.asterix.common.config.DatasetConfig.DatasetType;
import edu.uci.ics.asterix.common.context.AsterixVirtualBufferCacheProvider;
import edu.uci.ics.asterix.common.exceptions.AsterixException;
import edu.uci.ics.asterix.common.ioopcallbacks.LSMBTreeIOOperationCallbackFactory;
import edu.uci.ics.asterix.transaction.management.opcallbacks.SecondaryIndexOperationTrackerProvider;
+
+import edu.uci.ics.asterix.external.data.operator.ExternalDataIndexingOperatorDescriptor;
+import edu.uci.ics.asterix.external.util.ExternalIndexHashPartitionComputerFactory;
+import edu.uci.ics.asterix.metadata.utils.DatasetUtils;
+import edu.uci.ics.asterix.runtime.formats.NonTaggedDataFormat;
import edu.uci.ics.asterix.transaction.management.resource.LSMBTreeLocalResourceMetadata;
import edu.uci.ics.asterix.transaction.management.resource.PersistentLocalResourceFactoryProvider;
import edu.uci.ics.asterix.transaction.management.service.transaction.AsterixRuntimeComponentsProvider;
+import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraintHelper;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
import edu.uci.ics.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy;
import edu.uci.ics.hyracks.algebricks.core.rewriter.base.PhysicalOptimizationConfig;
import edu.uci.ics.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor;
@@ -49,18 +60,16 @@
super(physOptConf, propertiesProvider);
}
- @Override
- public JobSpecification buildCreationJobSpec() throws AsterixException, AlgebricksException {
- JobSpecification spec = JobSpecificationUtils.createJobSpecification();
-
- AsterixStorageProperties storageProperties = propertiesProvider.getStorageProperties();
- //prepare a LocalResourceMetadata which will be stored in NC's local resource repository
- ILocalResourceMetadata localResourceMetadata = new LSMBTreeLocalResourceMetadata(
- secondaryRecDesc.getTypeTraits(), secondaryComparatorFactories, secondaryBloomFilterKeyFields, true,
- dataset.getDatasetId());
- ILocalResourceFactoryProvider localResourceFactoryProvider = new PersistentLocalResourceFactoryProvider(
- localResourceMetadata, LocalResource.LSMBTreeResource);
-
+ @Override
+ public JobSpecification buildCreationJobSpec() throws AsterixException, AlgebricksException {
+ JobSpecification spec = JobSpecificationUtils.createJobSpecification();
+ AsterixStorageProperties storageProperties = propertiesProvider.getStorageProperties();
+ //prepare a LocalResourceMetadata which will be stored in NC's local resource repository
+ ILocalResourceMetadata localResourceMetadata = new LSMBTreeLocalResourceMetadata(
+ secondaryRecDesc.getTypeTraits(), secondaryComparatorFactories, secondaryBloomFilterKeyFields, true,
+ dataset.getDatasetId());
+ ILocalResourceFactoryProvider localResourceFactoryProvider = new PersistentLocalResourceFactoryProvider(
+ localResourceMetadata, LocalResource.LSMBTreeResource);
TreeIndexCreateOperatorDescriptor secondaryIndexCreateOp = new TreeIndexCreateOperatorDescriptor(spec,
AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER,
secondaryFileSplitProvider, secondaryRecDesc.getTypeTraits(), secondaryComparatorFactories,
@@ -78,30 +87,93 @@
return spec;
}
- @Override
- public JobSpecification buildLoadingJobSpec() throws AsterixException, AlgebricksException {
- JobSpecification spec = JobSpecificationUtils.createJobSpecification();
- // Create dummy key provider for feeding the primary index scan.
- AbstractOperatorDescriptor keyProviderOp = createDummyKeyProviderOp(spec);
+ @Override
+ public JobSpecification buildLoadingJobSpec() throws AsterixException, AlgebricksException {
+ if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
+ JobSpecification spec = JobSpecificationUtils.createJobSpecification();
+ Pair<ExternalDataIndexingOperatorDescriptor, AlgebricksPartitionConstraint> RIDScanOpAndConstraints;
+ AlgebricksMetaOperatorDescriptor asterixAssignOp;
+ try {
+ //create external indexing scan operator
+ RIDScanOpAndConstraints = createExternalIndexingOp(spec);
- // Create primary index scan op.
- BTreeSearchOperatorDescriptor primaryScanOp = createPrimaryIndexScanOp(spec);
+ //create assign operator
+ asterixAssignOp = createExternalAssignOp(spec);
+ AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, asterixAssignOp,
+ RIDScanOpAndConstraints.second);
+ } catch (Exception e) {
+ throw new AsterixException("Failed to create external index scanning and loading job");
+ }
- // Assign op.
- AlgebricksMetaOperatorDescriptor asterixAssignOp = createAssignOp(spec, primaryScanOp, numSecondaryKeys);
+ // If any of the secondary fields are nullable, then add a select op that filters nulls.
+ AlgebricksMetaOperatorDescriptor selectOp = null;
+ if (anySecondaryKeyIsNullable) {
+ selectOp = createFilterNullsSelectOp(spec, numSecondaryKeys);
+ }
- // If any of the secondary fields are nullable, then add a select op that filters nulls.
- AlgebricksMetaOperatorDescriptor selectOp = null;
- if (anySecondaryKeyIsNullable) {
- selectOp = createFilterNullsSelectOp(spec, numSecondaryKeys);
- }
+ // Sort by secondary keys.
+ ExternalSortOperatorDescriptor sortOp = createSortOp(spec, secondaryComparatorFactories, secondaryRecDesc, RIDScanOpAndConstraints.second);
+ AsterixStorageProperties storageProperties = propertiesProvider.getStorageProperties();
+ // Create secondary BTree bulk load op.
+ TreeIndexBulkLoadOperatorDescriptor secondaryBulkLoadOp = createTreeIndexBulkLoadOp(
+ spec,
+ numSecondaryKeys,
+ new LSMBTreeDataflowHelperFactory(new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()),
+ AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, new SecondaryIndexOperationTrackerProvider(
+ LSMBTreeIOOperationCallbackFactory.INSTANCE, dataset.getDatasetId()),
+ AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER,
+ AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, storageProperties
+ .getBloomFilterFalsePositiveRate()), BTree.DEFAULT_FILL_FACTOR);
+ IBinaryHashFunctionFactory[] hashFactories = DatasetUtils.computeExternalDataKeysBinaryHashFunFactories(dataset, NonTaggedDataFormat.INSTANCE.getBinaryHashFunctionFactoryProvider());
- // Sort by secondary keys.
- ExternalSortOperatorDescriptor sortOp = createSortOp(spec, secondaryComparatorFactories, secondaryRecDesc);
+ //select partitioning keys (always the first 2 after secondary keys)
+ int[] keys = new int[2];
+ keys[0] = numSecondaryKeys;
+ keys[1] = numSecondaryKeys + 1;
- AsterixStorageProperties storageProperties = propertiesProvider.getStorageProperties();
- // Create secondary BTree bulk load op.
+ IConnectorDescriptor hashConn = new MToNPartitioningConnectorDescriptor(spec,
+ new ExternalIndexHashPartitionComputerFactory(keys, hashFactories));
+
+ spec.connect(new OneToOneConnectorDescriptor(spec), RIDScanOpAndConstraints.first, 0, asterixAssignOp, 0);
+ if (anySecondaryKeyIsNullable) {
+ spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
+ spec.connect(hashConn, selectOp, 0, sortOp, 0);
+ } else {
+ spec.connect(hashConn, asterixAssignOp, 0, sortOp, 0);
+ }
+ spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
+ spec.addRoot(secondaryBulkLoadOp);
+ spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
+ return spec;
+ } else {
+ JobSpecification spec = JobSpecificationUtils.createJobSpecification();
+
+ // Create dummy key provider for feeding the primary index scan.
+ AbstractOperatorDescriptor keyProviderOp = createDummyKeyProviderOp(spec);
+
+ // Create primary index scan op.
+ BTreeSearchOperatorDescriptor primaryScanOp = createPrimaryIndexScanOp(spec);
+
+ // Assign op.
+ AlgebricksMetaOperatorDescriptor asterixAssignOp = createAssignOp(spec, primaryScanOp, numSecondaryKeys);
+
+ // If any of the secondary fields are nullable, then add a select op that filters nulls.
+ AlgebricksMetaOperatorDescriptor selectOp = null;
+ if (anySecondaryKeyIsNullable) {
+ selectOp = createFilterNullsSelectOp(spec, numSecondaryKeys);
+ }
+
+ // Sort by secondary keys.
+ ExternalSortOperatorDescriptor sortOp = createSortOp(spec, secondaryComparatorFactories, secondaryRecDesc);
+
+ AsterixStorageProperties storageProperties = propertiesProvider.getStorageProperties();
+ // Create secondary BTree bulk load op.
TreeIndexBulkLoadOperatorDescriptor secondaryBulkLoadOp = createTreeIndexBulkLoadOp(
spec,
numSecondaryKeys,
@@ -112,18 +184,20 @@
AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, storageProperties
.getBloomFilterFalsePositiveRate()), BTree.DEFAULT_FILL_FACTOR);
- // Connect the operators.
- spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryScanOp, 0);
- spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, asterixAssignOp, 0);
- if (anySecondaryKeyIsNullable) {
- spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
- spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
- } else {
- spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
- }
- spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
- spec.addRoot(secondaryBulkLoadOp);
- spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
- return spec;
- }
+ // Connect the operators.
+ spec.connect(new OneToOneConnectorDescriptor(spec), keyProviderOp, 0, primaryScanOp, 0);
+ spec.connect(new OneToOneConnectorDescriptor(spec), primaryScanOp, 0, asterixAssignOp, 0);
+ if (anySecondaryKeyIsNullable) {
+ spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
+ spec.connect(new OneToOneConnectorDescriptor(spec), selectOp, 0, sortOp, 0);
+ } else {
+ spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, sortOp, 0);
+ }
+ spec.connect(new OneToOneConnectorDescriptor(spec), sortOp, 0, secondaryBulkLoadOp, 0);
+ spec.addRoot(secondaryBulkLoadOp);
+ spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
+ return spec;
+ }
+ }
}
+
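A note on the wiring above: the external assign operator emits the secondary keys first, followed by the RID fields, so the hash-partitioning keys are always the two positions immediately after the secondary keys. A tiny sketch of that layout assumption (the helper name is invented for illustration and does not exist in the patch):

    // Layout produced by createExternalAssignOp:
    //   [ secondaryKey_0 .. secondaryKey_{n-1}, fileNumber-or-fileName, byteLocation (, rowNumber) ]
    static int[] externalRidPartitioningKeys(int numSecondaryKeys) {
        // the partitioning keys are the first two RID fields after the secondary keys
        return new int[] { numSecondaryKeys, numSecondaryKeys + 1 };
    }
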
diff --git a/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryIndexCreator.java b/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryIndexCreator.java
index a9b3881..5da336f 100644
--- a/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryIndexCreator.java
+++ b/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryIndexCreator.java
@@ -19,6 +19,9 @@
import java.io.IOException;
import java.util.List;
+import edu.uci.ics.asterix.external.adapter.factory.HDFSAdapterFactory;
+import edu.uci.ics.asterix.external.data.operator.ExternalDataIndexingOperatorDescriptor;
+import edu.uci.ics.asterix.external.dataset.adapter.HiveAdapter;
import edu.uci.ics.asterix.common.config.AsterixStorageProperties;
import edu.uci.ics.asterix.common.config.DatasetConfig.DatasetType;
import edu.uci.ics.asterix.common.config.IAsterixPropertiesProvider;
@@ -32,11 +35,13 @@
import edu.uci.ics.asterix.formats.nontagged.AqlBinaryComparatorFactoryProvider;
import edu.uci.ics.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
import edu.uci.ics.asterix.formats.nontagged.AqlTypeTraitProvider;
+import edu.uci.ics.asterix.metadata.entities.ExternalDatasetDetails;
import edu.uci.ics.asterix.metadata.MetadataException;
import edu.uci.ics.asterix.metadata.declared.AqlMetadataProvider;
import edu.uci.ics.asterix.metadata.entities.Dataset;
import edu.uci.ics.asterix.metadata.entities.Index;
import edu.uci.ics.asterix.metadata.utils.DatasetUtils;
+import edu.uci.ics.asterix.om.types.BuiltinType;
import edu.uci.ics.asterix.om.types.ARecordType;
import edu.uci.ics.asterix.om.types.IAType;
import edu.uci.ics.asterix.om.util.AsterixAppContextInfo;
@@ -78,6 +83,7 @@
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
import edu.uci.ics.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
+import edu.uci.ics.asterix.runtime.formats.NonTaggedDataFormat;
import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchOperationCallbackFactory;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
@@ -85,6 +91,7 @@
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallbackFactory;
import edu.uci.ics.hyracks.storage.am.lsm.btree.dataflow.LSMBTreeDataflowHelperFactory;
+
@SuppressWarnings("rawtypes")
// TODO: We should eventually have a hierarchy of classes that can create all
// possible index job specs,
@@ -102,11 +109,11 @@
protected ISerializerDeserializer payloadSerde;
protected IFileSplitProvider primaryFileSplitProvider;
protected AlgebricksPartitionConstraint primaryPartitionConstraint;
+ protected List<String> secondaryKeyFields;
protected IFileSplitProvider secondaryFileSplitProvider;
protected AlgebricksPartitionConstraint secondaryPartitionConstraint;
protected String secondaryIndexName;
protected boolean anySecondaryKeyIsNullable = false;
-
protected long numElementsHint;
protected IBinaryComparatorFactory[] primaryComparatorFactories;
protected int[] primaryBloomFilterKeyFields;
@@ -115,7 +122,6 @@
protected int[] secondaryBloomFilterKeyFields;
protected RecordDescriptor secondaryRecDesc;
protected ICopyEvaluatorFactory[] secondaryFieldAccessEvalFactories;
-
protected IAsterixPropertiesProvider propertiesProvider;
// Prevent public construction. Should be created via createIndexCreator().
@@ -159,37 +165,68 @@
public abstract JobSpecification buildLoadingJobSpec() throws AsterixException, AlgebricksException;
protected void init(CompiledCreateIndexStatement createIndexStmt, AqlMetadataProvider metadataProvider)
- throws AsterixException, AlgebricksException {
- this.metadataProvider = metadataProvider;
- dataverseName = createIndexStmt.getDataverseName() == null ? metadataProvider.getDefaultDataverseName()
- : createIndexStmt.getDataverseName();
- datasetName = createIndexStmt.getDatasetName();
- secondaryIndexName = createIndexStmt.getIndexName();
- dataset = metadataProvider.findDataset(dataverseName, datasetName);
- if (dataset == null) {
- throw new AsterixException("Unknown dataset " + datasetName);
+ throws AsterixException, AlgebricksException {
+ this.metadataProvider = metadataProvider;
+ dataverseName = createIndexStmt.getDataverseName() == null ? metadataProvider.getDefaultDataverseName()
+ : createIndexStmt.getDataverseName();
+ datasetName = createIndexStmt.getDatasetName();
+ secondaryIndexName = createIndexStmt.getIndexName();
+ dataset = metadataProvider.findDataset(dataverseName, datasetName);
+ if (dataset == null) {
+ throw new AsterixException("Unknown dataset " + datasetName);
+ }
+ if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
+ //get external dataset details
+ ExternalDatasetDetails edsd = ((ExternalDatasetDetails)dataset.getDatasetDetails());
+ //get adapter name
+ String adapter = edsd.getAdapter();
+ //if not an hdfs adapter, throw an exception
+ if (!adapter.equals(HDFSAdapterFactory.HDFS_ADAPTER_NAME) && !adapter.equals(HiveAdapter.class.getName())) {
+ throw new AsterixException("Cannot index an external dataset with adapter type (" + adapter + ").");
+ }
+ //get the item type
+ ARecordType externalItemType = (ARecordType) metadataProvider.findType(dataset.getDataverseName(), dataset.getItemTypeName());
+ //the number of primary keys (RID fields) depends on the input format: 3 for RC files, 2 for text and sequence files
+ numPrimaryKeys = DatasetUtils.getExternalRIDSize(dataset);
+ itemType = createExternalItemTypeWithRID(externalItemType);
+ payloadSerde = AqlSerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(itemType);
+ numSecondaryKeys = createIndexStmt.getKeyFields().size();
+ //splits and constraints <--They don't exist-->
+ primaryFileSplitProvider = null;
+ primaryPartitionConstraint = null;
+ //create secondary split and constraints
+ Pair<IFileSplitProvider, AlgebricksPartitionConstraint> secondarySplitsAndConstraint = metadataProvider
+ .splitProviderAndPartitionConstraintsForExternalDataset(dataverseName, datasetName,
+ secondaryIndexName);
+ secondaryFileSplitProvider = secondarySplitsAndConstraint.first;
+ secondaryPartitionConstraint = secondarySplitsAndConstraint.second;
+ // Must be called in this order.
+ setExternalRIDDescAndComparators();
+ setExternalSecondaryRecDescAndComparators(createIndexStmt, metadataProvider);
+ numElementsHint = metadataProvider.getCardinalityPerPartitionHint(dataset);
+ } else {
+ itemType = (ARecordType) metadataProvider.findType(dataset.getDataverseName(), dataset.getItemTypeName());
+ payloadSerde = AqlSerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(itemType);
+ numPrimaryKeys = DatasetUtils.getPartitioningKeys(dataset).size();
+ numSecondaryKeys = createIndexStmt.getKeyFields().size();
+ Pair<IFileSplitProvider, AlgebricksPartitionConstraint> primarySplitsAndConstraint = metadataProvider
+ .splitProviderAndPartitionConstraintsForInternalOrFeedDataset(dataverseName, datasetName, datasetName);
+ primaryFileSplitProvider = primarySplitsAndConstraint.first;
+ primaryPartitionConstraint = primarySplitsAndConstraint.second;
+ Pair<IFileSplitProvider, AlgebricksPartitionConstraint> secondarySplitsAndConstraint = metadataProvider
+ .splitProviderAndPartitionConstraintsForInternalOrFeedDataset(dataverseName, datasetName,
+ secondaryIndexName);
+ secondaryFileSplitProvider = secondarySplitsAndConstraint.first;
+ secondaryPartitionConstraint = secondarySplitsAndConstraint.second;
+ // Must be called in this order.
+ setPrimaryRecDescAndComparators();
+ setSecondaryRecDescAndComparators(createIndexStmt, metadataProvider);
+ numElementsHint = metadataProvider.getCardinalityPerPartitionHint(dataset);
+ }
}
- if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
- throw new AsterixException("Cannot index an external dataset (" + datasetName + ").");
- }
- itemType = (ARecordType) metadataProvider.findType(dataset.getDataverseName(), dataset.getItemTypeName());
- payloadSerde = AqlSerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(itemType);
- numPrimaryKeys = DatasetUtils.getPartitioningKeys(dataset).size();
- numSecondaryKeys = createIndexStmt.getKeyFields().size();
- Pair<IFileSplitProvider, AlgebricksPartitionConstraint> primarySplitsAndConstraint = metadataProvider
- .splitProviderAndPartitionConstraintsForInternalOrFeedDataset(dataverseName, datasetName, datasetName);
- primaryFileSplitProvider = primarySplitsAndConstraint.first;
- primaryPartitionConstraint = primarySplitsAndConstraint.second;
- Pair<IFileSplitProvider, AlgebricksPartitionConstraint> secondarySplitsAndConstraint = metadataProvider
- .splitProviderAndPartitionConstraintsForInternalOrFeedDataset(dataverseName, datasetName,
- secondaryIndexName);
- secondaryFileSplitProvider = secondarySplitsAndConstraint.first;
- secondaryPartitionConstraint = secondarySplitsAndConstraint.second;
- // Must be called in this order.
- setPrimaryRecDescAndComparators();
- setSecondaryRecDescAndComparators(createIndexStmt, metadataProvider);
- numElementsHint = metadataProvider.getCardinalityPerPartitionHint(dataset);
- }
protected void setPrimaryRecDescAndComparators() throws AlgebricksException {
List<String> partitioningKeys = DatasetUtils.getPartitioningKeys(dataset);
@@ -273,6 +310,210 @@
return keyProviderOp;
}
+    protected ARecordType createExternalItemTypeWithRID(ARecordType externalItemType) throws AsterixException {
+        String[] fieldsNames = new String[externalItemType.getFieldNames().length + numPrimaryKeys];
+        IAType[] fieldsTypes = new IAType[externalItemType.getFieldTypes().length + numPrimaryKeys];
+
+        //add RID fields names and types
+        if (AqlMetadataProvider.isOptimizeExternalIndexes()) {
+            fieldsNames[0] = "_file-number";
+            fieldsTypes[0] = BuiltinType.AINT32;
+        } else {
+            fieldsNames[0] = "_file-name";
+            fieldsTypes[0] = BuiltinType.ASTRING;
+        }
+        fieldsNames[1] = "_byte-location";
+        fieldsTypes[1] = BuiltinType.AINT64;
+        if (numPrimaryKeys == 3) {
+            //add the row number for rc files
+            fieldsNames[2] = "_row-number";
+            fieldsTypes[2] = BuiltinType.AINT32;
+        }
+
+        //add the original fields names and types
+        for (int i = 0; i < externalItemType.getFieldNames().length; i++) {
+            fieldsNames[i + numPrimaryKeys] = externalItemType.getFieldNames()[i];
+            fieldsTypes[i + numPrimaryKeys] = externalItemType.getFieldTypes()[i];
+        }
+        return new ARecordType(externalItemType.getTypeName(), fieldsNames, fieldsTypes, externalItemType.isOpen());
+    }
+
+    protected void setExternalRIDDescAndComparators() throws AlgebricksException {
+        ISerializerDeserializer[] externalRecFields = new ISerializerDeserializer[itemType.getFieldNames().length];
+        ITypeTraits[] externalTypeTraits = new ITypeTraits[itemType.getFieldNames().length];
+
+        primaryComparatorFactories = new IBinaryComparatorFactory[numPrimaryKeys];
+        primaryBloomFilterKeyFields = new int[numPrimaryKeys];
+        ISerializerDeserializerProvider serdeProvider = metadataProvider.getFormat().getSerdeProvider();
+
+        if (AqlMetadataProvider.isOptimizeExternalIndexes()) {
+            primaryComparatorFactories[0] = AqlBinaryComparatorFactoryProvider.INSTANCE.getBinaryComparatorFactory(
+                    BuiltinType.AINT32, true);
+        } else {
+            primaryComparatorFactories[0] = AqlBinaryComparatorFactoryProvider.INSTANCE.getBinaryComparatorFactory(
+                    BuiltinType.ASTRING, true);
+        }
+        primaryComparatorFactories[1] = AqlBinaryComparatorFactoryProvider.INSTANCE.getBinaryComparatorFactory(
+                BuiltinType.AINT64, true);
+
+        primaryBloomFilterKeyFields[0] = 0;
+        primaryBloomFilterKeyFields[1] = 1;
+
+        if (numPrimaryKeys == 3) {
+            primaryComparatorFactories[2] = AqlBinaryComparatorFactoryProvider.INSTANCE.getBinaryComparatorFactory(
+                    BuiltinType.AINT32, true);
+            primaryBloomFilterKeyFields[2] = 2;
+        }
+
+        for (int i = 0; i < itemType.getFieldNames().length; i++) {
+            externalRecFields[i] = serdeProvider.getSerializerDeserializer(itemType.getFieldTypes()[i]);
+            externalTypeTraits[i] = AqlTypeTraitProvider.INSTANCE.getTypeTrait(itemType.getFieldTypes()[i]);
+        }
+        primaryRecDesc = new RecordDescriptor(externalRecFields, externalTypeTraits);
+    }
+
+    protected void setExternalSecondaryRecDescAndComparators(CompiledCreateIndexStatement createIndexStmt,
+            AqlMetadataProvider metadataProvider) throws AlgebricksException, AsterixException {
+        secondaryKeyFields = createIndexStmt.getKeyFields();
+        secondaryFieldAccessEvalFactories = new ICopyEvaluatorFactory[numSecondaryKeys + numPrimaryKeys];
+        secondaryComparatorFactories = new IBinaryComparatorFactory[numSecondaryKeys + numPrimaryKeys];
+        secondaryBloomFilterKeyFields = new int[numSecondaryKeys];
+        ISerializerDeserializer[] secondaryRecFields = new ISerializerDeserializer[numPrimaryKeys + numSecondaryKeys];
+        ITypeTraits[] secondaryTypeTraits = new ITypeTraits[numSecondaryKeys + numPrimaryKeys];
+        ISerializerDeserializerProvider serdeProvider = metadataProvider.getFormat().getSerdeProvider();
+        ITypeTraitProvider typeTraitProvider = metadataProvider.getFormat().getTypeTraitProvider();
+        IBinaryComparatorFactoryProvider comparatorFactoryProvider = metadataProvider.getFormat()
+                .getBinaryComparatorFactoryProvider();
+
+        for (int i = 0; i < numSecondaryKeys; i++) {
+            secondaryFieldAccessEvalFactories[i] = metadataProvider.getFormat().getFieldAccessEvaluatorFactory(
+                    itemType, secondaryKeyFields.get(i), 0);
+            Pair<IAType, Boolean> keyTypePair = Index.getNonNullableKeyFieldType(secondaryKeyFields.get(i), itemType);
+            IAType keyType = keyTypePair.first;
+            anySecondaryKeyIsNullable = anySecondaryKeyIsNullable || keyTypePair.second;
+            ISerializerDeserializer keySerde = serdeProvider.getSerializerDeserializer(keyType);
+            secondaryRecFields[i] = keySerde;
+            secondaryComparatorFactories[i] = comparatorFactoryProvider.getBinaryComparatorFactory(keyType, true);
+            secondaryTypeTraits[i] = typeTraitProvider.getTypeTrait(keyType);
+            secondaryBloomFilterKeyFields[i] = i;
+        }
+
+        if (AqlMetadataProvider.isOptimizeExternalIndexes()) {
+            secondaryFieldAccessEvalFactories[numSecondaryKeys] = metadataProvider.getFormat()
+                    .getFieldAccessEvaluatorFactory(itemType, "_file-number", 0);
+        } else {
+            secondaryFieldAccessEvalFactories[numSecondaryKeys] = metadataProvider.getFormat()
+                    .getFieldAccessEvaluatorFactory(itemType, "_file-name", 0);
+        }
+        secondaryFieldAccessEvalFactories[numSecondaryKeys + 1] = metadataProvider.getFormat()
+                .getFieldAccessEvaluatorFactory(itemType, "_byte-location", 0);
+        if (numPrimaryKeys == 3) {
+            secondaryFieldAccessEvalFactories[numSecondaryKeys + 2] = metadataProvider.getFormat()
+                    .getFieldAccessEvaluatorFactory(itemType, "_row-number", 0);
+        }
+
+        for (int i = 0; i < numPrimaryKeys; i++) {
+            secondaryRecFields[numSecondaryKeys + i] = primaryRecDesc.getFields()[i];
+            secondaryTypeTraits[numSecondaryKeys + i] = primaryRecDesc.getTypeTraits()[i];
+            secondaryComparatorFactories[numSecondaryKeys + i] = primaryComparatorFactories[i];
+        }
+        secondaryRecDesc = new RecordDescriptor(secondaryRecFields, secondaryTypeTraits);
+    }
+
+protected Pair<ExternalDataIndexingOperatorDescriptor, AlgebricksPartitionConstraint> createExternalIndexingOp(JobSpecification spec) throws Exception {
+ Pair<ExternalDataIndexingOperatorDescriptor,AlgebricksPartitionConstraint> indexingOpAndConstraints = metadataProvider.buildExternalDataIndexingRuntime(spec, itemType, dataset, NonTaggedDataFormat.INSTANCE);
+ AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, indexingOpAndConstraints.first,
+ indexingOpAndConstraints.second);
+ return indexingOpAndConstraints;
+ }
+
+protected AlgebricksMetaOperatorDescriptor createExternalAssignOp(JobSpecification spec) throws AlgebricksException {
+ int[] outColumns = new int[numSecondaryKeys + numPrimaryKeys];
+ int[] projectionList = new int[numSecondaryKeys + numPrimaryKeys];
+ for (int i = 0; i < numSecondaryKeys + numPrimaryKeys; i++) {
+ outColumns[i] = i;
+ projectionList[i] = i;
+ }
+
+ IScalarEvaluatorFactory[] sefs = new IScalarEvaluatorFactory[secondaryFieldAccessEvalFactories.length];
+ for (int i = 0; i < secondaryFieldAccessEvalFactories.length; ++i) {
+ sefs[i] = new LogicalExpressionJobGenToExpressionRuntimeProviderAdapter.ScalarEvaluatorFactoryAdapter(
+ secondaryFieldAccessEvalFactories[i]);
+ }
+ AssignRuntimeFactory assign = new AssignRuntimeFactory(outColumns, sefs, projectionList);
+ AlgebricksMetaOperatorDescriptor asterixAssignOp = new AlgebricksMetaOperatorDescriptor(spec, 1, 1,
+ new IPushRuntimeFactory[] { assign }, new RecordDescriptor[] { secondaryRecDesc });
+ return asterixAssignOp;
+ }
+
+ protected ExternalSortOperatorDescriptor createSortOp(JobSpecification spec,
+ IBinaryComparatorFactory[] secondaryComparatorFactories, RecordDescriptor secondaryRecDesc,
+ AlgebricksPartitionConstraint partitionConstraints) {
+ int[] sortFields = new int[secondaryComparatorFactories.length];
+ for (int i = 0; i < secondaryComparatorFactories.length; i++) {
+ sortFields[i] = i;
+ }
+ ExternalSortOperatorDescriptor sortOp = new ExternalSortOperatorDescriptor(spec,
+ physOptConf.getMaxFramesExternalSort(), sortFields, secondaryComparatorFactories, secondaryRecDesc);
+ AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, sortOp, partitionConstraints);
+ return sortOp;
+ }
+
+    protected ARecordType createSecondaryItemType(ARecordType externalItemType, boolean isRCFile)
+            throws AsterixException {
+        String[] fieldsNames = new String[numSecondaryKeys + numPrimaryKeys];
+        IAType[] fieldsTypes = new IAType[numSecondaryKeys + numPrimaryKeys];
+
+        //first create the secondary index fields
+        for (int i = 0; i < numSecondaryKeys; i++) {
+            fieldsNames[i] = secondaryKeyFields.get(i);
+            try {
+                fieldsTypes[i] = externalItemType.getFieldType(fieldsNames[i]);
+            } catch (IOException e) {
+                throw new AsterixException(e);
+            }
+        }
+
+        //second add RID fields (file name or number and byte location)
+        if (AqlMetadataProvider.isOptimizeExternalIndexes()) {
+            fieldsNames[numSecondaryKeys] = "_file-number";
+            fieldsTypes[numSecondaryKeys] = BuiltinType.AINT32;
+        } else {
+            fieldsNames[numSecondaryKeys] = "_file-name";
+            fieldsTypes[numSecondaryKeys] = BuiltinType.ASTRING;
+        }
+        fieldsNames[numSecondaryKeys + 1] = "_byte-location";
+        fieldsTypes[numSecondaryKeys + 1] = BuiltinType.AINT64;
+
+        if (isRCFile) {
+            fieldsNames[numSecondaryKeys + 2] = "_row-number";
+            fieldsTypes[numSecondaryKeys + 2] = BuiltinType.AINT32;
+        }
+
+        //return type
+        return new ARecordType(externalItemType.getTypeName(), fieldsNames, fieldsTypes, externalItemType.isOpen());
+    }
+
protected BTreeSearchOperatorDescriptor createPrimaryIndexScanOp(JobSpecification spec) throws AlgebricksException {
// -Infinity
int[] lowKeyFields = null;
@@ -392,3 +633,4 @@
return asterixSelectOp;
}
}
+
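To make the RID conventions above concrete, a hedged example of the record type produced by createExternalItemTypeWithRID for a hypothetical two-field item type, assuming external-index optimization is enabled and the input format carries two RID fields (text or sequence files); the type and field names are invented for illustration:

    // Invented external item type, for illustration only.
    ARecordType employeeType = new ARecordType("EmployeeType",
            new String[] { "name", "salary" },
            new IAType[] { BuiltinType.ASTRING, BuiltinType.AINT32 }, true);
    // Expected augmented type: the RID fields are prepended ahead of the original fields.
    ARecordType employeeTypeWithRID = new ARecordType("EmployeeType",
            new String[] { "_file-number", "_byte-location", "name", "salary" },
            new IAType[] { BuiltinType.AINT32, BuiltinType.AINT64, BuiltinType.ASTRING, BuiltinType.AINT32 }, true);
    // For RC files a third RID field, "_row-number" (int32), follows "_byte-location".
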
diff --git a/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryInvertedIndexCreator.java b/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryInvertedIndexCreator.java
index 40e0aa9..6f64aa2 100644
--- a/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryInvertedIndexCreator.java
+++ b/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryInvertedIndexCreator.java
@@ -296,4 +296,10 @@
storageProperties.getBloomFilterFalsePositiveRate());
}
}
+
+ @Override
+ protected void setExternalSecondaryRecDescAndComparators(CompiledCreateIndexStatement createIndexStmt,
+ AqlMetadataProvider metadataProvider) throws AlgebricksException, AsterixException {
+ throw new AsterixException("Cannot create inverted index on external dataset due to composite RID Fields.");
+ }
}
diff --git a/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryRTreeCreator.java b/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryRTreeCreator.java
index ec62068..6ce694c 100644
--- a/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryRTreeCreator.java
+++ b/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryRTreeCreator.java
@@ -18,36 +18,47 @@
import edu.uci.ics.asterix.common.api.ILocalResourceMetadata;
import edu.uci.ics.asterix.common.config.AsterixStorageProperties;
+import edu.uci.ics.asterix.common.config.DatasetConfig.DatasetType;
import edu.uci.ics.asterix.common.config.IAsterixPropertiesProvider;
import edu.uci.ics.asterix.common.context.AsterixVirtualBufferCacheProvider;
import edu.uci.ics.asterix.common.exceptions.AsterixException;
import edu.uci.ics.asterix.common.ioopcallbacks.LSMRTreeIOOperationCallbackFactory;
import edu.uci.ics.asterix.dataflow.data.nontagged.valueproviders.AqlPrimitiveValueProviderFactory;
+import edu.uci.ics.asterix.external.adapter.factory.HDFSAdapterFactory;
+import edu.uci.ics.asterix.external.data.operator.ExternalDataIndexingOperatorDescriptor;
+import edu.uci.ics.asterix.external.util.ExternalIndexHashPartitionComputerFactory;
import edu.uci.ics.asterix.formats.nontagged.AqlBinaryComparatorFactoryProvider;
import edu.uci.ics.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
import edu.uci.ics.asterix.formats.nontagged.AqlTypeTraitProvider;
import edu.uci.ics.asterix.metadata.declared.AqlMetadataProvider;
+import edu.uci.ics.asterix.metadata.entities.ExternalDatasetDetails;
import edu.uci.ics.asterix.metadata.entities.Index;
+import edu.uci.ics.asterix.metadata.utils.DatasetUtils;
import edu.uci.ics.asterix.om.types.ATypeTag;
import edu.uci.ics.asterix.om.types.IAType;
import edu.uci.ics.asterix.om.util.NonTaggedFormatUtil;
+import edu.uci.ics.asterix.runtime.formats.NonTaggedDataFormat;
import edu.uci.ics.asterix.transaction.management.opcallbacks.SecondaryIndexOperationTrackerProvider;
import edu.uci.ics.asterix.transaction.management.resource.LSMRTreeLocalResourceMetadata;
import edu.uci.ics.asterix.transaction.management.resource.PersistentLocalResourceFactoryProvider;
import edu.uci.ics.asterix.transaction.management.service.transaction.AsterixRuntimeComponentsProvider;
import edu.uci.ics.asterix.translator.CompiledStatements.CompiledCreateIndexStatement;
+import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraintHelper;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
import edu.uci.ics.hyracks.algebricks.core.jobgen.impl.ConnectorPolicyAssignmentPolicy;
import edu.uci.ics.hyracks.algebricks.core.rewriter.base.PhysicalOptimizationConfig;
import edu.uci.ics.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
@@ -150,8 +161,121 @@
}
@Override
+ protected void setExternalSecondaryRecDescAndComparators(CompiledCreateIndexStatement createIndexStmt,
+ AqlMetadataProvider metadataProvider) throws AlgebricksException, AsterixException {
+ secondaryKeyFields = createIndexStmt.getKeyFields();
+ if (numSecondaryKeys != 1) {
+ throw new AsterixException(
+ "Cannot use "
+ + numSecondaryKeys
+ + " fields as a key for the R-tree index. There can be only one field as a key for the R-tree index.");
+ }
+ Pair<IAType, Boolean> spatialTypePair = Index.getNonNullableKeyFieldType(secondaryKeyFields.get(0), itemType);
+ IAType spatialType = spatialTypePair.first;
+ anySecondaryKeyIsNullable = spatialTypePair.second;
+ if (spatialType == null) {
+ throw new AsterixException("Could not find field " + secondaryKeyFields.get(0) + " in the schema.");
+ }
+ int numDimensions = NonTaggedFormatUtil.getNumDimensions(spatialType.getTypeTag());
+ numNestedSecondaryKeyFields = numDimensions * 2;
+ secondaryFieldAccessEvalFactories = metadataProvider.getFormat().createMBRFactory(itemType, secondaryKeyFields.get(0),
+ numPrimaryKeys, numDimensions);
+ secondaryComparatorFactories = new IBinaryComparatorFactory[numNestedSecondaryKeyFields];
+ valueProviderFactories = new IPrimitiveValueProviderFactory[numNestedSecondaryKeyFields];
+ ISerializerDeserializer[] secondaryRecFields = new ISerializerDeserializer[numPrimaryKeys
+ + numNestedSecondaryKeyFields];
+ ITypeTraits[] secondaryTypeTraits = new ITypeTraits[numNestedSecondaryKeyFields + numPrimaryKeys];
+ IAType nestedKeyType = NonTaggedFormatUtil.getNestedSpatialType(spatialType.getTypeTag());
+ keyType = nestedKeyType.getTypeTag();
+ for (int i = 0; i < numNestedSecondaryKeyFields; i++) {
+ ISerializerDeserializer keySerde = AqlSerializerDeserializerProvider.INSTANCE
+ .getSerializerDeserializer(nestedKeyType);
+ secondaryRecFields[i] = keySerde;
+ secondaryComparatorFactories[i] = AqlBinaryComparatorFactoryProvider.INSTANCE.getBinaryComparatorFactory(
+ nestedKeyType, true);
+ secondaryTypeTraits[i] = AqlTypeTraitProvider.INSTANCE.getTypeTrait(nestedKeyType);
+ valueProviderFactories[i] = AqlPrimitiveValueProviderFactory.INSTANCE;
+ }
+
+ // Add serializers and comparators for primary index fields.
+ for (int i = 0; i < numPrimaryKeys; i++) {
+ secondaryRecFields[numNestedSecondaryKeyFields + i] = primaryRecDesc.getFields()[i];
+ secondaryTypeTraits[numNestedSecondaryKeyFields + i] = primaryRecDesc.getTypeTraits()[i];
+ }
+ secondaryRecDesc = new RecordDescriptor(secondaryRecFields, secondaryTypeTraits);
+ }
+
+ @Override
public JobSpecification buildLoadingJobSpec() throws AsterixException, AlgebricksException {
JobSpecification spec = JobSpecificationUtils.createJobSpecification();
+ if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
+ Pair<ExternalDataIndexingOperatorDescriptor, AlgebricksPartitionConstraint> RIDScanOpAndConstraints;
+ AlgebricksMetaOperatorDescriptor asterixAssignOp;
+ try {
+ //create external indexing scan operator
+ RIDScanOpAndConstraints = createExternalIndexingOp(spec);
+ //create assign operator
+ asterixAssignOp = createExternalAssignOp(spec);
+ AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(spec, asterixAssignOp,
+ RIDScanOpAndConstraints.second);
+ } catch (Exception e) {
+ throw new AsterixException("Failed to create external index scanning and loading job");
+ }
+
+ // If any of the secondary fields are nullable, then add a select op that filters nulls.
+ AlgebricksMetaOperatorDescriptor selectOp = null;
+ if (anySecondaryKeyIsNullable) {
+ selectOp = createFilterNullsSelectOp(spec, numSecondaryKeys);
+ }
+
+ // Create secondary RTree bulk load op.
+ AsterixStorageProperties storageProperties = propertiesProvider.getStorageProperties();
+ TreeIndexBulkLoadOperatorDescriptor secondaryBulkLoadOp = createTreeIndexBulkLoadOp(
+ spec,
+ numNestedSecondaryKeyFields,
+ new LSMRTreeDataflowHelperFactory(valueProviderFactories, RTreePolicyType.RTREE,
+ primaryComparatorFactories, new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()),
+ AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, new SecondaryIndexOperationTrackerProvider(
+ LSMRTreeIOOperationCallbackFactory.INSTANCE, dataset.getDatasetId()),
+ AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER,
+ AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER, AqlMetadataProvider.proposeLinearizer(
+ keyType, secondaryComparatorFactories.length), storageProperties
+ .getBloomFilterFalsePositiveRate()), BTree.DEFAULT_FILL_FACTOR);
+ // Connect the operators.
+ // Create a hash partitioning connector
+ //RC and text/sequence input formats currently use the same RID hash function factories, so there is no need to branch on the input format here
+ IBinaryHashFunctionFactory[] hashFactories = DatasetUtils.computeExternalDataKeysBinaryHashFunFactories(dataset, NonTaggedDataFormat.INSTANCE.getBinaryHashFunctionFactoryProvider());
+ //select partitioning keys (always the first 2 after secondary keys)
+ int[] keys = new int[2];
+ keys[0] = numSecondaryKeys;
+ keys[1] = numSecondaryKeys + 1;
+
+ IConnectorDescriptor hashConn = new MToNPartitioningConnectorDescriptor(spec,
+ new ExternalIndexHashPartitionComputerFactory(keys, hashFactories));
+ spec.connect(new OneToOneConnectorDescriptor(spec), RIDScanOpAndConstraints.first, 0, asterixAssignOp, 0);
+ if (anySecondaryKeyIsNullable) {
+ spec.connect(new OneToOneConnectorDescriptor(spec), asterixAssignOp, 0, selectOp, 0);
+ spec.connect(hashConn, selectOp, 0, secondaryBulkLoadOp, 0);
+ } else {
+ spec.connect(hashConn, asterixAssignOp, 0, secondaryBulkLoadOp, 0);
+ }
+ spec.addRoot(secondaryBulkLoadOp);
+ spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
+ return spec;
+ } else {
// Create dummy key provider for feeding the primary index scan.
AbstractOperatorDescriptor keyProviderOp = createDummyKeyProviderOp(spec);
@@ -195,5 +319,7 @@
spec.addRoot(secondaryBulkLoadOp);
spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
return spec;
+ }
}
}
+
diff --git a/asterix-app/src/main/resources/asterix-build-configuration.xml b/asterix-app/src/main/resources/asterix-build-configuration.xml
index 6a6332d..9c4d15b 100644
--- a/asterix-app/src/main/resources/asterix-build-configuration.xml
+++ b/asterix-app/src/main/resources/asterix-build-configuration.xml
@@ -1,50 +1,40 @@
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
<asterixConfiguration xmlns="asterixconf">
- <metadataNode>nc1</metadataNode>
- <store>
- <ncId>nc1</ncId>
- <storeDirs>nc1data</storeDirs>
- </store>
- <store>
- <ncId>nc2</ncId>
- <storeDirs>nc2data</storeDirs>
- </store>
- <transactionLogDir>
- <ncId>nc1</ncId>
- <txnLogDirPath>target/txnLogDir/nc1</txnLogDirPath>
- </transactionLogDir>
- <transactionLogDir>
- <ncId>nc2</ncId>
- <txnLogDirPath>target/txnLogDir/nc2</txnLogDirPath>
- </transactionLogDir>
- <property>
- <name>log.level</name>
- <value>WARNING</value>
- <description>Log level for running tests/build</description>
- </property>
- <property>
+ <metadataNode>nc1</metadataNode>
+ <store>
+ <ncId>nc1</ncId>
+ <storeDirs>nc1data</storeDirs>
+ </store>
+ <store>
+ <ncId>nc2</ncId>
+ <storeDirs>nc2data</storeDirs>
+ </store>
+ <transactionLogDir>
+ <ncId>nc1</ncId>
+ <txnLogDirPath>target/txnLogDir/nc1</txnLogDirPath>
+ </transactionLogDir>
+ <transactionLogDir>
+ <ncId>nc2</ncId>
+ <txnLogDirPath>target/txnLogDir/nc2</txnLogDirPath>
+ </transactionLogDir>
+ <property>
+ <name>log.level</name>
+ <value>WARNING</value>
+ <description>Log level for running tests/build</description>
+ </property>
+ <property>
<name>storage.memorycomponent.numpages</name>
<value>8</value>
<description>The number of pages to allocate for a memory component.
(Default = 8)
</description>
</property>
- <property>
- <name>txn.log.groupcommitinterval</name>
- <value>1</value>
- <description>The group commit wait time in milliseconds.</description>
- </property>
</asterixConfiguration>
diff --git a/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/expression/ExternalDetailsDecl.java b/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/expression/ExternalDetailsDecl.java
index 4233225..2d97f0a 100644
--- a/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/expression/ExternalDetailsDecl.java
+++ b/asterix-aql/src/main/java/edu/uci/ics/asterix/aql/expression/ExternalDetailsDecl.java
@@ -16,10 +16,20 @@
import java.util.Map;
+import edu.uci.ics.asterix.metadata.bootstrap.MetadataConstants;
+
public class ExternalDetailsDecl implements IDatasetDetailsDecl {
private Map<String, String> properties;
private String adapter;
+ private Identifier nodegroupName;
+ public ExternalDetailsDecl(Map<String, String> properties, String adapter, Identifier nodegroupName) {
+ this.properties = properties;
+ this.adapter = adapter;
+ this.nodegroupName = nodegroupName == null ? new Identifier(MetadataConstants.METADATA_DEFAULT_NODEGROUP_NAME)
+ : nodegroupName;
+ }
+
public void setAdapter(String adapter) {
this.adapter = adapter;
}
@@ -35,4 +45,12 @@
public Map<String, String> getProperties() {
return properties;
}
+
+ public void setNodegroupName(Identifier nodegroupName) {
+ this.nodegroupName = nodegroupName;
+ }
+
+ public Identifier getNodegroupName() {
+ return nodegroupName;
+ }
}
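A short usage sketch of the new constructor; the property keys mirror the HDFSAdapterFactory constants touched later in this patch, while the concrete values and the null nodegroup (which falls back to the default metadata nodegroup) are illustrative assumptions:

    Map<String, String> properties = new HashMap<String, String>();
    properties.put("hdfs", "hdfs://localhost:54310");
    properties.put("path", "/data/employees");
    properties.put("input-format", "text-input-format");
    properties.put("format", "delimited-text");
    properties.put("delimiter", "|");
    // passing null selects METADATA_DEFAULT_NODEGROUP_NAME
    ExternalDetailsDecl edd = new ExternalDetailsDecl(properties, "hdfs", null);
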
diff --git a/asterix-aql/src/main/javacc/AQL.jj b/asterix-aql/src/main/javacc/AQL.jj
index cb6336b..8dd7bfb 100644
--- a/asterix-aql/src/main/javacc/AQL.jj
+++ b/asterix-aql/src/main/javacc/AQL.jj
@@ -308,11 +308,11 @@
<LEFTPAREN> typeName = Identifier() <RIGHTPAREN>
ifNotExists = IfNotExists()
"using" adapterName = AdapterName() properties = Configuration()
+ ("on" nodeGroupName = Identifier() )?
( "hints" hints = Properties() )?
{
- ExternalDetailsDecl edd = new ExternalDetailsDecl();
- edd.setAdapter(adapterName);
- edd.setProperties(properties);
+ ExternalDetailsDecl edd = new ExternalDetailsDecl(properties, adapterName,
+ nodeGroupName != null ? new Identifier(nodeGroupName) : null);
dsetDecl = new DatasetDecl(nameComponents.first,
nameComponents.second,
new Identifier(typeName),
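The grammar change above makes the nodegroup clause optional for external dataset DDL; a statement exercising it might look like the following (dataset, type, nodegroup, and property values are hypothetical):

    create external dataset Employees(EmployeeType) using "hdfs"
    (("hdfs"="hdfs://localhost:54310"),("path"="/data/employees"),
     ("input-format"="text-input-format"),("format"="delimited-text"),("delimiter"="|"))
    on EmployeeGroup;

When the on clause is omitted, nodeGroupName stays null and ExternalDetailsDecl substitutes the default metadata nodegroup.
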
diff --git a/asterix-common/src/main/java/edu/uci/ics/asterix/common/api/AsterixThreadExecutor.java b/asterix-common/src/main/java/edu/uci/ics/asterix/common/api/AsterixThreadExecutor.java
index 14975ff..edd4b2a 100644
--- a/asterix-common/src/main/java/edu/uci/ics/asterix/common/api/AsterixThreadExecutor.java
+++ b/asterix-common/src/main/java/edu/uci/ics/asterix/common/api/AsterixThreadExecutor.java
@@ -14,12 +14,15 @@
*/
package edu.uci.ics.asterix.common.api;
+import java.util.concurrent.Callable;
import java.util.concurrent.Executor;
+import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
public class AsterixThreadExecutor implements Executor {
public final static AsterixThreadExecutor INSTANCE = new AsterixThreadExecutor();
- private final Executor executor = Executors.newCachedThreadPool(AsterixThreadFactory.INSTANCE);
+ private final ExecutorService executorService = Executors.newCachedThreadPool(AsterixThreadFactory.INSTANCE);
private AsterixThreadExecutor() {
@@ -27,6 +30,10 @@
@Override
public void execute(Runnable command) {
- executor.execute(command);
+ executorService.execute(command);
+ }
+
+ public Future<Object> submit(Callable command) {
+ return (Future<Object>) executorService.submit(command);
}
}
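A brief usage sketch of the new submit entry point; the task body is a placeholder and the caller is assumed to handle the checked exceptions thrown by Future.get():

    // Run background work on the shared Asterix thread pool and wait for completion.
    Future<Object> result = AsterixThreadExecutor.INSTANCE.submit(new Callable<Object>() {
        @Override
        public Object call() throws Exception {
            return null; // placeholder work
        }
    });
    result.get(); // blocks until the callable finishes, rethrowing its failure wrapped in ExecutionException
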
diff --git a/asterix-common/src/main/java/edu/uci/ics/asterix/common/config/AsterixTransactionProperties.java b/asterix-common/src/main/java/edu/uci/ics/asterix/common/config/AsterixTransactionProperties.java
index 5a40ece..a1dd52a 100644
--- a/asterix-common/src/main/java/edu/uci/ics/asterix/common/config/AsterixTransactionProperties.java
+++ b/asterix-common/src/main/java/edu/uci/ics/asterix/common/config/AsterixTransactionProperties.java
@@ -25,12 +25,6 @@
private static final String TXN_LOG_PARTITIONSIZE_KEY = "txn.log.partitionsize";
private static final long TXN_LOG_PARTITIONSIZE_DEFAULT = ((long)2 << 30); // 2GB
- private static final String TXN_LOG_DISKSECTORSIZE_KEY = "txn.log.disksectorsize";
- private static final int TXN_LOG_DISKSECTORSIZE_DEFAULT = 4096;
-
- private static final String TXN_LOG_GROUPCOMMITINTERVAL_KEY = "txn.log.groupcommitinterval";
- private static int TXN_LOG_GROUPCOMMITINTERVAL_DEFAULT = 10; // 0.1ms
-
private static final String TXN_LOG_CHECKPOINT_LSNTHRESHOLD_KEY = "txn.log.checkpoint.lsnthreshold";
private static final int TXN_LOG_CHECKPOINT_LSNTHRESHOLD_DEFAULT = (64 << 20); // 64M
@@ -75,16 +69,6 @@
PropertyInterpreters.getLongPropertyInterpreter());
}
- public int getLogDiskSectorSize() {
- return accessor.getProperty(TXN_LOG_DISKSECTORSIZE_KEY, TXN_LOG_DISKSECTORSIZE_DEFAULT,
- PropertyInterpreters.getIntegerPropertyInterpreter());
- }
-
- public int getGroupCommitInterval() {
- return accessor.getProperty(TXN_LOG_GROUPCOMMITINTERVAL_KEY, TXN_LOG_GROUPCOMMITINTERVAL_DEFAULT,
- PropertyInterpreters.getIntegerPropertyInterpreter());
- }
-
public int getCheckpointLSNThreshold() {
return accessor.getProperty(TXN_LOG_CHECKPOINT_LSNTHRESHOLD_KEY, TXN_LOG_CHECKPOINT_LSNTHRESHOLD_DEFAULT,
PropertyInterpreters.getIntegerPropertyInterpreter());
diff --git a/asterix-common/src/main/java/edu/uci/ics/asterix/common/transactions/ILockManager.java b/asterix-common/src/main/java/edu/uci/ics/asterix/common/transactions/ILockManager.java
index 54c86af..a752afa 100644
--- a/asterix-common/src/main/java/edu/uci/ics/asterix/common/transactions/ILockManager.java
+++ b/asterix-common/src/main/java/edu/uci/ics/asterix/common/transactions/ILockManager.java
@@ -70,17 +70,6 @@
throws ACIDException;
/**
- * @param datasetId
- * @param entityHashValue
- * @param txnContext
- * @throws ACIDException
- * TODO
- * @return
- */
- public void unlock(DatasetId datasetId, int entityHashValue, ITransactionContext txnContext, boolean commitFlag)
- throws ACIDException;
-
- /**
* Call to lock and unlock a specific resource in a specific lock mode
*
* @param datasetId
diff --git a/asterix-common/src/main/java/edu/uci/ics/asterix/common/transactions/ILogRecord.java b/asterix-common/src/main/java/edu/uci/ics/asterix/common/transactions/ILogRecord.java
index d810ebd..d13ef6c 100644
--- a/asterix-common/src/main/java/edu/uci/ics/asterix/common/transactions/ILogRecord.java
+++ b/asterix-common/src/main/java/edu/uci/ics/asterix/common/transactions/ILogRecord.java
@@ -20,16 +20,18 @@
public interface ILogRecord {
- public static final int COMMIT_LOG_SIZE = 21;
- public static final int UPDATE_LOG_BASE_SIZE = 56;
+ public static final int JOB_COMMIT_LOG_SIZE = 13;
+ public static final int ENTITY_COMMIT_LOG_BASE_SIZE = 29;
+ public static final int UPDATE_LOG_BASE_SIZE = 64;
public boolean readLogRecord(ByteBuffer buffer);
public void writeLogRecord(ByteBuffer buffer);
-
- public void formCommitLogRecord(ITransactionContext txnCtx, byte logType, int jobId, int datasetId, int PKHashValue);
- public void setUpdateLogSize();
+ public void formJobCommitLogRecord(ITransactionContext txnCtx);
+
+ public void formEntityCommitLogRecord(ITransactionContext txnCtx, int datasetId, int PKHashValue,
+ ITupleReference tupleReference, int[] primaryKeyFields);
public ITransactionContext getTxnCtx();
@@ -98,11 +100,23 @@
public long getChecksum();
public void setChecksum(long checksum);
-
+
public long getLSN();
public void setLSN(long LSN);
public String getLogRecordForDisplay();
+ public void computeAndSetLogSize();
+
+ public int getPKValueSize();
+
+ public ITupleReference getPKValue();
+
+ public void setPKFields(int[] primaryKeyFields);
+
+ public void computeAndSetPKValueSize();
+
+ public void setPKValue(ITupleReference PKValue);
+
}
diff --git a/asterix-common/src/main/java/edu/uci/ics/asterix/common/transactions/LogManagerProperties.java b/asterix-common/src/main/java/edu/uci/ics/asterix/common/transactions/LogManagerProperties.java
index 591d9b1..dd1e7b4 100644
--- a/asterix-common/src/main/java/edu/uci/ics/asterix/common/transactions/LogManagerProperties.java
+++ b/asterix-common/src/main/java/edu/uci/ics/asterix/common/transactions/LogManagerProperties.java
@@ -36,14 +36,10 @@
private final int logPageSize;
// number of log pages in the log buffer.
private final int numLogPages;
- // time in milliseconds
- private final long groupCommitWaitPeriod;
// logBufferSize = logPageSize * numLogPages;
private final int logBufferSize;
// maximum size of each log file
private final long logPartitionSize;
- // default disk sector size
- private final int diskSectorSize;
public LogManagerProperties(AsterixTransactionProperties txnProperties, String nodeId) {
this.logDirKey = new String(nodeId + LOG_DIR_SUFFIX);
@@ -52,12 +48,9 @@
long logPartitionSize = txnProperties.getLogPartitionSize();
this.logDir = txnProperties.getLogDirectory(nodeId);
this.logFilePrefix = DEFAULT_LOG_FILE_PREFIX;
- this.groupCommitWaitPeriod = txnProperties.getGroupCommitInterval();
-
this.logBufferSize = logPageSize * numLogPages;
//make sure that the log partition size is the multiple of log buffer size.
this.logPartitionSize = (logPartitionSize / logBufferSize) * logBufferSize;
- this.diskSectorSize = txnProperties.getLogDiskSectorSize();
}
public long getLogPartitionSize() {
@@ -84,18 +77,10 @@
return logBufferSize;
}
- public long getGroupCommitWaitPeriod() {
- return groupCommitWaitPeriod;
- }
-
public String getLogDirKey() {
return logDirKey;
}
- public int getDiskSectorSize() {
- return diskSectorSize;
- }
-
public String toString() {
StringBuilder builder = new StringBuilder();
builder.append("log_dir_ : " + logDir + lineSeparator);
@@ -103,7 +88,6 @@
builder.append("log_page_size : " + logPageSize + lineSeparator);
builder.append("num_log_pages : " + numLogPages + lineSeparator);
builder.append("log_partition_size : " + logPartitionSize + lineSeparator);
- builder.append("group_commit_wait_period : " + groupCommitWaitPeriod + lineSeparator);
return builder.toString();
}
}
diff --git a/asterix-external-data/pom.xml b/asterix-external-data/pom.xml
index f8d5ea2..7966b52 100644
--- a/asterix-external-data/pom.xml
+++ b/asterix-external-data/pom.xml
@@ -153,6 +153,11 @@
<artifactId>jdom</artifactId>
<version>1.0</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-exec</artifactId>
+ <version>0.11.0</version>
+ </dependency>
</dependencies>
</project>
diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/HDFSAdapterFactory.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/HDFSAdapterFactory.java
index 4ca3d72..e9d2175 100644
--- a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/HDFSAdapterFactory.java
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/HDFSAdapterFactory.java
@@ -20,12 +20,21 @@
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
-
+import org.apache.hadoop.conf.Configuration;
+import edu.uci.ics.asterix.external.dataset.adapter.HDFSAccessByRIDAdapter;
import edu.uci.ics.asterix.external.dataset.adapter.HDFSAdapter;
+import edu.uci.ics.asterix.external.dataset.adapter.HDFSIndexingAdapter;
+import edu.uci.ics.asterix.external.dataset.adapter.IControlledAdapter;
import edu.uci.ics.asterix.external.dataset.adapter.IDatasourceAdapter;
+import edu.uci.ics.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
+import edu.uci.ics.asterix.formats.nontagged.AqlTypeTraitProvider;
+import edu.uci.ics.asterix.om.types.BuiltinType;
import edu.uci.ics.asterix.om.types.IAType;
import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
import edu.uci.ics.hyracks.hdfs.dataflow.ConfFactory;
import edu.uci.ics.hyracks.hdfs.dataflow.InputSplitsFactory;
import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
@@ -35,76 +44,190 @@
*/
@SuppressWarnings("deprecation")
public class HDFSAdapterFactory implements IGenericDatasetAdapterFactory {
- private static final long serialVersionUID = 1L;
+ private static final long serialVersionUID = 1L;
- public static final String HDFS_ADAPTER_NAME = "hdfs";
- public static final String CLUSTER_LOCATIONS = "cluster-locations";
- public static transient String SCHEDULER = "hdfs-scheduler";
+ public static final String HDFS_ADAPTER_NAME = "hdfs";
+ public static final String CLUSTER_LOCATIONS = "cluster-locations";
+ public static transient String SCHEDULER = "hdfs-scheduler";
- public static final String KEY_HDFS_URL = "hdfs";
- public static final String KEY_PATH = "path";
- public static final String KEY_INPUT_FORMAT = "input-format";
- public static final String INPUT_FORMAT_TEXT = "text-input-format";
- public static final String INPUT_FORMAT_SEQUENCE = "sequence-input-format";
+ public static final String KEY_HDFS_URL = "hdfs";
+ public static final String KEY_PATH = "path";
+ public static final String KEY_INPUT_FORMAT = "input-format";
+ public static final String INPUT_FORMAT_TEXT = "text-input-format";
+ public static final String INPUT_FORMAT_SEQUENCE = "sequence-input-format";
+ public static final String INPUT_FORMAT_RC = "rc-input-format";
+ public static final String KEY_DELIMITER = "delimiter";
+ public static final String KEY_FORMAT = "format";
+ public static final String FORMAT_DELIMITED_TEXT = "delimited-text";
- private transient AlgebricksPartitionConstraint clusterLocations;
- private String[] readSchedule;
- private boolean executed[];
- private InputSplitsFactory inputSplitsFactory;
- private ConfFactory confFactory;
- private boolean setup = false;
+ private transient AlgebricksPartitionConstraint clusterLocations;
+ private String[] readSchedule;
+ private boolean executed[];
+ private InputSplitsFactory inputSplitsFactory;
+ private ConfFactory confFactory;
+ private boolean setup = false;
- private static final Map<String, String> formatClassNames = initInputFormatMap();
+ private static final Map<String, String> formatClassNames = initInputFormatMap();
- private static Map<String, String> initInputFormatMap() {
- Map<String, String> formatClassNames = new HashMap<String, String>();
- formatClassNames.put(INPUT_FORMAT_TEXT, "org.apache.hadoop.mapred.TextInputFormat");
- formatClassNames.put(INPUT_FORMAT_SEQUENCE, "org.apache.hadoop.mapred.SequenceFileInputFormat");
- return formatClassNames;
- }
+ private static Map<String, String> initInputFormatMap() {
+ Map<String, String> formatClassNames = new HashMap<String, String>();
+ formatClassNames.put(INPUT_FORMAT_TEXT, "org.apache.hadoop.mapred.TextInputFormat");
+ formatClassNames.put(INPUT_FORMAT_SEQUENCE, "org.apache.hadoop.mapred.SequenceFileInputFormat");
+ formatClassNames.put(INPUT_FORMAT_RC, "org.apache.hadoop.hive.ql.io.RCFileInputFormat");
+ return formatClassNames;
+ }
- @Override
- public IDatasourceAdapter createAdapter(Map<String, Object> configuration, IAType atype) throws Exception {
- if (!setup) {
- /** set up the factory --serializable stuff --- this if-block should be called only once for each factory instance */
- configureJobConf(configuration);
- JobConf conf = configureJobConf(configuration);
- confFactory = new ConfFactory(conf);
+ @Override
+ public IDatasourceAdapter createAdapter(Map<String, Object> configuration, IAType atype) throws Exception {
+ if (!setup) {
+            /** Set up the factory's serializable state; this if-block should be executed only once per factory instance. */
+ configureJobConf(configuration);
+ JobConf conf = configureJobConf(configuration);
+ confFactory = new ConfFactory(conf);
- clusterLocations = (AlgebricksPartitionConstraint) configuration.get(CLUSTER_LOCATIONS);
- int numPartitions = ((AlgebricksAbsolutePartitionConstraint) clusterLocations).getLocations().length;
+ clusterLocations = (AlgebricksPartitionConstraint) configuration.get(CLUSTER_LOCATIONS);
+ int numPartitions = ((AlgebricksAbsolutePartitionConstraint) clusterLocations).getLocations().length;
- InputSplit[] inputSplits = conf.getInputFormat().getSplits(conf, numPartitions);
- inputSplitsFactory = new InputSplitsFactory(inputSplits);
+ InputSplit[] inputSplits = conf.getInputFormat().getSplits(conf, numPartitions);
+ inputSplitsFactory = new InputSplitsFactory(inputSplits);
- Scheduler scheduler = (Scheduler) configuration.get(SCHEDULER);
- readSchedule = scheduler.getLocationConstraints(inputSplits);
- executed = new boolean[readSchedule.length];
- Arrays.fill(executed, false);
+ Scheduler scheduler = (Scheduler) configuration.get(SCHEDULER);
+ readSchedule = scheduler.getLocationConstraints(inputSplits);
+ executed = new boolean[readSchedule.length];
+ Arrays.fill(executed, false);
- setup = true;
- }
- JobConf conf = confFactory.getConf();
- InputSplit[] inputSplits = inputSplitsFactory.getSplits();
- HDFSAdapter hdfsAdapter = new HDFSAdapter(atype, readSchedule, executed, inputSplits, conf, clusterLocations);
- hdfsAdapter.configure(configuration);
- return hdfsAdapter;
- }
+ setup = true;
+ }
+ JobConf conf = confFactory.getConf();
+ InputSplit[] inputSplits = inputSplitsFactory.getSplits();
+ HDFSAdapter hdfsAdapter = new HDFSAdapter(atype, readSchedule, executed, inputSplits, conf, clusterLocations);
- @Override
- public String getName() {
- return HDFS_ADAPTER_NAME;
- }
+        //If the input format is RCFile, configure the parser to expect delimited text with 0x01 (the default) as the delimiter
+        if(((String)configuration.get(KEY_INPUT_FORMAT)).equals(INPUT_FORMAT_RC))
+        {
+            char delimiter = 0x01;
+            configuration.put(KEY_FORMAT, FORMAT_DELIMITED_TEXT);
+            configuration.put(KEY_DELIMITER, Character.toString(delimiter));
+        }
- private JobConf configureJobConf(Map<String, Object> configuration) throws Exception {
- JobConf conf = new JobConf();
- conf.set("fs.default.name", ((String) configuration.get(KEY_HDFS_URL)).trim());
- conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
- conf.setClassLoader(HDFSAdapter.class.getClassLoader());
- conf.set("mapred.input.dir", ((String) configuration.get(KEY_PATH)).trim());
- conf.set("mapred.input.format.class",
- (String) formatClassNames.get(((String) configuration.get(KEY_INPUT_FORMAT)).trim()));
- return conf;
- }
+ hdfsAdapter.configure(configuration);
+ return hdfsAdapter;
+ }
+
+ @Override
+ public IControlledAdapter createAccessByRIDAdapter(
+ Map<String, Object> configuration, IAType atype, HashMap<Integer, String> files) throws Exception {
+ Configuration conf = configureHadoopConnection(configuration);
+ clusterLocations = (AlgebricksPartitionConstraint) configuration.get(CLUSTER_LOCATIONS);
+
+ //Create RID record desc
+ RecordDescriptor ridRecordDesc = null;
+
+        //If the input format is RCFile, configure the parser to expect delimited text with the control char 0x01 as the delimiter
+        if(((String)configuration.get(KEY_INPUT_FORMAT)).equals(INPUT_FORMAT_RC))
+        {
+            char delimiter = 0x01;
+            configuration.put(KEY_FORMAT, FORMAT_DELIMITED_TEXT);
+            configuration.put(KEY_DELIMITER, Character.toString(delimiter));
+ ridRecordDesc = getRIDRecDesc(true, files != null);
+ }
+ else
+ {
+ ridRecordDesc = getRIDRecDesc(false, files != null);
+ }
+ HDFSAccessByRIDAdapter adapter = new HDFSAccessByRIDAdapter(atype, ((String)configuration.get(KEY_INPUT_FORMAT)), clusterLocations,ridRecordDesc, conf, files);
+ adapter.configure(configuration);
+ return adapter;
+ }
+
+ @Override
+ public IDatasourceAdapter createIndexingAdapter(Map<String, Object> configuration, IAType atype, Map<String,Integer> files) throws Exception {
+ if (!setup) {
+            /** Set up the factory's serializable state; this if-block should be executed only once per factory instance. */
+ configureJobConf(configuration);
+ JobConf conf = configureJobConf(configuration);
+ confFactory = new ConfFactory(conf);
+
+ clusterLocations = (AlgebricksPartitionConstraint) configuration.get(CLUSTER_LOCATIONS);
+ int numPartitions = ((AlgebricksAbsolutePartitionConstraint) clusterLocations).getLocations().length;
+
+ InputSplit[] inputSplits = conf.getInputFormat().getSplits(conf, numPartitions);
+ inputSplitsFactory = new InputSplitsFactory(inputSplits);
+
+ Scheduler scheduler = (Scheduler) configuration.get(SCHEDULER);
+ readSchedule = scheduler.getLocationConstraints(inputSplits);
+ executed = new boolean[readSchedule.length];
+ Arrays.fill(executed, false);
+
+ setup = true;
+ }
+ JobConf conf = confFactory.getConf();
+ InputSplit[] inputSplits = inputSplitsFactory.getSplits();
+        //If the input format is RCFile, configure the parser to expect delimited text with 0x01 (the default) as the delimiter
+        if(((String)configuration.get(KEY_INPUT_FORMAT)).equals(INPUT_FORMAT_RC))
+        {
+            char delimiter = 0x01;
+            configuration.put(KEY_FORMAT, FORMAT_DELIMITED_TEXT);
+            configuration.put(KEY_DELIMITER, Character.toString(delimiter));
+        }
+ HDFSIndexingAdapter hdfsIndexingAdapter = new HDFSIndexingAdapter(atype, readSchedule, executed, inputSplits, conf, clusterLocations, files);
+ hdfsIndexingAdapter.configure(configuration);
+ return hdfsIndexingAdapter;
+ }
+
+ @Override
+ public String getName() {
+ return HDFS_ADAPTER_NAME;
+ }
+
+ private JobConf configureJobConf(Map<String, Object> configuration) throws Exception {
+ JobConf conf = new JobConf();
+ conf.set("fs.default.name", ((String) configuration.get(KEY_HDFS_URL)).trim());
+ conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
+ conf.setClassLoader(HDFSAdapter.class.getClassLoader());
+ conf.set("mapred.input.dir", ((String) configuration.get(KEY_PATH)).trim());
+ conf.set("mapred.input.format.class",
+ (String) formatClassNames.get(((String) configuration.get(KEY_INPUT_FORMAT)).trim()));
+ return conf;
+ }
+
+ public static Configuration configureHadoopConnection(Map<String, Object> configuration)
+ {
+ Configuration conf = new Configuration();
+ conf.set("fs.default.name", ((String) configuration.get(KEY_HDFS_URL)).trim());
+ conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
+ return conf;
+ }
+
+ public static RecordDescriptor getRIDRecDesc(boolean isRCFile, boolean optimize){
+ int numOfPrimaryKeys = 2;
+ if(isRCFile)
+ {
+ numOfPrimaryKeys++;
+ }
+ @SuppressWarnings("rawtypes")
+ ISerializerDeserializer[] serde = new ISerializerDeserializer[numOfPrimaryKeys];
+ ITypeTraits[] tt = new ITypeTraits[numOfPrimaryKeys];
+ if(optimize)
+ {
+ serde[0] = AqlSerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AINT32);
+ tt[0] = AqlTypeTraitProvider.INSTANCE.getTypeTrait(BuiltinType.AINT32);
+ }
+ else
+ {
+ serde[0] = AqlSerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.ASTRING);
+ tt[0] = AqlTypeTraitProvider.INSTANCE.getTypeTrait(BuiltinType.ASTRING);
+ }
+ serde[1] = AqlSerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AINT64);
+ tt[1] = AqlTypeTraitProvider.INSTANCE.getTypeTrait(BuiltinType.AINT64);
+ if(isRCFile)
+ {
+ //we add the row number for rc-files
+ serde[2] = AqlSerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.AINT32);
+ tt[2] = AqlTypeTraitProvider.INSTANCE.getTypeTrait(BuiltinType.AINT32);
+ }
+ return new RecordDescriptor(serde, tt);
+ }
+
}
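Note: getRIDRecDesc above defines the record-id (RID) layout shared by the indexing and access-by-RID paths. A short sketch of the three layouts it can produce; it assumes the classes introduced in this change are on the classpath, and the wrapper class name is hypothetical:

    import edu.uci.ics.asterix.external.adapter.factory.HDFSAdapterFactory;
    import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;

    public class RIDLayoutSketch {
        public static void main(String[] args) {
            // text / sequence files: [ASTRING file name, AINT64 byte offset]
            RecordDescriptor textRid = HDFSAdapterFactory.getRIDRecDesc(false, false);
            // RCFiles: [ASTRING file name, AINT64 block (sync) offset, AINT32 row number within the block]
            RecordDescriptor rcRid = HDFSAdapterFactory.getRIDRecDesc(true, false);
            // optimized variant: the file is identified by an AINT32 file number instead of its name
            RecordDescriptor optimizedRcRid = HDFSAdapterFactory.getRIDRecDesc(true, true);
            System.out.println(textRid.getFields().length + " / " + rcRid.getFields().length
                    + " / " + optimizedRcRid.getFields().length); // 2 / 3 / 3
        }
    }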
diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/HiveAdapterFactory.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/HiveAdapterFactory.java
index 409eb7a..64c8153 100644
--- a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/HiveAdapterFactory.java
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/HiveAdapterFactory.java
@@ -18,15 +18,20 @@
import java.util.HashMap;
import java.util.Map;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
+import edu.uci.ics.asterix.external.dataset.adapter.HDFSAccessByRIDAdapter;
import edu.uci.ics.asterix.external.dataset.adapter.HDFSAdapter;
import edu.uci.ics.asterix.external.dataset.adapter.HiveAdapter;
+import edu.uci.ics.asterix.external.dataset.adapter.HiveIndexingAdapter;
import edu.uci.ics.asterix.external.dataset.adapter.IDatasourceAdapter;
+import edu.uci.ics.asterix.external.dataset.adapter.IControlledAdapter;
import edu.uci.ics.asterix.om.types.IAType;
import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
import edu.uci.ics.hyracks.hdfs.dataflow.ConfFactory;
import edu.uci.ics.hyracks.hdfs.dataflow.InputSplitsFactory;
import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
@@ -36,109 +41,185 @@
*/
@SuppressWarnings("deprecation")
public class HiveAdapterFactory implements IGenericDatasetAdapterFactory {
- private static final long serialVersionUID = 1L;
+ private static final long serialVersionUID = 1L;
- public static final String HDFS_ADAPTER_NAME = "hdfs";
- public static final String CLUSTER_LOCATIONS = "cluster-locations";
- public static transient String SCHEDULER = "hdfs-scheduler";
+ public static final String HDFS_ADAPTER_NAME = "hdfs";
+ public static final String CLUSTER_LOCATIONS = "cluster-locations";
+ public static transient String SCHEDULER = "hdfs-scheduler";
- public static final String KEY_HDFS_URL = "hdfs";
- public static final String KEY_PATH = "path";
- public static final String KEY_INPUT_FORMAT = "input-format";
- public static final String INPUT_FORMAT_TEXT = "text-input-format";
- public static final String INPUT_FORMAT_SEQUENCE = "sequence-input-format";
+ public static final String KEY_HDFS_URL = "hdfs";
+ public static final String KEY_PATH = "path";
+ public static final String KEY_INPUT_FORMAT = "input-format";
+ public static final String INPUT_FORMAT_TEXT = "text-input-format";
+ public static final String INPUT_FORMAT_SEQUENCE = "sequence-input-format";
+ public static final String INPUT_FORMAT_RC = "rc-input-format";
- public static final String KEY_FORMAT = "format";
- public static final String KEY_PARSER_FACTORY = "parser";
- public static final String FORMAT_DELIMITED_TEXT = "delimited-text";
- public static final String FORMAT_ADM = "adm";
+ public static final String KEY_FORMAT = "format";
+ public static final String KEY_PARSER_FACTORY = "parser";
+ public static final String FORMAT_DELIMITED_TEXT = "delimited-text";
+ public static final String FORMAT_ADM = "adm";
+ public static final String KEY_DELIMITER = "delimiter";
- public static final String HIVE_DATABASE = "database";
- public static final String HIVE_TABLE = "table";
- public static final String HIVE_HOME = "hive-home";
- public static final String HIVE_METASTORE_URI = "metastore-uri";
- public static final String HIVE_WAREHOUSE_DIR = "warehouse-dir";
- public static final String HIVE_METASTORE_RAWSTORE_IMPL = "rawstore-impl";
+ public static final String HIVE_DATABASE = "database";
+ public static final String HIVE_TABLE = "table";
+ public static final String HIVE_HOME = "hive-home";
+ public static final String HIVE_METASTORE_URI = "metastore-uri";
+ public static final String HIVE_WAREHOUSE_DIR = "warehouse-dir";
+ public static final String HIVE_METASTORE_RAWSTORE_IMPL = "rawstore-impl";
- private String[] readSchedule;
- private boolean executed[];
- private InputSplitsFactory inputSplitsFactory;
- private ConfFactory confFactory;
- private transient AlgebricksPartitionConstraint clusterLocations;
- private boolean setup = false;
+ private String[] readSchedule;
+ private boolean executed[];
+ private InputSplitsFactory inputSplitsFactory;
+ private ConfFactory confFactory;
+ private transient AlgebricksPartitionConstraint clusterLocations;
+ private boolean setup = false;
- private static final Map<String, String> formatClassNames = initInputFormatMap();
+ private static final Map<String, String> formatClassNames = initInputFormatMap();
- private static Map<String, String> initInputFormatMap() {
- Map<String, String> formatClassNames = new HashMap<String, String>();
- formatClassNames.put(INPUT_FORMAT_TEXT, "org.apache.hadoop.mapred.TextInputFormat");
- formatClassNames.put(INPUT_FORMAT_SEQUENCE, "org.apache.hadoop.mapred.SequenceFileInputFormat");
- return formatClassNames;
- }
+ private static Map<String, String> initInputFormatMap() {
+ Map<String, String> formatClassNames = new HashMap<String, String>();
+ formatClassNames.put(INPUT_FORMAT_TEXT, "org.apache.hadoop.mapred.TextInputFormat");
+ formatClassNames.put(INPUT_FORMAT_SEQUENCE, "org.apache.hadoop.mapred.SequenceFileInputFormat");
+ formatClassNames.put(INPUT_FORMAT_RC, "org.apache.hadoop.hive.ql.io.RCFileInputFormat");
+ return formatClassNames;
+ }
- @Override
- public IDatasourceAdapter createAdapter(Map<String, Object> configuration, IAType atype) throws Exception {
- if (!setup) {
- /** set up the factory --serializable stuff --- this if-block should be called only once for each factory instance */
- configureJobConf(configuration);
- JobConf conf = configureJobConf(configuration);
- confFactory = new ConfFactory(conf);
- clusterLocations = (AlgebricksPartitionConstraint) configuration.get(CLUSTER_LOCATIONS);
- int numPartitions = ((AlgebricksAbsolutePartitionConstraint) clusterLocations).getLocations().length;
+ @Override
+ public IControlledAdapter createAccessByRIDAdapter(Map<String, Object> configuration, IAType atype, HashMap<Integer, String> files) throws Exception {
+ Configuration conf = HDFSAdapterFactory.configureHadoopConnection(configuration);
+ clusterLocations = (AlgebricksPartitionConstraint) configuration.get(CLUSTER_LOCATIONS);
+ //Create RID record desc
+ RecordDescriptor ridRecordDesc = null;
- InputSplit[] inputSplits = conf.getInputFormat().getSplits(conf, numPartitions);
- inputSplitsFactory = new InputSplitsFactory(inputSplits);
+        //If the input format is RCFile, configure the parser to expect delimited text with the control char 0x01 as the delimiter
+        if(((String)configuration.get(KEY_INPUT_FORMAT)).equals(INPUT_FORMAT_RC))
+        {
+            char delimiter = 0x01;
+            configuration.put(KEY_FORMAT, FORMAT_DELIMITED_TEXT);
+            configuration.put(KEY_DELIMITER, Character.toString(delimiter));
+ ridRecordDesc = HDFSAdapterFactory.getRIDRecDesc(true, files != null);
+ }
+ else
+ {
+ ridRecordDesc = HDFSAdapterFactory.getRIDRecDesc(false, files != null);
+ }
+ HDFSAccessByRIDAdapter adapter = new HDFSAccessByRIDAdapter(atype, ((String)configuration.get(KEY_INPUT_FORMAT)), clusterLocations,ridRecordDesc, conf, files);
+ adapter.configure(configuration);
+ return adapter;
+ }
- Scheduler scheduler = (Scheduler) configuration.get(SCHEDULER);
- readSchedule = scheduler.getLocationConstraints(inputSplits);
- executed = new boolean[readSchedule.length];
- Arrays.fill(executed, false);
+ @Override
+ public IDatasourceAdapter createIndexingAdapter(
+ Map<String, Object> configuration, IAType atype, Map<String,Integer> files) throws Exception {
+ if (!setup) {
+            /** Set up the factory's serializable state; this if-block should be executed only once per factory instance. */
+ configureJobConf(configuration);
+ JobConf conf = configureJobConf(configuration);
+ confFactory = new ConfFactory(conf);
- setup = true;
- }
- JobConf conf = confFactory.getConf();
- InputSplit[] inputSplits = inputSplitsFactory.getSplits();
- HiveAdapter hiveAdapter = new HiveAdapter(atype, readSchedule, executed, inputSplits, conf, clusterLocations);
- hiveAdapter.configure(configuration);
- return hiveAdapter;
- }
+ clusterLocations = (AlgebricksPartitionConstraint) configuration.get(CLUSTER_LOCATIONS);
+ int numPartitions = ((AlgebricksAbsolutePartitionConstraint) clusterLocations).getLocations().length;
- @Override
- public String getName() {
- return "hive";
- }
+ InputSplit[] inputSplits = conf.getInputFormat().getSplits(conf, numPartitions);
+ inputSplitsFactory = new InputSplitsFactory(inputSplits);
- private JobConf configureJobConf(Map<String, Object> configuration) throws Exception {
- JobConf conf = new JobConf();
+ Scheduler scheduler = (Scheduler) configuration.get(SCHEDULER);
+ readSchedule = scheduler.getLocationConstraints(inputSplits);
+ executed = new boolean[readSchedule.length];
+ Arrays.fill(executed, false);
- /** configure hive */
- String database = (String) configuration.get(HIVE_DATABASE);
- String tablePath = null;
- if (database == null) {
- tablePath = configuration.get(HIVE_WAREHOUSE_DIR) + "/" + configuration.get(HIVE_TABLE);
- } else {
- tablePath = configuration.get(HIVE_WAREHOUSE_DIR) + "/" + tablePath + ".db" + "/"
- + configuration.get(HIVE_TABLE);
- }
- configuration.put(HDFSAdapter.KEY_PATH, tablePath);
- if (!configuration.get(KEY_FORMAT).equals(FORMAT_DELIMITED_TEXT)) {
- throw new IllegalArgumentException("format" + configuration.get(KEY_FORMAT) + " is not supported");
- }
+ setup = true;
+ }
+ JobConf conf = confFactory.getConf();
+ InputSplit[] inputSplits = inputSplitsFactory.getSplits();
+ HiveIndexingAdapter hiveIndexingAdapter = new HiveIndexingAdapter(atype, readSchedule, executed, inputSplits, conf, clusterLocations, files);
- if (!(configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT).equals(HDFSAdapterFactory.INPUT_FORMAT_TEXT) || configuration
- .get(HDFSAdapterFactory.KEY_INPUT_FORMAT).equals(HDFSAdapterFactory.INPUT_FORMAT_SEQUENCE))) {
- throw new IllegalArgumentException("file input format"
- + configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT) + " is not supported");
- }
+        //If the input format is RCFile, configure the parser to expect delimited text with 0x01 (the default) as the delimiter
+        if(((String)configuration.get(KEY_INPUT_FORMAT)).equals(INPUT_FORMAT_RC))
+        {
+            char delimiter = 0x01;
+            configuration.put(KEY_FORMAT, FORMAT_DELIMITED_TEXT);
+            configuration.put(KEY_DELIMITER, Character.toString(delimiter));
+        }
- /** configure hdfs */
- conf.set("fs.default.name", ((String) configuration.get(KEY_HDFS_URL)).trim());
- conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
- conf.setClassLoader(HDFSAdapter.class.getClassLoader());
- conf.set("mapred.input.dir", ((String) configuration.get(KEY_PATH)).trim());
- conf.set("mapred.input.format.class",
- (String) formatClassNames.get(((String) configuration.get(KEY_INPUT_FORMAT)).trim()));
- return conf;
- }
-}
+ hiveIndexingAdapter.configure(configuration);
+ return hiveIndexingAdapter;
+ }
+
+ @Override
+ public IDatasourceAdapter createAdapter(Map<String, Object> configuration, IAType atype) throws Exception {
+ if (!setup) {
+            /** Set up the factory's serializable state; this if-block should be executed only once per factory instance. */
+ configureJobConf(configuration);
+ JobConf conf = configureJobConf(configuration);
+ confFactory = new ConfFactory(conf);
+
+ clusterLocations = (AlgebricksPartitionConstraint) configuration.get(CLUSTER_LOCATIONS);
+ int numPartitions = ((AlgebricksAbsolutePartitionConstraint) clusterLocations).getLocations().length;
+
+ InputSplit[] inputSplits = conf.getInputFormat().getSplits(conf, numPartitions);
+ inputSplitsFactory = new InputSplitsFactory(inputSplits);
+
+ Scheduler scheduler = (Scheduler) configuration.get(SCHEDULER);
+ readSchedule = scheduler.getLocationConstraints(inputSplits);
+ executed = new boolean[readSchedule.length];
+ Arrays.fill(executed, false);
+
+ setup = true;
+ }
+ JobConf conf = confFactory.getConf();
+ InputSplit[] inputSplits = inputSplitsFactory.getSplits();
+ HiveAdapter hiveAdapter = new HiveAdapter(atype, readSchedule, executed, inputSplits, conf, clusterLocations);
+
+        //If the input format is RCFile, configure the parser to expect delimited text with 0x01 (the default) as the delimiter
+        if(((String)configuration.get(KEY_INPUT_FORMAT)).equals(INPUT_FORMAT_RC))
+        {
+            char delimiter = 0x01;
+            configuration.put(KEY_FORMAT, FORMAT_DELIMITED_TEXT);
+            configuration.put(KEY_DELIMITER, Character.toString(delimiter));
+        }
+
+ hiveAdapter.configure(configuration);
+ return hiveAdapter;
+ }
+
+ @Override
+ public String getName() {
+ return "hive";
+ }
+
+ private JobConf configureJobConf(Map<String, Object> configuration) throws Exception {
+ JobConf conf = new JobConf();
+
+ /** configure hive */
+ String database = (String) configuration.get(HIVE_DATABASE);
+ String tablePath = null;
+ if (database == null) {
+ tablePath = configuration.get(HIVE_WAREHOUSE_DIR) + "/" + configuration.get(HIVE_TABLE);
+ } else {
+            tablePath = configuration.get(HIVE_WAREHOUSE_DIR) + "/" + database + ".db" + "/"
+                    + configuration.get(HIVE_TABLE);
+ }
+ configuration.put(HDFSAdapter.KEY_PATH, tablePath);
+ if (!configuration.get(KEY_FORMAT).equals(FORMAT_DELIMITED_TEXT)) {
+            throw new IllegalArgumentException("format " + configuration.get(KEY_FORMAT) + " is not supported");
+ }
+
+ if (!(configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT).equals(HDFSAdapterFactory.INPUT_FORMAT_TEXT) || configuration
+ .get(HDFSAdapterFactory.KEY_INPUT_FORMAT).equals(HDFSAdapterFactory.INPUT_FORMAT_SEQUENCE) || configuration
+ .get(HDFSAdapterFactory.KEY_INPUT_FORMAT).equals(HDFSAdapterFactory.INPUT_FORMAT_RC))) {
+            throw new IllegalArgumentException("file input format "
+ + configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT) + " is not supported");
+ }
+
+ /** configure hdfs */
+ conf.set("fs.default.name", ((String) configuration.get(KEY_HDFS_URL)).trim());
+ conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
+ conf.setClassLoader(HDFSAdapter.class.getClassLoader());
+ conf.set("mapred.input.dir", ((String) configuration.get(KEY_PATH)).trim());
+ conf.set("mapred.input.format.class",
+ (String) formatClassNames.get(((String) configuration.get(KEY_INPUT_FORMAT)).trim()));
+ return conf;
+ }
+}
\ No newline at end of file
diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/IGenericDatasetAdapterFactory.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/IGenericDatasetAdapterFactory.java
index 0a178a7..f046f88 100644
--- a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/IGenericDatasetAdapterFactory.java
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/IGenericDatasetAdapterFactory.java
@@ -14,8 +14,10 @@
*/
package edu.uci.ics.asterix.external.adapter.factory;
+import java.util.HashMap;
import java.util.Map;
+import edu.uci.ics.asterix.external.dataset.adapter.IControlledAdapter;
import edu.uci.ics.asterix.external.dataset.adapter.IDatasourceAdapter;
import edu.uci.ics.asterix.om.types.IAType;
@@ -40,4 +42,29 @@
*/
public IDatasourceAdapter createAdapter(Map<String, Object> configuration, IAType atype) throws Exception;
+    /**
+     * Creates an instance of IDatasourceAdapter that is used to read records along with their RIDs.
+     *
+     * @param configuration
+     *            The configuration parameters for the adapter that is instantiated.
+     *            The passed-in configuration is used to configure the created instance of the adapter.
+     * @param atype
+     *            The type of the ADM records that are returned by the adapter (contains both the original fields and the RID fields).
+     * @param files
+     *            The mapping between external file names and file numbers; may be null.
+     * @return An instance of IDatasourceAdapter.
+     * @throws Exception
+     */
+    public IDatasourceAdapter createIndexingAdapter(Map<String, Object> configuration, IAType atype, Map<String, Integer> files) throws Exception;
+
+    /**
+     * Creates an instance of IControlledAdapter that is used to read records using their RIDs.
+     *
+     * @param configuration
+     *            The configuration parameters for the adapter that is instantiated.
+     *            The passed-in configuration is used to configure the created instance of the adapter.
+     * @param atype
+     *            The type of the ADM records that are returned by the adapter.
+     * @param files
+     *            The mapping between file numbers and external file names; may be null.
+     * @return An instance of IControlledAdapter.
+     * @throws Exception
+     */
+    public IControlledAdapter createAccessByRIDAdapter(Map<String, Object> configuration, IAType atype, HashMap<Integer, String> files) throws Exception;
}
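Note: with these additions, IGenericDatasetAdapterFactory exposes three creation paths: a plain bulk scan, an indexing scan that also emits RID fields, and a controlled access-by-RID lookup. A hedged sketch of a caller exercising all three (class and parameter names are illustrative):

    import java.util.HashMap;
    import java.util.Map;

    import edu.uci.ics.asterix.external.adapter.factory.IGenericDatasetAdapterFactory;
    import edu.uci.ics.asterix.external.dataset.adapter.IControlledAdapter;
    import edu.uci.ics.asterix.external.dataset.adapter.IDatasourceAdapter;
    import edu.uci.ics.asterix.om.types.IAType;

    public class AdapterFactoryUsageSketch {
        public static void use(IGenericDatasetAdapterFactory factory, Map<String, Object> config, IAType recordType,
                Map<String, Integer> fileToNumber, HashMap<Integer, String> numberToFile) throws Exception {
            // 1) plain bulk scan of the external data
            IDatasourceAdapter scan = factory.createAdapter(config, recordType);
            // 2) scan that additionally emits the RID fields, used while building an index over the external data
            IDatasourceAdapter indexingScan = factory.createIndexingAdapter(config, recordType, fileToNumber);
            // 3) controlled adapter that fetches the records whose RIDs are pushed to it in frames
            IControlledAdapter lookup = factory.createAccessByRIDAdapter(config, recordType, numberToFile);
        }
    }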
diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/NCFileSystemAdapterFactory.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/NCFileSystemAdapterFactory.java
index e680232..4fae7e7 100644
--- a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/NCFileSystemAdapterFactory.java
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/NCFileSystemAdapterFactory.java
@@ -14,11 +14,14 @@
*/
package edu.uci.ics.asterix.external.adapter.factory;
+import java.util.HashMap;
import java.util.Map;
+import edu.uci.ics.asterix.external.dataset.adapter.IControlledAdapter;
import edu.uci.ics.asterix.external.dataset.adapter.IDatasourceAdapter;
import edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter;
import edu.uci.ics.asterix.om.types.IAType;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.NotImplementedException;
/**
* Factory class for creating an instance of NCFileSystemAdapter. An
@@ -26,18 +29,31 @@
* an NC.
*/
public class NCFileSystemAdapterFactory implements IGenericDatasetAdapterFactory {
- private static final long serialVersionUID = 1L;
- public static final String NC_FILE_SYSTEM_ADAPTER_NAME = "localfs";
+ private static final long serialVersionUID = 1L;
+ public static final String NC_FILE_SYSTEM_ADAPTER_NAME = "localfs";
- @Override
- public IDatasourceAdapter createAdapter(Map<String, Object> configuration, IAType atype) throws Exception {
- NCFileSystemAdapter fsAdapter = new NCFileSystemAdapter(atype);
- fsAdapter.configure(configuration);
- return fsAdapter;
- }
+ @Override
+ public IDatasourceAdapter createAdapter(Map<String, Object> configuration, IAType atype) throws Exception {
+ NCFileSystemAdapter fsAdapter = new NCFileSystemAdapter(atype);
+ fsAdapter.configure(configuration);
+ return fsAdapter;
+ }
- @Override
- public String getName() {
- return NC_FILE_SYSTEM_ADAPTER_NAME;
- }
+ @Override
+ public String getName() {
+ return NC_FILE_SYSTEM_ADAPTER_NAME;
+ }
+
+ @Override
+ public IDatasourceAdapter createIndexingAdapter(
+ Map<String, Object> configuration, IAType atype, Map<String,Integer> files) throws Exception {
+ throw new NotImplementedException("Indexing Adapter is not implemented for NC FileSystem Data");
+ }
+
+ @Override
+ public IControlledAdapter createAccessByRIDAdapter(Map<String, Object> configuration, IAType atype, HashMap<Integer, String> files) throws Exception {
+ throw new NotImplementedException("Access by RID Adapter is not implemented for NC FileSystem Data");
+ }
}
diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/data/operator/ExternalDataAccessByRIDOperatorDescriptor.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/data/operator/ExternalDataAccessByRIDOperatorDescriptor.java
new file mode 100644
index 0000000..aa91a56
--- /dev/null
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/data/operator/ExternalDataAccessByRIDOperatorDescriptor.java
@@ -0,0 +1,78 @@
+package edu.uci.ics.asterix.external.data.operator;
+
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+import java.util.Map;
+
+import edu.uci.ics.asterix.external.adapter.factory.IGenericDatasetAdapterFactory;
+import edu.uci.ics.asterix.external.dataset.adapter.IControlledAdapter;
+import edu.uci.ics.asterix.om.types.IAType;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
+import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.job.IOperatorDescriptorRegistry;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputUnaryOutputOperatorNodePushable;
+
+/**
+ * This operator is used to access external data residing in HDFS using record ids (RIDs)
+ * that are pushed to it in frame buffers.
+ */
+public class ExternalDataAccessByRIDOperatorDescriptor extends AbstractSingleActivityOperatorDescriptor {
+
+	private static final long serialVersionUID = 1L;
+ private final Map<String, Object> adapterConfiguration;
+ private final IAType atype;
+ private IGenericDatasetAdapterFactory datasourceAdapterFactory;
+ private IControlledAdapter adapter;
+ private final HashMap<Integer, String> files;
+
+ public ExternalDataAccessByRIDOperatorDescriptor(
+ IOperatorDescriptorRegistry spec, Map<String, Object> arguments, IAType atype,
+ RecordDescriptor outRecDesc,IGenericDatasetAdapterFactory dataSourceAdapterFactory, HashMap<Integer, String> files) {
+ super(spec, 1, 1);
+ this.atype = atype;
+ this.adapterConfiguration = arguments;
+ this.datasourceAdapterFactory = dataSourceAdapterFactory;
+ this.recordDescriptors[0] = outRecDesc;
+ this.files = files;
+ }
+
+ @Override
+ public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+ IRecordDescriptorProvider recordDescProvider, int partition,
+ int nPartitions) throws HyracksDataException {
+ return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {
+ @Override
+ public void open() throws HyracksDataException {
+ //create the access by index adapter
+ try {
+ adapter = datasourceAdapterFactory.createAccessByRIDAdapter(adapterConfiguration, atype, files);
+ adapter.initialize(ctx);
+ } catch (Exception e) {
+ throw new HyracksDataException("error during creation of external read by RID adapter", e);
+ }
+ writer.open();
+ }
+
+ @Override
+ public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+ adapter.processNextFrame(buffer, writer);
+ }
+
+ @Override
+ public void close() throws HyracksDataException {
+ //close adapter and flush remaining frame if needed
+ adapter.close(writer);
+ //close writer
+ writer.close();
+ }
+
+ @Override
+ public void fail() throws HyracksDataException {
+ writer.fail();
+ }
+ };
+ }
+}
\ No newline at end of file
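Note: the operator above is a unary push operator; RID tuples arrive through nextFrame and the matching external records are written to its output. A hypothetical wiring sketch (the factory choice and all names are illustrative, not part of this change):

    import java.util.HashMap;
    import java.util.Map;

    import edu.uci.ics.asterix.external.adapter.factory.HDFSAdapterFactory;
    import edu.uci.ics.asterix.external.adapter.factory.IGenericDatasetAdapterFactory;
    import edu.uci.ics.asterix.external.data.operator.ExternalDataAccessByRIDOperatorDescriptor;
    import edu.uci.ics.asterix.om.types.IAType;
    import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
    import edu.uci.ics.hyracks.api.job.JobSpecification;

    public class AccessByRIDWiringSketch {
        public static ExternalDataAccessByRIDOperatorDescriptor build(JobSpecification spec, Map<String, Object> config,
                IAType recordType, RecordDescriptor outRecDesc, HashMap<Integer, String> numberToFile) {
            IGenericDatasetAdapterFactory factory = new HDFSAdapterFactory();
            return new ExternalDataAccessByRIDOperatorDescriptor(spec, config, recordType, outRecDesc, factory, numberToFile);
        }
    }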
diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/data/operator/ExternalDataIndexingOperatorDescriptor.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/data/operator/ExternalDataIndexingOperatorDescriptor.java
new file mode 100644
index 0000000..9ff1f06
--- /dev/null
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/data/operator/ExternalDataIndexingOperatorDescriptor.java
@@ -0,0 +1,64 @@
+package edu.uci.ics.asterix.external.data.operator;
+
+import java.util.Map;
+
+import edu.uci.ics.asterix.external.adapter.factory.IGenericDatasetAdapterFactory;
+import edu.uci.ics.asterix.external.dataset.adapter.IDatasourceAdapter;
+import edu.uci.ics.asterix.om.types.IAType;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
+import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable;
+
+/*
+ * A single activity operator that provides the functionality of scanning data along
+ * with their RIDs using an instance of the configured adapter.
+ */
+
+public class ExternalDataIndexingOperatorDescriptor extends AbstractSingleActivityOperatorDescriptor{
+
+ private static final long serialVersionUID = 1L;
+
+ private final Map<String, Object> adapterConfiguration;
+ private final Map<String,Integer> files;
+ private final IAType atype;
+ private IGenericDatasetAdapterFactory datasourceAdapterFactory;
+
+ public ExternalDataIndexingOperatorDescriptor(JobSpecification spec, Map<String, Object> arguments, IAType atype,
+ RecordDescriptor rDesc, IGenericDatasetAdapterFactory dataSourceAdapterFactory, Map<String,Integer> files) {
+ super(spec, 0, 1);
+ recordDescriptors[0] = rDesc;
+ this.adapterConfiguration = arguments;
+ this.atype = atype;
+ this.datasourceAdapterFactory = dataSourceAdapterFactory;
+ this.files = files;
+ }
+
+ @Override
+ public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+ IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions)
+ throws HyracksDataException {
+
+ return new AbstractUnaryOutputSourceOperatorNodePushable() {
+ @Override
+ public void initialize() throws HyracksDataException {
+ writer.open();
+ IDatasourceAdapter adapter = null;
+ try {
+ adapter = ((IGenericDatasetAdapterFactory) datasourceAdapterFactory).createIndexingAdapter(
+ adapterConfiguration, atype, files);
+ adapter.initialize(ctx);
+ adapter.start(partition, writer);
+ } catch (Exception e) {
+ throw new HyracksDataException("exception during reading from external data source", e);
+ } finally {
+ writer.close();
+ }
+ }
+ };
+ }
+}
\ No newline at end of file
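Note: unlike the access-by-RID operator, this one is a source operator (0 inputs, 1 output); it scans the external data and emits each record together with its RID fields, so the supplied record descriptor must cover both. A hypothetical wiring sketch under the same assumptions as above:

    import java.util.Map;

    import edu.uci.ics.asterix.external.adapter.factory.HDFSAdapterFactory;
    import edu.uci.ics.asterix.external.data.operator.ExternalDataIndexingOperatorDescriptor;
    import edu.uci.ics.asterix.om.types.IAType;
    import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
    import edu.uci.ics.hyracks.api.job.JobSpecification;

    public class IndexingScanWiringSketch {
        public static ExternalDataIndexingOperatorDescriptor build(JobSpecification spec, Map<String, Object> config,
                IAType recordTypeWithRIDs, RecordDescriptor outRecDescWithRIDs, Map<String, Integer> fileToNumber) {
            return new ExternalDataIndexingOperatorDescriptor(spec, config, recordTypeWithRIDs, outRecDescWithRIDs,
                    new HDFSAdapterFactory(), fileToNumber);
        }
    }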
diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/HDFSAccessByRIDAdapter.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/HDFSAccessByRIDAdapter.java
new file mode 100644
index 0000000..86a060c
--- /dev/null
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/HDFSAccessByRIDAdapter.java
@@ -0,0 +1,1170 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.asterix.external.dataset.adapter;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.RCFile;
+import org.apache.hadoop.hive.ql.io.RCFile.Reader;
+import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
+import edu.uci.ics.asterix.common.exceptions.AsterixException;
+import edu.uci.ics.asterix.external.adapter.factory.HDFSAdapterFactory;
+import edu.uci.ics.asterix.om.base.AInt32;
+import edu.uci.ics.asterix.om.base.AInt64;
+import edu.uci.ics.asterix.om.base.AString;
+import edu.uci.ics.asterix.om.types.ARecordType;
+import edu.uci.ics.asterix.om.types.ATypeTag;
+import edu.uci.ics.asterix.om.types.IAType;
+import edu.uci.ics.asterix.runtime.operators.file.ControlledADMTupleParserFactory;
+import edu.uci.ics.asterix.runtime.operators.file.ControlledDelimitedDataTupleParserFactory;
+import edu.uci.ics.asterix.runtime.operators.file.ControlledTupleParser;
+import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.NotImplementedException;
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
+import edu.uci.ics.hyracks.dataflow.common.data.parsers.IValueParserFactory;
+import edu.uci.ics.hyracks.dataflow.std.file.ITupleParserFactory;
+
+/**
+ * Provides functionality for fetching specific external data records stored in an HDFS instance
+ * using their RID.
+ */
+@SuppressWarnings({ "deprecation" })
+public class HDFSAccessByRIDAdapter extends FileSystemBasedAdapter implements IControlledAdapter{
+
+ private static final long serialVersionUID = 1L;
+ private boolean newFrame;
+ private transient ByteBuffer frameBuffer;
+ private String inputFormat;
+ private Configuration conf;
+ private transient FileSystem fs;
+ private RecordDescriptor inRecDesc;
+ private final HashMap<Integer, String> files;
+
+ public HDFSAccessByRIDAdapter(IAType atype, String inputFormat, AlgebricksPartitionConstraint clusterLocations, RecordDescriptor inRecDesc, Configuration conf, HashMap<Integer,String> files) {
+ super(atype);
+ this.inputFormat = inputFormat;
+ this.conf = conf;
+ this.inRecDesc = inRecDesc;
+ this.files = files;
+ }
+
+ @Override
+ public void configure(Map<String, Object> arguments) throws Exception {
+ this.configuration = arguments;
+ fs = FileSystem.get(conf);
+ String specifiedFormat = (String) configuration.get(KEY_FORMAT);
+ if (specifiedFormat == null) {
+ throw new IllegalArgumentException(" Unspecified data format");
+ } else if (FORMAT_DELIMITED_TEXT.equalsIgnoreCase(specifiedFormat)) {
+ parserFactory = getDelimitedDataTupleParserFactory((ARecordType) atype);
+ } else if (FORMAT_ADM.equalsIgnoreCase((String)configuration.get(KEY_FORMAT))) {
+ parserFactory = new ControlledADMTupleParserFactory((ARecordType) atype);
+ } else {
+ throw new IllegalArgumentException(" format " + configuration.get(KEY_FORMAT) + " not supported");
+ }
+ }
+
+ @Override
+ protected ITupleParserFactory getDelimitedDataTupleParserFactory(ARecordType recordType) throws AsterixException {
+ int n = recordType.getFieldTypes().length;
+ IValueParserFactory[] fieldParserFactories = new IValueParserFactory[n];
+ for (int i = 0; i < n; i++) {
+ ATypeTag tag = recordType.getFieldTypes()[i].getTypeTag();
+ IValueParserFactory vpf = typeToValueParserFactMap.get(tag);
+ if (vpf == null) {
+ throw new NotImplementedException("No value parser factory for delimited fields of type " + tag);
+ }
+ fieldParserFactories[i] = vpf;
+ }
+		String delimiterValue = (String) configuration.get(KEY_DELIMITER);
+		if (delimiterValue == null || delimiterValue.length() != 1) {
+			throw new AsterixException("improper delimiter: a single delimiter character is required");
+		}
+
+		Character delimiter = delimiterValue.charAt(0);
+ return new ControlledDelimitedDataTupleParserFactory(recordType, fieldParserFactories, delimiter);
+ }
+
+ @Override
+ public void start(int partition, IFrameWriter writer) throws Exception {
+ throw new NotImplementedException("Access by RID adapter doesn't support start function");
+ }
+
+ public void processNextFrame(ByteBuffer buffer, IFrameWriter writer) throws HyracksDataException
+ {
+ frameBuffer = buffer;
+ newFrame = true;
+ ((ControlledTupleParser)parser).parseNext(writer);
+ }
+
+ public void close(IFrameWriter writer) throws HyracksDataException
+ {
+ ((ControlledTupleParser)parser).close(writer);
+ }
+
+ public AdapterType getAdapterType() {
+ return AdapterType.READ;
+ }
+
+ @Override
+ public void initialize(IHyracksTaskContext ctx) throws Exception {
+ this.ctx = ctx;
+ //create parser and initialize it with an instance of the inputStream
+ parser = parserFactory.createTupleParser(ctx);
+ ((ControlledTupleParser)parser).initialize(getInputStream(0));
+ }
+
+ @Override
+ public InputStream getInputStream(int partition) throws IOException {
+
+ //if files map is not null, then it is optimized and we should return optimized inputStream, else return regular
+ if(files == null)
+ {
+
+ //different input stream implementation based on the input format
+ if(inputFormat.equals(HDFSAdapterFactory.INPUT_FORMAT_RC))
+ {
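+				//This anonymous InputStream is driven by the RID tuples in frameBuffer: for each tuple it reads
+				//(file name, block byte offset, row number), re-opens or re-seeks the RCFile reader only when the
+				//file or block changes, and emits the requested row as 0x01-delimited text terminated by EOL.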
+ return new InputStream() {
+ private RCFile.Reader reader;
+ private int rowDifference;
+ private String lastFileName = "";
+ private String newFileName;
+ private long lastByteLocation = 0;
+ private long newByteLocation = 0;
+ private int lastRowNumber = 0;
+ private int newRowNumber = 0;
+ private LongWritable key;
+ private BytesRefArrayWritable value;
+ private int EOL = "\n".getBytes()[0];
+ private byte delimiter = 0x01;
+ private boolean pendingValue = false;
+ private int currentTupleIdx;
+ private int numberOfTuplesInCurrentFrame;
+ private IFrameTupleAccessor tupleAccessor = new FrameTupleAccessor(ctx.getFrameSize(),inRecDesc);
+ private ByteBufferInputStream bbis = new ByteBufferInputStream();
+ private DataInputStream dis = new DataInputStream(bbis);
+
+ @Override
+ public void close()
+ {
+ if (reader != null)
+ {
+ reader.close();
+ }
+ try {
+ super.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ @Override
+ public int read(byte[] buffer, int offset, int len) throws IOException {
+ if(newFrame)
+ {
+ //first time called with this frame
+ //reset frame buffer
+ tupleAccessor.reset(frameBuffer);
+ //get number of tuples in frame
+ numberOfTuplesInCurrentFrame = tupleAccessor.getTupleCount();
+ //set tuple index to first tuple
+ currentTupleIdx = 0;
+ //set new frame to false
+ newFrame = false;
+ pendingValue = false;
+ }
+
+ //check and see if there is a pending value
+ //Double check this
+ int numBytes = 0;
+ if (pendingValue) {
+ //last value didn't fit into buffer
+ int sizeOfNextTuple = getTupleSize(value) + 1;
+ if(sizeOfNextTuple > len)
+ {
+ return 0;
+ }
+ copyCurrentTuple(buffer, offset + numBytes);
+ buffer[offset + numBytes + sizeOfNextTuple - 1] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ //set pending to false
+ pendingValue = false;
+ //move to next tuple
+ currentTupleIdx++;
+ }
+
+ //No pending value or done with pending value
+ //check if there are more tuples in the frame
+ while(currentTupleIdx < numberOfTuplesInCurrentFrame)
+ {
+ //get 3 things from the current tuple in the frame(File name, byte location and row number)
+ //get the fileName
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 0));
+ newFileName = ((AString) inRecDesc.getFields()[0].deserialize(dis)).getStringValue();
+ //check if it is a new file
+						if(!lastFileName.equals(newFileName))
+ {
+ //new file
+ lastFileName = newFileName;
+ //close old file
+ if(reader != null)
+ {
+ reader.close();
+ }
+ //open new file
+ reader = new Reader(fs, new Path(lastFileName), conf);
+ //read and save byte location
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 1));
+ lastByteLocation = ((AInt64) inRecDesc.getFields()[1].deserialize(dis)).getLongValue();
+ //seek
+ reader.seek(lastByteLocation);
+ //read and save rowNumber
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 2));
+ lastRowNumber = ((AInt32)(inRecDesc.getFields()[2].deserialize(dis))).getIntegerValue();
+ //loop until row
+ for(int i=0; i < lastRowNumber; i++)
+ {
+ //this loop perform a single I/O and move to the next record in the block which is already in memory
+ //if no more records in the current block, it perform another I/O and get the next block
+ //<this should never happen here>
+ reader.next(key);
+ }
+ //read record
+ reader.getCurrentRow(value);
+ //copy it to the buffer if there is enough space
+ int sizeOfNextTuple = getTupleSize(value) + 1;
+ if(sizeOfNextTuple + numBytes > len)
+ {
+ //mark waiting value
+ pendingValue = true;
+ return numBytes;
+ }
+ copyCurrentTuple(buffer, offset + numBytes);
+ buffer[offset + numBytes + sizeOfNextTuple - 1] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ else
+ {
+ //same file
+ //get the byte location
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 1));
+ newByteLocation = ((AInt64) inRecDesc.getFields()[1].deserialize(dis)).getLongValue();
+
+ //check if same block
+ if(lastByteLocation != newByteLocation)
+ {
+ //new block
+ lastByteLocation = newByteLocation;
+ //seek
+ reader.seek(lastByteLocation);
+ //read and save rowNumber
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 2));
+ lastRowNumber = ((AInt32)(inRecDesc.getFields()[2].deserialize(dis))).getIntegerValue();
+ //loop until row
+ for(int i=0; i < lastRowNumber; i++)
+ {
+ reader.next(key);
+ }
+ //read record
+ reader.getCurrentRow(value);
+ //copy it to the buffer if there is enough space
+ int sizeOfNextTuple = getTupleSize(value) + 1;
+ if(sizeOfNextTuple + numBytes > len)
+ {
+ //mark waiting value
+ pendingValue = true;
+ return numBytes;
+ }
+ copyCurrentTuple(buffer, offset + numBytes);
+ buffer[offset + numBytes + sizeOfNextTuple - 1] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ else
+ {
+ //same block
+ //get the row number
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 2));
+ newRowNumber = ((AInt32)(inRecDesc.getFields()[2].deserialize(dis))).getIntegerValue();
+
+ //calculate row difference
+ rowDifference = newRowNumber - lastRowNumber;
+
+ //update last row number
+ lastRowNumber = newRowNumber;
+
+ //move to the new row
+ for(int i=0; i < rowDifference; i++)
+ {
+ reader.next(key);
+ }
+ //read record
+ reader.getCurrentRow(value);
+
+ //copy it to the buffer if there is enough space
+ int sizeOfNextTuple = getTupleSize(value) + 1;
+ if(sizeOfNextTuple + numBytes > len)
+ {
+ //mark waiting value
+ pendingValue = true;
+ return numBytes;
+ }
+ copyCurrentTuple(buffer, offset + numBytes);
+ buffer[offset + numBytes + sizeOfNextTuple - 1] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ }
+ //move to next tuple
+ currentTupleIdx++;
+ }
+ //no more tuples in frame
+ return (numBytes == 0) ? -1 : numBytes;
+ }
+
+ private void copyCurrentTuple(byte[] buffer, int offset) throws IOException {
+ int rcOffset = 0;
+ for(int i=0; i< value.size(); i++)
+ {
+ System.arraycopy(value.get(i).getData(), value.get(i).getStart(), buffer, offset + rcOffset, value.get(i).getLength());
+ rcOffset += value.get(i).getLength() + 1;
+ buffer[rcOffset - 1] = delimiter;
+ }
+ }
+
+ private int getTupleSize(BytesRefArrayWritable value2) {
+ int size=0;
+ //loop over rc column and add lengths
+ for(int i=0; i< value.size(); i++)
+ {
+ size += value.get(i).getLength();
+ }
+					//add the sizes of the delimiter bytes
+ size += value.size() -1;
+ return size;
+ }
+
+ @Override
+ public int read() throws IOException {
+					throw new NotImplementedException("Use read(byte[], int, int)");
+ }
+ };
+ }
+ else if (inputFormat.equals(HDFSAdapterFactory.INPUT_FORMAT_TEXT))
+ {
+ return new InputStream() {
+ private FSDataInputStream reader;
+ private String lastFileName = "";
+ private String newFileName;
+ private int EOL = "\n".getBytes()[0];
+ private int currentTupleIdx;
+ private int numberOfTuplesInCurrentFrame;
+ private long byteLocation;
+ private IFrameTupleAccessor tupleAccessor = new FrameTupleAccessor(ctx.getFrameSize(),inRecDesc);
+ private String value;
+ private String pendingValue = null;
+ private ByteBufferInputStream bbis = new ByteBufferInputStream();
+ private DataInputStream dis = new DataInputStream(bbis);
+
+ @Override
+ public int read(byte[] buffer, int offset, int len) throws IOException {
+ if(newFrame)
+ {
+ //first time called with this frame
+ //reset frame buffer
+ tupleAccessor.reset(frameBuffer);
+ //get number of tuples in frame
+ numberOfTuplesInCurrentFrame = tupleAccessor.getTupleCount();
+ //set tuple index to first tuple
+ currentTupleIdx = 0;
+ //set new frame to false
+ newFrame = false;
+ }
+
+ //check and see if there is a pending value
+ int numBytes = 0;
+ if (pendingValue != null) {
+ //last value didn't fit into buffer
+ int sizeOfNextTuple = pendingValue.length() + 1;
+ if(sizeOfNextTuple > len)
+ {
+ return 0;
+ }
+ //there is enough space
+ System.arraycopy(pendingValue.getBytes(), 0, buffer, offset + numBytes, pendingValue.length());
+ buffer[offset + numBytes + pendingValue.length()] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ //set pending to false
+ pendingValue = null;
+ //move to next tuple
+ currentTupleIdx++;
+ }
+
+ //No pending value or done with pending value
+ //check if there are more tuples in the frame
+ while(currentTupleIdx < numberOfTuplesInCurrentFrame)
+ {
+ //get the fileName
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 0));
+ newFileName = ((AString) inRecDesc.getFields()[0].deserialize(dis)).getStringValue();
+ //check if it is a new file
+ if(!lastFileName.equals(newFileName))
+ {
+ //new file
+ lastFileName = newFileName;
+ //close old file
+ if(reader != null)
+ {
+ reader.close();
+ }
+ //open new file
+ reader = fs.open(new Path(lastFileName));
+ //read byte location
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 1));
+ byteLocation = ((AInt64) inRecDesc.getFields()[1].deserialize(dis)).getLongValue();
+ //seek
+ reader.seek(byteLocation);
+ //read record
+ value = reader.readLine();
+ //copy it to the buffer if there is enough space
+ int sizeOfNextTuple = value.length() + 1;
+ if(sizeOfNextTuple + numBytes > len)
+ {
+ //mark waiting value
+ pendingValue = value;
+ return numBytes;
+ }
+ System.arraycopy(value.getBytes(), 0, buffer, offset + numBytes, value.length());
+ buffer[offset + numBytes + value.length()] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ else
+ {
+ //same file, just seek and read
+ //read byte location
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 1));
+ byteLocation = ((AInt64) inRecDesc.getFields()[1].deserialize(dis)).getLongValue();
+ //seek
+ reader.seek(byteLocation);
+ //read record
+ value = reader.readLine();
+ //copy it to the buffer if there is enough space
+ int sizeOfNextTuple = value.length() + 1;
+ if(sizeOfNextTuple + numBytes > len)
+ {
+ //mark waiting value
+ pendingValue = value;
+ return numBytes;
+ }
+ System.arraycopy(value.getBytes(), 0, buffer, offset + numBytes, value.length());
+ buffer[offset + numBytes + value.length()] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ currentTupleIdx++;
+ }
+ return (numBytes == 0) ? -1 : numBytes;
+ }
+
+ @Override
+ public int read() throws IOException {
+					throw new NotImplementedException("Use read(byte[], int, int)");
+ }
+
+ @Override
+ public void close(){
+ try {
+ if (reader != null)
+ {
+ reader.close();
+ }
+ super.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ };
+ }
+ else if (inputFormat.equals(HDFSAdapterFactory.INPUT_FORMAT_SEQUENCE))
+ {
+ return new InputStream() {
+ private SequenceFile.Reader reader;
+ private Writable key;
+ private Text value;
+ private String lastFileName = "";
+ private String newFileName;
+ private long byteLocation;
+ private int EOL = "\n".getBytes()[0];
+ private int currentTupleIdx;
+ private int numberOfTuplesInCurrentFrame;
+ private IFrameTupleAccessor tupleAccessor = new FrameTupleAccessor(ctx.getFrameSize(),inRecDesc);
+ private Text pendingValue = null;
+ private ByteBufferInputStream bbis = new ByteBufferInputStream();
+ private DataInputStream dis = new DataInputStream(bbis);
+
+ @Override
+ public int read(byte[] buffer, int offset, int len) throws IOException {
+
+ if(newFrame)
+ {
+ //first time called with this frame
+ //reset frame buffer
+ tupleAccessor.reset(frameBuffer);
+ //get number of tuples in frame
+ numberOfTuplesInCurrentFrame = tupleAccessor.getTupleCount();
+ //set tuple index to first tuple
+ currentTupleIdx = 0;
+ //set new frame to false
+ newFrame = false;
+ }
+
+ //check and see if there is a pending value
+ //Double check this
+ int numBytes = 0;
+ if (pendingValue != null) {
+ //last value didn't fit into buffer
+ int sizeOfNextTuple = pendingValue.getLength() + 1;
+ if(sizeOfNextTuple > len)
+ {
+ return 0;
+ }
+ //there is enough space
+ System.arraycopy(pendingValue.getBytes(), 0, buffer, offset + numBytes, pendingValue.getLength());
+ buffer[offset + numBytes + pendingValue.getLength()] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ //set pending to false
+ pendingValue = null;
+ //move to next tuple
+ currentTupleIdx++;
+ }
+
+ //No pending value or done with pending value
+ //check if there are more tuples in the frame
+ while(currentTupleIdx < numberOfTuplesInCurrentFrame)
+ {
+ //get the fileName]
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 0));
+ newFileName = ((AString) inRecDesc.getFields()[0].deserialize(dis)).getStringValue();
+ //check if it is a new file
+ if(!lastFileName.equals(newFileName))
+ {
+ //new file
+ lastFileName = newFileName;
+ //close old file
+ if(reader != null)
+ {
+ reader.close();
+ }
+ //open new file
+ reader = new SequenceFile.Reader(fs,new Path(lastFileName),conf);
+ //read byte location
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 1));
+ byteLocation = ((AInt64) inRecDesc.getFields()[1].deserialize(dis)).getLongValue();
+ //seek
+ reader.seek(byteLocation);
+ //read record
+ reader.next(key, value);
+ //copy it to the buffer if there is enough space
+ int sizeOfNextTuple = value.getLength() + 1;
+ if(sizeOfNextTuple + numBytes > len)
+ {
+ //mark waiting value
+ pendingValue = value;
+ return numBytes;
+ }
+ System.arraycopy(value.getBytes(), 0, buffer, offset + numBytes, value.getLength());
+ buffer[offset + numBytes + value.getLength()] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ else
+ {
+ //same file, just seek and read
+ //read byte location
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 1));
+ byteLocation = ((AInt64) inRecDesc.getFields()[1].deserialize(dis)).getLongValue();
+ //seek
+ reader.seek(byteLocation);
+ //read record
+ reader.next(key, value);
+ //copy it to the buffer if there is enough space
+ int sizeOfNextTuple = value.getLength() + 1;
+ if(sizeOfNextTuple + numBytes > len)
+ {
+ //mark waiting value
+ pendingValue = value;
+ return numBytes;
+ }
+ System.arraycopy(value.getBytes(), 0, buffer, offset + numBytes, value.getLength());
+ buffer[offset + numBytes + value.getLength()] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ currentTupleIdx++;
+ }
+ return (numBytes == 0) ? -1 : numBytes;
+ }
+
+ @Override
+ public int read() throws IOException {
+ throw new NotImplementedException("Use read(byte[], int, int)");
+ }
+
+ @Override
+ public void close(){
+ try {
+ if (reader != null)
+ {
+ reader.close();
+ }
+ super.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ };
+ }
+ //unknown input format
+ throw new IOException("Unknown input format");
+ }
+ else
+ {
+ //optimized
+ //different input stream implementation based on the input format
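+ //in the optimized path the first RID field is a file number (AInt32) used to look up the path in the files list,
+ //instead of the full file name carried by the non-optimized path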
+ if(inputFormat.equals(HDFSAdapterFactory.INPUT_FORMAT_RC))
+ {
+ return new InputStream() {
+ private RCFile.Reader reader;
+ private int rowDifference;
+ private int lastFileNumber = -1;
+ private int newFileNumber = 0;
+ private long lastByteLocation = 0;
+ private long newByteLocation = 0;
+ private int lastRowNumber = 0;
+ private int newRowNumber = 0;
+ private LongWritable key;
+ private BytesRefArrayWritable value;
+ private int EOL = "\n".getBytes()[0];
+ private byte delimiter = 0x01;
+ private boolean pendingValue = false;
+ private int currentTupleIdx;
+ private int numberOfTuplesInCurrentFrame;
+ private IFrameTupleAccessor tupleAccessor = new FrameTupleAccessor(ctx.getFrameSize(),inRecDesc);
+ private ByteBufferInputStream bbis = new ByteBufferInputStream();
+ private DataInputStream dis = new DataInputStream(bbis);
+
+ @Override
+ public void close()
+ {
+ if (reader != null)
+ {
+ reader.close();
+ }
+ try {
+ super.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ @Override
+ public int read(byte[] buffer, int offset, int len) throws IOException {
+ if(newFrame)
+ {
+ //first time called with this frame
+ //reset frame buffer
+ tupleAccessor.reset(frameBuffer);
+ //get number of tuples in frame
+ numberOfTuplesInCurrentFrame = tupleAccessor.getTupleCount();
+ //set tuple index to first tuple
+ currentTupleIdx = 0;
+ //set new frame to false
+ newFrame = false;
+ pendingValue = false;
+ }
+
+ //check and see if there is a pending value
+ //Double check this
+ int numBytes = 0;
+ if (pendingValue) {
+ //last value didn't fit into buffer
+ int sizeOfNextTuple = getTupleSize(value) + 1;
+ if(sizeOfNextTuple > len)
+ {
+ return 0;
+ }
+ copyCurrentTuple(buffer, offset + numBytes);
+ buffer[offset + numBytes + sizeOfNextTuple - 1] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ //set pending to false
+ pendingValue = false;
+ //move to next tuple
+ currentTupleIdx++;
+ }
+
+ //No pending value or done with pending value
+ //check if there are more tuples in the frame
+ while(currentTupleIdx < numberOfTuplesInCurrentFrame)
+ {
+ //get 3 things from the current tuple in the frame (file number, byte location and row number)
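+ //three cases are handled below: a new file (open a reader and seek), the same file but a new block (seek within
+ //the open reader), or the same block (just skip forward by the row difference)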
+ //get the fileName
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 0));
+ newFileNumber = ((AInt32) inRecDesc.getFields()[0].deserialize(dis)).getIntegerValue();
+ //check if it is a new file
+ if(lastFileNumber != newFileNumber)
+ {
+ //new file
+ lastFileNumber = newFileNumber;
+ //close old file
+ if(reader != null)
+ {
+ reader.close();
+ }
+ //open new file
+ reader = new Reader(fs, new Path(files.get(newFileNumber)), conf);
+ //read and save byte location
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 1));
+ lastByteLocation = ((AInt64) inRecDesc.getFields()[1].deserialize(dis)).getLongValue();
+ //seek
+ reader.seek(lastByteLocation);
+ //read and save rowNumber
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 2));
+ lastRowNumber = ((AInt32)(inRecDesc.getFields()[2].deserialize(dis))).getIntegerValue();
+ //loop until row
+ for(int i=0; i < lastRowNumber; i++)
+ {
+ //this loop performs a single I/O and then moves to the next record in the block, which is already in memory
+ //if there are no more records in the current block, it performs another I/O and gets the next block
+ //(this should never happen here)
+ reader.next(key);
+ }
+ //read record
+ reader.getCurrentRow(value);
+ //copy it to the buffer if there is enough space
+ int sizeOfNextTuple = getTupleSize(value) + 1;
+ if(sizeOfNextTuple + numBytes > len)
+ {
+ //mark waiting value
+ pendingValue = true;
+ return numBytes;
+ }
+ copyCurrentTuple(buffer, offset + numBytes);
+ buffer[offset + numBytes + sizeOfNextTuple - 1] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ else
+ {
+ //same file
+ //get the byte location
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 1));
+ newByteLocation = ((AInt64) inRecDesc.getFields()[1].deserialize(dis)).getLongValue();
+
+ //check if same block
+ if(lastByteLocation != newByteLocation)
+ {
+ //new block
+ lastByteLocation = newByteLocation;
+ //seek
+ reader.seek(lastByteLocation);
+ //read and save rowNumber
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 2));
+ lastRowNumber = ((AInt32)(inRecDesc.getFields()[2].deserialize(dis))).getIntegerValue();
+ //loop until row
+ for(int i=0; i < lastRowNumber; i++)
+ {
+ reader.next(key);
+ }
+ //read record
+ reader.getCurrentRow(value);
+ //copy it to the buffer if there is enough space
+ int sizeOfNextTuple = getTupleSize(value) + 1;
+ if(sizeOfNextTuple + numBytes > len)
+ {
+ //mark waiting value
+ pendingValue = true;
+ return numBytes;
+ }
+ copyCurrentTuple(buffer, offset + numBytes);
+ buffer[offset + numBytes + sizeOfNextTuple - 1] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ else
+ {
+ //same block
+ //get the row number
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 2));
+ newRowNumber = ((AInt32)(inRecDesc.getFields()[2].deserialize(dis))).getIntegerValue();
+
+ //calculate row difference
+ rowDifference = newRowNumber - lastRowNumber;
+
+ //update last row number
+ lastRowNumber = newRowNumber;
+
+ //move to the new row
+ for(int i=0; i < rowDifference; i++)
+ {
+ reader.next(key);
+ }
+ //read record
+ reader.getCurrentRow(value);
+
+ //copy it to the buffer if there is enough space
+ int sizeOfNextTuple = getTupleSize(value) + 1;
+ if(sizeOfNextTuple + numBytes > len)
+ {
+ //mark waiting value
+ pendingValue = true;
+ return numBytes;
+ }
+ copyCurrentTuple(buffer, offset + numBytes);
+ buffer[offset + numBytes + sizeOfNextTuple - 1] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ }
+ //move to next tuple
+ currentTupleIdx++;
+ }
+ //no more tuples in frame
+ return (numBytes == 0) ? -1 : numBytes;
+ }
+
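+ //writes the columns of the current RC row into buffer starting at offset, separating fields with the 0x01 delimiter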
+ private void copyCurrentTuple(byte[] buffer, int offset) throws IOException {
+ int rcOffset = 0;
+ for(int i=0; i< value.size(); i++)
+ {
+ System.arraycopy(value.get(i).getData(), value.get(i).getStart(), buffer, offset + rcOffset, value.get(i).getLength());
+ rcOffset += value.get(i).getLength() + 1;
+ buffer[offset + rcOffset - 1] = delimiter;
+ }
+ }
+
+ private int getTupleSize(BytesRefArrayWritable value2) {
+ int size=0;
+ //loop over rc column and add lengths
+ for(int i=0; i< value.size(); i++)
+ {
+ size += value.get(i).getLength();
+ }
+ //add the delimiter bytes (one between each pair of columns)
+ size += value.size() - 1;
+ return size;
+ }
+
+ @Override
+ public int read() throws IOException {
+ throw new NotImplementedException("Use read(byte[], int, int)");
+ }
+ };
+ }
+ else if (inputFormat.equals(HDFSAdapterFactory.INPUT_FORMAT_TEXT))
+ {
+ return new InputStream() {
+ private FSDataInputStream reader;
+ private int lastFileNumber = -1;
+ private int newFileNumber = 0;
+ private int EOL = "\n".getBytes()[0];
+ private int currentTupleIdx;
+ private int numberOfTuplesInCurrentFrame;
+ private long byteLocation;
+ private IFrameTupleAccessor tupleAccessor = new FrameTupleAccessor(ctx.getFrameSize(),inRecDesc);
+ private String value;
+ private String pendingValue = null;
+ private ByteBufferInputStream bbis = new ByteBufferInputStream();
+ private DataInputStream dis = new DataInputStream(bbis);
+
+ @Override
+ public int read(byte[] buffer, int offset, int len) throws IOException {
+ if(newFrame)
+ {
+ //first time called with this frame
+ //reset frame buffer
+ tupleAccessor.reset(frameBuffer);
+ //get number of tuples in frame
+ numberOfTuplesInCurrentFrame = tupleAccessor.getTupleCount();
+ //set tuple index to first tuple
+ currentTupleIdx = 0;
+ //set new frame to false
+ newFrame = false;
+ }
+
+ //check and see if there is a pending value
+ int numBytes = 0;
+ if (pendingValue != null) {
+ //last value didn't fit into buffer
+ int sizeOfNextTuple = pendingValue.length() + 1;
+ if(sizeOfNextTuple > len)
+ {
+ return 0;
+ }
+ //there is enough space
+ System.arraycopy(pendingValue.getBytes(), 0, buffer, offset + numBytes, pendingValue.length());
+ buffer[offset + numBytes + pendingValue.length()] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ //set pending to false
+ pendingValue = null;
+ //move to next tuple
+ currentTupleIdx++;
+ }
+
+ //No pending value or done with pending value
+ //check if there are more tuples in the frame
+ while(currentTupleIdx < numberOfTuplesInCurrentFrame)
+ {
+ //get the file number
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 0));
+ newFileNumber = ((AInt32) inRecDesc.getFields()[0].deserialize(dis)).getIntegerValue();
+ //check if it is a new file
+ if(lastFileNumber != newFileNumber)
+ {
+ //new file
+ lastFileNumber = newFileNumber;
+ //close old file
+ if(reader != null)
+ {
+ reader.close();
+ }
+
+ //open new file
+ reader = fs.open(new Path(files.get(newFileNumber)));
+ //read byte location
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 1));
+ byteLocation = ((AInt64) inRecDesc.getFields()[1].deserialize(dis)).getLongValue();
+ //seek
+ reader.seek(byteLocation);
+ //read record
+ value = reader.readLine();
+ //copy it to the buffer if there is enough space
+ int sizeOfNextTuple = value.length() + 1;
+ if(sizeOfNextTuple + numBytes > len)
+ {
+ //mark waiting value
+ pendingValue = value;
+ return numBytes;
+ }
+ System.arraycopy(value.getBytes(), 0, buffer, offset + numBytes, value.length());
+ buffer[offset + numBytes + value.length()] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ else
+ {
+ //same file, just seek and read
+ //read byte location
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 1));
+ byteLocation = ((AInt64) inRecDesc.getFields()[1].deserialize(dis)).getLongValue();
+ //seek
+ reader.seek(byteLocation);
+ //read record
+ value = reader.readLine();
+ //copy it to the buffer if there is enough space
+ int sizeOfNextTuple = value.length() + 1;
+ if(sizeOfNextTuple + numBytes > len)
+ {
+ //mark waiting value
+ pendingValue = value;
+ return numBytes;
+ }
+ System.arraycopy(value.getBytes(), 0, buffer, offset + numBytes, value.length());
+ buffer[offset + numBytes + value.length()] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ currentTupleIdx++;
+ }
+ return (numBytes == 0) ? -1 : numBytes;
+ }
+
+ @Override
+ public int read() throws IOException {
+ throw new NotImplementedException("Use read(byte[], int, int)");
+ }
+
+ @Override
+ public void close(){
+ try {
+ if (reader != null)
+ {
+ reader.close();
+ }
+ super.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ };
+ }
+ else if (inputFormat.equals(HDFSAdapterFactory.INPUT_FORMAT_SEQUENCE))
+ {
+ return new InputStream() {
+ private SequenceFile.Reader reader;
+ private Writable key;
+ private Text value;
+ private int lastFileNumber = -1;
+ private int newFileNumber = 0;
+ private long byteLocation;
+ private int EOL = "\n".getBytes()[0];
+ private int currentTupleIdx;
+ private int numberOfTuplesInCurrentFrame;
+ private IFrameTupleAccessor tupleAccessor = new FrameTupleAccessor(ctx.getFrameSize(),inRecDesc);
+ private Text pendingValue = null;
+ private ByteBufferInputStream bbis = new ByteBufferInputStream();
+ private DataInputStream dis = new DataInputStream(bbis);
+
+ @Override
+ public int read(byte[] buffer, int offset, int len) throws IOException {
+
+ if(newFrame)
+ {
+ //first time called with this frame
+ //reset frame buffer
+ tupleAccessor.reset(frameBuffer);
+ //get number of tuples in frame
+ numberOfTuplesInCurrentFrame = tupleAccessor.getTupleCount();
+ //set tuple index to first tuple
+ currentTupleIdx = 0;
+ //set new frame to false
+ newFrame = false;
+ }
+
+ //check and see if there is a pending value
+ //Double check this
+ int numBytes = 0;
+ if (pendingValue != null) {
+ //last value didn't fit into buffer
+ int sizeOfNextTuple = pendingValue.getLength() + 1;
+ if(sizeOfNextTuple > len)
+ {
+ return 0;
+ }
+ //there is enough space
+ System.arraycopy(pendingValue.getBytes(), 0, buffer, offset + numBytes, pendingValue.getLength());
+ buffer[offset + numBytes + pendingValue.getLength()] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ //set pending to false
+ pendingValue = null;
+ //move to next tuple
+ currentTupleIdx++;
+ }
+
+ //No pending value or done with pending value
+ //check if there are more tuples in the frame
+ while(currentTupleIdx < numberOfTuplesInCurrentFrame)
+ {
+ //get the file number
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 0));
+ newFileNumber = ((AInt32) inRecDesc.getFields()[0].deserialize(dis)).getIntegerValue();
+ //check if it is a new file
+ if(lastFileNumber != newFileNumber)
+ {
+ //new file
+ lastFileNumber = newFileNumber;
+ //close old file
+ if(reader != null)
+ {
+ reader.close();
+ }
+ //open new file
+ reader = new SequenceFile.Reader(fs,new Path(files.get(newFileNumber)),conf);
+ //read byte location
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 1));
+ byteLocation = ((AInt64) inRecDesc.getFields()[1].deserialize(dis)).getLongValue();
+ //seek
+ reader.seek(byteLocation);
+ //read record
+ reader.next(key, value);
+ //copy it to the buffer if there is enough space
+ int sizeOfNextTuple = value.getLength() + 1;
+ if(sizeOfNextTuple + numBytes > len)
+ {
+ //mark waiting value
+ pendingValue = value;
+ return numBytes;
+ }
+ System.arraycopy(value.getBytes(), 0, buffer, offset + numBytes, value.getLength());
+ buffer[offset + numBytes + value.getLength()] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ else
+ {
+ //same file, just seek and read
+ //read byte location
+ bbis.setByteBuffer(frameBuffer, tupleAccessor.getTupleStartOffset(currentTupleIdx) + tupleAccessor.getFieldSlotsLength() + tupleAccessor.getFieldStartOffset(currentTupleIdx, 1));
+ byteLocation = ((AInt64) inRecDesc.getFields()[1].deserialize(dis)).getLongValue();
+ //seek
+ reader.seek(byteLocation);
+ //read record
+ reader.next(key, value);
+ //copy it to the buffer if there is enough space
+ int sizeOfNextTuple = value.getLength() + 1;
+ if(sizeOfNextTuple + numBytes > len)
+ {
+ //mark waiting value
+ pendingValue = value;
+ return numBytes;
+ }
+ System.arraycopy(value.getBytes(), 0, buffer, offset + numBytes, value.getLength());
+ buffer[offset + numBytes + value.getLength()] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ currentTupleIdx++;
+ }
+ return (numBytes == 0) ? -1 : numBytes;
+ }
+
+ @Override
+ public int read() throws IOException {
+ throw new NotImplementedException("Use read(byte[], int, int)");
+ }
+
+ @Override
+ public void close(){
+ try {
+ if (reader != null)
+ {
+ reader.close();
+ }
+ super.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+ };
+ }
+ //unknown input format
+ throw new IOException("Unknown input format");
+ }
+ }
+
+ @Override
+ public AlgebricksPartitionConstraint getPartitionConstraint()
+ throws Exception {
+ return partitionConstraint;
+ }
+}
\ No newline at end of file
diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/HDFSAdapter.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/HDFSAdapter.java
index f8b381b..2b355ff 100644
--- a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/HDFSAdapter.java
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/HDFSAdapter.java
@@ -18,6 +18,7 @@
import java.io.InputStream;
import java.util.Map;
+import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.Counters.Counter;
import org.apache.hadoop.mapred.InputSplit;
@@ -26,6 +27,8 @@
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
+import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import edu.uci.ics.asterix.om.types.IAType;
import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
@@ -34,202 +37,348 @@
/**
* Provides functionality for fetching external data stored in an HDFS instance.
+ * Note: readers are never closed in adapters. Should we make sure they are closed before returning or before switching to a different reader?
*/
@SuppressWarnings({ "deprecation", "rawtypes" })
public class HDFSAdapter extends FileSystemBasedAdapter {
- private static final long serialVersionUID = 1L;
+ private static final long serialVersionUID = 1L;
- private transient String[] readSchedule;
- private transient boolean executed[];
- private transient InputSplit[] inputSplits;
- private transient JobConf conf;
- private transient AlgebricksPartitionConstraint clusterLocations;
+ private transient String[] readSchedule;
+ private transient boolean executed[];
+ private transient InputSplit[] inputSplits;
+ private transient JobConf conf;
+ private transient AlgebricksPartitionConstraint clusterLocations;
- private transient String nodeName;
+ private transient String nodeName;
- public HDFSAdapter(IAType atype, String[] readSchedule, boolean[] executed, InputSplit[] inputSplits, JobConf conf,
- AlgebricksPartitionConstraint clusterLocations) {
- super(atype);
- this.readSchedule = readSchedule;
- this.executed = executed;
- this.inputSplits = inputSplits;
- this.conf = conf;
- this.clusterLocations = clusterLocations;
- }
+ public HDFSAdapter(IAType atype, String[] readSchedule, boolean[] executed, InputSplit[] inputSplits, JobConf conf,
+ AlgebricksPartitionConstraint clusterLocations) {
+ super(atype);
+ this.readSchedule = readSchedule;
+ this.executed = executed;
+ this.inputSplits = inputSplits;
+ this.conf = conf;
+ this.clusterLocations = clusterLocations;
+ }
- @Override
- public void configure(Map<String, Object> arguments) throws Exception {
- this.configuration = arguments;
- configureFormat();
- }
+ @Override
+ public void configure(Map<String, Object> arguments) throws Exception {
+ this.configuration = arguments;
+ configureFormat();
+ }
- public AdapterType getAdapterType() {
- return AdapterType.READ_WRITE;
- }
+ public AdapterType getAdapterType() {
+ return AdapterType.READ_WRITE;
+ }
- @Override
- public void initialize(IHyracksTaskContext ctx) throws Exception {
- this.ctx = ctx;
- this.nodeName = ctx.getJobletContext().getApplicationContext().getNodeId();
- }
+ @Override
+ public void initialize(IHyracksTaskContext ctx) throws Exception {
+ this.ctx = ctx;
+ this.nodeName = ctx.getJobletContext().getApplicationContext().getNodeId();
+ }
- private Reporter getReporter() {
- Reporter reporter = new Reporter() {
+ private Reporter getReporter() {
+ Reporter reporter = new Reporter() {
- @Override
- public Counter getCounter(Enum<?> arg0) {
- return null;
- }
+ @Override
+ public Counter getCounter(Enum<?> arg0) {
+ return null;
+ }
- @Override
- public Counter getCounter(String arg0, String arg1) {
- return null;
- }
+ @Override
+ public Counter getCounter(String arg0, String arg1) {
+ return null;
+ }
- @Override
- public InputSplit getInputSplit() throws UnsupportedOperationException {
- return null;
- }
+ @Override
+ public InputSplit getInputSplit() throws UnsupportedOperationException {
+ return null;
+ }
- @Override
- public void incrCounter(Enum<?> arg0, long arg1) {
- }
+ @Override
+ public void incrCounter(Enum<?> arg0, long arg1) {
+ }
- @Override
- public void incrCounter(String arg0, String arg1, long arg2) {
- }
+ @Override
+ public void incrCounter(String arg0, String arg1, long arg2) {
+ }
- @Override
- public void setStatus(String arg0) {
- }
+ @Override
+ public void setStatus(String arg0) {
+ }
- @Override
- public void progress() {
- }
- };
+ @Override
+ public void progress() {
+ }
+ };
- return reporter;
- }
+ return reporter;
+ }
- @Override
- public InputStream getInputStream(int partition) throws IOException {
+ @Override
+ public InputStream getInputStream(int partition) throws IOException {
- return new InputStream() {
+ if(conf.getInputFormat() instanceof RCFileInputFormat)
+ {
+ //if hdfs input format is rc-input-format, we return a different InputStream
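+ //RC rows are flattened by this stream into 0x01-delimited, newline-terminated byte sequences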
+ return new InputStream() {
- private RecordReader<Object, Text> reader;
- private Object key;
- private Text value;
- private boolean hasMore = false;
- private int EOL = "\n".getBytes()[0];
- private Text pendingValue = null;
- private int currentSplitIndex = 0;
+ private RecordReader<LongWritable, BytesRefArrayWritable> reader;
+ private LongWritable key;
+ private BytesRefArrayWritable value;
+ private boolean hasMore = false;
+ private int EOL = "\n".getBytes()[0];
+ private byte delimiter = 0x01;
+ private boolean pendingValue = false;
+ private int currentSplitIndex = 0;
- @SuppressWarnings("unchecked")
- private boolean moveToNext() throws IOException {
- for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
- /**
- * read all the partitions scheduled to the current node
- */
- if (readSchedule[currentSplitIndex].equals(nodeName)) {
- /**
- * pick an unread split to read
- * synchronize among simultaneous partitions in the same machine
- */
- synchronized (executed) {
- if (executed[currentSplitIndex] == false) {
- executed[currentSplitIndex] = true;
- } else {
- continue;
- }
- }
+ @SuppressWarnings("unchecked")
+ private boolean moveToNext() throws IOException {
+ for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
+ /**
+ * read all the partitions scheduled to the current node
+ */
+ if (readSchedule[currentSplitIndex].equals(nodeName)) {
+ /**
+ * pick an unread split to read
+ * synchronize among simultaneous partitions in the same machine
+ */
+ synchronized (executed) {
+ if (executed[currentSplitIndex] == false) {
+ executed[currentSplitIndex] = true;
+ } else {
+ continue;
+ }
+ }
- /**
- * read the split
- */
- reader = getRecordReader(currentSplitIndex);
- key = reader.createKey();
- value = (Text) reader.createValue();
- return true;
- }
- }
- return false;
- }
+ /**
+ * read the split
+ */
+ reader = getRecordReader(currentSplitIndex);
+ key = reader.createKey();
+ value = reader.createValue();
+ return true;
+ }
+ }
+ return false;
+ }
- @Override
- public int read(byte[] buffer, int offset, int len) throws IOException {
- if (reader == null) {
- if (!moveToNext()) {
- //nothing to read
- return -1;
- }
- }
+ @Override
+ public int read(byte[] buffer, int offset, int len) throws IOException {
+ if (reader == null) {
+ if (!moveToNext()) {
+ //nothing to read
+ return -1;
+ }
+ }
- int numBytes = 0;
- if (pendingValue != null) {
- System.arraycopy(pendingValue.getBytes(), 0, buffer, offset + numBytes, pendingValue.getLength());
- buffer[offset + numBytes + pendingValue.getLength()] = (byte) EOL;
- numBytes += pendingValue.getLength() + 1;
- pendingValue = null;
- }
+ int numBytes = 0;
+ if (pendingValue) {
+ //last value didn't fit into buffer
+ int sizeOfNextTuple = getTupleSize(value) + 1;
+ if(sizeOfNextTuple > len)
+ {
+ return 0;
+ }
+ copyCurrentTuple(buffer, offset + numBytes);
+ buffer[offset + numBytes + sizeOfNextTuple - 1] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ //set pending to false
+ pendingValue = false;
+ }
- while (numBytes < len) {
- hasMore = reader.next(key, value);
- if (!hasMore) {
- while (moveToNext()) {
- hasMore = reader.next(key, value);
- if (hasMore) {
- //move to the next non-empty split
- break;
- }
- }
- }
- if (!hasMore) {
- return (numBytes == 0) ? -1 : numBytes;
- }
- int sizeOfNextTuple = value.getLength() + 1;
- if (numBytes + sizeOfNextTuple > len) {
- // cannot add tuple to current buffer
- // but the reader has moved pass the fetched tuple
- // we need to store this for a subsequent read call.
- // and return this then.
- pendingValue = value;
- break;
- } else {
- System.arraycopy(value.getBytes(), 0, buffer, offset + numBytes, value.getLength());
- buffer[offset + numBytes + value.getLength()] = (byte) EOL;
- numBytes += sizeOfNextTuple;
- }
- }
- return numBytes;
- }
+ while (numBytes < len) {
+ hasMore = reader.next(key, value);
+ if (!hasMore) {
+ while (moveToNext()) {
+ hasMore = reader.next(key, value);
+ if (hasMore) {
+ //move to the next non-empty split
+ break;
+ }
+ }
+ }
+ if (!hasMore) {
+ return (numBytes == 0) ? -1 : numBytes;
+ }
+ int sizeOfNextTuple = getTupleSize(value) + 1;
+ if (numBytes + sizeOfNextTuple > len) {
+ // cannot add tuple to current buffer
+ // but the reader has moved pass the fetched tuple
+ // we need to store this for a subsequent read call.
+ // and return this then.
+ pendingValue = true;
+ break;
+ } else {
+ //copy
+ copyCurrentTuple(buffer, offset + numBytes);
+ buffer[offset + numBytes + sizeOfNextTuple - 1] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ }
+ return numBytes;
+ }
- @Override
- public int read() throws IOException {
- throw new NotImplementedException("Use read(byte[], int, int");
- }
+ private void copyCurrentTuple(byte[] buffer, int offset) throws IOException {
+ int rcOffset = 0;
+ for(int i=0; i< value.size(); i++)
+ {
+ System.arraycopy(value.get(i).getData(), value.get(i).getStart(), buffer, offset + rcOffset, value.get(i).getLength());
+ rcOffset += value.get(i).getLength() + 1;
+ buffer[offset + rcOffset - 1] = delimiter;
+ }
+ }
- private RecordReader getRecordReader(int slitIndex) throws IOException {
- if (conf.getInputFormat() instanceof SequenceFileInputFormat) {
- SequenceFileInputFormat format = (SequenceFileInputFormat) conf.getInputFormat();
- RecordReader reader = format.getRecordReader(
- (org.apache.hadoop.mapred.FileSplit) inputSplits[slitIndex], conf, getReporter());
- return reader;
- } else {
- TextInputFormat format = (TextInputFormat) conf.getInputFormat();
- RecordReader reader = format.getRecordReader(
- (org.apache.hadoop.mapred.FileSplit) inputSplits[slitIndex], conf, getReporter());
- return reader;
- }
- }
+ private int getTupleSize(BytesRefArrayWritable value2) {
+ int size=0;
+ //loop over rc column and add lengths
+ for(int i=0; i< value.size(); i++)
+ {
+ size += value.get(i).getLength();
+ }
+ //add the delimiter bytes (one between each pair of columns)
+ size += value.size() - 1;
+ return size;
+ }
- };
+ @Override
+ public int read() throws IOException {
+ throw new NotImplementedException("Use read(byte[], int, int)");
+ }
- }
+ private RecordReader getRecordReader(int slitIndex) throws IOException {
+ RCFileInputFormat format = (RCFileInputFormat) conf.getInputFormat();
+ RecordReader reader = format.getRecordReader(
+ (org.apache.hadoop.mapred.FileSplit) inputSplits[slitIndex], conf, getReporter());
+ return reader;
+ }
- @Override
- public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
- return clusterLocations;
- }
+ };
+ }
+ else
+ {
+ return new InputStream() {
+
+ private RecordReader<Object, Text> reader;
+ private Object key;
+ private Text value;
+ private boolean hasMore = false;
+ private int EOL = "\n".getBytes()[0];
+ private Text pendingValue = null;
+ private int currentSplitIndex = 0;
+
+ @SuppressWarnings("unchecked")
+ private boolean moveToNext() throws IOException {
+ for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
+ /**
+ * read all the partitions scheduled to the current node
+ */
+ if (readSchedule[currentSplitIndex].equals(nodeName)) {
+ /**
+ * pick an unread split to read
+ * synchronize among simultaneous partitions in the same machine
+ */
+ synchronized (executed) {
+ if (executed[currentSplitIndex] == false) {
+ executed[currentSplitIndex] = true;
+ } else {
+ continue;
+ }
+ }
+
+ /**
+ * read the split
+ */
+ reader = getRecordReader(currentSplitIndex);
+ key = reader.createKey();
+ value = (Text) reader.createValue();
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public int read(byte[] buffer, int offset, int len) throws IOException {
+ if (reader == null) {
+ if (!moveToNext()) {
+ //nothing to read
+ return -1;
+ }
+ }
+
+ int numBytes = 0;
+ if (pendingValue != null) {
+ int sizeOfNextTuple = pendingValue.getLength() + 1;
+ if(sizeOfNextTuple > len)
+ {
+ return 0;
+ }
+ System.arraycopy(pendingValue.getBytes(), 0, buffer, offset + numBytes, pendingValue.getLength());
+ buffer[offset + numBytes + pendingValue.getLength()] = (byte) EOL;
+ numBytes += pendingValue.getLength() + 1;
+ pendingValue = null;
+ }
+
+ while (numBytes < len) {
+ hasMore = reader.next(key, value);
+ if (!hasMore) {
+ while (moveToNext()) {
+ hasMore = reader.next(key, value);
+ if (hasMore) {
+ //move to the next non-empty split
+ break;
+ }
+ }
+ }
+ if (!hasMore) {
+ return (numBytes == 0) ? -1 : numBytes;
+ }
+ int sizeOfNextTuple = value.getLength() + 1;
+ if (numBytes + sizeOfNextTuple > len) {
+ // cannot add tuple to current buffer
+ // but the reader has moved past the fetched tuple
+ // we need to store this for a subsequent read call.
+ // and return this then.
+ pendingValue = value;
+ break;
+ } else {
+ System.arraycopy(value.getBytes(), 0, buffer, offset + numBytes, value.getLength());
+ buffer[offset + numBytes + value.getLength()] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ }
+ return numBytes;
+ }
+
+ @Override
+ public int read() throws IOException {
+ throw new NotImplementedException("Use read(byte[], int, int)");
+ }
+
+ private RecordReader getRecordReader(int slitIndex) throws IOException {
+ if (conf.getInputFormat() instanceof SequenceFileInputFormat) {
+ SequenceFileInputFormat format = (SequenceFileInputFormat) conf.getInputFormat();
+ RecordReader reader = format.getRecordReader(
+ (org.apache.hadoop.mapred.FileSplit) inputSplits[slitIndex], conf, getReporter());
+ return reader;
+ } else {
+ TextInputFormat format = (TextInputFormat) conf.getInputFormat();
+ RecordReader reader = format.getRecordReader(
+ (org.apache.hadoop.mapred.FileSplit) inputSplits[slitIndex], conf, getReporter());
+ return reader;
+ }
+ }
+
+ };
+ }
+
+ }
+
+ @Override
+ public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
+ return clusterLocations;
+ }
}
\ No newline at end of file
diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/HDFSIndexingAdapter.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/HDFSIndexingAdapter.java
new file mode 100644
index 0000000..59b39c5
--- /dev/null
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/HDFSIndexingAdapter.java
@@ -0,0 +1,1208 @@
+package edu.uci.ics.asterix.external.dataset.adapter;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
+import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.Counters.Counter;
+import edu.uci.ics.asterix.om.types.IAType;
+import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.NotImplementedException;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+
+
+/**
+ * Provides functionality for reading external files and attaching RID info to them before they are sent to the parser.
+ *
+ * Room for optimization: get the number of indexing fields (or field names for ADM) and do a quick filtering before sending to the parser.
+ *
+ */
+@SuppressWarnings({ "deprecation", "rawtypes" })
+public class HDFSIndexingAdapter extends FileSystemBasedAdapter {
+
+ private static final long serialVersionUID = 1L;
+ private transient String[] readSchedule;
+ private transient boolean executed[];
+ private transient InputSplit[] inputSplits;
+ private transient JobConf conf;
+ private transient AlgebricksPartitionConstraint clusterLocations;
+ private final Map<String,Integer> files;
+ private transient String nodeName;
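+ //fragments used to splice the RID fields into ADM records, producing a prefix of the form
+ //{"_file-name":"<path>","_byte-location":<offset>i64, followed by the original record fields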
+ public static final byte[] fileNameFieldNameWithRecOpeningBraces = "{\"_file-name\":\"".getBytes();
+ public static final byte[] fileNameFieldClosingQuotation = "\"".getBytes();
+ public static final byte[] fileNumberFieldNameWithRecOpeningBraces = "{\"_file-number\":".getBytes();
+ public static final byte[] bytelocationFieldName = ",\"_byte-location\":".getBytes();
+ public static final byte[] bytelocationValueEnd = "i64,".getBytes();
+
+ public HDFSIndexingAdapter(IAType atype, String[] readSchedule, boolean[] executed, InputSplit[] inputSplits, JobConf conf,
+ AlgebricksPartitionConstraint clusterLocations, Map<String,Integer> files) {
+ super(atype);
+ this.readSchedule = readSchedule;
+ this.executed = executed;
+ this.inputSplits = inputSplits;
+ this.conf = conf;
+ this.clusterLocations = clusterLocations;
+ this.files = files;
+ }
+
+ @Override
+ public void configure(Map<String, Object> arguments) throws Exception {
+ this.configuration = arguments;
+ configureFormat();
+ }
+
+ public AdapterType getAdapterType() {
+ return AdapterType.READ;
+ }
+
+ @Override
+ public void initialize(IHyracksTaskContext ctx) throws Exception {
+ this.ctx = ctx;
+ this.nodeName = ctx.getJobletContext().getApplicationContext().getNodeId();
+ }
+
+ private Reporter getReporter() {
+ Reporter reporter = new Reporter() {
+
+ @Override
+ public Counter getCounter(Enum<?> arg0) {
+ return null;
+ }
+
+ @Override
+ public Counter getCounter(String arg0, String arg1) {
+ return null;
+ }
+
+ @Override
+ public InputSplit getInputSplit() throws UnsupportedOperationException {
+ return null;
+ }
+
+ @Override
+ public void incrCounter(Enum<?> arg0, long arg1) {
+ }
+
+ @Override
+ public void incrCounter(String arg0, String arg1, long arg2) {
+ }
+
+ @Override
+ public void setStatus(String arg0) {
+ }
+
+ @Override
+ public void progress() {
+ }
+ };
+
+ return reporter;
+ }
+
+ @Override
+ public InputStream getInputStream(int partition) throws IOException {
+ if(files == null)
+ {
+ if(conf.getInputFormat() instanceof RCFileInputFormat)
+ {
+ //indexing rc input format
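+ //each emitted record is: <file name>0x01<block byte offset>0x01<row number within block>0x01<0x01-delimited RC columns>\n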
+ return new InputStream() {
+
+ private RecordReader<LongWritable, BytesRefArrayWritable> reader;
+ private LongWritable key;
+ private BytesRefArrayWritable value;
+ private boolean hasMore = false;
+ private int EOL = "\n".getBytes()[0];
+ private byte delimiter = 0x01;
+ private boolean pendingValue = false;
+ private int currentSplitIndex = 0;
+ private byte[] fileName;
+ private byte[] byteLocation;
+ private byte[] rowNumberBytes;
+ private long blockByteLocation;
+ private long NextblockByteLocation;
+ private int rowNumber;
+
+ @SuppressWarnings("unchecked")
+ private boolean moveToNext() throws IOException {
+ for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
+ /**
+ * read all the partitions scheduled to the current node
+ */
+ if (readSchedule[currentSplitIndex].equals(nodeName)) {
+ /**
+ * pick an unread split to read
+ * synchronize among simultaneous partitions in the same machine
+ */
+ synchronized (executed) {
+ if (executed[currentSplitIndex] == false) {
+ executed[currentSplitIndex] = true;
+ } else {
+ continue;
+ }
+ }
+
+ /**
+ * read the split
+ */
+ reader = getRecordReader(currentSplitIndex);
+ key = reader.createKey();
+ value = reader.createValue();
+ fileName = ((FileSplit)(inputSplits[currentSplitIndex])).getPath().toUri().getPath().getBytes();
+ blockByteLocation = reader.getPos();
+ pendingValue = reader.next(key, value);
+ NextblockByteLocation = reader.getPos();
+ rowNumber = 1;
+ byteLocation = String.valueOf(blockByteLocation).getBytes("UTF-8");
+ rowNumberBytes = String.valueOf(rowNumber).getBytes("UTF-8");
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public int read(byte[] buffer, int offset, int len) throws IOException {
+ if (reader == null) {
+ if (!moveToNext()) {
+ //nothing to read
+ return -1;
+ }
+ }
+
+ int numBytes = 0;
+ if (pendingValue) {
+ //last value didn't fit into buffer
+ // 1 for EOL
+ int sizeOfNextTuple = getTupleSize(value) + 1;
+ if (numBytes + sizeOfNextTuple + rowNumberBytes.length + byteLocation.length + fileName.length + 3 > len) {
+ return 0;
+ }
+
+ //copy filename
+ System.arraycopy(fileName, 0, buffer, offset + numBytes, fileName.length);
+ buffer[offset + numBytes + fileName.length] = delimiter;
+ numBytes += fileName.length + 1;
+
+ //copy byte location
+ System.arraycopy(byteLocation, 0, buffer, offset + numBytes, byteLocation.length);
+ buffer[offset + numBytes + byteLocation.length] = delimiter;
+ numBytes += byteLocation.length + 1;
+
+ //copy row number
+ System.arraycopy(rowNumberBytes, 0, buffer, offset + numBytes, rowNumberBytes.length);
+ buffer[offset + numBytes + rowNumberBytes.length] = delimiter;
+ numBytes += rowNumberBytes.length + 1;
+
+ copyCurrentTuple(buffer, offset + numBytes);
+ buffer[offset + numBytes + sizeOfNextTuple - 1] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ //set pending to false
+ pendingValue = false;
+ }
+
+ while (numBytes < len) {
+ hasMore = reader.next(key, value);
+ if (!hasMore) {
+ while (moveToNext()) {
+ hasMore = reader.next(key, value);
+ if (hasMore) {
+ //move to the next non-empty split
+ break;
+ }
+ }
+ }
+ if (!hasMore) {
+ return (numBytes == 0) ? -1 : numBytes;
+ }
+
+ //check if moved to next block
+ blockByteLocation = reader.getPos();
+ if(blockByteLocation != NextblockByteLocation)
+ {
+ //moved to a new block, reset stuff
+ //row number
+ rowNumber = 1;
+ rowNumberBytes = String.valueOf(rowNumber).getBytes("UTF-8");
+
+ //block location
+ byteLocation = String.valueOf(NextblockByteLocation).getBytes("UTF-8");
+ NextblockByteLocation = blockByteLocation;
+ }
+ else
+ {
+ rowNumber += 1;
+ rowNumberBytes = String.valueOf(rowNumber).getBytes("UTF-8");
+ }
+
+ int sizeOfNextTuple = getTupleSize(value) + 1;
+ if (numBytes + sizeOfNextTuple + rowNumberBytes.length + byteLocation.length + fileName.length + 3 > len) {
+ // cannot add tuple to current buffer
+ // but the reader has moved past the fetched tuple
+ // we need to store this for a subsequent read call.
+ // and return this then.
+ pendingValue = true;
+ break;
+ } else {
+ //copy filename
+ System.arraycopy(fileName, 0, buffer, offset + numBytes, fileName.length);
+ buffer[offset + numBytes + fileName.length] = delimiter;
+ numBytes += fileName.length + 1;
+
+ //copy byte location
+ System.arraycopy(byteLocation, 0, buffer, offset + numBytes, byteLocation.length);
+ buffer[offset + numBytes + byteLocation.length] = delimiter;
+ numBytes += byteLocation.length + 1;
+
+ //copy row number
+ System.arraycopy(rowNumberBytes, 0, buffer, offset + numBytes, rowNumberBytes.length);
+ buffer[offset + numBytes + rowNumberBytes.length] = delimiter;
+ numBytes += rowNumberBytes.length + 1;
+
+ copyCurrentTuple(buffer, offset + numBytes);
+ buffer[offset + numBytes + sizeOfNextTuple - 1] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ }
+ return numBytes;
+ }
+
+ private void copyCurrentTuple(byte[] buffer, int offset) throws IOException {
+ int rcOffset = 0;
+ for(int i=0; i< value.size(); i++)
+ {
+ System.arraycopy(value.get(i).getData(), value.get(i).getStart(), buffer, offset + rcOffset, value.get(i).getLength());
+ rcOffset += value.get(i).getLength() + 1;
+ buffer[offset + rcOffset - 1] = delimiter;
+ }
+ }
+
+ private int getTupleSize(BytesRefArrayWritable value2) {
+ int size=0;
+ //loop over rc column and add lengths
+ for(int i=0; i< value.size(); i++)
+ {
+ size += value.get(i).getLength();
+ }
+ //add the delimiter bytes (one between each pair of columns)
+ size += value.size() - 1;
+ return size;
+ }
+
+ @Override
+ public int read() throws IOException {
+ throw new NotImplementedException("Use read(byte[], int, int)");
+ }
+
+ private RecordReader getRecordReader(int slitIndex) throws IOException {
+ RCFileInputFormat format = (RCFileInputFormat) conf.getInputFormat();
+ RecordReader reader = format.getRecordReader(
+ (org.apache.hadoop.mapred.FileSplit) inputSplits[slitIndex], conf, getReporter());
+ return reader;
+ }
+
+ };
+ }
+ else
+ {
+ //get content format
+ if(configuration.get(KEY_FORMAT).equals(FORMAT_DELIMITED_TEXT))
+ {
+ //reading data and RIDs for delimited text
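+ //the file name and byte location are prepended to each line, separated by the dataset's field delimiter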
+ return new InputStream() {
+
+ private RecordReader<Object, Text> reader;
+ private Object key;
+ private Text value;
+ private boolean hasMore = false;
+ private int EOL = "\n".getBytes()[0];
+ private Text pendingValue = null;
+ private int currentSplitIndex = 0;
+ private byte[] fileName;
+ private byte[] byteLocation;
+ private byte delimiter = ((String)configuration.get(KEY_DELIMITER)).getBytes()[0];
+
+ @SuppressWarnings("unchecked")
+ private boolean moveToNext() throws IOException {
+ for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
+ /**
+ * read all the partitions scheduled to the current node
+ */
+ if (readSchedule[currentSplitIndex].equals(nodeName)) {
+ /**
+ * pick an unread split to read
+ * synchronize among simultaneous partitions in the same machine
+ */
+ synchronized (executed) {
+ if (executed[currentSplitIndex] == false) {
+ executed[currentSplitIndex] = true;
+ } else {
+ continue;
+ }
+ }
+
+ /**
+ * read the split
+ */
+ reader = getRecordReader(currentSplitIndex);
+ key = reader.createKey();
+ value = (Text) reader.createValue();
+ fileName = ((FileSplit)(inputSplits[currentSplitIndex])).getPath().toUri().getPath().getBytes();
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public int read(byte[] buffer, int offset, int len) throws IOException {
+ if (reader == null) {
+ if (!moveToNext()) {
+ //nothing to read
+ return -1;
+ }
+ }
+
+ int numBytes = 0;
+ if (pendingValue != null) {
+ int sizeOfNextTuple = pendingValue.getLength() + 1;
+ if (numBytes + sizeOfNextTuple + byteLocation.length + fileName.length + 2 > len)
+ {
+ return numBytes;
+ }
+ //copy filename
+ System.arraycopy(fileName, 0, buffer, offset + numBytes, fileName.length);
+ buffer[offset + numBytes + fileName.length] = delimiter;
+ numBytes += fileName.length + 1;
+
+ //copy byte location
+ System.arraycopy(byteLocation, 0, buffer, offset + numBytes, byteLocation.length);
+ buffer[offset + numBytes + byteLocation.length] = delimiter;
+ numBytes += byteLocation.length + 1;
+
+ //copy actual value
+ System.arraycopy(pendingValue.getBytes(), 0, buffer, offset + numBytes, pendingValue.getLength());
+ buffer[offset + numBytes + pendingValue.getLength()] = (byte) EOL;
+ numBytes += pendingValue.getLength() + 1;
+ pendingValue = null;
+ }
+
+ while (numBytes < len) {
+ //get reader position before you actually read
+ byteLocation = String.valueOf(reader.getPos()).getBytes();
+ hasMore = reader.next(key, value);
+ if (!hasMore) {
+ while (moveToNext()) {
+ //get reader position before you actually read
+ byteLocation = String.valueOf(reader.getPos()).getBytes("UTF-8");
+ hasMore = reader.next(key, value);
+ if (hasMore) {
+ //move to the next non-empty split
+ break;
+ }
+ }
+ }
+ if (!hasMore) {
+ return (numBytes == 0) ? -1 : numBytes;
+ }
+ int sizeOfNextTuple = value.getLength() + 1;
+ if (numBytes + sizeOfNextTuple + byteLocation.length + fileName.length + 2 > len) {
+ // cannot add tuple to current buffer
+ // but the reader has moved past the fetched tuple
+ // we need to store this for a subsequent read call.
+ // and return this then.
+ pendingValue = value;
+ break;
+ } else {
+ //copy filename
+ System.arraycopy(fileName, 0, buffer, offset + numBytes, fileName.length);
+ buffer[offset + numBytes + fileName.length] = delimiter;
+ numBytes += fileName.length + 1;
+
+ //copy byte location
+ System.arraycopy(byteLocation, 0, buffer, offset + numBytes, byteLocation.length);
+ buffer[offset + numBytes + byteLocation.length] = delimiter;
+ numBytes += byteLocation.length + 1;
+
+ //Copy actual value
+ System.arraycopy(value.getBytes(), 0, buffer, offset + numBytes, value.getLength());
+ buffer[offset + numBytes + value.getLength()] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ }
+ return numBytes;
+ }
+
+ @Override
+ public int read() throws IOException {
+ throw new NotImplementedException("Use read(byte[], int, int)");
+ }
+
+ private RecordReader getRecordReader(int slitIndex) throws IOException {
+ if (conf.getInputFormat() instanceof SequenceFileInputFormat) {
+ SequenceFileInputFormat format = (SequenceFileInputFormat) conf.getInputFormat();
+ RecordReader reader = format.getRecordReader(
+ (org.apache.hadoop.mapred.FileSplit) inputSplits[slitIndex], conf, getReporter());
+ return reader;
+ } else {
+ TextInputFormat format = (TextInputFormat) conf.getInputFormat();
+ RecordReader reader = format.getRecordReader(
+ (org.apache.hadoop.mapred.FileSplit) inputSplits[slitIndex], conf, getReporter());
+ return reader;
+ }
+ }
+
+ };
+ }
+ else if((configuration.get(KEY_FORMAT).equals(FORMAT_ADM)))
+ {
+ //reading data and RIDs for adm formatted data
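+ //everything up to the record's first field name is replaced by the constructed RID prefix
+ //({"_file-name":"<path>","_byte-location":<offset>i64,) and the rest of the ADM record is copied as-is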
+ return new InputStream() {
+
+ private RecordReader<Object, Text> reader;
+ private Object key;
+ private Text value;
+ private boolean hasMore = false;
+ private int EOL = "\n".getBytes()[0];
+ private Text pendingValue = null;
+ private int currentSplitIndex = 0;
+ private byte[] fileName;
+ private byte[] byteLocation;
+
+ @SuppressWarnings("unchecked")
+ private boolean moveToNext() throws IOException {
+ for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
+ /**
+ * read all the partitions scheduled to the current node
+ */
+ if (readSchedule[currentSplitIndex].equals(nodeName)) {
+ /**
+ * pick an unread split to read
+ * synchronize among simultaneous partitions in the same machine
+ */
+ synchronized (executed) {
+ if (executed[currentSplitIndex] == false) {
+ executed[currentSplitIndex] = true;
+ } else {
+ continue;
+ }
+ }
+
+ /**
+ * read the split
+ */
+ reader = getRecordReader(currentSplitIndex);
+ key = reader.createKey();
+ value = (Text) reader.createValue();
+ fileName = ((FileSplit)(inputSplits[currentSplitIndex])).getPath().toUri().getPath().getBytes();
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public int read(byte[] buffer, int offset, int len) throws IOException {
+ if (reader == null) {
+ if (!moveToNext()) {
+ //nothing to read
+ return -1;
+ }
+ }
+
+ int numBytes = 0;
+ if (pendingValue != null) {
+ int firstFieldLocation = value.find("\"");
+ int admValueSize = value.getLength();
+ if(firstFieldLocation >= 0)
+ {
+ int sizeOfNextTuple = value.getLength() - firstFieldLocation + 1;
+ int sizeOfNextTupleAndRID = fileNameFieldNameWithRecOpeningBraces.length + fileName.length + fileNameFieldClosingQuotation.length + bytelocationFieldName.length + byteLocation.length + bytelocationValueEnd.length + sizeOfNextTuple;
+ if (numBytes + sizeOfNextTupleAndRID > len) {
+ // still cannot add tuple to current buffer
+ // return 0 so parser would double the buffer size.
+ return 0;
+ } else {
+ //copy fileNameFieldNameWithRecOpeningBraces
+ System.arraycopy(fileNameFieldNameWithRecOpeningBraces, 0, buffer, offset + numBytes,fileNameFieldNameWithRecOpeningBraces.length);
+ numBytes += fileNameFieldNameWithRecOpeningBraces.length;
+ //copy fileName
+ System.arraycopy(fileName, 0, buffer, offset + numBytes,fileName.length);
+ numBytes += fileName.length;
+ //copy fileName closing quotation
+ System.arraycopy(fileNameFieldClosingQuotation, 0, buffer, offset + numBytes,fileNameFieldClosingQuotation.length);
+ numBytes += fileNameFieldClosingQuotation.length;
+ //copy bytelocationFieldName
+ System.arraycopy(bytelocationFieldName, 0, buffer, offset + numBytes,bytelocationFieldName.length);
+ numBytes += bytelocationFieldName.length;
+ //copy byte location value
+ System.arraycopy(byteLocation, 0, buffer, offset + numBytes,byteLocation.length);
+ numBytes += byteLocation.length;
+ //copy byte location field end
+ System.arraycopy(bytelocationValueEnd, 0, buffer, offset + numBytes,bytelocationValueEnd.length);
+ numBytes += bytelocationValueEnd.length;
+ //copy the actual adm instance
+ System.arraycopy(value.getBytes(), firstFieldLocation, buffer, offset + numBytes,admValueSize - firstFieldLocation);
+ buffer[offset + numBytes + admValueSize - firstFieldLocation] = (byte) EOL;
+ numBytes += admValueSize - firstFieldLocation +1;
+ }
+ }
+ pendingValue = null;
+ }
+
+ while (numBytes < len) {
+ //get reader position before you actually read
+ byteLocation = String.valueOf(reader.getPos()).getBytes("UTF-8");
+ hasMore = reader.next(key, value);
+ if (!hasMore) {
+ while (moveToNext()) {
+ //get reader position before you actually read
+ byteLocation = String.valueOf(reader.getPos()).getBytes("UTF-8");
+ hasMore = reader.next(key, value);
+ if (hasMore) {
+ //move to the next non-empty split
+ break;
+ }
+ }
+ }
+ if (!hasMore) {
+ return (numBytes == 0) ? -1 : numBytes;
+ }
+ //get the index of the first field name
+ int firstFieldLocation = value.find("\"");
+ int admValueSize = value.getLength();
+ if(firstFieldLocation >= 0)
+ {
+ int sizeOfNextTuple = value.getLength() - firstFieldLocation + 1;
+ int sizeOfNextTupleAndRID = fileNameFieldNameWithRecOpeningBraces.length + fileName.length + fileNameFieldClosingQuotation.length + bytelocationFieldName.length + byteLocation.length + bytelocationValueEnd.length + sizeOfNextTuple;
+ if (numBytes + sizeOfNextTupleAndRID > len) {
+ // cannot add tuple to current buffer
+ // but the reader has moved past the fetched tuple
+ // we need to store this for a subsequent read call.
+ // and return this then.
+ pendingValue = value;
+ break;
+ } else {
+ //copy fileNameFieldNameWithRecOpeningBraces
+ System.arraycopy(fileNameFieldNameWithRecOpeningBraces, 0, buffer, offset + numBytes,fileNameFieldNameWithRecOpeningBraces.length);
+ numBytes += fileNameFieldNameWithRecOpeningBraces.length;
+ //copy fileName
+ System.arraycopy(fileName, 0, buffer, offset + numBytes,fileName.length);
+ numBytes += fileName.length;
+ //copy fileName closing quotation
+ System.arraycopy(fileNameFieldClosingQuotation, 0, buffer, offset + numBytes,fileNameFieldClosingQuotation.length);
+ numBytes += fileNameFieldClosingQuotation.length;
+ //copy bytelocationFieldName
+ System.arraycopy(bytelocationFieldName, 0, buffer, offset + numBytes,bytelocationFieldName.length);
+ numBytes += bytelocationFieldName.length;
+ //copy byte location value
+ System.arraycopy(byteLocation, 0, buffer, offset + numBytes,byteLocation.length);
+ numBytes += byteLocation.length;
+ //copy byte location field end
+ System.arraycopy(bytelocationValueEnd, 0, buffer, offset + numBytes,bytelocationValueEnd.length);
+ numBytes += bytelocationValueEnd.length;
+ //copy the actual adm instance
+ System.arraycopy(value.getBytes(), firstFieldLocation, buffer, offset + numBytes,admValueSize - firstFieldLocation);
+ buffer[offset + numBytes + admValueSize - firstFieldLocation] = (byte) EOL;
+ numBytes += admValueSize - firstFieldLocation +1;
+ }
+ }
+ }
+ return numBytes;
+ }
+
+ @Override
+ public int read() throws IOException {
+ throw new NotImplementedException("Use read(byte[], int, int)");
+ }
+
+ private RecordReader getRecordReader(int slitIndex) throws IOException {
+ if (conf.getInputFormat() instanceof SequenceFileInputFormat) {
+ SequenceFileInputFormat format = (SequenceFileInputFormat) conf.getInputFormat();
+ RecordReader reader = format.getRecordReader(
+ (org.apache.hadoop.mapred.FileSplit) inputSplits[slitIndex], conf, getReporter());
+ return reader;
+ } else {
+ TextInputFormat format = (TextInputFormat) conf.getInputFormat();
+ RecordReader reader = format.getRecordReader(
+ (org.apache.hadoop.mapred.FileSplit) inputSplits[slitIndex], conf, getReporter());
+ return reader;
+ }
+ }
+
+ };
+ }
+ else
+ {
+ throw new IOException("Can't index " +configuration.get(KEY_FORMAT)+" input");
+ }
+ }
+ }
+ else
+ {
+ if(conf.getInputFormat() instanceof RCFileInputFormat)
+ {
+ //indexing rc input format
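+ //same as the RC indexing stream above, except that the RID starts with the file number looked up in the files map
+ //instead of the file name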
+ return new InputStream() {
+
+ private RecordReader<LongWritable, BytesRefArrayWritable> reader;
+ private LongWritable key;
+ private BytesRefArrayWritable value;
+ private boolean hasMore = false;
+ private int EOL = "\n".getBytes()[0];
+ private byte delimiter = 0x01;
+ private boolean pendingValue = false;
+ private int currentSplitIndex = 0;
+ private byte[] fileNumber;
+ private byte[] byteLocation;
+ private byte[] rowNumberBytes;
+ private Integer file;
+ private long blockByteLocation;
+ private long NextblockByteLocation;
+ private int rowNumber;
+
+ @SuppressWarnings("unchecked")
+ private boolean moveToNext() throws IOException {
+ for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
+ /**
+ * read all the partitions scheduled to the current node
+ */
+ if (readSchedule[currentSplitIndex].equals(nodeName)) {
+ /**
+ * pick an unread split to read
+ * synchronize among simultaneous partitions in the same machine
+ */
+ synchronized (executed) {
+ if (executed[currentSplitIndex] == false) {
+ executed[currentSplitIndex] = true;
+ } else {
+ continue;
+ }
+ }
+
+ /**
+ * read the split
+ */
+ reader = getRecordReader(currentSplitIndex);
+ key = reader.createKey();
+ value = reader.createValue();
+ //getting the file number
+ file = files.get(((FileSplit)(inputSplits[currentSplitIndex])).getPath().toUri().getPath());
+ if(file == null)
+ {
+ throw new HyracksException("a file was not found in the map while indexing");
+ }
+ fileNumber = String.valueOf(file).getBytes("UTF-8");
+ blockByteLocation = reader.getPos();
+ pendingValue = reader.next(key, value);
+ NextblockByteLocation = reader.getPos();
+ rowNumber = 1;
+ byteLocation = String.valueOf(blockByteLocation).getBytes("UTF-8");
+ rowNumberBytes = String.valueOf(rowNumber).getBytes("UTF-8");
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public int read(byte[] buffer, int offset, int len) throws IOException {
+ if (reader == null) {
+ if (!moveToNext()) {
+ //nothing to read
+ return -1;
+ }
+ }
+
+ int numBytes = 0;
+ if (pendingValue) {
+ //last value didn't fit into buffer
+ // 1 for EOL
+ int sizeOfNextTuple = getTupleSize(value) + 1;
+ if (numBytes + sizeOfNextTuple + rowNumberBytes.length + byteLocation.length + fileNumber.length + 3 > len) {
+ return 0;
+ }
+ //copy file number
+ System.arraycopy(fileNumber, 0, buffer, offset + numBytes, fileNumber.length);
+ buffer[offset + numBytes + fileNumber.length] = delimiter;
+ numBytes += fileNumber.length + 1;
+
+ //copy byte location
+ System.arraycopy(byteLocation, 0, buffer, offset + numBytes, byteLocation.length);
+ buffer[offset + numBytes + byteLocation.length] = delimiter;
+ numBytes += byteLocation.length + 1;
+
+ //copy row number
+ System.arraycopy(rowNumberBytes, 0, buffer, offset + numBytes, rowNumberBytes.length);
+ buffer[offset + numBytes + rowNumberBytes.length] = delimiter;
+ numBytes += rowNumberBytes.length + 1;
+
+ copyCurrentTuple(buffer, offset + numBytes);
+ buffer[offset + numBytes + sizeOfNextTuple - 1] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ //set pending to false
+ pendingValue = false;
+ }
+
+ while (numBytes < len) {
+ hasMore = reader.next(key, value);
+ if (!hasMore) {
+ while (moveToNext()) {
+ hasMore = reader.next(key, value);
+ if (hasMore) {
+ //move to the next non-empty split
+ break;
+ }
+ }
+ }
+ if (!hasMore) {
+ return (numBytes == 0) ? -1 : numBytes;
+ }
+
+ //check if moved to next block
+ blockByteLocation = reader.getPos();
+ if(blockByteLocation != NextblockByteLocation)
+ {
+ //moved to a new block, reset stuff
+ //row number
+ rowNumber = 1;
+ rowNumberBytes = String.valueOf(rowNumber).getBytes("UTF-8");
+
+ //block location
+ byteLocation = String.valueOf(NextblockByteLocation).getBytes("UTF-8");
+ NextblockByteLocation = blockByteLocation;
+ }
+ else
+ {
+ rowNumber += 1;
+ rowNumberBytes = String.valueOf(rowNumber).getBytes("UTF-8");
+ }
+
+ int sizeOfNextTuple = getTupleSize(value) + 1;
+ if (numBytes + sizeOfNextTuple + rowNumberBytes.length + byteLocation.length + fileNumber.length + 3 > len) {
+ // cannot add tuple to current buffer
+ // but the reader has moved past the fetched tuple
+ // we need to store it for a subsequent read call
+ // and return it then.
+ pendingValue = true;
+ break;
+ } else {
+ //copy file number
+ System.arraycopy(fileNumber, 0, buffer, offset + numBytes, fileNumber.length);
+ buffer[offset + numBytes + fileNumber.length] = delimiter;
+ numBytes += fileNumber.length + 1;
+
+ //copy byte location
+ System.arraycopy(byteLocation, 0, buffer, offset + numBytes, byteLocation.length);
+ buffer[offset + numBytes + byteLocation.length] = delimiter;
+ numBytes += byteLocation.length + 1;
+
+ //copy row number
+ System.arraycopy(rowNumberBytes, 0, buffer, offset + numBytes, rowNumberBytes.length);
+ buffer[offset + numBytes + rowNumberBytes.length] = delimiter;
+ numBytes += rowNumberBytes.length + 1;
+
+ copyCurrentTuple(buffer, offset + numBytes);
+ buffer[offset + numBytes + sizeOfNextTuple - 1] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ }
+ return numBytes;
+ }
+
+ private void copyCurrentTuple(byte[] buffer, int offset) throws IOException {
+ int rcOffset = 0;
+ for(int i=0; i< value.size(); i++)
+ {
+ System.arraycopy(value.get(i).getData(), value.get(i).getStart(), buffer, offset + rcOffset, value.get(i).getLength());
+ rcOffset += value.get(i).getLength() + 1;
+ buffer[rcOffset - 1] = delimiter;
+ }
+ }
+
+ private int getTupleSize(BytesRefArrayWritable value2) {
+ int size = 0;
+ //loop over the rc columns and add up their lengths
+ for(int i=0; i< value2.size(); i++)
+ {
+ size += value2.get(i).getLength();
+ }
+ //add the delimiter bytes (one between every two columns)
+ size += value2.size() - 1;
+ return size;
+ }
+
+ @Override
+ public int read() throws IOException {
+ throw new NotImplementedException("Use read(byte[], int, int)");
+ }
+
+ private RecordReader getRecordReader(int splitIndex) throws IOException {
+ RCFileInputFormat format = (RCFileInputFormat) conf.getInputFormat();
+ RecordReader reader = format.getRecordReader(
+ (org.apache.hadoop.mapred.FileSplit) inputSplits[splitIndex], conf, getReporter());
+ return reader;
+ }
+
+ };
+ }
+ else
+ {
+ //get content format
+ if(configuration.get(KEY_FORMAT).equals(FORMAT_DELIMITED_TEXT))
+ {
+ //reading data and RIDs for delimited text
+ return new InputStream() {
+
+ private RecordReader<Object, Text> reader;
+ private Object key;
+ private Text value;
+ private boolean hasMore = false;
+ private int EOL = "\n".getBytes()[0];
+ private Text pendingValue = null;
+ private int currentSplitIndex = 0;
+ private Integer file;
+ private byte[] fileNumber;
+ private byte[] byteLocation;
+ private byte delimiter = ((String)configuration.get(KEY_DELIMITER)).getBytes()[0];
+
+ @SuppressWarnings("unchecked")
+ private boolean moveToNext() throws IOException {
+ for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
+ /**
+ * read all the partitions scheduled to the current node
+ */
+ if (readSchedule[currentSplitIndex].equals(nodeName)) {
+ /**
+ * pick an unread split to read
+ * synchronize among simultaneous partitions in the same machine
+ */
+ synchronized (executed) {
+ if (executed[currentSplitIndex] == false) {
+ executed[currentSplitIndex] = true;
+ } else {
+ continue;
+ }
+ }
+
+ /**
+ * read the split
+ */
+ reader = getRecordReader(currentSplitIndex);
+ key = reader.createKey();
+ value = (Text) reader.createValue();
+ file = files.get(((FileSplit)(inputSplits[currentSplitIndex])).getPath().toUri().getPath());
+ if(file == null)
+ {
+ throw new HyracksException("The file:"+((FileSplit)(inputSplits[currentSplitIndex])).getPath().toUri().getPath()+" was not found in the map while indexing");
+ }
+ fileNumber = String.valueOf(file).getBytes("UTF-8");
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public int read(byte[] buffer, int offset, int len) throws IOException {
+ if (reader == null) {
+ if (!moveToNext()) {
+ //nothing to read
+ return -1;
+ }
+ }
+
+ int numBytes = 0;
+ if (pendingValue != null) {
+ int sizeOfNextTuple = pendingValue.getLength() + 1;
+ if (numBytes + sizeOfNextTuple +byteLocation.length + fileNumber.length + 2> len)
+ {
+ return numBytes;
+ }
+ //copy file number
+ System.arraycopy(fileNumber, 0, buffer, offset + numBytes, fileNumber.length);
+ buffer[offset + numBytes + fileNumber.length] = delimiter;
+ numBytes += fileNumber.length + 1;
+
+ //copy byte location
+ System.arraycopy(byteLocation, 0, buffer, offset + numBytes, byteLocation.length);
+ buffer[offset + numBytes + byteLocation.length] = delimiter;
+ numBytes += byteLocation.length + 1;
+
+ //copy actual value
+ System.arraycopy(pendingValue.getBytes(), 0, buffer, offset + numBytes, pendingValue.getLength());
+ buffer[offset + numBytes + pendingValue.getLength()] = (byte) EOL;
+ numBytes += pendingValue.getLength() + 1;
+ pendingValue = null;
+ }
+
+ while (numBytes < len) {
+ //get reader position before you actually read
+ byteLocation = String.valueOf(reader.getPos()).getBytes("UTF-8");
+ hasMore = reader.next(key, value);
+ if (!hasMore) {
+ while (moveToNext()) {
+ //get reader position before you actually read
+ byteLocation = String.valueOf(reader.getPos()).getBytes("UTF-8");
+ hasMore = reader.next(key, value);
+ if (hasMore) {
+ //move to the next non-empty split
+ break;
+ }
+ }
+ }
+ if (!hasMore) {
+ return (numBytes == 0) ? -1 : numBytes;
+ }
+ int sizeOfNextTuple = value.getLength() + 1;
+ if (numBytes + sizeOfNextTuple +byteLocation.length + fileNumber.length + 2> len) {
+ // cannot add tuple to current buffer
+ // but the reader has moved past the fetched tuple
+ // we need to store it for a subsequent read call
+ // and return it then.
+ pendingValue = value;
+ break;
+ } else {
+ //copy file number
+ System.arraycopy(fileNumber, 0, buffer, offset + numBytes, fileNumber.length);
+ buffer[offset + numBytes + fileNumber.length] = delimiter;
+ numBytes += fileNumber.length + 1;
+
+ //copy byte location
+ System.arraycopy(byteLocation, 0, buffer, offset + numBytes, byteLocation.length);
+ buffer[offset + numBytes + byteLocation.length] = delimiter;
+ numBytes += byteLocation.length + 1;
+
+ //Copy actual value
+ System.arraycopy(value.getBytes(), 0, buffer, offset + numBytes, value.getLength());
+ buffer[offset + numBytes + value.getLength()] = (byte) EOL;
+ numBytes += sizeOfNextTuple;
+ }
+ }
+ return numBytes;
+ }
+
+ @Override
+ public int read() throws IOException {
+ throw new NotImplementedException("Use read(byte[], int, int)");
+ }
+
+ private RecordReader getRecordReader(int splitIndex) throws IOException {
+ if (conf.getInputFormat() instanceof SequenceFileInputFormat) {
+ SequenceFileInputFormat format = (SequenceFileInputFormat) conf.getInputFormat();
+ RecordReader reader = format.getRecordReader(
+ (org.apache.hadoop.mapred.FileSplit) inputSplits[splitIndex], conf, getReporter());
+ return reader;
+ } else {
+ TextInputFormat format = (TextInputFormat) conf.getInputFormat();
+ RecordReader reader = format.getRecordReader(
+ (org.apache.hadoop.mapred.FileSplit) inputSplits[splitIndex], conf, getReporter());
+ return reader;
+ }
+ }
+
+ };
+ }
+ else if((configuration.get(KEY_FORMAT).equals(FORMAT_ADM)))
+ {
+ //reading data and RIDs for adm formatted data
+ return new InputStream() {
+
+ private RecordReader<Object, Text> reader;
+ private Object key;
+ private Text value;
+ private boolean hasMore = false;
+ private int EOL = "\n".getBytes()[0];
+ private Text pendingValue = null;
+ private int currentSplitIndex = 0;
+ private Integer file;
+ private byte[] fileNumber;
+ private byte[] byteLocation;
+
+ @SuppressWarnings("unchecked")
+ private boolean moveToNext() throws IOException {
+ for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
+ /**
+ * read all the partitions scheduled to the current node
+ */
+ if (readSchedule[currentSplitIndex].equals(nodeName)) {
+ /**
+ * pick an unread split to read
+ * synchronize among simultaneous partitions in the same machine
+ */
+ synchronized (executed) {
+ if (executed[currentSplitIndex] == false) {
+ executed[currentSplitIndex] = true;
+ } else {
+ continue;
+ }
+ }
+
+ /**
+ * read the split
+ */
+ reader = getRecordReader(currentSplitIndex);
+ key = reader.createKey();
+ value = (Text) reader.createValue();
+ file = files.get(((FileSplit)(inputSplits[currentSplitIndex])).getPath().toUri().getPath());
+ if(file == null)
+ {
+ throw new HyracksException("a file was not found in the map while indexing");
+ }
+ fileNumber = String.valueOf(file).getBytes("UTF-8");
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public int read(byte[] buffer, int offset, int len) throws IOException {
+ if (reader == null) {
+ if (!moveToNext()) {
+ //nothing to read
+ return -1;
+ }
+ }
+
+ int numBytes = 0;
+ if (pendingValue != null) {
+ int firstFieldLocation = value.find("\"");
+ int admValueSize = value.getLength();
+ if(firstFieldLocation >= 0)
+ {
+ int sizeOfNextTuple = value.getLength() - firstFieldLocation + 1;
+ int sizeOfNextTupleAndRID = fileNumberFieldNameWithRecOpeningBraces.length + fileNumber.length + bytelocationFieldName.length + byteLocation.length + bytelocationValueEnd.length + sizeOfNextTuple;
+ if (numBytes + sizeOfNextTupleAndRID > len) {
+ // still cannot add tuple to current buffer
+ // return 0 so the parser will double the buffer size.
+ return 0;
+ } else {
+ //copy fileNumberFieldNameWithRecOpeningBraces
+ System.arraycopy(fileNumberFieldNameWithRecOpeningBraces, 0, buffer, offset + numBytes,fileNumberFieldNameWithRecOpeningBraces.length);
+ numBytes += fileNumberFieldNameWithRecOpeningBraces.length;
+ //copy file Number
+ System.arraycopy(fileNumber, 0, buffer, offset + numBytes,fileNumber.length);
+ numBytes += fileNumber.length;
+ //copy bytelocationFieldName
+ System.arraycopy(bytelocationFieldName, 0, buffer, offset + numBytes,bytelocationFieldName.length);
+ numBytes += bytelocationFieldName.length;
+ //copy byte location value
+ System.arraycopy(byteLocation, 0, buffer, offset + numBytes,byteLocation.length);
+ numBytes += byteLocation.length;
+ //copy byte location field end
+ System.arraycopy(bytelocationValueEnd, 0, buffer, offset + numBytes,bytelocationValueEnd.length);
+ numBytes += bytelocationValueEnd.length;
+ //copy the actual adm instance
+ System.arraycopy(value.getBytes(), firstFieldLocation, buffer, offset + numBytes,admValueSize - firstFieldLocation);
+ buffer[offset + numBytes + admValueSize - firstFieldLocation] = (byte) EOL;
+ numBytes += admValueSize - firstFieldLocation +1;
+ }
+ }
+ pendingValue = null;
+ }
+
+ while (numBytes < len) {
+ //get reader position before you actually read
+ byteLocation = String.valueOf(reader.getPos()).getBytes("UTF-8");
+ hasMore = reader.next(key, value);
+ if (!hasMore) {
+ while (moveToNext()) {
+ //get reader position before you actually read
+ byteLocation = String.valueOf(reader.getPos()).getBytes("UTF-8");
+ hasMore = reader.next(key, value);
+ if (hasMore) {
+ //move to the next non-empty split
+ break;
+ }
+ }
+ }
+ if (!hasMore) {
+ return (numBytes == 0) ? -1 : numBytes;
+ }
+ //get the index of the first field name
+ int firstFieldLocation = value.find("\"");
+ int admValueSize = value.getLength();
+ if(firstFieldLocation >= 0)
+ {
+ int sizeOfNextTuple = value.getLength() - firstFieldLocation + 1;
+ int sizeOfNextTupleAndRID = fileNumberFieldNameWithRecOpeningBraces.length + fileNumber.length + bytelocationFieldName.length + byteLocation.length + bytelocationValueEnd.length + sizeOfNextTuple;
+ if (numBytes + sizeOfNextTupleAndRID > len) {
+ // cannot add tuple to current buffer
+ // but the reader has moved past the fetched tuple
+ // we need to store it for a subsequent read call
+ // and return it then.
+ pendingValue = value;
+ break;
+ } else {
+ //copy fileNumberFieldNameWithRecOpeningBraces
+ System.arraycopy(fileNumberFieldNameWithRecOpeningBraces, 0, buffer, offset + numBytes,fileNumberFieldNameWithRecOpeningBraces.length);
+ numBytes += fileNumberFieldNameWithRecOpeningBraces.length;
+ //copy fileNumber
+ System.arraycopy(fileNumber, 0, buffer, offset + numBytes,fileNumber.length);
+ numBytes += fileNumber.length;
+ //copy bytelocationFieldName
+ System.arraycopy(bytelocationFieldName, 0, buffer, offset + numBytes,bytelocationFieldName.length);
+ numBytes += bytelocationFieldName.length;
+ //copy byte location value
+ System.arraycopy(byteLocation, 0, buffer, offset + numBytes,byteLocation.length);
+ numBytes += byteLocation.length;
+ //copy byte location field end
+ System.arraycopy(bytelocationValueEnd, 0, buffer, offset + numBytes,bytelocationValueEnd.length);
+ numBytes += bytelocationValueEnd.length;
+ //copy the actual adm instance
+ System.arraycopy(value.getBytes(), firstFieldLocation, buffer, offset + numBytes,admValueSize - firstFieldLocation);
+ buffer[offset + numBytes + admValueSize - firstFieldLocation] = (byte) EOL;
+ numBytes += admValueSize - firstFieldLocation +1;
+ }
+ }
+ }
+ return numBytes;
+ }
+
+ @Override
+ public int read() throws IOException {
+ throw new NotImplementedException("Use read(byte[], int, int)");
+ }
+
+ private RecordReader getRecordReader(int splitIndex) throws IOException {
+ if (conf.getInputFormat() instanceof SequenceFileInputFormat) {
+ SequenceFileInputFormat format = (SequenceFileInputFormat) conf.getInputFormat();
+ RecordReader reader = format.getRecordReader(
+ (org.apache.hadoop.mapred.FileSplit) inputSplits[splitIndex], conf, getReporter());
+ return reader;
+ } else {
+ TextInputFormat format = (TextInputFormat) conf.getInputFormat();
+ RecordReader reader = format.getRecordReader(
+ (org.apache.hadoop.mapred.FileSplit) inputSplits[splitIndex], conf, getReporter());
+ return reader;
+ }
+ }
+
+ };
+ }
+ else
+ {
+ throw new IOException("Can't index " +configuration.get(KEY_FORMAT)+" input");
+ }
+ }
+ }
+ }
+
+ @Override
+ public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
+ return clusterLocations;
+ }
+}
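For reference, the RID layout produced by the delimited-text indexing branch above (file number, then byte offset, then the original record, separated by the field delimiter) can be illustrated with a small standalone sketch. This is not part of the patch; the delimiter, sample values, and class name below are illustrative only.

    // Illustrative sketch (not part of the patch): prepends RID fields (file number and
    // byte offset) to a delimited-text record, mirroring the buffer assembly done in the
    // adapter's read(byte[], int, int) above. Delimiter and sample values are assumptions.
    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;

    public class RidPrefixSketch {
        private static final byte DELIMITER = '|'; // assumed field delimiter
        private static final byte EOL = '\n';

        // layout: fileNumber | byteOffset | original record \n
        static byte[] prefixWithRid(int fileNumber, long byteOffset, byte[] record) throws IOException {
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            out.write(String.valueOf(fileNumber).getBytes(StandardCharsets.UTF_8));
            out.write(DELIMITER);
            out.write(String.valueOf(byteOffset).getBytes(StandardCharsets.UTF_8));
            out.write(DELIMITER);
            out.write(record);
            out.write(EOL);
            return out.toByteArray();
        }

        public static void main(String[] args) throws IOException {
            byte[] tuple = "john|25|irvine".getBytes(StandardCharsets.UTF_8);
            System.out.print(new String(prefixWithRid(3, 67108864L, tuple), StandardCharsets.UTF_8));
            // prints: 3|67108864|john|25|irvine
        }
    }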
diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/HiveIndexingAdapter.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/HiveIndexingAdapter.java
new file mode 100644
index 0000000..178b106
--- /dev/null
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/HiveIndexingAdapter.java
@@ -0,0 +1,62 @@
+package edu.uci.ics.asterix.external.dataset.adapter;
+
+import java.util.Map;
+
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.asterix.om.types.IAType;
+import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+
+/**
+ * Provides the functionality of fetching data, in the form of ADM records, from a Hive dataset while building an index over it.
+ */
+@SuppressWarnings("deprecation")
+public class HiveIndexingAdapter extends AbstractDatasourceAdapter{
+
+ private static final long serialVersionUID = 1L;
+
+ public static final String HIVE_DATABASE = "database";
+ public static final String HIVE_TABLE = "table";
+ public static final String HIVE_HOME = "hive-home";
+ public static final String HIVE_METASTORE_URI = "metastore-uri";
+ public static final String HIVE_WAREHOUSE_DIR = "warehouse-dir";
+ public static final String HIVE_METASTORE_RAWSTORE_IMPL = "rawstore-impl";
+
+ private HDFSIndexingAdapter hdfsIndexingAdapter;
+
+ public HiveIndexingAdapter(IAType atype, String[] readSchedule, boolean[] executed, InputSplit[] inputSplits, JobConf conf,
+ AlgebricksPartitionConstraint clusterLocations, Map<String,Integer> files) {
+ this.hdfsIndexingAdapter = new HDFSIndexingAdapter(atype, readSchedule, executed, inputSplits, conf, clusterLocations, files);
+ this.atype = atype;
+ }
+
+ @Override
+ public AdapterType getAdapterType() {
+ return AdapterType.READ;
+ }
+
+ @Override
+ public void configure(Map<String, Object> arguments) throws Exception {
+ this.configuration = arguments;
+ this.hdfsIndexingAdapter.configure(arguments);
+ }
+
+ @Override
+ public void initialize(IHyracksTaskContext ctx) throws Exception {
+ hdfsIndexingAdapter.initialize(ctx);
+ }
+
+ @Override
+ public void start(int partition, IFrameWriter writer) throws Exception {
+ hdfsIndexingAdapter.start(partition, writer);
+ }
+
+ @Override
+ public AlgebricksPartitionConstraint getPartitionConstraint() throws Exception {
+ return hdfsIndexingAdapter.getPartitionConstraint();
+ }
+
+}
\ No newline at end of file
diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/IControlledAdapter.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/IControlledAdapter.java
new file mode 100644
index 0000000..253f675
--- /dev/null
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/IControlledAdapter.java
@@ -0,0 +1,17 @@
+package edu.uci.ics.asterix.external.dataset.adapter;
+
+import java.io.Serializable;
+import java.nio.ByteBuffer;
+
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
+public interface IControlledAdapter extends Serializable{
+
+ public void initialize(IHyracksTaskContext ctx) throws Exception;
+
+ public void processNextFrame(ByteBuffer buffer, IFrameWriter writer) throws HyracksDataException;
+
+ public void close(IFrameWriter writer) throws HyracksDataException;
+}
\ No newline at end of file
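The new IControlledAdapter contract above is small; a minimal no-op skeleton is enough to show its shape. The sketch below is illustrative only and not part of the patch: the class name is hypothetical, and it assumes nothing beyond the Hyracks types imported above being on the classpath.

    // Hypothetical no-op skeleton of IControlledAdapter (illustrative, not part of the patch).
    package edu.uci.ics.asterix.external.dataset.adapter;

    import java.nio.ByteBuffer;

    import edu.uci.ics.hyracks.api.comm.IFrameWriter;
    import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
    import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;

    public class NoOpControlledAdapter implements IControlledAdapter {
        private static final long serialVersionUID = 1L;

        @Override
        public void initialize(IHyracksTaskContext ctx) throws Exception {
            // acquire runtime resources here before any frames arrive
        }

        @Override
        public void processNextFrame(ByteBuffer buffer, IFrameWriter writer) throws HyracksDataException {
            // inspect the incoming frame and push any resulting records to the writer
        }

        @Override
        public void close(IFrameWriter writer) throws HyracksDataException {
            // flush pending output and release resources
        }
    }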
diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/util/ExternalDataFilesMetadataProvider.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/util/ExternalDataFilesMetadataProvider.java
new file mode 100644
index 0000000..47550a4
--- /dev/null
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/util/ExternalDataFilesMetadataProvider.java
@@ -0,0 +1,38 @@
+package edu.uci.ics.asterix.external.util;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import edu.uci.ics.asterix.external.adapter.factory.HDFSAdapterFactory;
+import edu.uci.ics.asterix.external.dataset.adapter.AbstractDatasourceAdapter;
+import edu.uci.ics.asterix.external.dataset.adapter.HDFSAdapter;
+
+public class ExternalDataFilesMetadataProvider {
+ public static ArrayList<FileStatus> getHDFSFileStatus(AbstractDatasourceAdapter adapter) throws IOException
+ {
+ ArrayList<FileStatus> files = new ArrayList<FileStatus>();
+ //Configure hadoop connection
+ Configuration conf = HDFSAdapterFactory.configureHadoopConnection(adapter.getConfiguration());
+ FileSystem fs = FileSystem.get(conf);
+ //get the list of paths from the adapter
+ StringTokenizer tokenizer = new StringTokenizer(((String)adapter.getConfiguration().get(HDFSAdapter.KEY_PATH)),",");
+ Path inputPath = null;
+ FileStatus[] fileStatuses;
+ while(tokenizer.hasMoreTokens())
+ {
+ inputPath = new Path(tokenizer.nextToken().trim());
+ fileStatuses = fs.listStatus(inputPath);
+ for(int i=0; i < fileStatuses.length; i++)
+ {
+ files.add(fileStatuses[i]);
+ }
+ }
+ return files;
+ }
+}
\ No newline at end of file
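The file-listing walk above can be exercised with a small standalone sketch that splits a comma-separated path property and lists the matching HDFS file statuses. It is illustrative only, not part of the patch: the namenode address and input paths are assumptions, and it requires the Hadoop client jars on the classpath.

    // Illustrative sketch (not part of the patch): lists the files behind a comma-separated
    // HDFS path property, the same way ExternalDataFilesMetadataProvider walks the adapter's
    // KEY_PATH. The namenode address and paths below are assumptions.
    import java.io.IOException;
    import java.util.StringTokenizer;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class ListExternalFilesSketch {
        public static void main(String[] args) throws IOException {
            Configuration conf = new Configuration();
            conf.set("fs.default.name", "hdfs://localhost:9000"); // assumed namenode address
            FileSystem fs = FileSystem.get(conf);

            String paths = "/data/tpch/lineitem, /data/tpch/orders"; // assumed KEY_PATH value
            StringTokenizer tokenizer = new StringTokenizer(paths, ",");
            while (tokenizer.hasMoreTokens()) {
                Path inputPath = new Path(tokenizer.nextToken().trim());
                for (FileStatus status : fs.listStatus(inputPath)) {
                    System.out.println(status.getPath().toUri().getPath() + " (" + status.getLen() + " bytes)");
                }
            }
        }
    }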
diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/util/ExternalIndexHashPartitionComputerFactory.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/util/ExternalIndexHashPartitionComputerFactory.java
new file mode 100644
index 0000000..29fcfb1
--- /dev/null
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/util/ExternalIndexHashPartitionComputerFactory.java
@@ -0,0 +1,94 @@
+package edu.uci.ics.asterix.external.util;
+
+import java.io.DataInputStream;
+import java.nio.ByteBuffer;
+
+import edu.uci.ics.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
+import edu.uci.ics.asterix.om.base.AInt64;
+import edu.uci.ics.asterix.om.types.BuiltinType;
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputerFactory;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
+
+/* A class used to partition external data tuples when building an index over them.
+ * The partition computer it returns derives the HDFS block number from the byte location
+ * before applying the hash partitioning function. This way, records that fall within the
+ * same 64MB block range are routed to the same data node.
+ */
+
+public class ExternalIndexHashPartitionComputerFactory implements ITuplePartitionComputerFactory{
+ private static final long serialVersionUID = 1L;
+ private final int[] hashFields;
+ private final int bytesInHDFSBlock = 67108864;
+ private final IBinaryHashFunctionFactory[] hashFunctionFactories;
+ @SuppressWarnings("unchecked")
+ private final ISerializerDeserializer<AInt64> longSerde = AqlSerializerDeserializerProvider.INSTANCE.getNonTaggedSerializerDeserializer(BuiltinType.AINT64);
+
+ public ExternalIndexHashPartitionComputerFactory(int[] hashFields, IBinaryHashFunctionFactory[] hashFunctionFactories) {
+ this.hashFields = hashFields;
+ this.hashFunctionFactories = hashFunctionFactories;
+ }
+
+ @Override
+ public ITuplePartitionComputer createPartitioner() {
+ final IBinaryHashFunction[] hashFunctions = new IBinaryHashFunction[hashFunctionFactories.length];
+ for (int i = 0; i < hashFunctionFactories.length; ++i) {
+ hashFunctions[i] = hashFunctionFactories[i].createBinaryHashFunction();
+ }
+ return new ITuplePartitionComputer() {
+ private ByteBuffer serializedLong = ByteBuffer.allocate(8);
+ private AInt64 byteLocation;
+ private ByteBufferInputStream bbis = new ByteBufferInputStream();
+ private DataInputStream dis = new DataInputStream(bbis);
+ @Override
+ public int partition(IFrameTupleAccessor accessor, int tIndex, int nParts){
+ if (nParts == 1) {
+ return 0;
+ }
+ int h = 0;
+ int startOffset = accessor.getTupleStartOffset(tIndex);
+ int slotLength = accessor.getFieldSlotsLength();
+ for (int j = 0; j < hashFields.length; ++j) {
+ int fIdx = hashFields[j];
+ IBinaryHashFunction hashFn = hashFunctions[j];
+ int fStart = accessor.getFieldStartOffset(tIndex, fIdx);
+ int fEnd = accessor.getFieldEndOffset(tIndex, fIdx);
+ if(j == 1)
+ {
+ //reset the buffer
+ serializedLong.clear();
+ //read byte location
+ bbis.setByteBuffer(accessor.getBuffer() , accessor.getTupleStartOffset(tIndex) + accessor.getFieldSlotsLength() + accessor.getFieldStartOffset(tIndex, hashFields[1]));
+ try {
+ byteLocation = ((AInt64) longSerde.deserialize(dis));
+ //compute the block number, serialize it into a new array and call the hash function
+ serializedLong.putLong(byteLocation.getLongValue()/bytesInHDFSBlock);
+ //call the hash function
+ int fh = hashFn
+ .hash(serializedLong.array(), 0,serializedLong.capacity());
+ h = h * 31 + fh;
+ } catch (HyracksDataException e) {
+ System.err.println("could not deserialize the byte location value");
+ e.printStackTrace();
+ }
+ }
+ else
+ {
+ int fh = hashFn
+ .hash(accessor.getBuffer().array(), startOffset + slotLength + fStart, fEnd - fStart);
+ h = h * 31 + fh;
+ }
+ }
+ if (h < 0) {
+ h = -(h + 1);
+ }
+ return h % nParts;
+ }
+ };
+ }
+}
\ No newline at end of file
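The partitioning idea above reduces to simple arithmetic: map the byte location to its 64MB block number, then hash the (file number, block number) pair and take the result modulo the number of partitions. The sketch below is a pure-Java illustration, not part of the patch, and it uses a plain Java hash in place of the Hyracks binary hash functions.

    // Pure-Java sketch (illustrative, not part of the patch) of block-based partitioning:
    // the byte location is mapped to its 64MB HDFS block number so that all records inside
    // the same block hash to the same partition. The h = h * 31 + fh accumulation and the
    // negative-hash handling mirror the factory above.
    public class BlockHashSketch {
        private static final long BYTES_IN_HDFS_BLOCK = 67108864L; // 64MB

        static int partition(int fileNumber, long byteLocation, int nParts) {
            long blockNumber = byteLocation / BYTES_IN_HDFS_BLOCK;
            int blockHash = (int) (blockNumber ^ (blockNumber >>> 32)); // fold the long into an int
            int h = 31 * fileNumber + blockHash;
            if (h < 0) {
                h = -(h + 1);
            }
            return h % nParts;
        }

        public static void main(String[] args) {
            // two records 1KB apart in the same block land on the same partition
            System.out.println(partition(1, 1024L, 4));
            System.out.println(partition(1, 2048L, 4));
            // a record in the next block may land elsewhere
            System.out.println(partition(1, BYTES_IN_HDFS_BLOCK + 10L, 4));
        }
    }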
diff --git a/asterix-installer/src/main/resources/conf/asterix-configuration.xml b/asterix-installer/src/main/resources/conf/asterix-configuration.xml
index deeb5b0..084a3f8 100644
--- a/asterix-installer/src/main/resources/conf/asterix-configuration.xml
+++ b/asterix-installer/src/main/resources/conf/asterix-configuration.xml
@@ -126,21 +126,6 @@
</property>
<property>
- <name>txn.log.disksectorsize</name>
- <value>4096</value>
- <description>The size of a disk sector. (Default = "4096")
- </description>
- </property>
-
- <property>
- <name>txn.log.groupcommitinterval</name>
- <value>40</value>
- <description>The group commit wait time in milliseconds. (Default =
- "40" // 40ms)
- </description>
- </property>
-
- <property>
<name>txn.log.checkpoint.lsnthreshold</name>
<value>67108864</value>
<description>The size of the window that the maximum LSN is allowed to
diff --git a/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java b/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java
index dae1fb1..92d1c8e 100644
--- a/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java
+++ b/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java
@@ -19,215 +19,235 @@
public class [LEXER_NAME] {
- public static final int
- TOKEN_EOF = 0, TOKEN_AUX_NOT_FOUND = 1 [TOKENS_CONSTANTS];
+ public static final int
+ TOKEN_EOF = 0, TOKEN_AUX_NOT_FOUND = 1 [TOKENS_CONSTANTS];
- // Human representation of tokens. Useful for debug.
- // Is possible to convert a TOKEN_CONSTANT in its image through
- // [LEXER_NAME].tokenKindToString(TOKEN_CONSTANT);
- private static final String[] tokenImage = {
- "<EOF>", "<AUX_NOT_FOUND>" [TOKENS_IMAGES]
- };
-
- private static final char EOF_CHAR = 4;
- protected java.io.Reader inputStream;
- protected int column;
- protected int line;
- protected boolean prevCharIsCR;
- protected boolean prevCharIsLF;
- protected char[] buffer;
- protected int bufsize;
- protected int bufpos;
- protected int tokenBegin;
- protected int endOf_USED_Buffer;
- protected int endOf_UNUSED_Buffer;
- protected int maxUnusedBufferSize;
+ // Human representation of tokens. Useful for debug.
+ // Is possible to convert a TOKEN_CONSTANT in its image through
+ // [LEXER_NAME].tokenKindToString(TOKEN_CONSTANT);
+ private static final String[] tokenImage = {
+ "<EOF>", "<AUX_NOT_FOUND>" [TOKENS_IMAGES]
+ };
-// ================================================================================
-// Auxiliary functions. Can parse the tokens used in the grammar as partial/auxiliary
-// ================================================================================
+ private static final char EOF_CHAR = 4;
+ protected java.io.Reader inputStream;
+ protected int column;
+ protected int line;
+ protected boolean prevCharIsCR;
+ protected boolean prevCharIsLF;
+ protected char[] buffer;
+ protected int bufsize;
+ protected int bufpos;
+ protected int tokenBegin;
+ protected int endOf_USED_Buffer;
+ protected int endOf_UNUSED_Buffer;
+ protected int maxUnusedBufferSize;
- [LEXER_AUXFUNCTIONS]
+ // ================================================================================
+ // Auxiliary functions. Can parse the tokens used in the grammar as partial/auxiliary
+ // ================================================================================
-// ================================================================================
-// Main method. Return a TOKEN_CONSTANT
-// ================================================================================
-
- public int next() throws [LEXER_NAME]Exception, IOException{
- char currentChar = buffer[bufpos];
- while (currentChar == ' ' || currentChar=='\t' || currentChar == '\n' || currentChar=='\r')
- currentChar = readNextChar();
- tokenBegin = bufpos;
- if (currentChar==EOF_CHAR) return TOKEN_EOF;
+ [LEXER_AUXFUNCTIONS]
- [LEXER_LOGIC]
- }
+ // ================================================================================
+ // Main method. Return a TOKEN_CONSTANT
+ // ================================================================================
-// ================================================================================
-// Public interface
-// ================================================================================
-
- public [LEXER_NAME](java.io.Reader stream) throws IOException{
- reInit(stream);
- }
+ public int next() throws [LEXER_NAME]Exception, IOException{
+ if(bufpos < 0)
+ readNextChar();
+ char currentChar = buffer[bufpos];
+ while (currentChar == ' ' || currentChar=='\t' || currentChar == '\n' || currentChar=='\r')
+ currentChar = readNextChar();
+ tokenBegin = bufpos;
+ if (currentChar==EOF_CHAR) return TOKEN_EOF;
- public void reInit(java.io.Reader stream) throws IOException{
- done();
- inputStream = stream;
- bufsize = 4096;
- line = 1;
- column = 0;
- bufpos = -1;
- endOf_UNUSED_Buffer = bufsize;
- endOf_USED_Buffer = 0;
- prevCharIsCR = false;
- prevCharIsLF = false;
- buffer = new char[bufsize];
- tokenBegin = -1;
- maxUnusedBufferSize = 4096/2;
- readNextChar();
- }
+ [LEXER_LOGIC]
+ }
- public String getLastTokenImage() {
- if (bufpos >= tokenBegin)
- return new String(buffer, tokenBegin, bufpos - tokenBegin);
- else
- return new String(buffer, tokenBegin, bufsize - tokenBegin) +
- new String(buffer, 0, bufpos);
- }
-
- public static String tokenKindToString(int token) {
- return tokenImage[token];
- }
+ //used when done with the stream; must be called explicitly now.
+ public void close()throws IOException
+ {
+ inputStream.close();
+ }
- public void done(){
- buffer = null;
- }
+ //used before processing a new batch of data from the inputStream
+ public void reset(){
+ line = 1;
+ column = 0;
+ bufpos = -1;
+ endOf_UNUSED_Buffer = bufsize;
+ endOf_USED_Buffer = 0;
+ prevCharIsCR = false;
+ prevCharIsLF = false;
+ tokenBegin = -1;
+ }
-// ================================================================================
-// Parse error management
-// ================================================================================
-
- protected int parseError(String reason) throws [LEXER_NAME]Exception {
- StringBuilder message = new StringBuilder();
- message.append(reason).append("\n");
- message.append("Line: ").append(line).append("\n");
- message.append("Row: ").append(column).append("\n");
- throw new [LEXER_NAME]Exception(message.toString());
- }
+ // ================================================================================
+ // Public interface
+ // ================================================================================
- protected int parseError(int ... tokens) throws [LEXER_NAME]Exception {
- StringBuilder message = new StringBuilder();
- message.append("Error while parsing. ");
- message.append(" Line: ").append(line);
- message.append(" Row: ").append(column);
- message.append(" Expecting:");
- for (int tokenId : tokens){
- message.append(" ").append([LEXER_NAME].tokenKindToString(tokenId));
- }
- throw new [LEXER_NAME]Exception(message.toString());
- }
-
- protected void updateLineColumn(char c){
- column++;
-
- if (prevCharIsLF)
- {
- prevCharIsLF = false;
- line += (column = 1);
- }
- else if (prevCharIsCR)
- {
- prevCharIsCR = false;
- if (c == '\n')
- {
- prevCharIsLF = true;
- }
- else
- {
- line += (column = 1);
- }
- }
-
- if (c=='\r') {
- prevCharIsCR = true;
- } else if(c == '\n') {
- prevCharIsLF = true;
- }
- }
-
-// ================================================================================
-// Read data, buffer management. It uses a circular (and expandable) buffer
-// ================================================================================
+ public [LEXER_NAME](java.io.Reader stream) throws IOException{
+ reInit(stream);
+ }
- protected char readNextChar() throws IOException {
- if (++bufpos >= endOf_USED_Buffer)
- fillBuff();
- char c = buffer[bufpos];
- updateLineColumn(c);
- return c;
- }
+ public void reInit(java.io.Reader stream) throws IOException{
+ done();
+ inputStream = stream;
+ bufsize = 4096;
+ line = 1;
+ column = 0;
+ bufpos = -1;
+ endOf_UNUSED_Buffer = bufsize;
+ endOf_USED_Buffer = 0;
+ prevCharIsCR = false;
+ prevCharIsLF = false;
+ buffer = new char[bufsize];
+ tokenBegin = -1;
+ maxUnusedBufferSize = 4096/2;
+ }
- protected boolean fillBuff() throws IOException {
- if (endOf_UNUSED_Buffer == endOf_USED_Buffer) // If no more unused buffer space
- {
- if (endOf_UNUSED_Buffer == bufsize) // -- If the previous unused space was
- { // -- at the end of the buffer
- if (tokenBegin > maxUnusedBufferSize) // -- -- If the first N bytes before
- { // the current token are enough
- bufpos = endOf_USED_Buffer = 0; // -- -- -- setup buffer to use that fragment
- endOf_UNUSED_Buffer = tokenBegin;
- }
- else if (tokenBegin < 0) // -- -- If no token yet
- bufpos = endOf_USED_Buffer = 0; // -- -- -- reuse the whole buffer
- else
- ExpandBuff(false); // -- -- Otherwise expand buffer after its end
- }
- else if (endOf_UNUSED_Buffer > tokenBegin) // If the endOf_UNUSED_Buffer is after the token
- endOf_UNUSED_Buffer = bufsize; // -- set endOf_UNUSED_Buffer to the end of the buffer
- else if ((tokenBegin - endOf_UNUSED_Buffer) < maxUnusedBufferSize)
- { // If between endOf_UNUSED_Buffer and the token
- ExpandBuff(true); // there is NOT enough space expand the buffer
- } // reorganizing it
- else
- endOf_UNUSED_Buffer = tokenBegin; // Otherwise there is enough space at the start
- } // so we set the buffer to use that fragment
- int i;
- if ((i = inputStream.read(buffer, endOf_USED_Buffer, endOf_UNUSED_Buffer - endOf_USED_Buffer)) == -1)
- {
- inputStream.close();
- buffer[endOf_USED_Buffer]=(char)EOF_CHAR;
- endOf_USED_Buffer++;
- return false;
- }
- else
- endOf_USED_Buffer += i;
- return true;
- }
+ public String getLastTokenImage() {
+ if (bufpos >= tokenBegin)
+ return new String(buffer, tokenBegin, bufpos - tokenBegin);
+ else
+ return new String(buffer, tokenBegin, bufsize - tokenBegin) +
+ new String(buffer, 0, bufpos);
+ }
+
+ public static String tokenKindToString(int token) {
+ return tokenImage[token];
+ }
+
+ public void done(){
+ buffer = null;
+ }
+
+ // ================================================================================
+ // Parse error management
+ // ================================================================================
+
+ protected int parseError(String reason) throws [LEXER_NAME]Exception {
+ StringBuilder message = new StringBuilder();
+ message.append(reason).append("\n");
+ message.append("Line: ").append(line).append("\n");
+ message.append("Row: ").append(column).append("\n");
+ throw new [LEXER_NAME]Exception(message.toString());
+ }
+
+ protected int parseError(int ... tokens) throws [LEXER_NAME]Exception {
+ StringBuilder message = new StringBuilder();
+ message.append("Error while parsing. ");
+ message.append(" Line: ").append(line);
+ message.append(" Row: ").append(column);
+ message.append(" Expecting:");
+ for (int tokenId : tokens){
+ message.append(" ").append([LEXER_NAME].tokenKindToString(tokenId));
+ }
+ throw new [LEXER_NAME]Exception(message.toString());
+ }
+
+ protected void updateLineColumn(char c){
+ column++;
+
+ if (prevCharIsLF)
+ {
+ prevCharIsLF = false;
+ line += (column = 1);
+ }
+ else if (prevCharIsCR)
+ {
+ prevCharIsCR = false;
+ if (c == '\n')
+ {
+ prevCharIsLF = true;
+ }
+ else
+ {
+ line += (column = 1);
+ }
+ }
+
+ if (c=='\r') {
+ prevCharIsCR = true;
+ } else if(c == '\n') {
+ prevCharIsLF = true;
+ }
+ }
+
+ // ================================================================================
+ // Read data, buffer management. It uses a circular (and expandable) buffer
+ // ================================================================================
+
+ protected char readNextChar() throws IOException {
+ if (++bufpos >= endOf_USED_Buffer)
+ fillBuff();
+ char c = buffer[bufpos];
+ updateLineColumn(c);
+ return c;
+ }
+
+ protected boolean fillBuff() throws IOException {
+ if (endOf_UNUSED_Buffer == endOf_USED_Buffer) // If no more unused buffer space
+ {
+ if (endOf_UNUSED_Buffer == bufsize) // -- If the previous unused space was
+ { // -- at the end of the buffer
+ if (tokenBegin > maxUnusedBufferSize) // -- -- If the first N bytes before
+ { // the current token are enough
+ bufpos = endOf_USED_Buffer = 0; // -- -- -- setup buffer to use that fragment
+ endOf_UNUSED_Buffer = tokenBegin;
+ }
+ else if (tokenBegin < 0) // -- -- If no token yet
+ bufpos = endOf_USED_Buffer = 0; // -- -- -- reuse the whole buffer
+ else
+ ExpandBuff(false); // -- -- Otherwise expand buffer after its end
+ }
+ else if (endOf_UNUSED_Buffer > tokenBegin) // If the endOf_UNUSED_Buffer is after the token
+ endOf_UNUSED_Buffer = bufsize; // -- set endOf_UNUSED_Buffer to the end of the buffer
+ else if ((tokenBegin - endOf_UNUSED_Buffer) < maxUnusedBufferSize)
+ { // If between endOf_UNUSED_Buffer and the token
+ ExpandBuff(true); // there is NOT enough space expand the buffer
+ } // reorganizing it
+ else
+ endOf_UNUSED_Buffer = tokenBegin; // Otherwise there is enough space at the start
+ } // so we set the buffer to use that fragment
+ int i;
+ if ((i = inputStream.read(buffer, endOf_USED_Buffer, endOf_UNUSED_Buffer - endOf_USED_Buffer)) == -1)
+ {
+ //stream closing moved outside; callers must invoke close() explicitly
+ //inputStream.close();
+ buffer[endOf_USED_Buffer]=(char)EOF_CHAR;
+ endOf_USED_Buffer++;
+ return false;
+ }
+ else
+ endOf_USED_Buffer += i;
+ return true;
+ }
- protected void ExpandBuff(boolean wrapAround)
- {
- char[] newbuffer = new char[bufsize + maxUnusedBufferSize];
+ protected void ExpandBuff(boolean wrapAround)
+ {
+ char[] newbuffer = new char[bufsize + maxUnusedBufferSize];
- try {
- if (wrapAround) {
- System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
- System.arraycopy(buffer, 0, newbuffer, bufsize - tokenBegin, bufpos);
- buffer = newbuffer;
- endOf_USED_Buffer = (bufpos += (bufsize - tokenBegin));
- }
- else {
- System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
- buffer = newbuffer;
- endOf_USED_Buffer = (bufpos -= tokenBegin);
- }
- } catch (Throwable t) {
- throw new Error(t.getMessage());
- }
+ try {
+ if (wrapAround) {
+ System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
+ System.arraycopy(buffer, 0, newbuffer, bufsize - tokenBegin, bufpos);
+ buffer = newbuffer;
+ endOf_USED_Buffer = (bufpos += (bufsize - tokenBegin));
+ }
+ else {
+ System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
+ buffer = newbuffer;
+ endOf_USED_Buffer = (bufpos -= tokenBegin);
+ }
+ } catch (Throwable t) {
+ throw new Error(t.getMessage());
+ }
- bufsize += maxUnusedBufferSize;
- endOf_UNUSED_Buffer = bufsize;
- tokenBegin = 0;
- }
+ bufsize += maxUnusedBufferSize;
+ endOf_UNUSED_Buffer = bufsize;
+ tokenBegin = 0;
+ }
}
diff --git a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/MetadataManager.java b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/MetadataManager.java
index 8f0eedb..2de5d78 100644
--- a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/MetadataManager.java
+++ b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/MetadataManager.java
@@ -31,6 +31,7 @@
import edu.uci.ics.asterix.metadata.entities.DatasourceAdapter;
import edu.uci.ics.asterix.metadata.entities.Datatype;
import edu.uci.ics.asterix.metadata.entities.Dataverse;
+import edu.uci.ics.asterix.metadata.entities.ExternalFile;
import edu.uci.ics.asterix.metadata.entities.Function;
import edu.uci.ics.asterix.metadata.entities.Index;
import edu.uci.ics.asterix.metadata.entities.Node;
@@ -268,6 +269,39 @@
}
return dataset;
}
+
+ @Override
+ public List<ExternalFile> getDatasetExternalFiles(
+ MetadataTransactionContext mdTxnCtx, Dataset dataset)
+ throws MetadataException {
+ List<ExternalFile> externalFiles;
+ try {
+ externalFiles = metadataNode.getExternalDatasetFiles(mdTxnCtx.getJobId(), dataset);
+ } catch (RemoteException e) {
+ throw new MetadataException(e);
+ }
+ return externalFiles;
+ }
+
+ @Override
+ public void addExternalFile(MetadataTransactionContext mdTxnCtx,
+ ExternalFile externalFile) throws MetadataException {
+ try {
+ metadataNode.addExternalDatasetFile(mdTxnCtx.getJobId(), externalFile);
+ } catch (RemoteException e) {
+ throw new MetadataException(e);
+ }
+ }
+
+ @Override
+ public void dropExternalFile(MetadataTransactionContext mdTxnCtx,
+ ExternalFile externalFile) throws MetadataException {
+ try {
+ metadataNode.dropExternalFile(mdTxnCtx.getJobId(), externalFile.getDataverseName(), externalFile.getDatasetName(), externalFile.getFileNumber());
+ } catch (RemoteException e) {
+ throw new MetadataException(e);
+ }
+ }
@Override
public List<Index> getDatasetIndexes(MetadataTransactionContext ctx, String dataverseName, String datasetName)
diff --git a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/MetadataNode.java b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/MetadataNode.java
index 8765aae..81ce4f8 100644
--- a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/MetadataNode.java
+++ b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/MetadataNode.java
@@ -41,6 +41,8 @@
import edu.uci.ics.asterix.metadata.entities.DatasourceAdapter;
import edu.uci.ics.asterix.metadata.entities.Datatype;
import edu.uci.ics.asterix.metadata.entities.Dataverse;
+import edu.uci.ics.asterix.metadata.entities.ExternalDatasetDetails;
+import edu.uci.ics.asterix.metadata.entities.ExternalFile;
import edu.uci.ics.asterix.metadata.entities.Function;
import edu.uci.ics.asterix.metadata.entities.Index;
import edu.uci.ics.asterix.metadata.entities.InternalDatasetDetails;
@@ -50,6 +52,7 @@
import edu.uci.ics.asterix.metadata.entitytupletranslators.DatasourceAdapterTupleTranslator;
import edu.uci.ics.asterix.metadata.entitytupletranslators.DatatypeTupleTranslator;
import edu.uci.ics.asterix.metadata.entitytupletranslators.DataverseTupleTranslator;
+import edu.uci.ics.asterix.metadata.entitytupletranslators.ExternalFileTupleTranslator;
import edu.uci.ics.asterix.metadata.entitytupletranslators.FunctionTupleTranslator;
import edu.uci.ics.asterix.metadata.entitytupletranslators.IndexTupleTranslator;
import edu.uci.ics.asterix.metadata.entitytupletranslators.NodeGroupTupleTranslator;
@@ -59,6 +62,7 @@
import edu.uci.ics.asterix.metadata.valueextractors.MetadataEntityValueExtractor;
import edu.uci.ics.asterix.metadata.valueextractors.NestedDatatypeNameValueExtractor;
import edu.uci.ics.asterix.metadata.valueextractors.TupleCopyValueExtractor;
+import edu.uci.ics.asterix.om.base.AInt32;
import edu.uci.ics.asterix.om.base.AMutableString;
import edu.uci.ics.asterix.om.base.AString;
import edu.uci.ics.asterix.om.types.BuiltinType;
@@ -169,48 +173,56 @@
Index primaryIndex = new Index(dataset.getDataverseName(), dataset.getDatasetName(),
dataset.getDatasetName(), IndexType.BTREE, id.getPrimaryKey(), true, dataset.getPendingOp());
- addIndex(jobId, primaryIndex);
- ITupleReference nodeGroupTuple = createTuple(id.getNodeGroupName(), dataset.getDataverseName(),
- dataset.getDatasetName());
- insertTupleIntoIndex(jobId, MetadataSecondaryIndexes.GROUPNAME_ON_DATASET_INDEX, nodeGroupTuple);
- }
- // Add entry in datatype secondary index.
- ITupleReference dataTypeTuple = createTuple(dataset.getDataverseName(), dataset.getItemTypeName(),
- dataset.getDatasetName());
- insertTupleIntoIndex(jobId, MetadataSecondaryIndexes.DATATYPENAME_ON_DATASET_INDEX, dataTypeTuple);
- } catch (TreeIndexDuplicateKeyException e) {
- throw new MetadataException("A dataset with this name " + dataset.getDatasetName()
- + " already exists in dataverse '" + dataset.getDataverseName() + "'.", e);
- } catch (Exception e) {
- throw new MetadataException(e);
- }
- }
+ addIndex(jobId, primaryIndex);
+ ITupleReference nodeGroupTuple = createTuple(id.getNodeGroupName(), dataset.getDataverseName(),
+ dataset.getDatasetName());
+ insertTupleIntoIndex(jobId, MetadataSecondaryIndexes.GROUPNAME_ON_DATASET_INDEX, nodeGroupTuple);
+ }
+ else
+ {
+ //added for external data
+ ExternalDatasetDetails id = (ExternalDatasetDetails) dataset.getDatasetDetails();
+ ITupleReference nodeGroupTuple = createTuple(id.getNodeGroupName(), dataset.getDataverseName(),
+ dataset.getDatasetName());
+ insertTupleIntoIndex(jobId, MetadataSecondaryIndexes.GROUPNAME_ON_DATASET_INDEX, nodeGroupTuple);
+ }
+ // Add entry in datatype secondary index.
+ ITupleReference dataTypeTuple = createTuple(dataset.getDataverseName(), dataset.getItemTypeName(),
+ dataset.getDatasetName());
+ insertTupleIntoIndex(jobId, MetadataSecondaryIndexes.DATATYPENAME_ON_DATASET_INDEX, dataTypeTuple);
+ } catch (TreeIndexDuplicateKeyException e) {
+ throw new MetadataException("A dataset with this name " + dataset.getDatasetName()
+ + " already exists in dataverse '" + dataset.getDataverseName() + "'.", e);
+ } catch (Exception e) {
+ throw new MetadataException(e);
+ }
+ }
- @Override
- public void addIndex(JobId jobId, Index index) throws MetadataException, RemoteException {
- try {
- IndexTupleTranslator tupleWriter = new IndexTupleTranslator(true);
- ITupleReference tuple = tupleWriter.getTupleFromMetadataEntity(index);
- insertTupleIntoIndex(jobId, MetadataPrimaryIndexes.INDEX_DATASET, tuple);
- } catch (TreeIndexDuplicateKeyException e) {
- throw new MetadataException("An index with name '" + index.getIndexName() + "' already exists.", e);
- } catch (Exception e) {
- throw new MetadataException(e);
- }
- }
+ @Override
+ public void addIndex(JobId jobId, Index index) throws MetadataException, RemoteException {
+ try {
+ IndexTupleTranslator tupleWriter = new IndexTupleTranslator(true);
+ ITupleReference tuple = tupleWriter.getTupleFromMetadataEntity(index);
+ insertTupleIntoIndex(jobId, MetadataPrimaryIndexes.INDEX_DATASET, tuple);
+ } catch (TreeIndexDuplicateKeyException e) {
+ throw new MetadataException("An index with name '" + index.getIndexName() + "' already exists.", e);
+ } catch (Exception e) {
+ throw new MetadataException(e);
+ }
+ }
- @Override
- public void addNode(JobId jobId, Node node) throws MetadataException, RemoteException {
- try {
- NodeTupleTranslator tupleReaderWriter = new NodeTupleTranslator(true);
- ITupleReference tuple = tupleReaderWriter.getTupleFromMetadataEntity(node);
- insertTupleIntoIndex(jobId, MetadataPrimaryIndexes.NODE_DATASET, tuple);
- } catch (TreeIndexDuplicateKeyException e) {
- throw new MetadataException("A node with name '" + node.getNodeName() + "' already exists.", e);
- } catch (Exception e) {
- throw new MetadataException(e);
- }
- }
+ @Override
+ public void addNode(JobId jobId, Node node) throws MetadataException, RemoteException {
+ try {
+ NodeTupleTranslator tupleReaderWriter = new NodeTupleTranslator(true);
+ ITupleReference tuple = tupleReaderWriter.getTupleFromMetadataEntity(node);
+ insertTupleIntoIndex(jobId, MetadataPrimaryIndexes.NODE_DATASET, tuple);
+ } catch (TreeIndexDuplicateKeyException e) {
+ throw new MetadataException("A node with name '" + node.getNodeName() + "' already exists.", e);
+ } catch (Exception e) {
+ throw new MetadataException(e);
+ }
+ }
@Override
public void addNodeGroup(JobId jobId, NodeGroup nodeGroup) throws MetadataException, RemoteException {
@@ -384,47 +396,60 @@
// artifacts.
}
- // Delete entry from secondary index 'group'.
- if (dataset.getDatasetType() == DatasetType.INTERNAL || dataset.getDatasetType() == DatasetType.FEED) {
- InternalDatasetDetails id = (InternalDatasetDetails) dataset.getDatasetDetails();
- ITupleReference groupNameSearchKey = createTuple(id.getNodeGroupName(), dataverseName, datasetName);
- // Searches the index for the tuple to be deleted. Acquires an S
- // lock on the GROUPNAME_ON_DATASET_INDEX index.
- try {
- ITupleReference groupNameTuple = getTupleToBeDeleted(jobId,
- MetadataSecondaryIndexes.GROUPNAME_ON_DATASET_INDEX, groupNameSearchKey);
- deleteTupleFromIndex(jobId, MetadataSecondaryIndexes.GROUPNAME_ON_DATASET_INDEX, groupNameTuple);
- } catch (TreeIndexException tie) {
- // ignore this exception and continue deleting all relevant
- // artifacts.
- }
- }
- // Delete entry from secondary index 'type'.
- ITupleReference dataTypeSearchKey = createTuple(dataverseName, dataset.getItemTypeName(), datasetName);
- // Searches the index for the tuple to be deleted. Acquires an S
- // lock on the DATATYPENAME_ON_DATASET_INDEX index.
- try {
- ITupleReference dataTypeTuple = getTupleToBeDeleted(jobId,
- MetadataSecondaryIndexes.DATATYPENAME_ON_DATASET_INDEX, dataTypeSearchKey);
- deleteTupleFromIndex(jobId, MetadataSecondaryIndexes.DATATYPENAME_ON_DATASET_INDEX, dataTypeTuple);
- } catch (TreeIndexException tie) {
- // ignore this exception and continue deleting all relevant
- // artifacts.
- }
+ // Delete entry from secondary index 'group'.
+ if (dataset.getDatasetType() == DatasetType.INTERNAL || dataset.getDatasetType() == DatasetType.FEED) {
+ InternalDatasetDetails id = (InternalDatasetDetails) dataset.getDatasetDetails();
+ ITupleReference groupNameSearchKey = createTuple(id.getNodeGroupName(), dataverseName, datasetName);
+ // Searches the index for the tuple to be deleted. Acquires an S
+ // lock on the GROUPNAME_ON_DATASET_INDEX index.
+ try {
+ ITupleReference groupNameTuple = getTupleToBeDeleted(jobId,
+ MetadataSecondaryIndexes.GROUPNAME_ON_DATASET_INDEX, groupNameSearchKey);
+ deleteTupleFromIndex(jobId, MetadataSecondaryIndexes.GROUPNAME_ON_DATASET_INDEX, groupNameTuple);
+ } catch (TreeIndexException tie) {
+ // ignore this exception and continue deleting all relevant
+ // artifacts.
+ }
+ }
+ else
+ {
+ ExternalDatasetDetails id = (ExternalDatasetDetails) dataset.getDatasetDetails();
+ ITupleReference groupNameSearchKey = createTuple(id.getNodeGroupName(), dataverseName, datasetName);
+ // Searches the index for the tuple to be deleted. Acquires an S
+ // lock on the GROUPNAME_ON_DATASET_INDEX index.
+ try {
+ ITupleReference groupNameTuple = getTupleToBeDeleted(jobId,
+ MetadataSecondaryIndexes.GROUPNAME_ON_DATASET_INDEX, groupNameSearchKey);
+ deleteTupleFromIndex(jobId, MetadataSecondaryIndexes.GROUPNAME_ON_DATASET_INDEX, groupNameTuple);
+ } catch (TreeIndexException tie) {
+ // ignore this exception and continue deleting all relevant
+ // artifacts.
+ }
+ }
+ // Delete entry from secondary index 'type'.
+ ITupleReference dataTypeSearchKey = createTuple(dataverseName, dataset.getItemTypeName(), datasetName);
+ // Searches the index for the tuple to be deleted. Acquires an S
+ // lock on the DATATYPENAME_ON_DATASET_INDEX index.
+ try {
+ ITupleReference dataTypeTuple = getTupleToBeDeleted(jobId,
+ MetadataSecondaryIndexes.DATATYPENAME_ON_DATASET_INDEX, dataTypeSearchKey);
+ deleteTupleFromIndex(jobId, MetadataSecondaryIndexes.DATATYPENAME_ON_DATASET_INDEX, dataTypeTuple);
+ } catch (TreeIndexException tie) {
+ // ignore this exception and continue deleting all relevant
+ // artifacts.
+ }
- // Delete entry(s) from the 'indexes' dataset.
- if (dataset.getDatasetType() == DatasetType.INTERNAL || dataset.getDatasetType() == DatasetType.FEED) {
- List<Index> datasetIndexes = getDatasetIndexes(jobId, dataverseName, datasetName);
- if (datasetIndexes != null) {
- for (Index index : datasetIndexes) {
- dropIndex(jobId, dataverseName, datasetName, index.getIndexName());
- }
- }
- }
- } catch (Exception e) {
- throw new MetadataException(e);
- }
- }
+ // Delete entry(s) from the 'indexes' dataset.
+ List<Index> datasetIndexes = getDatasetIndexes(jobId, dataverseName, datasetName);
+ if (datasetIndexes != null) {
+ for (Index index : datasetIndexes) {
+ dropIndex(jobId, dataverseName, datasetName, index.getIndexName());
+ }
+ }
+ } catch (Exception e) {
+ throw new MetadataException(e);
+ }
+ }
@Override
public void dropIndex(JobId jobId, String dataverseName, String datasetName, String indexName)
@@ -1135,8 +1160,116 @@
}
}
- @Override
- public int getMostRecentDatasetId() throws MetadataException, RemoteException {
- return DatasetIdFactory.getMostRecentDatasetId();
+ @Override
+ public List<ExternalFile> getExternalDatasetFiles(JobId jobId,
+ Dataset dataset) throws MetadataException, RemoteException {
+ try {
+ ITupleReference searchKey = createTuple(dataset.getDataverseName(),dataset.getDatasetName());
+ ExternalFileTupleTranslator tupleReaderWriter = new ExternalFileTupleTranslator(false);
+ IValueExtractor<ExternalFile> valueExtractor = new MetadataEntityValueExtractor<ExternalFile>(
+ tupleReaderWriter);
+ List<ExternalFile> results = new ArrayList<ExternalFile>();
+ searchIndex(jobId, MetadataPrimaryIndexes.EXTERNAL_FILE_DATASET, searchKey, valueExtractor, results);
+ return results;
+ } catch (Exception e) {
+ throw new MetadataException(e);
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ public ITupleReference createExternalFileSearchTuple(String dataverseName, String datasetName, int fileNumber) throws HyracksDataException {
+ ISerializerDeserializer<AString> stringSerde = AqlSerializerDeserializerProvider.INSTANCE
+ .getSerializerDeserializer(BuiltinType.ASTRING);
+ ISerializerDeserializer<AInt32> intSerde = AqlSerializerDeserializerProvider.INSTANCE
+ .getSerializerDeserializer(BuiltinType.AINT32);
+
+ AMutableString aString = new AMutableString("");
+ ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(3);
+
+ //dataverse field
+ aString.setValue(dataverseName);
+ stringSerde.serialize(aString, tupleBuilder.getDataOutput());
+ tupleBuilder.addFieldEndOffset();
+
+ //dataset field
+ aString.setValue(datasetName);
+ stringSerde.serialize(aString, tupleBuilder.getDataOutput());
+ tupleBuilder.addFieldEndOffset();
+
+ //file number field
+ intSerde.serialize(new AInt32(fileNumber), tupleBuilder.getDataOutput());
+ tupleBuilder.addFieldEndOffset();
+
+ ArrayTupleReference tuple = new ArrayTupleReference();
+ tuple.reset(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray());
+ return tuple;
+ }
+
+ public ExternalFile getExternalDatasetFile(JobId jobId,String dataverseName, String datasetName,
+ int fileNumber) throws MetadataException, RemoteException {
+ try {
+ //create the search key
+ ITupleReference searchKey = createExternalFileSearchTuple(dataverseName, datasetName, fileNumber);
+ ExternalFileTupleTranslator tupleReaderWriter = new ExternalFileTupleTranslator(false);
+ IValueExtractor<ExternalFile> valueExtractor = new MetadataEntityValueExtractor<ExternalFile>(
+ tupleReaderWriter);
+ List<ExternalFile> results = new ArrayList<ExternalFile>();
+ searchIndex(jobId, MetadataPrimaryIndexes.EXTERNAL_FILE_DATASET, searchKey, valueExtractor, results);
+ return results.get(0);
+ } catch (Exception e) {
+ throw new MetadataException(e);
+ }
+ }
+
+ @Override
+ public void dropExternalFile(JobId jobId, String dataverseName,
+ String datasetName, int fileNumber) throws MetadataException,
+ RemoteException {
+ ExternalFile externalFile;
+ try {
+ externalFile = getExternalDatasetFile(jobId, dataverseName, datasetName,fileNumber);
+ } catch (Exception e) {
+ throw new MetadataException(e);
+ }
+ if (externalFile == null) {
+ throw new MetadataException("Cannot drop external file because it doesn't exist.");
+ }
+ try {
+ // Delete entry from the 'ExternalFile' dataset.
+ ITupleReference searchKey = createExternalFileSearchTuple(dataverseName, datasetName, fileNumber);
+ // Searches the index for the tuple to be deleted. Acquires an S
+ // lock on the 'ExternalFile' dataset.
+ ITupleReference datasetTuple = getTupleToBeDeleted(jobId,
+ MetadataPrimaryIndexes.EXTERNAL_FILE_DATASET, searchKey);
+ deleteTupleFromIndex(jobId, MetadataPrimaryIndexes.EXTERNAL_FILE_DATASET, datasetTuple);
+
+ } catch (TreeIndexException e) {
+ throw new MetadataException("Couldn't drop external file.", e);
+ } catch (Exception e) {
+ throw new MetadataException(e);
+ }
+ }
+
+ @Override
+ public void addExternalDatasetFile(JobId jobId, ExternalFile externalFile)
+ throws MetadataException, RemoteException {
+ try {
+ // Insert into the 'externalFiles' dataset.
+ ExternalFileTupleTranslator tupleReaderWriter = new ExternalFileTupleTranslator(true);
+ ITupleReference externalFileTuple = tupleReaderWriter.getTupleFromMetadataEntity(externalFile);
+ insertTupleIntoIndex(jobId, MetadataPrimaryIndexes.EXTERNAL_FILE_DATASET, externalFileTuple);
+ } catch (TreeIndexDuplicateKeyException e) {
+ throw new MetadataException("An external file with number " + externalFile.getFileNumber()
+ + " already exists in dataset '" + externalFile.getDatasetName() + "' in dataverse '"
+ + externalFile.getDataverseName() + "'.", e);
+ } catch (Exception e) {
+ throw new MetadataException(e);
+ }
}
+
+
+ @Override
+ public int getMostRecentDatasetId() throws MetadataException, RemoteException {
+ return DatasetIdFactory.getMostRecentDatasetId();
+ }
}
+
diff --git a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/api/IMetadataManager.java b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/api/IMetadataManager.java
index 22c5e46..53f72dd 100644
--- a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/api/IMetadataManager.java
+++ b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/api/IMetadataManager.java
@@ -26,6 +26,7 @@
import edu.uci.ics.asterix.metadata.entities.DatasourceAdapter;
import edu.uci.ics.asterix.metadata.entities.Datatype;
import edu.uci.ics.asterix.metadata.entities.Dataverse;
+import edu.uci.ics.asterix.metadata.entities.ExternalFile;
import edu.uci.ics.asterix.metadata.entities.Function;
import edu.uci.ics.asterix.metadata.entities.Index;
import edu.uci.ics.asterix.metadata.entities.Node;
@@ -439,6 +440,36 @@
*/
public List<Function> getDataverseFunctions(MetadataTransactionContext ctx, String dataverseName)
throws MetadataException;
+
+ /**
+ * @param mdTxnCtx
+ * MetadataTransactionContext of an active metadata transaction.
+ * @param externalFile
+ * An instance of type ExternalFile that represents the external file being
+ * added
+ * @throws MetadataException
+ */
+ public void addExternalFile(MetadataTransactionContext mdTxnCtx, ExternalFile externalFile) throws MetadataException;
+
+ /**
+ * @param mdTxnCtx
+ * MetadataTransactionContext of an active metadata transaction.
+ * @param dataset
+ * An instance of type Dataset that represents the "external" dataset
+ * @return A list of external files belonging to the dataset
+ * @throws MetadataException
+ */
+ public List<ExternalFile> getDatasetExternalFiles(MetadataTransactionContext mdTxnCtx, Dataset dataset) throws MetadataException;
+
+ /**
+ * @param mdTxnCtx
+ * MetadataTransactionContext of an active metadata transaction.
+ * @param externalFile
+ * An instance of type ExternalFile that represents the external file being
+ * dropped
+ * @throws MetadataException
+ */
+ public void dropExternalFile(MetadataTransactionContext mdTxnCtx, ExternalFile externalFile) throws MetadataException;
public void initializeDatasetIdFactory(MetadataTransactionContext ctx) throws MetadataException;
@@ -453,4 +484,5 @@
public void releaseReadLatch();
+
}
diff --git a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/api/IMetadataNode.java b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/api/IMetadataNode.java
index d1e63e1..206ef8a 100644
--- a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/api/IMetadataNode.java
+++ b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/api/IMetadataNode.java
@@ -28,6 +28,7 @@
import edu.uci.ics.asterix.metadata.entities.DatasourceAdapter;
import edu.uci.ics.asterix.metadata.entities.Datatype;
import edu.uci.ics.asterix.metadata.entities.Dataverse;
+import edu.uci.ics.asterix.metadata.entities.ExternalFile;
import edu.uci.ics.asterix.metadata.entities.Function;
import edu.uci.ics.asterix.metadata.entities.Index;
import edu.uci.ics.asterix.metadata.entities.Node;
@@ -471,6 +472,45 @@
* @throws RemoteException
*/
public void addAdapter(JobId jobId, DatasourceAdapter adapter) throws MetadataException, RemoteException;
+
+ /**
+ * @param jobId
+ * A globally unique id for an active metadata transaction.
+ * @param externalFile
+ * An object representing the external file entity
+ * @throws MetadataException
+ * for example, if the file already exists.
+ * @throws RemoteException
+ */
+ public void addExternalDatasetFile(JobId jobId, ExternalFile externalFile) throws MetadataException, RemoteException;
+
+ /**
+ * @param jobId
+ * A globally unique id for an active metadata transaction.
+ * @param dataset
+ * The dataset the files belong to.
+ * @return A list of external files belonging to the dataset.
+ * @throws MetadataException
+ * @throws RemoteException
+ */
+ public List<ExternalFile> getExternalDatasetFiles(JobId jobId, Dataset dataset) throws MetadataException,
+ RemoteException;
+
+ /**
+ * Deletes an external file, acquiring local locks on behalf of the given
+ * transaction id.
+ *
+ * @param jobId
+ * A globally unique id for an active metadata transaction.
+ * @param dataverseName
+ * The dataverse associated with the external dataset that owns the file to be deleted.
+ * @param datasetName
+ * Name of dataset owning the file to be deleted.
+ * @param fileNumber
+ * The ID number of the file to be deleted.
+ * @throws RemoteException
+ */
+ public void dropExternalFile(JobId jobId, String dataverseName, String datasetName, int fileNumber) throws MetadataException,
+ RemoteException;
public void initializeDatasetIdFactory(JobId jobId) throws MetadataException, RemoteException;
diff --git a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/bootstrap/MetadataBootstrap.java b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/bootstrap/MetadataBootstrap.java
index 4a02bc5..ed89cfc 100644
--- a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/bootstrap/MetadataBootstrap.java
+++ b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/bootstrap/MetadataBootstrap.java
@@ -117,7 +117,7 @@
MetadataPrimaryIndexes.DATASET_DATASET, MetadataPrimaryIndexes.DATATYPE_DATASET,
MetadataPrimaryIndexes.INDEX_DATASET, MetadataPrimaryIndexes.NODE_DATASET,
MetadataPrimaryIndexes.NODEGROUP_DATASET, MetadataPrimaryIndexes.FUNCTION_DATASET,
- MetadataPrimaryIndexes.DATASOURCE_ADAPTER_DATASET };
+ MetadataPrimaryIndexes.DATASOURCE_ADAPTER_DATASET, MetadataPrimaryIndexes.EXTERNAL_FILE_DATASET };
secondaryIndexes = new IMetadataIndex[] { MetadataSecondaryIndexes.GROUPNAME_ON_DATASET_INDEX,
MetadataSecondaryIndexes.DATATYPENAME_ON_DATASET_INDEX,
MetadataSecondaryIndexes.DATATYPENAME_ON_DATATYPE_INDEX };
diff --git a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/bootstrap/MetadataPrimaryIndexes.java b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/bootstrap/MetadataPrimaryIndexes.java
index 8bdd92b..100ec40 100644
--- a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/bootstrap/MetadataPrimaryIndexes.java
+++ b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/bootstrap/MetadataPrimaryIndexes.java
@@ -41,9 +41,11 @@
public static final int NODEGROUP_DATASET_ID = 6;
public static final int FUNCTION_DATASET_ID = 7;
public static final int DATASOURCE_ADAPTER_DATASET_ID = 8;
+ public static final int EXTERNAL_FILE_DATASET_ID = 9;
public static final int FIRST_AVAILABLE_USER_DATASET_ID = 100;
public static IMetadataIndex DATASOURCE_ADAPTER_DATASET;
+ public static IMetadataIndex EXTERNAL_FILE_DATASET;
/**
* Create all metadata primary index descriptors. MetadataRecordTypes must
@@ -92,5 +94,11 @@
BuiltinType.ASTRING, BuiltinType.ASTRING }, new String[] { "DataverseName", "Name" }, 0,
MetadataRecordTypes.DATASOURCE_ADAPTER_RECORDTYPE, DATASOURCE_ADAPTER_DATASET_ID, true, new int[] { 0,
1 });
+
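+ // Primary metadata index for external files, keyed on <DataverseName, DatasetName, FileNumber>.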
+ EXTERNAL_FILE_DATASET = new MetadataIndex("ExternalFile", null, 4,
+ new IAType[] { BuiltinType.ASTRING, BuiltinType.ASTRING, BuiltinType.AINT32 },
+ new String[] { "DataverseName", "DatasetName", "FileNumber" }, 0,
+ MetadataRecordTypes.EXTERNAL_FILE_RECORDTYPE, EXTERNAL_FILE_DATASET_ID, true, new int[] { 0, 1, 2 });
}
}
\ No newline at end of file
diff --git a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/bootstrap/MetadataRecordTypes.java b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/bootstrap/MetadataRecordTypes.java
index 50681ee..11f9c91 100644
--- a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/bootstrap/MetadataRecordTypes.java
+++ b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/bootstrap/MetadataRecordTypes.java
@@ -47,6 +47,7 @@
public static ARecordType NODEGROUP_RECORDTYPE;
public static ARecordType FUNCTION_RECORDTYPE;
public static ARecordType DATASOURCE_ADAPTER_RECORDTYPE;
+ public static ARecordType EXTERNAL_FILE_RECORDTYPE;
/**
* Create all metadata record types.
@@ -76,6 +77,7 @@
NODEGROUP_RECORDTYPE = createNodeGroupRecordType();
FUNCTION_RECORDTYPE = createFunctionRecordType();
DATASOURCE_ADAPTER_RECORDTYPE = createDatasourceAdapterRecordType();
+ EXTERNAL_FILE_RECORDTYPE = createExternalFileRecordType();
} catch (AsterixException e) {
throw new MetadataException(e);
}
@@ -125,12 +127,13 @@
// external details.
public static final int EXTERNAL_DETAILS_ARECORD_DATASOURCE_ADAPTER_FIELD_INDEX = 0;
public static final int EXTERNAL_DETAILS_ARECORD_PROPERTIES_FIELD_INDEX = 1;
-
+ public static final int EXTERNAL_DETAILS_ARECORD_GROUPNAME_FIELD_INDEX = 2;
+
private static final ARecordType createExternalDetailsRecordType() throws AsterixException {
AOrderedListType orderedPropertyListType = new AOrderedListType(DATASOURCE_ADAPTER_PROPERTIES_RECORDTYPE, null);
- String[] fieldNames = { "DatasourceAdapter", "Properties" };
- IAType[] fieldTypes = { BuiltinType.ASTRING, orderedPropertyListType };
+ String[] fieldNames = { "DatasourceAdapter", "Properties", "GroupName" };
+ IAType[] fieldTypes = { BuiltinType.ASTRING, orderedPropertyListType, BuiltinType.ASTRING };
return new ARecordType(null, fieldNames, fieldTypes, true);
}
@@ -356,5 +359,19 @@
BuiltinType.ASTRING };
return new ARecordType("DatasourceAdapterRecordType", fieldNames, fieldTypes, true);
}
+
+ public static final int EXTERNAL_FILE_ARECORD_DATAVERSENAME_FIELD_INDEX = 0;
+ public static final int EXTERNAL_FILE_ARECORD_DATASET_NAME_FIELD_INDEX = 1;
+ public static final int EXTERNAL_FILE_ARECORD_FILE_NUMBER_FIELD_INDEX = 2;
+ public static final int EXTERNAL_FILE_ARECORD_FILE_NAME_FIELD_INDEX = 3;
+ public static final int EXTERNAL_FILE_ARECORD_FILE_SIZE_FIELD_INDEX = 4;
+ public static final int EXTERNAL_FILE_ARECORD_FILE_MOD_DATE_FIELD_INDEX = 5;
+
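+ // Record type stored in the 'ExternalFile' metadata dataset: one record per file of an external
+ // dataset, carrying the file's name, size, and last-modification time.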
+ private static ARecordType createExternalFileRecordType() throws AsterixException {
+ String[] fieldNames = { "DataverseName", "DatasetName", "FileNumber", "FileName", "FileSize", "FileModDate" };
+ IAType[] fieldTypes = { BuiltinType.ASTRING, BuiltinType.ASTRING, BuiltinType.AINT32, BuiltinType.ASTRING,
+ BuiltinType.AINT64, BuiltinType.ADATETIME };
+ return new ARecordType("ExternalFileRecordType", fieldNames, fieldTypes, true);
+ }
}
\ No newline at end of file
diff --git a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/declared/AqlMetadataProvider.java b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/declared/AqlMetadataProvider.java
index 8b422c8..078b340d 100644
--- a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/declared/AqlMetadataProvider.java
+++ b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/declared/AqlMetadataProvider.java
@@ -18,11 +18,14 @@
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
+import org.apache.hadoop.fs.FileStatus;
+
import edu.uci.ics.asterix.common.config.AsterixStorageProperties;
import edu.uci.ics.asterix.common.config.DatasetConfig.DatasetType;
import edu.uci.ics.asterix.common.config.DatasetConfig.IndexType;
@@ -44,13 +47,17 @@
import edu.uci.ics.asterix.external.adapter.factory.IAdapterFactory;
import edu.uci.ics.asterix.external.adapter.factory.IGenericDatasetAdapterFactory;
import edu.uci.ics.asterix.external.adapter.factory.ITypedDatasetAdapterFactory;
+import edu.uci.ics.asterix.external.data.operator.ExternalDataAccessByRIDOperatorDescriptor;
+import edu.uci.ics.asterix.external.data.operator.ExternalDataIndexingOperatorDescriptor;
import edu.uci.ics.asterix.external.data.operator.ExternalDataScanOperatorDescriptor;
import edu.uci.ics.asterix.external.data.operator.FeedIntakeOperatorDescriptor;
import edu.uci.ics.asterix.external.data.operator.FeedMessageOperatorDescriptor;
+import edu.uci.ics.asterix.external.dataset.adapter.AbstractDatasourceAdapter;
import edu.uci.ics.asterix.external.dataset.adapter.IDatasourceAdapter;
import edu.uci.ics.asterix.external.dataset.adapter.ITypedDatasourceAdapter;
import edu.uci.ics.asterix.external.feed.lifecycle.FeedId;
import edu.uci.ics.asterix.external.feed.lifecycle.IFeedMessage;
+import edu.uci.ics.asterix.external.util.ExternalDataFilesMetadataProvider;
import edu.uci.ics.asterix.formats.base.IDataFormat;
import edu.uci.ics.asterix.formats.nontagged.AqlBinaryComparatorFactoryProvider;
import edu.uci.ics.asterix.formats.nontagged.AqlTypeTraitProvider;
@@ -64,6 +71,7 @@
import edu.uci.ics.asterix.metadata.entities.Datatype;
import edu.uci.ics.asterix.metadata.entities.Dataverse;
import edu.uci.ics.asterix.metadata.entities.ExternalDatasetDetails;
+import edu.uci.ics.asterix.metadata.entities.ExternalFile;
import edu.uci.ics.asterix.metadata.entities.FeedDatasetDetails;
import edu.uci.ics.asterix.metadata.entities.Index;
import edu.uci.ics.asterix.metadata.entities.InternalDatasetDetails;
@@ -160,6 +168,7 @@
private boolean asyncResults;
private ResultSetId resultSetId;
private IResultSerializerFactoryProvider resultSerializerFactoryProvider;
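+ // When enabled, external-index RIDs identify files by a compact AINT32 file number instead of an
+ // ASTRING file name (see DatasetUtils.computeExternalDataKeysBinaryHashFunFactories).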
+ private static boolean optimizeExternalIndexes = false;
private final Dataverse defaultDataverse;
private JobId jobId;
@@ -169,22 +178,6 @@
private static final Map<String, String> adapterFactoryMapping = initializeAdapterFactoryMapping();
private static Scheduler hdfsScheduler;
- public String getPropertyValue(String propertyName) {
- return config.get(propertyName);
- }
-
- public void setConfig(Map<String, String> config) {
- this.config = config;
- }
-
- public Map<String, String[]> getAllStores() {
- return stores;
- }
-
- public Map<String, String> getConfig() {
- return config;
- }
-
public AqlMetadataProvider(Dataverse defaultDataverse) {
this.defaultDataverse = defaultDataverse;
this.stores = AsterixAppContextInfo.getInstance().getMetadataProperties().getStores();
@@ -264,6 +257,30 @@
public IResultSerializerFactoryProvider getResultSerializerFactoryProvider() {
return resultSerializerFactoryProvider;
}
+
+ public String getPropertyValue(String propertyName) {
+ return config.get(propertyName);
+ }
+
+ public void setConfig(Map<String, String> config) {
+ this.config = config;
+ }
+
+ public Map<String, String[]> getAllStores() {
+ return stores;
+ }
+
+ public Map<String, String> getConfig() {
+ return config;
+ }
+
+ public static boolean isOptimizeExternalIndexes() {
+ return optimizeExternalIndexes;
+ }
+
+ public static void setOptimizeExternalIndexes(boolean optimizeExternalIndexes) {
+ AqlMetadataProvider.optimizeExternalIndexes = optimizeExternalIndexes;
+ }
@Override
public AqlDataSource findDataSource(AqlSourceId id) throws AlgebricksException {
@@ -402,6 +419,215 @@
return new Pair<IOperatorDescriptor, AlgebricksPartitionConstraint>(dataScanner, constraint);
}
+
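+ // Builds the operator that scans an external dataset through its adapter to produce the records
+ // to be indexed; with optimized indexes, file names are mapped to file numbers for compact RIDs.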
+ @SuppressWarnings("rawtypes")
+ public Pair<ExternalDataIndexingOperatorDescriptor, AlgebricksPartitionConstraint> buildExternalDataIndexingRuntime(
+ JobSpecification jobSpec, IAType itemType, Dataset dataset, IDataFormat format)
+ throws AlgebricksException {
+ IGenericDatasetAdapterFactory adapterFactory;
+ IDatasourceAdapter adapter;
+ String adapterName;
+ DatasourceAdapter adapterEntity;
+ String adapterFactoryClassname;
+ ExternalDatasetDetails datasetDetails = null;
+ try {
+ datasetDetails = (ExternalDatasetDetails) dataset.getDatasetDetails();
+ adapterName = datasetDetails.getAdapter();
+ adapterEntity = MetadataManager.INSTANCE.getAdapter(mdTxnCtx, MetadataConstants.METADATA_DATAVERSE_NAME,
+ adapterName);
+ if (adapterEntity != null) {
+ adapterFactoryClassname = adapterEntity.getClassname();
+ adapterFactory = (IGenericDatasetAdapterFactory) Class.forName(adapterFactoryClassname).newInstance();
+ } else {
+ adapterFactoryClassname = adapterFactoryMapping.get(adapterName);
+ if (adapterFactoryClassname == null) {
+ throw new AlgebricksException("Unknown adapter: " + adapterName);
+ }
+ adapterFactory = (IGenericDatasetAdapterFactory) Class.forName(adapterFactoryClassname).newInstance();
+ }
+
+ adapter = ((IGenericDatasetAdapterFactory) adapterFactory).createIndexingAdapter(
+ wrapProperties(datasetDetails.getProperties()), itemType, null);
+ } catch (AlgebricksException ae) {
+ throw ae;
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException("Unable to create adapter " + e);
+ }
+ if (!(adapter.getAdapterType().equals(IDatasourceAdapter.AdapterType.READ) || adapter.getAdapterType().equals(
+ IDatasourceAdapter.AdapterType.READ_WRITE))) {
+ throw new AlgebricksException("external dataset adapter does not support read operation");
+ }
+ ARecordType rt = (ARecordType) itemType;
+ ISerializerDeserializer payloadSerde = format.getSerdeProvider().getSerializerDeserializer(itemType);
+ RecordDescriptor indexerDesc = new RecordDescriptor(new ISerializerDeserializer[] { payloadSerde });
+ ExternalDataIndexingOperatorDescriptor dataIndexScanner = null;
+ List<ExternalFile> files = null;
+ HashMap<String, Integer> filesNumbers = null;
+ if (optimizeExternalIndexes) {
+ try {
+ files = MetadataManager.INSTANCE.getDatasetExternalFiles(mdTxnCtx, dataset);
+ } catch (MetadataException e) {
+ e.printStackTrace();
+ throw new AlgebricksException("Unable to get list of external files from metadata " + e);
+ }
+ filesNumbers = new HashMap<String, Integer>();
+ for (int i = 0; i < files.size(); i++) {
+ filesNumbers.put(files.get(i).getFileName(), files.get(i).getFileNumber());
+ }
+ }
+ dataIndexScanner = new ExternalDataIndexingOperatorDescriptor(jobSpec,
+ wrapPropertiesEmpty(datasetDetails.getProperties()), rt, indexerDesc, adapterFactory, filesNumbers);
+ AlgebricksPartitionConstraint constraint;
+ try {
+ constraint = adapter.getPartitionConstraint();
+ } catch (Exception e) {
+ throw new AlgebricksException(e);
+ }
+ return new Pair<ExternalDataIndexingOperatorDescriptor, AlgebricksPartitionConstraint>(dataIndexScanner, constraint);
+ }
+
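+ // Lists the HDFS files backing an external dataset via its adapter and wraps each FileStatus as
+ // an ExternalFile entity, numbering the files by their position in the listing.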
+ public ArrayList<ExternalFile> getExternalDatasetFiles(Dataset dataset) throws AlgebricksException {
+ ArrayList<ExternalFile> files = new ArrayList<ExternalFile>();
+ if (dataset.getDatasetType() != DatasetType.EXTERNAL) {
+ throw new AlgebricksException("Can only get external dataset files");
+ }
+ ExternalDatasetDetails datasetDetails = (ExternalDatasetDetails)dataset.getDatasetDetails();
+ IGenericDatasetAdapterFactory adapterFactory;
+ IDatasourceAdapter adapter;
+ String adapterName;
+ DatasourceAdapter adapterEntity;
+ String adapterFactoryClassname;
+ try {
+ adapterName = datasetDetails.getAdapter();
+ adapterEntity = MetadataManager.INSTANCE.getAdapter(mdTxnCtx, MetadataConstants.METADATA_DATAVERSE_NAME,
+ adapterName);
+ if (adapterEntity != null) {
+ adapterFactoryClassname = adapterEntity.getClassname();
+ adapterFactory = (IGenericDatasetAdapterFactory) Class.forName(adapterFactoryClassname).newInstance();
+ } else {
+ adapterFactoryClassname = adapterFactoryMapping.get(adapterName);
+ if (adapterFactoryClassname == null) {
+ throw new AlgebricksException("Unknown adapter: " + adapterName);
+ }
+ adapterFactory = (IGenericDatasetAdapterFactory) Class.forName(adapterFactoryClassname).newInstance();
+ }
+
+ adapter = ((IGenericDatasetAdapterFactory) adapterFactory).createAdapter(
+ wrapProperties(datasetDetails.getProperties()), null);
+ }
+ catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException("Unable to create adapter " + e);
+ }
+
+ try {
+ ArrayList<FileStatus> fileStatuses = ExternalDataFilesMetadataProvider.getHDFSFileStatus((AbstractDatasourceAdapter) adapter);
+ for(int i=0; i<fileStatuses.size(); i++)
+ {
+ files.add(new ExternalFile(dataset.getDataverseName(), dataset.getDatasetName(), new Date(fileStatuses.get(i).getModificationTime()),
+ fileStatuses.get(i).getLen(),
+ fileStatuses.get(i).getPath().toUri().getPath(),
+ i));
+ }
+ return files;
+ } catch (IOException e) {
+ e.printStackTrace();
+ throw new AlgebricksException("Unable to get list of HDFS files " + e);
+ }
+ }
+
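+ // Builds the operator that fetches records of an external dataset by RID; with optimized indexes,
+ // the file-number-to-file-name mapping is read from metadata and handed to the operator.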
+ @SuppressWarnings("rawtypes")
+ public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildExternalDataAccesByRIDRuntime(
+ JobSpecification jobSpec, Dataset dataset, Index secondaryIndex)
+ throws AlgebricksException {
+ IAType itemType = null;
+ try {
+ itemType = MetadataManager.INSTANCE.getDatatype(mdTxnCtx, dataset.getDataverseName(), dataset.getItemTypeName()).getDatatype();
+ } catch (MetadataException e) {
+ e.printStackTrace();
+ throw new AlgebricksException("Unable to get item type from metadata " + e);
+ }
+ if (itemType.getTypeTag() != ATypeTag.RECORD) {
+ throw new AlgebricksException("Can only scan datasets of records.");
+ }
+
+ ExternalDatasetDetails datasetDetails = (ExternalDatasetDetails)dataset.getDatasetDetails();
+ IGenericDatasetAdapterFactory adapterFactory;
+ IDatasourceAdapter adapter;
+ String adapterName;
+ DatasourceAdapter adapterEntity;
+ String adapterFactoryClassname;
+ try {
+ adapterName = datasetDetails.getAdapter();
+ adapterEntity = MetadataManager.INSTANCE.getAdapter(mdTxnCtx, MetadataConstants.METADATA_DATAVERSE_NAME,
+ adapterName);
+ if (adapterEntity != null) {
+ adapterFactoryClassname = adapterEntity.getClassname();
+ adapterFactory = (IGenericDatasetAdapterFactory) Class.forName(adapterFactoryClassname).newInstance();
+ } else {
+ adapterFactoryClassname = adapterFactoryMapping.get(adapterName);
+ if (adapterFactoryClassname == null) {
+ throw new AlgebricksException("Unknown adapter: " + adapterName);
+ }
+ adapterFactory = (IGenericDatasetAdapterFactory) Class.forName(adapterFactoryClassname).newInstance();
+ }
+
+ adapter = ((IGenericDatasetAdapterFactory) adapterFactory).createAdapter(
+ wrapProperties(datasetDetails.getProperties()), itemType);
+ } catch (AlgebricksException ae) {
+ throw ae;
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException("Unable to create adapter " + e);
+ }
+
+ if (!(adapter.getAdapterType().equals(IDatasourceAdapter.AdapterType.READ) || adapter.getAdapterType().equals(
+ IDatasourceAdapter.AdapterType.READ_WRITE))) {
+ throw new AlgebricksException("external dataset adapter does not support read operation");
+ }
+ IDataFormat format = NonTaggedDataFormat.INSTANCE;
+ ISerializerDeserializer payloadSerde = format.getSerdeProvider().getSerializerDeserializer(itemType);
+ RecordDescriptor outRecDesc = new RecordDescriptor(new ISerializerDeserializer[] { payloadSerde });
+
+ ExternalDataAccessByRIDOperatorDescriptor dataAccessOperator = null;
+ if (optimizeExternalIndexes) {
+ // create the hashmap
+ List<ExternalFile> files = null;
+ try {
+ files = MetadataManager.INSTANCE.getDatasetExternalFiles(mdTxnCtx, dataset);
+ } catch (MetadataException e) {
+ e.printStackTrace();
+ throw new AlgebricksException("Couldn't get file names for access by optimized RIDs", e);
+ }
+ HashMap<Integer, String> filesMapping = new HashMap<Integer, String>();
+ for (int i = 0; i < files.size(); i++) {
+ filesMapping.put(files.get(i).getFileNumber(), files.get(i).getFileName());
+ }
+ dataAccessOperator = new ExternalDataAccessByRIDOperatorDescriptor(jobSpec,
+ wrapPropertiesEmpty(datasetDetails.getProperties()), itemType, outRecDesc, adapterFactory, filesMapping);
+ } else {
+ dataAccessOperator = new ExternalDataAccessByRIDOperatorDescriptor(jobSpec,
+ wrapPropertiesEmpty(datasetDetails.getProperties()), itemType, outRecDesc, adapterFactory, null);
+ }
+ Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitsAndConstraints = splitProviderAndPartitionConstraintsForExternalDataset(
+ dataset.getDataverseName(), dataset.getDatasetName(), secondaryIndex.getIndexName());
+ return new Pair<IOperatorDescriptor, AlgebricksPartitionConstraint>(dataAccessOperator, splitsAndConstraints.second);
+ }
@SuppressWarnings("rawtypes")
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildScannerRuntime(JobSpecification jobSpec,
@@ -504,64 +730,111 @@
}
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildBtreeRuntime(JobSpecification jobSpec,
- List<LogicalVariable> outputVars, IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv,
- JobGenContext context, boolean retainInput, Dataset dataset, String indexName, int[] lowKeyFields,
- int[] highKeyFields, boolean lowKeyInclusive, boolean highKeyInclusive, Object implConfig)
- throws AlgebricksException {
- boolean isSecondary = true;
- try {
- Index primaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(),
- dataset.getDatasetName(), dataset.getDatasetName());
- if (primaryIndex != null) {
- isSecondary = !indexName.equals(primaryIndex.getIndexName());
- }
- int numPrimaryKeys = DatasetUtils.getPartitioningKeys(dataset).size();
- RecordDescriptor outputRecDesc = JobGenHelper.mkRecordDescriptor(typeEnv, opSchema, context);
- int numKeys = numPrimaryKeys;
- int keysStartIndex = outputRecDesc.getFieldCount() - numKeys - 1;
- ITypeTraits[] typeTraits = null;
- int[] bloomFilterKeyFields;
- if (isSecondary) {
- Index secondaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(),
- dataset.getDatasetName(), indexName);
- int numSecondaryKeys = secondaryIndex.getKeyFieldNames().size();
- numKeys += numSecondaryKeys;
- keysStartIndex = outputVars.size() - numKeys;
- typeTraits = JobGenHelper.variablesToTypeTraits(outputVars, keysStartIndex, numKeys, typeEnv, context);
- bloomFilterKeyFields = new int[numSecondaryKeys];
- for (int i = 0; i < numSecondaryKeys; i++) {
- bloomFilterKeyFields[i] = i;
- }
- } else {
- typeTraits = JobGenHelper.variablesToTypeTraits(outputVars, keysStartIndex, numKeys + 1, typeEnv,
- context);
- bloomFilterKeyFields = new int[numPrimaryKeys];
- for (int i = 0; i < numPrimaryKeys; i++) {
- bloomFilterKeyFields[i] = i;
- }
- }
- IBinaryComparatorFactory[] comparatorFactories = JobGenHelper.variablesToAscBinaryComparatorFactories(
- outputVars, keysStartIndex, numKeys, typeEnv, context);
+ List<LogicalVariable> outputVars, IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv,
+ JobGenContext context, boolean retainInput, Dataset dataset, String indexName, int[] lowKeyFields,
+ int[] highKeyFields, boolean lowKeyInclusive, boolean highKeyInclusive, Object implConfig)
+ throws AlgebricksException {
+ boolean isSecondary = true;
+ if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
+ try {
+ int numPrimaryKeys = DatasetUtils.getExternalRIDSize(dataset);
+ RecordDescriptor outputRecDesc = JobGenHelper.mkRecordDescriptor(typeEnv, opSchema, context);
+ int numKeys = numPrimaryKeys;
+ ITypeTraits[] typeTraits = null;
+ int[] bloomFilterKeyFields;
+ Index secondaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(),
+ dataset.getDatasetName(), indexName);
+ int numSecondaryKeys = secondaryIndex.getKeyFieldNames().size();
+ numKeys += numSecondaryKeys;
+ int keysStartIndex = outputVars.size() - numKeys;
+ typeTraits = JobGenHelper.variablesToTypeTraits(outputVars, keysStartIndex, numKeys, typeEnv, context);
+ bloomFilterKeyFields = new int[numSecondaryKeys];
+ for (int i = 0; i < numSecondaryKeys; i++) {
+ bloomFilterKeyFields[i] = i;
+ }
+ IBinaryComparatorFactory[] comparatorFactories = JobGenHelper.variablesToAscBinaryComparatorFactories(
+ outputVars, keysStartIndex, numKeys, typeEnv, context);
+ IAsterixApplicationContextInfo appContext = (IAsterixApplicationContextInfo) context.getAppContext();
+ Pair<IFileSplitProvider, AlgebricksPartitionConstraint> spPc;
+ try {
+ spPc = splitProviderAndPartitionConstraintsForExternalDataset(dataset.getDataverseName(),
+ dataset.getDatasetName(), indexName);
+ } catch (Exception e) {
+ throw new AlgebricksException(e);
+ }
+ ISearchOperationCallbackFactory searchCallbackFactory = new SecondaryIndexSearchOperationCallbackFactory();
+ AsterixRuntimeComponentsProvider rtcProvider = AsterixRuntimeComponentsProvider.RUNTIME_PROVIDER;
+ BTreeSearchOperatorDescriptor btreeSearchOp = new BTreeSearchOperatorDescriptor(jobSpec, outputRecDesc,
+ appContext.getStorageManagerInterface(), appContext.getIndexLifecycleManagerProvider(), spPc.first,
+ typeTraits, comparatorFactories, bloomFilterKeyFields, lowKeyFields, highKeyFields,
+ lowKeyInclusive, highKeyInclusive, new LSMBTreeDataflowHelperFactory(
+ new AsterixVirtualBufferCacheProvider(dataset.getDatasetId()), rtcProvider,
+ isSecondary ? new SecondaryIndexOperationTrackerProvider(
+ LSMBTreeIOOperationCallbackFactory.INSTANCE, dataset.getDatasetId())
+ : new PrimaryIndexOperationTrackerProvider(dataset.getDatasetId()), rtcProvider,
+ rtcProvider, storageProperties.getBloomFilterFalsePositiveRate()), retainInput,
+ searchCallbackFactory);
+ return new Pair<IOperatorDescriptor, AlgebricksPartitionConstraint>(btreeSearchOp, spPc.second);
+ } catch (MetadataException me) {
+ throw new AlgebricksException(me);
+ }
+ }
+ else
+ {
+ try {
+ Index primaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(),
+ dataset.getDatasetName(), dataset.getDatasetName());
+ if (primaryIndex != null) {
+ isSecondary = !indexName.equals(primaryIndex.getIndexName());
+ }
+ int numPrimaryKeys = DatasetUtils.getPartitioningKeys(dataset).size();
+ RecordDescriptor outputRecDesc = JobGenHelper.mkRecordDescriptor(typeEnv, opSchema, context);
+ int numKeys = numPrimaryKeys;
+ int keysStartIndex = outputRecDesc.getFieldCount() - numKeys - 1;
+ ITypeTraits[] typeTraits = null;
+ int[] bloomFilterKeyFields;
+ if (isSecondary) {
+ Index secondaryIndex = MetadataManager.INSTANCE.getIndex(mdTxnCtx, dataset.getDataverseName(),
+ dataset.getDatasetName(), indexName);
+ int numSecondaryKeys = secondaryIndex.getKeyFieldNames().size();
+ numKeys += numSecondaryKeys;
+ keysStartIndex = outputVars.size() - numKeys;
+ typeTraits = JobGenHelper.variablesToTypeTraits(outputVars, keysStartIndex, numKeys, typeEnv, context);
+ bloomFilterKeyFields = new int[numSecondaryKeys];
+ for (int i = 0; i < numSecondaryKeys; i++) {
+ bloomFilterKeyFields[i] = i;
+ }
+ } else {
+ typeTraits = JobGenHelper.variablesToTypeTraits(outputVars, keysStartIndex, numKeys + 1, typeEnv,
+ context);
+ bloomFilterKeyFields = new int[numPrimaryKeys];
+ for (int i = 0; i < numPrimaryKeys; i++) {
+ bloomFilterKeyFields[i] = i;
+ }
+ }
+ IBinaryComparatorFactory[] comparatorFactories = JobGenHelper.variablesToAscBinaryComparatorFactories(
+ outputVars, keysStartIndex, numKeys, typeEnv, context);
- IAsterixApplicationContextInfo appContext = (IAsterixApplicationContextInfo) context.getAppContext();
- Pair<IFileSplitProvider, AlgebricksPartitionConstraint> spPc;
- try {
- spPc = splitProviderAndPartitionConstraintsForInternalOrFeedDataset(dataset.getDataverseName(),
- dataset.getDatasetName(), indexName);
- } catch (Exception e) {
- throw new AlgebricksException(e);
- }
+ IAsterixApplicationContextInfo appContext = (IAsterixApplicationContextInfo) context.getAppContext();
+ Pair<IFileSplitProvider, AlgebricksPartitionConstraint> spPc;
+ try {
+ spPc = splitProviderAndPartitionConstraintsForInternalOrFeedDataset(dataset.getDataverseName(),
+ dataset.getDatasetName(), indexName);
+ } catch (Exception e) {
+ throw new AlgebricksException(e);
+ }
- ISearchOperationCallbackFactory searchCallbackFactory = null;
- if (isSecondary) {
- searchCallbackFactory = new SecondaryIndexSearchOperationCallbackFactory();
- } else {
- JobId jobId = ((JobEventListenerFactory) jobSpec.getJobletEventListenerFactory()).getJobId();
- int datasetId = dataset.getDatasetId();
- int[] primaryKeyFields = new int[numPrimaryKeys];
- for (int i = 0; i < numPrimaryKeys; i++) {
- primaryKeyFields[i] = i;
- }
+ ISearchOperationCallbackFactory searchCallbackFactory = null;
+ if (isSecondary) {
+ searchCallbackFactory = new SecondaryIndexSearchOperationCallbackFactory();
+ } else {
+ JobId jobId = ((JobEventListenerFactory) jobSpec.getJobletEventListenerFactory()).getJobId();
+ int datasetId = dataset.getDatasetId();
+ int[] primaryKeyFields = new int[numPrimaryKeys];
+ for (int i = 0; i < numPrimaryKeys; i++) {
+ primaryKeyFields[i] = i;
+ }
AqlMetadataImplConfig aqlMetadataImplConfig = (AqlMetadataImplConfig) implConfig;
ITransactionSubsystemProvider txnSubsystemProvider = new TransactionSubsystemProvider();
@@ -585,13 +858,14 @@
rtcProvider, storageProperties.getBloomFilterFalsePositiveRate()), retainInput,
searchCallbackFactory);
- return new Pair<IOperatorDescriptor, AlgebricksPartitionConstraint>(btreeSearchOp, spPc.second);
+ return new Pair<IOperatorDescriptor, AlgebricksPartitionConstraint>(btreeSearchOp, spPc.second);
- } catch (MetadataException me) {
- throw new AlgebricksException(me);
- }
- }
-
+ } catch (MetadataException me) {
+ throw new AlgebricksException(me);
+ }
+ }
+ }
+
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildRtreeRuntime(JobSpecification jobSpec,
List<LogicalVariable> outputVars, IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv,
JobGenContext context, boolean retainInput, Dataset dataset, String indexName, int[] keyFields)
@@ -1347,9 +1621,20 @@
}
int numPartitions = 0;
- InternalDatasetDetails datasetDetails = (InternalDatasetDetails) dataset.getDatasetDetails();
- List<String> nodeGroup = MetadataManager.INSTANCE.getNodegroup(mdTxnCtx, datasetDetails.getNodeGroupName())
- .getNodeNames();
+ List<String> nodeGroup = null;
+ if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
+ ExternalDatasetDetails datasetDetails = (ExternalDatasetDetails) dataset.getDatasetDetails();
+ nodeGroup = MetadataManager.INSTANCE.getNodegroup(mdTxnCtx, datasetDetails.getNodeGroupName())
+ .getNodeNames();
+ } else {
+ InternalDatasetDetails datasetDetails = (InternalDatasetDetails) dataset.getDatasetDetails();
+ nodeGroup = MetadataManager.INSTANCE.getNodegroup(mdTxnCtx, datasetDetails.getNodeGroupName())
+ .getNodeNames();
+ }
+
for (String nd : nodeGroup) {
numPartitions += AsterixClusterProperties.INSTANCE.getNumberOfIODevices(nd);
}
@@ -1367,6 +1652,12 @@
return splitProviderAndPartitionConstraints(splits);
}
+ public Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitProviderAndPartitionConstraintsForExternalDataset(
+ String dataverseName, String datasetName, String targetIdxName) throws AlgebricksException {
+ FileSplit[] splits = splitsForExternalDataset(mdTxnCtx, dataverseName, datasetName, targetIdxName);
+ return splitProviderAndPartitionConstraints(splits);
+ }
+
public Pair<IFileSplitProvider, AlgebricksPartitionConstraint> splitProviderAndPartitionConstraintsForDataverse(
String dataverse) {
FileSplit[] splits = splitsForDataverse(mdTxnCtx, dataverse);
@@ -1457,6 +1748,56 @@
}
}
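+ // Computes the file splits that hold an external dataset's secondary index: one split per store
+ // directory and I/O device on every node of the dataset's node group.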
+ private FileSplit[] splitsForExternalDataset(MetadataTransactionContext mdTxnCtx, String dataverseName,
+ String datasetName, String targetIdxName) throws AlgebricksException {
+
+ try {
+ File relPathFile = new File(getRelativePath(dataverseName, datasetName + "_idx_" + targetIdxName));
+ Dataset dataset = MetadataManager.INSTANCE.getDataset(mdTxnCtx, dataverseName, datasetName);
+ if (dataset.getDatasetType() != DatasetType.EXTERNAL) {
+ throw new AlgebricksException("Not an external dataset");
+ }
+ ExternalDatasetDetails datasetDetails = (ExternalDatasetDetails) dataset.getDatasetDetails();
+ List<String> nodeGroup = MetadataManager.INSTANCE.getNodegroup(mdTxnCtx, datasetDetails.getNodeGroupName())
+ .getNodeNames();
+ if (nodeGroup == null) {
+ throw new AlgebricksException("Couldn't find node group " + datasetDetails.getNodeGroupName());
+ }
+
+ List<FileSplit> splitArray = new ArrayList<FileSplit>();
+ for (String nd : nodeGroup) {
+ String[] nodeStores = stores.get(nd);
+ if (nodeStores == null) {
+ LOGGER.warning("Node " + nd + " has no stores.");
+ throw new AlgebricksException("Node " + nd + " has no stores.");
+ } else {
+ int numIODevices;
+ if (datasetDetails.getNodeGroupName().compareTo(MetadataConstants.METADATA_NODEGROUP_NAME) == 0) {
+ numIODevices = 1;
+ } else {
+ numIODevices = AsterixClusterProperties.INSTANCE.getNumberOfIODevices(nd);
+ }
+ String[] ioDevices = AsterixClusterProperties.INSTANCE.getIODevices(nd);
+ for (int j = 0; j < nodeStores.length; j++) {
+ for (int k = 0; k < numIODevices; k++) {
+ File f = new File(ioDevices[k] + File.separator + nodeStores[j] + File.separator
+ + relPathFile);
+ splitArray.add(new FileSplit(nd, new FileReference(f), k));
+ }
+ }
+ }
+ }
+ FileSplit[] splits = new FileSplit[splitArray.size()];
+ int i = 0;
+ for (FileSplit fs : splitArray) {
+ splits[i++] = fs;
+ }
+ return splits;
+ } catch (MetadataException me) {
+ throw new AlgebricksException(me);
+ }
+ }
+
private static Map<String, String> initializeAdapterFactoryMapping() {
Map<String, String> adapterFactoryMapping = new HashMap<String, String>();
adapterFactoryMapping.put("edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter",
@@ -1469,6 +1810,8 @@
"edu.uci.ics.asterix.external.dataset.adapter..RSSFeedAdapterFactory");
adapterFactoryMapping.put("edu.uci.ics.asterix.external.dataset.adapter.CNNFeedAdapter",
"edu.uci.ics.asterix.external.dataset.adapter.CNNFeedAdapterFactory");
+ adapterFactoryMapping.put("edu.uci.ics.asterix.external.dataset.adapter.HiveAdapter",
+ "edu.uci.ics.asterix.external.adapter.factory.HiveAdapterFactory");
return adapterFactoryMapping;
}
@@ -1567,3 +1910,4 @@
}
}
+
diff --git a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/entities/ExternalDatasetDetails.java b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/entities/ExternalDatasetDetails.java
index 18cef340..849c6bd 100644
--- a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/entities/ExternalDatasetDetails.java
+++ b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/entities/ExternalDatasetDetails.java
@@ -40,17 +40,25 @@
private static final long serialVersionUID = 1L;
private final String adapter;
- private final Map<String, String> properties;
+ private final Map<String,String> properties;
+ private final String nodeGroupName;
- private final static ARecordType externalRecordType = MetadataRecordTypes.EXTERNAL_DETAILS_RECORDTYPE;
+ private final static ARecordType externalRecordType = MetadataRecordTypes.EXTERNAL_DETAILS_RECORDTYPE;
private final static ARecordType propertyRecordType = MetadataRecordTypes.DATASOURCE_ADAPTER_PROPERTIES_RECORDTYPE;
- public ExternalDatasetDetails(String adapter, Map<String, String> properties) {
+ public ExternalDatasetDetails(String adapter, Map<String,String> properties, String nodeGroupName) {
this.properties = properties;
this.adapter = adapter;
+ this.nodeGroupName = nodeGroupName;
+ }
+
+ public ExternalDatasetDetails(String adapter, Map<String,String> properties) {
+ this.properties = properties;
+ this.adapter = adapter;
+ this.nodeGroupName = null;
}
- public String getAdapter() {
+ public String getAdapter() {
return adapter;
}
@@ -58,6 +66,10 @@
return properties;
}
+ public String getNodeGroupName() {
+ return nodeGroupName;
+ }
+
@Override
public DatasetType getDatasetType() {
return DatasetType.EXTERNAL;
@@ -93,6 +105,12 @@
fieldValue.reset();
listBuilder.write(fieldValue.getDataOutput(), true);
externalRecordBuilder.addField(MetadataRecordTypes.EXTERNAL_DETAILS_ARECORD_PROPERTIES_FIELD_INDEX, fieldValue);
+
+ //write field 2
+ fieldValue.reset();
+ aString.setValue(getNodeGroupName());
+ stringSerde.serialize(aString, fieldValue.getDataOutput());
+ externalRecordBuilder.addField(MetadataRecordTypes.EXTERNAL_DETAILS_ARECORD_GROUPNAME_FIELD_INDEX, fieldValue);
try {
externalRecordBuilder.write(out, true);
diff --git a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/entities/ExternalFile.java b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/entities/ExternalFile.java
new file mode 100644
index 0000000..0128783
--- /dev/null
+++ b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/entities/ExternalFile.java
@@ -0,0 +1,110 @@
+package edu.uci.ics.asterix.metadata.entities;
+
+import java.util.Date;
+
+import edu.uci.ics.asterix.metadata.MetadataCache;
+import edu.uci.ics.asterix.metadata.api.IMetadataEntity;
+
+/**
+ * Metadata entity representing a single file of an external dataset; intended for use with
+ * external data indexes.
+ */
+public class ExternalFile implements IMetadataEntity {
+
+ private static final long serialVersionUID = 1L;
+
+ private String dataverseName;
+ private String datasetName;
+ private Date lastModefiedTime;
+ private long size;
+ private String fileName;
+ private int fileNumber;
+
+
+ public ExternalFile(String dataverseName, String datasetName, Date lastModefiedTime, long size, String fileName,
+ int fileNumber) {
+ this.dataverseName = dataverseName;
+ this.datasetName = datasetName;
+ this.lastModefiedTime = lastModefiedTime;
+ this.size = size;
+ this.fileName = fileName;
+ this.fileNumber = fileNumber;
+ }
+
+ public String getDataverseName() {
+ return dataverseName;
+ }
+
+ public void setDataverseName(String dataverseName) {
+ this.dataverseName = dataverseName;
+ }
+
+ public String getDatasetName() {
+ return datasetName;
+ }
+
+ public void setDatasetName(String datasetName) {
+ this.datasetName = datasetName;
+ }
+ public Date getLastModefiedTime() {
+ return lastModefiedTime;
+ }
+
+ public void setLastModefiedTime(Date lastModefiedTime) {
+ this.lastModefiedTime = lastModefiedTime;
+ }
+
+ public long getSize() {
+ return size;
+ }
+
+ public void setSize(long size) {
+ this.size = size;
+ }
+
+ public String getFileName() {
+ return fileName;
+ }
+
+ public void setFileName(String fileName) {
+ this.fileName = fileName;
+ }
+
+ public int getFileNumber() {
+ return fileNumber;
+ }
+
+ public void setFileNumber(int fileNumber) {
+ this.fileNumber = fileNumber;
+ }
+
+ @Override
+ public Object addToCache(MetadataCache cache) {
+ //return cache.addExternalFileIfNotExists(this);
+ return null;
+ }
+
+ @Override
+ public Object dropFromCache(MetadataCache cache) {
+ //cache.dropExternalFile(this);
+ return null;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (obj == this)
+ return true;
+ if (!(obj instanceof ExternalFile))
+ return false;
+ ExternalFile anotherFile = (ExternalFile) obj;
+ return fileNumber == anotherFile.fileNumber && fileName.equals(anotherFile.fileName);
+ }
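+
+ // Hash on the same fields equals() compares so the equals/hashCode contract holds.
+ @Override
+ public int hashCode() {
+ return 31 * fileName.hashCode() + fileNumber;
+ }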
+
+}
\ No newline at end of file
diff --git a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java
index 61f856a..d346a6c 100644
--- a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java
+++ b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java
@@ -219,7 +219,10 @@
.getStringValue();
properties.put(key, value);
}
- datasetDetails = new ExternalDatasetDetails(adapter, properties);
+ String groupName = ((AString) datasetDetailsRecord
+ .getValueByPos(MetadataRecordTypes.EXTERNAL_DETAILS_ARECORD_GROUPNAME_FIELD_INDEX ))
+ .getStringValue();
+ datasetDetails = new ExternalDatasetDetails(adapter, properties,groupName);
}
Map<String, String> hints = getDatasetHints(datasetRecord);
diff --git a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/entitytupletranslators/ExternalFileTupleTranslator.java b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/entitytupletranslators/ExternalFileTupleTranslator.java
new file mode 100644
index 0000000..6837c72
--- /dev/null
+++ b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/entitytupletranslators/ExternalFileTupleTranslator.java
@@ -0,0 +1,158 @@
+package edu.uci.ics.asterix.metadata.entitytupletranslators;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.Date;
+
+import edu.uci.ics.asterix.common.exceptions.AsterixException;
+import edu.uci.ics.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
+import edu.uci.ics.asterix.metadata.MetadataException;
+import edu.uci.ics.asterix.metadata.bootstrap.MetadataPrimaryIndexes;
+import edu.uci.ics.asterix.metadata.bootstrap.MetadataRecordTypes;
+import edu.uci.ics.asterix.metadata.entities.ExternalFile;
+import edu.uci.ics.asterix.om.base.ADateTime;
+import edu.uci.ics.asterix.om.base.AInt32;
+import edu.uci.ics.asterix.om.base.AInt64;
+import edu.uci.ics.asterix.om.base.AMutableDateTime;
+import edu.uci.ics.asterix.om.base.AMutableInt32;
+import edu.uci.ics.asterix.om.base.AMutableInt64;
+import edu.uci.ics.asterix.om.base.ARecord;
+import edu.uci.ics.asterix.om.base.AString;
+import edu.uci.ics.asterix.om.types.BuiltinType;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+
+public class ExternalFileTupleTranslator extends AbstractTupleTranslator<ExternalFile>{
+
+ // Field indexes of serialized ExternalFile in a tuple.
+ // First key field.
+ public static final int EXTERNAL_FILE_DATAVERSENAME_TUPLE_FIELD_INDEX = 0;
+ // Second key field.
+ public static final int EXTERNAL_FILE_DATASETNAME_TUPLE_FIELD_INDEX = 1;
+ // Third key field
+ public static final int EXTERNAL_FILE_NUMBER_TUPLE_FIELD_INDEX = 2;
+ // Payload field containing serialized ExternalFile.
+ public static final int EXTERNAL_FILE_PAYLOAD_TUPLE_FIELD_INDEX = 3;
+
+ protected AMutableInt32 aInt32 = new AMutableInt32(0);
+ protected AMutableDateTime aDateTime = new AMutableDateTime(0);
+ protected AMutableInt64 aInt64 = new AMutableInt64(0);
+
+ @SuppressWarnings("unchecked")
+ protected ISerializerDeserializer<AInt32> intSerde = AqlSerializerDeserializerProvider.INSTANCE
+ .getSerializerDeserializer(BuiltinType.AINT32);
+ @SuppressWarnings("unchecked")
+ protected ISerializerDeserializer<ADateTime> dateTimeSerde = AqlSerializerDeserializerProvider.INSTANCE
+ .getSerializerDeserializer(BuiltinType.ADATETIME);
+ @SuppressWarnings("unchecked")
+ protected ISerializerDeserializer<AInt64> longSerde = AqlSerializerDeserializerProvider.INSTANCE
+ .getSerializerDeserializer(BuiltinType.AINT64);
+ @SuppressWarnings("unchecked")
+ private ISerializerDeserializer<ARecord> recordSerDes = AqlSerializerDeserializerProvider.INSTANCE
+ .getSerializerDeserializer(MetadataRecordTypes.EXTERNAL_FILE_RECORDTYPE);
+
+ public ExternalFileTupleTranslator(boolean getTuple) {
+ super(getTuple, MetadataPrimaryIndexes.EXTERNAL_FILE_DATASET.getFieldCount());
+ }
+
+ @Override
+ public ExternalFile getMetadataEntytiFromTuple(ITupleReference tuple)
+ throws MetadataException, IOException {
+ byte[] serRecord = tuple.getFieldData(EXTERNAL_FILE_PAYLOAD_TUPLE_FIELD_INDEX);
+ int recordStartOffset = tuple.getFieldStart(EXTERNAL_FILE_PAYLOAD_TUPLE_FIELD_INDEX);
+ int recordLength = tuple.getFieldLength(EXTERNAL_FILE_PAYLOAD_TUPLE_FIELD_INDEX);
+ ByteArrayInputStream stream = new ByteArrayInputStream(serRecord, recordStartOffset, recordLength);
+ DataInput in = new DataInputStream(stream);
+ ARecord externalFileRecord = (ARecord) recordSerDes.deserialize(in);
+ return createExternalFileFromARecord(externalFileRecord);
+ }
+
+ private ExternalFile createExternalFileFromARecord(ARecord externalFileRecord) {
+ String dataverseName = ((AString) externalFileRecord
+ .getValueByPos(MetadataRecordTypes.EXTERNAL_FILE_ARECORD_DATAVERSENAME_FIELD_INDEX))
+ .getStringValue();
+ String datasetName = ((AString) externalFileRecord
+ .getValueByPos(MetadataRecordTypes.EXTERNAL_FILE_ARECORD_DATASET_NAME_FIELD_INDEX)).getStringValue();
+ String fileName = ((AString) externalFileRecord
+ .getValueByPos(MetadataRecordTypes.EXTERNAL_FILE_ARECORD_FILE_NAME_FIELD_INDEX)).getStringValue();
+ int fileNumber = ((AInt32) externalFileRecord
+ .getValueByPos(MetadataRecordTypes.EXTERNAL_FILE_ARECORD_FILE_NUMBER_FIELD_INDEX)).getIntegerValue();
+ Date lastModifiedDate = new Date(((ADateTime) externalFileRecord
+ .getValueByPos(MetadataRecordTypes.EXTERNAL_FILE_ARECORD_FILE_MOD_DATE_FIELD_INDEX)).getChrononTime());
+ long fileSize = ((AInt64) externalFileRecord
+ .getValueByPos(MetadataRecordTypes.EXTERNAL_FILE_ARECORD_FILE_SIZE_FIELD_INDEX)).getLongValue();
+
+ return new ExternalFile(dataverseName, datasetName, lastModifiedDate, fileSize, fileName, fileNumber);
+ }
+
+ @Override
+ public ITupleReference getTupleFromMetadataEntity(
+ ExternalFile externalFile) throws MetadataException, IOException {
+ // write the key in the first 3 fields of the tuple
+ tupleBuilder.reset();
+ //dataverse name
+ aString.setValue(externalFile.getDataverseName());
+ stringSerde.serialize(aString, tupleBuilder.getDataOutput());
+ tupleBuilder.addFieldEndOffset();
+ //dataset name
+ aString.setValue(externalFile.getDatasetName());
+ stringSerde.serialize(aString, tupleBuilder.getDataOutput());
+ tupleBuilder.addFieldEndOffset();
+ //file number
+ aInt32.setValue(externalFile.getFileNumber());
+ intSerde.serialize(aInt32, tupleBuilder.getDataOutput());
+ tupleBuilder.addFieldEndOffset();
+
+ // write the pay-load in the fourth field of the tuple
+ recordBuilder.reset(MetadataRecordTypes.EXTERNAL_FILE_RECORDTYPE);
+
+ // write field 0
+ fieldValue.reset();
+ aString.setValue(externalFile.getDataverseName());
+ stringSerde.serialize(aString, fieldValue.getDataOutput());
+ recordBuilder.addField(MetadataRecordTypes.EXTERNAL_FILE_ARECORD_DATAVERSENAME_FIELD_INDEX, fieldValue);
+
+ // write field 1
+ fieldValue.reset();
+ aString.setValue(externalFile.getDatasetName());
+ stringSerde.serialize(aString, fieldValue.getDataOutput());
+ recordBuilder.addField(MetadataRecordTypes.EXTERNAL_FILE_ARECORD_DATASET_NAME_FIELD_INDEX, fieldValue);
+
+ // write field 2
+ fieldValue.reset();
+ aInt32.setValue(externalFile.getFileNumber());
+ intSerde.serialize(aInt32, fieldValue.getDataOutput());
+ recordBuilder.addField(MetadataRecordTypes.EXTERNAL_FILE_ARECORD_FILE_NUMBER_FIELD_INDEX, fieldValue);
+
+ // write field 3
+ fieldValue.reset();
+ aString.setValue(externalFile.getFileName());
+ stringSerde.serialize(aString, fieldValue.getDataOutput());
+ recordBuilder.addField(MetadataRecordTypes.EXTERNAL_FILE_ARECORD_FILE_NAME_FIELD_INDEX, fieldValue);
+
+ // write field 4
+ fieldValue.reset();
+ aInt64.setValue(externalFile.getSize());
+ longSerde.serialize(aInt64, fieldValue.getDataOutput());
+ recordBuilder.addField(MetadataRecordTypes.EXTERNAL_FILE_ARECORD_FILE_SIZE_FIELD_INDEX, fieldValue);
+
+ //write field 5
+ fieldValue.reset();
+ aDateTime.setValue(externalFile.getLastModefiedTime().getTime());
+ dateTimeSerde.serialize(aDateTime, fieldValue.getDataOutput());
+ recordBuilder.addField(MetadataRecordTypes.EXTERNAL_FILE_ARECORD_FILE_MOD_DATE_FIELD_INDEX, fieldValue);
+
+ // write record
+ try {
+ recordBuilder.write(tupleBuilder.getDataOutput(), true);
+ } catch (AsterixException e) {
+ throw new MetadataException(e);
+ }
+ tupleBuilder.addFieldEndOffset();
+
+ tuple.reset(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray());
+ return tuple;
+ }
+}
\ No newline at end of file
diff --git a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/utils/DatasetUtils.java b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/utils/DatasetUtils.java
index 820f277..95d26d9 100644
--- a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/utils/DatasetUtils.java
+++ b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/utils/DatasetUtils.java
@@ -20,9 +20,12 @@
import edu.uci.ics.asterix.common.config.DatasetConfig.DatasetType;
import edu.uci.ics.asterix.formats.nontagged.AqlTypeTraitProvider;
+import edu.uci.ics.asterix.metadata.declared.AqlMetadataProvider;
import edu.uci.ics.asterix.metadata.entities.Dataset;
+import edu.uci.ics.asterix.metadata.entities.ExternalDatasetDetails;
import edu.uci.ics.asterix.metadata.entities.InternalDatasetDetails;
import edu.uci.ics.asterix.om.types.ARecordType;
+import edu.uci.ics.asterix.om.types.BuiltinType;
import edu.uci.ics.asterix.om.types.IAType;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.data.IBinaryComparatorFactoryProvider;
@@ -32,6 +35,8 @@
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
public class DatasetUtils {
+ public static final String KEY_INPUT_FORMAT = "input-format";
+ public static final String INPUT_FORMAT_RC = "rc-input-format";
public static IBinaryComparatorFactory[] computeKeysBinaryComparatorFactories(Dataset dataset,
ARecordType itemType, IBinaryComparatorFactoryProvider comparatorFactoryProvider)
throws AlgebricksException {
@@ -82,6 +87,46 @@
}
return bhffs;
}
+
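+ // Hash-function factories for the RID fields of an external dataset: the first field is an AINT32
+ // file number when index optimization is on, otherwise an ASTRING file name.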
+ public static IBinaryHashFunctionFactory[] computeExternalDataKeysBinaryHashFunFactories(Dataset dataset,
+ IBinaryHashFunctionFactoryProvider hashFunProvider) throws AlgebricksException {
+ if (dataset.getDatasetType() != DatasetType.EXTERNAL) {
+ throw new AlgebricksException("not implemented");
+ }
+ // get dataset details
+ ExternalDatasetDetails edd = (ExternalDatasetDetails) dataset.getDatasetDetails();
+ if (edd.getProperties().get(KEY_INPUT_FORMAT).trim().equals(INPUT_FORMAT_RC)) {
+ // RID: <String(File name) OR Int32(File number), Int64(Block byte location), Int32(row number)>
+ IBinaryHashFunctionFactory[] bhffs = new IBinaryHashFunctionFactory[3];
+ if (AqlMetadataProvider.isOptimizeExternalIndexes()) {
+ bhffs[0] = hashFunProvider.getBinaryHashFunctionFactory(BuiltinType.AINT32);
+ } else {
+ bhffs[0] = hashFunProvider.getBinaryHashFunctionFactory(BuiltinType.ASTRING);
+ }
+ bhffs[1] = hashFunProvider.getBinaryHashFunctionFactory(BuiltinType.AINT64);
+ bhffs[2] = hashFunProvider.getBinaryHashFunctionFactory(BuiltinType.AINT32);
+ return bhffs;
+ } else {
+ // RID: <String(File name) OR Int32(File number), Int64(Record byte location)>
+ IBinaryHashFunctionFactory[] bhffs = new IBinaryHashFunctionFactory[2];
+ if (AqlMetadataProvider.isOptimizeExternalIndexes()) {
+ bhffs[0] = hashFunProvider.getBinaryHashFunctionFactory(BuiltinType.AINT32);
+ } else {
+ bhffs[0] = hashFunProvider.getBinaryHashFunctionFactory(BuiltinType.ASTRING);
+ }
+ bhffs[1] = hashFunProvider.getBinaryHashFunctionFactory(BuiltinType.AINT64);
+ return bhffs;
+ }
+ }
public static ITypeTraits[] computeTupleTypeTraits(Dataset dataset, ARecordType itemType)
throws AlgebricksException {
@@ -112,6 +157,17 @@
return (((InternalDatasetDetails) dataset.getDatasetDetails())).getNodeGroupName();
}
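+ // Number of fields in an external-dataset RID: 3 for RC files (file, block offset, row number),
+ // 2 for all other input formats (file, record offset).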
+ public static int getExternalRIDSize(Dataset dataset) {
+ ExternalDatasetDetails dsd = ((ExternalDatasetDetails) dataset.getDatasetDetails());
+ if (dsd.getProperties().get(KEY_INPUT_FORMAT).equals(INPUT_FORMAT_RC)) {
+ return 3;
+ } else {
+ return 2;
+ }
+ }
+
public static int getPositionOfPartitioningKeyField(Dataset dataset, String fieldExpr) {
List<String> partitioningKeys = DatasetUtils.getPartitioningKeys(dataset);
for (int i = 0; i < partitioningKeys.size(); i++) {
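For reference, a minimal usage sketch of the new DatasetUtils helpers, assuming dataset (an EXTERNAL dataset) and hashFunProvider are supplied by the caller:

    // Usage sketch, not part of the patch.
    int ridSize = DatasetUtils.getExternalRIDSize(dataset);   // 3 for rc-input-format, 2 otherwise
    IBinaryHashFunctionFactory[] ridHashes =
            DatasetUtils.computeExternalDataKeysBinaryHashFunFactories(dataset, hashFunProvider);
    // ridHashes.length always equals ridSize; component 0 hashes an AINT32 file number when
    // AqlMetadataProvider.isOptimizeExternalIndexes() is true, an ASTRING file name otherwise.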
diff --git a/asterix-om/src/main/java/edu/uci/ics/asterix/om/base/AInt64.java b/asterix-om/src/main/java/edu/uci/ics/asterix/om/base/AInt64.java
index cc3fd04..00b7459 100644
--- a/asterix-om/src/main/java/edu/uci/ics/asterix/om/base/AInt64.java
+++ b/asterix-om/src/main/java/edu/uci/ics/asterix/om/base/AInt64.java
@@ -33,6 +33,10 @@
public long getLongValue() {
return value;
}
+
+ public void setValue(long value) {
+ this.value = value;
+ }
@Override
public IAType getType() {
diff --git a/asterix-om/src/main/java/edu/uci/ics/asterix/om/functions/AsterixBuiltinFunctions.java b/asterix-om/src/main/java/edu/uci/ics/asterix/om/functions/AsterixBuiltinFunctions.java
index 3c8a0c4..b6bdb4b 100644
--- a/asterix-om/src/main/java/edu/uci/ics/asterix/om/functions/AsterixBuiltinFunctions.java
+++ b/asterix-om/src/main/java/edu/uci/ics/asterix/om/functions/AsterixBuiltinFunctions.java
@@ -243,6 +243,8 @@
public final static FunctionIdentifier INDEX_SEARCH = new FunctionIdentifier(FunctionConstants.ASTERIX_NS,
"index-search", FunctionIdentifier.VARARGS);
+ public final static FunctionIdentifier EXTERNAL_ACCESS_BY_RID = new FunctionIdentifier(FunctionConstants.ASTERIX_NS,
+ "external-access-by-rid", FunctionIdentifier.VARARGS);
public final static FunctionIdentifier MAKE_FIELD_INDEX_HANDLE = new FunctionIdentifier(
FunctionConstants.ASTERIX_NS, "make-field-index-handle", 2);
@@ -672,6 +674,14 @@
return BuiltinType.ANY; // TODO
}
});
+ addPrivateFunction(EXTERNAL_ACCESS_BY_RID, new IResultTypeComputer() {
+
+ @Override
+ public IAType computeType(ILogicalExpression expression, IVariableTypeEnvironment env,
+ IMetadataProvider<?, ?> mp) throws AlgebricksException {
+ return BuiltinType.ANY;
+ }
+ });
addFunction(INT8_CONSTRUCTOR, OptionalAInt8TypeComputer.INSTANCE);
addFunction(INT16_CONSTRUCTOR, OptionalAInt16TypeComputer.INSTANCE);
addFunction(INT32_CONSTRUCTOR, OptionalAInt32TypeComputer.INSTANCE);
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java
index b8c8659..e08c509 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java
@@ -92,6 +92,16 @@
throw new AsterixException(e);
}
}
+
+ @Override
+ public void close() throws IOException{
+ admLexer.close();
+ }
+
+ @Override
+ public void reset(){
+ admLexer.reset();
+ }
protected boolean parseAdmInstance(IAType objectType, boolean datasetRec, DataOutput out) throws AsterixException,
IOException {
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AbstractControlledTupleParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AbstractControlledTupleParser.java
new file mode 100644
index 0000000..a4f1691
--- /dev/null
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AbstractControlledTupleParser.java
@@ -0,0 +1,123 @@
+package edu.uci.ics.asterix.runtime.operators.file;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import edu.uci.ics.asterix.common.exceptions.AsterixException;
+import edu.uci.ics.asterix.om.types.ARecordType;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
+
+/**
+ * An abstract base implementation of ControlledTupleParser. It provides the common
+ * functionality involved in parsing data in an external format in a pipelined manner
+ * and packing frames with the formed tuples.
+ */
+public abstract class AbstractControlledTupleParser extends ControlledTupleParser{
+
+ protected ArrayTupleBuilder tb = new ArrayTupleBuilder(1);
+ protected transient DataOutput dos;
+ protected final FrameTupleAppender appender;
+ protected final ByteBuffer frame;
+ protected final ARecordType recType;
+ protected final IHyracksTaskContext ctx;
+ protected IDataParser parser;
+
+ public AbstractControlledTupleParser(IHyracksTaskContext ctx, ARecordType recType) throws HyracksDataException {
+ appender = new FrameTupleAppender(ctx.getFrameSize());
+ frame = ctx.allocateFrame();
+ this.recType = recType;
+ this.ctx = ctx;
+ dos = tb.getDataOutput();
+ }
+
+ public abstract IDataParser getDataParser();
+
+ @Override
+ public void parse(InputStream in, IFrameWriter writer)
+ throws HyracksDataException {
+ //When called directly, this method behaves as a non-pipelined parser.
+ //The whole tuple-parser interface needs revisiting.
+ appender.reset(frame, true);
+ parser = getDataParser();
+ try {
+ parser.initialize(in, recType, true);
+ while (true) {
+ tb.reset();
+ if (!parser.parse(tb.getDataOutput())) {
+ parser.reset();
+ break;
+ }
+ tb.addFieldEndOffset();
+ addTupleToFrame(writer);
+ }
+ parser.close();
+ if (appender.getTupleCount() > 0) {
+ FrameUtils.flushFrame(frame, writer);
+ }
+ } catch (Exception e) {
+ throw new HyracksDataException("Failed to parse data");
+ }
+ }
+
+ @Override
+ public void initialize(InputStream in) throws HyracksDataException {
+ appender.reset(frame, true);
+ parser = getDataParser();
+ try {
+ parser.initialize(in, recType, true);
+
+ } catch (Exception e) {
+ throw new HyracksDataException("Failed to initialize data parser");
+ }
+ }
+
+ @Override
+ public void parseNext(IFrameWriter writer) throws HyracksDataException {
+ try {
+ while (true) {
+ tb.reset();
+ if (!parser.parse(tb.getDataOutput())) {
+ parser.reset();
+ break;
+ }
+ tb.addFieldEndOffset();
+ addTupleToFrame(writer);
+ }
+ } catch (AsterixException ae) {
+ throw new HyracksDataException(ae);
+ } catch (IOException ioe) {
+ throw new HyracksDataException(ioe);
+ }
+ }
+
+ @Override
+ public void close(IFrameWriter writer) throws HyracksDataException {
+ try{
+ parser.close();
+ if (appender.getTupleCount() > 0) {
+ FrameUtils.flushFrame(frame, writer);
+ }
+ } catch (IOException ioe) {
+ throw new HyracksDataException(ioe);
+ }
+ }
+
+ protected void addTupleToFrame(IFrameWriter writer) throws HyracksDataException {
+ if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
+ FrameUtils.flushFrame(frame, writer);
+ appender.reset(frame, true);
+ if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
+ throw new IllegalStateException();
+ }
+ }
+
+ }
+
+}
\ No newline at end of file
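The intended call sequence for the controlled parsers is push-style: attach the stream once, parse whenever more bytes arrive, and close with the writer so buffered tuples get flushed. A minimal driver sketch, assuming ctx, recType, in and writer come from the enclosing operator, and with moreInputAvailable() as a hypothetical stand-in for the operator's notification that another batch has arrived:

    ControlledTupleParser parser =
            (ControlledTupleParser) new ControlledADMTupleParserFactory(recType).createTupleParser(ctx);
    parser.initialize(in);            // associate the input stream once
    while (moreInputAvailable()) {    // hypothetical readiness check supplied by the caller
        parser.parseNext(writer);     // parse whatever is currently readable and append tuples
    }
    parser.close(writer);             // flush the last partial frame and release resources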
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AbstractTupleParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AbstractTupleParser.java
index 2322338..78159f5 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AbstractTupleParser.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AbstractTupleParser.java
@@ -62,6 +62,7 @@
while (true) {
tb.reset();
if (!parser.parse(tb.getDataOutput())) {
+ parser.close();
break;
}
tb.addFieldEndOffset();
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AdmTupleParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AdmTupleParser.java
index 7cb9bb0..34f75e6 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AdmTupleParser.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AdmTupleParser.java
@@ -20,7 +20,7 @@
/**
* An extension of AbstractTupleParser that provides functionality for
- * parsing delimited files.
+ * parsing ADM formatted input files.
*/
public class AdmTupleParser extends AbstractTupleParser {
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ControlledADMTupleParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ControlledADMTupleParser.java
new file mode 100644
index 0000000..aa2d9ba
--- /dev/null
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ControlledADMTupleParser.java
@@ -0,0 +1,23 @@
+package edu.uci.ics.asterix.runtime.operators.file;
+
+import edu.uci.ics.asterix.om.types.ARecordType;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
+/**
+ * An extension of AbstractControlledTupleParser that provides functionality for
+ * parsing ADM formatted input.
+ */
+public class ControlledADMTupleParser extends AbstractControlledTupleParser{
+
+ public ControlledADMTupleParser(IHyracksTaskContext ctx, ARecordType recType)
+ throws HyracksDataException {
+ super(ctx, recType);
+ }
+
+ @Override
+ public IDataParser getDataParser() {
+ return new ADMDataParser();
+ }
+
+}
\ No newline at end of file
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ControlledADMTupleParserFactory.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ControlledADMTupleParserFactory.java
new file mode 100644
index 0000000..d72366c
--- /dev/null
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ControlledADMTupleParserFactory.java
@@ -0,0 +1,28 @@
+package edu.uci.ics.asterix.runtime.operators.file;
+
+
+import edu.uci.ics.asterix.om.types.ARecordType;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.std.file.ITupleParser;
+import edu.uci.ics.hyracks.dataflow.std.file.ITupleParserFactory;
+
+/**
+ * A Controlled tuple parser factory for creating a tuple parser capable of parsing
+ * ADM data.
+ */
+public class ControlledADMTupleParserFactory implements ITupleParserFactory{
+ private static final long serialVersionUID = 1L;
+
+ protected ARecordType recType;
+
+ public ControlledADMTupleParserFactory(ARecordType recType){
+ this.recType = recType;
+ }
+
+ @Override
+ public ITupleParser createTupleParser(IHyracksTaskContext ctx)
+ throws HyracksDataException {
+ return new ControlledADMTupleParser(ctx, recType);
+ }
+}
\ No newline at end of file
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ControlledDelimitedDataTupleParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ControlledDelimitedDataTupleParser.java
new file mode 100644
index 0000000..15643d4
--- /dev/null
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ControlledDelimitedDataTupleParser.java
@@ -0,0 +1,23 @@
+package edu.uci.ics.asterix.runtime.operators.file;
+
+import edu.uci.ics.asterix.om.types.ARecordType;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.data.parsers.IValueParserFactory;
+
+public class ControlledDelimitedDataTupleParser extends AbstractControlledTupleParser{
+
+ private final DelimitedDataParser dataParser;
+
+ public ControlledDelimitedDataTupleParser(IHyracksTaskContext ctx,
+ ARecordType recType, IValueParserFactory[] valueParserFactories, char fieldDelimiter) throws HyracksDataException {
+ super(ctx, recType);
+ dataParser = new DelimitedDataParser(recType, valueParserFactories, fieldDelimiter);
+ }
+
+ @Override
+ public IDataParser getDataParser() {
+ return dataParser;
+ }
+
+}
\ No newline at end of file
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ControlledDelimitedDataTupleParserFactory.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ControlledDelimitedDataTupleParserFactory.java
new file mode 100644
index 0000000..ced33ef
--- /dev/null
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ControlledDelimitedDataTupleParserFactory.java
@@ -0,0 +1,29 @@
+package edu.uci.ics.asterix.runtime.operators.file;
+
+import edu.uci.ics.asterix.om.types.ARecordType;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.data.parsers.IValueParserFactory;
+import edu.uci.ics.hyracks.dataflow.std.file.ITupleParser;
+import edu.uci.ics.hyracks.dataflow.std.file.ITupleParserFactory;
+
+public class ControlledDelimitedDataTupleParserFactory implements ITupleParserFactory{
+ private static final long serialVersionUID = 1L;
+ private IValueParserFactory[] valueParserFactories;
+ private char fieldDelimiter;
+ protected ARecordType recordType;
+
+
+ public ControlledDelimitedDataTupleParserFactory(ARecordType recordType, IValueParserFactory[] fieldParserFactories, char fieldDelimiter) {
+ this.recordType = recordType;
+ this.valueParserFactories = fieldParserFactories;
+ this.fieldDelimiter = fieldDelimiter;
+ }
+
+
+ @Override
+ public ITupleParser createTupleParser(IHyracksTaskContext ctx)
+ throws HyracksDataException {
+ return new ControlledDelimitedDataTupleParser(ctx, recordType, valueParserFactories, fieldDelimiter);
+ }
+}
\ No newline at end of file
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ControlledTupleParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ControlledTupleParser.java
new file mode 100644
index 0000000..10b09f5
--- /dev/null
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ControlledTupleParser.java
@@ -0,0 +1,29 @@
+package edu.uci.ics.asterix.runtime.operators.file;
+
+import java.io.InputStream;
+
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.std.file.ITupleParser;
+
+/**
+ * This abstract class is to be extended by tuple parsers used in a pipelined Hyracks job where the input is not available all at once.
+ */
+public abstract class ControlledTupleParser implements ITupleParser{
+
+ /**
+ * Associates an input stream with the parser.
+ */
+ public abstract void initialize(InputStream in) throws HyracksDataException;
+
+ /**
+ * Flushes any tuples still sitting in the output frame to the given writer
+ * and frees all resources.
+ */
+ public abstract void close(IFrameWriter writer) throws HyracksDataException;
+
+ /**
+ * Called when more data is ready for parsing in the input stream.
+ */
+ public abstract void parseNext(IFrameWriter writer) throws HyracksDataException;
+}
\ No newline at end of file
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java
index 5a639dc..22c43ec 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java
@@ -100,6 +100,17 @@
}
@Override
+ public void close() {
+ cursor.close();
+ }
+
+ @Override
+ public void reset() {
+ cursor.reset();
+ }
+
+ @Override
public boolean parse(DataOutput out) throws AsterixException, IOException {
while (cursor.nextRecord()) {
recBuilder.reset(recordType);
@@ -165,7 +176,7 @@
private static final int INITIAL_BUFFER_SIZE = 4096;
private static final int INCREMENT = 4096;
- private final Reader in;
+ private Reader in;
private char[] buffer;
private int start;
@@ -182,6 +193,21 @@
end = 0;
state = State.INIT;
}
+
+ public void close(){
+ try {
+ in.close();
+ } catch (IOException e) {
+ // closing the reader is best-effort; report the failure and continue
+ e.printStackTrace();
+ }
+ }
+
+ public void reset(){
+ start = 0;
+ end = 0;
+ state = State.INIT;
+ }
public boolean nextRecord() throws IOException {
while (true) {
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/IDataParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/IDataParser.java
index 46d281b..cd9ae2e 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/IDataParser.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/IDataParser.java
@@ -52,4 +52,17 @@
* @throws IOException
*/
public boolean parse(DataOutput out) throws AsterixException, IOException;
+
+ /**
+ * Closes the underlying input stream.
+ *
+ */
+ public void close() throws IOException;
+
+ /**
+ * Resets the parser before processing a new batch of input from the input stream.
+ */
+ public void reset();
}
+
+
diff --git a/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/external/data/RateControlledFileSystemBasedAdapter.java b/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/external/data/RateControlledFileSystemBasedAdapter.java
index e037ec6..ffd9edd 100644
--- a/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/external/data/RateControlledFileSystemBasedAdapter.java
+++ b/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/external/data/RateControlledFileSystemBasedAdapter.java
@@ -224,7 +224,8 @@
while (continueIngestion) {
tb.reset();
if (!parser.parse(tb.getDataOutput())) {
- break;
+ parser.close();
+ break;
}
tb.addFieldEndOffset();
if (delayConfigured) {
diff --git a/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/external/data/RateControlledFileSystemBasedAdapterFactory.java b/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/external/data/RateControlledFileSystemBasedAdapterFactory.java
index bf3c086..c576cf8 100644
--- a/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/external/data/RateControlledFileSystemBasedAdapterFactory.java
+++ b/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/external/data/RateControlledFileSystemBasedAdapterFactory.java
@@ -14,14 +14,17 @@
*/
package edu.uci.ics.asterix.tools.external.data;
+import java.util.HashMap;
import java.util.Map;
import edu.uci.ics.asterix.common.exceptions.AsterixException;
import edu.uci.ics.asterix.external.adapter.factory.IGenericDatasetAdapterFactory;
import edu.uci.ics.asterix.external.dataset.adapter.FileSystemBasedAdapter;
+import edu.uci.ics.asterix.external.dataset.adapter.IControlledAdapter;
import edu.uci.ics.asterix.external.dataset.adapter.IDatasourceAdapter;
import edu.uci.ics.asterix.om.types.ARecordType;
import edu.uci.ics.asterix.om.types.IAType;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.NotImplementedException;
/**
* Factory class for creating @see{RateControllerFileSystemBasedAdapter} The
@@ -67,6 +70,12 @@
public String getName() {
return "file_feed";
}
+
+ @Override
+ public IDatasourceAdapter createIndexingAdapter(
+ Map<String, Object> configuration, IAType atype, Map<String,Integer> files) throws Exception {
+ throw new NotImplementedException("Rate Controlled Indexing Adapter is not implemented for feeds");
+ }
private void checkRequiredArgs(Map<String, Object> configuration) throws Exception {
if (configuration.get(KEY_FILE_SYSTEM) == null) {
@@ -83,4 +92,10 @@
}
}
+ @Override
+ public IControlledAdapter createAccessByRIDAdapter(
+ Map<String, Object> configuration, IAType atype, HashMap<Integer,String> files) throws Exception {
+ throw new NotImplementedException("Rate Controlled Access by RID Adapter is not implemented for feeds");
+ }
+
}
\ No newline at end of file
diff --git a/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/opcallbacks/AbstractIndexModificationOperationCallback.java b/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/opcallbacks/AbstractIndexModificationOperationCallback.java
index dac3e95..031a26e 100644
--- a/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/opcallbacks/AbstractIndexModificationOperationCallback.java
+++ b/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/opcallbacks/AbstractIndexModificationOperationCallback.java
@@ -58,6 +58,9 @@
protected void log(int PKHash, ITupleReference newValue, IndexOperation oldOp, ITupleReference oldValue)
throws ACIDException {
logRecord.setPKHashValue(PKHash);
+ logRecord.setPKFields(primaryKeyFields);
+ logRecord.setPKValue(newValue);
+ logRecord.computeAndSetPKValueSize();
if (newValue != null) {
logRecord.setNewValueSize(tupleWriter.bytesRequired(newValue));
logRecord.setNewValue(newValue);
@@ -73,7 +76,7 @@
logRecord.setOldValueSize(0);
}
}
- logRecord.setUpdateLogSize();
+ logRecord.computeAndSetLogSize();
txnSubsystem.getLogManager().log(logRecord);
}
}
diff --git a/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/locking/LockManager.java b/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/locking/LockManager.java
index e0baa3f..6d86f70 100644
--- a/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/locking/LockManager.java
+++ b/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/locking/LockManager.java
@@ -29,7 +29,6 @@
import edu.uci.ics.asterix.common.exceptions.ACIDException;
import edu.uci.ics.asterix.common.transactions.DatasetId;
import edu.uci.ics.asterix.common.transactions.ILockManager;
-import edu.uci.ics.asterix.common.transactions.ILogRecord;
import edu.uci.ics.asterix.common.transactions.ITransactionContext;
import edu.uci.ics.asterix.common.transactions.ITransactionManager;
import edu.uci.ics.asterix.common.transactions.JobId;
@@ -86,7 +85,6 @@
private LockRequestTracker lockRequestTracker; //for debugging
private ConsecutiveWakeupContext consecutiveWakeupContext;
- private final ILogRecord logRecord;
public LockManager(TransactionSubsystem txnSubsystem) throws ACIDException {
this.txnSubsystem = txnSubsystem;
@@ -104,7 +102,6 @@
this.tempDatasetIdObj = new DatasetId(0);
this.tempJobIdObj = new JobId(0);
this.consecutiveWakeupContext = new ConsecutiveWakeupContext();
- this.logRecord = new LogRecord();
if (IS_DEBUG_MODE) {
this.lockRequestTracker = new LockRequestTracker();
}
@@ -642,22 +639,16 @@
@Override
public void unlock(DatasetId datasetId, int entityHashValue, ITransactionContext txnContext) throws ACIDException {
- internalUnlock(datasetId, entityHashValue, txnContext, false, false);
- }
-
- @Override
- public void unlock(DatasetId datasetId, int entityHashValue, ITransactionContext txnContext, boolean commitFlag)
- throws ACIDException {
- internalUnlock(datasetId, entityHashValue, txnContext, false, commitFlag);
+ internalUnlock(datasetId, entityHashValue, txnContext, false);
}
private void instantUnlock(DatasetId datasetId, int entityHashValue, ITransactionContext txnContext)
throws ACIDException {
- internalUnlock(datasetId, entityHashValue, txnContext, true, false);
+ internalUnlock(datasetId, entityHashValue, txnContext, true);
}
private void internalUnlock(DatasetId datasetId, int entityHashValue, ITransactionContext txnContext,
- boolean isInstant, boolean commitFlag) throws ACIDException {
+ boolean isInstant) throws ACIDException {
JobId jobId = txnContext.getJobId();
int eLockInfo = -1;
DatasetLockInfo dLockInfo = null;
@@ -701,22 +692,6 @@
+ "," + entityHashValue + "]: Corresponding lock info doesn't exist.");
}
- //////////////////////////////////////////////////////////
- //[Notice]
- //If both EntityLockCount and DatasetLockCount are 1,
- //then write entity-commit log and return without releasing the lock.
- //The lock will be released when the entity-commit log is flushed.
- if (commitFlag && entityInfoManager.getEntityLockCount(entityInfo) == 1
- && entityInfoManager.getDatasetLockCount(entityInfo) == 1) {
- if (txnContext.isWriteTxn()) {
- logRecord.formCommitLogRecord(txnContext, LogType.ENTITY_COMMIT, jobId.getId(), datasetId.getId(),
- entityHashValue);
- txnSubsystem.getLogManager().log(logRecord);
- }
- return;
- }
- //////////////////////////////////////////////////////////
-
datasetLockMode = entityInfoManager.getDatasetLockMode(entityInfo) == LockMode.S ? LockMode.IS
: LockMode.IX;
@@ -758,11 +733,6 @@
waiterObjId = waiterObj.getNextWaiterObjId();
}
if (threadCount == 0) {
- if (entityInfoManager.getEntityLockMode(entityInfo) == LockMode.X) {
- //TODO
- //write a commit log for the unlocked resource
- //need to figure out that instantLock() also needs to write a commit log.
- }
entityInfoManager.deallocate(entityInfo);
}
}
@@ -2243,17 +2213,11 @@
tempDatasetIdObj.setId(logRecord.getDatasetId());
tempJobIdObj.setId(logRecord.getJobId());
txnCtx = txnSubsystem.getTransactionManager().getTransactionContext(tempJobIdObj);
- if (txnCtx == null) {
- throw new IllegalStateException("TransactionContext[" + tempJobIdObj + "] doesn't exist.");
- }
unlock(tempDatasetIdObj, logRecord.getPKHashValue(), txnCtx);
txnCtx.notifyOptracker(false);
} else if (logRecord.getLogType() == LogType.JOB_COMMIT) {
tempJobIdObj.setId(logRecord.getJobId());
txnCtx = txnSubsystem.getTransactionManager().getTransactionContext(tempJobIdObj);
- if (txnCtx == null) {
- throw new IllegalStateException("TransactionContext[" + tempJobIdObj + "] doesn't exist.");
- }
txnCtx.notifyOptracker(true);
((LogPage) logPage).notifyJobCommitter();
}
diff --git a/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/logging/LogManager.java b/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/logging/LogManager.java
index 7fce48d..4f0bb59 100644
--- a/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/logging/LogManager.java
+++ b/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/logging/LogManager.java
@@ -24,7 +24,11 @@
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.atomic.AtomicBoolean;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -58,6 +62,7 @@
private FileChannel appendChannel;
private LogPage appendPage;
private LogFlusher logFlusher;
+ private Future<Object> futureLogFlusher;
public LogManager(TransactionSubsystem txnSubsystem) throws ACIDException {
this.txnSubsystem = txnSubsystem;
@@ -86,8 +91,7 @@
appendChannel = getFileChannel(appendLSN, false);
getAndInitNewPage();
logFlusher = new LogFlusher(this, emptyQ, flushQ);
- logFlusher.setDaemon(true);
- AsterixThreadExecutor.INSTANCE.execute(logFlusher);
+ futureLogFlusher = AsterixThreadExecutor.INSTANCE.submit(logFlusher);
}
@Override
@@ -174,6 +178,7 @@
@Override
public void stop(boolean dumpState, OutputStream os) {
+ terminateLogFlusher();
if (dumpState) {
// #. dump Configurable Variables
dumpConfVars(os);
@@ -267,15 +272,32 @@
}
private void terminateLogFlusher() {
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("Terminating LogFlusher thread ...");
+ }
logFlusher.terminate();
try {
- logFlusher.join();
- } catch (InterruptedException e) {
- throw new IllegalStateException(e);
+ futureLogFlusher.get();
+ } catch (ExecutionException | InterruptedException e) {
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("---------- warning(begin): LogFlusher thread is terminated abnormally --------");
+ e.printStackTrace();
+ LOGGER.info("---------- warning(end) : LogFlusher thread is terminated abnormally --------");
+ }
+ }
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("LogFlusher thread is terminated.");
}
}
private void deleteAllLogFiles() {
+ if (appendChannel != null) {
+ try {
+ appendChannel.close();
+ } catch (IOException e) {
+ throw new IllegalStateException("Failed to close a fileChannel of a log file");
+ }
+ }
List<Long> logFileIds = getLogFileIds();
for (Long id : logFileIds) {
File file = new File(getLogFilePath(id));
@@ -364,43 +386,69 @@
}
}
-class LogFlusher extends Thread {
- private final static LogPage POISON_PILL = new LogPage(null, ILogRecord.COMMIT_LOG_SIZE, null);
+class LogFlusher implements Callable<Boolean> {
+ private final static LogPage POISON_PILL = new LogPage(null, ILogRecord.JOB_COMMIT_LOG_SIZE, null);
private final LogManager logMgr;//for debugging
private final LinkedBlockingQueue<LogPage> emptyQ;
private final LinkedBlockingQueue<LogPage> flushQ;
private LogPage flushPage;
+ private final AtomicBoolean isStarted;
+ private final AtomicBoolean terminateFlag;
public LogFlusher(LogManager logMgr, LinkedBlockingQueue<LogPage> emptyQ, LinkedBlockingQueue<LogPage> flushQ) {
this.logMgr = logMgr;
this.emptyQ = emptyQ;
this.flushQ = flushQ;
flushPage = null;
+ isStarted = new AtomicBoolean(false);
+ terminateFlag = new AtomicBoolean(false);
+
}
public void terminate() {
+ //make sure the LogFlusher thread has started before terminating it.
+ synchronized (isStarted) {
+ while(!isStarted.get()) {
+ try {
+ isStarted.wait();
+ } catch (InterruptedException e) {
+ //ignore
+ }
+ }
+ }
+
+ terminateFlag.set(true);
if (flushPage != null) {
synchronized (flushPage) {
flushPage.isStop(true);
flushPage.notify();
}
}
+ //[Notice]
+ //The return value doesn't need to be checked
+ //since terminateFlag will trigger termination if the flushQ is full.
flushQ.offer(POISON_PILL);
}
@Override
- public void run() {
+ public Boolean call() {
+ synchronized(isStarted) {
+ isStarted.set(true);
+ isStarted.notify();
+ }
while (true) {
flushPage = null;
try {
flushPage = flushQ.take();
- if (flushPage == POISON_PILL) {
- break;
+ if (flushPage == POISON_PILL || terminateFlag.get()) {
+ return true;
}
- flushPage.flush();
} catch (InterruptedException e) {
- //ignore
+ if (flushPage == null) {
+ continue;
+ }
}
+ flushPage.flush();
emptyQ.offer(flushPage);
}
}
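LogFlusher now runs as a Callable submitted to AsterixThreadExecutor, so shutdown can wait on the returned Future instead of Thread.join(), and terminate() first waits for the flusher to signal that it has started. A minimal sketch of the same lifecycle with plain JDK executors (illustrative names, not the patch's classes):

    import java.util.concurrent.*;
    import java.util.concurrent.atomic.AtomicBoolean;

    public class FlusherLifecycleSketch {
        public static void main(String[] args) throws Exception {
            final AtomicBoolean terminateFlag = new AtomicBoolean(false);
            ExecutorService executor = Executors.newSingleThreadExecutor();
            Future<Boolean> future = executor.submit(new Callable<Boolean>() {   // ~ futureLogFlusher
                @Override
                public Boolean call() {
                    while (!terminateFlag.get()) {
                        // take the next page from the flush queue and flush it
                    }
                    return true;
                }
            });
            terminateFlag.set(true);    // ~ LogFlusher.terminate()
            try {
                future.get();           // ~ LogManager.terminateLogFlusher() waiting on the Future
            } catch (ExecutionException e) {
                // the flusher terminated abnormally; log it and continue shutting down
            }
            executor.shutdown();
        }
    }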
diff --git a/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/logging/LogPage.java b/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/logging/LogPage.java
index 45c3e65..edfec69 100644
--- a/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/logging/LogPage.java
+++ b/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/logging/LogPage.java
@@ -58,7 +58,7 @@
appendOffset = 0;
flushOffset = 0;
isLastPage = false;
- syncCommitQ = new LinkedBlockingQueue<ILogRecord>(logPageSize / ILogRecord.COMMIT_LOG_SIZE);
+ syncCommitQ = new LinkedBlockingQueue<ILogRecord>(logPageSize / ILogRecord.JOB_COMMIT_LOG_SIZE);
}
////////////////////////////////////
@@ -144,7 +144,7 @@
}
this.wait();
} catch (InterruptedException e) {
- throw new IllegalStateException(e);
+ continue;
}
}
endOffset = appendOffset;
diff --git a/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/logging/LogReader.java b/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/logging/LogReader.java
index 173088c..9dc966c 100644
--- a/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/logging/LogReader.java
+++ b/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/logging/LogReader.java
@@ -121,7 +121,7 @@
readBuffer.limit(logPageSize);
try {
fileChannel.position(readLSN % logFileSize);
- size = fileChannel.read(readBuffer, logPageSize);
+ size = fileChannel.read(readBuffer);
} catch (IOException e) {
throw new ACIDException(e);
}
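The one-line change above fixes a real bug: FileChannel.read(ByteBuffer, long) is the absolute-position overload, so the old call always read from byte offset logPageSize of the file and ignored the position(...) set just before it. As a reminder of the two java.nio overloads (standard JDK behavior, nothing AsterixDB-specific):

    // fileChannel.read(dst, pos) reads at absolute offset pos and does not use or update position().
    // fileChannel.read(dst)      reads at the current position() and advances it.
    fileChannel.position(readLSN % logFileSize);
    size = fileChannel.read(readBuffer);   // now honors the position() call above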
diff --git a/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/logging/LogRecord.java b/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/logging/LogRecord.java
index 380e524..4b0e1f2 100644
--- a/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/logging/LogRecord.java
+++ b/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/logging/LogRecord.java
@@ -30,13 +30,18 @@
/*
* == LogRecordFormat ==
* ---------------------------
- * [Header1] (13 bytes) : for all log types
+ * [Header1] (5 bytes) : for all log types
* LogType(1)
* JobId(4)
+ * ---------------------------
+ * [Header2] (16 bytes + PKValueSize) : for entity_commit and update log types
* DatasetId(4) //stored in dataset_dataset in Metadata Node
* PKHashValue(4)
+ * PKFieldCnt(4)
+ * PKValueSize(4)
+ * PKValue(PKValueSize)
* ---------------------------
- * [Header2] (21 bytes) : only for update log type
+ * [Header3] (21 bytes) : only for update log type
* PrevLSN(8)
* ResourceId(8) //stored in .metadata of the corresponding index in NC node
* ResourceType(1)
@@ -45,18 +50,21 @@
* [Body] (Variable size) : only for update log type
* FieldCnt(4)
* NewOp(1)
- * NewValueLength(4)
- * NewValue(NewValueLength)
+ * NewValueSize(4)
+ * NewValue(NewValueSize)
* OldOp(1)
- * OldValueLength(4)
- * OldValue(OldValueLength)
+ * OldValueSize(4)
+ * OldValue(OldValueSize)
* ---------------------------
* [Tail] (8 bytes) : for all log types
* Checksum(8)
* ---------------------------
* = LogSize =
- * 1) JOB_COMMIT and ENTITY_COMMIT: 21 bytes
- * 2) UPDATE: 56 + old and new value size (13 + 21 + 14 + old and newValueSize + 8)
+ * 1) JOB_COMMIT_LOG_SIZE: 13 bytes (5 + 8)
+ * 2) ENTITY_COMMIT: 29 + PKSize (5 + 16 + PKSize + 8)
+ * --> ENTITY_COMMIT_LOG_BASE_SIZE = 29
+ * 3) UPDATE: 64 + PKSize + New/OldValueSize (5 + 16 + PKSize + 21 + 14 + New/OldValueSize + 8)
+ * --> UPDATE_LOG_BASE_SIZE = 64
*/
public class LogRecord implements ILogRecord {
@@ -65,6 +73,9 @@
private int jobId;
private int datasetId;
private int PKHashValue;
+ private int PKFieldCnt;
+ private int PKValueSize;
+ private ITupleReference PKValue;
private long prevLSN;
private long resourceId;
private byte resourceType;
@@ -84,13 +95,18 @@
private long LSN;
private final AtomicBoolean isFlushed;
private final SimpleTupleWriter tupleWriter;
- private final SimpleTupleReference newTuple;
+ private final SimpleTupleReference readPKValue;
+ private final SimpleTupleReference readNewValue;
+ private final SimpleTupleReference readOldValue;
private final CRC32 checksumGen;
+ private int[] PKFields;
public LogRecord() {
isFlushed = new AtomicBoolean(false);
tupleWriter = new SimpleTupleWriter();
- newTuple = (SimpleTupleReference) tupleWriter.createTupleReference();
+ readPKValue = (SimpleTupleReference) tupleWriter.createTupleReference();
+ readNewValue = (SimpleTupleReference) tupleWriter.createTupleReference();
+ readOldValue = (SimpleTupleReference) tupleWriter.createTupleReference();
checksumGen = new CRC32();
}
@@ -99,8 +115,16 @@
int beginOffset = buffer.position();
buffer.put(logType);
buffer.putInt(jobId);
- buffer.putInt(datasetId);
- buffer.putInt(PKHashValue);
+ if (logType != LogType.JOB_COMMIT) {
+ buffer.putInt(datasetId);
+ buffer.putInt(PKHashValue);
+ buffer.putInt(PKFieldCnt);
+ if (PKValueSize <= 0) {
+ throw new IllegalStateException("Primary Key Size is less than or equal to 0");
+ }
+ buffer.putInt(PKValueSize);
+ writePKValue(buffer);
+ }
if (logType == LogType.UPDATE) {
buffer.putLong(prevLSN);
buffer.putLong(resourceId);
@@ -124,8 +148,16 @@
buffer.putLong(checksum);
}
+ private void writePKValue(ByteBuffer buffer) {
+ int i;
+ for (i = 0; i < PKFieldCnt; i++) {
+ buffer.put(PKValue.getFieldData(0), PKValue.getFieldStart(PKFields[i]), PKValue.getFieldLength(PKFields[i]));
+ }
+ }
+
private void writeTuple(ByteBuffer buffer, ITupleReference tuple, int size) {
tupleWriter.writeTuple(tuple, buffer.array(), buffer.position());
+ //writeTuple() doesn't change the position of the buffer.
buffer.position(buffer.position() + size);
}
@@ -141,8 +173,19 @@
try {
logType = buffer.get();
jobId = buffer.getInt();
- datasetId = buffer.getInt();
- PKHashValue = buffer.getInt();
+ if (logType == LogType.JOB_COMMIT) {
+ datasetId = -1;
+ PKHashValue = -1;
+ } else {
+ datasetId = buffer.getInt();
+ PKHashValue = buffer.getInt();
+ PKFieldCnt = buffer.getInt();
+ PKValueSize = buffer.getInt();
+ if (PKValueSize <= 0) {
+ throw new IllegalStateException("Primary Key Size is less than or equal to 0");
+ }
+ PKValue = readPKValue(buffer);
+ }
if (logType == LogType.UPDATE) {
prevLSN = buffer.getLong();
resourceId = buffer.getLong();
@@ -151,18 +194,18 @@
fieldCnt = buffer.getInt();
newOp = buffer.get();
newValueSize = buffer.getInt();
- newValue = readTuple(buffer, newValueSize);
+ newValue = readTuple(buffer, readNewValue, fieldCnt, newValueSize);
if (resourceType == ResourceType.LSM_BTREE) {
oldOp = buffer.get();
if (oldOp != (byte) (IndexOperation.NOOP.ordinal())) {
oldValueSize = buffer.getInt();
if (oldValueSize > 0) {
- oldValue = readTuple(buffer, oldValueSize);
+ oldValue = readTuple(buffer, readOldValue, fieldCnt, oldValueSize);
}
}
}
} else {
- logSize = COMMIT_LOG_SIZE;
+ computeAndSetLogSize();
}
checksum = buffer.getLong();
if (checksum != generateChecksum(buffer, beginOffset, logSize - CHECKSUM_SIZE)) {
@@ -174,27 +217,54 @@
}
return true;
}
+
+ private ITupleReference readPKValue(ByteBuffer buffer) {
+ return readTuple(buffer, readPKValue, PKFieldCnt, PKValueSize);
+ }
- private ITupleReference readTuple(ByteBuffer buffer, int size) {
- newTuple.setFieldCount(fieldCnt);
- newTuple.resetByTupleOffset(buffer, buffer.position());
- buffer.position(buffer.position() + size);
- return newTuple;
+ private ITupleReference readTuple(ByteBuffer srcBuffer, SimpleTupleReference destTuple, int fieldCnt, int size) {
+ destTuple.setFieldCount(fieldCnt);
+ destTuple.resetByTupleOffset(srcBuffer, srcBuffer.position());
+ srcBuffer.position(srcBuffer.position() + size);
+ return destTuple;
}
@Override
- public void formCommitLogRecord(ITransactionContext txnCtx, byte logType, int jobId, int datasetId, int PKHashValue) {
+ public void formJobCommitLogRecord(ITransactionContext txnCtx) {
this.txnCtx = txnCtx;
- this.logType = logType;
- this.jobId = jobId;
+ this.logType = LogType.JOB_COMMIT;
+ this.jobId = txnCtx.getJobId().getId();
+ this.datasetId = -1;
+ this.PKHashValue = -1;
+ computeAndSetLogSize();
+ }
+
+ @Override
+ public void formEntityCommitLogRecord(ITransactionContext txnCtx, int datasetId, int PKHashValue,
+ ITupleReference PKValue, int[] PKFields) {
+ this.txnCtx = txnCtx;
+ this.logType = LogType.ENTITY_COMMIT;
+ this.jobId = txnCtx.getJobId().getId();
this.datasetId = datasetId;
this.PKHashValue = PKHashValue;
- this.logSize = COMMIT_LOG_SIZE;
+ this.PKFieldCnt = PKFields.length;
+ this.PKValue = PKValue;
+ this.PKFields = PKFields;
+ computeAndSetPKValueSize();
+ computeAndSetLogSize();
}
@Override
- public void setUpdateLogSize() {
- logSize = UPDATE_LOG_BASE_SIZE + newValueSize + oldValueSize;
+ public void computeAndSetPKValueSize() {
+ int i;
+ PKValueSize = 0;
+ for (i = 0; i < PKFieldCnt; i++) {
+ PKValueSize += PKValue.getFieldLength(PKFields[i]);
+ }
+ }
+
+ private void setUpdateLogSize() {
+ logSize = UPDATE_LOG_BASE_SIZE + PKValueSize + newValueSize + oldValueSize;
if (resourceType != ResourceType.LSM_BTREE) {
logSize -= 5; //oldOp(byte: 1) + oldValueLength(int: 4)
} else {
@@ -205,18 +275,39 @@
}
@Override
+ public void computeAndSetLogSize() {
+ switch (logType) {
+ case LogType.UPDATE:
+ setUpdateLogSize();
+ break;
+ case LogType.JOB_COMMIT:
+ logSize = JOB_COMMIT_LOG_SIZE;
+ break;
+ case LogType.ENTITY_COMMIT:
+ logSize = ENTITY_COMMIT_LOG_BASE_SIZE + PKValueSize;
+ break;
+ default:
+ throw new IllegalStateException("Unsupported Log Type");
+ }
+ }
+
+ @Override
public String getLogRecordForDisplay() {
StringBuilder builder = new StringBuilder();
builder.append(" LSN : ").append(LSN);
builder.append(" LogType : ").append(LogType.toString(logType));
+ builder.append(" LogSize : ").append(logSize);
builder.append(" JobId : ").append(jobId);
- builder.append(" DatasetId : ").append(datasetId);
- builder.append(" PKHashValue : ").append(PKHashValue);
+ if (logType != LogType.JOB_COMMIT) {
+ builder.append(" DatasetId : ").append(datasetId);
+ builder.append(" PKHashValue : ").append(PKHashValue);
+ builder.append(" PKFieldCnt : ").append(PKFieldCnt);
+ builder.append(" PKSize: ").append(PKValueSize);
+ }
if (logType == LogType.UPDATE) {
builder.append(" PrevLSN : ").append(prevLSN);
builder.append(" ResourceId : ").append(resourceId);
builder.append(" ResourceType : ").append(resourceType);
- builder.append(" LogSize : ").append(logSize);
}
return builder.toString();
}
@@ -405,4 +496,25 @@
public void setLSN(long LSN) {
this.LSN = LSN;
}
+
+ @Override
+ public int getPKValueSize() {
+ return PKValueSize;
+ }
+
+ @Override
+ public ITupleReference getPKValue() {
+ return PKValue;
+ }
+
+ @Override
+ public void setPKFields(int[] primaryKeyFields) {
+ PKFields = primaryKeyFields;
+ PKFieldCnt = PKFields.length;
+ }
+
+ @Override
+ public void setPKValue(ITupleReference PKValue) {
+ this.PKValue = PKValue;
+ }
}
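The size constants used above follow directly from the layout documented at the top of this file. A quick arithmetic check (plain Java, mirroring the format comment):

    int header1 = 1 + 4;                  // LogType + JobId
    int header2 = 4 + 4 + 4 + 4;          // DatasetId + PKHashValue + PKFieldCnt + PKValueSize
    int header3 = 8 + 8 + 1 + 4;          // PrevLSN + ResourceId + ResourceType + LogSize
    int body    = 4 + 1 + 4 + 1 + 4;      // FieldCnt + NewOp + NewValueSize + OldOp + OldValueSize
    int tail    = 8;                      // Checksum
    int jobCommit        = header1 + tail;                            // 13 == JOB_COMMIT_LOG_SIZE
    int entityCommitBase = header1 + header2 + tail;                  // 29 == ENTITY_COMMIT_LOG_BASE_SIZE
    int updateBase       = header1 + header2 + header3 + body + tail; // 64 == UPDATE_LOG_BASE_SIZE
    // Actual sizes then add PKValueSize (entity commit, update) and new/old value sizes (update).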
diff --git a/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/recovery/RecoveryManager.java b/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/recovery/RecoveryManager.java
index 81a73d5..2ad3055 100644
--- a/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/recovery/RecoveryManager.java
+++ b/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/recovery/RecoveryManager.java
@@ -27,6 +27,7 @@
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
@@ -52,6 +53,7 @@
import edu.uci.ics.asterix.transaction.management.service.transaction.TransactionSubsystem;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.lifecycle.ILifeCycleComponent;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.storage.am.common.api.IIndex;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexLifecycleManager;
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
@@ -133,8 +135,10 @@
public void startRecovery(boolean synchronous) throws IOException, ACIDException {
int updateLogCount = 0;
- int commitLogCount = 0;
+ int entityCommitLogCount = 0;
+ int jobCommitLogCount = 0;
int redoCount = 0;
+ int jobId = -1;
state = SystemState.RECOVERING;
@@ -142,9 +146,12 @@
LOGGER.info("[RecoveryMgr] starting recovery ...");
}
- //winnerTxnTable is used to add pairs, <committed TxnId, the most recent commit Lsn of the TxnId>
- Map<TxnId, Long> winnerTxnTable = new HashMap<TxnId, Long>();
- TxnId tempKeyTxnId = new TxnId(-1, -1, -1);
+ Set<Integer> winnerJobSet = new HashSet<Integer>();
+ Map<Integer, Set<TxnId>> jobId2WinnerEntitiesMap = new HashMap<Integer, Set<TxnId>>();
+ //winnerEntitySet collects the committed entities (TxnIds) of each job
+ Set<TxnId> winnerEntitySet = null;
+ TxnId tempKeyTxnId = new TxnId(-1, -1, -1, null, -1, false);
+ TxnId winnerEntity = null;
//#. read checkpoint file and set lowWaterMark where anaylsis and redo start
CheckpointObject checkpointObject = readCheckpoint();
@@ -157,8 +164,6 @@
//-------------------------------------------------------------------------
// [ analysis phase ]
// - collect all committed Lsn
- // - if there are duplicate commits for the same TxnId,
- // keep only the mostRecentCommitLsn among the duplicates.
//-------------------------------------------------------------------------
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("[RecoveryMgr] in analysis phase");
@@ -176,23 +181,34 @@
if (logRecord.getJobId() > maxJobId) {
maxJobId = logRecord.getJobId();
}
- TxnId commitTxnId = null;
switch (logRecord.getLogType()) {
case LogType.UPDATE:
if (IS_DEBUG_MODE) {
updateLogCount++;
}
break;
-
case LogType.JOB_COMMIT:
- case LogType.ENTITY_COMMIT:
- commitTxnId = new TxnId(logRecord.getJobId(), logRecord.getDatasetId(), logRecord.getPKHashValue());
- winnerTxnTable.put(commitTxnId, logRecord.getLSN());
+ winnerJobSet.add(Integer.valueOf(logRecord.getJobId()));
+ jobId2WinnerEntitiesMap.remove(Integer.valueOf(logRecord.getJobId()));
if (IS_DEBUG_MODE) {
- commitLogCount++;
+ jobCommitLogCount++;
}
break;
-
+ case LogType.ENTITY_COMMIT:
+ jobId = logRecord.getJobId();
+ winnerEntity = new TxnId(jobId, logRecord.getDatasetId(), logRecord.getPKHashValue(),
+ logRecord.getPKValue(), logRecord.getPKValueSize(), true);
+ if (!jobId2WinnerEntitiesMap.containsKey(Integer.valueOf(jobId))) {
+ winnerEntitySet = new HashSet<TxnId>();
+ jobId2WinnerEntitiesMap.put(Integer.valueOf(jobId), winnerEntitySet);
+ } else {
+ winnerEntitySet = jobId2WinnerEntitiesMap.get(Integer.valueOf(jobId));
+ }
+ winnerEntitySet.add(winnerEntity);
+ if (IS_DEBUG_MODE) {
+ entityCommitLogCount++;
+ }
+ break;
default:
throw new ACIDException("Unsupported LogType: " + logRecord.getLogType());
}
@@ -203,55 +219,48 @@
// [ redo phase ]
// - redo if
// 1) The TxnId is committed && --> guarantee durability
- // 2) lsn < commitLog's Lsn && --> deal with a case of pkHashValue collision
- // 3) lsn > maxDiskLastLsn of the index --> guarantee idempotance
+ // 2) lsn > maxDiskLastLsn of the index --> guarantee idempotence
//-------------------------------------------------------------------------
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info("[RecoveryMgr] in redo phase");
}
- //#. set log reader to the lowWaterMarkLsn again.
- logReader.initializeScan(lowWaterMarkLsn);
-
long resourceId;
long maxDiskLastLsn;
- long lsn = -1;
- long commitLsn = -1;
+ long LSN = -1;
ILSMIndex index = null;
LocalResource localResource = null;
ILocalResourceMetadata localResourceMetadata = null;
- Map<Long, Long> resourceId2MaxLsnMap = new HashMap<Long, Long>();
- TxnId jobLevelTxnId = new TxnId(-1, -1, -1);
- boolean foundWinnerTxn = false;
+ Map<Long, Long> resourceId2MaxLSNMap = new HashMap<Long, Long>();
+ boolean foundWinner = false;
//#. get indexLifeCycleManager
IAsterixAppRuntimeContextProvider appRuntimeContext = txnSubsystem.getAsterixAppRuntimeContextProvider();
IIndexLifecycleManager indexLifecycleManager = appRuntimeContext.getIndexLifecycleManager();
ILocalResourceRepository localResourceRepository = appRuntimeContext.getLocalResourceRepository();
+ //#. set log reader to the lowWaterMarkLsn again.
+ logReader.initializeScan(lowWaterMarkLsn);
logRecord = logReader.next();
while (logRecord != null) {
- lsn = logRecord.getLSN();
- foundWinnerTxn = false;
if (LogManager.IS_DEBUG_MODE) {
System.out.println(logRecord.getLogRecordForDisplay());
}
+ LSN = logRecord.getLSN();
+ jobId = logRecord.getJobId();
+ foundWinner = false;
switch (logRecord.getLogType()) {
case LogType.UPDATE:
- tempKeyTxnId.setTxnId(logRecord.getJobId(), logRecord.getDatasetId(), logRecord.getPKHashValue());
- jobLevelTxnId.setTxnId(logRecord.getJobId(), -1, -1);
- if (winnerTxnTable.containsKey(tempKeyTxnId)) {
- commitLsn = winnerTxnTable.get(tempKeyTxnId);
- if (lsn < commitLsn) {
- foundWinnerTxn = true;
- }
- } else if (winnerTxnTable.containsKey(jobLevelTxnId)) {
- commitLsn = winnerTxnTable.get(jobLevelTxnId);
- if (lsn < commitLsn) {
- foundWinnerTxn = true;
+ if (winnerJobSet.contains(Integer.valueOf(jobId))) {
+ foundWinner = true;
+ } else if (jobId2WinnerEntitiesMap.containsKey(Integer.valueOf(jobId))) {
+ winnerEntitySet = jobId2WinnerEntitiesMap.get(Integer.valueOf(jobId));
+ tempKeyTxnId.setTxnId(jobId, logRecord.getDatasetId(), logRecord.getPKHashValue(),
+ logRecord.getPKValue(), logRecord.getPKValueSize());
+ if (winnerEntitySet.contains(tempKeyTxnId)) {
+ foundWinner = true;
}
}
-
- if (foundWinnerTxn) {
+ if (foundWinner) {
resourceId = logRecord.getResourceId();
localResource = localResourceRepository.getResourceById(resourceId);
@@ -294,12 +303,12 @@
maxDiskLastLsn = abstractLSMIOCallback.getComponentLSN(index.getImmutableComponents());
//#. set resourceId and maxDiskLastLSN to the map
- resourceId2MaxLsnMap.put(resourceId, maxDiskLastLsn);
+ resourceId2MaxLSNMap.put(Long.valueOf(resourceId), Long.valueOf(maxDiskLastLsn));
} else {
- maxDiskLastLsn = resourceId2MaxLsnMap.get(resourceId);
+ maxDiskLastLsn = resourceId2MaxLSNMap.get(Long.valueOf(resourceId));
}
- if (lsn > maxDiskLastLsn) {
+ if (LSN > maxDiskLastLsn) {
redo(logRecord);
if (IS_DEBUG_MODE) {
redoCount++;
@@ -316,12 +325,11 @@
default:
throw new ACIDException("Unsupported LogType: " + logRecord.getLogType());
}
-
logRecord = logReader.next();
}
//close all indexes
- Set<Long> resourceIdList = resourceId2MaxLsnMap.keySet();
+ Set<Long> resourceIdList = resourceId2MaxLSNMap.keySet();
for (long r : resourceIdList) {
indexLifecycleManager.close(r);
}
@@ -332,8 +340,8 @@
LOGGER.info("[RecoveryMgr] recovery is completed.");
}
if (IS_DEBUG_MODE) {
- System.out.println("[RecoveryMgr] Count: Update/Commit/Redo = " + updateLogCount + "/" + commitLogCount
- + "/" + redoCount);
+ System.out.println("[RecoveryMgr] Count: Update/EntityCommit/JobCommit/Redo = " + updateLogCount + "/"
+ + entityCommitLogCount + "/" + jobCommitLogCount + "/" + redoCount);
}
}
@@ -533,15 +541,18 @@
@Override
public void rollbackTransaction(ITransactionContext txnContext) throws ACIDException {
Map<TxnId, List<Long>> loserTxnTable = new HashMap<TxnId, List<Long>>();
- TxnId tempKeyTxnId = new TxnId(-1, -1, -1);
+ TxnId tempKeyTxnId = new TxnId(-1, -1, -1, null, -1, false);
int updateLogCount = 0;
- int commitLogCount = 0;
+ int entityCommitLogCount = 0;
+ int jobId = -1;
+ int abortedJobId = txnContext.getJobId().getId();
+ long currentLSN = -1;
+ TxnId loserEntity = null;
- // Obtain the first log record written by the Job
+ // Obtain the first/last log record LSNs written by the Job
long firstLSN = txnContext.getFirstLSN();
long lastLSN = txnContext.getLastLSN();
- //TODO: make sure that the lastLsn is not updated anymore by another thread belonging to the same job.
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info(" rollbacking transaction log records from " + firstLSN + " to " + lastLSN);
}
@@ -559,62 +570,62 @@
if (LOGGER.isLoggable(Level.INFO)) {
LOGGER.info(" collecting loser transaction's LSNs from " + firstLSN + " to " + lastLSN);
}
- boolean reachedLastLog = false;
List<Long> undoLSNSet = null;
ILogReader logReader = logMgr.getLogReader(false);
logReader.initializeScan(firstLSN);
- ILogRecord logRecord = logReader.next();
- while (logRecord != null) {
- if (IS_DEBUG_MODE) {
- System.out.println(logRecord.getLogRecordForDisplay());
+ ILogRecord logRecord = null;
+ while (currentLSN < lastLSN) {
+ logRecord = logReader.next();
+ if (logRecord == null) {
+ break;
+ } else {
+ if (IS_DEBUG_MODE) {
+ System.out.println(logRecord.getLogRecordForDisplay());
+ }
+ currentLSN = logRecord.getLSN();
}
-
- tempKeyTxnId.setTxnId(logRecord.getJobId(), logRecord.getDatasetId(), logRecord.getPKHashValue());
+ jobId = logRecord.getJobId();
+ if (jobId != abortedJobId) {
+ continue;
+ }
+ tempKeyTxnId.setTxnId(jobId, logRecord.getDatasetId(), logRecord.getPKHashValue(), logRecord.getPKValue(),
+ logRecord.getPKValueSize());
switch (logRecord.getLogType()) {
case LogType.UPDATE:
undoLSNSet = loserTxnTable.get(tempKeyTxnId);
if (undoLSNSet == null) {
- TxnId txnId = new TxnId(logRecord.getJobId(), logRecord.getDatasetId(),
- logRecord.getPKHashValue());
+ loserEntity = new TxnId(jobId, logRecord.getDatasetId(), logRecord.getPKHashValue(),
+ logRecord.getPKValue(), logRecord.getPKValueSize(), true);
undoLSNSet = new LinkedList<Long>();
- loserTxnTable.put(txnId, undoLSNSet);
+ loserTxnTable.put(loserEntity, undoLSNSet);
}
- undoLSNSet.add(logRecord.getLSN());
+ undoLSNSet.add(Long.valueOf(currentLSN));
if (IS_DEBUG_MODE) {
updateLogCount++;
- System.out.println("" + Thread.currentThread().getId() + "======> update[" + logRecord.getLSN()
- + "]:" + tempKeyTxnId);
+ System.out.println("" + Thread.currentThread().getId() + "======> update[" + currentLSN + "]:"
+ + tempKeyTxnId);
}
break;
case LogType.JOB_COMMIT:
+ throw new ACIDException("Unexpected LogType(" + logRecord.getLogType() + ") during abort.");
+
case LogType.ENTITY_COMMIT:
- undoLSNSet = loserTxnTable.get(tempKeyTxnId);
- if (undoLSNSet != null) {
- loserTxnTable.remove(tempKeyTxnId);
- }
+ loserTxnTable.remove(tempKeyTxnId);
if (IS_DEBUG_MODE) {
- commitLogCount++;
- System.out.println("" + Thread.currentThread().getId() + "======> commit[" + logRecord.getLSN()
- + "]" + tempKeyTxnId);
+ entityCommitLogCount++;
+ System.out.println("" + Thread.currentThread().getId() + "======> entity_commit[" + currentLSN + "]"
+ + tempKeyTxnId);
}
break;
default:
throw new ACIDException("Unsupported LogType: " + logRecord.getLogType());
}
- if (logRecord.getLSN() == lastLSN) {
- reachedLastLog = true;
- break;
- } else if (logRecord.getLSN() > lastLSN) {
- throw new IllegalStateException("LastLSN mismatch");
- }
- logRecord = logReader.next();
}
-
- if (!reachedLastLog) {
- throw new ACIDException("LastLSN mismatch: " + lastLSN + " vs " + logRecord.getLSN()
- + " during Rollback a transaction( " + txnContext.getJobId() + ")");
+ if (currentLSN != lastLSN) {
+ throw new ACIDException("LastLSN mismatch: lastLSN(" + lastLSN + ") vs currentLSN(" + currentLSN
+ + ") during abort( " + txnContext.getJobId() + ")");
}
//undo loserTxn's effect
@@ -622,7 +633,6 @@
LOGGER.info(" undoing loser transaction's effect");
}
- TxnId txnId = null;
Iterator<Entry<TxnId, List<Long>>> iter = loserTxnTable.entrySet().iterator();
int undoCount = 0;
while (iter.hasNext()) {
@@ -630,16 +640,15 @@
//Sort the lsns in order to undo in one pass.
Map.Entry<TxnId, List<Long>> loserTxn = (Map.Entry<TxnId, List<Long>>) iter.next();
- txnId = loserTxn.getKey();
-
undoLSNSet = loserTxn.getValue();
for (long undoLSN : undoLSNSet) {
- // here, all the log records are UPDATE type. So, we don't need to check the type again.
-
+ //here, all the log records are UPDATE type. So, we don't need to check the type again.
//read the corresponding log record to be undone.
logRecord = logReader.read(undoLSN);
- assert logRecord != null;
+ if (logRecord == null) {
+ throw new ACIDException("IllegalState exception during abort( " + txnContext.getJobId() + ")");
+ }
if (IS_DEBUG_MODE) {
System.out.println(logRecord.getLogRecordForDisplay());
}
@@ -656,8 +665,8 @@
LOGGER.info(" undone loser transaction's effect");
}
if (IS_DEBUG_MODE) {
- System.out.println("UpdateLogCount/CommitLogCount/UndoCount:" + updateLogCount + "/" + commitLogCount + "/"
- + undoCount);
+ System.out.println("UpdateLogCount/CommitLogCount/UndoCount:" + updateLogCount + "/" + entityCommitLogCount
+ + "/" + undoCount);
}
}
@@ -720,36 +729,53 @@
}
class TxnId {
+ public boolean isByteArrayPKValue;
public int jobId;
public int datasetId;
- public int pkHashVal;
+ public int pkHashValue;
+ public int pkSize;
+ public byte[] byteArrayPKValue;
+ public ITupleReference tupleReferencePKValue;
- public TxnId(int jobId, int datasetId, int pkHashVal) {
+ public TxnId(int jobId, int datasetId, int pkHashValue, ITupleReference pkValue, int pkSize,
+ boolean isByteArrayPKValue) {
this.jobId = jobId;
this.datasetId = datasetId;
- this.pkHashVal = pkHashVal;
+ this.pkHashValue = pkHashValue;
+ this.pkSize = pkSize;
+ this.isByteArrayPKValue = isByteArrayPKValue;
+ if (isByteArrayPKValue) {
+ this.byteArrayPKValue = new byte[pkSize];
+ readPKValueIntoByteArray(pkValue, pkSize, byteArrayPKValue);
+ } else {
+ this.tupleReferencePKValue = pkValue;
+ }
}
- public void setTxnId(int jobId, int datasetId, int pkHashVal) {
+ private void readPKValueIntoByteArray(ITupleReference pkValue, int pkSize, byte[] byteArrayPKValue) {
+ int readOffset = pkValue.getFieldStart(0);
+ byte[] readBuffer = pkValue.getFieldData(0);
+ for (int i = 0; i < pkSize; i++) {
+ byteArrayPKValue[i] = readBuffer[readOffset + i];
+ }
+ }
+
+ public void setTxnId(int jobId, int datasetId, int pkHashValue, ITupleReference pkValue, int pkSize) {
this.jobId = jobId;
this.datasetId = datasetId;
- this.pkHashVal = pkHashVal;
- }
-
- public void setTxnId(TxnId txnId) {
- this.jobId = txnId.jobId;
- this.datasetId = txnId.datasetId;
- this.pkHashVal = txnId.pkHashVal;
+ this.pkHashValue = pkHashValue;
+ this.tupleReferencePKValue = pkValue;
+ isByteArrayPKValue = false;
}
@Override
public String toString() {
- return "[" + jobId + "," + datasetId + "," + pkHashVal + "]";
+ return "[" + jobId + "," + datasetId + "," + pkHashValue + "," + pkSize + "]";
}
@Override
public int hashCode() {
- return pkHashVal;
+ return pkHashValue;
}
@Override
@@ -761,7 +787,52 @@
return false;
}
TxnId txnId = (TxnId) o;
- return (txnId.pkHashVal == pkHashVal && txnId.datasetId == datasetId && txnId.jobId == jobId);
+ return (txnId.pkHashValue == pkHashValue && txnId.datasetId == datasetId && txnId.jobId == jobId
+ && pkSize == txnId.pkSize && isEqualTo(txnId));
+ }
+ private boolean isEqualTo(TxnId txnId) {
+ if (isByteArrayPKValue && txnId.isByteArrayPKValue) {
+ return isEqual(byteArrayPKValue, txnId.byteArrayPKValue, pkSize);
+ } else if (isByteArrayPKValue && (!txnId.isByteArrayPKValue)) {
+ return isEqual(byteArrayPKValue, txnId.tupleReferencePKValue, pkSize);
+ } else if ((!isByteArrayPKValue) && txnId.isByteArrayPKValue) {
+ return isEqual(txnId.byteArrayPKValue, tupleReferencePKValue, pkSize);
+ } else {
+ return isEqual(tupleReferencePKValue, txnId.tupleReferencePKValue, pkSize);
+ }
+ }
+
+ private boolean isEqual(byte[] a, byte[] b, int size) {
+ for (int i = 0; i < size; i++) {
+ if (a[i] != b[i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private boolean isEqual(byte[] a, ITupleReference b, int size) {
+ int readOffset = b.getFieldStart(0);
+ byte[] readBuffer = b.getFieldData(0);
+ for (int i = 0; i < size; i++) {
+ if (a[i] != readBuffer[readOffset + i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private boolean isEqual(ITupleReference a, ITupleReference b, int size) {
+ int aOffset = a.getFieldStart(0);
+ byte[] aBuffer = a.getFieldData(0);
+ int bOffset = b.getFieldStart(0);
+ byte[] bBuffer = b.getFieldData(0);
+ for (int i = 0; i < size; i++) {
+ if (aBuffer[aOffset + i] != bBuffer[bOffset + i]) {
+ return false;
+ }
+ }
+ return true;
}
}
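
Editor's note: the reworked TxnId above keeps a primary-key value either as a defensive byte-array copy or as a live ITupleReference, and equals() must compare across the two representations. Below is a minimal sketch of that idea under hypothetical names (FieldView, PKHolder, samePK); it is not the patch's code, and it deliberately collapses the patch's four explicit isEqual cases into a single byte accessor for readability.

    // Minimal stand-in for the slice of ITupleReference that TxnId touches;
    // FieldView and PKHolder are hypothetical names, not AsterixDB classes.
    interface FieldView {
        byte[] getFieldData(int fieldIndex);
        int getFieldStart(int fieldIndex);
    }

    class PKHolder {
        final boolean isByteArray;
        final byte[] bytes;   // set when the key was copied out (isByteArray == true)
        final FieldView view; // set when the key is still a live tuple reference
        final int size;

        PKHolder(byte[] bytes, int size) {
            this.isByteArray = true;
            this.bytes = bytes;
            this.view = null;
            this.size = size;
        }

        PKHolder(FieldView view, int size) {
            this.isByteArray = false;
            this.bytes = null;
            this.view = view;
            this.size = size;
        }

        // Byte-by-byte comparison that works no matter which representation each side holds.
        boolean samePK(PKHolder other) {
            if (size != other.size) {
                return false;
            }
            for (int i = 0; i < size; i++) {
                if (byteAt(i) != other.byteAt(i)) {
                    return false;
                }
            }
            return true;
        }

        private byte byteAt(int i) {
            return isByteArray ? bytes[i] : view.getFieldData(0)[view.getFieldStart(0) + i];
        }
    }

The actual patch enumerates the array/array, array/tuple, tuple/array, and tuple/tuple combinations in separate isEqual overloads, which avoids re-resolving the tuple's field offset and buffer on every byte; the single byteAt helper here trades that away for brevity.
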
diff --git a/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/transaction/TransactionManager.java b/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/transaction/TransactionManager.java
index 01bce83..01b38c2 100644
--- a/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/transaction/TransactionManager.java
+++ b/asterix-transactions/src/main/java/edu/uci/ics/asterix/transaction/management/service/transaction/TransactionManager.java
@@ -29,7 +29,6 @@
import edu.uci.ics.asterix.common.transactions.ITransactionManager;
import edu.uci.ics.asterix.common.transactions.JobId;
import edu.uci.ics.asterix.transaction.management.service.logging.LogRecord;
-import edu.uci.ics.asterix.transaction.management.service.logging.LogType;
import edu.uci.ics.hyracks.api.lifecycle.ILifeCycleComponent;
/**
@@ -60,7 +59,8 @@
if (LOGGER.isLoggable(Level.SEVERE)) {
LOGGER.severe(msg);
}
- throw new Error(msg, ae);
+ ae.printStackTrace();
+ throw new ACIDException(msg, ae);
} finally {
txnSubsystem.getLockManager().releaseLocks(txnCtx);
transactionContextRepository.remove(txnCtx.getJobId());
@@ -90,20 +90,11 @@
@Override
public void commitTransaction(ITransactionContext txnCtx, DatasetId datasetId, int PKHashVal) throws ACIDException {
- //There is either job-level commit or entity-level commit.
- //The job-level commit will have -1 value both for datasetId and PKHashVal.
-
- //for entity-level commit
- if (PKHashVal != -1) {
- txnSubsystem.getLockManager().unlock(datasetId, PKHashVal, txnCtx, true);
- return;
- }
-
- //for job-level commit
+ //Only job-level commits call this method.
try {
if (txnCtx.isWriteTxn()) {
LogRecord logRecord = ((TransactionContext) txnCtx).getLogRecord();
- logRecord.formCommitLogRecord(txnCtx, LogType.JOB_COMMIT, txnCtx.getJobId().getId(), -1, -1);
+ logRecord.formJobCommitLogRecord(txnCtx);
txnSubsystem.getLogManager().log(logRecord);
}
} catch (Exception ae) {