[ASTERIXDB-2753][EXT] Support reading Parquet from S3
- user model changes: no
- storage format changes: no
- interface changes: yes
- Adds IRecordReaderFactory#getReaderSupportedFormats
Details:
- Read Parquet files from S3
- Allow different implementations for the same adapter name
* They can be distinguished using IRecordReaderFactory#getReaderSupportedFormats
- Fix HDFSUtils to get the correct partitions' locations
- Bump Parquet version to 1.12.0
- Make compiler.external.field.pushdown true by default
Change-Id: I7a8f1a9dc31d8b4af508e521d010e2ed10feb7dd
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/8984
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Wael Alkowaileet <wael.y.k@gmail.com>
Reviewed-by: Michael Blow <mblow@apache.org>
Reviewed-by: Hussain Towaileb <hussainht@gmail.com>
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
diff --git a/asterixdb/asterix-app/data/hdfs/parquet/id_age-string.json b/asterixdb/asterix-app/data/hdfs/parquet/id_age-string.json
new file mode 100644
index 0000000..36680ac
--- /dev/null
+++ b/asterixdb/asterix-app/data/hdfs/parquet/id_age-string.json
@@ -0,0 +1,7 @@
+{"id": 15, "age": "10"}
+{"id": 16, "age": "20"}
+{"id": 17, "age": "30"}
+{"id": 18, "age": "40"}
+{"id": 19, "age": "50"}
+{"id": 20, "age": "60"}
+{"id": 21, "age": "70"}
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/BinaryFileConverterUtil.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/BinaryFileConverterUtil.java
new file mode 100644
index 0000000..d3865d3
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/BinaryFileConverterUtil.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.test.external_dataset;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericData.Record;
+import org.apache.hadoop.fs.Path;
+import org.apache.hyracks.api.util.IoUtil;
+import org.kitesdk.data.spi.JsonUtil;
+import org.kitesdk.data.spi.filesystem.JSONFileReader;
+
+import parquet.avro.AvroParquetWriter;
+
+public class BinaryFileConverterUtil {
+ public static final String DEFAULT_PARQUET_SRC_PATH = "data/hdfs/parquet";
+ public static final String BINARY_GEN_BASEDIR = "target" + File.separatorChar + "generated_bin_files";
+
+ //How many records should the schema inference method inspect to infer the schema for parquet files
+ private static final int NUM_OF_RECORDS_SCHEMA = 20;
+
+ private BinaryFileConverterUtil() {
+ }
+
+ public static void cleanBinaryDirectory(File localDataRoot, String binaryFilesPath) throws IOException {
+ File destPath = new File(localDataRoot, binaryFilesPath);
+ //Delete old generated files
+ if (destPath.exists()) {
+ IoUtil.delete(destPath);
+ }
+ //Create new directory
+ Files.createDirectory(Paths.get(destPath.getAbsolutePath()));
+ }
+
+ public static void convertToParquet(File localDataRoot, String src, String dest) throws IOException {
+ File srcPath = new File(localDataRoot, src);
+ File destPath = new File(localDataRoot, dest);
+
+ //Write parquet files
+ File[] listOfFiles = srcPath.listFiles();
+ for (File jsonFile : listOfFiles) {
+ String fileName = jsonFile.getName().substring(0, jsonFile.getName().indexOf(".")) + ".parquet";
+ Path outputPath = new Path(destPath.getAbsolutePath(), fileName);
+ writeParquetFile(jsonFile, outputPath);
+ }
+ }
+
+ private static void writeParquetFile(File jsonInputPath, Path parquetOutputPath) throws IOException {
+ final FileInputStream schemaInputStream = new FileInputStream(jsonInputPath);
+ final FileInputStream jsonInputStream = new FileInputStream(jsonInputPath);
+ //Infer Avro schema
+ final Schema inputSchema = JsonUtil.inferSchema(schemaInputStream, "parquet_schema", NUM_OF_RECORDS_SCHEMA);
+ try (JSONFileReader<Record> reader = new JSONFileReader<>(jsonInputStream, inputSchema, Record.class)) {
+ reader.initialize();
+ try (AvroParquetWriter<Record> writer = new AvroParquetWriter<>(parquetOutputPath, inputSchema)) {
+ for (Record record : reader) {
+ writer.write(record);
+ }
+ }
+ }
+ }
+}
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
index c064281..d445057 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
@@ -18,9 +18,11 @@
*/
package org.apache.asterix.test.external_dataset;
+import static org.apache.asterix.test.external_dataset.BinaryFileConverterUtil.BINARY_GEN_BASEDIR;
import static org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetTest.FIXED_DATA_CONTAINER;
import java.io.BufferedWriter;
+import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Files;
@@ -51,6 +53,7 @@
public static final String CSV_DEFINITION = "csv-data/reviews/";
public static final String TSV_DEFINITION = "tsv-data/reviews/";
public static final String MIXED_DEFINITION = "mixed-data/reviews/";
+ public static final String PARQUET_DEFINITION = "parquet-data/reviews/";
// This is used for a test to generate over 1000 number of files
public static final String OVER_1000_OBJECTS_PATH = "over-1000-objects";
@@ -74,6 +77,18 @@
this.tcCtx = tcCtx;
}
+ /**
+ * Generate binary files (e.g., parquet files)
+ */
+ public static void createBinaryFiles(String parquetRawJsonDir) throws IOException {
+ //base path
+ File basePath = new File(".");
+ //clean the binary generated files' directory
+ BinaryFileConverterUtil.cleanBinaryDirectory(basePath, BINARY_GEN_BASEDIR);
+ //Convert files in DEFAULT_PARQUET_SRC_PATH to parquet
+ BinaryFileConverterUtil.convertToParquet(basePath, parquetRawJsonDir, BINARY_GEN_BASEDIR);
+ }
+
public static void setDataPaths(String jsonDataPath, String csvDataPath, String tsvDataPath) {
JSON_DATA_PATH = jsonDataPath;
CSV_DATA_PATH = csvDataPath;
@@ -110,6 +125,10 @@
loadLargeNumberOfFiles();
LOGGER.info("Added " + OVER_1000_OBJECTS_COUNT + " files into " + OVER_1000_OBJECTS_PATH + " successfully");
+ LOGGER.info("Adding Parquet files to the bucket");
+ loadParquetFiles();
+ LOGGER.info("Parquet files added successfully");
+
LOGGER.info("Files added successfully");
}
@@ -210,6 +229,19 @@
loadGzData(dataBasePath, "", "02.tsv", definition, definitionSegment, false);
}
+ private static void loadParquetFiles() {
+ String dataBasePath = BINARY_GEN_BASEDIR;
+ String definition = PARQUET_DEFINITION;
+
+ // Normal format
+ String definitionSegment = "";
+ loadData(dataBasePath, "", "dummy_tweet.parquet", definition, definitionSegment, false, false);
+ loadData(dataBasePath, "", "id_age.parquet", definition, definitionSegment, false, false);
+ loadData(dataBasePath, "", "id_age-string.parquet", definition, definitionSegment, false, false);
+ loadData(dataBasePath, "", "id_name.parquet", definition, definitionSegment, false, false);
+ loadData(dataBasePath, "", "id_name_comment.parquet", definition, definitionSegment, false, false);
+ }
+
private static void loadData(String fileBasePath, String filePathSegment, String filename, String definition,
String definitionSegment, boolean removeExtension) {
loadData(fileBasePath, filePathSegment, filename, definition, definitionSegment, removeExtension, true);
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
index c3c94f6..8035f5a 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
@@ -18,6 +18,8 @@
*/
package org.apache.asterix.test.external_dataset.aws;
+import static org.apache.asterix.test.external_dataset.BinaryFileConverterUtil.DEFAULT_PARQUET_SRC_PATH;
+import static org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.createBinaryFiles;
import static org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.setDataPaths;
import static org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.setUploaders;
import static org.apache.hyracks.util.file.FileUtil.joinPath;
@@ -128,6 +130,7 @@
public static void setUp() throws Exception {
final TestExecutor testExecutor = new AwsTestExecutor();
LangExecutionUtil.setUp(TEST_CONFIG_FILE_NAME, testExecutor);
+ createBinaryFiles(DEFAULT_PARQUET_SRC_PATH);
setNcEndpoints(testExecutor);
startAwsS3MockServer();
}
@@ -221,7 +224,6 @@
private static RequestBody getRequestBody(String content, boolean fromFile, boolean gzipped) {
RequestBody body;
-
// Content is string
if (!fromFile) {
body = RequestBody.fromString(content);
@@ -247,6 +249,7 @@
static class AwsTestExecutor extends TestExecutor {
+ @Override
public void executeTestFile(TestCaseContext testCaseCtx, TestFileContext ctx, Map<String, Object> variableCtx,
String statement, boolean isDmlRecoveryTest, ProcessBuilder pb, TestCase.CompilationUnit cUnit,
MutableInt queryCount, List<TestFileContext> expectedResultFileCtxs, File testFile, String actualPath)
@@ -328,4 +331,4 @@
}
LOGGER.info("Done creating bucket with data");
}
-}
\ No newline at end of file
+}
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/runtime/HDFSCluster.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/runtime/HDFSCluster.java
index f5c51b4..4c30d2b 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/runtime/HDFSCluster.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/runtime/HDFSCluster.java
@@ -18,15 +18,14 @@
*/
package org.apache.asterix.test.runtime;
+import static org.apache.asterix.test.external_dataset.BinaryFileConverterUtil.BINARY_GEN_BASEDIR;
+import static org.apache.asterix.test.external_dataset.BinaryFileConverterUtil.DEFAULT_PARQUET_SRC_PATH;
+
import java.io.File;
-import java.io.FileInputStream;
import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Paths;
import org.apache.asterix.external.dataset.adapter.GenericAdapter;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericData.Record;
+import org.apache.asterix.test.external_dataset.BinaryFileConverterUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -36,11 +35,6 @@
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
-import org.apache.hyracks.api.util.IoUtil;
-import org.kitesdk.data.spi.JsonUtil;
-import org.kitesdk.data.spi.filesystem.JSONFileReader;
-
-import parquet.avro.AvroParquetWriter;
/**
* Manages a Mini (local VM) HDFS cluster with a configured number of datanodes.
@@ -53,8 +47,6 @@
private static final String DATA_PATH = "data/hdfs";
private static final String HDFS_PATH = "/asterix";
private static final HDFSCluster INSTANCE = new HDFSCluster();
- //Temporary folder that holds generated binary files
- private static final String BINARY_GEN_BASEDIR = "target" + File.separatorChar + "generated_bin_files";
//How many records should the schema inference method inspect to infer the schema for parquet files
private static final int NUM_OF_RECORDS_SCHEMA = 20;
@@ -93,8 +85,10 @@
build.startupOption(StartupOption.REGULAR);
dfsCluster = build.build();
dfs = FileSystem.get(conf);
- //Generate binary files from JSON files (e.g., parquet files)
- generateBinaryFiles(basePath);
+ //clean the binary generated files' directory
+ BinaryFileConverterUtil.cleanBinaryDirectory(basePath, BINARY_GEN_BASEDIR);
+ //Convert files in DEFAULT_PARQUET_SRC_PATH to parquet
+ BinaryFileConverterUtil.convertToParquet(basePath, DEFAULT_PARQUET_SRC_PATH, BINARY_GEN_BASEDIR);
//Load JSON/ADM files to HDFS
loadData(basePath, DATA_PATH);
//Load generated binary files (e.g., parquet files) to HDFS
@@ -123,16 +117,6 @@
System.setProperty("hadoop.log.dir", "logs");
}
- private void generateBinaryFiles(File localDataRoot) throws IOException {
- File srcPath = new File(localDataRoot, DATA_PATH);
- File destPath = new File(localDataRoot, BINARY_GEN_BASEDIR);
- //Delete old generated files
- IoUtil.delete(destPath);
- Files.createDirectory(Paths.get(destPath.getAbsolutePath()));
- //Write parquet files
- writeParquetDir(new File(srcPath, "parquet"), destPath);
- }
-
public void cleanup() throws Exception {
if (dfsCluster != null) {
dfsCluster.shutdown();
@@ -161,28 +145,4 @@
conf.set("mapred.input.format.class", TextInputFormat.class.getName());
return conf;
}
-
- private void writeParquetDir(File parquetSrcDir, File destPath) throws IOException {
- File[] listOfFiles = parquetSrcDir.listFiles();
- for (File jsonFile : listOfFiles) {
- String fileName = jsonFile.getName().substring(0, jsonFile.getName().indexOf(".")) + ".parquet";
- Path outputPath = new Path(destPath.getAbsolutePath(), fileName);
- writeParquetFile(jsonFile, outputPath);
- }
- }
-
- public void writeParquetFile(File jsonInputPath, Path parquetOutputPath) throws IOException {
- final FileInputStream schemaInputStream = new FileInputStream(jsonInputPath);
- final FileInputStream jsonInputStream = new FileInputStream(jsonInputPath);
- //Infer Avro schema
- final Schema inputSchema = JsonUtil.inferSchema(schemaInputStream, "parquet_schema", NUM_OF_RECORDS_SCHEMA);
- try (JSONFileReader<Record> reader = new JSONFileReader<>(jsonInputStream, inputSchema, Record.class)) {
- reader.initialize();
- try (AvroParquetWriter<Record> writer = new AvroParquetWriter<>(parquetOutputPath, inputSchema)) {
- for (Record record : reader) {
- writer.write(record);
- }
- }
- }
- }
}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.01.ddl.sqlpp
new file mode 100644
index 0000000..f1ab714
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.01.ddl.sqlpp
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Expression pushdown
+* Expected Res : Success
+* Date : June 22nd 2020
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE ParquetType as {
+};
+
+CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="parquet-data/reviews"),
+ ("include"="*id_age.parquet"),
+ ("format" = "parquet")
+);
+
+CREATE EXTERNAL DATASET ParquetDataset2(ParquetType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="parquet-data/reviews"),
+ ("include"="*id_age.parquet"),
+ ("format" = "parquet")
+);
+
+CREATE EXTERNAL DATASET ParquetDataset3(ParquetType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="parquet-data/reviews"),
+ ("include"="*id_name_comment.parquet"),
+ ("format" = "parquet")
+);
+
+CREATE EXTERNAL DATASET ParquetDataset4(ParquetType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="parquet-data/reviews"),
+ ("include"="*dummy_tweet.parquet"),
+ ("format" = "parquet")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.02.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.02.query.sqlpp
new file mode 100644
index 0000000..d51e173
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.02.query.sqlpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Ignore Field Access pushdown when requesting all fields
+ * Expected Res : Success
+ * Date : June 22nd 2020
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "false";
+
+SELECT VALUE p
+FROM ParquetDataset p
+ORDER BY p.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.03.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.03.query.sqlpp
new file mode 100644
index 0000000..18669ba
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.03.query.sqlpp
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Ignore Field Access pushdown when requesting all fields
+ * Expected Res : Success
+ * Date : June 22nd 2020
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "false";
+
+EXPLAIN
+SELECT VALUE p
+FROM ParquetDataset p
+ORDER BY p.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.04.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.04.query.sqlpp
new file mode 100644
index 0000000..824ea80
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.04.query.sqlpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Ignore Field Access pushdown when requesting all fields
+ * Expected Res : Success
+ * Date : June 22nd 2020
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+
+SELECT VALUE p
+FROM ParquetDataset p
+ORDER BY p.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.05.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.05.query.sqlpp
new file mode 100644
index 0000000..2aac7ff
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.05.query.sqlpp
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Ignore Field Access pushdown when requesting all fields
+ * Expected Res : Success
+ * Date : June 22nd 2020
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+
+EXPLAIN
+SELECT VALUE p
+FROM ParquetDataset p
+ORDER BY p.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.06.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.06.query.sqlpp
new file mode 100644
index 0000000..a354189
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.06.query.sqlpp
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Ignore Field Access pushdown when requesting all fields
+ * Expected Res : Success
+ * Date : June 22nd 2020
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "false";
+
+SELECT p1, p2.id
+FROM ParquetDataset p1, ParquetDataset2 p2
+WHERE p1.id = p2.id
+ORDER BY p2.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.07.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.07.query.sqlpp
new file mode 100644
index 0000000..96035dd
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.07.query.sqlpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Ignore Field Access pushdown when requesting all fields
+ * Expected Res : Success
+ * Date : June 22nd 2020
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "false";
+
+EXPLAIN
+SELECT p1, p2.id
+FROM ParquetDataset p1, ParquetDataset2 p2
+WHERE p1.id = p2.id
+ORDER BY p2.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.08.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.08.query.sqlpp
new file mode 100644
index 0000000..4b2003d
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.08.query.sqlpp
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Ignore Field Access pushdown when requesting all fields
+ * Expected Res : Success
+ * Date : June 22nd 2020
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+
+SELECT p1, p2.id
+FROM ParquetDataset p1, ParquetDataset2 p2
+WHERE p1.id = p2.id
+ORDER BY p2.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.09.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.09.query.sqlpp
new file mode 100644
index 0000000..5a0f903
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.09.query.sqlpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Ignore Field Access pushdown when requesting all fields
+ * Expected Res : Success
+ * Date : June 22nd 2020
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+
+EXPLAIN
+SELECT p1, p2.id
+FROM ParquetDataset p1, ParquetDataset2 p2
+WHERE p1.id = p2.id
+ORDER BY p2.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.10.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.10.query.sqlpp
new file mode 100644
index 0000000..878875c
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.10.query.sqlpp
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Access different fields when joining two datasets
+ * Expected Res : Success
+ * Date : June 22nd 2020
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "false";
+
+SELECT p1.age, p2.name
+FROM ParquetDataset p1, ParquetDataset3 p2
+WHERE p1.id = p2.id
+ORDER BY p2.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.11.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.11.query.sqlpp
new file mode 100644
index 0000000..066fb53
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.11.query.sqlpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Access different fields when joining two datasets
+ * Expected Res : Success
+ * Date : June 22nd 2020
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "false";
+
+EXPLAIN
+SELECT p1.age, p2.name
+FROM ParquetDataset p1, ParquetDataset3 p2
+WHERE p1.id = p2.id
+ORDER BY p2.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.12.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.12.query.sqlpp
new file mode 100644
index 0000000..627719e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.12.query.sqlpp
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Access different fields when joining two datasets
+ * Expected Res : Success
+ * Date : June 22nd 2020
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+
+SELECT p1.age, p2.name
+FROM ParquetDataset p1, ParquetDataset3 p2
+WHERE p1.id = p2.id
+ORDER BY p2.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.13.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.13.query.sqlpp
new file mode 100644
index 0000000..a17d617
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.13.query.sqlpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Access different fields when joining two datasets
+ * Expected Res : Success
+ * Date : June 22nd 2020
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+
+EXPLAIN
+SELECT p1.age, p2.name
+FROM ParquetDataset p1, ParquetDataset3 p2
+WHERE p1.id = p2.id
+ORDER BY p2.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.14.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.14.query.sqlpp
new file mode 100644
index 0000000..41f24d4
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.14.query.sqlpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Push only common fields when requesting nested values
+ * Expected Res : Success
+ * Date : June 22nd 2020
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "false";
+
+SELECT p.user.id, p.user.name
+FROM ParquetDataset4 p
+ORDER BY p.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.15.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.15.query.sqlpp
new file mode 100644
index 0000000..aea2583
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.15.query.sqlpp
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Push only common fields when requesting nested values
+ * Expected Res : Success
+ * Date : June 22nd 2020
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "false";
+
+EXPLAIN
+SELECT p.user.id, p.user.name
+FROM ParquetDataset4 p
+ORDER BY p.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.16.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.16.query.sqlpp
new file mode 100644
index 0000000..f44c910
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.16.query.sqlpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Push only common fields when requesting nested values
+ * Expected Res : Success
+ * Date : June 22nd 2020
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+
+SELECT p.user.id, p.user.name
+FROM ParquetDataset4 p
+ORDER BY p.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.17.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.17.query.sqlpp
new file mode 100644
index 0000000..f855121
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.17.query.sqlpp
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Push only common fields when requesting nested values
+ * Expected Res : Success
+ * Date : June 22nd 2020
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+
+EXPLAIN
+SELECT p.user.id, p.user.name
+FROM ParquetDataset4 p
+ORDER BY p.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.18.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.18.query.sqlpp
new file mode 100644
index 0000000..9ee38e8
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/expression-pushdown/expression-pushdown.18.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Ensure `compiler.external.field.pushdown` is set by default
+ * Expected Res : Success
+ * Date : July 7th 2021
+ */
+
+USE test;
+
+EXPLAIN
+SELECT p.user.id, p.user.name
+FROM ParquetDataset4 p
+ORDER BY p.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/missing-fields/missing-fields.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/missing-fields/missing-fields.1.ddl.sqlpp
new file mode 100644
index 0000000..51378ed
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/missing-fields/missing-fields.1.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Requesting non-existing fields should not fail
+* Expected Res : Success
+* Date : June 22nd 2020
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE ParquetType as {
+};
+
+CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="parquet-data/reviews"),
+ ("include"="*dummy_tweet.parquet"),
+ ("format" = "parquet")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/missing-fields/missing-fields.2.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/missing-fields/missing-fields.2.query.sqlpp
new file mode 100644
index 0000000..1715bba
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/missing-fields/missing-fields.2.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Requesting non-existing fields should not fail
+* Expected Res : Success
+* Date : June 22nd 2020
+*/
+USE test;
+
+SELECT p.not_a_field1 IS MISSING as f1, p.user.not_a_field2 IS MISSING as f2
+FROM ParquetDataset p
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/missing-fields/missing-fields.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/missing-fields/missing-fields.3.query.sqlpp
new file mode 100644
index 0000000..6c030db
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/missing-fields/missing-fields.3.query.sqlpp
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Requesting non-existing fields should not fail
+* Expected Res : Success
+* Date : June 22nd 2020
+*/
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+
+SELECT p.not_a_field1 IS MISSING as f1, p.user.not_a_field2 IS MISSING as f2
+FROM ParquetDataset p
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.1.ddl.sqlpp
new file mode 100644
index 0000000..23c357e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.1.ddl.sqlpp
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Retrieve all fields from different Parquet files with different schemas
+ * Expected Res : Success
+ * Date : June 22nd 2020
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE ParquetType as {
+};
+
+CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="parquet-data/reviews"),
+ ("include"="*id_age.parquet"),
+ ("include#1"="*id_name.parquet"),
+ ("format" = "parquet")
+);
+
+CREATE EXTERNAL DATASET ParquetDataset2(ParquetType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="parquet-data/reviews"),
+ ("include"="*id_age.parquet"),
+ ("include#1"="*id_age-string.parquet"),
+ ("format" = "parquet")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.2.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.2.query.sqlpp
new file mode 100644
index 0000000..e332ba7
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.2.query.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Retrieve all fields from different Parquet files with different schemas
+ * Expected Res : Success
+ * Date : June 22nd 2020
+ */
+USE test;
+
+SELECT VALUE p
+FROM ParquetDataset p
+ORDER BY p.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.3.query.sqlpp
new file mode 100644
index 0000000..a8bc91cc
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.3.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Retrieve all fields from different Parquet files with different schemas
+ with conflicting fields
+ * Expected Res : Success
+ * Date : July 7th 2021
+ */
+
+USE test;
+
+SELECT VALUE p
+FROM ParquetDataset2 p
+ORDER BY p.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.4.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.4.query.sqlpp
new file mode 100644
index 0000000..1e82c7b
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.4.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : Retrieve all fields from different Parquet files with different schemas
+ with conflicting fields
+ * Expected Res : Success
+ * Date : July 7th 2021
+ */
+
+USE test;
+
+SELECT VALUE p.age
+FROM ParquetDataset2 p
+ORDER BY p.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/object-concat/object-concat.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/object-concat/object-concat.1.ddl.sqlpp
new file mode 100644
index 0000000..c17cf79
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/object-concat/object-concat.1.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Concat two objects after pushdown
+* Expected Res : Success
+* Date : September 22nd 2020
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE ParquetType as {
+};
+
+CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="parquet-data/reviews"),
+ ("include"="*dummy_tweet.parquet"),
+ ("format" = "parquet")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/object-concat/object-concat.2.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/object-concat/object-concat.2.query.sqlpp
new file mode 100644
index 0000000..a429b73
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/object-concat/object-concat.2.query.sqlpp
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Concat two objects after pushdown
+* Expected Res : Success
+* Date : September 22nd 2020
+*/
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+SELECT VALUE object_concat(p.coordinates, p.user).name
+FROM ParquetDataset p
+ORDER BY p.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/object-concat/object-concat.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/object-concat/object-concat.3.query.sqlpp
new file mode 100644
index 0000000..dd114e6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/object-concat/object-concat.3.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Concat two objects after pushdown
+* Expected Res : Success
+* Date : September 22nd 2020
+*/
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+EXPLAIN
+SELECT VALUE object_concat(p.coordinates, p.user).name
+FROM ParquetDataset p
+ORDER BY p.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/object-concat/object-concat.4.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/object-concat/object-concat.4.query.sqlpp
new file mode 100644
index 0000000..06bd91c
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/object-concat/object-concat.4.query.sqlpp
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Concat two objects after pushdown
+* Expected Res : Success
+* Date : September 22nd 2020
+*/
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+SELECT VALUE object_concat(p.coordinates, p.user)
+FROM ParquetDataset p
+ORDER BY p.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/object-concat/object-concat.5.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/object-concat/object-concat.5.query.sqlpp
new file mode 100644
index 0000000..0b42622
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/object-concat/object-concat.5.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Concat two objects after pushdown
+* Expected Res : Success
+* Date : September 22nd 2020
+*/
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+EXPLAIN
+SELECT VALUE object_concat(p.coordinates, p.user)
+FROM ParquetDataset p
+ORDER BY p.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-all-fields/select-all-fields.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-all-fields/select-all-fields.1.ddl.sqlpp
new file mode 100644
index 0000000..be64bb6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-all-fields/select-all-fields.1.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Retrieve all fields from a Parquet file
+* Expected Res : Success
+* Date : June 22nd 2020
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE ParquetType as {
+};
+
+CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING %adapter%
+(
+%template%,
+("container"="playground"),
+("definition"="parquet-data/reviews"),
+("include"="*dummy_tweet.parquet"),
+("format" = "parquet")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-all-fields/select-all-fields.2.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-all-fields/select-all-fields.2.query.sqlpp
new file mode 100644
index 0000000..51a7b3e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-all-fields/select-all-fields.2.query.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Retrieve all fields from a Parquet file
+* Expected Res : Success
+* Date : June 22nd 2020
+*/
+USE test;
+
+SELECT VALUE p
+FROM ParquetDataset p
+ORDER BY p.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-all-fields/select-all-fields.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-all-fields/select-all-fields.3.query.sqlpp
new file mode 100644
index 0000000..c04a65b
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-all-fields/select-all-fields.3.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Retrieve all fields from a Parquet file
+* Expected Res : Success
+* Date : June 22nd 2020
+*/
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+
+SELECT VALUE p
+FROM ParquetDataset p
+ORDER BY p.id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-count-one-field/select-count-one-field.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-count-one-field/select-count-one-field.1.ddl.sqlpp
new file mode 100644
index 0000000..5738fad
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-count-one-field/select-count-one-field.1.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Retrieve the number of texts in all tweets
+* Expected Res : Success
+* Date : June 22nd 2020
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE ParquetType as {
+};
+
+CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="parquet-data/reviews"),
+ ("include"="*dummy_tweet.parquet"),
+ ("format" = "parquet")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-count-one-field/select-count-one-field.2.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-count-one-field/select-count-one-field.2.query.sqlpp
new file mode 100644
index 0000000..491d789
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-count-one-field/select-count-one-field.2.query.sqlpp
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Retrieve the number of texts in all tweets (with expression pushdown)
+* Expected Res : Success
+* Date : June 22nd 2020
+*/
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+
+SELECT VALUE count(p.text)
+FROM ParquetDataset p;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-count-one-field/select-count-one-field.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-count-one-field/select-count-one-field.3.query.sqlpp
new file mode 100644
index 0000000..84639bf
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-count-one-field/select-count-one-field.3.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Retrieve the number of texts in all tweets
+* Expected Res : Success
+* Date : June 22nd 2020
+*/
+USE test;
+
+SELECT VALUE count(p.text)
+FROM ParquetDataset p;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-count-one-field/select-count-one-field.4.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-count-one-field/select-count-one-field.4.query.sqlpp
new file mode 100644
index 0000000..dddb9b3
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-count-one-field/select-count-one-field.4.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Retrieve the number of users
+* Expected Res : Success
+* Date : June 22nd 2020
+*/
+USE test;
+
+SELECT VALUE count(p.user.name)
+FROM ParquetDataset p;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-count-one-field/select-count-one-field.5.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-count-one-field/select-count-one-field.5.query.sqlpp
new file mode 100644
index 0000000..76ac0ce
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/select-count-one-field/select-count-one-field.5.query.sqlpp
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Retrieve the number of users (with expression pushdown)
+* Expected Res : Success
+* Date : June 22nd 2020
+*/
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+
+SELECT VALUE count(p.user.name)
+FROM ParquetDataset p;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/string-standard-utf8/string-standard-utf8.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/string-standard-utf8/string-standard-utf8.1.ddl.sqlpp
new file mode 100644
index 0000000..4e4000a
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/string-standard-utf8/string-standard-utf8.1.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Test Standard UTF-8
+* Expected Res : Success
+* Date : August 30th 2020
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE ParquetType as {
+};
+
+CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="parquet-data/reviews"),
+ ("include"="*id_name_comment.parquet"),
+ ("format" = "parquet")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/string-standard-utf8/string-standard-utf8.2.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/string-standard-utf8/string-standard-utf8.2.query.sqlpp
new file mode 100644
index 0000000..f849a1f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/string-standard-utf8/string-standard-utf8.2.query.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Test Standard UTF-8
+* Expected Res : Success
+* Date : August 30th 2020
+*/
+USE test;
+
+SELECT VALUE array_count(split(trim(p.comment),"𩸽"))
+FROM ParquetDataset p
+WHERE contains(p.comment, "𩸽");
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/string-standard-utf8/string-standard-utf8.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/string-standard-utf8/string-standard-utf8.3.query.sqlpp
new file mode 100644
index 0000000..0267e54
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/string-standard-utf8/string-standard-utf8.3.query.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Test Standard UTF-8
+* Expected Res : Success
+* Date : August 30th 2020
+*/
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+
+SELECT VALUE array_count(split(trim(p.comment),"𩸽"))
+FROM ParquetDataset p
+WHERE contains(p.comment, "𩸽");
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
index 473319c..705c8ca 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
@@ -9,7 +9,7 @@
"active\.stop\.timeout" : 3600,
"active\.suspend\.timeout" : 3600,
"compiler\.arrayindex" : false,
- "compiler\.external\.field\.pushdown" : false,
+ "compiler\.external\.field\.pushdown" : true,
"compiler\.framesize" : 32768,
"compiler\.groupmemory" : 163840,
"compiler\.indexonly" : true,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
index 171ead0..924a6a1 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
@@ -9,7 +9,7 @@
"active\.stop\.timeout" : 3600,
"active\.suspend\.timeout" : 3600,
"compiler\.arrayindex" : false,
- "compiler\.external\.field\.pushdown" : false,
+ "compiler\.external\.field\.pushdown" : true,
"compiler\.framesize" : 32768,
"compiler\.groupmemory" : 163840,
"compiler\.indexonly" : true,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
index 4a6aff5..25f0410 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
@@ -9,7 +9,7 @@
"active\.stop\.timeout" : 3600,
"active\.suspend\.timeout" : 3600,
"compiler\.arrayindex" : false,
- "compiler\.external\.field\.pushdown" : false,
+ "compiler\.external\.field\.pushdown" : true,
"compiler\.framesize" : 32768,
"compiler\.groupmemory" : 163840,
"compiler\.indexonly" : true,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.02.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.02.adm
new file mode 100644
index 0000000..7e235c5
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.02.adm
@@ -0,0 +1,7 @@
+{ "id": 8, "age": 10 }
+{ "id": 9, "age": 20 }
+{ "id": 10, "age": 30 }
+{ "id": 11, "age": 40 }
+{ "id": 12, "age": 50 }
+{ "id": 13, "age": 60 }
+{ "id": 14, "age": 70 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.03.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.03.adm
new file mode 100644
index 0000000..5260a0a
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.03.adm
@@ -0,0 +1,22 @@
+distribute result [$$p]
+-- DISTRIBUTE_RESULT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$p])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- SORT_MERGE_EXCHANGE [$$13(ASC) ] |PARTITIONED|
+ order (ASC, $$13)
+ -- STABLE_SORT [$$13(ASC)] |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ assign [$$13] <- [$$p.getField("id")]
+ -- ASSIGN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$p] <- test.ParquetDataset
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.04.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.04.adm
new file mode 100644
index 0000000..7e235c5
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.04.adm
@@ -0,0 +1,7 @@
+{ "id": 8, "age": 10 }
+{ "id": 9, "age": 20 }
+{ "id": 10, "age": 30 }
+{ "id": 11, "age": 40 }
+{ "id": 12, "age": 50 }
+{ "id": 13, "age": 60 }
+{ "id": 14, "age": 70 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.05.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.05.adm
new file mode 100644
index 0000000..5260a0a
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.05.adm
@@ -0,0 +1,22 @@
+distribute result [$$p]
+-- DISTRIBUTE_RESULT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$p])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- SORT_MERGE_EXCHANGE [$$13(ASC) ] |PARTITIONED|
+ order (ASC, $$13)
+ -- STABLE_SORT [$$13(ASC)] |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ assign [$$13] <- [$$p.getField("id")]
+ -- ASSIGN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$p] <- test.ParquetDataset
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.06.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.06.adm
new file mode 100644
index 0000000..88b6965
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.06.adm
@@ -0,0 +1,7 @@
+{ "p1": { "id": 8, "age": 10 }, "id": 8 }
+{ "p1": { "id": 9, "age": 20 }, "id": 9 }
+{ "p1": { "id": 10, "age": 30 }, "id": 10 }
+{ "p1": { "id": 11, "age": 40 }, "id": 11 }
+{ "p1": { "id": 12, "age": 50 }, "id": 12 }
+{ "p1": { "id": 13, "age": 60 }, "id": 13 }
+{ "p1": { "id": 14, "age": 70 }, "id": 14 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.07.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.07.adm
new file mode 100644
index 0000000..48d64c1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.07.adm
@@ -0,0 +1,46 @@
+distribute result [$$28]
+-- DISTRIBUTE_RESULT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$28])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$28] <- [{"p1": $$p1, "id": $$30}]
+ -- ASSIGN |PARTITIONED|
+ exchange
+ -- SORT_MERGE_EXCHANGE [$$30(ASC) ] |PARTITIONED|
+ order (ASC, $$30)
+ -- STABLE_SORT [$$30(ASC)] |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$p1, $$30])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ join (eq($$31, $$30))
+ -- HYBRID_HASH_JOIN [$$31][$$30] |PARTITIONED|
+ exchange
+ -- HASH_PARTITION_EXCHANGE [$$31] |PARTITIONED|
+ assign [$$31] <- [$$p1.getField("id")]
+ -- ASSIGN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$p1] <- test.ParquetDataset
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ exchange
+ -- HASH_PARTITION_EXCHANGE [$$30] |PARTITIONED|
+ project ([$$30])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$30] <- [$$p2.getField("id")]
+ -- ASSIGN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$p2] <- test.ParquetDataset2
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.08.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.08.adm
new file mode 100644
index 0000000..88b6965
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.08.adm
@@ -0,0 +1,7 @@
+{ "p1": { "id": 8, "age": 10 }, "id": 8 }
+{ "p1": { "id": 9, "age": 20 }, "id": 9 }
+{ "p1": { "id": 10, "age": 30 }, "id": 10 }
+{ "p1": { "id": 11, "age": 40 }, "id": 11 }
+{ "p1": { "id": 12, "age": 50 }, "id": 12 }
+{ "p1": { "id": 13, "age": 60 }, "id": 13 }
+{ "p1": { "id": 14, "age": 70 }, "id": 14 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.09.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.09.adm
new file mode 100644
index 0000000..d22d967
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.09.adm
@@ -0,0 +1,46 @@
+distribute result [$$28]
+-- DISTRIBUTE_RESULT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$28])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$28] <- [{"p1": $$p1, "id": $$30}]
+ -- ASSIGN |PARTITIONED|
+ exchange
+ -- SORT_MERGE_EXCHANGE [$$30(ASC) ] |PARTITIONED|
+ order (ASC, $$30)
+ -- STABLE_SORT [$$30(ASC)] |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$p1, $$30])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ join (eq($$31, $$30))
+ -- HYBRID_HASH_JOIN [$$31][$$30] |PARTITIONED|
+ exchange
+ -- HASH_PARTITION_EXCHANGE [$$31] |PARTITIONED|
+ assign [$$31] <- [$$p1.getField("id")]
+ -- ASSIGN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$p1] <- test.ParquetDataset
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ exchange
+ -- HASH_PARTITION_EXCHANGE [$$30] |PARTITIONED|
+ project ([$$30])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$30] <- [$$p2.getField("id")]
+ -- ASSIGN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$p2] <- test.ParquetDataset2 project (id)
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.10.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.10.adm
new file mode 100644
index 0000000..aa3f801
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.10.adm
@@ -0,0 +1 @@
+{ "age": 10, "name": "William" }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.11.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.11.adm
new file mode 100644
index 0000000..8eb02fe
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.11.adm
@@ -0,0 +1,50 @@
+distribute result [$$29]
+-- DISTRIBUTE_RESULT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$29])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$29] <- [{"age": $$34, "name": $$35}]
+ -- ASSIGN |PARTITIONED|
+ project ([$$34, $$35])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- SORT_MERGE_EXCHANGE [$$32(ASC) ] |PARTITIONED|
+ order (ASC, $$32)
+ -- STABLE_SORT [$$32(ASC)] |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$34, $$35, $$32])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ join (eq($$31, $$32))
+ -- HYBRID_HASH_JOIN [$$31][$$32] |PARTITIONED|
+ exchange
+ -- HASH_PARTITION_EXCHANGE [$$31] |PARTITIONED|
+ project ([$$34, $$31])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$34, $$31] <- [$$p1.getField("age"), $$p1.getField("id")]
+ -- ASSIGN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$p1] <- test.ParquetDataset
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ exchange
+ -- HASH_PARTITION_EXCHANGE [$$32] |PARTITIONED|
+ project ([$$35, $$32])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$35, $$32] <- [$$p2.getField("name"), $$p2.getField("id")]
+ -- ASSIGN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$p2] <- test.ParquetDataset3
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.12.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.12.adm
new file mode 100644
index 0000000..aa3f801
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.12.adm
@@ -0,0 +1 @@
+{ "age": 10, "name": "William" }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.13.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.13.adm
new file mode 100644
index 0000000..afd33c4
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.13.adm
@@ -0,0 +1,50 @@
+distribute result [$$29]
+-- DISTRIBUTE_RESULT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$29])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$29] <- [{"age": $$34, "name": $$35}]
+ -- ASSIGN |PARTITIONED|
+ project ([$$34, $$35])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- SORT_MERGE_EXCHANGE [$$32(ASC) ] |PARTITIONED|
+ order (ASC, $$32)
+ -- STABLE_SORT [$$32(ASC)] |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$34, $$35, $$32])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ join (eq($$31, $$32))
+ -- HYBRID_HASH_JOIN [$$31][$$32] |PARTITIONED|
+ exchange
+ -- HASH_PARTITION_EXCHANGE [$$31] |PARTITIONED|
+ project ([$$34, $$31])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$34, $$31] <- [$$p1.getField("age"), $$p1.getField("id")]
+ -- ASSIGN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$p1] <- test.ParquetDataset project (age, id)
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ exchange
+ -- HASH_PARTITION_EXCHANGE [$$32] |PARTITIONED|
+ project ([$$35, $$32])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$35, $$32] <- [$$p2.getField("name"), $$p2.getField("id")]
+ -- ASSIGN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$p2] <- test.ParquetDataset3 project (name, id)
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.14.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.14.adm
new file mode 100644
index 0000000..a1ad24e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.14.adm
@@ -0,0 +1,2 @@
+{ "id": 1, "name": "string" }
+{ "id": 1, "name": "string" }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.15.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.15.adm
new file mode 100644
index 0000000..dc8c103
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.15.adm
@@ -0,0 +1,32 @@
+distribute result [$$17]
+-- DISTRIBUTE_RESULT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$17])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$17] <- [{"id": $$21, "name": $$22}]
+ -- ASSIGN |PARTITIONED|
+ project ([$$21, $$22])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- SORT_MERGE_EXCHANGE [$$20(ASC) ] |PARTITIONED|
+ order (ASC, $$20)
+ -- STABLE_SORT [$$20(ASC)] |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$21, $$22, $$20])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$22, $$21] <- [$$19.getField("name"), $$19.getField("id")]
+ -- ASSIGN |PARTITIONED|
+ project ([$$19, $$20])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$19, $$20] <- [$$p.getField("user"), $$p.getField("id")]
+ -- ASSIGN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$p] <- test.ParquetDataset4
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.16.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.16.adm
new file mode 100644
index 0000000..a1ad24e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.16.adm
@@ -0,0 +1,2 @@
+{ "id": 1, "name": "string" }
+{ "id": 1, "name": "string" }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.17.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.17.adm
new file mode 100644
index 0000000..d812b1f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.17.adm
@@ -0,0 +1,32 @@
+distribute result [$$17]
+-- DISTRIBUTE_RESULT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$17])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$17] <- [{"id": $$21, "name": $$22}]
+ -- ASSIGN |PARTITIONED|
+ project ([$$21, $$22])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- SORT_MERGE_EXCHANGE [$$20(ASC) ] |PARTITIONED|
+ order (ASC, $$20)
+ -- STABLE_SORT [$$20(ASC)] |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$21, $$22, $$20])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$22, $$21] <- [$$19.getField("name"), $$19.getField("id")]
+ -- ASSIGN |PARTITIONED|
+ project ([$$19, $$20])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$19, $$20] <- [$$p.getField("user"), $$p.getField("id")]
+ -- ASSIGN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$p] <- test.ParquetDataset4 project (user, id)
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.18.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.18.adm
new file mode 100644
index 0000000..d812b1f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/expression-pushdown/expression-pushdown.18.adm
@@ -0,0 +1,32 @@
+distribute result [$$17]
+-- DISTRIBUTE_RESULT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$17])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$17] <- [{"id": $$21, "name": $$22}]
+ -- ASSIGN |PARTITIONED|
+ project ([$$21, $$22])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- SORT_MERGE_EXCHANGE [$$20(ASC) ] |PARTITIONED|
+ order (ASC, $$20)
+ -- STABLE_SORT [$$20(ASC)] |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$21, $$22, $$20])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$22, $$21] <- [$$19.getField("name"), $$19.getField("id")]
+ -- ASSIGN |PARTITIONED|
+ project ([$$19, $$20])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$19, $$20] <- [$$p.getField("user"), $$p.getField("id")]
+ -- ASSIGN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$p] <- test.ParquetDataset4 project (user, id)
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/missing-fields/missing-fields.2.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/missing-fields/missing-fields.2.adm
new file mode 100644
index 0000000..8876910
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/missing-fields/missing-fields.2.adm
@@ -0,0 +1,2 @@
+{ "f1": true, "f2": true }
+{ "f1": true, "f2": true }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/missing-fields/missing-fields.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/missing-fields/missing-fields.3.adm
new file mode 100644
index 0000000..8876910
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/missing-fields/missing-fields.3.adm
@@ -0,0 +1,2 @@
+{ "f1": true, "f2": true }
+{ "f1": true, "f2": true }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.2.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.2.adm
new file mode 100644
index 0000000..0e2b980
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.2.adm
@@ -0,0 +1,14 @@
+{ "id": 1, "name": "John" }
+{ "id": 2, "name": "Abel" }
+{ "id": 3, "name": "Sandy" }
+{ "id": 4, "name": "Alex" }
+{ "id": 5, "name": "Mike" }
+{ "id": 6, "name": "Tom" }
+{ "id": 7, "name": "Jerry" }
+{ "id": 8, "age": 10 }
+{ "id": 9, "age": 20 }
+{ "id": 10, "age": 30 }
+{ "id": 11, "age": 40 }
+{ "id": 12, "age": 50 }
+{ "id": 13, "age": 60 }
+{ "id": 14, "age": 70 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.3.adm
new file mode 100644
index 0000000..8df138d
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.3.adm
@@ -0,0 +1,14 @@
+{ "id": 8, "age": 10 }
+{ "id": 9, "age": 20 }
+{ "id": 10, "age": 30 }
+{ "id": 11, "age": 40 }
+{ "id": 12, "age": 50 }
+{ "id": 13, "age": 60 }
+{ "id": 14, "age": 70 }
+{ "id": 15, "age": "10" }
+{ "id": 16, "age": "20" }
+{ "id": 17, "age": "30" }
+{ "id": 18, "age": "40" }
+{ "id": 19, "age": "50" }
+{ "id": 20, "age": "60" }
+{ "id": 21, "age": "70" }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.4.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.4.adm
new file mode 100644
index 0000000..9256ef5
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/multi-file-multi-schema/multi-file-multi-schema.4.adm
@@ -0,0 +1,14 @@
+10
+20
+30
+40
+50
+60
+70
+"10"
+"20"
+"30"
+"40"
+"50"
+"60"
+"70"
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/object-concat/object-concat.2.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/object-concat/object-concat.2.adm
new file mode 100644
index 0000000..1b425f7
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/object-concat/object-concat.2.adm
@@ -0,0 +1,2 @@
+"string"
+"string"
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/object-concat/object-concat.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/object-concat/object-concat.3.adm
new file mode 100644
index 0000000..a097e74
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/object-concat/object-concat.3.adm
@@ -0,0 +1,24 @@
+distribute result [$$16]
+-- DISTRIBUTE_RESULT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$16])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- SORT_MERGE_EXCHANGE [$$18(ASC) ] |PARTITIONED|
+ order (ASC, $$18)
+ -- STABLE_SORT [$$18(ASC)] |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$16, $$18])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$16, $$18] <- [object-concat($$p.getField("coordinates"), $$p.getField("user")).getField("name"), $$p.getField("id")]
+ -- ASSIGN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$p] <- test.ParquetDataset project (coordinates, user, id)
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/object-concat/object-concat.4.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/object-concat/object-concat.4.adm
new file mode 100644
index 0000000..e22e026
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/object-concat/object-concat.4.adm
@@ -0,0 +1,2 @@
+{ "contributors_enabled": true, "created_at": "string", "default_profile": true, "default_profile_image": true, "description": "string", "favourites_count": 1, "followers_count": 1, "friends_count": 1, "geo_enabled": true, "id": 1, "id_str": "string", "is_translator": true, "lang": "string", "listed_count": 1, "location": "string", "name": "string", "profile_background_color": "string", "profile_background_image_url": "string", "profile_background_image_url_https": "string", "profile_background_tile": true, "profile_banner_url": "string", "profile_image_url": "string", "profile_image_url_https": "string", "profile_link_color": "string", "profile_sidebar_border_color": "string", "profile_sidebar_fill_color": "string", "profile_text_color": "string", "profile_use_background_image": true, "protected": true, "screen_name": "string", "statuses_count": 1, "time_zone": "string", "url": "string", "utc_offset": 1, "verified": true, "coordinates": [ 1.1 ], "type": "string" }
+{ "contributors_enabled": true, "created_at": "string", "default_profile": true, "default_profile_image": true, "description": "string", "favourites_count": 1, "followers_count": 1, "friends_count": 1, "geo_enabled": true, "id": 1, "id_str": "string", "is_translator": true, "lang": "string", "listed_count": 1, "location": "string", "name": "string", "profile_background_color": "string", "profile_background_image_url": "string", "profile_background_image_url_https": "string", "profile_background_tile": true, "profile_banner_url": "string", "profile_image_url": "string", "profile_image_url_https": "string", "profile_link_color": "string", "profile_sidebar_border_color": "string", "profile_sidebar_fill_color": "string", "profile_text_color": "string", "profile_use_background_image": true, "protected": true, "screen_name": "string", "statuses_count": 1, "time_zone": "string", "url": "string", "utc_offset": 1, "verified": true, "coordinates": [ 1.1 ], "type": "string" }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/object-concat/object-concat.5.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/object-concat/object-concat.5.adm
new file mode 100644
index 0000000..a5f69e0
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/object-concat/object-concat.5.adm
@@ -0,0 +1,28 @@
+distribute result [$$15]
+-- DISTRIBUTE_RESULT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$15])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$15] <- [object-concat($$18, $$19)]
+ -- ASSIGN |PARTITIONED|
+ project ([$$18, $$19])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- SORT_MERGE_EXCHANGE [$$17(ASC) ] |PARTITIONED|
+ order (ASC, $$17)
+ -- STABLE_SORT [$$17(ASC)] |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$18, $$19, $$17])
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$19, $$18, $$17] <- [$$p.getField("user"), $$p.getField("coordinates"), $$p.getField("id")]
+ -- ASSIGN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$p] <- test.ParquetDataset project (user, coordinates, id)
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-all-fields/select-all-fields.2.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-all-fields/select-all-fields.2.adm
new file mode 100644
index 0000000..53f2518
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-all-fields/select-all-fields.2.adm
@@ -0,0 +1,2 @@
+{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": "string", "entities": { "urls": [ { "display_url": "string", "expanded_url": "string", "indices": [ 1 ], "url": "string" } ], "user_mentions": [ { "id": 1, "id_str": "string", "indices": [ 1 ], "name": "string", "screen_name": "string" } ] }, "favorite_count": 1, "favorited": true, "filter_level": "string", "geo": { "coordinates": [ 1.1 ], "type": "string" }, "id": "0000000", "id_str": "string", "in_reply_to_screen_name": "string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", "in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", "is_quote_status": true, "lang": "string", "place": { "bounding_box": { "coordinates": [ [ [ 1.1 ] ] ], "type": "string" }, "country": "string", "country_code": "string", "full_name": "string", "id": "string", "name": "string", "place_type": "string", "url": "string" }, "possibly_sensitive": true, "quoted_status": { "created_at": "string", "entities": { "user_mentions": [ { "id": 1, "id_str": "string", "indices": [ 1 ], "name": "string", "screen_name": "string" } ] }, "favorite_count": 1, "favorited": true, "filter_level": "string", "id": 1, "id_str": "string", "in_reply_to_screen_name": "string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", "in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", "is_quote_status": true, "lang": "string", "retweet_count": 1, "retweeted": true, "source": "string", "text": "string", "truncated": true, "user": { "contributors_enabled": true, "created_at": "string", "default_profile": true, "default_profile_image": true, "description": "string", "favourites_count": 1, "followers_count": 1, "friends_count": 1, "geo_enabled": true, "id": 1, "id_str": "string", "is_translator": true, "lang": "string", "listed_count": 1, "name": "string", "profile_background_color": "string", "profile_background_image_url": "string", "profile_background_image_url_https": "string", "profile_background_tile": true, "profile_banner_url": "string", "profile_image_url": "string", "profile_image_url_https": "string", "profile_link_color": "string", "profile_sidebar_border_color": "string", "profile_sidebar_fill_color": "string", "profile_text_color": "string", "profile_use_background_image": true, "protected": true, "screen_name": "string", "statuses_count": 1, "verified": true } }, "quoted_status_id": 1, "quoted_status_id_str": "string", "retweet_count": 1, "retweeted": true, "source": "string", "text": "string", "timestamp_ms": "string", "truncated": true, "user": { "contributors_enabled": true, "created_at": "string", "default_profile": true, "default_profile_image": true, "description": "string", "favourites_count": 1, "followers_count": 1, "friends_count": 1, "geo_enabled": true, "id": 1, "id_str": "string", "is_translator": true, "lang": "string", "listed_count": 1, "location": "string", "name": "string", "profile_background_color": "string", "profile_background_image_url": "string", "profile_background_image_url_https": "string", "profile_background_tile": true, "profile_banner_url": "string", "profile_image_url": "string", "profile_image_url_https": "string", "profile_link_color": "string", "profile_sidebar_border_color": "string", "profile_sidebar_fill_color": "string", "profile_text_color": "string", "profile_use_background_image": true, "protected": true, "screen_name": "string", "statuses_count": 1, "time_zone": "string", "url": "string", "utc_offset": 1, "verified": true } }
+{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": "string", "favorite_count": 1, "favorited": true, "filter_level": "string", "geo": { "coordinates": [ 1.1 ], "type": "string" }, "id": "11111111111111111111", "id_str": "string", "in_reply_to_screen_name": "string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", "in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", "is_quote_status": true, "lang": "string", "place": { "bounding_box": { "coordinates": [ [ [ 1.1 ] ] ], "type": "string" }, "country": "string", "country_code": "string", "full_name": "string", "id": "string", "name": "string", "place_type": "string", "url": "string" }, "possibly_sensitive": true, "quoted_status": { "created_at": "string", "entities": { "user_mentions": [ { "id": 1, "id_str": "string", "indices": [ 1 ], "name": "string", "screen_name": "string" } ] }, "favorite_count": 1, "favorited": true, "filter_level": "string", "id": 1, "id_str": "string", "in_reply_to_screen_name": "string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", "in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", "is_quote_status": true, "lang": "string", "retweet_count": 1, "retweeted": true, "source": "string", "text": "string", "truncated": true, "user": { "contributors_enabled": true, "created_at": "string", "default_profile": true, "default_profile_image": true, "description": "string", "favourites_count": 1, "followers_count": 1, "friends_count": 1, "geo_enabled": true, "id": 1, "id_str": "string", "is_translator": true, "lang": "string", "listed_count": 1, "name": "string", "profile_background_color": "string", "profile_background_image_url": "string", "profile_background_image_url_https": "string", "profile_background_tile": true, "profile_banner_url": "string", "profile_image_url": "string", "profile_image_url_https": "string", "profile_link_color": "string", "profile_sidebar_border_color": "string", "profile_sidebar_fill_color": "string", "profile_text_color": "string", "profile_use_background_image": true, "protected": true, "screen_name": "string", "statuses_count": 1, "verified": true } }, "quoted_status_id": 1, "quoted_status_id_str": "string", "retweet_count": 1, "retweeted": true, "source": "string", "text": "string", "timestamp_ms": "string", "truncated": true, "user": { "contributors_enabled": true, "created_at": "string", "default_profile": true, "default_profile_image": true, "description": "string", "favourites_count": 1, "followers_count": 1, "friends_count": 1, "geo_enabled": true, "id": 1, "id_str": "string", "is_translator": true, "lang": "string", "listed_count": 1, "location": "string", "name": "string", "profile_background_color": "string", "profile_background_image_url": "string", "profile_background_image_url_https": "string", "profile_background_tile": true, "profile_banner_url": "string", "profile_image_url": "string", "profile_image_url_https": "string", "profile_link_color": "string", "profile_sidebar_border_color": "string", "profile_sidebar_fill_color": "string", "profile_text_color": "string", "profile_use_background_image": true, "protected": true, "screen_name": "string", "statuses_count": 1, "time_zone": "string", "url": "string", "utc_offset": 1, "verified": true } }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-all-fields/select-all-fields.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-all-fields/select-all-fields.3.adm
new file mode 100644
index 0000000..53f2518
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-all-fields/select-all-fields.3.adm
@@ -0,0 +1,2 @@
+{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": "string", "entities": { "urls": [ { "display_url": "string", "expanded_url": "string", "indices": [ 1 ], "url": "string" } ], "user_mentions": [ { "id": 1, "id_str": "string", "indices": [ 1 ], "name": "string", "screen_name": "string" } ] }, "favorite_count": 1, "favorited": true, "filter_level": "string", "geo": { "coordinates": [ 1.1 ], "type": "string" }, "id": "0000000", "id_str": "string", "in_reply_to_screen_name": "string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", "in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", "is_quote_status": true, "lang": "string", "place": { "bounding_box": { "coordinates": [ [ [ 1.1 ] ] ], "type": "string" }, "country": "string", "country_code": "string", "full_name": "string", "id": "string", "name": "string", "place_type": "string", "url": "string" }, "possibly_sensitive": true, "quoted_status": { "created_at": "string", "entities": { "user_mentions": [ { "id": 1, "id_str": "string", "indices": [ 1 ], "name": "string", "screen_name": "string" } ] }, "favorite_count": 1, "favorited": true, "filter_level": "string", "id": 1, "id_str": "string", "in_reply_to_screen_name": "string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", "in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", "is_quote_status": true, "lang": "string", "retweet_count": 1, "retweeted": true, "source": "string", "text": "string", "truncated": true, "user": { "contributors_enabled": true, "created_at": "string", "default_profile": true, "default_profile_image": true, "description": "string", "favourites_count": 1, "followers_count": 1, "friends_count": 1, "geo_enabled": true, "id": 1, "id_str": "string", "is_translator": true, "lang": "string", "listed_count": 1, "name": "string", "profile_background_color": "string", "profile_background_image_url": "string", "profile_background_image_url_https": "string", "profile_background_tile": true, "profile_banner_url": "string", "profile_image_url": "string", "profile_image_url_https": "string", "profile_link_color": "string", "profile_sidebar_border_color": "string", "profile_sidebar_fill_color": "string", "profile_text_color": "string", "profile_use_background_image": true, "protected": true, "screen_name": "string", "statuses_count": 1, "verified": true } }, "quoted_status_id": 1, "quoted_status_id_str": "string", "retweet_count": 1, "retweeted": true, "source": "string", "text": "string", "timestamp_ms": "string", "truncated": true, "user": { "contributors_enabled": true, "created_at": "string", "default_profile": true, "default_profile_image": true, "description": "string", "favourites_count": 1, "followers_count": 1, "friends_count": 1, "geo_enabled": true, "id": 1, "id_str": "string", "is_translator": true, "lang": "string", "listed_count": 1, "location": "string", "name": "string", "profile_background_color": "string", "profile_background_image_url": "string", "profile_background_image_url_https": "string", "profile_background_tile": true, "profile_banner_url": "string", "profile_image_url": "string", "profile_image_url_https": "string", "profile_link_color": "string", "profile_sidebar_border_color": "string", "profile_sidebar_fill_color": "string", "profile_text_color": "string", "profile_use_background_image": true, "protected": true, "screen_name": "string", "statuses_count": 1, "time_zone": "string", "url": "string", "utc_offset": 1, "verified": true } }
+{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": "string", "favorite_count": 1, "favorited": true, "filter_level": "string", "geo": { "coordinates": [ 1.1 ], "type": "string" }, "id": "11111111111111111111", "id_str": "string", "in_reply_to_screen_name": "string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", "in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", "is_quote_status": true, "lang": "string", "place": { "bounding_box": { "coordinates": [ [ [ 1.1 ] ] ], "type": "string" }, "country": "string", "country_code": "string", "full_name": "string", "id": "string", "name": "string", "place_type": "string", "url": "string" }, "possibly_sensitive": true, "quoted_status": { "created_at": "string", "entities": { "user_mentions": [ { "id": 1, "id_str": "string", "indices": [ 1 ], "name": "string", "screen_name": "string" } ] }, "favorite_count": 1, "favorited": true, "filter_level": "string", "id": 1, "id_str": "string", "in_reply_to_screen_name": "string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", "in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", "is_quote_status": true, "lang": "string", "retweet_count": 1, "retweeted": true, "source": "string", "text": "string", "truncated": true, "user": { "contributors_enabled": true, "created_at": "string", "default_profile": true, "default_profile_image": true, "description": "string", "favourites_count": 1, "followers_count": 1, "friends_count": 1, "geo_enabled": true, "id": 1, "id_str": "string", "is_translator": true, "lang": "string", "listed_count": 1, "name": "string", "profile_background_color": "string", "profile_background_image_url": "string", "profile_background_image_url_https": "string", "profile_background_tile": true, "profile_banner_url": "string", "profile_image_url": "string", "profile_image_url_https": "string", "profile_link_color": "string", "profile_sidebar_border_color": "string", "profile_sidebar_fill_color": "string", "profile_text_color": "string", "profile_use_background_image": true, "protected": true, "screen_name": "string", "statuses_count": 1, "verified": true } }, "quoted_status_id": 1, "quoted_status_id_str": "string", "retweet_count": 1, "retweeted": true, "source": "string", "text": "string", "timestamp_ms": "string", "truncated": true, "user": { "contributors_enabled": true, "created_at": "string", "default_profile": true, "default_profile_image": true, "description": "string", "favourites_count": 1, "followers_count": 1, "friends_count": 1, "geo_enabled": true, "id": 1, "id_str": "string", "is_translator": true, "lang": "string", "listed_count": 1, "location": "string", "name": "string", "profile_background_color": "string", "profile_background_image_url": "string", "profile_background_image_url_https": "string", "profile_background_tile": true, "profile_banner_url": "string", "profile_image_url": "string", "profile_image_url_https": "string", "profile_link_color": "string", "profile_sidebar_border_color": "string", "profile_sidebar_fill_color": "string", "profile_text_color": "string", "profile_use_background_image": true, "protected": true, "screen_name": "string", "statuses_count": 1, "time_zone": "string", "url": "string", "utc_offset": 1, "verified": true } }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-count-one-field/select-count-one-field.2.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-count-one-field/select-count-one-field.2.adm
new file mode 100644
index 0000000..d8263ee
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-count-one-field/select-count-one-field.2.adm
@@ -0,0 +1 @@
+2
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-count-one-field/select-count-one-field.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-count-one-field/select-count-one-field.3.adm
new file mode 100644
index 0000000..d8263ee
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-count-one-field/select-count-one-field.3.adm
@@ -0,0 +1 @@
+2
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-count-one-field/select-count-one-field.4.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-count-one-field/select-count-one-field.4.adm
new file mode 100644
index 0000000..d8263ee
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-count-one-field/select-count-one-field.4.adm
@@ -0,0 +1 @@
+2
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-count-one-field/select-count-one-field.5.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-count-one-field/select-count-one-field.5.adm
new file mode 100644
index 0000000..d8263ee
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/select-count-one-field/select-count-one-field.5.adm
@@ -0,0 +1 @@
+2
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/string-standard-utf8/string-standard-utf8.2.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/string-standard-utf8/string-standard-utf8.2.adm
new file mode 100644
index 0000000..6fb86be
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/string-standard-utf8/string-standard-utf8.2.adm
@@ -0,0 +1,2 @@
+2
+301
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/string-standard-utf8/string-standard-utf8.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/string-standard-utf8/string-standard-utf8.3.adm
new file mode 100644
index 0000000..6fb86be
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/string-standard-utf8/string-standard-utf8.3.adm
@@ -0,0 +1,2 @@
+2
+301
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index b354e65..e3295fd 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -76,6 +76,50 @@
<output-dir compare="Text">common/tsv/mixed</output-dir>
</compilation-unit>
</test-case>
+ <!-- Parquet Tests Start -->
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/parquet/select-all-fields">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/parquet/select-all-fields</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/parquet/select-count-one-field">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/parquet/select-count-one-field</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/parquet/multi-file-multi-schema">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/parquet/multi-file-multi-schema</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/parquet/expression-pushdown">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/parquet/expression-pushdown</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/parquet/missing-fields">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/parquet/missing-fields</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/parquet/string-standard-utf8">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/parquet/string-standard-utf8</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/parquet/object-concat">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/parquet/object-concat</output-dir>
+ </compilation-unit>
+ </test-case>
+ <!-- Parquet Tests End -->
<test-case FilePath="external-dataset">
<compilation-unit name="common/empty-string-definition">
<placeholder name="adapter" value="S3" />
diff --git a/asterixdb/asterix-external-data/pom.xml b/asterixdb/asterix-external-data/pom.xml
index a193e31..9ca2386 100644
--- a/asterixdb/asterix-external-data/pom.xml
+++ b/asterixdb/asterix-external-data/pom.xml
@@ -484,5 +484,9 @@
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-1.2-api</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-aws</artifactId>
+ </dependency>
</dependencies>
</project>
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordReaderFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordReaderFactory.java
index a936e86..8849508 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordReaderFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IRecordReaderFactory.java
@@ -19,22 +19,34 @@
package org.apache.asterix.external.api;
import java.util.List;
+import java.util.Set;
+import org.apache.asterix.external.util.ExternalDataConstants;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.exceptions.HyracksDataException;
public interface IRecordReaderFactory<T> extends IExternalDataSourceFactory {
- public IRecordReader<? extends T> createRecordReader(IHyracksTaskContext ctx, int partition)
- throws HyracksDataException;
+ IRecordReader<? extends T> createRecordReader(IHyracksTaskContext ctx, int partition) throws HyracksDataException;
- public Class<?> getRecordClass();
+ Class<?> getRecordClass();
@Override
- public default DataSourceType getDataSourceType() {
+ default DataSourceType getDataSourceType() {
return DataSourceType.RECORDS;
}
- public List<String> getRecordReaderNames();
+ List<String> getRecordReaderNames();
+
+ /**
+ * Usually there is only a single adapter with a specific name.
+ * When two or more adapters share the same name, only one can support {@link ExternalDataConstants#ALL_FORMATS}.
+ * Other adapters must have only a subset of {@link ExternalDataConstants#ALL_FORMATS}.
+ *
+ * @return adapter's supported formats
+ */
+ default Set<String> getReaderSupportedFormats() {
+ return ExternalDataConstants.ALL_FORMATS;
+ }
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
index f436aa1..469677c 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
@@ -48,6 +48,7 @@
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.api.application.ICCServiceContext;
import org.apache.hyracks.api.application.IServiceContext;
import org.apache.hyracks.api.context.IHyracksTaskContext;
@@ -59,7 +60,9 @@
public class HDFSDataSourceFactory implements IRecordReaderFactory<Object>, IIndexibleExternalDataSource {
- protected static final long serialVersionUID = 1L;
+ private static final long serialVersionUID = 1L;
+ private static final List<String> recordReaderNames = Collections.singletonList("hdfs");
+
protected transient AlgebricksAbsolutePartitionConstraint clusterLocations;
protected transient IServiceContext serviceCtx;
protected String[] readSchedule;
@@ -79,16 +82,24 @@
private InputSplit[] inputSplits;
private String nodeName;
private Class recordReaderClazz;
- private static final List<String> recordReaderNames = Collections.unmodifiableList(Arrays.asList("hdfs"));
@Override
public void configure(IServiceContext serviceCtx, Map<String, String> configuration,
- IWarningCollector warningCollector) throws AsterixException {
+ IWarningCollector warningCollector) throws AlgebricksException, HyracksDataException {
+ JobConf hdfsConf = createHdfsConf(serviceCtx, configuration);
+ configureHdfsConf(hdfsConf, configuration);
+ }
+
+ protected JobConf createHdfsConf(IServiceContext serviceCtx, Map<String, String> configuration)
+ throws HyracksDataException {
+ this.serviceCtx = serviceCtx;
+ this.configuration = configuration;
+ init((ICCServiceContext) serviceCtx);
+ return HDFSUtils.configureHDFSJobConf(configuration);
+ }
+
+ protected void configureHdfsConf(JobConf conf, Map<String, String> configuration) throws AlgebricksException {
try {
- this.serviceCtx = serviceCtx;
- this.configuration = configuration;
- init((ICCServiceContext) serviceCtx);
- JobConf conf = HDFSUtils.configureHDFSJobConf(configuration);
confFactory = new ConfFactory(conf);
clusterLocations = getPartitionConstraint();
int numPartitions = clusterLocations.getLocations().length;
@@ -109,7 +120,8 @@
Arrays.fill(read, false);
String formatString = configuration.get(ExternalDataConstants.KEY_FORMAT);
if (formatString == null || formatString.equals(ExternalDataConstants.FORMAT_HDFS_WRITABLE)
- || formatString.equals(ExternalDataConstants.FORMAT_NOOP)) {
+ || formatString.equals(ExternalDataConstants.FORMAT_NOOP)
+ || formatString.equals(ExternalDataConstants.FORMAT_PARQUET)) {
RecordReader<?, ?> reader = conf.getInputFormat().getRecordReader(inputSplits[0], conf, Reporter.NULL);
this.recordClass = reader.createValue().getClass();
reader.close();
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/abstracts/AbstractExternalInputStreamFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/abstracts/AbstractExternalInputStreamFactory.java
index 2a063bf..eac4835 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/abstracts/AbstractExternalInputStreamFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/abstracts/AbstractExternalInputStreamFactory.java
@@ -18,24 +18,16 @@
*/
package org.apache.asterix.external.input.record.reader.abstracts;
-import static org.apache.asterix.external.util.ExternalDataConstants.*;
-
import java.io.Serializable;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.BiPredicate;
import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.util.regex.PatternSyntaxException;
import org.apache.asterix.common.dataflow.ICcApplicationContext;
-import org.apache.asterix.common.exceptions.CompilationException;
-import org.apache.asterix.common.exceptions.ErrorCode;
import org.apache.asterix.external.api.AsterixInputStream;
import org.apache.asterix.external.api.IInputStreamFactory;
-import org.apache.asterix.external.util.ExternalDataUtils;
import org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.api.application.IServiceContext;
@@ -82,39 +74,6 @@
((ICcApplicationContext) ctx.getApplicationContext()).getClusterStateManager().getClusterLocations();
}
- protected IncludeExcludeMatcher getIncludeExcludeMatchers() throws CompilationException {
- // Get and compile the patterns for include/exclude if provided
- List<Matcher> includeMatchers = new ArrayList<>();
- List<Matcher> excludeMatchers = new ArrayList<>();
- String pattern = null;
- try {
- for (Map.Entry<String, String> entry : configuration.entrySet()) {
- if (entry.getKey().startsWith(KEY_INCLUDE)) {
- pattern = entry.getValue();
- includeMatchers.add(Pattern.compile(ExternalDataUtils.patternToRegex(pattern)).matcher(""));
- } else if (entry.getKey().startsWith(KEY_EXCLUDE)) {
- pattern = entry.getValue();
- excludeMatchers.add(Pattern.compile(ExternalDataUtils.patternToRegex(pattern)).matcher(""));
- }
- }
- } catch (PatternSyntaxException ex) {
- throw new CompilationException(ErrorCode.INVALID_REGEX_PATTERN, pattern);
- }
-
- IncludeExcludeMatcher includeExcludeMatcher;
- if (!includeMatchers.isEmpty()) {
- includeExcludeMatcher = new IncludeExcludeMatcher(includeMatchers,
- (matchers1, key) -> ExternalDataUtils.matchPatterns(matchers1, key));
- } else if (!excludeMatchers.isEmpty()) {
- includeExcludeMatcher = new IncludeExcludeMatcher(excludeMatchers,
- (matchers1, key) -> !ExternalDataUtils.matchPatterns(matchers1, key));
- } else {
- includeExcludeMatcher = new IncludeExcludeMatcher(Collections.emptyList(), (matchers1, key) -> true);
- }
-
- return includeExcludeMatcher;
- }
-
public static class PartitionWorkLoadBasedOnSize implements Serializable {
private static final long serialVersionUID = 1L;
private final List<String> filePaths = new ArrayList<>();
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
index 8197524..89ea39e 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
@@ -18,35 +18,20 @@
*/
package org.apache.asterix.external.input.record.reader.aws;
-import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
-import java.util.function.BiPredicate;
-import java.util.regex.Matcher;
-import org.apache.asterix.common.exceptions.CompilationException;
-import org.apache.asterix.common.exceptions.ErrorCode;
import org.apache.asterix.external.api.AsterixInputStream;
import org.apache.asterix.external.input.record.reader.abstracts.AbstractExternalInputStreamFactory;
-import org.apache.asterix.external.util.ExternalDataConstants;
import org.apache.asterix.external.util.ExternalDataUtils;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.api.application.IServiceContext;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.exceptions.IWarningCollector;
-import org.apache.hyracks.api.exceptions.Warning;
-import org.apache.hyracks.api.util.CleanupUtils;
-import software.amazon.awssdk.core.exception.SdkException;
-import software.amazon.awssdk.services.s3.S3Client;
-import software.amazon.awssdk.services.s3.model.ListObjectsRequest;
-import software.amazon.awssdk.services.s3.model.ListObjectsResponse;
-import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
-import software.amazon.awssdk.services.s3.model.ListObjectsV2Response;
-import software.amazon.awssdk.services.s3.model.S3Exception;
import software.amazon.awssdk.services.s3.model.S3Object;
public class AwsS3InputStreamFactory extends AbstractExternalInputStreamFactory {
@@ -65,156 +50,27 @@
// Ensure the validity of include/exclude
ExternalDataUtils.validateIncludeExclude(configuration);
- IncludeExcludeMatcher includeExcludeMatcher = getIncludeExcludeMatchers();
+ IncludeExcludeMatcher includeExcludeMatcher = ExternalDataUtils.getIncludeExcludeMatchers(configuration);
- // Prepare to retrieve the objects
- List<S3Object> filesOnly;
- String container = configuration.get(ExternalDataConstants.CONTAINER_NAME_FIELD_NAME);
- S3Client s3Client = ExternalDataUtils.AwsS3.buildAwsS3Client(configuration);
-
- try {
- filesOnly = listS3Objects(s3Client, container, includeExcludeMatcher);
- } catch (S3Exception ex) {
- // New API is not implemented, try falling back to old API
- try {
- // For error code, see https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html
- if (ex.awsErrorDetails().errorCode().equals(ExternalDataConstants.AwsS3.ERROR_METHOD_NOT_IMPLEMENTED)) {
- filesOnly = oldApiListS3Objects(s3Client, container, includeExcludeMatcher);
- } else {
- throw ex;
- }
- } catch (SdkException ex2) {
- throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, ex2.getMessage());
- }
- } catch (SdkException ex) {
- throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, ex.getMessage());
- } finally {
- if (s3Client != null) {
- CleanupUtils.close(s3Client, null);
- }
- }
-
- // Warn if no files are returned
- if (filesOnly.isEmpty() && warningCollector.shouldWarn()) {
- Warning warning = Warning.of(null, ErrorCode.EXTERNAL_SOURCE_CONFIGURATION_RETURNED_NO_FILES);
- warningCollector.warn(warning);
- }
-
+ //Get a list of S3 objects
+ List<S3Object> filesOnly =
+ ExternalDataUtils.AwsS3.listS3Objects(configuration, includeExcludeMatcher, warningCollector);
// Distribute work load amongst the partitions
distributeWorkLoad(filesOnly, getPartitionsCount());
}
/**
- * Uses the latest API to retrieve the objects from the storage.
- *
- * @param s3Client S3 client
- * @param container container name
- * @param includeExcludeMatcher include/exclude matchers to apply
- */
- private List<S3Object> listS3Objects(S3Client s3Client, String container,
- IncludeExcludeMatcher includeExcludeMatcher) {
- String newMarker = null;
- List<S3Object> filesOnly = new ArrayList<>();
-
- ListObjectsV2Response listObjectsResponse;
- ListObjectsV2Request.Builder listObjectsBuilder = ListObjectsV2Request.builder().bucket(container);
- listObjectsBuilder.prefix(ExternalDataUtils.getPrefix(configuration));
-
- while (true) {
- // List the objects from the start, or from the last marker in case of truncated result
- if (newMarker == null) {
- listObjectsResponse = s3Client.listObjectsV2(listObjectsBuilder.build());
- } else {
- listObjectsResponse = s3Client.listObjectsV2(listObjectsBuilder.continuationToken(newMarker).build());
- }
-
- // Collect the paths to files only
- collectAndFilterFiles(listObjectsResponse.contents(), includeExcludeMatcher.getPredicate(),
- includeExcludeMatcher.getMatchersList(), filesOnly);
-
- // Mark the flag as done if done, otherwise, get the marker of the previous response for the next request
- if (!listObjectsResponse.isTruncated()) {
- break;
- } else {
- newMarker = listObjectsResponse.nextContinuationToken();
- }
- }
-
- return filesOnly;
- }
-
- /**
- * Uses the old API (in case the new API is not implemented) to retrieve the objects from the storage
- *
- * @param s3Client S3 client
- * @param container container name
- * @param includeExcludeMatcher include/exclude matchers to apply
- */
- private List<S3Object> oldApiListS3Objects(S3Client s3Client, String container,
- IncludeExcludeMatcher includeExcludeMatcher) {
- String newMarker = null;
- List<S3Object> filesOnly = new ArrayList<>();
-
- ListObjectsResponse listObjectsResponse;
- ListObjectsRequest.Builder listObjectsBuilder = ListObjectsRequest.builder().bucket(container);
- listObjectsBuilder.prefix(ExternalDataUtils.getPrefix(configuration));
-
- while (true) {
- // List the objects from the start, or from the last marker in case of truncated result
- if (newMarker == null) {
- listObjectsResponse = s3Client.listObjects(listObjectsBuilder.build());
- } else {
- listObjectsResponse = s3Client.listObjects(listObjectsBuilder.marker(newMarker).build());
- }
-
- // Collect the paths to files only
- collectAndFilterFiles(listObjectsResponse.contents(), includeExcludeMatcher.getPredicate(),
- includeExcludeMatcher.getMatchersList(), filesOnly);
-
- // Mark the flag as done if done, otherwise, get the marker of the previous response for the next request
- if (!listObjectsResponse.isTruncated()) {
- break;
- } else {
- newMarker = listObjectsResponse.nextMarker();
- }
- }
-
- return filesOnly;
- }
-
- /**
- * AWS S3 returns all the objects as paths, not differentiating between folder and files. The path is considered
- * a file if it does not end up with a "/" which is the separator in a folder structure.
- *
- * @param s3Objects List of returned objects
- */
- private void collectAndFilterFiles(List<S3Object> s3Objects, BiPredicate<List<Matcher>, String> predicate,
- List<Matcher> matchers, List<S3Object> filesOnly) {
- for (S3Object object : s3Objects) {
- // skip folders
- if (object.key().endsWith("/")) {
- continue;
- }
-
- // No filter, add file
- if (predicate.test(matchers, object.key())) {
- filesOnly.add(object);
- }
- }
- }
-
- /**
* To efficiently utilize the parallelism, work load will be distributed amongst the partitions based on the file
* size.
- *
+ * <p>
* Example:
* File1 1mb, File2 300kb, File3 300kb, File4 300kb
- *
+ * <p>
* Distribution:
* Partition1: [File1]
* Partition2: [File2, File3, File4]
*
- * @param fileObjects AWS S3 file objects
+ * @param fileObjects AWS S3 file objects
* @param partitionsCount Partitions count
*/
private void distributeWorkLoad(List<S3Object> fileObjects, int partitionsCount) {
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java
new file mode 100644
index 0000000..c9eb489
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader.aws.parquet;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.asterix.common.exceptions.CompilationException;
+import org.apache.asterix.external.input.HDFSDataSourceFactory;
+import org.apache.asterix.external.input.record.reader.abstracts.AbstractExternalInputStreamFactory.IncludeExcludeMatcher;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.external.util.ExternalDataUtils;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.api.application.IServiceContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.exceptions.IWarningCollector;
+
+import software.amazon.awssdk.services.s3.model.S3Object;
+
+public class AwsS3ParquetReaderFactory extends HDFSDataSourceFactory {
+ private static final long serialVersionUID = -6140824803254158253L;
+ private static final List<String> recordReaderNames =
+ Collections.singletonList(ExternalDataConstants.KEY_ADAPTER_NAME_AWS_S3);
+
+ @Override
+ public void configure(IServiceContext serviceCtx, Map<String, String> configuration,
+ IWarningCollector warningCollector) throws AlgebricksException, HyracksDataException {
+ //Get path
+ String path = buildPathURIs(configuration, warningCollector);
+ //Put S3 configurations to AsterixDB's Hadoop configuration
+ putS3ConfToHadoopConf(configuration, path);
+
+ //Configure Hadoop S3 input splits
+ JobConf conf = createHdfsConf(serviceCtx, configuration);
+ ExternalDataUtils.AwsS3.configureAwsS3HdfsJobConf(conf, configuration);
+ configureHdfsConf(conf, configuration);
+ }
+
+ @Override
+ public List<String> getRecordReaderNames() {
+ return recordReaderNames;
+ }
+
+ @Override
+ public Set<String> getReaderSupportedFormats() {
+ return Collections.singleton(ExternalDataConstants.FORMAT_PARQUET);
+ }
+
+ /**
+ * Prepare Hadoop configurations to read parquet files
+ *
+ * @param path Comma-delimited paths
+ */
+ private static void putS3ConfToHadoopConf(Map<String, String> configuration, String path) {
+ configuration.put(ExternalDataConstants.KEY_PATH, path);
+ configuration.put(ExternalDataConstants.KEY_INPUT_FORMAT, ExternalDataConstants.INPUT_FORMAT_PARQUET);
+ configuration.put(ExternalDataConstants.KEY_PARSER, ExternalDataConstants.FORMAT_NOOP);
+ }
+
+ /**
+ * Build S3 path-style for the requested files
+ *
+ * @param configuration properties
+ * @param warningCollector warning collector
+ * @return Comma-delimited paths (e.g., "s3a://bucket/file1.parquet,s3a://bucket/file2.parquet")
+ * @throws CompilationException Compilation exception
+ */
+ private static String buildPathURIs(Map<String, String> configuration, IWarningCollector warningCollector)
+ throws CompilationException {
+ String container = configuration.get(ExternalDataConstants.CONTAINER_NAME_FIELD_NAME);
+ IncludeExcludeMatcher includeExcludeMatcher = ExternalDataUtils.getIncludeExcludeMatchers(configuration);
+ List<S3Object> filesOnly =
+ ExternalDataUtils.AwsS3.listS3Objects(configuration, includeExcludeMatcher, warningCollector);
+ StringBuilder builder = new StringBuilder();
+
+ if (!filesOnly.isEmpty()) {
+ appendFileURI(builder, container, filesOnly.get(0));
+ for (int i = 1; i < filesOnly.size(); i++) {
+ builder.append(',');
+ appendFileURI(builder, container, filesOnly.get(i));
+ }
+ }
+
+ return builder.toString();
+ }
+
+ private static void appendFileURI(StringBuilder builder, String container, S3Object file) {
+ builder.append(ExternalDataConstants.AwsS3.HADOOP_S3_PROTOCOL);
+ builder.append("://");
+ builder.append(container);
+ builder.append('/');
+ builder.append(file.key());
+ }
+}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/azure/AzureBlobInputStreamFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/azure/AzureBlobInputStreamFactory.java
index 803694a..e71d954 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/azure/AzureBlobInputStreamFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/azure/AzureBlobInputStreamFactory.java
@@ -76,7 +76,7 @@
Iterable<BlobItem> blobItems = blobContainer.listBlobs(listBlobsOptions, null);
// Collect the paths to files only
- IncludeExcludeMatcher includeExcludeMatcher = getIncludeExcludeMatchers();
+ IncludeExcludeMatcher includeExcludeMatcher = ExternalDataUtils.getIncludeExcludeMatchers(configuration);
collectAndFilterFiles(blobItems, includeExcludeMatcher.getPredicate(),
includeExcludeMatcher.getMatchersList(), filesOnly);
@@ -96,9 +96,9 @@
/**
* Collects and filters the files only, and excludes any folders
*
- * @param items storage items
+ * @param items storage items
* @param predicate predicate to test with for file filtration
- * @param matchers include/exclude matchers to test against
+ * @param matchers include/exclude matchers to test against
* @param filesOnly List containing the files only (excluding folders)
*/
private void collectAndFilterFiles(Iterable<BlobItem> items, BiPredicate<List<Matcher>, String> predicate,
@@ -121,15 +121,15 @@
/**
* To efficiently utilize the parallelism, work load will be distributed amongst the partitions based on the file
* size.
- *
+ * <p>
* Example:
* File1 1mb, File2 300kb, File3 300kb, File4 300kb
- *
+ * <p>
* Distribution:
* Partition1: [File1]
* Partition2: [File2, File3, File4]
*
- * @param items items
+ * @param items items
* @param partitionsCount Partitions count
*/
private void distributeWorkLoad(List<BlobItem> items, int partitionsCount) {
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/DatasourceFactoryProvider.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/DatasourceFactoryProvider.java
index 8fa0aca..b0aded6 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/DatasourceFactoryProvider.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/DatasourceFactoryProvider.java
@@ -20,13 +20,16 @@
import java.io.IOException;
import java.io.InputStream;
+import java.lang.reflect.InvocationTargetException;
import java.net.URL;
import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import org.apache.asterix.common.exceptions.AsterixException;
import org.apache.asterix.common.exceptions.ErrorCode;
@@ -46,7 +49,8 @@
public class DatasourceFactoryProvider {
private static final String RESOURCE = "META-INF/services/org.apache.asterix.external.api.IRecordReaderFactory";
- private static Map<String, Class> factories = null;
+ private static final String DEFAULT_FORMAT = "DEFAULT_FORMAT";
+ private static Map<String, Map<String, Class<?>>> factories = null;
private DatasourceFactoryProvider() {
}
@@ -96,15 +100,16 @@
return streamSourceFactory;
}
- protected static IRecordReaderFactory getInstance(Class clazz) throws AsterixException {
+ protected static IRecordReaderFactory<?> getInstance(Class<?> clazz) throws AsterixException {
try {
- return (IRecordReaderFactory) clazz.newInstance();
- } catch (InstantiationException | IllegalAccessException | ClassCastException e) {
+ return (IRecordReaderFactory<?>) clazz.getDeclaredConstructor().newInstance();
+ } catch (InstantiationException | IllegalAccessException | ClassCastException | NoSuchMethodException
+ | InvocationTargetException e) {
throw new AsterixException("Cannot create: " + clazz.getSimpleName(), e);
}
}
- public static IRecordReaderFactory getRecordReaderFactory(String adaptorName, Map<String, String> configuration)
+ public static IRecordReaderFactory<?> getRecordReaderFactory(String adaptorName, Map<String, String> configuration)
throws HyracksDataException, AsterixException {
if (adaptorName.equals(ExternalDataConstants.EXTERNAL)) {
//return ExternalDataUtils.createExternalRecordReaderFactory(libraryManager, configuration);
@@ -112,24 +117,27 @@
}
if (factories == null) {
- factories = initFactories();
+ initFactories();
}
if (factories.containsKey(adaptorName)) {
- return getInstance(factories.get(adaptorName));
+ Map<String, Class<?>> formatClassMap = factories.get(adaptorName);
+ String format = configuration.get(ExternalDataConstants.KEY_FORMAT);
+ return getInstance(formatClassMap.getOrDefault(format, formatClassMap.get(DEFAULT_FORMAT)));
}
try {
- return (IRecordReaderFactory) Class.forName(adaptorName).newInstance();
- } catch (IllegalAccessException | ClassNotFoundException | InstantiationException | ClassCastException e) {
+ return (IRecordReaderFactory<?>) Class.forName(adaptorName).getDeclaredConstructor().newInstance();
+ } catch (IllegalAccessException | ClassNotFoundException | InstantiationException | ClassCastException
+ | NoSuchMethodException | InvocationTargetException e) {
throw new RuntimeDataException(ErrorCode.UNKNOWN_RECORD_READER_FACTORY, e, adaptorName);
}
}
- protected static Map<String, Class> initFactories() throws AsterixException {
- Map<String, Class> factories = new HashMap<>();
+ protected static void initFactories() throws AsterixException {
+ factories = new HashMap<>();
ClassLoader cl = ParserFactoryProvider.class.getClassLoader();
- final Charset encoding = Charset.forName("UTF-8");
+ final Charset encoding = StandardCharsets.UTF_8;
try {
Enumeration<URL> urls = cl.getResources(RESOURCE);
for (URL url : Collections.list(urls)) {
@@ -142,19 +150,44 @@
continue;
}
final Class<?> clazz = Class.forName(className);
- List<String> formats = ((IRecordReaderFactory) clazz.newInstance()).getRecordReaderNames();
- for (String format : formats) {
- if (factories.containsKey(format)) {
- throw new AsterixException(ErrorCode.PROVIDER_DATASOURCE_FACTORY_DUPLICATE_FORMAT_MAPPING,
- format);
- }
- factories.put(format, clazz);
- }
+ final IRecordReaderFactory<?> readerFactory =
+ (IRecordReaderFactory<?>) clazz.getDeclaredConstructor().newInstance();
+ List<String> readerNames = readerFactory.getRecordReaderNames();
+ Set<String> supportedFormats = readerFactory.getReaderSupportedFormats();
+ putFactory(readerNames, supportedFormats, clazz);
}
}
- } catch (IOException | ClassNotFoundException | InstantiationException | IllegalAccessException e) {
+ } catch (IOException | ClassNotFoundException | InstantiationException | IllegalAccessException
+ | NoSuchMethodException | InvocationTargetException e) {
throw new AsterixException(e);
}
- return factories;
+ }
+
+ private static void putFactory(List<String> readerNames, Set<String> supportedFormats, Class<?> clazz)
+ throws AsterixException {
+ for (String reader : readerNames) {
+ Map<String, Class<?>> formatClassMap = factories.computeIfAbsent(reader, k -> new HashMap<>());
+ if (isDefaultFormat(supportedFormats)) {
+ //Ensure that only one reader is the default reader
+ checkDuplicates(formatClassMap, DEFAULT_FORMAT);
+ formatClassMap.put(DEFAULT_FORMAT, clazz);
+ } else {
+ //Specialized formats for the same reader name
+ for (String format : supportedFormats) {
+ checkDuplicates(formatClassMap, format);
+ formatClassMap.put(format, clazz);
+ }
+ }
+ }
+ }
+
+ private static boolean isDefaultFormat(Set<String> supportedFormats) {
+ return supportedFormats.equals(ExternalDataConstants.ALL_FORMATS);
+ }
+
+ private static void checkDuplicates(Map<String, Class<?>> factories, String key) throws AsterixException {
+ if (factories.containsKey(key)) {
+ throw new AsterixException(ErrorCode.PROVIDER_DATASOURCE_FACTORY_DUPLICATE_FORMAT_MAPPING, key);
+ }
}
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index a0cf387..92a491d 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -190,10 +190,11 @@
public static final String FORMAT_KV = "kv";
public static final String FORMAT_CSV = "csv";
public static final String FORMAT_TSV = "tsv";
+ public static final String FORMAT_PARQUET = "parquet";
public static final Set<String> ALL_FORMATS;
static {
- Set<String> formats = new HashSet<>(13);
+ Set<String> formats = new HashSet<>(14);
formats.add(FORMAT_HIVE);
formats.add(FORMAT_BINARY);
formats.add(FORMAT_ADM);
@@ -207,6 +208,7 @@
formats.add(FORMAT_KV);
formats.add(FORMAT_CSV);
formats.add(FORMAT_TSV);
+ formats.add(FORMAT_PARQUET);
ALL_FORMATS = Collections.unmodifiableSet(formats);
}
@@ -310,6 +312,17 @@
public static boolean isRetryableError(String errorCode) {
return errorCode.equals(ERROR_INTERNAL_ERROR) || errorCode.equals(ERROR_SLOW_DOWN);
}
+
+ /*
+ * Hadoop-AWS
+ * AWS connectors for s3 and s3n are deprecated.
+ */
+ public static final String HADOOP_ACCESS_KEY_ID = "fs.s3a.access.key";
+ public static final String HADOOP_SECRET_ACCESS_KEY = "fs.s3a.secret.key";
+ public static final String HADOOP_PATH_STYLE_ACCESS = "fs.s3a.path.style.access";
+ public static final String HADOOP_REGION = "fs.s3a.region";
+ public static final String HADOOP_SERVICE_END_POINT = "fs.s3a.endpoint";
+ public static final String HADOOP_S3_PROTOCOL = "s3a";
}
public static class AzureBlob {
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index 7872cef..86e733e 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -21,6 +21,9 @@
import static org.apache.asterix.common.exceptions.ErrorCode.REQUIRED_PARAM_IF_PARAM_IS_PRESENT;
import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.ACCESS_KEY_ID_FIELD_NAME;
import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.ERROR_METHOD_NOT_IMPLEMENTED;
+import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_ACCESS_KEY_ID;
+import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_PATH_STYLE_ACCESS;
+import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_SECRET_ACCESS_KEY;
import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.SECRET_ACCESS_KEY_FIELD_NAME;
import static org.apache.asterix.external.util.ExternalDataConstants.AzureBlob.ACCOUNT_KEY_FIELD_NAME;
import static org.apache.asterix.external.util.ExternalDataConstants.AzureBlob.ACCOUNT_NAME_FIELD_NAME;
@@ -35,7 +38,9 @@
import static org.apache.asterix.external.util.ExternalDataConstants.AzureBlob.SHARED_ACCESS_SIGNATURE_FIELD_NAME;
import static org.apache.asterix.external.util.ExternalDataConstants.KEY_DELIMITER;
import static org.apache.asterix.external.util.ExternalDataConstants.KEY_ESCAPE;
+import static org.apache.asterix.external.util.ExternalDataConstants.KEY_EXCLUDE;
import static org.apache.asterix.external.util.ExternalDataConstants.KEY_EXTERNAL_SCAN_BUFFER_SIZE;
+import static org.apache.asterix.external.util.ExternalDataConstants.KEY_INCLUDE;
import static org.apache.asterix.external.util.ExternalDataConstants.KEY_QUOTE;
import static org.apache.asterix.external.util.ExternalDataConstants.KEY_RECORD_END;
import static org.apache.asterix.external.util.ExternalDataConstants.KEY_RECORD_START;
@@ -45,10 +50,14 @@
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
+import java.util.function.BiPredicate;
import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
import org.apache.asterix.common.exceptions.AsterixException;
import org.apache.asterix.common.exceptions.CompilationException;
@@ -62,11 +71,13 @@
import org.apache.asterix.external.api.IExternalDataSourceFactory.DataSourceType;
import org.apache.asterix.external.api.IInputStreamFactory;
import org.apache.asterix.external.api.IRecordReaderFactory;
+import org.apache.asterix.external.input.record.reader.abstracts.AbstractExternalInputStreamFactory.IncludeExcludeMatcher;
import org.apache.asterix.external.library.JavaLibrary;
import org.apache.asterix.om.types.ARecordType;
import org.apache.asterix.om.types.ATypeTag;
import org.apache.asterix.om.types.AUnionType;
import org.apache.asterix.runtime.evaluators.common.NumberUtils;
+import org.apache.hadoop.mapred.JobConf;
import org.apache.hyracks.algebricks.common.exceptions.NotImplementedException;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.exceptions.IWarningCollector;
@@ -102,6 +113,7 @@
import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
import software.amazon.awssdk.services.s3.model.ListObjectsV2Response;
import software.amazon.awssdk.services.s3.model.S3Exception;
+import software.amazon.awssdk.services.s3.model.S3Object;
import software.amazon.awssdk.services.s3.model.S3Response;
public class ExternalDataUtils {
@@ -436,7 +448,8 @@
final String inputFormat = configuration.get(ExternalDataConstants.KEY_INPUT_FORMAT);
if (ExternalDataConstants.INPUT_FORMAT_PARQUET.equals(inputFormat)) {
//Parquet supports binary-to-binary conversion. No parsing is required
- configuration.put(ExternalDataConstants.KEY_FORMAT, ExternalDataConstants.FORMAT_NOOP);
+ configuration.put(ExternalDataConstants.KEY_PARSER, ExternalDataConstants.FORMAT_NOOP);
+ configuration.put(ExternalDataConstants.KEY_FORMAT, ExternalDataConstants.FORMAT_PARQUET);
}
if (!configuration.containsKey(ExternalDataConstants.KEY_PARSER)
&& configuration.containsKey(ExternalDataConstants.KEY_FORMAT)) {
@@ -542,7 +555,7 @@
AwsS3.validateProperties(configuration, srcLoc, collector);
break;
case ExternalDataConstants.KEY_ADAPTER_NAME_AZURE_BLOB:
- ExternalDataUtils.Azure.validateProperties(configuration, srcLoc, collector);
+ Azure.validateProperties(configuration, srcLoc, collector);
break;
default:
// Nothing needs to be done
@@ -711,12 +724,46 @@
}
}
+ public static IncludeExcludeMatcher getIncludeExcludeMatchers(Map<String, String> configuration)
+ throws CompilationException {
+ // Get and compile the patterns for include/exclude if provided
+ List<Matcher> includeMatchers = new ArrayList<>();
+ List<Matcher> excludeMatchers = new ArrayList<>();
+ String pattern = null;
+ try {
+ for (Map.Entry<String, String> entry : configuration.entrySet()) {
+ if (entry.getKey().startsWith(KEY_INCLUDE)) {
+ pattern = entry.getValue();
+ includeMatchers.add(Pattern.compile(patternToRegex(pattern)).matcher(""));
+ } else if (entry.getKey().startsWith(KEY_EXCLUDE)) {
+ pattern = entry.getValue();
+ excludeMatchers.add(Pattern.compile(patternToRegex(pattern)).matcher(""));
+ }
+ }
+ } catch (PatternSyntaxException ex) {
+ throw new CompilationException(ErrorCode.INVALID_REGEX_PATTERN, pattern);
+ }
+
+ IncludeExcludeMatcher includeExcludeMatcher;
+ if (!includeMatchers.isEmpty()) {
+ includeExcludeMatcher =
+ new IncludeExcludeMatcher(includeMatchers, (matchers1, key) -> matchPatterns(matchers1, key));
+ } else if (!excludeMatchers.isEmpty()) {
+ includeExcludeMatcher =
+ new IncludeExcludeMatcher(excludeMatchers, (matchers1, key) -> !matchPatterns(matchers1, key));
+ } else {
+ includeExcludeMatcher = new IncludeExcludeMatcher(Collections.emptyList(), (matchers1, key) -> true);
+ }
+
+ return includeExcludeMatcher;
+ }
+
public static boolean supportsPushdown(Map<String, String> properties) {
//Currently, only Apache Parquet format is supported
- return ExternalDataConstants.CLASS_NAME_PARQUET_INPUT_FORMAT
- .equals(properties.get(ExternalDataConstants.KEY_INPUT_FORMAT))
- || ExternalDataConstants.INPUT_FORMAT_PARQUET
- .equals(properties.get(ExternalDataConstants.KEY_INPUT_FORMAT));
+ String inputFormat = properties.get(ExternalDataConstants.KEY_INPUT_FORMAT);
+ return ExternalDataConstants.CLASS_NAME_PARQUET_INPUT_FORMAT.equals(inputFormat)
+ || ExternalDataConstants.INPUT_FORMAT_PARQUET.equals(inputFormat)
+ || ExternalDataConstants.FORMAT_PARQUET.equals(properties.get(ExternalDataConstants.KEY_FORMAT));
}
public static class AwsS3 {
@@ -780,6 +827,33 @@
}
/**
+ * Builds the S3 client using the provided configuration
+ *
+ * @param configuration properties
+ */
+ public static void configureAwsS3HdfsJobConf(JobConf conf, Map<String, String> configuration) {
+ String accessKeyId = configuration.get(ExternalDataConstants.AwsS3.ACCESS_KEY_ID_FIELD_NAME);
+ String secretAccessKey = configuration.get(ExternalDataConstants.AwsS3.SECRET_ACCESS_KEY_FIELD_NAME);
+ String regionId = configuration.get(ExternalDataConstants.AwsS3.REGION_FIELD_NAME);
+ String serviceEndpoint = configuration.get(ExternalDataConstants.AwsS3.SERVICE_END_POINT_FIELD_NAME);
+
+ conf.set(HADOOP_ACCESS_KEY_ID, accessKeyId);
+ conf.set(HADOOP_SECRET_ACCESS_KEY, secretAccessKey);
+ /*
+ * This is to allow S3 definition to have path-style form. Should always be true to match the current
+ * way we access files in S3
+ */
+ conf.set(HADOOP_PATH_STYLE_ACCESS, ExternalDataConstants.TRUE);
+
+ if (serviceEndpoint != null) {
+ // Validation of the URL should be done at hadoop-aws level
+ conf.set(ExternalDataConstants.AwsS3.HADOOP_SERVICE_END_POINT, serviceEndpoint);
+ } else {
+ conf.set(ExternalDataConstants.AwsS3.HADOOP_REGION, regionId);
+ }
+ }
+
+ /**
* Validate external dataset properties
*
* @param configuration properties
@@ -856,11 +930,10 @@
/**
* Checks for a single object in the specified bucket to determine if the bucket is empty or not.
*
- * @param s3Client s3 client
+ * @param s3Client s3 client
* @param container the container name
- * @param prefix Prefix to be used
+ * @param prefix Prefix to be used
* @param useOldApi flag whether to use the old API or not
- *
* @return returns the S3 response
*/
private static S3Response isBucketEmpty(S3Client s3Client, String container, String prefix, boolean useOldApi) {
@@ -876,6 +949,155 @@
}
return response;
}
+
+ /**
+ * Returns the lists of S3 objects.
+ *
+ * @param configuration properties
+ * @param includeExcludeMatcher include/exclude matchers to apply
+ */
+ public static List<S3Object> listS3Objects(Map<String, String> configuration,
+ IncludeExcludeMatcher includeExcludeMatcher, IWarningCollector warningCollector)
+ throws CompilationException {
+ // Prepare to retrieve the objects
+ List<S3Object> filesOnly;
+ String container = configuration.get(ExternalDataConstants.CONTAINER_NAME_FIELD_NAME);
+ S3Client s3Client = buildAwsS3Client(configuration);
+ String prefix = getPrefix(configuration);
+
+ try {
+ filesOnly = listS3Objects(s3Client, container, prefix, includeExcludeMatcher);
+ } catch (S3Exception ex) {
+ // New API is not implemented, try falling back to old API
+ try {
+ // For error code, see https://docs.aws.amazon.com/AmazonS3/latest/API/ErrorResponses.html
+ if (ex.awsErrorDetails().errorCode()
+ .equals(ExternalDataConstants.AwsS3.ERROR_METHOD_NOT_IMPLEMENTED)) {
+ filesOnly = oldApiListS3Objects(s3Client, container, prefix, includeExcludeMatcher);
+ } else {
+ throw ex;
+ }
+ } catch (SdkException ex2) {
+ throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, ex2.getMessage());
+ }
+ } catch (SdkException ex) {
+ throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, ex.getMessage());
+ } finally {
+ if (s3Client != null) {
+ CleanupUtils.close(s3Client, null);
+ }
+ }
+
+ // Warn if no files are returned
+ if (filesOnly.isEmpty() && warningCollector.shouldWarn()) {
+ Warning warning = Warning.of(null, ErrorCode.EXTERNAL_SOURCE_CONFIGURATION_RETURNED_NO_FILES);
+ warningCollector.warn(warning);
+ }
+
+ return filesOnly;
+ }
+
+ /**
+ * Uses the latest API to retrieve the objects from the storage.
+ *
+ * @param s3Client S3 client
+ * @param container container name
+ * @param prefix definition prefix
+ * @param includeExcludeMatcher include/exclude matchers to apply
+ */
+ private static List<S3Object> listS3Objects(S3Client s3Client, String container, String prefix,
+ IncludeExcludeMatcher includeExcludeMatcher) {
+ String newMarker = null;
+ List<S3Object> filesOnly = new ArrayList<>();
+
+ ListObjectsV2Response listObjectsResponse;
+ ListObjectsV2Request.Builder listObjectsBuilder = ListObjectsV2Request.builder().bucket(container);
+ listObjectsBuilder.prefix(prefix);
+
+ while (true) {
+ // List the objects from the start, or from the last marker in case of truncated result
+ if (newMarker == null) {
+ listObjectsResponse = s3Client.listObjectsV2(listObjectsBuilder.build());
+ } else {
+ listObjectsResponse =
+ s3Client.listObjectsV2(listObjectsBuilder.continuationToken(newMarker).build());
+ }
+
+ // Collect the paths to files only
+ collectAndFilterFiles(listObjectsResponse.contents(), includeExcludeMatcher.getPredicate(),
+ includeExcludeMatcher.getMatchersList(), filesOnly);
+
+ // Mark the flag as done if done, otherwise, get the marker of the previous response for the next request
+ if (!listObjectsResponse.isTruncated()) {
+ break;
+ } else {
+ newMarker = listObjectsResponse.nextContinuationToken();
+ }
+ }
+
+ return filesOnly;
+ }
+
+ /**
+ * Uses the old API (in case the new API is not implemented) to retrieve the objects from the storage
+ *
+ * @param s3Client S3 client
+ * @param container container name
+ * @param prefix definition prefix
+ * @param includeExcludeMatcher include/exclude matchers to apply
+ */
+ private static List<S3Object> oldApiListS3Objects(S3Client s3Client, String container, String prefix,
+ IncludeExcludeMatcher includeExcludeMatcher) {
+ String newMarker = null;
+ List<S3Object> filesOnly = new ArrayList<>();
+
+ ListObjectsResponse listObjectsResponse;
+ ListObjectsRequest.Builder listObjectsBuilder = ListObjectsRequest.builder().bucket(container);
+ listObjectsBuilder.prefix(prefix);
+
+ while (true) {
+ // List the objects from the start, or from the last marker in case of truncated result
+ if (newMarker == null) {
+ listObjectsResponse = s3Client.listObjects(listObjectsBuilder.build());
+ } else {
+ listObjectsResponse = s3Client.listObjects(listObjectsBuilder.marker(newMarker).build());
+ }
+
+ // Collect the paths to files only
+ collectAndFilterFiles(listObjectsResponse.contents(), includeExcludeMatcher.getPredicate(),
+ includeExcludeMatcher.getMatchersList(), filesOnly);
+
+ // Mark the flag as done if done, otherwise, get the marker of the previous response for the next request
+ if (!listObjectsResponse.isTruncated()) {
+ break;
+ } else {
+ newMarker = listObjectsResponse.nextMarker();
+ }
+ }
+
+ return filesOnly;
+ }
+
+ /**
+ * AWS S3 returns all the objects as paths, not differentiating between folder and files. The path is considered
+ * a file if it does not end up with a "/" which is the separator in a folder structure.
+ *
+ * @param s3Objects List of returned objects
+ */
+ private static void collectAndFilterFiles(List<S3Object> s3Objects,
+ BiPredicate<List<Matcher>, String> predicate, List<Matcher> matchers, List<S3Object> filesOnly) {
+ for (S3Object object : s3Objects) {
+ // skip folders
+ if (object.key().endsWith("/")) {
+ continue;
+ }
+
+ // No filter, add file
+ if (predicate.test(matchers, object.key())) {
+ filesOnly.add(object);
+ }
+ }
+ }
}
public static class Azure {
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
index 126b86f..abaaeeb 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
@@ -25,7 +25,6 @@
import java.util.Map;
import org.apache.asterix.common.api.IApplicationContext;
-import org.apache.asterix.common.cluster.IClusterStateManager;
import org.apache.asterix.common.config.DatasetConfig.ExternalFilePendingOp;
import org.apache.asterix.common.dataflow.ICcApplicationContext;
import org.apache.asterix.common.exceptions.ErrorCode;
@@ -232,20 +231,10 @@
public static AlgebricksAbsolutePartitionConstraint getPartitionConstraints(IApplicationContext appCtx,
AlgebricksAbsolutePartitionConstraint clusterLocations) {
if (clusterLocations == null) {
- IClusterStateManager clusterStateManager = ((ICcApplicationContext) appCtx).getClusterStateManager();
- ArrayList<String> locs = new ArrayList<>();
- Map<String, String[]> stores = appCtx.getMetadataProperties().getStores();
- for (String node : stores.keySet()) {
- int numIODevices = clusterStateManager.getIODevices(node).length;
- for (int k = 0; k < numIODevices; k++) {
- locs.add(node);
- }
- }
- String[] cluster = new String[locs.size()];
- cluster = locs.toArray(cluster);
- return new AlgebricksAbsolutePartitionConstraint(cluster);
+ return ((ICcApplicationContext) appCtx).getClusterStateManager().getClusterLocations();
}
return clusterLocations;
+
}
public static RecordIdType getRecordIdType(Map<String, String> configuration) {
diff --git a/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IRecordReaderFactory b/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IRecordReaderFactory
index d66c44e..3b220e9 100644
--- a/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IRecordReaderFactory
+++ b/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IRecordReaderFactory
@@ -22,3 +22,4 @@
org.apache.asterix.external.input.record.reader.http.HttpServerRecordReaderFactory
org.apache.asterix.external.input.record.reader.aws.AwsS3ReaderFactory
org.apache.asterix.external.input.record.reader.azure.AzureBlobReaderFactory
+org.apache.asterix.external.input.record.reader.aws.parquet.AwsS3ParquetReaderFactory
\ No newline at end of file
diff --git a/asterixdb/asterix-server/pom.xml b/asterixdb/asterix-server/pom.xml
index a0ac43d..424fa54 100644
--- a/asterixdb/asterix-server/pom.xml
+++ b/asterixdb/asterix-server/pom.xml
@@ -199,11 +199,25 @@
</gavs>
<noticeUrl>https://raw.githubusercontent.com/aws/aws-sdk-java-v2/2.10.83/NOTICE.txt</noticeUrl>
</override>
+ <!-- Hadoop AWS SDK -->
+ <override>
+ <gavs>
+ <gav>com.amazonaws:aws-java-sdk-core:1.12.1</gav>
+ <gav>com.amazonaws:jmespath-java:1.12.1</gav>
+ <gav>com.amazonaws:aws-java-sdk-s3:1.12.1</gav>
+ <gav>com.amazonaws:aws-java-sdk-kms:1.12.1</gav>
+ </gavs>
+ <noticeUrl>https://raw.githubusercontent.com/aws/aws-sdk-java/1.12.1/NOTICE.txt</noticeUrl>
+ </override>
<override>
<gav>software.amazon.eventstream:eventstream:1.0.1</gav>
<noticeUrl>https://raw.githubusercontent.com/awslabs/aws-eventstream-java/7be2dd80e12f8835674c8ffb0f4a2efb64c7b585/NOTICE</noticeUrl>
</override>
<override>
+ <gav>software.amazon.ion:ion-java:1.0.2</gav>
+ <noticeUrl>https://raw.githubusercontent.com/amzn/ion-java/v1.0.2/NOTICE</noticeUrl>
+ </override>
+ <override>
<gavs>
<gav>com.azure:azure-core:1.4.0</gav>
<gav>com.azure:azure-core-http-netty:1.5.0</gav>
@@ -221,6 +235,10 @@
<gav>org.msgpack:msgpack-core:0.8.20</gav>
<noticeUrl>https://raw.githubusercontent.com/msgpack/msgpack-java/0.8.20/NOTICE</noticeUrl>
</override>
+ <override>
+ <gav>com.github.luben:zstd-jni:1.4.9-1</gav>
+ <url>https://raw.githubusercontent.com/luben/zstd-jni/v1.4.9-1/LICENSE</url>
+ </override>
</overrides>
<licenses>
<license>
@@ -312,14 +330,19 @@
</aliasUrls>
</license>
<license>
- <url>https://opensource.org/licenses/bsd-license.php</url>
- <aliasUrls>http://www.opensource.org/licenses/bsd-license.php</aliasUrls>
- </license>
- <license>
<!-- this is special case- handled in ftl template -->
<url>PUBLIC_DOMAIN</url>
<aliasUrls>Public Domain</aliasUrls>
</license>
+ <license>
+ <displayName>The 2-Clause BSD License</displayName>
+ <url>https://opensource.org/licenses/BSD-2-Clause</url>
+ <contentFile>raw.githubusercontent.com_luben_zstd-jni_v1.4.9-1_LICENSE.txt</contentFile>
+ <aliasUrls>
+ <aliasURL>https://opensource.org/licenses/bsd-license.php</aliasURL>
+ <aliasUrl>https://raw.githubusercontent.com/luben/zstd-jni/v1.4.9-1/LICENSE</aliasUrl>
+ </aliasUrls>
+ </license>
</licenses>
<templateProperties>
<packageName>Apache AsterixDB Server Install</packageName>
diff --git a/asterixdb/pom.xml b/asterixdb/pom.xml
index 1cc4ea6..98884f8 100644
--- a/asterixdb/pom.xml
+++ b/asterixdb/pom.xml
@@ -88,7 +88,8 @@
<log4j.version>2.14.1</log4j.version>
<awsjavasdk.version>2.10.83</awsjavasdk.version>
<azurejavasdk.version>12.6.0</azurejavasdk.version>
- <parquet.version>1.8.2</parquet.version>
+ <parquet.version>1.12.0</parquet.version>
+ <hadoop-awsjavasdk.version>1.12.1</hadoop-awsjavasdk.version>
<implementation.title>Apache AsterixDB - ${project.name}</implementation.title>
<implementation.url>https://asterixdb.apache.org/</implementation.url>
@@ -1636,17 +1637,48 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-column</artifactId>
<version>${parquet.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-hadoop</artifactId>
<version>${parquet.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<dependency>
<groupId>org.kitesdk</groupId>
<artifactId>kite-data-core</artifactId>
<version>1.1.0</version>
</dependency>
+ <!-- Hadoop AWS start -->
+ <dependency>
+ <!-- Pick a newer AWS SDK -->
+ <groupId>com.amazonaws</groupId>
+ <artifactId>aws-java-sdk-s3</artifactId>
+ <version>${hadoop-awsjavasdk.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-aws</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <!-- Hadoop AWS end -->
</dependencies>
</dependencyManagement>
diff --git a/asterixdb/src/main/appended-resources/supplemental-models.xml b/asterixdb/src/main/appended-resources/supplemental-models.xml
index c352fe5..eb063b9 100644
--- a/asterixdb/src/main/appended-resources/supplemental-models.xml
+++ b/asterixdb/src/main/appended-resources/supplemental-models.xml
@@ -466,7 +466,7 @@
<!-- software.amazon.eventstream is ALv2, and does not contain any embedded LICENSE or NOTICE file -->
<!-- license override not needed, ALv2 is specified in its pom.xml -->
- <!-- see https://github.com/awslabs/aws-eventstream-java -->
+ <!-- see https://github.com/aws/aws-sdk-java -->
<supplement>
<project>
<groupId>software.amazon.eventstream</groupId>
@@ -480,6 +480,74 @@
</supplement>
<!-- AWS SDK end -->
+ <!-- AWS Hadoop SDK start -->
+ <!-- software.amazon.awssdk is ALv2, and does not contain any embedded LICENSE or NOTICE file -->
+ <!-- license override not needed, ALv2 is specified in its pom.xml -->
+ <!-- see https://github.com/aws/aws-sdk-java-v2/blob/master/LICENSE.txt -->
+ <supplement>
+ <project>
+ <groupId>com.amazonaws</groupId>
+ <artifactId>aws-java-sdk-core</artifactId>
+ <properties>
+ <license.ignoreMissingEmbeddedLicense>1.12.1</license.ignoreMissingEmbeddedLicense>
+ <license.ignoreMissingEmbeddedNotice>1.12.1</license.ignoreMissingEmbeddedNotice>
+ <license.ignoreNoticeOverride>1.12.1</license.ignoreNoticeOverride>
+ </properties>
+ </project>
+ </supplement>
+
+ <supplement>
+ <project>
+ <groupId>com.amazonaws</groupId>
+ <artifactId>jmespath-java</artifactId>
+ <properties>
+ <license.ignoreMissingEmbeddedLicense>1.12.1</license.ignoreMissingEmbeddedLicense>
+ <license.ignoreMissingEmbeddedNotice>1.12.1</license.ignoreMissingEmbeddedNotice>
+ <license.ignoreNoticeOverride>1.12.1</license.ignoreNoticeOverride>
+ </properties>
+ </project>
+ </supplement>
+
+ <supplement>
+ <project>
+ <groupId>com.amazonaws</groupId>
+ <artifactId>aws-java-sdk-s3</artifactId>
+ <properties>
+ <license.ignoreMissingEmbeddedLicense>1.12.1</license.ignoreMissingEmbeddedLicense>
+ <license.ignoreMissingEmbeddedNotice>1.12.1</license.ignoreMissingEmbeddedNotice>
+ <license.ignoreNoticeOverride>1.12.1</license.ignoreNoticeOverride>
+ </properties>
+ </project>
+ </supplement>
+
+ <supplement>
+ <project>
+ <groupId>com.amazonaws</groupId>
+ <artifactId>aws-java-sdk-kms</artifactId>
+ <properties>
+ <license.ignoreMissingEmbeddedLicense>1.12.1</license.ignoreMissingEmbeddedLicense>
+ <license.ignoreMissingEmbeddedNotice>1.12.1</license.ignoreMissingEmbeddedNotice>
+ <license.ignoreNoticeOverride>1.12.1</license.ignoreNoticeOverride>
+ </properties>
+ </project>
+ </supplement>
+
+ <!-- software.amazon.ion is ALv2, and does not contain any embedded LICENSE or NOTICE file -->
+ <!-- license override not needed, ALv2 is specified in its pom.xml -->
+ <!-- see https://github.com/amzn/ion-java -->
+ <supplement>
+ <project>
+ <groupId>software.amazon.ion</groupId>
+ <artifactId>ion-java</artifactId>
+ <properties>
+ <license.ignoreMissingEmbeddedLicense>1.0.2</license.ignoreMissingEmbeddedLicense>
+ <license.ignoreMissingEmbeddedNotice>1.0.2</license.ignoreMissingEmbeddedNotice>
+ <license.ignoreNoticeOverride>1.0.2</license.ignoreNoticeOverride>
+ </properties>
+ </project>
+ </supplement>
+ <!-- AWS Hadoop SDK end -->
+
<!-- com.typesafe.netty is ALv2, and does not contain any embedded LICENSE or NOTICE file -->
<!-- license override not needed, ALv2 is specified in its pom.xml -->
<!-- see https://github.com/playframework/netty-reactive-streams -->
@@ -608,6 +676,19 @@
</supplement>
<!-- Azure SDK for Java end -->
+ <!-- jackson-dataformat-cbor does not contain embedded LICENSE and NOTICE -->
+ <!-- See https://github.com/FasterXML/jackson-modules-java8 -->
+ <supplement>
+ <project>
+ <groupId>com.fasterxml.jackson.dataformat</groupId>
+ <artifactId>jackson-dataformat-cbor</artifactId>
+ <properties>
+ <license.ignoreMissingEmbeddedLicense>2.12.3</license.ignoreMissingEmbeddedLicense>
+ <license.ignoreMissingEmbeddedNotice>2.12.3</license.ignoreMissingEmbeddedNotice>
+ </properties>
+ </project>
+ </supplement>
+
<!-- jackson-datatype-jsr contains embedded license but has no NOTICE -->
<!-- See https://github.com/FasterXML/jackson-modules-java8 -->
<supplement>
@@ -665,4 +746,72 @@
</properties>
</project>
</supplement>
+
+ <supplement>
+ <project>
+ <groupId>com.github.luben</groupId>
+ <artifactId>zstd-jni</artifactId>
+ <properties>
+ <license.ignoreMissingEmbeddedLicense>1.4.9-1</license.ignoreMissingEmbeddedLicense>
+ <license.ignoreMissingEmbeddedNotice>1.4.9-1</license.ignoreMissingEmbeddedNotice>
+ <license.ignoreLicenseOverride>1.4.9-1</license.ignoreLicenseOverride>
+ </properties>
+ </project>
+ </supplement>
+ <!-- Apache Parquet -->
+ <supplement>
+ <project>
+ <groupId>org.apache.parquet</groupId>
+ <artifactId>parquet-format-structures</artifactId>
+ <properties>
+ <license.ignoreMissingEmbeddedLicense>1.12.0</license.ignoreMissingEmbeddedLicense>
+ <license.ignoreMissingEmbeddedNotice>1.12.0</license.ignoreMissingEmbeddedNotice>
+ </properties>
+ </project>
+ </supplement>
+
+ <supplement>
+ <project>
+ <groupId>org.apache.parquet</groupId>
+ <artifactId>parquet-common</artifactId>
+ <properties>
+ <license.ignoreMissingEmbeddedLicense>1.12.0</license.ignoreMissingEmbeddedLicense>
+ <license.ignoreMissingEmbeddedNotice>1.12.0</license.ignoreMissingEmbeddedNotice>
+ </properties>
+ </project>
+ </supplement>
+
+ <!-- Contains embedded LICENSE but missing NOTICE -->
+ <supplement>
+ <project>
+ <groupId>org.apache.parquet</groupId>
+ <artifactId>parquet-column</artifactId>
+ <properties>
+ <license.ignoreMissingEmbeddedNotice>1.12.0</license.ignoreMissingEmbeddedNotice>
+ </properties>
+ </project>
+ </supplement>
+
+ <!-- Contains embedded LICENSE but missing NOTICE -->
+ <supplement>
+ <project>
+ <groupId>org.apache.parquet</groupId>
+ <artifactId>parquet-encoding</artifactId>
+ <properties>
+ <license.ignoreMissingEmbeddedNotice>1.12.0</license.ignoreMissingEmbeddedNotice>
+ </properties>
+ </project>
+ </supplement>
+
+ <!-- Contains embedded LICENSE but missing NOTICE -->
+ <supplement>
+ <project>
+ <groupId>org.apache.parquet</groupId>
+ <artifactId>parquet-hadoop</artifactId>
+ <properties>
+ <license.ignoreMissingEmbeddedNotice>1.12.0</license.ignoreMissingEmbeddedNotice>
+ </properties>
+ </project>
+ </supplement>
+
</supplementalDataModels>
diff --git a/asterixdb/src/main/licenses/content/raw.githubusercontent.com_amzn_ion-java_v1.0.2_NOTICE.txt b/asterixdb/src/main/licenses/content/raw.githubusercontent.com_amzn_ion-java_v1.0.2_NOTICE.txt
new file mode 100644
index 0000000..91ea216
--- /dev/null
+++ b/asterixdb/src/main/licenses/content/raw.githubusercontent.com_amzn_ion-java_v1.0.2_NOTICE.txt
@@ -0,0 +1,2 @@
+Amazon Ion Java
+Copyright 2007-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
\ No newline at end of file
diff --git a/asterixdb/src/main/licenses/content/raw.githubusercontent.com_aws_aws-sdk-java_1.12.1_NOTICE.txt b/asterixdb/src/main/licenses/content/raw.githubusercontent.com_aws_aws-sdk-java_1.12.1_NOTICE.txt
new file mode 100644
index 0000000..797aa3c
--- /dev/null
+++ b/asterixdb/src/main/licenses/content/raw.githubusercontent.com_aws_aws-sdk-java_1.12.1_NOTICE.txt
@@ -0,0 +1,14 @@
+AWS SDK for Java
+Copyright 2010-2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+
+This product includes software developed by
+Amazon Technologies, Inc (http://www.amazon.com/).
+
+**********************
+THIRD PARTY COMPONENTS
+**********************
+This software includes third party software subject to the following copyrights:
+- XML parsing and utility functions from JetS3t - Copyright 2006-2009 James Murty.
+- PKCS#1 PEM encoded private key parsing and utility functions from oauth.googlecode.com - Copyright 1998-2010 AOL Inc.
+
+The licenses for these third party components are included in LICENSE.txt
\ No newline at end of file
diff --git a/asterixdb/src/main/licenses/content/raw.githubusercontent.com_luben_zstd-jni_v1.4.9-1_LICENSE.txt b/asterixdb/src/main/licenses/content/raw.githubusercontent.com_luben_zstd-jni_v1.4.9-1_LICENSE.txt
new file mode 100644
index 0000000..7bdccb6
--- /dev/null
+++ b/asterixdb/src/main/licenses/content/raw.githubusercontent.com_luben_zstd-jni_v1.4.9-1_LICENSE.txt
@@ -0,0 +1,26 @@
+Zstd-jni: JNI bindings to Zstd Library
+
+Copyright (c) 2015-present, Luben Karavelov/ All rights reserved.
+
+BSD License
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this
+ list of conditions and the following disclaimer in the documentation and/or
+ other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
index 20632f1..14f6aae0 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
@@ -30,7 +30,7 @@
public static final boolean SORT_PARALLEL_DEFAULT = true;
public static final boolean INDEX_ONLY_DEFAULT = true;
public static final boolean SANITYCHECK_DEFAULT = false;
- public static final boolean EXTERNAL_FIELD_PUSHDOWN_DEFAULT = false;
+ public static final boolean EXTERNAL_FIELD_PUSHDOWN_DEFAULT = true;
public static final boolean SUBPLAN_MERGE_DEFAULT = true;
public static final boolean SUBPLAN_NESTEDPUSHDOWN_DEFAULT = true;
public static final boolean MIN_MEMORY_ALLOCATION_DEFAULT = true;