[ASTERIXDB-2918][EXT] Validate the type when creating Parquet external dataset
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
Ensure the used type - when creating an external dataset using
Parquet format - does not contain declared fields.
Change-Id: I4870a91ecf41b41996b862704b767e04abc14569
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/12905
Reviewed-by: Hussain Towaileb <hussainht@gmail.com>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
index 3e0cf79..bf6e3b5 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
@@ -967,7 +967,9 @@
Datatype itemType, MetadataProvider metadataProvider, MetadataTransactionContext mdTxnCtx)
throws AlgebricksException {
ExternalDetailsDecl externalDetails = (ExternalDetailsDecl) dd.getDatasetDetailsDecl();
- return externalDetails.getProperties();
+ Map<String, String> properties = externalDetails.getProperties();
+ ExternalDataUtils.validateType(properties, (ARecordType) itemType.getDatatype());
+ return properties;
}
protected static void validateIfResourceIsActiveInFeed(ICcApplicationContext appCtx, Dataset dataset,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/invalid-type/invalid-type.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/invalid-type/invalid-type.1.ddl.sqlpp
new file mode 100644
index 0000000..ad6cd0e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/invalid-type/invalid-type.1.ddl.sqlpp
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description : Test type validation for Parquet
+* Expected Res : ASX1161: Type 'ParquetType' contains declared fields, which is not supported for 'parquet' format
+* Date : August 19th 2021
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+CREATE TYPE ParquetType as {
+ id: string,
+ text: string
+};
+
+CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING %adapter%
+(
+ %template%,
+ ("container"="playground"),
+ ("definition"="parquet-data/reviews"),
+ ("include"="*id_age.parquet"),
+ ("format" = "parquet")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index 41c769d..c461722 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -129,6 +129,13 @@
<output-dir compare="Text">parquet-temporary-access</output-dir>
</compilation-unit>
</test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/parquet/invalid-type">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">none</output-dir>
+ <expected-error>ASX1161: Type 'ParquetType' contains declared fields, which is not supported for 'parquet' format</expected-error>
+ </compilation-unit>
+ </test-case>
<!-- Parquet Tests End -->
<test-case FilePath="external-dataset">
<compilation-unit name="common/empty-string-definition">
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
index 9cfd6ea..79d663e 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
@@ -245,6 +245,7 @@
COMPILATION_BAD_VIEW_DEFINITION(1158),
UNKNOWN_VIEW(1159),
VIEW_EXISTS(1160),
+ UNSUPPORTED_TYPE_FOR_PARQUET(1161),
// Feed errors
DATAFLOW_ILLEGAL_STATE(3001),
diff --git a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
index 5f08844..159e0ef 100644
--- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
+++ b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
@@ -247,6 +247,7 @@
1158 = Error compiling view %1$s. %2$s
1159 = Cannot find view with name %1$s
1160 = A view with this name %1$s already exists
+1161 = Type '%1$s' contains declared fields, which is not supported for 'parquet' format
# Feed Errors
3001 = Illegal state.
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index 36ee203..112e4ee 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -771,6 +771,23 @@
public static boolean supportsPushdown(Map<String, String> properties) {
//Currently, only Apache Parquet format is supported
+ return isParquetFormat(properties);
+ }
+
+ /**
+ * Validate the dataset type declared with a given type
+ *
+ * @param properties external dataset configuration
+ * @param datasetRecordType dataset declared type
+ */
+ public static void validateType(Map<String, String> properties, ARecordType datasetRecordType)
+ throws CompilationException {
+ if (isParquetFormat(properties) && datasetRecordType.getFieldTypes().length != 0) {
+ throw new CompilationException(ErrorCode.UNSUPPORTED_TYPE_FOR_PARQUET, datasetRecordType.getTypeName());
+ }
+ }
+
+ private static boolean isParquetFormat(Map<String, String> properties) {
String inputFormat = properties.get(ExternalDataConstants.KEY_INPUT_FORMAT);
return ExternalDataConstants.CLASS_NAME_PARQUET_INPUT_FORMAT.equals(inputFormat)
|| ExternalDataConstants.INPUT_FORMAT_PARQUET.equals(inputFormat)