[ASTERIXDB-3243][EXT]: Issue warning and skip file on computed field type mismatch
Change-Id: I3208bcd8b59a0ca6351053bc3ce06e76ecb1d462
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17715
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Hussain Towaileb <hussainht@gmail.com>
Reviewed-by: Wail Alkowaileet <wael.y.k@gmail.com>
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java
index efa4c79..7b00240 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java
@@ -49,7 +49,7 @@
assertTrue(prefix.getIndexToComputedFieldsMap().isEmpty());
String prefix1 = "";
- prefix = new ExternalDataPrefix(prefix1);
+ prefix = new ExternalDataPrefix(prefix1, null);
assertEquals("", prefix.getOriginal());
assertEquals("", prefix.getRoot());
assertFalse(prefix.isEndsWithSlash());
@@ -60,7 +60,7 @@
assertTrue(prefix.getIndexToComputedFieldsMap().isEmpty());
String prefix2 = "hotel";
- prefix = new ExternalDataPrefix(prefix2);
+ prefix = new ExternalDataPrefix(prefix2, null);
assertEquals("hotel", prefix.getOriginal());
assertEquals("hotel", prefix.getRoot());
assertFalse(prefix.isEndsWithSlash());
@@ -71,7 +71,7 @@
assertTrue(prefix.getIndexToComputedFieldsMap().isEmpty());
String prefix3 = "hotel/{hotel-id:inT}/";
- prefix = new ExternalDataPrefix(prefix3);
+ prefix = new ExternalDataPrefix(prefix3, null);
assertEquals("hotel/{hotel-id:inT}/", prefix.getOriginal());
assertEquals("hotel/", prefix.getRoot());
assertTrue(prefix.isEndsWithSlash());
@@ -82,7 +82,7 @@
assertEquals("(.+)", prefix.getIndexToComputedFieldsMap().get(1).getExpression());
String prefix4 = "hotel/{hotel-id:int}-{hotel-name:sTRing}";
- prefix = new ExternalDataPrefix(prefix4);
+ prefix = new ExternalDataPrefix(prefix4, null);
assertEquals("hotel/{hotel-id:int}-{hotel-name:sTRing}", prefix.getOriginal());
assertEquals("hotel", prefix.getRoot());
assertFalse(prefix.isEndsWithSlash());
@@ -93,7 +93,7 @@
assertEquals("(.+)-(.+)", prefix.getIndexToComputedFieldsMap().get(1).getExpression());
String prefix5 = "hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}-{month:int}-{day:int}/";
- prefix = new ExternalDataPrefix(prefix5);
+ prefix = new ExternalDataPrefix(prefix5, null);
assertEquals("hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}-{month:int}-{day:int}/",
prefix.getOriginal());
assertEquals("hotel/something/", prefix.getRoot());
@@ -107,7 +107,7 @@
assertEquals("(.+)-(.+)-(.+)", prefix.getIndexToComputedFieldsMap().get(4).getExpression());
String prefix6 = "hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}/{month:int}/{day:int}";
- prefix = new ExternalDataPrefix(prefix6);
+ prefix = new ExternalDataPrefix(prefix6, null);
assertEquals("hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}/{month:int}/{day:int}",
prefix.getOriginal());
assertEquals("hotel/something", prefix.getRoot());
@@ -123,7 +123,7 @@
assertEquals("(.+)", prefix.getIndexToComputedFieldsMap().get(6).getExpression());
String prefix7 = "hotel/{hotel.details.id:int}-{hotel-name:sTRing}";
- prefix = new ExternalDataPrefix(prefix7);
+ prefix = new ExternalDataPrefix(prefix7, null);
assertEquals("hotel/{hotel.details.id:int}-{hotel-name:sTRing}", prefix.getOriginal());
assertEquals("hotel", prefix.getRoot());
assertFalse(prefix.isEndsWithSlash());
@@ -134,7 +134,7 @@
String prefix8 =
"hotel/hotel-{hotel-id:int}-hotel-{hotel-name:sTRing}/review/year-{year:int}/{month:int}-month/day-{day:int}-day";
- prefix = new ExternalDataPrefix(prefix8);
+ prefix = new ExternalDataPrefix(prefix8, null);
assertEquals(
"hotel/hotel-{hotel-id:int}-hotel-{hotel-name:sTRing}/review/year-{year:int}/{month:int}-month/day-{day:int}-day",
prefix.getOriginal());
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.000.ddl.sqlpp
new file mode 100644
index 0000000..7b70bd3
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.000.ddl.sqlpp
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+USE test;
+
+CREATE TYPE test AS {
+};
+
+
+CREATE EXTERNAL DATASET test(test) USING S3 (
+ ("accessKeyId"="dummyAccessKey"),
+ ("secretAccessKey"="dummySecretKey"),
+ ("region"="us-west-2"),
+ ("serviceEndpoint"="http://127.0.0.1:8001"),
+ ("container"="playground"),
+ ("definition"="external-filter/{name:bigint}"),
+ ("format"="json")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.010.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.010.query.sqlpp
new file mode 100644
index 0000000..13b6abe
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.010.query.sqlpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// param max-warnings:json=10
+
+USE test;
+
+SELECT value count(*)
+FROM test t
+WHERE t.name = "department";
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.999.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.999.ddl.sqlpp
new file mode 100644
index 0000000..36b2bab
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/s3/filter/type-mismatch/test.999.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/filter/type-mismatch/result.010.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/filter/type-mismatch/result.010.adm
new file mode 100644
index 0000000..c227083
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/s3/filter/type-mismatch/result.010.adm
@@ -0,0 +1 @@
+0
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index 4414ebf..e50ec3c 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -226,6 +226,17 @@
<output-dir compare="Text">one-field</output-dir>
</compilation-unit>
</test-case>
+ <test-case FilePath="external-dataset/s3/filter" check-warnings="true">
+ <compilation-unit name="type-mismatch">
+ <output-dir compare="Text">type-mismatch</output-dir>
+ <expected-warn>Failed to evaluate computed field. File: 'external-filter/department/accounting/0.json'. Computed Field Name: 'name'. Computed Field Type: 'bigint'. Computed Field Value: 'department'. Reason: 'java.lang.NumberFormatException: For input string: "department"'</expected-warn>
+ <expected-warn>Failed to evaluate computed field. File: 'external-filter/department/engineering/0.json'. Computed Field Name: 'name'. Computed Field Type: 'bigint'. Computed Field Value: 'department'. Reason: 'java.lang.NumberFormatException: For input string: "department"'</expected-warn>
+ <expected-warn>Failed to evaluate computed field. File: 'external-filter/department/hr/0.json'. Computed Field Name: 'name'. Computed Field Type: 'bigint'. Computed Field Value: 'department'. Reason: 'java.lang.NumberFormatException: For input string: "department"'</expected-warn>
+ <expected-warn>Failed to evaluate computed field. File: 'external-filter/last-name/Jones/0.json'. Computed Field Name: 'name'. Computed Field Type: 'bigint'. Computed Field Value: 'last-name'. Reason: 'java.lang.NumberFormatException: For input string: "last-name"'</expected-warn>
+ <expected-warn>Failed to evaluate computed field. File: 'external-filter/last-name/miller/0.json'. Computed Field Name: 'name'. Computed Field Type: 'bigint'. Computed Field Value: 'last-name'. Reason: 'java.lang.NumberFormatException: For input string: "last-name"'</expected-warn>
+ <expected-warn>Failed to evaluate computed field. File: 'external-filter/last-name/smith/0.json'. Computed Field Name: 'name'. Computed Field Type: 'bigint'. Computed Field Value: 'last-name'. Reason: 'java.lang.NumberFormatException: For input string: "last-name"'</expected-warn>
+ </compilation-unit>
+ </test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/empty-string-definition">
<placeholder name="adapter" value="S3" />
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
index ab2a978..3ac665c 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
@@ -277,6 +277,7 @@
ERROR_READING_ICEBERG_METADATA(1180),
UNSUPPORTED_COMPUTED_FIELD_TYPE(1181),
FAILED_TO_CALCULATE_COMPUTED_FIELDS(1182),
+ FAILED_TO_EVALUATE_COMPUTED_FIELD(1183),
// Feed errors
DATAFLOW_ILLEGAL_STATE(3001),
diff --git a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
index b51b09a..3c9543a 100644
--- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
+++ b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
@@ -279,6 +279,7 @@
1180 = Error reading iceberg data
1181 = Unsupported computed field type: %1$s
1182 = Failed to calculate computed fields: %1$s
+1183 = Failed to evaluate computed field. File: '%1$s'. Computed Field Name: '%2$s'. Computed Field Type: '%3$s'. Computed Field Value: '%4$s'. Reason: '%5$s'
# Feed Errors
3001 = Illegal state.
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/filter/ExternalFilterValueEvaluator.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/filter/ExternalFilterValueEvaluator.java
index 6e3556c..a07e067 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/filter/ExternalFilterValueEvaluator.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/filter/ExternalFilterValueEvaluator.java
@@ -61,16 +61,21 @@
private void writeValue(ATypeTag typeTag, String stringValue) throws HyracksDataException {
DataOutput output = value.getDataOutput();
SerializerDeserializerUtil.serializeTag(typeTag, output);
- switch (typeTag) {
- case TINYINT:
- case SMALLINT:
- case INTEGER:
- case BIGINT:
- Integer64SerializerDeserializer.write(Long.parseLong(stringValue), output);
- case DOUBLE:
- DoubleSerializerDeserializer.write(Double.parseDouble(stringValue), output);
- case STRING:
- stringSerDer.serialize(stringValue, output);
+
+ try {
+ switch (typeTag) {
+ case TINYINT:
+ case SMALLINT:
+ case INTEGER:
+ case BIGINT:
+ Integer64SerializerDeserializer.write(Long.parseLong(stringValue), output);
+ case DOUBLE:
+ DoubleSerializerDeserializer.write(Double.parseDouble(stringValue), output);
+ case STRING:
+ stringSerDer.serialize(stringValue, output);
+ }
+ } catch (Exception ex) {
+ throw HyracksDataException.create(ex);
}
}
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
index 9fa2b5a..7ae992a 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
@@ -58,7 +58,7 @@
IncludeExcludeMatcher includeExcludeMatcher = ExternalDataUtils.getIncludeExcludeMatchers(configuration);
//Get a list of S3 objects
- ExternalDataPrefix externalDataPrefix = new ExternalDataPrefix(configuration);
+ ExternalDataPrefix externalDataPrefix = new ExternalDataPrefix(configuration, warningCollector);
configuration.put(ExternalDataPrefix.PREFIX_ROOT_FIELD_NAME, externalDataPrefix.getRoot());
// TODO(htowaileb): Since we're using the root to load the files then start filtering, it might end up being
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java
index 2a707fb..66fa445 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java
@@ -46,6 +46,10 @@
import org.apache.asterix.om.utils.ProjectionFiltrationTypeUtil;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.api.exceptions.NoOpWarningCollector;
+import org.apache.hyracks.api.exceptions.Warning;
+import org.apache.hyracks.util.LogRedactionUtil;
public class ExternalDataPrefix {
@@ -53,6 +57,7 @@
private String root;
private final boolean endsWithSlash;
private final List<String> segments;
+ private final IWarningCollector warningCollector;
private final List<String> computedFieldNames = new ArrayList<>();
private final List<IAType> computedFieldTypes = new ArrayList<>();
@@ -66,13 +71,20 @@
static {
supportedTypes.add(ATypeTag.STRING);
supportedTypes.add(ATypeTag.BIGINT);
+ supportedTypes.add(ATypeTag.DOUBLE);
}
public ExternalDataPrefix(Map<String, String> configuration) throws AlgebricksException {
- this(getDefinitionOrPath(configuration));
+ this(getDefinitionOrPath(configuration), null);
}
- public ExternalDataPrefix(String prefix) throws AlgebricksException {
+ public ExternalDataPrefix(Map<String, String> configuration, IWarningCollector warningCollector)
+ throws AlgebricksException {
+ this(getDefinitionOrPath(configuration), warningCollector);
+ }
+
+ public ExternalDataPrefix(String prefix, IWarningCollector warningCollector) throws AlgebricksException {
+ this.warningCollector = warningCollector != null ? warningCollector : NoOpWarningCollector.INSTANCE;
this.original = prefix != null ? prefix : "";
this.endsWithSlash = this.original.endsWith("/");
@@ -251,10 +263,27 @@
// extract values for all compute fields and set them in the evaluator
// TODO provide the List to avoid array creation
List<String> values = extractValues(keySegments);
- for (int i = 0; i < computedFieldNames.size(); i++) {
- if (evaluator.isComputedFieldUsed(i)) {
- evaluator.setValue(i, values.get(i));
+
+ String computedFieldName = null;
+ IAType computedFieldType = null;
+ String computedFieldValue = null;
+ try {
+ for (int i = 0; i < computedFieldNames.size(); i++) {
+ computedFieldName = computedFieldNames.get(i);
+ computedFieldType = computedFieldTypes.get(i);
+ computedFieldValue = values.get(i);
+
+ if (evaluator.isComputedFieldUsed(i)) {
+ evaluator.setValue(i, computedFieldValue);
+ }
}
+ } catch (HyracksDataException ex) {
+ if (warningCollector.shouldWarn()) {
+ warningCollector.warn(Warning.of(null, ErrorCode.FAILED_TO_EVALUATE_COMPUTED_FIELD,
+ LogRedactionUtil.userData(key), computedFieldName, computedFieldType,
+ LogRedactionUtil.userData(computedFieldValue), LogRedactionUtil.userData(ex.getMessage())));
+ }
+ return false;
}
return evaluator.evaluate();