[NO ISSUE][MISC] Improve parser error reporting
Change-Id: I8707d9e2a952693b6501e9e63aff9162a77541cc
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11543
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Michael Blow <mblow@apache.org>
Reviewed-by: Till Westmann <tillw@apache.org>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml
index df60e60..1e302da 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml
@@ -143,11 +143,11 @@
<compilation-unit name="common/malformed-json">
<placeholder name="adapter" value="AZUREBLOB" />
<output-dir compare="Text">common/malformed-json</output-dir>
- <expected-error>Parsing error at malformed-data/duplicate-fields.json line 1 field field: Duplicate field 'field'</expected-error>
- <expected-error>Parsing error at malformed-data/malformed-json.json line 1 field field: Unexpected character ('}' (code 125)): was expecting double-quote to start field name</expected-error>
- <expected-error>Parsing error at malformed-data/malformed-json-2.json line 4 field array_f: Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
- <expected-error>Parsing error at malformed-data/malformed-jsonl-1.json line 3 field field2: Unrecognized token 'truee': was expecting (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
- <expected-error>Parsing error at malformed-data/malformed-jsonl-2.json line 11 field array_f: Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
+ <expected-error>Parsing error at malformed-data/duplicate-fields.json line 1 field 'field': Duplicate field 'field'</expected-error>
+ <expected-error>Parsing error at malformed-data/malformed-json.json line 1 field 'field': Unexpected character ('}' (code 125)): was expecting double-quote to start field name</expected-error>
+ <expected-error>Parsing error at malformed-data/malformed-json-2.json line 4 field 'array_f': Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
+ <expected-error>Parsing error at malformed-data/malformed-jsonl-1.json line 3 field 'field2': Unrecognized token 'truee': was expecting (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
+ <expected-error>Parsing error at malformed-data/malformed-jsonl-2.json line 11 field 'array_f': Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index 9bc463c..b354e65 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -92,11 +92,11 @@
<compilation-unit name="common/malformed-json">
<placeholder name="adapter" value="S3" />
<output-dir compare="Text">common/malformed-json</output-dir>
- <expected-error>Parsing error at malformed-data/duplicate-fields.json line 1 field field: Duplicate field 'field'</expected-error>
- <expected-error>Parsing error at malformed-data/malformed-json.json line 1 field field: Unexpected character ('}' (code 125)): was expecting double-quote to start field name</expected-error>
- <expected-error>Parsing error at malformed-data/malformed-json-2.json line 4 field array_f: Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
- <expected-error>Parsing error at malformed-data/malformed-jsonl-1.json line 3 field field2: Unrecognized token 'truee': was expecting (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
- <expected-error>Parsing error at malformed-data/malformed-jsonl-2.json line 11 field array_f: Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
+ <expected-error>Parsing error at malformed-data/duplicate-fields.json line 1 field 'field': Duplicate field 'field'</expected-error>
+ <expected-error>Parsing error at malformed-data/malformed-json.json line 1 field 'field': Unexpected character ('}' (code 125)): was expecting double-quote to start field name</expected-error>
+ <expected-error>Parsing error at malformed-data/malformed-json-2.json line 4 field 'array_f': Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
+ <expected-error>Parsing error at malformed-data/malformed-jsonl-1.json line 3 field 'field2': Unrecognized token 'truee': was expecting (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
+ <expected-error>Parsing error at malformed-data/malformed-jsonl-2.json line 11 field 'array_f': Unexpected character (']' (code 93)): expected a valid value (JSON String, Number, Array, Object or token 'null', 'true' or 'false')</expected-error>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3_one_partition.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3_one_partition.xml
index 92b5e32..d02647d 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3_one_partition.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3_one_partition.xml
@@ -49,17 +49,17 @@
<output-dir compare="Text">common/csv-warnings</output-dir>
<expected-warn>Parsing error at data_dir/no_h_missing_fields.csv line 2 field 3: some fields are missing</expected-warn>
<expected-warn>Parsing error at data_dir/no_h_no_closing_q.csv line 2 field 0: malformed input record ended abruptly</expected-warn>
- <expected-warn>Parsing error at line 2 field 0: malformed input record ended abruptly</expected-warn>
+ <expected-warn>Parsing error at line 2 field 0: malformed input record ended abruptly</expected-warn>
- <expected-warn>Parsing error at line 5 field 3: invalid value</expected-warn>
- <expected-warn>Parsing error at line 2 field 1: invalid value</expected-warn>
- <expected-warn>Parsing error at line 11 field 1: invalid value</expected-warn>
- <expected-warn>Parsing error at line 3 field 1: invalid value</expected-warn>
- <expected-warn>Parsing error at line 4 field 1: invalid value</expected-warn>
- <expected-warn>Parsing error at line 7 field 7: invalid value</expected-warn>
- <expected-warn>Parsing error at line 13 field 7: invalid value</expected-warn>
- <expected-warn>Parsing error at line 12 field 3: invalid value</expected-warn>
- <expected-warn>Parsing error at line 9 field 6: a quote should be in the beginning</expected-warn>
+ <expected-warn>Parsing error at line 5 field 3: invalid value</expected-warn>
+ <expected-warn>Parsing error at line 2 field 1: invalid value</expected-warn>
+ <expected-warn>Parsing error at line 11 field 1: invalid value</expected-warn>
+ <expected-warn>Parsing error at line 3 field 1: invalid value</expected-warn>
+ <expected-warn>Parsing error at line 4 field 1: invalid value</expected-warn>
+ <expected-warn>Parsing error at line 7 field 7: invalid value</expected-warn>
+ <expected-warn>Parsing error at line 13 field 7: invalid value</expected-warn>
+ <expected-warn>Parsing error at line 12 field 3: invalid value</expected-warn>
+ <expected-warn>Parsing error at line 9 field 6: a quote should be in the beginning</expected-warn>
<expected-warn>Parsing error at data_dir/h_invalid_values.csv line 5 field 3: invalid value</expected-warn>
<expected-warn>Parsing error at data_dir/h_invalid_values.csv line 2 field 1: invalid value</expected-warn>
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
index 3a502d0..4c253bc 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
@@ -29,10 +29,10 @@
import org.apache.asterix.external.api.AsterixInputStream;
import org.apache.asterix.external.util.ExternalDataConstants;
import org.apache.asterix.external.util.ExternalDataUtils;
-import org.apache.asterix.external.util.ParseUtil;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.util.ParseUtil;
public class QuotedLineRecordReader extends LineRecordReader {
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
index 2ff5cfa..0e23e46 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
@@ -40,10 +40,10 @@
import org.apache.asterix.external.api.AsterixInputStream;
import org.apache.asterix.external.util.ExternalDataConstants;
import org.apache.asterix.external.util.ExternalDataUtils;
-import org.apache.asterix.external.util.ParseUtil;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.util.ParseUtil;
public class SemiStructuredRecordReader extends StreamRecordReader {
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
index 2bf0df4..2d20cc2 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractJsonDataParser.java
@@ -18,6 +18,8 @@
*/
package org.apache.asterix.external.parser;
+import static org.apache.hyracks.api.exceptions.ErrorCode.PARSING_ERROR;
+
import java.io.DataOutput;
import java.io.IOException;
import java.util.BitSet;
@@ -46,6 +48,7 @@
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.util.ExceptionUtils;
import org.apache.hyracks.data.std.api.IMutableValueStorage;
+import org.apache.hyracks.util.ParseUtil;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParseException;
@@ -433,18 +436,13 @@
}
long lineNum = lineNumber.getAsLong() + jsonParser.getCurrentLocation().getLineNr() - 1;
JsonStreamContext parsingContext = jsonParser.getParsingContext();
- String fieldName = "N/A";
- while (parsingContext != null) {
- String currentFieldName = parsingContext.getCurrentName();
- if (currentFieldName != null) {
- fieldName = currentFieldName;
- break;
- }
+ String fieldName = null;
+ while (parsingContext != null && fieldName == null) {
+ fieldName = parsingContext.getCurrentName();
parsingContext = parsingContext.getParent();
}
-
- return HyracksDataException.create(org.apache.hyracks.api.exceptions.ErrorCode.PARSING_ERROR,
- dataSourceName.get(), lineNum, fieldName, msg);
+ final String locationDetails = ParseUtil.asLocationDetailString(dataSourceName.get(), lineNum, fieldName);
+ return HyracksDataException.create(PARSING_ERROR, locationDetails, msg);
}
return new RuntimeDataException(ErrorCode.RECORD_READER_MALFORMED_INPUT_STREAM, e);
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
index 60e6e77..590f51d 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
@@ -38,7 +38,6 @@
import org.apache.asterix.external.api.IRecordDataParser;
import org.apache.asterix.external.api.IStreamDataParser;
import org.apache.asterix.external.util.ExternalDataConstants;
-import org.apache.asterix.external.util.ParseUtil;
import org.apache.asterix.om.base.AMutableString;
import org.apache.asterix.om.typecomputer.impl.TypeComputeUtils;
import org.apache.asterix.om.types.ARecordType;
@@ -52,6 +51,7 @@
import org.apache.hyracks.dataflow.common.data.parsers.IValueParser;
import org.apache.hyracks.dataflow.common.data.parsers.IValueParserFactory;
import org.apache.hyracks.dataflow.std.file.FieldCursorForDelimitedDataParser;
+import org.apache.hyracks.util.ParseUtil;
public class DelimitedDataParser extends AbstractDataParser implements IStreamDataParser, IRecordDataParser<char[]> {
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java
deleted file mode 100644
index 5a46af7..0000000
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ParseUtil.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.asterix.external.util;
-
-import org.apache.hyracks.api.exceptions.ErrorCode;
-import org.apache.hyracks.api.exceptions.IWarningCollector;
-import org.apache.hyracks.api.exceptions.Warning;
-
-public class ParseUtil {
-
- private ParseUtil() {
- }
-
- public static void warn(IWarningCollector warningCollector, String dataSourceName, long lineNum, int fieldNum,
- String warnMessage) {
- warningCollector
- .warn(Warning.of(null, ErrorCode.PARSING_ERROR, dataSourceName, lineNum, fieldNum, warnMessage));
- }
-}
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java
new file mode 100644
index 0000000..63fec09
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/ParseUtil.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hyracks.util;
+
+import java.util.StringJoiner;
+
+import org.apache.hyracks.api.exceptions.ErrorCode;
+import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.api.exceptions.Warning;
+
+public class ParseUtil {
+
+ private ParseUtil() {
+ }
+
+ public static void warn(IWarningCollector warningCollector, String dataSourceName, long lineNum, int fieldNum,
+ String warnMessage) {
+ warningCollector.warn(Warning.of(null, ErrorCode.PARSING_ERROR,
+ asLocationDetailString(dataSourceName, lineNum, fieldNum), warnMessage));
+ }
+
+ public static String asLocationDetailString(String dataSource, long lineNum, Object fieldIdentifier) {
+ StringJoiner details = new StringJoiner(" ");
+ details.setEmptyValue("N/A");
+ if (dataSource != null && !dataSource.isEmpty()) {
+ details.add(dataSource);
+ }
+ if (lineNum >= 0) {
+ details.add("line " + lineNum);
+ }
+ if (fieldIdentifier instanceof Number) {
+ details.add("field " + fieldIdentifier);
+ } else if (fieldIdentifier instanceof String && !((String) fieldIdentifier).isEmpty()) {
+ details.add("field '" + fieldIdentifier + "'");
+ }
+ return "at " + details;
+ }
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
index 9f04fb2..4d9c60b 100644
--- a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
@@ -141,7 +141,7 @@
121 = A numeric type promotion error has occurred: %1$s
122 = Encountered an error while printing the plan: %1$s
123 = Insufficient memory is provided for the join operators, please increase the join memory budget.
-124 = Parsing error at %1$s line %2$s field %3$s: %4$s
+124 = Parsing error %s: %s
125 = Invalid inverted list type traits: %1$s
126 = Illegal state. %1$s
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
index 936d63e..ffc87cd 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
@@ -23,9 +23,8 @@
import java.util.Arrays;
import java.util.function.Supplier;
-import org.apache.hyracks.api.exceptions.ErrorCode;
import org.apache.hyracks.api.exceptions.IWarningCollector;
-import org.apache.hyracks.api.exceptions.Warning;
+import org.apache.hyracks.util.ParseUtil;
public class FieldCursorForDelimitedDataParser {
@@ -448,6 +447,6 @@
}
private void warn(String message) {
- warnings.warn(Warning.of(null, ErrorCode.PARSING_ERROR, dataSourceName.get(), lineCount, fieldCount, message));
+ ParseUtil.warn(warnings, dataSourceName.get(), lineCount, fieldCount, message);
}
}