[ASTERIXDB-2713][EXT] CSV & TSV support for external dataset p4
- user model changes: no
- storage format changes: no
- interface changes: yes
- IValueParser
Details:
- added parameter "null" to allow user to specify what string represents
a null value. Only nullable fields whose values match this string will
produce NULL. Otherwise, no NULLs will be produced.
- empty fields:
- for string fields, produce empty string "".
- for non-string fields, issue a warning and ignore the record.
- changed IValueParser to return boolean to allow some implementations
to return true or false instead of throwing an exception.
- added parameter "redact-warnings" to allow user to specify if parser
warnings should exclude information like file name.
- changed the Integer and Long parsers to consider spaces properly and
also to handle overflow and underflow.
- changed the boolean parser to align with other parsers that ignore
leading and trailing spaces.
- added test cases.
Change-Id: Ib6aed0095a472510b69cc29a3e444e7de5a2c1ae
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/5963
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Ali Alsuliman <ali.al.solaiman@gmail.com>
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-app/data/csv/header/h_invalid_values.csv b/asterixdb/asterix-app/data/csv/header/h_invalid_values.csv
new file mode 100644
index 0000000..1ec6140
--- /dev/null
+++ b/asterixdb/asterix-app/data/csv/header/h_invalid_values.csv
@@ -0,0 +1,13 @@
+bigint_t,nullable_bigint_t,double_t,nullable_double_t,str_t,nullable_str_t,boolean_t,nullable_boolean_t
+163a,12,33.4,16.1,"text","text",true,false
+1.22,12,33.4,16.1,"text","text",true,false
+999999999999999999999999999999999,12,33.4,16.1,"text","text",true,false
+1234,12,non-double,16.1,"text","text",true,false
+1234 ,12, 15.1,16.1,"fine","fine",TRUE,false
+1234,12,15.1,16.1,"text","text",non-boolean,false
+1234,12,15.1,16.1,"fine","fine", true,false
+1234,12,15.1,16.1,"text", "quote-not-at-beginning",true,false
+1234,\N,15.1,\N,\N,\N,true,\N
+\N,12,15.1,16.1,"text","text",true,false
+1234,12,\N,16.1,"text","text",true,false
+1234,12,15.1,16.1,"text","text",\N,false
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.1.ddl.sqlpp
index 113ace3..65816f6 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.1.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.1.ddl.sqlpp
@@ -30,6 +30,6 @@
CREATE EXTERNAL DATASET ds1(t1) USING localfs(("path"="asterix_nc1://data/csv/sample_09.csv"), ("format"="CSV"), ("header"="FALSE"));
CREATE EXTERNAL DATASET ds2(t2) USING localfs(("path"="asterix_nc1://data/csv/sample_10.csv"), ("format"="Csv"), ("header"="False"));
CREATE EXTERNAL DATASET ds3(t1) USING localfs(("path"="asterix_nc1://data/csv/sample_11.csv"), ("format"="csv"), ("header"="FALSE"));
-CREATE EXTERNAL DATASET ds4(t3) USING localfs(("path"="asterix_nc1://data/csv/sample_12.csv"), ("format"="csv"), ("header"="True"));
+CREATE EXTERNAL DATASET ds4(t3) USING localfs(("path"="asterix_nc1://data/csv/sample_12.csv"), ("format"="csv"), ("header"="True"), ("null"=""));
CREATE EXTERNAL DATASET ds5(t4) USING localfs(("path"="asterix_nc1://data/csv/sample_13.csv"), ("format"="csv"), ("header"="True"));
CREATE EXTERNAL DATASET ds6(t4) USING localfs(("path"="asterix_nc1://data/csv/empty_lines.csv"), ("format"="csv"), ("header"="false"));
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.002.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.002.ddl.sqlpp
index 191ddff..0eff4c8 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.002.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.002.ddl.sqlpp
@@ -22,7 +22,9 @@
USE test;
DROP TYPE t1 IF EXISTS;
+DROP TYPE t2 IF EXISTS;
CREATE TYPE t1 AS {f1: int, f2: int, f3: int, f4: string};
+CREATE TYPE t2 AS {f1: bigint, f2: bigint?, f3: double, f4: double?, f5: string, f6: string?, f7: boolean, f8: boolean?};
DROP DATASET ds1 IF EXISTS;
CREATE EXTERNAL DATASET ds1(t1) USING S3 (
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.007.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.007.ddl.sqlpp
new file mode 100644
index 0000000..cbfe44d
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.007.ddl.sqlpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+DROP DATASET ds1 IF EXISTS;
+CREATE EXTERNAL DATASET ds1(t1) USING S3 (
+("accessKey"="dummyAccessKey"),
+("secretKey"="dummySecretKey"),
+("region"="us-west-2"),
+("serviceEndpoint"="http://localhost:8001"),
+("container"="playground"),
+("definition"="data_dir"),
+("format"="CSV"),
+("header"="false"),
+("redact-warnings"="true")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.008.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.008.query.sqlpp
new file mode 100644
index 0000000..26ccfa7
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.008.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+// requesttype=application/json
+// param max-warnings:json=100
+USE test;
+
+FROM ds1 v SELECT VALUE v ORDER BY v.f1 ASC;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.009.s3bucket.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.009.s3bucket.sqlpp
new file mode 100644
index 0000000..f9da983
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.009.s3bucket.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+playground data_dir data/csv/header/h_invalid_values.csv
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.010.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.010.ddl.sqlpp
new file mode 100644
index 0000000..bd87ccb
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.010.ddl.sqlpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+DROP DATASET ds2 IF EXISTS;
+CREATE EXTERNAL DATASET ds2(t2) USING S3 (
+("accessKey"="dummyAccessKey"),
+("secretKey"="dummySecretKey"),
+("region"="us-west-2"),
+("serviceEndpoint"="http://localhost:8001"),
+("container"="playground"),
+("definition"="data_dir"),
+("format"="CSV"),
+("header"="true"),
+("redact-warnings"="true"),
+("null"="\\N")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.011.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.011.query.sqlpp
new file mode 100644
index 0000000..e6b24f3
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.011.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+// requesttype=application/json
+// param max-warnings:json=100
+USE test;
+
+FROM ds2 v SELECT VALUE v ORDER BY v.f1 ASC;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.012.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.012.ddl.sqlpp
new file mode 100644
index 0000000..e394f2d
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.012.ddl.sqlpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+DROP DATASET ds2 IF EXISTS;
+CREATE EXTERNAL DATASET ds2(t2) USING S3 (
+("accessKey"="dummyAccessKey"),
+("secretKey"="dummySecretKey"),
+("region"="us-west-2"),
+("serviceEndpoint"="http://localhost:8001"),
+("container"="playground"),
+("definition"="data_dir"),
+("format"="CSV"),
+("header"="true"),
+("null"="\\N")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.013.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.013.query.sqlpp
new file mode 100644
index 0000000..e6b24f3
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv-warnings/query-dataset.013.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+// requesttype=application/json
+// param max-warnings:json=100
+USE test;
+
+FROM ds2 v SELECT VALUE v ORDER BY v.f1 ASC;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.000.ddl.sqlpp
index 6184b19..15ba6a8 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/csv/000/query-dataset.000.ddl.sqlpp
@@ -33,5 +33,6 @@
("container"="playground"),
("definition"="csv-data/reviews"),
("format"="Csv"),
-("header"="false")
+("header"="false"),
+("null"="")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.000.ddl.sqlpp
index 194adf6..3c6ad92 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/tsv/000/query-dataset.000.ddl.sqlpp
@@ -33,5 +33,6 @@
("container"="playground"),
("definition"="tsv-data/reviews"),
("format"="TSV"),
-("header"="False")
+("header"="False"),
+("null"="")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp
index 988ebe3..68faf3f 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_01/csv_01.2.update.sqlpp
@@ -26,5 +26,5 @@
use temp;
-load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_01.csv`),(`format`=`delimited-text`),(`delimiter`=`,`));
+load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_01.csv`),(`format`=`delimited-text`),(`delimiter`=`,`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_02/csv_02.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_02/csv_02.2.update.sqlpp
index 988ebe3..68faf3f 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_02/csv_02.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_02/csv_02.2.update.sqlpp
@@ -26,5 +26,5 @@
use temp;
-load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_01.csv`),(`format`=`delimited-text`),(`delimiter`=`,`));
+load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_01.csv`),(`format`=`delimited-text`),(`delimiter`=`,`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_03/csv_03.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_03/csv_03.2.update.sqlpp
index 7eaaf1d..fdc1d1a 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_03/csv_03.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_03/csv_03.2.update.sqlpp
@@ -26,5 +26,5 @@
use temp;
-load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_02.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_02.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_cr/csv_08.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_cr/csv_08.2.update.sqlpp
index e44dfe9..d05c821 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_cr/csv_08.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_cr/csv_08.2.update.sqlpp
@@ -26,5 +26,5 @@
use temp;
-load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_08_header.csv.cr`),(`format`=`delimited-text`),(`header`=`true`));
+load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_08_header.csv.cr`),(`format`=`delimited-text`),(`header`=`true`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_crlf/csv_08.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_crlf/csv_08.2.update.sqlpp
index e4da86b..9be6773 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_crlf/csv_08.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_crlf/csv_08.2.update.sqlpp
@@ -26,5 +26,5 @@
use temp;
-load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_08_header.csv.crlf`),(`format`=`delimited-text`),(`header`=`true`));
+load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_08_header.csv.crlf`),(`format`=`delimited-text`),(`header`=`true`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_lf/csv_08.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_lf/csv_08.2.update.sqlpp
index e024bd3..102b2aa 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_lf/csv_08.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_08_header_lf/csv_08.2.update.sqlpp
@@ -26,5 +26,5 @@
use temp;
-load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_08_header.csv.lf`),(`format`=`delimited-text`),(`header`=`true`));
+load dataset testds using localfs ((`path`=`asterix_nc1://data/csv/sample_08_header.csv.lf`),(`format`=`delimited-text`),(`header`=`true`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q19/q19.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q19/q19.2.update.sqlpp
index b59d644..fc0b44a 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q19/q19.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q19/q19.2.update.sqlpp
@@ -20,14 +20,14 @@
use tpcds;
-load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q25/q25.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q25/q25.2.update.sqlpp
index 3bf9f8d..efd670e 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q25/q25.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q25/q25.2.update.sqlpp
@@ -20,14 +20,14 @@
use tpcds;
-load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store_returns using localfs ((`path`=`asterix_nc1://data/tpcds/store_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store_returns using localfs ((`path`=`asterix_nc1://data/tpcds/store_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset catalog_sales using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset catalog_sales using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q29/q29.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q29/q29.2.update.sqlpp
index 3bf9f8d..efd670e 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q29/q29.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q29/q29.2.update.sqlpp
@@ -20,14 +20,14 @@
use tpcds;
-load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store_returns using localfs ((`path`=`asterix_nc1://data/tpcds/store_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store_returns using localfs ((`path`=`asterix_nc1://data/tpcds/store_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset catalog_sales using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset catalog_sales using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q30/q30.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q30/q30.2.update.sqlpp
index 190a142..8d5b408 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q30/q30.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q30/q30.2.update.sqlpp
@@ -20,10 +20,10 @@
use tpcds;
-load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset web_returns using localfs ((`path`=`asterix_nc1://data/tpcds/web_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset web_returns using localfs ((`path`=`asterix_nc1://data/tpcds/web_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q34/q34.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q34/q34.2.update.sqlpp
index 97ea2e8..98accdd 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q34/q34.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q34/q34.2.update.sqlpp
@@ -20,12 +20,12 @@
use tpcds;
-load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q43/q43.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q43/q43.2.update.sqlpp
index bbf1838..654a1f1 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q43/q43.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q43/q43.2.update.sqlpp
@@ -20,8 +20,8 @@
use tpcds;
-load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q46/q46.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q46/q46.2.update.sqlpp
index 6ab1d16..fc9d724 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q46/q46.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q46/q46.2.update.sqlpp
@@ -20,14 +20,14 @@
use tpcds;
-load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q50/q50.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q50/q50.2.update.sqlpp
index d50d706..b870b65 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q50/q50.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q50/q50.2.update.sqlpp
@@ -20,10 +20,10 @@
use tpcds;
-load dataset store_returns using localfs ((`path`=`asterix_nc1://data/tpcds/store_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store_returns using localfs ((`path`=`asterix_nc1://data/tpcds/store_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q57/q57.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q57/q57.2.update.sqlpp
index 290b1ae..25b4d12 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q57/q57.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q57/q57.2.update.sqlpp
@@ -19,11 +19,11 @@
use tpcds;
-load dataset call_center using localfs ((`path`=`asterix_nc1://data/tpcds/call_center.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset call_center using localfs ((`path`=`asterix_nc1://data/tpcds/call_center.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset catalog_sales using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset catalog_sales using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q59/q59.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q59/q59.2.update.sqlpp
index 4cadf19..80bfe7a 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q59/q59.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q59/q59.2.update.sqlpp
@@ -20,8 +20,8 @@
use tpcds;
-load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q62/q62.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q62/q62.2.update.sqlpp
index 2aab7e4..b1a240c 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q62/q62.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q62/q62.2.update.sqlpp
@@ -20,12 +20,12 @@
use tpcds;
-load dataset web_sales using localfs ((`path`=`asterix_nc1://data/tpcds/web_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset web_sales using localfs ((`path`=`asterix_nc1://data/tpcds/web_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset web_site using localfs ((`path`=`asterix_nc1://data/tpcds/web_site.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset web_site using localfs ((`path`=`asterix_nc1://data/tpcds/web_site.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset warehouse using localfs ((`path`=`asterix_nc1://data/tpcds/warehouse.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset warehouse using localfs ((`path`=`asterix_nc1://data/tpcds/warehouse.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset ship_mode using localfs ((`path`=`asterix_nc1://data/tpcds/ship_mode.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset ship_mode using localfs ((`path`=`asterix_nc1://data/tpcds/ship_mode.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q63/q63.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q63/q63.2.update.sqlpp
index c2cef59..a56835d 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q63/q63.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q63/q63.2.update.sqlpp
@@ -19,10 +19,10 @@
use tpcds;
-load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q73/q73.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q73/q73.2.update.sqlpp
index 97ea2e8..98accdd 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q73/q73.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q73/q73.2.update.sqlpp
@@ -20,12 +20,12 @@
use tpcds;
-load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q79/q79.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q79/q79.2.update.sqlpp
index 97ea2e8..98accdd 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q79/q79.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q79/q79.2.update.sqlpp
@@ -20,12 +20,12 @@
use tpcds;
-load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q81/q81.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q81/q81.2.update.sqlpp
index b7e0e29..9ce02ba 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q81/q81.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q81/q81.2.update.sqlpp
@@ -20,10 +20,10 @@
use tpcds;
-load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset catalog_returns using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset catalog_returns using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q85/q85.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q85/q85.2.update.sqlpp
index fdb2d4f..821de94 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q85/q85.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q85/q85.2.update.sqlpp
@@ -20,16 +20,16 @@
use tpcds;
-load dataset web_page using localfs ((`path`=`asterix_nc1://data/tpcds/web_page.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset web_page using localfs ((`path`=`asterix_nc1://data/tpcds/web_page.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset reason using localfs ((`path`=`asterix_nc1://data/tpcds/reason.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset reason using localfs ((`path`=`asterix_nc1://data/tpcds/reason.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset customer_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/customer_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset customer_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/customer_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset web_sales using localfs ((`path`=`asterix_nc1://data/tpcds/web_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset web_sales using localfs ((`path`=`asterix_nc1://data/tpcds/web_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset web_returns using localfs ((`path`=`asterix_nc1://data/tpcds/web_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset web_returns using localfs ((`path`=`asterix_nc1://data/tpcds/web_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q88/q88.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q88/q88.2.update.sqlpp
index a673f5b..87cb458 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q88/q88.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q88/q88.2.update.sqlpp
@@ -20,10 +20,10 @@
use tpcds;
-load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset time_dim using localfs ((`path`=`asterix_nc1://data/tpcds/time_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset time_dim using localfs ((`path`=`asterix_nc1://data/tpcds/time_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q89/q89.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q89/q89.2.update.sqlpp
index c2cef59..a56835d 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q89/q89.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q89/q89.2.update.sqlpp
@@ -19,10 +19,10 @@
use tpcds;
-load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q90/q90.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q90/q90.2.update.sqlpp
index 319491a..512eaf2 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q90/q90.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q90/q90.2.update.sqlpp
@@ -20,10 +20,10 @@
use tpcds;
-load dataset web_page using localfs ((`path`=`asterix_nc1://data/tpcds/web_page.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset web_page using localfs ((`path`=`asterix_nc1://data/tpcds/web_page.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset time_dim using localfs ((`path`=`asterix_nc1://data/tpcds/time_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset time_dim using localfs ((`path`=`asterix_nc1://data/tpcds/time_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset web_sales using localfs ((`path`=`asterix_nc1://data/tpcds/web_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset web_sales using localfs ((`path`=`asterix_nc1://data/tpcds/web_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q91/q91.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q91/q91.2.update.sqlpp
index 4599485..8d50a4a 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q91/q91.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q91/q91.2.update.sqlpp
@@ -20,16 +20,16 @@
use tpcds;
-load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset call_center using localfs ((`path`=`asterix_nc1://data/tpcds/call_center.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset call_center using localfs ((`path`=`asterix_nc1://data/tpcds/call_center.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset customer_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/customer_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset customer_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/customer_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset catalog_returns using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset catalog_returns using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_returns.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q96/q96.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q96/q96.2.update.sqlpp
index a673f5b..87cb458 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q96/q96.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/q96/q96.2.update.sqlpp
@@ -20,10 +20,10 @@
use tpcds;
-load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset household_demographics using localfs ((`path`=`asterix_nc1://data/tpcds/household_demographics.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset time_dim using localfs ((`path`=`asterix_nc1://data/tpcds/time_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset time_dim using localfs ((`path`=`asterix_nc1://data/tpcds/time_dim.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
-load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`));
+load dataset store using localfs ((`path`=`asterix_nc1://data/tpcds/store.csv`),(`format`=`delimited-text`),(`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/query-ASTERIXDB-1591/query-ASTERIXDB-1591.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/query-ASTERIXDB-1591/query-ASTERIXDB-1591.2.update.sqlpp
index 9401cee..194b60e 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/query-ASTERIXDB-1591/query-ASTERIXDB-1591.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/query-ASTERIXDB-1591/query-ASTERIXDB-1591.2.update.sqlpp
@@ -20,22 +20,22 @@
use tpcds;
load dataset customer_address using localfs ((`path`=`asterix_nc1://data/tpcds/customer_address.csv`),
-(`format`=`delimited-text`), (`delimiter`=`|`));
+(`format`=`delimited-text`), (`delimiter`=`|`),(`null`=``));
load dataset customer using localfs ((`path`=`asterix_nc1://data/tpcds/customer.csv`),
-(`format`=`delimited-text`), (`delimiter`=`|`));
+(`format`=`delimited-text`), (`delimiter`=`|`),(`null`=``));
load dataset store_sales using localfs ((`path`=`asterix_nc1://data/tpcds/store_sales.csv`),
-(`format`=`delimited-text`), (`delimiter`=`|`));
+(`format`=`delimited-text`), (`delimiter`=`|`),(`null`=``));
load dataset date_dim using localfs ((`path`=`asterix_nc1://data/tpcds/date_dim.csv`),
-(`format`=`delimited-text`), (`delimiter`=`|`));
+(`format`=`delimited-text`), (`delimiter`=`|`),(`null`=``));
load dataset catalog_sales using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_sales.csv`),
-(`format`=`delimited-text`), (`delimiter`=`|`));
+(`format`=`delimited-text`), (`delimiter`=`|`),(`null`=``));
load dataset catalog_returns using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_returns.csv`),
-(`format`=`delimited-text`), (`delimiter`=`|`));
+(`format`=`delimited-text`), (`delimiter`=`|`),(`null`=``));
load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),
-(`format`=`delimited-text`), (`delimiter`=`|`));
+(`format`=`delimited-text`), (`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/query-ASTERIXDB-1596/query-ASTERIXDB-1596.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/query-ASTERIXDB-1596/query-ASTERIXDB-1596.2.update.sqlpp
index 01cca8d..9c8aa41 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/query-ASTERIXDB-1596/query-ASTERIXDB-1596.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/query-ASTERIXDB-1596/query-ASTERIXDB-1596.2.update.sqlpp
@@ -20,10 +20,10 @@
use tpcds;
load dataset catalog_sales using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_sales.csv`),
-(`format`=`delimited-text`), (`delimiter`=`|`));
+(`format`=`delimited-text`), (`delimiter`=`|`),(`null`=``));
load dataset catalog_returns using localfs ((`path`=`asterix_nc1://data/tpcds/catalog_returns.csv`),
-(`format`=`delimited-text`), (`delimiter`=`|`));
+(`format`=`delimited-text`), (`delimiter`=`|`),(`null`=``));
load dataset item using localfs ((`path`=`asterix_nc1://data/tpcds/item.csv`),
-(`format`=`delimited-text`), (`delimiter`=`|`));
+(`format`=`delimited-text`), (`delimiter`=`|`),(`null`=``));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv-warnings/external_dataset.003.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv-warnings/external_dataset.003.adm
new file mode 100644
index 0000000..7d3c940
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv-warnings/external_dataset.003.adm
@@ -0,0 +1 @@
+{ "f1": 1, "f2": 2, "f3": 3, "f4": "str" }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv-warnings/external_dataset.004.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv-warnings/external_dataset.004.adm
new file mode 100644
index 0000000..334381b
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv-warnings/external_dataset.004.adm
@@ -0,0 +1,3 @@
+{ "f1": 1234, "f2": 12, "f3": 15.1, "f4": 16.1, "f5": "fine", "f6": "fine", "f7": true, "f8": false }
+{ "f1": 1234, "f2": 12, "f3": 15.1, "f4": 16.1, "f5": "fine", "f6": "fine", "f7": true, "f8": false }
+{ "f1": 1234, "f2": null, "f3": 15.1, "f4": null, "f5": "\\N", "f6": null, "f7": true, "f8": null }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv-warnings/external_dataset.005.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv-warnings/external_dataset.005.adm
new file mode 100644
index 0000000..334381b
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/csv-warnings/external_dataset.005.adm
@@ -0,0 +1,3 @@
+{ "f1": 1234, "f2": 12, "f3": 15.1, "f4": 16.1, "f5": "fine", "f6": "fine", "f7": true, "f8": false }
+{ "f1": 1234, "f2": 12, "f3": 15.1, "f4": 16.1, "f5": "fine", "f6": "fine", "f7": true, "f8": false }
+{ "f1": 1234, "f2": null, "f3": 15.1, "f4": null, "f5": "\\N", "f6": null, "f7": true, "f8": null }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_one_partition.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_one_partition.xml
index 876a8ef..e194c86 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_one_partition.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_one_partition.xml
@@ -42,14 +42,35 @@
<test-case FilePath="external-dataset" check-warnings="true">
<compilation-unit name="aws/s3/csv-warnings">
<output-dir compare="Text">aws/s3/csv-warnings</output-dir>
- <expected-warn>Parsing error in data_dir/no_h_missing_fields.csv at record 2 field 3: some fields are missing</expected-warn>
- <expected-warn>Parsing error in data_dir/no_h_no_closing_q.csv at record 0 field 0: malformed input record ended inside quote</expected-warn>
+ <expected-warn>Parsing error at data_dir/no_h_missing_fields.csv record 2 field 3: some fields are missing</expected-warn>
+ <expected-warn>Parsing error at data_dir/no_h_no_closing_q.csv record 0 field 0: malformed input record ended inside quote</expected-warn>
+ <expected-warn>Parsing error at record 0 field 0: malformed input record ended inside quote</expected-warn>
+
+ <expected-warn>Parsing error at record 4 field 3: invalid value</expected-warn>
+ <expected-warn>Parsing error at record 1 field 1: invalid value</expected-warn>
+ <expected-warn>Parsing error at record 10 field 1: invalid value</expected-warn>
+ <expected-warn>Parsing error at record 2 field 1: invalid value</expected-warn>
+ <expected-warn>Parsing error at record 3 field 1: invalid value</expected-warn>
+ <expected-warn>Parsing error at record 6 field 7: invalid value</expected-warn>
+ <expected-warn>Parsing error at record 12 field 7: invalid value</expected-warn>
+ <expected-warn>Parsing error at record 11 field 3: invalid value</expected-warn>
+ <expected-warn>Parsing error at record 8 field 6: a quote should be in the beginning</expected-warn>
+
+ <expected-warn>Parsing error at data_dir/h_invalid_values.csv record 4 field 3: invalid value</expected-warn>
+ <expected-warn>Parsing error at data_dir/h_invalid_values.csv record 1 field 1: invalid value</expected-warn>
+ <expected-warn>Parsing error at data_dir/h_invalid_values.csv record 10 field 1: invalid value</expected-warn>
+ <expected-warn>Parsing error at data_dir/h_invalid_values.csv record 2 field 1: invalid value</expected-warn>
+ <expected-warn>Parsing error at data_dir/h_invalid_values.csv record 3 field 1: invalid value</expected-warn>
+ <expected-warn>Parsing error at data_dir/h_invalid_values.csv record 6 field 7: invalid value</expected-warn>
+ <expected-warn>Parsing error at data_dir/h_invalid_values.csv record 12 field 7: invalid value</expected-warn>
+ <expected-warn>Parsing error at data_dir/h_invalid_values.csv record 11 field 3: invalid value</expected-warn>
+ <expected-warn>Parsing error at data_dir/h_invalid_values.csv record 8 field 6: a quote should be in the beginning</expected-warn>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset" check-warnings="true">
<compilation-unit name="aws/s3/tsv-warnings">
<output-dir compare="Text">aws/s3/tsv-warnings</output-dir>
- <expected-warn>Parsing error in data_dir/no_h_missing_fields.tsv at record 2 field 3: some fields are missing</expected-warn>
+ <expected-warn>Parsing error at data_dir/no_h_missing_fields.tsv record 2 field 3: some fields are missing</expected-warn>
</compilation-unit>
</test-case>
</test-group>
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/RecordWithMetadataAndPK.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/RecordWithMetadataAndPK.java
index 959e34d..de889a2 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/RecordWithMetadataAndPK.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/RecordWithMetadataAndPK.java
@@ -155,7 +155,9 @@
fieldValueBufferOutputs[index].writeByte(ATypeTag.SERIALIZED_NULL_TYPE_TAG);
} else {
fieldValueBufferOutputs[index].writeByte(fieldTypeTags[index]);
- valueParsers[index].parse(src, offset, length, fieldValueBufferOutputs[index]);
+ if (!valueParsers[index].parse(src, offset, length, fieldValueBufferOutputs[index])) {
+ throw new RuntimeDataException(ErrorCode.FAILED_TO_PARSE_METADATA);
+ }
}
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/EmptyLineSeparatedRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/EmptyLineSeparatedRecordReader.java
index 24a68a7..b697b05 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/EmptyLineSeparatedRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/EmptyLineSeparatedRecordReader.java
@@ -137,7 +137,7 @@
+ /**
+ * Prepares this reader to read from the given stream.
+ *
+ * @param ctx
+ * the task context; not used by the statements visible in this method
+ * @param inputStream
+ * the stream passed on to the parent class configure
+ * @param config
+ * the reader configuration; forwarded to the parent class and also kept in this.config
+ */
@Override
public void configure(IHyracksTaskContext ctx, AsterixInputStream inputStream, Map<String, String> config) {
- super.configure(inputStream);
+ super.configure(inputStream, config);
this.config = config;
}
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java
index 0d16e0c..a3f560d 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java
@@ -45,7 +45,7 @@
@Override
public void configure(IHyracksTaskContext ctx, AsterixInputStream inputStream, Map<String, String> config)
throws HyracksDataException {
- super.configure(inputStream);
+ super.configure(inputStream, config);
this.hasHeader = ExternalDataUtils.hasHeader(config);
this.newSource = true;
inputStream.setNotificationHandler(this);
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
index 0ed1238..c6e78b0 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
@@ -60,7 +60,7 @@
@Override
public void notifyNewSource() {
if (!record.isEmptyRecord() && warnings.shouldWarn()) {
- ParseUtil.warn(warnings, reader.getStreamName(), recordNumber, 0, REC_ENDED_IN_Q);
+ ParseUtil.warn(warnings, getDataSourceName().get(), recordNumber, 0, REC_ENDED_IN_Q);
}
// restart for a new record from a new source
resetForNewSource();
@@ -106,7 +106,7 @@
if (readLength <= 0 || inQuote) {
// haven't read anything previously OR have read and in the middle and hit the end
if (inQuote && warnings.shouldWarn()) {
- ParseUtil.warn(warnings, reader.getStreamName(), recordNumber, 0, REC_ENDED_IN_Q);
+ ParseUtil.warn(warnings, getDataSourceName().get(), recordNumber, 0, REC_ENDED_IN_Q);
}
close();
return false;
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
index 5ab5730..38eec98 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
@@ -48,7 +48,7 @@
@Override
public void configure(IHyracksTaskContext ctx, AsterixInputStream stream, Map<String, String> config)
throws HyracksDataException {
- super.configure(stream);
+ super.configure(stream, config);
String recStartString = config.get(ExternalDataConstants.KEY_RECORD_START);
String recEndString = config.get(ExternalDataConstants.KEY_RECORD_END);
// set record opening char
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/StreamRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/StreamRecordReader.java
index a70f1fe..6139f82 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/StreamRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/StreamRecordReader.java
@@ -18,6 +18,9 @@
*/
package org.apache.asterix.external.input.record.reader.stream;
+import static org.apache.asterix.external.util.ExternalDataConstants.EMPTY_STRING;
+import static org.apache.asterix.external.util.ExternalDataConstants.KEY_REDACT_WARNINGS;
+
import java.io.IOException;
import java.util.List;
import java.util.Map;
@@ -31,6 +34,7 @@
import org.apache.asterix.external.input.record.CharArrayRecord;
import org.apache.asterix.external.input.stream.AsterixInputStreamReader;
import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.external.util.ExternalDataUtils;
import org.apache.asterix.external.util.FeedLogManager;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.exceptions.HyracksDataException;
@@ -43,11 +47,15 @@
protected int bufferPosn = 0;
protected boolean done = false;
protected FeedLogManager feedLogManager;
+ // name of the data source handed out by getDataSourceName(); stays EMPTY_STRING unless configure() replaces it
+ private Supplier<String> dataSourceName = EMPTY_STRING;
- public void configure(AsterixInputStream inputStream) {
+ /**
+ * Creates the stream reader, the reusable record and the input buffer, and decides which data source name
+ * supplier this reader exposes.
+ *
+ * @param inputStream
+ * the stream to wrap in an AsterixInputStreamReader
+ * @param config
+ * the reader configuration; consulted for KEY_REDACT_WARNINGS
+ */
+ public void configure(AsterixInputStream inputStream, Map<String, String> config) {
this.reader = new AsterixInputStreamReader(inputStream);
record = new CharArrayRecord();
inputBuffer = new char[ExternalDataConstants.DEFAULT_BUFFER_SIZE];
+ // expose the real stream name only when warning redaction is not switched on
+ if (!ExternalDataUtils.isTrue(config, KEY_REDACT_WARNINGS)) {
+ this.dataSourceName = reader::getStreamName;
+ }
}
@Override
@@ -106,8 +114,8 @@
}
+ /**
+ * Returns the data source name supplier held in the dataSourceName field.
+ *
+ * @return the stream name supplier if configure() installed it, otherwise the initial EMPTY_STRING supplier
+ */
@Override
- public Supplier<String> getDataSourceName() {
- return reader::getStreamName;
+ public final Supplier<String> getDataSourceName() {
+ return dataSourceName;
}
public abstract List<String> getRecordReaderFormats();
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
index 86b95e1..8ac483e 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
@@ -18,6 +18,8 @@
*/
package org.apache.asterix.external.parser;
+import static org.apache.asterix.external.util.ExternalDataConstants.EMPTY_FIELD;
+import static org.apache.asterix.external.util.ExternalDataConstants.INVALID_VAL;
import static org.apache.asterix.external.util.ExternalDataConstants.MISSING_FIELDS;
import java.io.DataOutput;
@@ -37,8 +39,10 @@
import org.apache.asterix.external.util.ExternalDataConstants;
import org.apache.asterix.external.util.ParseUtil;
import org.apache.asterix.om.base.AMutableString;
+import org.apache.asterix.om.typecomputer.impl.TypeComputeUtils;
import org.apache.asterix.om.types.ARecordType;
import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.types.IAType;
import org.apache.asterix.om.utils.NonTaggedFormatUtil;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.exceptions.HyracksDataException;
@@ -64,9 +68,11 @@
private final byte[] fieldTypeTags;
private final int[] fldIds;
private final ArrayBackedValueStorage[] nameBuffers;
+ private final char[] nullChars;
public DelimitedDataParser(IHyracksTaskContext ctx, IValueParserFactory[] valueParserFactories, char fieldDelimiter,
- char quote, boolean hasHeader, ARecordType recordType, boolean isStreamParser) throws HyracksDataException {
+ char quote, boolean hasHeader, ARecordType recordType, boolean isStreamParser, String nullString)
+ throws HyracksDataException {
this.dataSourceName = ExternalDataConstants.EMPTY_STRING;
this.warnings = ctx.getWarningCollector();
this.fieldDelimiter = fieldDelimiter;
@@ -110,6 +116,7 @@
if (!isStreamParser) {
cursor = new FieldCursorForDelimitedDataParser(null, this.fieldDelimiter, quote, warnings, dataSourceName);
}
+ this.nullChars = nullString != null ? nullString.toCharArray() : null;
}
@Override
@@ -153,24 +160,26 @@
}
fieldValueBuffer.reset();
- if (cursor.isFieldEmpty() && recordType.getFieldTypes()[i].getTypeTag() != ATypeTag.STRING
- && recordType.getFieldTypes()[i].getTypeTag() != ATypeTag.NULL) {
- // if the field is empty and the type is optional, insert
- // NULL. Note that string type can also process empty field as an
- // empty string
- if (!NonTaggedFormatUtil.isOptional(recordType.getFieldTypes()[i])) {
- throw new RuntimeDataException(ErrorCode.PARSER_DELIMITED_NONOPTIONAL_NULL,
- cursor.getRecordCount(), cursor.getFieldCount());
- }
+ if (nullChars != null && NonTaggedFormatUtil.isOptional(recordType.getFieldTypes()[i]) && fieldNull()) {
fieldValueBufferOutput.writeByte(ATypeTag.SERIALIZED_NULL_TYPE_TAG);
} else {
+ if (cursor.isFieldEmpty() && !canProcessEmptyField(recordType.getFieldTypes()[i])) {
+ ParseUtil.warn(warnings, dataSourceName.get(), cursor.getRecordCount(), cursor.getFieldCount(),
+ EMPTY_FIELD);
+ return false;
+ }
fieldValueBufferOutput.writeByte(fieldTypeTags[i]);
// Eliminate double quotes in the field that we are going to parse
if (cursor.fieldHasDoubleQuote()) {
cursor.eliminateDoubleQuote();
}
- valueParsers[i].parse(cursor.getBuffer(), cursor.getFieldStart(), cursor.getFieldLength(),
- fieldValueBufferOutput);
+ boolean success = valueParsers[i].parse(cursor.getBuffer(), cursor.getFieldStart(),
+ cursor.getFieldLength(), fieldValueBufferOutput);
+ if (!success) {
+ ParseUtil.warn(warnings, dataSourceName.get(), cursor.getRecordCount(), cursor.getFieldCount(),
+ INVALID_VAL);
+ return false;
+ }
}
if (fldIds[i] < 0) {
recBuilder.addField(nameBuffers[i], fieldValueBuffer);
@@ -223,4 +232,26 @@
public void setDataSourceName(Supplier<String> dataSourceName) {
this.dataSourceName = dataSourceName == null ? ExternalDataConstants.EMPTY_STRING : dataSourceName;
}
+
+ /**
+ * Tells whether an empty field may be passed on to the value parser for a field of the given declared type.
+ *
+ * @param fieldType
+ * the declared type of the field
+ * @return true if the type tag of the actual type (as resolved by TypeComputeUtils.getActualType) is STRING or
+ * NULL, false for every other type
+ */
+ private static boolean canProcessEmptyField(IAType fieldType) {
+ // TODO(ali): investigate what it means for a field to have type NULL. there is no parser implemented for it
+ ATypeTag actualTag = TypeComputeUtils.getActualType(fieldType).getTypeTag();
+ if (actualTag == ATypeTag.STRING) {
+ return true;
+ }
+ return actualTag == ATypeTag.NULL;
+ }
+
+ /**
+ * Checks whether the field the cursor currently points at is, char for char, the configured null string.
+ * The method dereferences nullChars, so it must only be called when a null string was configured.
+ *
+ * @return true if the field has the same length and the same chars as nullChars, false otherwise
+ */
+ private boolean fieldNull() {
+ final int len = cursor.getFieldLength();
+ // a different length can never match, so skip the char comparison
+ if (len != nullChars.length) {
+ return false;
+ }
+ final char[] buf = cursor.getBuffer();
+ final int base = cursor.getFieldStart();
+ int matched = 0;
+ while (matched < len && buf[base + matched] == nullChars[matched]) {
+ matched++;
+ }
+ return matched == len;
+ }
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
index 46c5152..09f9697 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
@@ -50,8 +50,9 @@
char delimiter = ExternalDataUtils.validateGetDelimiter(configuration);
char quote = ExternalDataUtils.validateGetQuote(configuration, delimiter);
boolean hasHeader = ExternalDataUtils.hasHeader(configuration);
+ String nullString = configuration.get(ExternalDataConstants.KEY_NULL_STR);
return new DelimitedDataParser(ctx, valueParserFactories, delimiter, quote, hasHeader, recordType,
- ExternalDataUtils.getDataSourceType(configuration).equals(DataSourceType.STREAM));
+ ExternalDataUtils.getDataSourceType(configuration).equals(DataSourceType.STREAM), nullString);
}
@Override
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index f08c6e6..7d1cdc0 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -111,6 +111,9 @@
public static final String KEY_HTTP_PROXY_PORT = "http-proxy-port";
public static final String KEY_HTTP_PROXY_USER = "http-proxy-user";
public static final String KEY_HTTP_PROXY_PASSWORD = "http-proxy-password";
+ // a string representing the NULL value
+ public static final String KEY_NULL_STR = "null";
+ public static final String KEY_REDACT_WARNINGS = "redact-warnings";
/**
* Keys for adapter name
@@ -265,6 +268,8 @@
public static final String ERROR_PARSE_RECORD = "Parser failed to parse record";
public static final String MISSING_FIELDS = "some fields are missing";
public static final String REC_ENDED_IN_Q = "malformed input record ended inside quote";
+ public static final String EMPTY_FIELD = "empty value";
+ public static final String INVALID_VAL = "invalid value";
public static class AwsS3Constants {
public static final String REGION_FIELD_NAME = "region";
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index b7b441b..b1f11c9 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -18,6 +18,8 @@
*/
package org.apache.asterix.external.util;
+import static org.apache.asterix.external.util.ExternalDataConstants.KEY_REDACT_WARNINGS;
+
import java.util.EnumMap;
import java.util.Map;
@@ -189,11 +191,12 @@
}
+ /**
+ * Tells whether the configuration marks the data as having a header.
+ *
+ * @param configuration
+ * the configuration map to inspect
+ * @return the result of isTrue for the ExternalDataConstants.KEY_HEADER entry
+ */
public static boolean hasHeader(Map<String, String> configuration) {
- String value = configuration.get(ExternalDataConstants.KEY_HEADER);
- if (value != null) {
- return Boolean.valueOf(value);
- }
- return false;
+ return isTrue(configuration, ExternalDataConstants.KEY_HEADER);
+ }
+
+ /**
+ * Reads a boolean-valued entry from the configuration.
+ *
+ * @param configuration
+ * the configuration map to look the key up in
+ * @param key
+ * the name of the parameter
+ * @return true only if the key is present and its value equals "true" ignoring case; false if the key is absent
+ * or holds any other text
+ */
+ public static boolean isTrue(Map<String, String> configuration, String key) {
+ // parseBoolean maps a null value to false itself and returns a primitive, so no null check or unboxing is needed
+ return Boolean.parseBoolean(configuration.get(key));
}
public static IRecordReaderFactory<?> createExternalRecordReaderFactory(ILibraryManager libraryManager,
@@ -394,6 +397,10 @@
char delimiter = validateGetDelimiter(configuration);
validateGetQuote(configuration, delimiter);
validateGetQuoteEscape(configuration);
+ String value = configuration.get(KEY_REDACT_WARNINGS);
+ if (value != null && !isBoolean(value)) {
+ throw new RuntimeDataException(ErrorCode.INVALID_REQ_PARAM_VAL, KEY_REDACT_WARNINGS, value);
+ }
}
private static boolean isHeaderRequiredFor(String format) {
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADateParserFactory.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADateParserFactory.java
index ed118f9..35691a1 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADateParserFactory.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADateParserFactory.java
@@ -43,9 +43,10 @@
return new IValueParser() {
@Override
- public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
+ public boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
try {
out.writeInt((int) (parseDatePart(buffer, start, length) / GregorianCalendarSystem.CHRONON_OF_DAY));
+ return true;
} catch (IOException ex) {
throw HyracksDataException.create(ex);
}
@@ -56,11 +57,11 @@
/**
* Parse the given char sequence as a date string, and return the milliseconds represented by the date.
*
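- * @param charAccessor
- * accessor for the char sequence
- * @param isDateOnly
- * indicating whether it is a single date string, or it is the date part of a datetime string
- * @param errorMessage
+ * @param dateString
+ * the char array that contains the date string
+ * @param start
+ * the position in the array at which the date string starts
+ * @param length
+ * the number of chars that make up the date string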
* @return
* @throws Exception
*/
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADateTimeParserFactory.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADateTimeParserFactory.java
index 2e32692..f9e53b9 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADateTimeParserFactory.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADateTimeParserFactory.java
@@ -43,7 +43,7 @@
return new IValueParser() {
@Override
- public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
+ public boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
long chrononTimeInMs = 0;
short timeOffset = (short) ((buffer[start] == '-') ? 1 : 0);
@@ -64,6 +64,7 @@
try {
out.writeLong(chrononTimeInMs);
+ return true;
} catch (IOException ex) {
throw HyracksDataException.create(ex);
}
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADurationParserFactory.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADurationParserFactory.java
index 66fb4c3..ec387e2 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADurationParserFactory.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ADurationParserFactory.java
@@ -52,11 +52,12 @@
return new IValueParser() {
@Override
- public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
+ public boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
parseDuration(buffer, start, length, aMutableDuration, ADurationParseOption.All);
try {
out.writeInt(aMutableDuration.getMonths());
out.writeLong(aMutableDuration.getMilliseconds());
+ return true;
} catch (IOException ex) {
throw HyracksDataException.create(ex);
}
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ATimeParserFactory.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ATimeParserFactory.java
index 40ddc55..039e026 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ATimeParserFactory.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ATimeParserFactory.java
@@ -43,9 +43,10 @@
return new IValueParser() {
@Override
- public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
+ public boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
try {
out.writeInt(parseTimePart(buffer, start, length));
+ return true;
} catch (IOException ex) {
throw HyracksDataException.create(ex);
}
@@ -318,7 +319,7 @@
*
* @param timeString
* @param start
- * @param length
+ *
* @return
* @throws HyracksDataException
*/
@@ -473,7 +474,7 @@
*
* @param timeString
* @param start
- * @param length
+ *
* @return
* @throws HyracksDataException
*/
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/constructors/ABinaryHexStringConstructorDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/constructors/ABinaryHexStringConstructorDescriptor.java
index 3d65b0b..505b650 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/constructors/ABinaryHexStringConstructorDescriptor.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/constructors/ABinaryHexStringConstructorDescriptor.java
@@ -111,7 +111,10 @@
utf8Ptr.set(inputArg.getByteArray(), startOffset + 1, len - 1);
char[] buffer = utf8Ptr.toString().toCharArray();
out.write(ATypeTag.BINARY.serialize());
- byteArrayParser.parse(buffer, 0, buffer.length, out);
+ if (!byteArrayParser.parse(buffer, 0, buffer.length, out)) {
+ PointableHelper.setNull(result);
+ return;
+ }
result.set(resultStorage);
} else {
throw new TypeMismatchException(sourceLoc, BuiltinFunctions.BINARY_HEX_CONSTRUCTOR, 0, tt,
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
index bed21f5..4e3cf4e 100644
--- a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
@@ -140,7 +140,7 @@
121 = A numeric type promotion error has occurred: %1$s
122 = Encountered an error while printing the plan
123 = Insufficient memory is provided for the join operators, please increase the join memory budget.
-124 = Parsing error in %1$s at record %2$s field %3$s: %4$s
+124 = Parsing error at %1$s record %2$s field %3$s: %4$s
10000 = The given rule collection %1$s is not an instance of the List class.
10001 = Cannot compose partition constraint %1$s with %2$s
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/BooleanParserFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/BooleanParserFactory.java
index 488be04..141b99f 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/BooleanParserFactory.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/BooleanParserFactory.java
@@ -37,26 +37,43 @@
return BooleanParserFactory::parse;
}
- public static void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
- try {
- if (length == 4 && (buffer[start] == 't' || buffer[start] == 'T')
- && (buffer[start + 1] == 'r' || buffer[start + 1] == 'R')
- && (buffer[start + 2] == 'u' || buffer[start + 2] == 'U')
- && (buffer[start + 3] == 'e' || buffer[start + 3] == 'E')) {
- out.writeBoolean(true);
- return;
- } else if (length == 5 && (buffer[start] == 'f' || buffer[start] == 'F')
- && (buffer[start + 1] == 'a' || buffer[start + 1] == 'A')
- && (buffer[start + 2] == 'l' || buffer[start + 2] == 'L')
- && (buffer[start + 3] == 's' || buffer[start + 3] == 'S')
- && (buffer[start + 4] == 'e' || buffer[start + 4] == 'E')) {
- out.writeBoolean(false);
- return;
+ /**
+ * Parses a boolean literal ("true" or "false", any letter case) from the given char range. Spaces, tabs, line
+ * feeds, carriage returns and form feeds before and after the literal are ignored.
+ *
+ * @param buffer
+ * the chars to read from
+ * @param start
+ * index of the first char of the range
+ * @param length
+ * number of chars in the range
+ * @param out
+ * receives the parsed boolean; nothing is written when parsing fails
+ * @return true if a boolean was parsed and written, false if the range does not hold exactly one boolean literal
+ * @throws HyracksDataException
+ * if writing to out fails with an IOException
+ */
+ public static boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
+ char ch;
+ int i = start;
+ int end = start + length;
+ // skip leading whitespace
+ while (i < end && ((ch = buffer[i]) == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\f')) {
+ i++;
+ }
+ int remainingLength = end - i;
+ boolean gotBoolean = false;
+ boolean booleanValue = false;
+ if (remainingLength >= 4 && ((ch = buffer[i]) == 't' || ch == 'T') && ((ch = buffer[i + 1]) == 'r' || ch == 'R')
+ && ((ch = buffer[i + 2]) == 'u' || ch == 'U') && ((ch = buffer[i + 3]) == 'e' || ch == 'E')) {
+ gotBoolean = true;
+ booleanValue = true;
+ i = i + 4;
+ } else if (remainingLength >= 5 && ((ch = buffer[i]) == 'f' || ch == 'F')
+ && ((ch = buffer[i + 1]) == 'a' || ch == 'A') && ((ch = buffer[i + 2]) == 'l' || ch == 'L')
+ && ((ch = buffer[i + 3]) == 's' || ch == 'S') && ((ch = buffer[i + 4]) == 'e' || ch == 'E')) {
+ gotBoolean = true;
+ booleanValue = false;
+ i = i + 5;
+ }
+
+ // whatever is left must be whitespace only; this also rejects text that matched neither literal
+ for (; i < end; ++i) {
+ ch = buffer[i];
+ if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r' && ch != '\f') {
+ return false;
}
+ }
+ // the range was empty or held nothing but whitespace
+ if (!gotBoolean) {
+ return false;
+ }
+ try {
+ out.writeBoolean(booleanValue);
+ return true;
} catch (IOException e) {
throw HyracksDataException.create(e);
}
-
- throw new HyracksDataException("Invalid input data");
}
}
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java
index 69db7f3..cc5b68c 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java
@@ -41,11 +41,12 @@
ByteArraySerializerDeserializer serializer = ByteArraySerializerDeserializer.INSTANCE;
@Override
- public void parse(char[] input, int start, int length, DataOutput out) throws HyracksDataException {
+ public boolean parse(char[] input, int start, int length, DataOutput out) throws HyracksDataException {
parser.generatePureByteArrayFromBase64String(input, start, length);
try {
serializer.serialize(parser.getByteArray(), 0, parser.getLength(), out);
+ return true;
} catch (IOException e) {
throw HyracksDataException.create(e);
}
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java
index c15b2ff..2191c75 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java
@@ -40,10 +40,11 @@
ByteArraySerializerDeserializer serializer = ByteArraySerializerDeserializer.INSTANCE;
@Override
- public void parse(char[] input, int start, int length, DataOutput out) throws HyracksDataException {
+ public boolean parse(char[] input, int start, int length, DataOutput out) throws HyracksDataException {
try {
parser.generateByteArrayFromHexString(input, start, length);
serializer.serialize(parser.getByteArray(), 0, parser.getLength(), out);
+ return true;
} catch (IOException e) {
throw HyracksDataException.create(e);
}
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/DoubleParserFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/DoubleParserFactory.java
index 8998798..2004397 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/DoubleParserFactory.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/DoubleParserFactory.java
@@ -35,12 +35,13 @@
public IValueParser createValueParser() {
return new IValueParser() {
@Override
- public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
+ public boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
String s = String.valueOf(buffer, start, length);
try {
out.writeDouble(Double.parseDouble(s));
+ return true;
} catch (NumberFormatException e) {
- throw HyracksDataException.create(e);
+ return false;
} catch (IOException e) {
throw HyracksDataException.create(e);
}
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/FloatParserFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/FloatParserFactory.java
index 414946c..2b476f1 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/FloatParserFactory.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/FloatParserFactory.java
@@ -35,12 +35,13 @@
public IValueParser createValueParser() {
return new IValueParser() {
@Override
- public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
+ public boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
String s = String.valueOf(buffer, start, length);
try {
out.writeFloat(Float.parseFloat(s));
+ return true;
} catch (NumberFormatException e) {
- throw HyracksDataException.create(e);
+ return false;
} catch (IOException e) {
throw HyracksDataException.create(e);
}
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/IValueParser.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/IValueParser.java
index 0fcfa90..7b0090a 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/IValueParser.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/IValueParser.java
@@ -23,5 +23,5 @@
import org.apache.hyracks.api.exceptions.HyracksDataException;
public interface IValueParser {
- void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException;
+ /**
+ * Parses the characters {@code buffer[start .. start + length)} and writes the parsed value to {@code out}.
+ *
+ * @param buffer characters holding the text to parse
+ * @param start index of the first character to parse
+ * @param length number of characters to parse
+ * @param out destination the parsed value is written to
+ * @return {@code true} if the text was parsed and the value was written; {@code false} if the text is not a
+ * valid value for this parser, in which case the caller decides how to react (e.g. warn, skip or fail)
+ * @throws HyracksDataException if the value cannot be written to {@code out}. NOTE(review): some
+ * implementations may still throw on malformed input instead of returning false; confirm per parser
+ */
+ boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException;
}
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/IntegerParserFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/IntegerParserFactory.java
index c6cffb4..450aa70 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/IntegerParserFactory.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/IntegerParserFactory.java
@@ -35,91 +35,61 @@
public IValueParser createValueParser() {
return new IValueParser() {
@Override
- public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
- int n = 0;
- int sign = 1;
- int i = 0;
- boolean pre = true;
- for (; pre && i < length; ++i) {
- char ch = buffer[i + start];
- switch (ch) {
- case ' ':
- case '\t':
- case '\n':
- case '\r':
- case '\f':
- break;
-
- case '-':
- sign = -1;
- pre = false;
- break;
-
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- pre = false;
- n = n * 10 + (ch - '0');
- break;
-
- default:
- String errorString = new String(buffer, i + start, length - i);
- throw new HyracksDataException(
- "Integer Parser - a digit is expected. But, encountered this character: " + ch
- + " in the incoming input: " + errorString);
+ public boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
+ // accumulating negatively like Integer.parseInt() to avoid surprises near MAX_VALUE
+ char c;
+ int i = start;
+ int end = start + length;
+ while (i < end && ((c = buffer[i]) == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f')) {
+ i++;
+ }
+ boolean negative = false;
+ int limit = -Integer.MAX_VALUE;
+ if (i < end) {
+ c = buffer[i];
+ if (c == '-') {
+ negative = true;
+ limit = Integer.MIN_VALUE;
+ i++;
+ }
+ if (c == '+') {
+ i++;
}
}
- boolean post = false;
- for (; !post && i < length; ++i) {
- char ch = buffer[i + start];
- switch (ch) {
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- n = n * 10 + (ch - '0');
- break;
- default:
- String errorString = new String(buffer, i + start, length - i);
- throw new HyracksDataException(
- "Integer Parser - a digit is expected. But, encountered this character: " + ch
- + " in the incoming input: " + errorString);
+ int result = 0;
+ int multiplicationMin = limit / 10;
+ boolean gotNumber = false;
+ for (; i < end; i++) {
+ c = buffer[i];
+ if (c >= '0' && c <= '9') {
+ gotNumber = true;
+ if (result < multiplicationMin) {
+ return false;
+ }
+ result *= 10;
+ int digit = c - '0';
+ if (result < limit + digit) {
+ return false;
+ }
+ result -= digit;
+ } else {
+ break;
}
}
- for (; i < length; ++i) {
- char ch = buffer[i + start];
- switch (ch) {
- case ' ':
- case '\t':
- case '\n':
- case '\r':
- case '\f':
- break;
-
- default:
- String errorString = new String(buffer, i + start, length - i);
- throw new HyracksDataException("Integer Parser - a whitespace, tab, new line, or "
- + "form-feed expected. But, encountered this character: " + ch
- + " in the incoming input: " + errorString);
+ for (; i < end; ++i) {
+ c = buffer[i];
+ if (c != ' ' && c != '\t' && c != '\n' && c != '\r' && c != '\f') {
+ return false;
}
}
+ if (!gotNumber) {
+ return false;
+ }
try {
- out.writeInt(n * sign);
+ out.writeInt(negative ? result : -result);
+ return true;
} catch (IOException e) {
throw HyracksDataException.create(e);
}
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/LongParserFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/LongParserFactory.java
index fd47475..2cd350c 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/LongParserFactory.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/LongParserFactory.java
@@ -35,92 +35,61 @@
public IValueParser createValueParser() {
return new IValueParser() {
@Override
- public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
- long n = 0;
- int sign = 1;
- int i = 0;
- boolean pre = true;
- for (; pre && i < length; ++i) {
- char ch = buffer[i + start];
- switch (ch) {
- case ' ':
- case '\t':
- case '\n':
- case '\r':
- case '\f':
- break;
-
- case '-':
- sign = -1;
- pre = false;
- break;
-
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- pre = false;
- n = n * 10 + (ch - '0');
- break;
-
- default:
- String errorString = new String(buffer, i + start, length - i);
- throw new HyracksDataException(
- "Long Parser - a digit is expected. But, encountered this character: " + ch
- + " in the incoming input: " + errorString);
+ public boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
+ // accumulating negatively like Long.parseLong() to avoid surprises near MAX_VALUE
+ char c;
+ int i = start;
+ int end = start + length;
+ while (i < end && ((c = buffer[i]) == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f')) {
+ i++;
+ }
+ boolean negative = false;
+ long limit = -Long.MAX_VALUE;
+ if (i < end) {
+ c = buffer[i];
+ if (c == '-') {
+ negative = true;
+ limit = Long.MIN_VALUE;
+ i++;
+ }
+ if (c == '+') {
+ i++;
}
}
- boolean post = false;
- for (; !post && i < length; ++i) {
- char ch = buffer[i + start];
- switch (ch) {
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- n = n * 10 + (ch - '0');
- break;
- default:
- String errorString = new String(buffer, i + start, length - i);
- throw new HyracksDataException(
- "Long Parser - a digit is expected. But, encountered this character: " + ch
- + " in the incoming input: " + errorString);
+ long result = 0;
+ long multiplicationMin = limit / 10;
+ boolean gotNumber = false;
+ for (; i < end; i++) {
+ c = buffer[i];
+ if (c >= '0' && c <= '9') {
+ gotNumber = true;
+ if (result < multiplicationMin) {
+ return false;
+ }
+ result *= 10;
+ int digit = c - '0';
+ if (result < limit + digit) {
+ return false;
+ }
+ result -= digit;
+ } else {
+ break;
}
}
- for (; i < length; ++i) {
- char ch = buffer[i + start];
- switch (ch) {
- case ' ':
- case '\t':
- case '\n':
- case '\r':
- case '\f':
- break;
-
- default:
- String errorString = new String(buffer, i + start, length - i);
- throw new HyracksDataException(
- "Long Parser - a whitespace, tab, new line, or form-feed expected. "
- + "But, encountered this character: " + ch + " in the incoming input: "
- + errorString);
+ for (; i < end; ++i) {
+ c = buffer[i];
+ if (c != ' ' && c != '\t' && c != '\n' && c != '\r' && c != '\f') {
+ return false;
}
}
+ if (!gotNumber) {
+ return false;
+ }
try {
- out.writeLong(n * sign);
+ out.writeLong(negative ? result : -result);
+ return true;
} catch (IOException e) {
throw HyracksDataException.create(e);
}
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java
index 7848500..22f98a6 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java
@@ -38,9 +38,10 @@
private UTF8StringWriter writer = new UTF8StringWriter();
@Override
- public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
+ public boolean parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
try {
writer.writeUTF8(buffer, start, length, out);
+ return true;
} catch (IOException e) {
throw HyracksDataException.create(e);
}
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java
index 29a6d6d..5fcaf65 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java
@@ -66,7 +66,8 @@
DataOutputStream outputStream = new DataOutputStream(bos);
ByteArrayPointable bytePtr = new ByteArrayPointable();
- parser.parse(test.toCharArray(), 0, test.length(), outputStream);
+ boolean result = parser.parse(test.toCharArray(), 0, test.length(), outputStream);
+ assertTrue(result);
bytePtr.set(bos.toByteArray(), 0, bos.size());
byte[] answer = DatatypeConverter.parseBase64Binary(test);
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactoryTest.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactoryTest.java
index c959c8d..a6196e7 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactoryTest.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactoryTest.java
@@ -54,7 +54,8 @@
DataOutputStream outputStream = new DataOutputStream(bos);
ByteArrayPointable bytePtr = new ByteArrayPointable();
- parser.parse(test.toCharArray(), 0, test.length(), outputStream);
+ boolean result = parser.parse(test.toCharArray(), 0, test.length(), outputStream);
+ assertTrue(result);
bytePtr.set(bos.toByteArray(), 0, bos.size());
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ParserFactoryTest.java b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ParserFactoryTest.java
new file mode 100644
index 0000000..e3e77da
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ParserFactoryTest.java
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.dataflow.common.data.parsers;
+
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.primitive.BooleanPointable;
+import org.apache.hyracks.data.std.primitive.IntegerPointable;
+import org.apache.hyracks.data.std.primitive.LongPointable;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.junit.Test;
+
+import junit.framework.TestCase;
+
+/**
+ * Unit tests for the integer, long and boolean {@link IValueParser} implementations. Every case checks the
+ * boolean returned by {@code parse} and, when parsing succeeds, the value that was written to the output.
+ */
+public class ParserFactoryTest extends TestCase {
+
+    // spellings of 12 and -12 that the numeric parsers must accept: optional sign, blanks on either side
+    private static final String[] POSITIVE_TWELVE = { "12", "+12", " 12", " +12", "12 ", "+12 ", " 12 ", " +12 " };
+    private static final String[] NEGATIVE_TWELVE = { "-12", " -12", "-12 ", " -12 " };
+    // malformed numbers: stray letters, a blank after the sign, a blank between digits
+    private static final String[] INVALID_NUMBERS = { "a", "12a", "12 a", " a 12", "a12", "+ 12", "- 12", "1 2" };
+    // boolean literals in lower, upper and mixed case, with and without surrounding blanks
+    private static final String[] VALID_TRUE = { "true", "TRUE", "True", "true ", " true", " True " };
+    private static final String[] VALID_FALSE = { "false", "FALSE", "False", " false ", " false", "false " };
+    // malformed booleans: unrelated word, extra letters before or after the literal, embedded or trailing junk
+    private static final String[] INVALID_BOOLEANS = { "foo", "truea", "ffalse", "falsee", "t rue", "true a" };
+
+    private final ArrayBackedValueStorage storage = new ArrayBackedValueStorage();
+    private final IValueParser integerParser = IntegerParserFactory.INSTANCE.createValueParser();
+    private final IValueParser longParser = LongParserFactory.INSTANCE.createValueParser();
+    private final IValueParser booleanParser = BooleanParserFactory.INSTANCE.createValueParser();
+    // inputs are appended here so that every call after the first one parses at a non-zero start offset
+    private String chars = "";
+
+    @Test
+    public void testInteger() throws HyracksDataException {
+        storage.reset();
+        for (String text : POSITIVE_TWELVE) {
+            parse(text, integerParser, storage, IntegerPointable::getInteger, 12, true);
+        }
+        for (String text : NEGATIVE_TWELVE) {
+            parse(text, integerParser, storage, IntegerPointable::getInteger, -12, true);
+        }
+
+        // the range boundaries themselves must be accepted
+        parse(Integer.toString(Integer.MAX_VALUE), integerParser, storage, IntegerPointable::getInteger,
+                Integer.MAX_VALUE, true);
+        parse(Integer.toString(Integer.MIN_VALUE), integerParser, storage, IntegerPointable::getInteger,
+                Integer.MIN_VALUE, true);
+
+        // overflow and underflow: one past either boundary
+        parse(Long.toString(Integer.MAX_VALUE + 1L), integerParser, storage, IntegerPointable::getInteger,
+                null, false);
+        parse(Long.toString(Integer.MIN_VALUE - 1L), integerParser, storage, IntegerPointable::getInteger,
+                null, false);
+
+        // invalid
+        for (String text : INVALID_NUMBERS) {
+            parse(text, integerParser, storage, IntegerPointable::getInteger, null, false);
+        }
+    }
+
+    @Test
+    public void testLong() throws HyracksDataException {
+        storage.reset();
+        for (String text : POSITIVE_TWELVE) {
+            parse(text, longParser, storage, LongPointable::getLong, 12L, true);
+        }
+        for (String text : NEGATIVE_TWELVE) {
+            parse(text, longParser, storage, LongPointable::getLong, -12L, true);
+        }
+
+        // the range boundaries themselves must be accepted
+        parse(Long.toString(Long.MAX_VALUE), longParser, storage, LongPointable::getLong, Long.MAX_VALUE, true);
+        parse(Long.toString(Long.MIN_VALUE), longParser, storage, LongPointable::getLong, Long.MIN_VALUE, true);
+
+        // overflow and underflow: one extra digit appended to either boundary
+        parse(Long.toString(Long.MAX_VALUE) + "1", longParser, storage, LongPointable::getLong, null, false);
+        parse(Long.toString(Long.MIN_VALUE) + "1", longParser, storage, LongPointable::getLong, null, false);
+
+        // invalid (same table as the integer parser, so both parsers are held to identical cases)
+        for (String text : INVALID_NUMBERS) {
+            parse(text, longParser, storage, LongPointable::getLong, null, false);
+        }
+    }
+
+    @Test
+    public void testBoolean() throws HyracksDataException {
+        storage.reset();
+        for (String text : VALID_TRUE) {
+            parse(text, booleanParser, storage, BooleanPointable::getBoolean, Boolean.TRUE, true);
+        }
+        for (String text : VALID_FALSE) {
+            parse(text, booleanParser, storage, BooleanPointable::getBoolean, Boolean.FALSE, true);
+        }
+
+        // invalid
+        for (String text : INVALID_BOOLEANS) {
+            parse(text, booleanParser, storage, BooleanPointable::getBoolean, null, false);
+        }
+    }
+
+    /**
+     * Appends {@code test} to the shared character buffer, parses only that slice and verifies the outcome:
+     * the returned flag, the value written on success, and that nothing is written when the input is rejected.
+     *
+     * @param test text to parse
+     * @param parser parser under test
+     * @param storage output the parser writes to; its length is sampled before and after the call
+     * @param getter reads the parsed value back from the storage bytes
+     * @param expectedVal value expected on success; not consulted when parsing fails
+     * @param expectedResult expected return value of {@link IValueParser#parse}
+     */
+    private <T> void parse(String test, IValueParser parser, ArrayBackedValueStorage storage, Getter<T> getter,
+            T expectedVal, boolean expectedResult) throws HyracksDataException {
+        int oldSize = storage.getLength();
+        int stringStart = chars.length();
+        chars = chars + test;
+        boolean result = parser.parse(chars.toCharArray(), stringStart, test.length(), storage.getDataOutput());
+        int newSize = storage.getLength();
+        // check the flag first so an unexpected outcome is reported as such and not as a value mismatch
+        assertEquals("parse result for \"" + test + "\"", expectedResult, result);
+        if (result) {
+            assertEquals("parsed value for \"" + test + "\"", expectedVal,
+                    getter.get(storage.getByteArray(), oldSize));
+        } else {
+            // a rejected input must leave the output untouched
+            assertEquals("bytes written for rejected \"" + test + "\"", oldSize, newSize);
+        }
+    }
+
+    /** Reads a typed value from {@code bytes} starting at {@code start}. */
+    @FunctionalInterface
+    private interface Getter<T> {
+        T get(byte[] bytes, int start);
+    }
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java
index e91992d..b8b2ba8 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java
@@ -92,8 +92,10 @@
if (cursor.fieldHasDoubleQuote()) {
cursor.eliminateDoubleQuote();
}
- valueParsers[i].parse(cursor.getBuffer(), cursor.getFieldStart(), cursor.getFieldLength(),
- dos);
+ if (!valueParsers[i].parse(cursor.getBuffer(), cursor.getFieldStart(),
+ cursor.getFieldLength(), dos)) {
+ throw new HyracksDataException("Failed to parse field");
+ }
tb.addFieldEndOffset();
}
FrameUtils.appendToWriter(writer, appender, tb.getFieldEndOffsets(), tb.getByteArray(), 0,
diff --git a/hyracks-fullstack/hyracks/hyracks-examples/text-example/texthelper/src/main/java/org/apache/hyracks/examples/text/WordTupleParserFactory.java b/hyracks-fullstack/hyracks/hyracks-examples/text-example/texthelper/src/main/java/org/apache/hyracks/examples/text/WordTupleParserFactory.java
index 2219a14..0032c40 100644
--- a/hyracks-fullstack/hyracks/hyracks-examples/text-example/texthelper/src/main/java/org/apache/hyracks/examples/text/WordTupleParserFactory.java
+++ b/hyracks-fullstack/hyracks/hyracks-examples/text-example/texthelper/src/main/java/org/apache/hyracks/examples/text/WordTupleParserFactory.java
@@ -54,7 +54,9 @@
WordCursor cursor = new WordCursor(new InputStreamReader(in));
while (cursor.nextWord()) {
tb.reset();
- utf8StringParser.parse(cursor.buffer, cursor.fStart, cursor.fEnd - cursor.fStart, dos);
+ if (!utf8StringParser.parse(cursor.buffer, cursor.fStart, cursor.fEnd - cursor.fStart, dos)) {
+ throw new HyracksDataException("Failed to parse word");
+ }
tb.addFieldEndOffset();
FrameUtils.appendToWriter(writer, appender, tb.getFieldEndOffsets(), tb.getByteArray(), 0,
tb.getSize());