[ASTERIXDB-3429][EXT] Configurable CSV escape char

- user model changes: no
- storage format changes: no
- interface changes: yes

Details:
- Allow the CSV/delimited text parser to use escapes
  for quotes within quoted fields other than quote
  itself

Change-Id: I50bebc4b8b683889855cb5dd048ab27d7c93af76
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18373
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Ian Maxon <imaxon@apache.org>
Reviewed-by: Ali Alsuliman <ali.al.solaiman@gmail.com>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-app/data/csv/nonstandard_escape.csv b/asterixdb/asterix-app/data/csv/nonstandard_escape.csv
new file mode 100644
index 0000000..b46a9f0
--- /dev/null
+++ b/asterixdb/asterix-app/data/csv/nonstandard_escape.csv
@@ -0,0 +1 @@
+1,"It says \"The quick, fox jumped over the lazy dog\""
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.1.ddl.sqlpp
index 65816f6..6b8738c 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.1.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.1.ddl.sqlpp
@@ -26,10 +26,12 @@
 CREATE TYPE t2 AS {f1: string, f2: string, f3: string};
 CREATE TYPE t3 AS {f1: int?, f2: boolean, f3: string?};
 CREATE TYPE t4 AS {f1: string, f2: string, f3: string, f4: string};
+CREATE TYPE t5 AS {f1: int, f2: string};
 
 CREATE EXTERNAL DATASET ds1(t1) USING localfs(("path"="asterix_nc1://data/csv/sample_09.csv"), ("format"="CSV"), ("header"="FALSE"));
 CREATE EXTERNAL DATASET ds2(t2) USING localfs(("path"="asterix_nc1://data/csv/sample_10.csv"), ("format"="Csv"), ("header"="False"));
 CREATE EXTERNAL DATASET ds3(t1) USING localfs(("path"="asterix_nc1://data/csv/sample_11.csv"), ("format"="csv"), ("header"="FALSE"));
 CREATE EXTERNAL DATASET ds4(t3) USING localfs(("path"="asterix_nc1://data/csv/sample_12.csv"), ("format"="csv"), ("header"="True"), ("null"=""));
 CREATE EXTERNAL DATASET ds5(t4) USING localfs(("path"="asterix_nc1://data/csv/sample_13.csv"), ("format"="csv"), ("header"="True"));
-CREATE EXTERNAL DATASET ds6(t4) USING localfs(("path"="asterix_nc1://data/csv/empty_lines.csv"), ("format"="csv"), ("header"="false"));
\ No newline at end of file
+CREATE EXTERNAL DATASET ds6(t4) USING localfs(("path"="asterix_nc1://data/csv/empty_lines.csv"), ("format"="csv"), ("header"="false"));
+CREATE EXTERNAL DATASET ds7(t5) USING localfs(("path"="asterix_nc1://data/csv/nonstandard_escape.csv"), ("format"="csv"), ("header"="false"),("escape"="\\"));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.8.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.8.query.sqlpp
new file mode 100644
index 0000000..f3a3606
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.8.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+FROM ds7 v SELECT VALUE v ORDER BY v.f1;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.8.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.99.ddl.sqlpp
similarity index 100%
rename from asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.8.ddl.sqlpp
rename to asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/csv-tsv-parser/csv-parser-001/csv-parser-001.99.ddl.sqlpp
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_04/csv_04.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_04/csv_04.2.update.sqlpp
index 32df960..b79ae18 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_04/csv_04.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_04/csv_04.2.update.sqlpp
@@ -26,5 +26,5 @@
 use temp;
 
 
-load  dataset testds using localfs (("path"="asterix_nc1://data/csv/sample_03.csv"),("format"="delimited-text"),("delimiter"=","),("quote"="\""));
+load  dataset testds using localfs (("path"="asterix_nc1://data/csv/sample_03.csv"),("format"="csv"),("header"="false"));
 
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_05/csv_05.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_05/csv_05.2.update.sqlpp
index 71a79c7..156bbf2 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_05/csv_05.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_05/csv_05.2.update.sqlpp
@@ -26,5 +26,5 @@
 use temp;
 
 
-load  dataset testds using localfs (("path"="asterix_nc1://data/csv/sample_04_quote_error.csv"),("format"="delimited-text"),("delimiter"=","),("quote"="\""));
+load  dataset testds using localfs (("path"="asterix_nc1://data/csv/sample_04_quote_error.csv"),("format"="csv"),("header"="false"));
 
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_06/csv_06.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_06/csv_06.2.update.sqlpp
index e7b19f5..68b4f4a 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_06/csv_06.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_06/csv_06.2.update.sqlpp
@@ -27,5 +27,5 @@
 use temp;
 
 
-load  dataset testds using localfs (("path"="asterix_nc1://data/csv/sample_05_space_error_1.csv"),("format"="delimited-text"),("delimiter"=","),("quote"="\""));
+load  dataset testds using localfs (("path"="asterix_nc1://data/csv/sample_05_space_error_1.csv"),("format"="csv"),("header"="false"));
 
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_07/csv_07.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_07/csv_07.2.update.sqlpp
index 32988b6..103fe3a 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_07/csv_07.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_07/csv_07.2.update.sqlpp
@@ -26,5 +26,5 @@
 use temp;
 
 
-load  dataset testds using localfs (("path"="asterix_nc1://data/csv/sample_06_space_error_2.csv"),("format"="delimited-text"),("delimiter"=","),("quote"="\""));
+load  dataset testds using localfs (("path"="asterix_nc1://data/csv/sample_06_space_error_2.csv"),("format"="csv"),("header"="false"));
 
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_nonstandard/csv_nonstandard.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_nonstandard/csv_nonstandard.1.ddl.sqlpp
new file mode 100644
index 0000000..191a8ac
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_nonstandard/csv_nonstandard.1.ddl.sqlpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ *
+ * CSV file loading test
+ * Expected result: success
+ *
+ */
+
+drop  dataverse temp if exists;
+create  dataverse temp;
+
+use temp;
+
+CREATE TYPE temp.test AS {f1: int, f2: string};
+
+create  dataset testds(test) primary key id;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_nonstandard/csv_nonstandard.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_nonstandard/csv_nonstandard.2.update.sqlpp
new file mode 100644
index 0000000..b5633a9
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_nonstandard/csv_nonstandard.2.update.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ *
+ * CSV file loading test
+ * Expected result: success
+ *
+ */
+
+use temp;
+
+
+load  dataset testds using localfs ((`path`=`asterix_nc1://data/csv/nonstandard_escape.csv`),(`format`=`csv`),(`header`=`false`),(`null`=``),(`escape`=`\\`);
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_nonstandard/csv_nonstandard.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_nonstandard/csv_nonstandard.3.query.sqlpp
new file mode 100644
index 0000000..7a9bdc6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/load/csv_nonstandard/csv_nonstandard.3.query.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ *
+ * CSV file loading test
+ * Expected result: success
+ *
+ */
+
+use temp;
+
+FROM testds v SELECT VALUE v ORDER BY v.f1;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/csv-tsv-parser/csv-parser-001/csv-parser-001.8.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/csv-tsv-parser/csv-parser-001/csv-parser-001.8.adm
new file mode 100644
index 0000000..73d07f3
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/csv-tsv-parser/csv-parser-001/csv-parser-001.8.adm
@@ -0,0 +1 @@
+{ "f1": 1, "f2": "It says \"The quick, fox jumped over the lazy dog\"" }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/load/csv_nonstandard/csv_nonstandard.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/load/csv_nonstandard/csv_nonstandard.1.adm
new file mode 100644
index 0000000..73d07f3
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/load/csv_nonstandard/csv_nonstandard.1.adm
@@ -0,0 +1 @@
+{ "f1": 1, "f2": "It says \"The quick, fox jumped over the lazy dog\"" }
\ No newline at end of file
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/converter/CSVToRecordWithMetadataAndPKConverter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/converter/CSVToRecordWithMetadataAndPKConverter.java
index ad59f75..84d1541 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/converter/CSVToRecordWithMetadataAndPKConverter.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/converter/CSVToRecordWithMetadataAndPKConverter.java
@@ -47,7 +47,7 @@
             IExternalDataRuntimeContext context) {
         IWarningCollector warningCollector = context.getTaskContext().getWarningCollector();
         this.cursor = new FieldCursorForDelimitedDataParser(null, delimiter, ExternalDataConstants.QUOTE,
-                warningCollector, ExternalDataConstants.EMPTY_STRING);
+                ExternalDataConstants.QUOTE, warningCollector, ExternalDataConstants.EMPTY_STRING);
         this.record = new CharArrayRecord();
         this.valueIndex = valueIndex;
         this.recordWithMetadata = new RecordWithMetadataAndPK<>(record, metaType.getFieldTypes(), recordType,
@@ -64,8 +64,8 @@
         int j = 0;
         FieldCursorForDelimitedDataParser.Result lastResult;
         while ((lastResult = cursor.nextField()) == FieldCursorForDelimitedDataParser.Result.OK) {
-            if (cursor.fieldHasDoubleQuote()) {
-                cursor.eliminateDoubleQuote();
+            if (cursor.fieldHasEscapedQuote()) {
+                cursor.eliminateEscapeChar();
             }
             if (i == valueIndex) {
                 record.setValue(cursor.getBuffer(), cursor.getFieldStart(), cursor.getFieldLength());
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
index 4433b49..2035a8e 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
@@ -55,7 +55,7 @@
         String quoteString = config.get(ExternalDataConstants.KEY_QUOTE);
         ExternalDataUtils.validateChar(quoteString, ExternalDataConstants.KEY_QUOTE);
         this.quote = quoteString.charAt(0);
-        this.escape = ExternalDataUtils.validateGetEscape(config);
+        this.escape = ExternalDataUtils.validateGetEscape(config, config.get(ExternalDataConstants.KEY_FORMAT));
     }
 
     @Override
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
index b8a7480..7f890e1 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/DelimitedDataParser.java
@@ -61,6 +61,7 @@
     private final IWarningCollector warnings;
     private final char fieldDelimiter;
     private final char quote;
+    private final char escape;
     private final boolean hasHeader;
     private final ARecordType recordType;
     private final IARecordBuilder recBuilder;
@@ -79,14 +80,15 @@
     private FieldCursorForDelimitedDataParser cursor;
 
     public DelimitedDataParser(IExternalDataRuntimeContext context, IValueParserFactory[] valueParserFactories,
-            char fieldDelimiter, char quote, boolean hasHeader, ARecordType recordType, boolean isStreamParser,
-            String nullString) throws HyracksDataException {
+            char fieldDelimiter, char quote, char escape, boolean hasHeader, ARecordType recordType,
+            boolean isStreamParser, String nullString) throws HyracksDataException {
         this.dataSourceName = context.getDatasourceNameSupplier();
         this.lineNumber = context.getLineNumberSupplier();
         this.warnings = context.getTaskContext().getWarningCollector();
         this.valueEmbedder = context.getValueEmbedder();
         this.fieldDelimiter = fieldDelimiter;
         this.quote = quote;
+        this.escape = escape;
         this.hasHeader = hasHeader;
         this.recordType = recordType;
         valueParsers = new IValueParser[valueParserFactories.length];
@@ -127,7 +129,7 @@
             fieldNames[i] = name;
         }
         if (!isStreamParser) {
-            cursor = new FieldCursorForDelimitedDataParser(null, this.fieldDelimiter, quote, warnings,
+            cursor = new FieldCursorForDelimitedDataParser(null, this.fieldDelimiter, quote, escape, warnings,
                     this::getDataSourceName);
         }
         this.nullChars = nullString != null ? nullString.toCharArray() : null;
@@ -186,8 +188,8 @@
                     }
                     fieldValueBufferOutput.writeByte(fieldTypeTags[i]);
                     // Eliminate double quotes in the field that we are going to parse
-                    if (cursor.fieldHasDoubleQuote()) {
-                        cursor.eliminateDoubleQuote();
+                    if (cursor.fieldHasEscapedQuote()) {
+                        cursor.eliminateEscapeChar();
                     }
                     boolean success = valueParsers[i].parse(cursor.getBuffer(), cursor.getFieldStart(),
                             cursor.getFieldLength(), fieldValueBufferOutput);
@@ -232,8 +234,8 @@
     @Override
     public void setInputStream(InputStream in) throws IOException {
         // TODO(ali): revisit this in regards to stream
-        cursor = new FieldCursorForDelimitedDataParser(new InputStreamReader(in), fieldDelimiter, quote, warnings,
-                this::getDataSourceName);
+        cursor = new FieldCursorForDelimitedDataParser(new InputStreamReader(in), fieldDelimiter, quote, escape,
+                warnings, this::getDataSourceName);
         if (hasHeader) {
             cursor.nextRecord();
             FieldCursorForDelimitedDataParser.Result result;
@@ -249,8 +251,8 @@
     @Override
     public boolean reset(InputStream in) throws IOException {
         // TODO(ali): revisit this in regards to stream
-        cursor = new FieldCursorForDelimitedDataParser(new InputStreamReader(in), fieldDelimiter, quote, warnings,
-                this::getDataSourceName);
+        cursor = new FieldCursorForDelimitedDataParser(new InputStreamReader(in), fieldDelimiter, quote, escape,
+                warnings, this::getDataSourceName);
         return true;
     }
 
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
index 8b88a69..bad742e 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
@@ -50,9 +50,11 @@
         IValueParserFactory[] valueParserFactories = ExternalDataUtils.getValueParserFactories(recordType);
         char delimiter = ExternalDataUtils.validateGetDelimiter(configuration);
         char quote = ExternalDataUtils.validateGetQuote(configuration, delimiter);
+        char escape =
+                ExternalDataUtils.validateGetEscape(configuration, configuration.get(ExternalDataConstants.KEY_FORMAT));
         boolean hasHeader = ExternalDataUtils.hasHeader(configuration);
         String nullString = configuration.get(ExternalDataConstants.KEY_NULL_STR);
-        return new DelimitedDataParser(context, valueParserFactories, delimiter, quote, hasHeader, recordType,
+        return new DelimitedDataParser(context, valueParserFactories, delimiter, quote, escape, hasHeader, recordType,
                 ExternalDataUtils.getDataSourceType(configuration).equals(DataSourceType.STREAM), nullString);
     }
 
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index 5dfa803..3139be7 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -246,6 +246,8 @@
      * Constant characters
      */
     public static final char ESCAPE = '\\';
+
+    public static final char CSV_ESCAPE = '\"';
     public static final char QUOTE = '"';
     public static final char SPACE = ' ';
     public static final char TAB = '\t';
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index 3298759..e5b2c9e 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -143,7 +143,10 @@
         return quote;
     }
 
-    public static char validateGetEscape(Map<String, String> configuration) throws HyracksDataException {
+    public static char validateGetEscape(Map<String, String> configuration, String format) throws HyracksDataException {
+        if (ExternalDataConstants.FORMAT_CSV.equals(format)) {
+            return validateCharOrDefault(configuration, KEY_ESCAPE, ExternalDataConstants.CSV_ESCAPE);
+        }
         return validateCharOrDefault(configuration, KEY_ESCAPE, ExternalDataConstants.ESCAPE);
     }
 
@@ -578,7 +581,7 @@
         }
         char delimiter = validateGetDelimiter(configuration);
         validateGetQuote(configuration, delimiter);
-        validateGetEscape(configuration);
+        validateGetEscape(configuration, format);
         String value = configuration.get(ExternalDataConstants.KEY_REDACT_WARNINGS);
         if (value != null && !isBoolean(value)) {
             throw new RuntimeDataException(ErrorCode.INVALID_REQ_PARAM_VAL, ExternalDataConstants.KEY_REDACT_WARNINGS,
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java
index b8b2ba8..34d17c9 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java
@@ -43,16 +43,18 @@
     private IValueParserFactory[] valueParserFactories;
     private char fieldDelimiter;
     private char quote;
+    private char escape;
 
     public DelimitedDataTupleParserFactory(IValueParserFactory[] fieldParserFactories, char fieldDelimiter) {
-        this(fieldParserFactories, fieldDelimiter, '\"');
+        this(fieldParserFactories, fieldDelimiter, '\"', '\"');
     }
 
-    public DelimitedDataTupleParserFactory(IValueParserFactory[] fieldParserFactories, char fieldDelimiter,
-            char quote) {
+    public DelimitedDataTupleParserFactory(IValueParserFactory[] fieldParserFactories, char fieldDelimiter, char quote,
+            char escape) {
         this.valueParserFactories = fieldParserFactories;
         this.fieldDelimiter = fieldDelimiter;
         this.quote = quote;
+        this.escape = escape;
     }
 
     @Override
@@ -74,7 +76,7 @@
                     DataOutput dos = tb.getDataOutput();
 
                     FieldCursorForDelimitedDataParser cursor = new FieldCursorForDelimitedDataParser(
-                            new InputStreamReader(in), fieldDelimiter, quote, warningCollector, () -> "");
+                            new InputStreamReader(in), fieldDelimiter, quote, escape, warningCollector, () -> "");
                     while (cursor.nextRecord()) {
                         tb.reset();
                         for (int i = 0; i < valueParsers.length; ++i) {
@@ -88,9 +90,9 @@
                                 default:
                                     throw new IllegalStateException();
                             }
-                            // Eliminate double quotes in the field that we are going to parse
-                            if (cursor.fieldHasDoubleQuote()) {
-                                cursor.eliminateDoubleQuote();
+                            // Eliminate escaped quotes in the field that we are going to parse
+                            if (cursor.fieldHasEscapedQuote()) {
+                                cursor.eliminateEscapeChar();
                             }
                             if (!valueParsers[i].parse(cursor.getBuffer(), cursor.getFieldStart(),
                                     cursor.getFieldLength(), dos)) {
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
index ffc87cd..7cba88c 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
@@ -53,8 +53,8 @@
     private int fEnd; //end position for field
     private long lineCount; //count of lines
     private int fieldCount; //count of fields in current record
-    private int doubleQuoteCount; //count of double quotes
-    private boolean isDoubleQuoteIncludedInThisField; //does current field include double quotes
+    private int escapedQuoteCount; //count of escaped quotes
+    private boolean containsEscapedQuotes; //does current field contain escaped quotes
 
     private static final int INITIAL_BUFFER_SIZE = 4096;//initial buffer size
     private static final int INCREMENT = 4096; //increment size
@@ -66,15 +66,18 @@
     private State state; //state (see states above)
 
     private int lastQuotePosition; //position of last quote
-    private int lastDoubleQuotePosition; //position of last double quote
+    private int lastEscapedQuotePosition; //position of last escaped quote
     private int lastDelimiterPosition; //position of last delimiter
+    private int lastEscapePosition; //position of last escape
     private int quoteCount; //count of single quotes
     private boolean startedQuote; //whether a quote has been started
 
     private final char quote; //the quote character
     private final char fieldDelimiter; //the delimiter
 
-    public FieldCursorForDelimitedDataParser(Reader in, char fieldDelimiter, char quote,
+    private final char escape;
+
+    public FieldCursorForDelimitedDataParser(Reader in, char fieldDelimiter, char quote, char escape,
             IWarningCollector warningCollector, Supplier<String> dataSourceName) {
         this.warnings = warningCollector;
         this.dataSourceName = dataSourceName;
@@ -89,13 +92,15 @@
         state = State.INIT;
         this.quote = quote;
         this.fieldDelimiter = fieldDelimiter;
+        this.escape = escape;
         lastDelimiterPosition = -1;
         lastQuotePosition = -1;
-        lastDoubleQuotePosition = -1;
+        lastEscapedQuotePosition = -1;
+        lastEscapePosition = -1;
         quoteCount = 0;
-        doubleQuoteCount = 0;
+        escapedQuoteCount = 0;
         startedQuote = false;
-        isDoubleQuoteIncludedInThisField = false;
+        containsEscapedQuotes = false;
         lineCount = 1;
         fieldCount = 0;
     }
@@ -116,8 +121,8 @@
         return fStart == fEnd;
     }
 
-    public boolean fieldHasDoubleQuote() {
-        return isDoubleQuoteIncludedInThisField;
+    public boolean fieldHasEscapedQuote() {
+        return containsEscapedQuotes;
     }
 
     public int getFieldCount() {
@@ -133,11 +138,12 @@
         fieldCount = 0;
         lastDelimiterPosition = -1;
         lastQuotePosition = -1;
-        lastDoubleQuotePosition = -1;
+        lastEscapedQuotePosition = -1;
+        lastEscapePosition = -1;
         quoteCount = 0;
-        doubleQuoteCount = 0;
+        escapedQuoteCount = 0;
         startedQuote = false;
-        isDoubleQuoteIncludedInThisField = false;
+        containsEscapedQuotes = false;
         start = 0;
         end = recordLength;
         state = State.IN_RECORD;
@@ -171,22 +177,34 @@
                             }
                             p -= (s - start);
                             lastQuotePosition -= (s - start);
-                            lastDoubleQuotePosition -= (s - start);
+                            lastEscapedQuotePosition -= (s - start);
                             lastDelimiterPosition -= (s - start);
                         }
                         char ch = buffer[p];
                         // We perform rough format correctness (delimiter, quote) check here
                         // to set the starting position of a record.
                         // In the field level, more checking will be conducted.
+                        if (ch == escape) {
+                            // this may or may not be an escape. the next character must be a quote for it to be.
+                            lastEscapePosition = p;
+                        }
                         if (ch == quote) {
-                            startedQuote = true;
-                            // check two quotes in a row - "". This is an escaped quote
-                            if (lastQuotePosition == p - 1 && start != p - 1 && lastDoubleQuotePosition != p - 1) {
-                                lastDoubleQuotePosition = p;
+                            boolean couldBeEscapedQuote =
+                                    lastEscapePosition == p - 1 && lastEscapedQuotePosition != p - 1;
+                            if (quote == escape) {
+                                startedQuote = true;
+                                // check two quotes in a row that aren't at the start of a field if quote is escape, e.g. ""
+                                if (couldBeEscapedQuote && start != p - 1) {
+                                    lastEscapedQuotePosition = p;
+                                }
+                            } else {
+                                if (couldBeEscapedQuote) {
+                                    lastEscapedQuotePosition = p;
+                                }
                             }
                             lastQuotePosition = p;
                         } else if (ch == fieldDelimiter) {
-                            if (startedQuote && lastQuotePosition == p - 1 && lastDoubleQuotePosition != p - 1) {
+                            if (startedQuote && lastQuotePosition == p - 1 && lastEscapedQuotePosition != p - 1) {
                                 startedQuote = false;
                                 lastDelimiterPosition = p;
                             }
@@ -266,11 +284,12 @@
                 fieldCount++;
                 // reset quote related values
                 startedQuote = false;
-                isDoubleQuoteIncludedInThisField = false;
+                containsEscapedQuotes = false;
                 lastQuotePosition = -1;
-                lastDoubleQuotePosition = -1;
+                lastEscapedQuotePosition = -1;
+                lastEscapePosition = -1;
                 quoteCount = 0;
-                doubleQuoteCount = 0;
+                escapedQuoteCount = 0;
 
                 char lastChar = '\0';
                 int p = start;
@@ -280,7 +299,7 @@
                         boolean eof = !readMore();
                         p -= (s - start);
                         lastQuotePosition -= (lastQuotePosition > -1) ? (s - start) : 0;
-                        lastDoubleQuotePosition -= (lastDoubleQuotePosition > -1) ? (s - start) : 0;
+                        lastEscapedQuotePosition -= (lastEscapedQuotePosition > -1) ? (s - start) : 0;
                         lastDelimiterPosition -= (lastDelimiterPosition > -1) ? (s - start) : 0;
                         if (eof) {
                             state = State.EOF;
@@ -288,8 +307,8 @@
                                 fStart = start;
                                 fEnd = p;
                             } else {
-                                if (lastQuotePosition == p - 1 && lastDoubleQuotePosition != p - 1
-                                        && quoteCount == doubleQuoteCount * 2 + 2) {
+                                if (lastQuotePosition == p - 1 && lastEscapedQuotePosition != p - 1
+                                        && quoteCount == escapedQuoteCount * (escape == quote ? 2 : 1) + 2) {
                                     // set the position of fStart to +1, fEnd to -1 to remove quote character
                                     fStart = start + 1;
                                     fEnd = p - 1;
@@ -319,16 +338,18 @@
                                 return Result.ERROR;
                             }
                         }
-                        // Check double quotes - "". We check [start != p-2]
+                        // Check escaped quotes - \ESC". We check [start != p-2] if escape is quote
                         // to avoid false positive where there is no value in a field,
-                        // since it looks like a double quote. However, it's not a double quote.
+                        // since it looks like an escaped quote. However, it's not an escaped quote.
                         // (e.g. if field2 has no value:
                         //       field1,"",field3 ... )
-                        if (lastQuotePosition == p - 1 && lastDoubleQuotePosition != p - 1
-                                && lastQuotePosition != start) {
-                            isDoubleQuoteIncludedInThisField = true;
-                            doubleQuoteCount++;
-                            lastDoubleQuotePosition = p;
+                        boolean couldBeEscaped = lastEscapePosition == p - 1 && lastEscapedQuotePosition != p - 1;
+                        boolean isEscapedQuote =
+                                quote == escape ? couldBeEscaped && lastQuotePosition != start : couldBeEscaped;
+                        if (isEscapedQuote) {
+                            containsEscapedQuotes = true;
+                            escapedQuoteCount++;
+                            lastEscapedQuotePosition = p;
                         }
                         lastQuotePosition = p;
                         quoteCount++;
@@ -343,9 +364,9 @@
                             return Result.OK;
                         }
 
-                        if (lastQuotePosition == p - 1 && lastDoubleQuotePosition != p - 1
+                        if (lastQuotePosition == p - 1 && lastEscapedQuotePosition != p - 1
                                 && lastQuotePosition != start) {
-                            // There is a quote right before the delimiter (e.g. ",)  and it is not two quote,
+                            // There is a quote right before the delimiter (e.g. ",)  and it is not an escaped quote,
                             // then the field contains a valid string.
                             // We set the position of fStart to +1, fEnd to -1 to remove quote character
                             fStart = start + 1;
@@ -354,8 +375,8 @@
                             lastDelimiterPosition = p;
                             startedQuote = false;
                             return Result.OK;
-                        } else if (lastQuotePosition < p - 1 && lastQuotePosition != lastDoubleQuotePosition
-                                && quoteCount == doubleQuoteCount * 2 + 2) {
+                        } else if (lastQuotePosition < p - 1 && lastQuotePosition != lastEscapedQuotePosition
+                                && quoteCount == escapedQuoteCount * (escape == quote ? 2 : 1) + 2) {
                             // There is a quote before the delimiter, however it is not directly placed before the delimiter.
                             // In this case, we throw an exception.
                             // quoteCount == doubleQuoteCount * 2 + 2 : only true when we have two quotes except double-quotes.
@@ -376,8 +397,8 @@
                             state = ch == '\n' ? State.EOR : State.CR;
                             lastDelimiterPosition = p;
                             return Result.OK;
-                        } else if (lastQuotePosition == p - 1 && lastDoubleQuotePosition != p - 1
-                                && quoteCount == doubleQuoteCount * 2 + 2) {
+                        } else if (lastQuotePosition == p - 1 && lastEscapedQuotePosition != p - 1
+                                && quoteCount == escapedQuoteCount * (escape == quote ? 2 : 1) + 2) {
                             // set the position of fStart to +1, fEnd to -1 to remove quote character
                             fStart = start + 1;
                             fEnd = p - 1;
@@ -388,6 +409,12 @@
                             return Result.OK;
                         }
                     }
+                    if (ch == escape) {
+                        //RFC4180 defines the escape character for quotes as quotes. however CSV is not a well-defined
+                        //format, and so frequently nonstandard escaping such as C-style \ escaping is used.
+                        //Therefore, we need to track potential escapes separately to support these cases.
+                        lastEscapePosition = p;
+                    }
                     // count lines inside quotes
                     if (ch == '\r' || (ch == '\n' && lastChar != '\r')) {
                         lineCount++;
@@ -421,17 +448,17 @@
         return true;
     }
 
-    // Eliminate escaped double quotes("") in a field
-    public void eliminateDoubleQuote() {
-        int lastDoubleQuotePosition = -1;
+    // Eliminate escaped quotes("" by default) in a field
+    public void eliminateEscapeChar() {
+        int lastEsc = -1;
         int writepos = fStart;
         int readpos = fStart;
         int length = fEnd - fStart;
         // Find positions where double quotes appear
         for (int i = 0; i < length; i++) {
             // Skip double quotes
-            if (buffer[readpos] == quote && lastDoubleQuotePosition != readpos - 1) {
-                lastDoubleQuotePosition = readpos;
+            if (buffer[readpos] == escape && lastEsc != readpos - 1) {
+                lastEsc = readpos;
                 readpos++;
             } else {
                 // Moving characters except double quote to the front
@@ -442,8 +469,8 @@
                 readpos++;
             }
         }
-        fEnd -= doubleQuoteCount;
-        isDoubleQuoteIncludedInThisField = false;
+        fEnd -= escapedQuoteCount;
+        containsEscapedQuotes = false;
     }
 
     private void warn(String message) {
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/file/CursorTest.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/file/CursorTest.java
index 5561ad1..10b3cd3 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/file/CursorTest.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/file/CursorTest.java
@@ -40,7 +40,7 @@
             reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
             // skip header
             final FieldCursorForDelimitedDataParser cursor =
-                    new FieldCursorForDelimitedDataParser(reader, ',', '"', null, () -> "");
+                    new FieldCursorForDelimitedDataParser(reader, ',', '"', '"', null, () -> "");
             // get number of fields from header (first record is header)
             cursor.nextRecord();
             int numOfFields = 0;
@@ -55,8 +55,8 @@
             while (cursor.nextRecord()) {
                 int fieldNumber = 0;
                 while ((lastResult = cursor.nextField()) == FieldCursorForDelimitedDataParser.Result.OK) {
-                    if (cursor.fieldHasDoubleQuote()) {
-                        cursor.eliminateDoubleQuote();
+                    if (cursor.fieldHasEscapedQuote()) {
+                        cursor.eliminateEscapeChar();
                     }
                     fieldNumber++;
                 }