Fixed CSV parser to recognize quote and delimiter inside a string

Change-Id: Iaaabc23e86df4f9bbee9f06b7976d7fbdcbb0f3f
Reviewed-on: http://fulliautomatix.ics.uci.edu:8443/135
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Till Westmann <westmann@gmail.com>

commit: efb236d412a5ff66e6ca1c8d258eee2a4b3f1b56 [log] [tgz]
author: Taewoo Kim <wangsaeu@gmail.com> Wed Sep 17 10:55:05 2014 -0700
committer: Ian Maxon <imaxon@uci.edu> Wed Sep 17 16:36:16 2014 -0800
tree: ff5214018bb80dc47191f18eb74f35da6edd1400
parent: 83259465e090c5797597a2a1a9487bd35621d775 [diff]
diff --git a/asterix-app/data/csv/sample_02.csv b/asterix-app/data/csv/sample_02.csv
new file mode 100644
index 0000000..a4c5b3d
--- /dev/null
+++ b/asterix-app/data/csv/sample_02.csv

@@ -0,0 +1,4 @@
+1|0.1|0.1|0.1|0.1|abc|abc
+2|0.2||0.2||""|""
+3|0.3||0.3|||
+4|0.4||0.4||||extra||

diff --git a/asterix-app/data/csv/sample_03.csv b/asterix-app/data/csv/sample_03.csv
new file mode 100644
index 0000000..eed0699
--- /dev/null
+++ b/asterix-app/data/csv/sample_03.csv

@@ -0,0 +1,5 @@
+1,0.1,"test"",1a","test""1b"
+2,0.2,test2a,test2b
+3,0.3,"test,3a,3a,3a","""""test"""""
+4,0.4,"test""4a"",4a"," test with
+line break "

diff --git a/asterix-app/data/csv/sample_04_quote_error.csv b/asterix-app/data/csv/sample_04_quote_error.csv
new file mode 100644
index 0000000..27b6b7c
--- /dev/null
+++ b/asterix-app/data/csv/sample_04_quote_error.csv

@@ -0,0 +1,5 @@
+1,0.1,"test",1a","test""1b"
+2,0.2,test2a,test2b
+3,0.3,"test,3a,3a,3a","""""test"""""
+4,0.4,"test""4a"",4a"," test with
+line break "

diff --git a/asterix-app/data/csv/sample_05_space_error_1.csv b/asterix-app/data/csv/sample_05_space_error_1.csv
new file mode 100644
index 0000000..92a9862
--- /dev/null
+++ b/asterix-app/data/csv/sample_05_space_error_1.csv

@@ -0,0 +1,5 @@
+1,0.1, "test"",1a","test""1b"
+2,0.2,test2a,test2b
+3,0.3,"test,3a,3a,3a","""""test"""""
+4,0.4,"test""4a"",4a"," test with
+line break "

diff --git a/asterix-app/data/csv/sample_06_space_error_2.csv b/asterix-app/data/csv/sample_06_space_error_2.csv
new file mode 100644
index 0000000..d1e7711
--- /dev/null
+++ b/asterix-app/data/csv/sample_06_space_error_2.csv

@@ -0,0 +1,5 @@
+1,0.1,"test"",1a" ,"test""1b"
+2,0.2,test2a,test2b
+3,0.3,"test,3a,3a,3a","""""test"""""
+4,0.4,"test""4a"",4a"," test with
+line break "

diff --git a/asterix-app/src/test/resources/fuzzyjoin/amerix/10-load-csx-small.aql b/asterix-app/src/test/resources/fuzzyjoin/amerix/10-load-csx-small.aql
index 5e819bd..01c29de 100644
--- a/asterix-app/src/test/resources/fuzzyjoin/amerix/10-load-csx-small.aql
+++ b/asterix-app/src/test/resources/fuzzyjoin/amerix/10-load-csx-small.aql

@@ -1,7 +1,7 @@
 use dataverse fuzzy1;
 
 declare type CSXType as open {
-  id: int32, 
+  id: int32,
   csxid: string,
   title: string,
   authors: string,
@@ -10,11 +10,11 @@
 
 declare nodegroup group1 on nc1, nc2;
 
-declare dataset CSXSmall(CSXType) 
+declare dataset CSXSmall(CSXType)
   primary key id on group1;
 
-load dataset CSXSmall 
+load dataset CSXSmall
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/fuzzyjoin/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+(("path"="nc1://data/fuzzyjoin/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")) pre-sorted;
 
 

diff --git a/asterix-app/src/test/resources/fuzzyjoin/pub/040-load-csx.aql b/asterix-app/src/test/resources/fuzzyjoin/pub/040-load-csx.aql
index a87398d..c1eb120 100644
--- a/asterix-app/src/test/resources/fuzzyjoin/pub/040-load-csx.aql
+++ b/asterix-app/src/test/resources/fuzzyjoin/pub/040-load-csx.aql

@@ -1,7 +1,7 @@
 use dataverse fuzzy1;
 
 declare type CSXType as open {
-  id: int32, 
+  id: int32,
   csxid: string,
   title: string,
   authors: string,
@@ -10,11 +10,11 @@
 
 declare nodegroup group1 on nc1, nc2;
 
-declare dataset CSX(CSXType) 
+declare dataset CSX(CSXType)
   primary key id on group1;
 
 // load dataset CSX from nc1:'/asterix/asterix-app/data/pub-small/csx-small-id.txt'
 load dataset  CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1:///asterix/asterix-app/data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
+(("path"="nc1:///asterix/asterix-app/data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000")) pre-sorted;
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_1/dblp-csx-2_1.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_1/dblp-csx-2_1.2.update.aql
index 47d8420..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_1/dblp-csx-2_1.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_1/dblp-csx-2_1.2.update.aql

@@ -1,10 +1,10 @@
 use dataverse fuzzyjoin;
 
-load dataset DBLP 
+load dataset DBLP
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
 (("path"="nc1://data/pub-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
 
-load dataset CSX 
+load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_2/dblp-csx-2_2.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_2/dblp-csx-2_2.2.update.aql
index 47d8420..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_2/dblp-csx-2_2.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_2/dblp-csx-2_2.2.update.aql

@@ -1,10 +1,10 @@
 use dataverse fuzzyjoin;
 
-load dataset DBLP 
+load dataset DBLP
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
 (("path"="nc1://data/pub-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
 
-load dataset CSX 
+load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_3/dblp-csx-2_3.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_3/dblp-csx-2_3.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_3/dblp-csx-2_3.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_3/dblp-csx-2_3.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_4/dblp-csx-2_4.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_4/dblp-csx-2_4.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_4/dblp-csx-2_4.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_4/dblp-csx-2_4.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.1/dblp-csx-2_5.1.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.1/dblp-csx-2_5.1.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.1/dblp-csx-2_5.1.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.1/dblp-csx-2_5.1.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.2/dblp-csx-2_5.2.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.2/dblp-csx-2_5.2.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.2/dblp-csx-2_5.2.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.2/dblp-csx-2_5.2.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.3.1/dblp-csx-2_5.3.1.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.3.1/dblp-csx-2_5.3.1.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.3.1/dblp-csx-2_5.3.1.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.3.1/dblp-csx-2_5.3.1.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.3/dblp-csx-2_5.3.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.3/dblp-csx-2_5.3.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.3/dblp-csx-2_5.3.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5.3/dblp-csx-2_5.3.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5/dblp-csx-2_5.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5/dblp-csx-2_5.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5/dblp-csx-2_5.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-2_5/dblp-csx-2_5.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_1/dblp-csx-3_1.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_1/dblp-csx-3_1.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_1/dblp-csx-3_1.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_1/dblp-csx-3_1.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_2/dblp-csx-3_2.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_2/dblp-csx-3_2.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_2/dblp-csx-3_2.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_2/dblp-csx-3_2.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_3/dblp-csx-3_3.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_3/dblp-csx-3_3.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_3/dblp-csx-3_3.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_3/dblp-csx-3_3.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_4/dblp-csx-3_4.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_4/dblp-csx-3_4.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_4/dblp-csx-3_4.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_4/dblp-csx-3_4.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.1/dblp-csx-3_5.1.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.1/dblp-csx-3_5.1.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.1/dblp-csx-3_5.1.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.1/dblp-csx-3_5.1.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.2/dblp-csx-3_5.2.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.2/dblp-csx-3_5.2.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.2/dblp-csx-3_5.2.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.2/dblp-csx-3_5.2.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.3.1/dblp-csx-3_5.3.1.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.3.1/dblp-csx-3_5.3.1.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.3.1/dblp-csx-3_5.3.1.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.3.1/dblp-csx-3_5.3.1.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.3/dblp-csx-3_5.3.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.3/dblp-csx-3_5.3.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.3/dblp-csx-3_5.3.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.3/dblp-csx-3_5.3.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.4.1/dblp-csx-3_5.4.1.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.4.1/dblp-csx-3_5.4.1.2.update.aql
index 50972b4..a452348 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.4.1/dblp-csx-3_5.4.1.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.4.1/dblp-csx-3_5.4.1.2.update.aql

@@ -6,6 +6,6 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.4/dblp-csx-3_5.4.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.4/dblp-csx-3_5.4.2.update.aql
index 50972b4..a452348 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.4/dblp-csx-3_5.4.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5.4/dblp-csx-3_5.4.2.update.aql

@@ -6,6 +6,6 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5/dblp-csx-3_5.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5/dblp-csx-3_5.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5/dblp-csx-3_5.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-3_5/dblp-csx-3_5.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_1/dblp-csx-aqlplus_1.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_1/dblp-csx-aqlplus_1.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_1/dblp-csx-aqlplus_1.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_1/dblp-csx-aqlplus_1.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_2/dblp-csx-aqlplus_2.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_2/dblp-csx-aqlplus_2.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_2/dblp-csx-aqlplus_2.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_2/dblp-csx-aqlplus_2.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_3/dblp-csx-aqlplus_3.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_3/dblp-csx-aqlplus_3.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_3/dblp-csx-aqlplus_3.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-aqlplus_3/dblp-csx-aqlplus_3.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-dblp-aqlplus_1/dblp-csx-dblp-aqlplus_1.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-dblp-aqlplus_1/dblp-csx-dblp-aqlplus_1.2.update.aql
index 916bd56..e0e46d8 100644
--- a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-dblp-aqlplus_1/dblp-csx-dblp-aqlplus_1.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/dblp-csx-dblp-aqlplus_1/dblp-csx-dblp-aqlplus_1.2.update.aql

@@ -6,5 +6,5 @@
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/index-join/btree-secondary-equi-join/btree-secondary-equi-join.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/index-join/btree-secondary-equi-join/btree-secondary-equi-join.2.update.aql
index 78a8e47..2eeb41e 100644
--- a/asterix-app/src/test/resources/runtimets/queries/index-join/btree-secondary-equi-join/btree-secondary-equi-join.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/index-join/btree-secondary-equi-join/btree-secondary-equi-join.2.update.aql

@@ -1,17 +1,17 @@
 /*
  * Description    : Equi joins two datasets, DBLP and CSX, based on their title.
- *                  DBLP has a secondary btree index on title, and given the 'indexnl' hint 
+ *                  DBLP has a secondary btree index on title, and given the 'indexnl' hint
  *                  we expect the join to be transformed into an indexed nested-loop join.
  * Success        : Yes
  */
 
 use dataverse test;
 
-load dataset DBLP 
+load dataset DBLP
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
 (("path"="nc1://data/pub-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/inverted-index-join-noeqjoin/ngram-jaccard-inline/ngram-jaccard-inline.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/inverted-index-join-noeqjoin/ngram-jaccard-inline/ngram-jaccard-inline.2.update.aql
index 9388959..8c1fd38 100644
--- a/asterix-app/src/test/resources/runtimets/queries/inverted-index-join-noeqjoin/ngram-jaccard-inline/ngram-jaccard-inline.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/inverted-index-join-noeqjoin/ngram-jaccard-inline/ngram-jaccard-inline.2.update.aql

@@ -8,11 +8,11 @@
 
 use dataverse test;
 
-load dataset DBLP 
+load dataset DBLP
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
 (("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/inverted-index-join-noeqjoin/ngram-jaccard/ngram-jaccard.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/inverted-index-join-noeqjoin/ngram-jaccard/ngram-jaccard.2.update.aql
index 319c927..d683bff 100644
--- a/asterix-app/src/test/resources/runtimets/queries/inverted-index-join-noeqjoin/ngram-jaccard/ngram-jaccard.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/inverted-index-join-noeqjoin/ngram-jaccard/ngram-jaccard.2.update.aql

@@ -7,11 +7,11 @@
 
 use dataverse test;
 
-load dataset DBLP 
+load dataset DBLP
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
 (("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/inverted-index-join-noeqjoin/word-jaccard-inline/word-jaccard-inline.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/inverted-index-join-noeqjoin/word-jaccard-inline/word-jaccard-inline.2.update.aql
index 0a2a629..7faf85f 100644
--- a/asterix-app/src/test/resources/runtimets/queries/inverted-index-join-noeqjoin/word-jaccard-inline/word-jaccard-inline.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/inverted-index-join-noeqjoin/word-jaccard-inline/word-jaccard-inline.2.update.aql

@@ -8,11 +8,11 @@
 
 use dataverse test;
 
-load dataset DBLP 
+load dataset DBLP
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
 (("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/inverted-index-join-noeqjoin/word-jaccard/word-jaccard.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/inverted-index-join-noeqjoin/word-jaccard/word-jaccard.2.update.aql
index a166535..22aa5b7 100644
--- a/asterix-app/src/test/resources/runtimets/queries/inverted-index-join-noeqjoin/word-jaccard/word-jaccard.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/inverted-index-join-noeqjoin/word-jaccard/word-jaccard.2.update.aql

@@ -7,11 +7,11 @@
 
 use dataverse test;
 
-load dataset DBLP 
+load dataset DBLP
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
 (("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/inverted-index-join/ngram-jaccard-inline/ngram-jaccard-inline.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/inverted-index-join/ngram-jaccard-inline/ngram-jaccard-inline.2.update.aql
index 8fbfdb9..68f309f 100644
--- a/asterix-app/src/test/resources/runtimets/queries/inverted-index-join/ngram-jaccard-inline/ngram-jaccard-inline.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/inverted-index-join/ngram-jaccard-inline/ngram-jaccard-inline.2.update.aql

@@ -7,11 +7,11 @@
 
 use dataverse test;
 
-load dataset DBLP 
+load dataset DBLP
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
 (("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/inverted-index-join/ngram-jaccard/ngram-jaccard.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/inverted-index-join/ngram-jaccard/ngram-jaccard.2.update.aql
index 93233c1..a267f03 100644
--- a/asterix-app/src/test/resources/runtimets/queries/inverted-index-join/ngram-jaccard/ngram-jaccard.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/inverted-index-join/ngram-jaccard/ngram-jaccard.2.update.aql

@@ -6,11 +6,11 @@
 
 use dataverse test;
 
-load dataset DBLP 
+load dataset DBLP
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
 (("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/inverted-index-join/word-jaccard-inline/word-jaccard-inline.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/inverted-index-join/word-jaccard-inline/word-jaccard-inline.2.update.aql
index 3064796..a90d3f4 100644
--- a/asterix-app/src/test/resources/runtimets/queries/inverted-index-join/word-jaccard-inline/word-jaccard-inline.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/inverted-index-join/word-jaccard-inline/word-jaccard-inline.2.update.aql

@@ -7,11 +7,11 @@
 
 use dataverse test;
 
-load dataset DBLP 
+load dataset DBLP
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
 (("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/inverted-index-join/word-jaccard/word-jaccard.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/inverted-index-join/word-jaccard/word-jaccard.2.update.aql
index 5a307f6..e80da53 100644
--- a/asterix-app/src/test/resources/runtimets/queries/inverted-index-join/word-jaccard/word-jaccard.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/inverted-index-join/word-jaccard/word-jaccard.2.update.aql

@@ -6,11 +6,11 @@
 
 use dataverse test;
 
-load dataset DBLP 
+load dataset DBLP
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
 (("path"="nc1://data/dblp-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":")) pre-sorted;
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/leftouterjoin/query_issue285-2/query_issue285-2.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/leftouterjoin/query_issue285-2/query_issue285-2.2.update.aql
index 923f6e4..54290df 100644
--- a/asterix-app/src/test/resources/runtimets/queries/leftouterjoin/query_issue285-2/query_issue285-2.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/leftouterjoin/query_issue285-2/query_issue285-2.2.update.aql

@@ -1,17 +1,17 @@
 /*
  * Description    : Left-outer joins two datasets, DBLP and CSX, based on their title.
- *                  DBLP has a secondary btree index on title, and given the 'indexnl' hint 
+ *                  DBLP has a secondary btree index on title, and given the 'indexnl' hint
  *                  we expect the join to be transformed into an indexed nested-loop join.
  * Success        : Yes
  */
 
 use dataverse test;
 
-load dataset DBLP 
+load dataset DBLP
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
 (("path"="nc1://data/pub-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/leftouterjoin/query_issue285/query_issue285.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/leftouterjoin/query_issue285/query_issue285.2.update.aql
index 923f6e4..54290df 100644
--- a/asterix-app/src/test/resources/runtimets/queries/leftouterjoin/query_issue285/query_issue285.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/leftouterjoin/query_issue285/query_issue285.2.update.aql

@@ -1,17 +1,17 @@
 /*
  * Description    : Left-outer joins two datasets, DBLP and CSX, based on their title.
- *                  DBLP has a secondary btree index on title, and given the 'indexnl' hint 
+ *                  DBLP has a secondary btree index on title, and given the 'indexnl' hint
  *                  we expect the join to be transformed into an indexed nested-loop join.
  * Success        : Yes
  */
 
 use dataverse test;
 
-load dataset DBLP 
+load dataset DBLP
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
 (("path"="nc1://data/pub-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/leftouterjoin/query_issue658/query_issue658.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/leftouterjoin/query_issue658/query_issue658.2.update.aql
index 4e2123a..1cc3923 100644
--- a/asterix-app/src/test/resources/runtimets/queries/leftouterjoin/query_issue658/query_issue658.2.update.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/leftouterjoin/query_issue658/query_issue658.2.update.aql

@@ -5,11 +5,11 @@
 
 use dataverse test;
 
-load dataset DBLP 
+load dataset DBLP
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
 (("path"="nc1://data/pub-small/dblp-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
 
 load dataset CSX
 using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
-(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"));
+(("path"="nc1://data/pub-small/csx-small-id.txt"),("format"="delimited-text"),("delimiter"=":"),("quote"="\u0000"));
 

diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_03/csv_03.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_03/csv_03.1.ddl.aql
new file mode 100644
index 0000000..0b7c16f
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_03/csv_03.1.ddl.aql

@@ -0,0 +1,23 @@
+/**
+ *
+ * CSV file loading test
+ * Expected result: success
+ *
+ */
+
+drop dataverse temp if exists;
+create dataverse temp
+use dataverse temp;
+
+create type test as closed {
+  id: int32,
+  float: float,
+  floatq: float?,
+  double: double,
+  doubleq: double?,
+  string: string,
+  stringq: string?
+};
+
+create dataset testds (test)
+primary key id;

diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_03/csv_03.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_03/csv_03.2.update.aql
new file mode 100644
index 0000000..e0e04ed
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_03/csv_03.2.update.aql

@@ -0,0 +1,12 @@
+/**
+ *
+ * CSV file loading test
+ * Expected result: success
+ *
+ */
+
+use dataverse temp;
+
+load dataset testds
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/csv/sample_02.csv"),("format"="delimited-text"),("delimiter"="|"));
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_03/csv_03.3.query.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_03/csv_03.3.query.aql
new file mode 100644
index 0000000..e7d5f60
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_03/csv_03.3.query.aql

@@ -0,0 +1,20 @@
+/**
+ *
+ * CSV file loading test
+ * Expected result: success
+ *
+ */
+
+use dataverse temp;
+
+for $i in dataset testds
+order by $i.id
+return {
+  "id": $i.id,
+  "float": $i.float,
+  "floatq": $i.floatq,
+  "double": $i.double,
+  "doubleq": $i.doubleq,
+  "string": $i.string,
+  "stringq": $i.stringq
+}
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_04/csv_04.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_04/csv_04.1.ddl.aql
new file mode 100644
index 0000000..b51d617
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_04/csv_04.1.ddl.aql

@@ -0,0 +1,22 @@
+/**
+ *
+ * CSV file loading test
+ * Expected result: success
+ * In this test, we check quote
+ * and delimiter in a field
+ *
+ */
+
+drop dataverse temp if exists;
+create dataverse temp
+use dataverse temp;
+
+create type test as closed {
+  id: int32,
+  float: float,
+  stringa: string,
+  stringb: string?
+};
+
+create dataset testds (test)
+primary key id;

diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_04/csv_04.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_04/csv_04.2.update.aql
new file mode 100644
index 0000000..df24a65
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_04/csv_04.2.update.aql

@@ -0,0 +1,12 @@
+/**
+ *
+ * CSV file loading test
+ * Expected result: success
+ *
+ */
+
+use dataverse temp;
+
+load dataset testds
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/csv/sample_03.csv"),("format"="delimited-text"),("delimiter"=","),("quote"="\""));
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_04/csv_04.3.query.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_04/csv_04.3.query.aql
new file mode 100644
index 0000000..ee72474
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_04/csv_04.3.query.aql

@@ -0,0 +1,17 @@
+/**
+ *
+ * CSV file loading test
+ * Expected result: success
+ *
+ */
+
+use dataverse temp;
+
+for $i in dataset testds
+order by $i.id
+return {
+  "id": $i.id,
+  "float": $i.float,
+  "stringa": $i.stringa,
+  "stringb": $i.stringb
+}
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_05/csv_05.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_05/csv_05.1.ddl.aql
new file mode 100644
index 0000000..cba880c
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_05/csv_05.1.ddl.aql

@@ -0,0 +1,20 @@
+/**
+ *
+ * CSV file loading test
+ * Expected result: fail - a field is not enclosed in two quotes properly. It misses one quote.
+ *
+ */
+
+drop dataverse temp if exists;
+create dataverse temp
+use dataverse temp;
+
+create type test as closed {
+  id: int32,
+  float: float,
+  stringa: string,
+  stringb: string?
+};
+
+create dataset testds (test)
+primary key id;

diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_05/csv_05.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_05/csv_05.2.update.aql
new file mode 100644
index 0000000..b851701
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_05/csv_05.2.update.aql

@@ -0,0 +1,12 @@
+/**
+ *
+ * CSV file loading test
+ * Expected result: fail - a field is not enclosed in two quotes properly. It misses one quote.
+ *
+ */
+
+use dataverse temp;
+
+load dataset testds
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/csv/sample_04_quote_error.csv"),("format"="delimited-text"),("delimiter"=","),("quote"="\""));
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_05/csv_05.3.query.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_05/csv_05.3.query.aql
new file mode 100644
index 0000000..ba85528
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_05/csv_05.3.query.aql

@@ -0,0 +1,17 @@
+/**
+ *
+ * CSV file loading test
+ * Expected result: fail - a field is not enclosed in two quotes properly. It misses one quote.
+ *
+ */
+
+use dataverse temp;
+
+for $i in dataset testds
+order by $i.id
+return {
+  "id": $i.id,
+  "float": $i.float,
+  "stringa": $i.stringa,
+  "stringb": $i.stringb
+}
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_06/csv_06.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_06/csv_06.1.ddl.aql
new file mode 100644
index 0000000..e2b5d8f
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_06/csv_06.1.ddl.aql

@@ -0,0 +1,21 @@
+/**
+ *
+ * CSV file loading test
+ * Expected result: fail - a whitespace is placed after the delimiter, and there is a quote after that.
+ * According to RFC (http://tools.ietf.org/html/rfc4180), this is not allowed.
+ *
+ */
+
+drop dataverse temp if exists;
+create dataverse temp
+use dataverse temp;
+
+create type test as closed {
+  id: int32,
+  float: float,
+  stringa: string,
+  stringb: string?
+};
+
+create dataset testds (test)
+primary key id;

diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_06/csv_06.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_06/csv_06.2.update.aql
new file mode 100644
index 0000000..468d7cb
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_06/csv_06.2.update.aql

@@ -0,0 +1,13 @@
+/**
+ *
+ * CSV file loading test
+ * Expected result: fail - a whitespace is placed after the delimiter, and there is a quote after that.
+ * According to RFC (http://tools.ietf.org/html/rfc4180), this is not allowed.
+ *
+ */
+
+use dataverse temp;
+
+load dataset testds
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/csv/sample_05_space_error_1.csv"),("format"="delimited-text"),("delimiter"=","),("quote"="\""));
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_06/csv_06.3.query.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_06/csv_06.3.query.aql
new file mode 100644
index 0000000..da3a4fa
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_06/csv_06.3.query.aql

@@ -0,0 +1,18 @@
+/**
+ *
+ * CSV file loading test
+ * Expected result: fail - a whitespace is placed after the delimiter, and there is a quote after that.
+ * According to RFC (http://tools.ietf.org/html/rfc4180), this is not allowed.
+ *
+ */
+
+use dataverse temp;
+
+for $i in dataset testds
+order by $i.id
+return {
+  "id": $i.id,
+  "float": $i.float,
+  "stringa": $i.stringa,
+  "stringb": $i.stringb
+}
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_07/csv_07.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_07/csv_07.1.ddl.aql
new file mode 100644
index 0000000..41a66ef
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_07/csv_07.1.ddl.aql

@@ -0,0 +1,21 @@
+/**
+ *
+ * CSV file loading test
+ * Expected result: fail - a whitespace is placed after a quote, and there is a delimiter after that space.
+ * According to RFC (http://tools.ietf.org/html/rfc4180), this is not allowed.
+ *
+ */
+
+drop dataverse temp if exists;
+create dataverse temp
+use dataverse temp;
+
+create type test as closed {
+  id: int32,
+  float: float,
+  stringa: string,
+  stringb: string?
+};
+
+create dataset testds (test)
+primary key id;

diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_07/csv_07.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_07/csv_07.2.update.aql
new file mode 100644
index 0000000..c5fbc69
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_07/csv_07.2.update.aql

@@ -0,0 +1,12 @@
+/**
+ *
+ * CSV file loading test
+ * Expected result: fail - a whitespace is placed after a quote, and there is a delimiter after that space.
+ *
+ */
+
+use dataverse temp;
+
+load dataset testds
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/csv/sample_06_space_error_2.csv"),("format"="delimited-text"),("delimiter"=","),("quote"="\""));
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_07/csv_07.3.query.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_07/csv_07.3.query.aql
new file mode 100644
index 0000000..ee72474
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_07/csv_07.3.query.aql

@@ -0,0 +1,17 @@
+/**
+ *
+ * CSV file loading test
+ * Expected result: fail - a whitespace is placed after a quote, and there is a delimiter after that space.
+ *
+ */
+
+use dataverse temp;
+
+for $i in dataset testds
+order by $i.id
+return {
+  "id": $i.id,
+  "float": $i.float,
+  "stringa": $i.stringa,
+  "stringb": $i.stringb
+}
\ No newline at end of file

diff --git a/asterix-app/src/test/resources/runtimets/results/load/csv_03/csv_03.1.adm b/asterix-app/src/test/resources/runtimets/results/load/csv_03/csv_03.1.adm
new file mode 100644
index 0000000..712d6fd
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/load/csv_03/csv_03.1.adm

@@ -0,0 +1,4 @@
+{ "id": 1, "float": 0.1f, "floatq": 0.1f, "double": 0.1d, "doubleq": 0.1d, "string": "abc", "stringq": "abc" }
+{ "id": 2, "float": 0.2f, "floatq": null, "double": 0.2d, "doubleq": null, "string": "", "stringq": null }
+{ "id": 3, "float": 0.3f, "floatq": null, "double": 0.3d, "doubleq": null, "string": "", "stringq": null }
+{ "id": 4, "float": 0.4f, "floatq": null, "double": 0.4d, "doubleq": null, "string": "", "stringq": null }

diff --git a/asterix-app/src/test/resources/runtimets/results/load/csv_04/csv_04.1.adm b/asterix-app/src/test/resources/runtimets/results/load/csv_04/csv_04.1.adm
new file mode 100644
index 0000000..292a507
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/load/csv_04/csv_04.1.adm

@@ -0,0 +1,4 @@
+{ "id": 1, "float": 0.1f, "stringa": "test\",1a", "stringb": "test\"1b" }
+{ "id": 2, "float": 0.2f, "stringa": "test2a", "stringb": "test2b" }
+{ "id": 3, "float": 0.3f, "stringa": "test,3a,3a,3a", "stringb": "\"\"test\"\"" }
+{ "id": 4, "float": 0.4f, "stringa": "test\"4a\",4a", "stringb": " test with\nline break " }

diff --git a/asterix-app/src/test/resources/runtimets/results/load/csv_05/csv_05.1.adm b/asterix-app/src/test/resources/runtimets/results/load/csv_05/csv_05.1.adm
new file mode 100644
index 0000000..292a507
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/load/csv_05/csv_05.1.adm

@@ -0,0 +1,4 @@
+{ "id": 1, "float": 0.1f, "stringa": "test\",1a", "stringb": "test\"1b" }
+{ "id": 2, "float": 0.2f, "stringa": "test2a", "stringb": "test2b" }
+{ "id": 3, "float": 0.3f, "stringa": "test,3a,3a,3a", "stringb": "\"\"test\"\"" }
+{ "id": 4, "float": 0.4f, "stringa": "test\"4a\",4a", "stringb": " test with\nline break " }

diff --git a/asterix-app/src/test/resources/runtimets/results/load/csv_06/csv_06.1.adm b/asterix-app/src/test/resources/runtimets/results/load/csv_06/csv_06.1.adm
new file mode 100644
index 0000000..292a507
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/load/csv_06/csv_06.1.adm

@@ -0,0 +1,4 @@
+{ "id": 1, "float": 0.1f, "stringa": "test\",1a", "stringb": "test\"1b" }
+{ "id": 2, "float": 0.2f, "stringa": "test2a", "stringb": "test2b" }
+{ "id": 3, "float": 0.3f, "stringa": "test,3a,3a,3a", "stringb": "\"\"test\"\"" }
+{ "id": 4, "float": 0.4f, "stringa": "test\"4a\",4a", "stringb": " test with\nline break " }

diff --git a/asterix-app/src/test/resources/runtimets/results/load/csv_07/csv_07.1.adm b/asterix-app/src/test/resources/runtimets/results/load/csv_07/csv_07.1.adm
new file mode 100644
index 0000000..292a507
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/load/csv_07/csv_07.1.adm

@@ -0,0 +1,4 @@
+{ "id": 1, "float": 0.1f, "stringa": "test\",1a", "stringb": "test\"1b" }
+{ "id": 2, "float": 0.2f, "stringa": "test2a", "stringb": "test2b" }
+{ "id": 3, "float": 0.3f, "stringa": "test,3a,3a,3a", "stringb": "\"\"test\"\"" }
+{ "id": 4, "float": 0.4f, "stringa": "test\"4a\",4a", "stringb": " test with\nline break " }

diff --git a/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterix-app/src/test/resources/runtimets/testsuite.xml
index 3464a29..2dd0f18 100644
--- a/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterix-app/src/test/resources/runtimets/testsuite.xml

@@ -4814,6 +4814,34 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="load">
+      <compilation-unit name="csv_03">
+        <output-dir compare="Text">csv_03</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="load">
+      <compilation-unit name="csv_04">
+        <output-dir compare="Text">csv_04</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="load">
+      <compilation-unit name="csv_05">
+        <output-dir compare="Text">csv_05</output-dir>
+        <expected-error>edu.uci.ics.hyracks.api.exceptions.HyracksDataException</expected-error>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="load">
+      <compilation-unit name="csv_06">
+        <output-dir compare="Text">csv_06</output-dir>
+        <expected-error>edu.uci.ics.hyracks.api.exceptions.HyracksDataException</expected-error>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="load">
+      <compilation-unit name="csv_07">
+        <output-dir compare="Text">csv_07</output-dir>
+        <expected-error>edu.uci.ics.hyracks.api.exceptions.HyracksDataException</expected-error>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="load">
       <compilation-unit name="issue14_query">
         <output-dir compare="Text">none</output-dir>
         <expected-error>edu.uci.ics.asterix.common.exceptions.AsterixException</expected-error>

diff --git a/asterix-common/src/main/java/edu/uci/ics/asterix/common/parse/IParseFileSplitsDecl.java b/asterix-common/src/main/java/edu/uci/ics/asterix/common/parse/IParseFileSplitsDecl.java
index 98f2848..4a9dad5 100644
--- a/asterix-common/src/main/java/edu/uci/ics/asterix/common/parse/IParseFileSplitsDecl.java
+++ b/asterix-common/src/main/java/edu/uci/ics/asterix/common/parse/IParseFileSplitsDecl.java

@@ -3,9 +3,9 @@
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * you may obtain a copy of the License from
- * 
+ *
  *     http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -19,7 +19,9 @@
 public interface IParseFileSplitsDecl {
     public boolean isDelimitedFileFormat();
 
-    public Character getDelimChar();
+    public char getDelimChar();
+
+    public char getQuote();
 
     public FileSplit[] getSplits();
 }

diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/HDFSIndexingAdapterFactory.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/HDFSIndexingAdapterFactory.java
index 5e573cb..040f506 100644
--- a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/HDFSIndexingAdapterFactory.java
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/HDFSIndexingAdapterFactory.java

@@ -22,6 +22,7 @@
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
 
+import edu.uci.ics.asterix.external.adapter.factory.StreamBasedAdapterFactory;
 import edu.uci.ics.asterix.external.dataset.adapter.HDFSIndexingAdapter;
 import edu.uci.ics.asterix.external.indexing.dataflow.HDFSIndexingParserFactory;
 import edu.uci.ics.asterix.external.indexing.dataflow.IndexingScheduler;
@@ -104,13 +105,14 @@
         ((HDFSIndexingParserFactory) parserFactory).setJobConf(conf);
         ((HDFSIndexingParserFactory) parserFactory).setArguments(configuration);
         HDFSIndexingAdapter hdfsIndexingAdapter = new HDFSIndexingAdapter(atype, readSchedule, executed, inputSplits,
-                conf, clusterLocations, files, parserFactory, ctx, nodeName, (String) configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT),
-                (String) configuration.get(KEY_FORMAT));
+                conf, clusterLocations, files, parserFactory, ctx, nodeName,
+                (String) configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT), (String) configuration.get(KEY_FORMAT));
         return hdfsIndexingAdapter;
     }
 
     @Override
-    public void configure(Map<String, String> configuration, ARecordType outputType, boolean isPKAutoGenerated, List<String> primaryKeys) throws Exception {
+    public void configure(Map<String, String> configuration, ARecordType outputType, boolean isPKAutoGenerated,
+            List<String> primaryKeys) throws Exception {
         if (!initialized) {
             hdfsScheduler = initializeHDFSScheduler();
             initialized = true;
@@ -130,22 +132,28 @@
         configureFormat(atype);
     }
 
-
     protected void configureFormat(IAType sourceDatatype) throws Exception {
-        parserFactory = new HDFSIndexingParserFactory((ARecordType)atype,
+
+        char delimiter = StreamBasedAdapterFactory.getDelimiter(configuration);
+        char quote = StreamBasedAdapterFactory.getQuote(configuration, delimiter);
+
+        parserFactory = new HDFSIndexingParserFactory((ARecordType) atype,
                 (String) configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT),
-                (String) configuration.get(KEY_FORMAT),
-                (String) configuration.get(KEY_DELIMITER),
+                (String) configuration.get(KEY_FORMAT), delimiter, quote,
                 (String) configuration.get(HDFSAdapterFactory.KEY_PARSER));
     }
 
     /**
      * A static function that creates and return delimited text data parser
-     * @param recordType (the record type to be parsed)
-     * @param delimiter (the dilimiter value)
+     *
+     * @param recordType (the record type to be parsed)
+     * @param delimiter (the delimiter value)
+     * @param quote (the quote character used to enclose a field
+     *            that contains delimiters)
      * @return
      */
-    public static DelimitedDataParser getDilimitedDataParser(ARecordType recordType, Character delimiter){
+    @SuppressWarnings("null")
+    public static DelimitedDataParser getDilimitedDataParser(ARecordType recordType, char delimiter, char quote) {
         int n = recordType.getFieldTypes().length;
         IValueParserFactory[] fieldParserFactories = new IValueParserFactory[n];
         for (int i = 0; i < n; i++) {
@@ -168,7 +176,7 @@
             }
             fieldParserFactories[i] = vpf;
         }
-        return new DelimitedDataParser(recordType, fieldParserFactories, delimiter, false, -1, null);
+        return new DelimitedDataParser(recordType, fieldParserFactories, delimiter, quote, false, -1, null);
     }
 
     public static AlgebricksPartitionConstraint getClusterLocations() {

diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/StreamBasedAdapterFactory.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/StreamBasedAdapterFactory.java
index b424b96..6fed446 100644
--- a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/StreamBasedAdapterFactory.java
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/StreamBasedAdapterFactory.java

@@ -51,8 +51,14 @@
     public static final String KEY_FORMAT = "format";
     public static final String KEY_PARSER_FACTORY = "parser";
     public static final String KEY_DELIMITER = "delimiter";
+    public static final String KEY_QUOTE = "quote";
     public static final String KEY_PATH = "path";
     public static final String KEY_SOURCE_DATATYPE = "output-type-name";
+    // The length of a delimiter should be 1.
+    public static final String DEFAULT_DELIMITER = ",";
+    // A quote is used to enclose a string if it includes delimiter(s) in it.
+    // The length of a quote should be 1.
+    public static final String DEFAULT_QUOTE = "\"";
     public static final String FORMAT_DELIMITED_TEXT = "delimited-text";
     public static final String FORMAT_ADM = "adm";
     public static final String NODE_RESOLVER_FACTORY_PROPERTY = "node.Resolver";
@@ -99,16 +105,13 @@
             }
             fieldParserFactories[i] = vpf;
         }
-        String delimiterValue = (String) configuration.get(KEY_DELIMITER);
-        if (delimiterValue != null && delimiterValue.length() > 1) {
-            throw new AsterixException("improper delimiter");
-        }
 
-        Character delimiter = delimiterValue.charAt(0);
+        char delimiter = getDelimiter(configuration);
+        char quote = getQuote(configuration, delimiter);
 
         return conditionalPush ? new ConditionalPushTupleParserFactory(recordType, fieldParserFactories, delimiter,
-                configuration) : new NtDelimitedDataTupleParserFactory(recordType, fieldParserFactories, delimiter,
-                        isPKAutoGenerated, primaryKeyPosition, origSourceDataTypeForAutoGeneratedPK);
+                quote, configuration) : new NtDelimitedDataTupleParserFactory(recordType, fieldParserFactories,
+                delimiter, quote, isPKAutoGenerated, primaryKeyPosition, origSourceDataTypeForAutoGeneratedPK);
     }
 
     protected ITupleParserFactory getADMDataTupleParserFactory(ARecordType recordType, boolean conditionalPush,
@@ -116,16 +119,16 @@
             throws AsterixException {
         try {
             return conditionalPush ? new ConditionalPushTupleParserFactory(recordType, configuration)
-                    : new AdmSchemafullRecordParserFactory(recordType, isPKAutoGenerated,
-                            primaryKeyPosition, origSourceDataTypeForAutoGeneratedPK);
+                    : new AdmSchemafullRecordParserFactory(recordType, isPKAutoGenerated, primaryKeyPosition,
+                            origSourceDataTypeForAutoGeneratedPK);
         } catch (Exception e) {
             throw new AsterixException(e);
         }
 
     }
 
-    protected void configureFormat(IAType sourceDatatype, boolean isPKAutoGenerated,
-            int primaryKeyPosition, IAType origSourceDataTypeForAutoGeneratedPK) throws Exception {
+    protected void configureFormat(IAType sourceDatatype, boolean isPKAutoGenerated, int primaryKeyPosition,
+            IAType origSourceDataTypeForAutoGeneratedPK) throws Exception {
         String propValue = (String) configuration.get(BATCH_SIZE);
         int batchSize = propValue != null ? Integer.parseInt(propValue) : -1;
         propValue = (String) configuration.get(BATCH_INTERVAL);
@@ -152,6 +155,36 @@
 
     }
 
+    // Returns the delimiter set under KEY_DELIMITER (DEFAULT_DELIMITER if absent); throws if it is not exactly one character long.
+    public static char getDelimiter(Map<String, String> configuration) throws AsterixException {
+        String delimiterValue = (String) configuration.get(KEY_DELIMITER);
+        if (delimiterValue == null) {
+            delimiterValue = DEFAULT_DELIMITER;
+        } else if (delimiterValue.length() != 1) {
+            throw new AsterixException("'" + delimiterValue
+                    + "' is not a valid delimiter. The length of a delimiter should be 1.");
+        }
+        return delimiterValue.charAt(0);
+    }
 
+    // Returns the quote set under KEY_QUOTE (DEFAULT_QUOTE if absent); throws if it is not exactly one character long.
+    // The delimiter is passed in so that a quote identical to the delimiter can be rejected with an exception.
+    public static char getQuote(Map<String, String> configuration, char delimiter) throws AsterixException {
+        String quoteValue = (String) configuration.get(KEY_QUOTE);
+        if (quoteValue == null) {
+            quoteValue = DEFAULT_QUOTE;
+        } else if (quoteValue.length() != 1) {
+            throw new AsterixException("'" + quoteValue + "' is not a valid quote. The length of a quote should be 1.");
+        }
+
+        // A char delimiter can never be null, so the only remaining check is that
+        // the quote and the delimiter are not the same character.
+        if (quoteValue.charAt(0) == delimiter) {
+            throw new AsterixException("Quote '" + quoteValue + "' cannot be used with the delimiter '" + delimiter
+                    + "'. ");
+        }
+
+        return quoteValue.charAt(0);
+    }
 
 }

diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/indexing/dataflow/HDFSIndexingParserFactory.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/indexing/dataflow/HDFSIndexingParserFactory.java
index da37399..122f15a 100644
--- a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/indexing/dataflow/HDFSIndexingParserFactory.java
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/indexing/dataflow/HDFSIndexingParserFactory.java

@@ -18,6 +18,7 @@
 
 import org.apache.hadoop.mapred.JobConf;
 
+import edu.uci.ics.asterix.common.exceptions.AsterixException;
 import edu.uci.ics.asterix.external.adapter.factory.HDFSAdapterFactory;
 import edu.uci.ics.asterix.external.adapter.factory.HDFSIndexingAdapterFactory;
 import edu.uci.ics.asterix.external.adapter.factory.StreamBasedAdapterFactory;
@@ -41,7 +42,9 @@
     // content format <adm, delimited-text, binary>
     private String format;
     // delimiter in case of delimited text
-    private String delimiter;
+    private char delimiter;
+    // quote in case of delimited text
+    private char quote;
     // parser class name in case of binary format
     private String parserClassName;
     // the expected data type
@@ -49,14 +52,15 @@
     // the hadoop job conf
     private transient JobConf jobConf;
     // adapter arguments
-    private Map<String,String> arguments;
+    private Map<String, String> arguments;
 
-    public HDFSIndexingParserFactory(ARecordType atype, String inputFormat, String format, String delimiter,
-            String parserClassName) {
+    public HDFSIndexingParserFactory(ARecordType atype, String inputFormat, String format, char delimiter,
+            char quote, String parserClassName) {
         this.inputFormat = inputFormat;
         this.format = format;
         this.parserClassName = parserClassName;
         this.delimiter = delimiter;
+        this.quote = quote;
         this.atype = atype;
     }
 
@@ -68,7 +72,8 @@
         if (inputFormat == null) {
             throw new IllegalArgumentException("Unspecified data format");
         }
-        if (!inputFormat.equalsIgnoreCase(HDFSAdapterFactory.INPUT_FORMAT_RC) && !inputFormat.equalsIgnoreCase(HDFSAdapterFactory.INPUT_FORMAT_TEXT)
+        if (!inputFormat.equalsIgnoreCase(HDFSAdapterFactory.INPUT_FORMAT_RC)
+                && !inputFormat.equalsIgnoreCase(HDFSAdapterFactory.INPUT_FORMAT_TEXT)
                 && !inputFormat.equalsIgnoreCase(HDFSAdapterFactory.INPUT_FORMAT_SEQUENCE)) {
             throw new IllegalArgumentException("External Indexing not supportd for format " + inputFormat);
         }
@@ -85,7 +90,8 @@
             return new AdmOrDelimitedIndexingTupleParser(ctx, atype, dataParser);
         } else if (format.equalsIgnoreCase(StreamBasedAdapterFactory.FORMAT_DELIMITED_TEXT)) {
             // choice 3 with delimited data parser
-            DelimitedDataParser dataParser = HDFSIndexingAdapterFactory.getDilimitedDataParser(atype, delimiter.charAt(0));
+            DelimitedDataParser dataParser = HDFSIndexingAdapterFactory.getDilimitedDataParser(atype,
+                    delimiter, quote);
             return new AdmOrDelimitedIndexingTupleParser(ctx, atype, dataParser);
         }
 
@@ -105,11 +111,11 @@
         } catch (Exception e) {
             throw new HyracksDataException("Unable to initialize object parser", e);
         }
-        
-        if(inputFormat.equalsIgnoreCase(HDFSAdapterFactory.INPUT_FORMAT_RC)){
+
+        if (inputFormat.equalsIgnoreCase(HDFSAdapterFactory.INPUT_FORMAT_RC)) {
             // Case 2
             return new RCFileIndexingTupleParser(ctx, atype, objectParser);
-        } else{
+        } else {
             // Case 1
             return new TextOrSeqIndexingTupleParser(ctx, atype, objectParser);
         }
@@ -123,11 +129,11 @@
         this.jobConf = jobConf;
     }
 
-    public Map<String,String> getArguments() {
+    public Map<String, String> getArguments() {
         return arguments;
     }
 
-    public void setArguments(Map<String,String> arguments) {
+    public void setArguments(Map<String, String> arguments) {
         this.arguments = arguments;
     }
 

diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/indexing/dataflow/HDFSLookupAdapter.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/indexing/dataflow/HDFSLookupAdapter.java
index 90bde5a..4554228 100644
--- a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/indexing/dataflow/HDFSLookupAdapter.java
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/indexing/dataflow/HDFSLookupAdapter.java

@@ -24,6 +24,7 @@
 
 import edu.uci.ics.asterix.external.adapter.factory.HDFSAdapterFactory;
 import edu.uci.ics.asterix.external.adapter.factory.HDFSIndexingAdapterFactory;
+import edu.uci.ics.asterix.external.adapter.factory.StreamBasedAdapterFactory;
 import edu.uci.ics.asterix.external.indexing.input.RCFileLookupReader;
 import edu.uci.ics.asterix.external.indexing.input.SequenceFileLookupInputStream;
 import edu.uci.ics.asterix.external.indexing.input.SequenceFileLookupReader;
@@ -97,8 +98,11 @@
             }
         } else if (configuration.get(HDFSAdapterFactory.KEY_FORMAT).equals(HDFSAdapterFactory.FORMAT_DELIMITED_TEXT)) {
             // create a delimited text parser
+            char delimiter = StreamBasedAdapterFactory.getDelimiter(configuration);
+            char quote = StreamBasedAdapterFactory.getQuote(configuration, delimiter);
+
             DelimitedDataParser dataParser = HDFSIndexingAdapterFactory.getDilimitedDataParser((ARecordType) atype,
-                    (configuration.get(HDFSAdapterFactory.KEY_DELIMITER)).charAt(0));
+                    delimiter, quote);
             if (configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT).equals(HDFSAdapterFactory.INPUT_FORMAT_TEXT)) {
                 // Text input format
                 TextFileLookupInputStream in = new TextFileLookupInputStream(fileIndexAccessor, jobConf);
@@ -144,7 +148,8 @@
         // Do nothing
     }
 
-    private void configureRCFile(Configuration jobConf, INullWriterFactory iNullWriterFactory) throws IOException, Exception {
+    private void configureRCFile(Configuration jobConf, INullWriterFactory iNullWriterFactory) throws IOException,
+            Exception {
         // RCFileLookupReader
         RCFileLookupReader reader = new RCFileLookupReader(fileIndexAccessor,
                 HDFSAdapterFactory.configureJobConf(configuration));

diff --git a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/feeds/ConditionalPushTupleParserFactory.java b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/feeds/ConditionalPushTupleParserFactory.java
index f834a24..3cf8fe6 100644
--- a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/feeds/ConditionalPushTupleParserFactory.java
+++ b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/feeds/ConditionalPushTupleParserFactory.java

@@ -47,7 +47,7 @@
                 dataParser = new ADMDataParser();
                 break;
             case DELIMITED_DATA:
-                dataParser = new DelimitedDataParser(recordType, valueParserFactories, delimiter, false, -1, null);
+                dataParser = new DelimitedDataParser(recordType, valueParserFactories, delimiter, quote, false, -1, null);
                 break;
         }
         return new ConditionalPushTupleParser(ctx, recordType, dataParser, configuration);
@@ -57,6 +57,7 @@
     private final Map<String, String> configuration;
     private IValueParserFactory[] valueParserFactories;
     private char delimiter;
+    private char quote;
     private final ParserType parserType;
 
     public enum ParserType {
@@ -65,10 +66,11 @@
     }
 
     public ConditionalPushTupleParserFactory(ARecordType recordType, IValueParserFactory[] valueParserFactories,
-            char fieldDelimiter, Map<String, String> configuration) {
+            char fieldDelimiter, char quote, Map<String, String> configuration) {
         this.recordType = recordType;
         this.valueParserFactories = valueParserFactories;
         this.delimiter = fieldDelimiter;
+        this.quote = quote;
         this.configuration = configuration;
         this.parserType = ParserType.DELIMITED_DATA;
 

diff --git a/asterix-om/src/main/java/edu/uci/ics/asterix/formats/base/IDataFormat.java b/asterix-om/src/main/java/edu/uci/ics/asterix/formats/base/IDataFormat.java
index bb602a4..f2af980 100644
--- a/asterix-om/src/main/java/edu/uci/ics/asterix/formats/base/IDataFormat.java
+++ b/asterix-om/src/main/java/edu/uci/ics/asterix/formats/base/IDataFormat.java

@@ -3,9 +3,9 @@
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * you may obtain a copy of the License from
- * 
+ *
  *     http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -68,7 +68,7 @@
 
     public ITupleParserFactory createTupleParser(ARecordType recType, IParseFileSplitsDecl decl);
 
-    public ITupleParserFactory createTupleParser(ARecordType recType, boolean isDelimited, Character delimiter);
+    public ITupleParserFactory createTupleParser(ARecordType recType, boolean isDelimited, char delimiter, char quote);
 
     public IFunctionDescriptor resolveFunction(ILogicalExpression expr, IVariableTypeEnvironment typeEnvironment)
             throws AlgebricksException;

diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/formats/NonTaggedDataFormat.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/formats/NonTaggedDataFormat.java
index b477545..4704ae1 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/formats/NonTaggedDataFormat.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/formats/NonTaggedDataFormat.java

@@ -930,11 +930,11 @@
 
     @Override
     public ITupleParserFactory createTupleParser(ARecordType recType, IParseFileSplitsDecl decl) {
-        return createTupleParser(recType, decl.isDelimitedFileFormat(), decl.getDelimChar());
+        return createTupleParser(recType, decl.isDelimitedFileFormat(), decl.getDelimChar(), decl.getQuote());
     }
 
     @Override
-    public ITupleParserFactory createTupleParser(ARecordType recType, boolean delimitedFormat, Character delimiter) {
+    public ITupleParserFactory createTupleParser(ARecordType recType, boolean delimitedFormat, char delimiter, char quote) {
         if (delimitedFormat) {
             int n = recType.getFieldTypes().length;
             IValueParserFactory[] fieldParserFactories = new IValueParserFactory[n];
@@ -946,7 +946,7 @@
                 }
                 fieldParserFactories[i] = vpf;
             }
-            return new NtDelimitedDataTupleParserFactory(recType, fieldParserFactories, delimiter, false, -1, null);
+            return new NtDelimitedDataTupleParserFactory(recType, fieldParserFactories, delimiter, quote, false, -1, null);
         } else {
             return new AdmSchemafullRecordParserFactory(recType, false, -1, null);
         }

diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java
index f516075..18f9553 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java

@@ -41,6 +41,7 @@
 
     protected final IValueParserFactory[] valueParserFactories;
     protected final char fieldDelimiter;
+    protected final char quote;
     protected final ARecordType recordType;
 
     private IARecordBuilder recBuilder;
@@ -59,12 +60,23 @@
     private final ARecordType origRecordTypeForAutoGeneratedPK;
 
     private boolean areAllNullFields;
+    private boolean isDoubleQuoteIncludedInThisField;
+    private int doubleQuoteCount;
+
+    private int lineCount;
+    private int fieldCount;
 
     public DelimitedDataParser(ARecordType recordType, IValueParserFactory[] valueParserFactories, char fieldDelimter,
-            boolean isPKAutoGenerated, int primaryKeyPosition, ARecordType origRecordTypeForAutoGeneratedPK) {
+            char quote) {
+        this(recordType, valueParserFactories, fieldDelimter, quote, false, -1, null);
+    }
+
+    public DelimitedDataParser(ARecordType recordType, IValueParserFactory[] valueParserFactories, char fieldDelimter,
+            char quote, boolean isPKAutoGenerated, int primaryKeyPosition, ARecordType origRecordTypeForAutoGeneratedPK) {
         this.recordType = recordType;
         this.valueParserFactories = valueParserFactories;
         this.fieldDelimiter = fieldDelimter;
+        this.quote = quote;
         this.isPKAutoGenerated = isPKAutoGenerated;
         this.primaryKeyPosition = primaryKeyPosition;
         this.origRecordTypeForAutoGeneratedPK = origRecordTypeForAutoGeneratedPK;
@@ -80,17 +92,20 @@
         else
             recordTypeToApply = recordType;
 
+        lineCount = 1;
+
         valueParsers = new IValueParser[valueParserFactories.length];
         for (int i = 0; i < valueParserFactories.length; ++i) {
             valueParsers[i] = valueParserFactories[i].createValueParser();
         }
 
+        isDoubleQuoteIncludedInThisField = false;
+
         fieldValueBuffer = new ArrayBackedValueStorage();
         fieldValueBufferOutput = fieldValueBuffer.getDataOutput();
 
         // If PK is auto-generated, then we need to use the recordType that
-        // includes PK,
-        // since recordType variable does not include PK field.
+        // includes PK, since recordType variable does not include PK field.
         recBuilder = new RecordBuilder();
         recBuilder.reset(recordTypeToApply);
         recBuilder.init();
@@ -125,6 +140,8 @@
     @Override
     public boolean parse(DataOutput out) throws AsterixException, IOException {
         while (cursor.nextRecord()) {
+            // If PK is auto-generated, then we need to use the recordType that
+            // includes PK, since recordType variable does not include PK field.
             if (isPKAutoGenerated)
                 recBuilder.reset(origRecordTypeForAutoGeneratedPK);
             else
@@ -133,7 +150,7 @@
             recBuilder.init();
             areAllNullFields = true;
 
-            int fieldCount = 0;
+            fieldCount = 0;
 
             for (int i = 0; i < valueParsers.length; ++i) {
                 if (!cursor.nextField()) {
@@ -144,18 +161,23 @@
                 if (cursor.fStart == cursor.fEnd && recordType.getFieldTypes()[i].getTypeTag() != ATypeTag.STRING
                         && recordType.getFieldTypes()[i].getTypeTag() != ATypeTag.NULL) {
                     // if the field is empty and the type is optional, insert
-                    // NULL
-                    // note that string type can also process empty field as an
+                    // NULL. Note that string type can also process empty field as an
                     // empty string
                     if (recordType.getFieldTypes()[i].getTypeTag() != ATypeTag.UNION
                             || !NonTaggedFormatUtil.isOptionalField((AUnionType) recordType.getFieldTypes()[i])) {
-                        throw new AsterixException("Field " + i
+                        throw new AsterixException("At line: " + lineCount + " - Field " + i
                                 + " is not an optional type so it cannot accept null value. ");
                     }
                     fieldValueBufferOutput.writeByte(ATypeTag.NULL.serialize());
                     ANullSerializerDeserializer.INSTANCE.serialize(ANull.NULL, out);
                 } else {
                     fieldValueBufferOutput.writeByte(fieldTypeTags[i]);
+                    // Eliminate double quotes in the field that we are going to parse
+                    if (isDoubleQuoteIncludedInThisField) {
+                        eliminateDoubleQuote(cursor.buffer, cursor.fStart, cursor.fEnd - cursor.fStart);
+                        cursor.fEnd -= doubleQuoteCount;
+                        isDoubleQuoteIncludedInThisField = false;
+                    }
                     valueParsers[i].parse(cursor.buffer, cursor.fStart, cursor.fEnd - cursor.fStart,
                             fieldValueBufferOutput);
                     areAllNullFields = false;
@@ -170,13 +192,9 @@
 
             }
 
-            // Should not have any more fields now
-            if (cursor.nextField()) {
-                fieldCount++;
-            }
-
-            // Parsed all fields except an auto-generated PK at this point
-            // Create a new UUID and assign it as a PK.
+            // Should not have any more fields now.
+            // We parsed all fields except an auto-generated PK at this point.
+            // We now create a new UUID and assign it as a PK.
             if (isPKAutoGenerated && fieldCount == origRecordTypeForAutoGeneratedPK.getFieldTypes().length - 1) {
                 fieldValueBuffer.reset();
                 aUUID.nextUUID();
@@ -187,12 +205,11 @@
                         fieldValueBufferOutput);
                 recBuilder.addField(primaryKeyPosition, fieldValueBuffer);
                 areAllNullFields = false;
-            }
-            // If we have all fields in the file including auto-generated PK,
-            // throw an exception
-            else if (isPKAutoGenerated && fieldCount >= origRecordTypeForAutoGeneratedPK.getFieldTypes().length) {
-                throw new AsterixException(
-                        "Check number of fields. Auto-generated PK field should not exist in the input data.");
+            } else if (isPKAutoGenerated && fieldCount >= origRecordTypeForAutoGeneratedPK.getFieldTypes().length) {
+                // If we have all fields in the file including auto-generated PK,
+                // throw an exception
+                throw new AsterixException("At line: " + lineCount
+                        + " - check number of fields. Auto-generated PK field should not exist in the input data.");
             }
 
             if (!areAllNullFields) {
@@ -237,12 +254,23 @@
         private int fStart;
         private int fEnd;
 
+        private int lastQuotePosition;
+        private int lastDoubleQuotePosition;
+        private int lastDelimiterPosition;
+        private int quoteCount;
+        private boolean startedQuote;
+
         public FieldCursor(Reader in) {
             this.in = in;
             buffer = new char[INITIAL_BUFFER_SIZE];
             start = 0;
             end = 0;
             state = State.INIT;
+            lastDelimiterPosition = -99;
+            lastQuotePosition = -99;
+            lastDoubleQuotePosition = -99;
+            quoteCount = 0;
+            startedQuote = false;
         }
 
         public boolean nextRecord() throws IOException {
@@ -269,13 +297,32 @@
                                     return start < end;
                                 }
                                 p -= (s - start);
+                                lastQuotePosition -= (s - start);
+                                lastDoubleQuotePosition -= (s - start);
+                                lastDelimiterPosition -= (s - start);
                             }
                             char ch = buffer[p];
-                            if (ch == '\n') {
+                            // We perform rough format correctness (delimiter, quote) check here
+                            // to set the starting position of a record.
+                            // In the field level, more checking will be conducted.
+                            if (ch == quote) {
+                                startedQuote = true;
+                                // check two quotes in a row - "". This is an escaped quote
+                                if (lastQuotePosition == p - 1 && start != p - 1 && lastDoubleQuotePosition != p - 1) {
+                                    lastDoubleQuotePosition = p;
+                                }
+                                lastQuotePosition = p;
+                            } else if (ch == fieldDelimiter) {
+                                if (startedQuote && lastQuotePosition == p - 1 && lastDoubleQuotePosition != p - 1) {
+                                    startedQuote = false;
+                                    lastDelimiterPosition = p;
+                                }
+                            } else if (ch == '\n' && !startedQuote) {
                                 start = p + 1;
                                 state = State.EOR;
+                                lastDelimiterPosition = p;
                                 break;
-                            } else if (ch == '\r') {
+                            } else if (ch == '\r' && !startedQuote) {
                                 start = p + 1;
                                 state = State.CR;
                                 break;
@@ -293,7 +340,7 @@
                             }
                         }
                         char ch = buffer[start];
-                        if (ch == '\n') {
+                        if (ch == '\n' && !startedQuote) {
                             ++start;
                             state = State.EOR;
                         } else {
@@ -310,6 +357,7 @@
                             }
                         }
                         state = State.IN_RECORD;
+                        lastDelimiterPosition = start;
                         return start < end;
 
                     case EOF:
@@ -328,37 +376,135 @@
 
                 case IN_RECORD:
                     boolean eof;
+                    // reset quote related values
+                    startedQuote = false;
+                    isDoubleQuoteIncludedInThisField = false;
+                    lastQuotePosition = -99;
+                    lastDoubleQuotePosition = -99;
+                    quoteCount = 0;
+                    doubleQuoteCount = 0;
+
                     int p = start;
                     while (true) {
                         if (p >= end) {
                             int s = start;
                             eof = !readMore();
                             p -= (s - start);
+                            lastQuotePosition -= (s - start);
+                            lastDoubleQuotePosition -= (s - start);
+                            lastDelimiterPosition -= (s - start);
                             if (eof) {
                                 state = State.EOF;
-                                fStart = start;
-                                fEnd = p;
+                                if (startedQuote && lastQuotePosition == p - 1 && lastDoubleQuotePosition != p - 1
+                                        && quoteCount == doubleQuoteCount * 2 + 2) {
+                                    // set the position of fStart to +1, fEnd to -1 to remove quote character
+                                    fStart = start + 1;
+                                    fEnd = p - 1;
+                                } else {
+                                    fStart = start;
+                                    fEnd = p;
+                                }
                                 return true;
                             }
                         }
                         char ch = buffer[p];
-                        if (ch == fieldDelimiter) {
-                            fStart = start;
-                            fEnd = p;
-                            start = p + 1;
-                            return true;
+                        if (ch == quote) {
+                            // If this is first quote in the field, then it needs to be placed in the beginning.
+                            if (!startedQuote) {
+                                if (lastDelimiterPosition == p - 1 || lastDelimiterPosition == -99) {
+                                    startedQuote = true;
+                                } else {
+                                    // In this case, we don't have a quote in the beginning of a field.
+                                    throw new IOException(
+                                            "At line: "
+                                                    + lineCount
+                                                    + ", field#: "
+                                                    + (fieldCount+1)
+                                                    + " - a quote enclosing a field needs to be placed in the beginning of that field.");
+                                }
+                            }
+                            // Check double quotes - "". We check [lastDelimiterPosition != p-2]
+                            // to avoid false positive where there is no value in a field,
+                            // since it looks like a double quote. However, it's not a double quote.
+                            // (e.g. if field2 has no value:
+                            //       field1,"",field3 ... )
+                            if (lastQuotePosition == p - 1 && lastDelimiterPosition != p - 2
+                                    && lastDoubleQuotePosition != p - 1) {
+                                isDoubleQuoteIncludedInThisField = true;
+                                doubleQuoteCount++;
+                                lastDoubleQuotePosition = p;
+                            }
+                            lastQuotePosition = p;
+                            quoteCount++;
+                        } else if (ch == fieldDelimiter) {
+                            // If there was no quote in the field,
+                            // then we assume that the field contains a valid string.
+                            if (!startedQuote) {
+                                fStart = start;
+                                fEnd = p;
+                                start = p + 1;
+                                lastDelimiterPosition = p;
+                                return true;
+                            } else if (startedQuote) {
+                                if (lastQuotePosition == p - 1 && lastDoubleQuotePosition != p - 1) {
+                                    // There is a quote right before the delimiter (e.g. ",) and it is not part of an
+                                    // escaped quote pair (""), so the field contains a valid string.
+                                    // We set the position of fStart to +1, fEnd to -1 to remove quote character
+                                    fStart = start + 1;
+                                    fEnd = p - 1;
+                                    start = p + 1;
+                                    lastDelimiterPosition = p;
+                                    return true;
+                                } else if (lastQuotePosition < p - 1 && lastQuotePosition != lastDoubleQuotePosition
+                                        && quoteCount == doubleQuoteCount * 2 + 2) {
+                                    // There is a quote before the delimiter, however it is not directly placed before the delimiter.
+                                    // In this case, we throw an exception.
+                                    // quoteCount == doubleQuoteCount * 2 + 2 : only true when we have two quotes except double-quotes.
+                                    throw new IOException("At line: " + lineCount + ", field#: " + (fieldCount+1)
+                                            + " -  A quote enclosing a field needs to be followed by the delimiter.");
+                                }
+                            }
+                            // If the control flow reaches here: we have a delimiter in this field and
+                            // there should be a quote in the beginning and the end of
+                            // this field. So, just continue reading next character
                         } else if (ch == '\n') {
-                            fStart = start;
-                            fEnd = p;
-                            start = p + 1;
-                            state = State.EOR;
-                            return true;
+                            if (!startedQuote) {
+                                fStart = start;
+                                fEnd = p;
+                                start = p + 1;
+                                state = State.EOR;
+                                lineCount++;
+                                lastDelimiterPosition = p;
+                                return true;
+                            } else if (startedQuote && lastQuotePosition == p - 1 && lastDoubleQuotePosition != p - 1
+                                    && quoteCount == doubleQuoteCount * 2 + 2) {
+                                // set the position of fStart to +1, fEnd to -1 to remove quote character
+                                fStart = start + 1;
+                                fEnd = p - 1;
+                                lastDelimiterPosition = p;
+                                start = p + 1;
+                                state = State.EOR;
+                                lineCount++;
+                                return true;
+                            }
                         } else if (ch == '\r') {
-                            fStart = start;
-                            fEnd = p;
-                            start = p + 1;
-                            state = State.CR;
-                            return true;
+                            if (!startedQuote) {
+                                fStart = start;
+                                fEnd = p;
+                                start = p + 1;
+                                state = State.CR;
+                                lastDelimiterPosition = p;
+                                return true;
+                            } else if (startedQuote && lastQuotePosition == p - 1 && lastDoubleQuotePosition != p - 1
+                                    && quoteCount == doubleQuoteCount * 2 + 2) {
+                                // set the position of fStart to +1, fEnd to -1 to remove quote character
+                                fStart = start + 1;
+                                fEnd = p - 1;
+                                lastDelimiterPosition = p;
+                                start = p + 1;
+                                state = State.CR;
+                                return true;
+                            }
                         }
                         ++p;
                     }
@@ -385,25 +531,27 @@
             return true;
         }
 
-        public int getfStart() {
-            return fStart;
-        }
-
-        public void setfStart(int fStart) {
-            this.fStart = fStart;
-        }
-
-        public int getfEnd() {
-            return fEnd;
-        }
-
-        public void setfEnd(int fEnd) {
-            this.fEnd = fEnd;
-        }
-
-        public char[] getBuffer() {
-            return buffer;
-        }
     }
 
+    // Collapses each escaped quote pair ("") within buffer[start, start + length) into a single quote char, in place; returns nothing, so the shortened length is not reported here
+    protected void eliminateDoubleQuote(char[] buffer, int start, int length) {
+        int lastDoubleQuotePosition = -99; // sentinel: cannot equal readpos - 1 for any valid array index
+        int writepos = start;
+        int readpos = start;
+        // Single pass over the field: readpos visits every char, writepos trails behind once a quote has been dropped
+        for (int i = 0; i < length; i++) {
+            // Drop this quote char unless the char right before it was itself a dropped quote (i.e. this is the 2nd of a pair)
+            if (buffer[readpos] == quote && lastDoubleQuotePosition != readpos - 1) {
+                lastDoubleQuotePosition = readpos;
+                readpos++;
+            } else {
+                // Keep this char, shifting it left over the gap left by dropped quotes (no copy while nothing has been dropped)
+                if (writepos != readpos) {
+                    buffer[writepos] = buffer[readpos];
+                }
+                writepos++;
+                readpos++;
+            }
+        }
+    }
 }

diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataTupleParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataTupleParser.java
index be6c42a..8846110 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataTupleParser.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataTupleParser.java

@@ -28,10 +28,10 @@
     private final DelimitedDataParser dataParser;
 
     public DelimitedDataTupleParser(IHyracksTaskContext ctx, ARecordType recType,
-            IValueParserFactory[] valueParserFactories, char fieldDelimter, boolean isPKAutoGenerated,
+            IValueParserFactory[] valueParserFactories, char fieldDelimter, char quote, boolean isPKAutoGenerated,
             int primaryKeyPosition, ARecordType origRecordTypeForAutoGeneratedPK) throws HyracksDataException {
         super(ctx, recType, isPKAutoGenerated, primaryKeyPosition, origRecordTypeForAutoGeneratedPK);
-        dataParser = new DelimitedDataParser(recType, valueParserFactories, fieldDelimter, isPKAutoGenerated,
+        dataParser = new DelimitedDataParser(recType, valueParserFactories, fieldDelimter, quote, isPKAutoGenerated,
                 primaryKeyPosition, origRecordTypeForAutoGeneratedPK);
     }
 

diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/NtDelimitedDataTupleParserFactory.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/NtDelimitedDataTupleParserFactory.java
index f88d39a..a301c7d 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/NtDelimitedDataTupleParserFactory.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/NtDelimitedDataTupleParserFactory.java

@@ -30,17 +30,25 @@
     protected ARecordType recordType;
     protected IValueParserFactory[] valueParserFactories;
     protected char fieldDelimiter;
+    // quote is used to enclose a string if it includes delimiter(s) in it.
+    protected char quote;
     // To deal with an auto-generated PK
     protected boolean isPKAutoGenerated;
     protected int primaryKeyPosition;
     protected ARecordType origRecordTypeForAutoGeneratedPK;
 
     public NtDelimitedDataTupleParserFactory(ARecordType recordType, IValueParserFactory[] valueParserFactories,
-            char fieldDelimiter, boolean isPKAutoGenerated,
-            int primaryKeyposition, ARecordType origRecordTypeForAutoGeneratedPK) {
+            char fieldDelimiter, char quote) {
+        this(recordType, valueParserFactories, fieldDelimiter, quote, false, -1, null);
+    }
+
+    public NtDelimitedDataTupleParserFactory(ARecordType recordType, IValueParserFactory[] valueParserFactories,
+            char fieldDelimiter, char quote, boolean isPKAutoGenerated, int primaryKeyposition,
+            ARecordType origRecordTypeForAutoGeneratedPK) {
         this.recordType = recordType;
         this.valueParserFactories = valueParserFactories;
         this.fieldDelimiter = fieldDelimiter;
+        this.quote = quote;
         this.isPKAutoGenerated = isPKAutoGenerated;
         this.primaryKeyPosition = primaryKeyposition;
         this.origRecordTypeForAutoGeneratedPK = origRecordTypeForAutoGeneratedPK;
@@ -48,8 +56,7 @@
 
     @Override
     public ITupleParser createTupleParser(final IHyracksTaskContext ctx) throws HyracksDataException {
-        return new DelimitedDataTupleParser(ctx, recordType, valueParserFactories, fieldDelimiter,
+        return new DelimitedDataTupleParser(ctx, recordType, valueParserFactories, fieldDelimiter, quote,
                 isPKAutoGenerated, primaryKeyPosition, origRecordTypeForAutoGeneratedPK);
     }
-
 }

diff --git a/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/external/data/RateControlledFileSystemBasedAdapterFactory.java b/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/external/data/RateControlledFileSystemBasedAdapterFactory.java
index 0306a80..5f30465 100644
--- a/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/external/data/RateControlledFileSystemBasedAdapterFactory.java
+++ b/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/external/data/RateControlledFileSystemBasedAdapterFactory.java

@@ -102,7 +102,8 @@
     }
 
     @Override
-    public void configure(Map<String, String> configuration, ARecordType recordType, boolean isPKAutoGenerated, List<String> primaryKeys) throws Exception {
+    public void configure(Map<String, String> configuration, ARecordType recordType, boolean isPKAutoGenerated,
+            List<String> primaryKeys) throws Exception {
         this.configuration = configuration;
         checkRequiredArgs(configuration);
         String fileSystem = (String) configuration.get(KEY_FILE_SYSTEM);
@@ -134,13 +135,11 @@
                 break;
 
             case FORMAT_DELIMITED_TEXT:
-                String delimiterValue = (String) configuration.get(KEY_DELIMITER);
-                if (delimiterValue != null && delimiterValue.length() > 1) {
-                    throw new AsterixException("improper delimiter");
-                }
+                char delimiter = StreamBasedAdapterFactory.getDelimiter(configuration);
+                char quote = StreamBasedAdapterFactory.getQuote(configuration, delimiter);
                 IValueParserFactory[] valueParserFactories = getValueParserFactories(atype);
-                parserFactory = new RateControlledTupleParserFactory(atype, valueParserFactories,
-                        delimiterValue.charAt(0), configuration);
+                parserFactory = new RateControlledTupleParserFactory(atype, valueParserFactories, delimiter, quote,
+                        configuration);
                 break;
         }
     }
@@ -161,7 +160,7 @@
     }
 
     @Override
-    public void setFiles(List<ExternalFile> files) throws AlgebricksException{
+    public void setFiles(List<ExternalFile> files) throws AlgebricksException {
         throw new AlgebricksException("can't set files for this Adapter");
     }
 
@@ -175,6 +174,7 @@
     private final Map<String, String> configuration;
     private IValueParserFactory[] valueParserFactories;
     private char delimiter;
+    private char quote;
     private final ParserType parserType;
 
     public enum ParserType {
@@ -183,10 +183,17 @@
     }
 
     public RateControlledTupleParserFactory(ARecordType recordType, IValueParserFactory[] valueParserFactories,
-            char fieldDelimiter, Map<String, String> configuration) {
+            char fieldDelimiter, Map<String, String> configuration) throws AsterixException {
+        this(recordType, valueParserFactories, fieldDelimiter, StreamBasedAdapterFactory.getQuote(configuration,
+                fieldDelimiter), configuration);
+    }
+
+    public RateControlledTupleParserFactory(ARecordType recordType, IValueParserFactory[] valueParserFactories,
+            char fieldDelimiter, char quote, Map<String, String> configuration) {
         this.recordType = recordType;
         this.valueParserFactories = valueParserFactories;
         this.delimiter = fieldDelimiter;
+        this.quote = quote;
         this.configuration = configuration;
         this.parserType = ParserType.DELIMITED_DATA;
     }
@@ -205,7 +212,8 @@
                 dataParser = new ADMDataParser();
                 break;
             case DELIMITED_DATA:
-                dataParser = new DelimitedDataParser(recordType, valueParserFactories, delimiter, false, -1, null);
+                dataParser = new DelimitedDataParser(recordType, valueParserFactories, delimiter, quote, false, -1,
+                        null);
                 break;
         }
         return new RateControlledTupleParser(ctx, recordType, dataParser, configuration);
commit	efb236d412a5ff66e6ca1c8d258eee2a4b3f1b56	[log] [tgz]
author	Taewoo Kim <wangsaeu@gmail.com>	Wed Sep 17 10:55:05 2014 -0700
committer	Ian Maxon <imaxon@uci.edu>	Wed Sep 17 16:36:16 2014 -0800
tree	ff5214018bb80dc47191f18eb74f35da6edd1400
parent	83259465e090c5797597a2a1a9487bd35621d775 [diff]