Issue 867: Handle delimited files using CR-only line separators

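Also simplify record- and field-counting logic: the cursor now tracks
recordCount and fieldCount itself, so nextField() no longer takes a
field-count argument, field numbers restart at 1 for each record, and
parse errors report the record number rather than the line number.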

Change-Id: Ie28abda93fc9e5996008fac8b60aaf906df49cb7
Reviewed-on: https://asterix-gerrit.ics.uci.edu/246
Reviewed-by: Ian Maxon <imaxon@uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Preston Carman <ecarm002@ucr.edu>
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java
index 6fd38d2..5be1eab 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/file/DelimitedDataTupleParserFactory.java
@@ -37,8 +37,6 @@
     private char fieldDelimiter;
     private char quote;
 
-    private int fieldCount;
-
     public DelimitedDataTupleParserFactory(IValueParserFactory[] fieldParserFactories, char fieldDelimiter) {
         this(fieldParserFactories, fieldDelimiter, '\"');
     }
@@ -47,7 +45,6 @@
         this.valueParserFactories = fieldParserFactories;
         this.fieldDelimiter = fieldDelimiter;
         this.quote = quote;
-        this.fieldCount = 0;
     }
 
     @Override
@@ -71,7 +68,7 @@
                     while (cursor.nextRecord()) {
                         tb.reset();
                         for (int i = 0; i < valueParsers.length; ++i) {
-                            if (!cursor.nextField(fieldCount)) {
+                            if (!cursor.nextField()) {
                                 break;
                             }
                             // Eliminate double quotes in the field that we are going to parse
@@ -82,7 +79,6 @@
                             }
                             valueParsers[i].parse(cursor.buffer, cursor.fStart, cursor.fEnd - cursor.fStart, dos);
                             tb.addFieldEndOffset();
-                            fieldCount++;
                         }
                         if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                             FrameUtils.flushFrame(frame, writer);
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
index 69ea0b1..780574c 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/file/FieldCursorForDelimitedDataParser.java
@@ -32,7 +32,8 @@
     public char[] buffer;
     public int fStart;
     public int fEnd;
-    public int lineCount;
+    public int recordCount;
+    public int fieldCount;
     public int doubleQuoteCount;
     public boolean isDoubleQuoteIncludedInThisField;
 
@@ -69,10 +70,13 @@
         doubleQuoteCount = 0;
         startedQuote = false;
         isDoubleQuoteIncludedInThisField = false;
-        lineCount = 1;
+        recordCount = 0;
+        fieldCount = 0;
     }
 
     public boolean nextRecord() throws IOException {
+        recordCount++;
+        fieldCount = 0;
         while (true) {
             switch (state) {
                 case INIT:
@@ -119,12 +123,12 @@
                         } else if (ch == '\n' && !startedQuote) {
                             start = p + 1;
                             state = State.EOR;
-                            lineCount++;
                             lastDelimiterPosition = p;
                             break;
                         } else if (ch == '\r' && !startedQuote) {
                             start = p + 1;
                             state = State.CR;
+                            lastDelimiterPosition = p;
                             break;
                         }
                         ++p;
@@ -143,7 +147,6 @@
                     if (ch == '\n' && !startedQuote) {
                         ++start;
                         state = State.EOR;
-                        lineCount++;
                     } else {
                         state = State.IN_RECORD;
                         return true;
@@ -167,7 +170,8 @@
         }
     }
 
-    public boolean nextField(int fieldCount) throws IOException {
+    public boolean nextField() throws IOException {
+        fieldCount++;
         switch (state) {
             case INIT:
             case EOR:
@@ -217,10 +221,10 @@
                             } else {
                                 // In this case, we don't have a quote in the beginning of a field.
                                 throw new IOException(
-                                        "At line: "
-                                                + lineCount
+                                        "At record: "
+                                                + recordCount
                                                 + ", field#: "
-                                                + (fieldCount + 1)
+                                                + fieldCount
                                                 + " - a quote enclosing a field needs to be placed in the beginning of that field.");
                             }
                         }
@@ -262,7 +266,7 @@
                                 // There is a quote before the delimiter, however it is not directly placed before the delimiter.
                                 // In this case, we throw an exception.
                                 // quoteCount == doubleQuoteCount * 2 + 2 : only true when we have two quotes except double-quotes.
-                                throw new IOException("At line: " + lineCount + ", field#: " + (fieldCount + 1)
+                                throw new IOException("At record: " + recordCount + ", field#: " + fieldCount
                                         + " -  A quote enclosing a field needs to be followed by the delimiter.");
                             }
                         }
@@ -275,7 +279,6 @@
                             fEnd = p;
                             start = p + 1;
                             state = State.EOR;
-                            lineCount++;
                             lastDelimiterPosition = p;
                             return true;
                         } else if (startedQuote && lastQuotePosition == p - 1 && lastDoubleQuotePosition != p - 1
@@ -286,7 +289,6 @@
                             lastDelimiterPosition = p;
                             start = p + 1;
                             state = State.EOR;
-                            lineCount++;
                             startedQuote = false;
                             return true;
                         }