Introduce ("header"="true") option for CSV parsing

Change-Id: I473bc7b66d0c0d1355b5fb3e392cb9ece151e50e
Reviewed-on: http://fulliautomatix.ics.uci.edu:8443/226
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Ian Maxon <imaxon@uci.edu>
Tested-by: Chris Hillery <ceej@lambda.nu>

commit: fda07252706e7154d1bc5ee1cdbe14167259ce25 [log] [tgz]
author: Chris Hillery <chillery@lambda.nu> Fri Feb 13 04:24:20 2015 -0800
committer: Chris Hillery <ceej@lambda.nu> Mon Feb 16 01:10:58 2015 -0800
tree: 115d45deb206a9663a17abc3c9a24802bd409325
parent: 386721bf09f113960fbc1cb104572233765c9f49 [diff]
diff --git a/asterix-common/src/main/java/edu/uci/ics/asterix/common/parse/IParseFileSplitsDecl.java b/asterix-common/src/main/java/edu/uci/ics/asterix/common/parse/IParseFileSplitsDecl.java
index 4a9dad5..1a2c972 100644
--- a/asterix-common/src/main/java/edu/uci/ics/asterix/common/parse/IParseFileSplitsDecl.java
+++ b/asterix-common/src/main/java/edu/uci/ics/asterix/common/parse/IParseFileSplitsDecl.java

@@ -23,5 +23,7 @@
 
     public char getQuote();
 
+    public boolean getHasHeader();
+
     public FileSplit[] getSplits();
 }

diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/HDFSIndexingAdapterFactory.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/HDFSIndexingAdapterFactory.java
index 040f506..a11cc22 100644
--- a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/HDFSIndexingAdapterFactory.java
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/HDFSIndexingAdapterFactory.java

@@ -136,11 +136,12 @@
 
         char delimiter = StreamBasedAdapterFactory.getDelimiter(configuration);
         char quote = StreamBasedAdapterFactory.getQuote(configuration, delimiter);
+        boolean hasHeader = StreamBasedAdapterFactory.getHasHeader(configuration);
 
         parserFactory = new HDFSIndexingParserFactory((ARecordType) atype,
-                (String) configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT),
-                (String) configuration.get(KEY_FORMAT), delimiter, quote,
-                (String) configuration.get(HDFSAdapterFactory.KEY_PARSER));
+                                                      configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT),
+                                                      configuration.get(KEY_FORMAT), delimiter, quote, hasHeader,
+                                                      configuration.get(HDFSAdapterFactory.KEY_PARSER));
     }
 
     /**
@@ -153,7 +154,7 @@
      * @return
      */
     @SuppressWarnings("null")
-    public static DelimitedDataParser getDilimitedDataParser(ARecordType recordType, char delimiter, char quote) {
+    public static DelimitedDataParser getDelimitedDataParser(ARecordType recordType, char delimiter, char quote, boolean hasHeader) {
         int n = recordType.getFieldTypes().length;
         IValueParserFactory[] fieldParserFactories = new IValueParserFactory[n];
         for (int i = 0; i < n; i++) {
@@ -176,7 +177,7 @@
             }
             fieldParserFactories[i] = vpf;
         }
-        return new DelimitedDataParser(recordType, fieldParserFactories, delimiter, quote, false, -1, null);
+        return new DelimitedDataParser(recordType, fieldParserFactories, delimiter, quote, hasHeader, false, -1, null);
     }
 
     public static AlgebricksPartitionConstraint getClusterLocations() {

diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/StreamBasedAdapterFactory.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/StreamBasedAdapterFactory.java
index 8daa7d0..c3a3746 100644
--- a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/StreamBasedAdapterFactory.java
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/adapter/factory/StreamBasedAdapterFactory.java

@@ -52,6 +52,7 @@
     public static final String KEY_PARSER_FACTORY = "parser";
     public static final String KEY_DELIMITER = "delimiter";
     public static final String KEY_QUOTE = "quote";
+    public static final String KEY_HEADER = "header";
     public static final String KEY_PATH = "path";
     public static final String KEY_SOURCE_DATATYPE = "output-type-name";
     // The length of a delimiter should be 1.
@@ -109,10 +110,11 @@
 
         char delimiter = getDelimiter(configuration);
         char quote = getQuote(configuration, delimiter);
+        boolean hasHeader = getHasHeader(configuration);
 
         return conditionalPush ? new ConditionalPushTupleParserFactory(recordType, fieldParserFactories, delimiter,
-                quote, configuration) : new NtDelimitedDataTupleParserFactory(recordType, fieldParserFactories,
-                delimiter, quote, isPKAutoGenerated, primaryKeyPosition, origSourceDataTypeForAutoGeneratedPK);
+                quote, hasHeader, configuration) : new NtDelimitedDataTupleParserFactory(recordType, fieldParserFactories,
+                delimiter, quote, hasHeader, isPKAutoGenerated, primaryKeyPosition, origSourceDataTypeForAutoGeneratedPK);
     }
 
     protected ITupleParserFactory getADMDataTupleParserFactory(ARecordType recordType, boolean conditionalPush,
@@ -130,21 +132,21 @@
 
     protected void configureFormat(IAType sourceDatatype, boolean isPKAutoGenerated, int primaryKeyPosition,
             IAType origSourceDataTypeForAutoGeneratedPK) throws Exception {
-        String propValue = (String) configuration.get(BATCH_SIZE);
+        String propValue = configuration.get(BATCH_SIZE);
         int batchSize = propValue != null ? Integer.parseInt(propValue) : -1;
-        propValue = (String) configuration.get(BATCH_INTERVAL);
+        propValue = configuration.get(BATCH_INTERVAL);
         long batchInterval = propValue != null ? Long.parseLong(propValue) : -1;
         boolean conditionalPush = batchSize > 0 || batchInterval > 0;
 
-        String parserFactoryClassname = (String) configuration.get(KEY_PARSER_FACTORY);
+        String parserFactoryClassname = configuration.get(KEY_PARSER_FACTORY);
         if (parserFactoryClassname == null) {
-            String specifiedFormat = (String) configuration.get(KEY_FORMAT);
+            String specifiedFormat = configuration.get(KEY_FORMAT);
             if (specifiedFormat == null) {
                 throw new IllegalArgumentException(" Unspecified data format");
             } else if (FORMAT_DELIMITED_TEXT.equalsIgnoreCase(specifiedFormat)) {
                 parserFactory = getDelimitedDataTupleParserFactory((ARecordType) sourceDatatype, conditionalPush,
                         isPKAutoGenerated, primaryKeyPosition, (ARecordType) origSourceDataTypeForAutoGeneratedPK);
-            } else if (FORMAT_ADM.equalsIgnoreCase((String) configuration.get(KEY_FORMAT))) {
+            } else if (FORMAT_ADM.equalsIgnoreCase(configuration.get(KEY_FORMAT))) {
                 parserFactory = getADMDataTupleParserFactory((ARecordType) sourceDatatype, conditionalPush,
                         isPKAutoGenerated, primaryKeyPosition, (ARecordType) origSourceDataTypeForAutoGeneratedPK);
             } else {
@@ -158,7 +160,7 @@
 
     // Get a delimiter from the given configuration
     public static char getDelimiter(Map<String, String> configuration) throws AsterixException {
-        String delimiterValue = (String) configuration.get(KEY_DELIMITER);
+        String delimiterValue = configuration.get(KEY_DELIMITER);
         if (delimiterValue == null) {
             delimiterValue = DEFAULT_DELIMITER;
         } else if (delimiterValue.length() != 1) {
@@ -171,7 +173,7 @@
     // Get a quote from the given configuration when the delimiter is given
     // Need to pass delimiter to check whether they share the same character
     public static char getQuote(Map<String, String> configuration, char delimiter) throws AsterixException {
-        String quoteValue = (String) configuration.get(KEY_QUOTE);
+        String quoteValue = configuration.get(KEY_QUOTE);
         if (quoteValue == null) {
             quoteValue = DEFAULT_QUOTE;
         } else if (quoteValue.length() != 1) {
@@ -188,4 +190,8 @@
         return quoteValue.charAt(0);
     }
 
+    // Get the header flag
+    public static boolean getHasHeader(Map<String, String> configuration) {
+        return Boolean.parseBoolean(configuration.get(KEY_HEADER));
+    }
 }

diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/indexing/dataflow/HDFSIndexingParserFactory.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/indexing/dataflow/HDFSIndexingParserFactory.java
index 122f15a..6b2174f 100644
--- a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/indexing/dataflow/HDFSIndexingParserFactory.java
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/indexing/dataflow/HDFSIndexingParserFactory.java

@@ -38,29 +38,32 @@
 
     private static final long serialVersionUID = 1L;
     // file input-format <text, seq, rc>
-    private String inputFormat;
+    private final String inputFormat;
     // content format <adm, delimited-text, binary>
-    private String format;
+    private final String format;
     // delimiter in case of delimited text
-    private char delimiter;
+    private final char delimiter;
     // quote in case of delimited text
-    private char quote;
+    private final char quote;
+    // whether delimited text file has a header (which should be ignored)
+    private final boolean hasHeader;
     // parser class name in case of binary format
-    private String parserClassName;
+    private final String parserClassName;
     // the expected data type
-    private ARecordType atype;
+    private final ARecordType atype;
     // the hadoop job conf
     private transient JobConf jobConf;
     // adapter arguments
     private Map<String, String> arguments;
 
     public HDFSIndexingParserFactory(ARecordType atype, String inputFormat, String format, char delimiter,
-            char quote, String parserClassName) {
+                                     char quote, boolean hasHeader, String parserClassName) {
         this.inputFormat = inputFormat;
         this.format = format;
         this.parserClassName = parserClassName;
         this.delimiter = delimiter;
         this.quote = quote;
+        this.hasHeader = hasHeader;
         this.atype = atype;
     }
 
@@ -90,8 +93,8 @@
             return new AdmOrDelimitedIndexingTupleParser(ctx, atype, dataParser);
         } else if (format.equalsIgnoreCase(StreamBasedAdapterFactory.FORMAT_DELIMITED_TEXT)) {
             // choice 3 with delimited data parser
-            DelimitedDataParser dataParser = HDFSIndexingAdapterFactory.getDilimitedDataParser(atype,
-                    delimiter, quote);
+            DelimitedDataParser dataParser = HDFSIndexingAdapterFactory.getDelimitedDataParser(atype,
+                delimiter, quote, hasHeader);
             return new AdmOrDelimitedIndexingTupleParser(ctx, atype, dataParser);
         }
 

diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/indexing/dataflow/HDFSLookupAdapter.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/indexing/dataflow/HDFSLookupAdapter.java
index 4554228..13090626 100644
--- a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/indexing/dataflow/HDFSLookupAdapter.java
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/indexing/dataflow/HDFSLookupAdapter.java

@@ -100,9 +100,10 @@
             // create a delimited text parser
             char delimiter = StreamBasedAdapterFactory.getDelimiter(configuration);
             char quote = StreamBasedAdapterFactory.getQuote(configuration, delimiter);
+            boolean hasHeader = StreamBasedAdapterFactory.getHasHeader(configuration);
 
-            DelimitedDataParser dataParser = HDFSIndexingAdapterFactory.getDilimitedDataParser((ARecordType) atype,
-                    delimiter, quote);
+            DelimitedDataParser dataParser = HDFSIndexingAdapterFactory.getDelimitedDataParser((ARecordType) atype,
+                    delimiter, quote, hasHeader);
             if (configuration.get(HDFSAdapterFactory.KEY_INPUT_FORMAT).equals(HDFSAdapterFactory.INPUT_FORMAT_TEXT)) {
                 // Text input format
                 TextFileLookupInputStream in = new TextFileLookupInputStream(fileIndexAccessor, jobConf);

diff --git a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/feeds/ConditionalPushTupleParserFactory.java b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/feeds/ConditionalPushTupleParserFactory.java
index 3cf8fe6..1114cc9 100644
--- a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/feeds/ConditionalPushTupleParserFactory.java
+++ b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/feeds/ConditionalPushTupleParserFactory.java

@@ -47,7 +47,8 @@
                 dataParser = new ADMDataParser();
                 break;
             case DELIMITED_DATA:
-                dataParser = new DelimitedDataParser(recordType, valueParserFactories, delimiter, quote, false, -1, null);
+                dataParser = new DelimitedDataParser(recordType, valueParserFactories, delimiter, quote, hasHeader,
+                                                     false, -1, null);
                 break;
         }
         return new ConditionalPushTupleParser(ctx, recordType, dataParser, configuration);
@@ -58,6 +59,7 @@
     private IValueParserFactory[] valueParserFactories;
     private char delimiter;
     private char quote;
+    private boolean hasHeader;
     private final ParserType parserType;
 
     public enum ParserType {
@@ -66,14 +68,14 @@
     }
 
     public ConditionalPushTupleParserFactory(ARecordType recordType, IValueParserFactory[] valueParserFactories,
-            char fieldDelimiter, char quote, Map<String, String> configuration) {
+            char fieldDelimiter, char quote, boolean hasHeader, Map<String, String> configuration) {
         this.recordType = recordType;
         this.valueParserFactories = valueParserFactories;
         this.delimiter = fieldDelimiter;
         this.quote = quote;
+        this.hasHeader = hasHeader;
         this.configuration = configuration;
         this.parserType = ParserType.DELIMITED_DATA;
-
     }
 
     public ConditionalPushTupleParserFactory(ARecordType recordType, Map<String, String> configuration) {
@@ -81,7 +83,6 @@
         this.configuration = configuration;
         this.parserType = ParserType.ADM;
     }
-
 }
 
 class ConditionalPushTupleParser extends AbstractTupleParser {
@@ -214,4 +215,4 @@
 
     }
 
-}
\ No newline at end of file
+}

diff --git a/asterix-om/src/main/java/edu/uci/ics/asterix/formats/base/IDataFormat.java b/asterix-om/src/main/java/edu/uci/ics/asterix/formats/base/IDataFormat.java
index c30cf3a..165b797 100644
--- a/asterix-om/src/main/java/edu/uci/ics/asterix/formats/base/IDataFormat.java
+++ b/asterix-om/src/main/java/edu/uci/ics/asterix/formats/base/IDataFormat.java

@@ -71,7 +71,7 @@
 
     public ITupleParserFactory createTupleParser(ARecordType recType, IParseFileSplitsDecl decl);
 
-    public ITupleParserFactory createTupleParser(ARecordType recType, boolean isDelimited, char delimiter, char quote);
+    public ITupleParserFactory createTupleParser(ARecordType recType, boolean isDelimited, char delimiter, char quote, boolean hasHeader);
 
     public IFunctionDescriptor resolveFunction(ILogicalExpression expr, IVariableTypeEnvironment typeEnvironment)
             throws AlgebricksException;

diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/formats/NonTaggedDataFormat.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/formats/NonTaggedDataFormat.java
index 65a4ea2..4ad3bf5 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/formats/NonTaggedDataFormat.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/formats/NonTaggedDataFormat.java

@@ -964,12 +964,12 @@
 
     @Override
     public ITupleParserFactory createTupleParser(ARecordType recType, IParseFileSplitsDecl decl) {
-        return createTupleParser(recType, decl.isDelimitedFileFormat(), decl.getDelimChar(), decl.getQuote());
+        return createTupleParser(recType, decl.isDelimitedFileFormat(), decl.getDelimChar(), decl.getQuote(), decl.getHasHeader());
     }
 
     @Override
     public ITupleParserFactory createTupleParser(ARecordType recType, boolean delimitedFormat, char delimiter,
-            char quote) {
+                                                 char quote, boolean hasHeader) {
         if (delimitedFormat) {
             int n = recType.getFieldTypes().length;
             IValueParserFactory[] fieldParserFactories = new IValueParserFactory[n];
@@ -981,8 +981,8 @@
                 }
                 fieldParserFactories[i] = vpf;
             }
-            return new NtDelimitedDataTupleParserFactory(recType, fieldParserFactories, delimiter, quote, false, -1,
-                    null);
+            return new NtDelimitedDataTupleParserFactory(recType, fieldParserFactories, delimiter, quote, hasHeader,
+                                                         false, -1, null);
         } else {
             return new AdmSchemafullRecordParserFactory(recType, false, -1, null);
         }

diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java
index 7a0ae4d..e160586 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java

@@ -42,6 +42,7 @@
     protected final IValueParserFactory[] valueParserFactories;
     protected final char fieldDelimiter;
     protected final char quote;
+    protected final boolean hasHeader;
     protected final ARecordType recordType;
 
     private IARecordBuilder recBuilder;
@@ -55,8 +56,8 @@
     private byte AUUIDTag = ATypeTag.UUID.serialize();
 
     // Variables used to set a UUID for the auto-generated PK field
-    private boolean isPKAutoGenerated;
-    private int primaryKeyPosition;
+    private final boolean isPKAutoGenerated;
+    private final int primaryKeyPosition;
     private final ARecordType origRecordTypeForAutoGeneratedPK;
 
     private boolean areAllNullFields;
@@ -67,16 +68,18 @@
     private int fieldCount;
 
     public DelimitedDataParser(ARecordType recordType, IValueParserFactory[] valueParserFactories, char fieldDelimter,
-            char quote) {
-        this(recordType, valueParserFactories, fieldDelimter, quote, false, -1, null);
+                               char quote, boolean hasHeader) {
+        this(recordType, valueParserFactories, fieldDelimter, quote, hasHeader, false, -1, null);
     }
 
     public DelimitedDataParser(ARecordType recordType, IValueParserFactory[] valueParserFactories, char fieldDelimter,
-            char quote, boolean isPKAutoGenerated, int primaryKeyPosition, ARecordType origRecordTypeForAutoGeneratedPK) {
+                               char quote, boolean hasHeader, boolean isPKAutoGenerated, int primaryKeyPosition,
+                               ARecordType origRecordTypeForAutoGeneratedPK) {
         this.recordType = recordType;
         this.valueParserFactories = valueParserFactories;
         this.fieldDelimiter = fieldDelimter;
         this.quote = quote;
+        this.hasHeader = hasHeader;
         this.isPKAutoGenerated = isPKAutoGenerated;
         this.primaryKeyPosition = primaryKeyPosition;
         this.origRecordTypeForAutoGeneratedPK = origRecordTypeForAutoGeneratedPK;
@@ -134,11 +137,15 @@
         }
 
         cursor = new FieldCursor(new InputStreamReader(in));
-
     }
 
     @Override
     public boolean parse(DataOutput out) throws AsterixException, IOException {
+        if (hasHeader && lineCount == 1) {
+            // Consume all fields of first record
+            cursor.nextRecord();
+            while (cursor.nextField());
+        }
         while (cursor.nextRecord()) {
             // If PK is auto-generated, then we need to use the recordType that
             // includes PK, since recordType variable does not include PK field.

diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataTupleParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataTupleParser.java
index 8846110..798b510 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataTupleParser.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataTupleParser.java

@@ -28,10 +28,12 @@
     private final DelimitedDataParser dataParser;
 
     public DelimitedDataTupleParser(IHyracksTaskContext ctx, ARecordType recType,
-            IValueParserFactory[] valueParserFactories, char fieldDelimter, char quote, boolean isPKAutoGenerated,
-            int primaryKeyPosition, ARecordType origRecordTypeForAutoGeneratedPK) throws HyracksDataException {
+                                    IValueParserFactory[] valueParserFactories,
+                                    char fieldDelimter, char quote, boolean hasHeader,
+                                    boolean isPKAutoGenerated, int primaryKeyPosition,
+                                    ARecordType origRecordTypeForAutoGeneratedPK) throws HyracksDataException {
         super(ctx, recType, isPKAutoGenerated, primaryKeyPosition, origRecordTypeForAutoGeneratedPK);
-        dataParser = new DelimitedDataParser(recType, valueParserFactories, fieldDelimter, quote, isPKAutoGenerated,
+        dataParser = new DelimitedDataParser(recType, valueParserFactories, fieldDelimter, quote, hasHeader, isPKAutoGenerated,
                 primaryKeyPosition, origRecordTypeForAutoGeneratedPK);
     }
 

diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/NtDelimitedDataTupleParserFactory.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/NtDelimitedDataTupleParserFactory.java
index a301c7d..137067d 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/NtDelimitedDataTupleParserFactory.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/NtDelimitedDataTupleParserFactory.java

@@ -27,28 +27,31 @@
  */
 public class NtDelimitedDataTupleParserFactory implements ITupleParserFactory {
     private static final long serialVersionUID = 1L;
-    protected ARecordType recordType;
+    protected final ARecordType recordType;
     protected IValueParserFactory[] valueParserFactories;
-    protected char fieldDelimiter;
+    protected final char fieldDelimiter;
     // quote is used to enclose a string if it includes delimiter(s) in it.
-    protected char quote;
+    protected final char quote;
+    // whether delimited text file has a header (which should be ignored)
+    protected final boolean hasHeader;
     // To deal with an auto-generated PK
-    protected boolean isPKAutoGenerated;
-    protected int primaryKeyPosition;
-    protected ARecordType origRecordTypeForAutoGeneratedPK;
+    protected final boolean isPKAutoGenerated;
+    protected final int primaryKeyPosition;
+    protected final ARecordType origRecordTypeForAutoGeneratedPK;
 
     public NtDelimitedDataTupleParserFactory(ARecordType recordType, IValueParserFactory[] valueParserFactories,
-            char fieldDelimiter, char quote) {
-        this(recordType, valueParserFactories, fieldDelimiter, quote, false, -1, null);
+                                             char fieldDelimiter, char quote, boolean hasHeader) {
+        this(recordType, valueParserFactories, fieldDelimiter, quote, hasHeader, false, -1, null);
     }
 
     public NtDelimitedDataTupleParserFactory(ARecordType recordType, IValueParserFactory[] valueParserFactories,
-            char fieldDelimiter, char quote, boolean isPKAutoGenerated, int primaryKeyposition,
+            char fieldDelimiter, char quote, boolean hasHeader, boolean isPKAutoGenerated, int primaryKeyposition,
             ARecordType origRecordTypeForAutoGeneratedPK) {
         this.recordType = recordType;
         this.valueParserFactories = valueParserFactories;
         this.fieldDelimiter = fieldDelimiter;
         this.quote = quote;
+        this.hasHeader = hasHeader;
         this.isPKAutoGenerated = isPKAutoGenerated;
         this.primaryKeyPosition = primaryKeyposition;
         this.origRecordTypeForAutoGeneratedPK = origRecordTypeForAutoGeneratedPK;
@@ -57,6 +60,6 @@
     @Override
     public ITupleParser createTupleParser(final IHyracksTaskContext ctx) throws HyracksDataException {
         return new DelimitedDataTupleParser(ctx, recordType, valueParserFactories, fieldDelimiter, quote,
-                isPKAutoGenerated, primaryKeyPosition, origRecordTypeForAutoGeneratedPK);
+            hasHeader, isPKAutoGenerated, primaryKeyPosition, origRecordTypeForAutoGeneratedPK);
     }
 }

diff --git a/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/external/data/RateControlledFileSystemBasedAdapterFactory.java b/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/external/data/RateControlledFileSystemBasedAdapterFactory.java
index 5f30465..8f62320 100644
--- a/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/external/data/RateControlledFileSystemBasedAdapterFactory.java
+++ b/asterix-tools/src/main/java/edu/uci/ics/asterix/tools/external/data/RateControlledFileSystemBasedAdapterFactory.java

@@ -137,9 +137,10 @@
             case FORMAT_DELIMITED_TEXT:
                 char delimiter = StreamBasedAdapterFactory.getDelimiter(configuration);
                 char quote = StreamBasedAdapterFactory.getQuote(configuration, delimiter);
+                boolean hasHeader = StreamBasedAdapterFactory.getHasHeader(configuration);
                 IValueParserFactory[] valueParserFactories = getValueParserFactories(atype);
                 parserFactory = new RateControlledTupleParserFactory(atype, valueParserFactories, delimiter, quote,
-                        configuration);
+                                                                     hasHeader, configuration);
                 break;
         }
     }
@@ -175,6 +176,7 @@
     private IValueParserFactory[] valueParserFactories;
     private char delimiter;
     private char quote;
+    private boolean hasHeader;
     private final ParserType parserType;
 
     public enum ParserType {
@@ -183,17 +185,12 @@
     }
 
     public RateControlledTupleParserFactory(ARecordType recordType, IValueParserFactory[] valueParserFactories,
-            char fieldDelimiter, Map<String, String> configuration) throws AsterixException {
-        this(recordType, valueParserFactories, fieldDelimiter, StreamBasedAdapterFactory.getQuote(configuration,
-                fieldDelimiter), configuration);
-    }
-
-    public RateControlledTupleParserFactory(ARecordType recordType, IValueParserFactory[] valueParserFactories,
-            char fieldDelimiter, char quote, Map<String, String> configuration) {
+            char fieldDelimiter, char quote, boolean hasHeader,  Map<String, String> configuration) {
         this.recordType = recordType;
         this.valueParserFactories = valueParserFactories;
         this.delimiter = fieldDelimiter;
         this.quote = quote;
+        this.hasHeader = hasHeader;
         this.configuration = configuration;
         this.parserType = ParserType.DELIMITED_DATA;
     }
@@ -212,7 +209,8 @@
                 dataParser = new ADMDataParser();
                 break;
             case DELIMITED_DATA:
-                dataParser = new DelimitedDataParser(recordType, valueParserFactories, delimiter, quote, false, -1,
+                dataParser = new DelimitedDataParser(recordType, valueParserFactories, delimiter,
+                                                     quote, hasHeader, false, -1,
                         null);
                 break;
         }
commit	fda07252706e7154d1bc5ee1cdbe14167259ce25	[log] [tgz]
author	Chris Hillery <chillery@lambda.nu>	Fri Feb 13 04:24:20 2015 -0800
committer	Chris Hillery <ceej@lambda.nu>	Mon Feb 16 01:10:58 2015 -0800
tree	115d45deb206a9663a17abc3c9a24802bd409325
parent	386721bf09f113960fbc1cb104572233765c9f49 [diff]