Added support for optional (nullable) field types in the delimited data parser; fixed the temporal constructors (date, time, datetime), which parsed empty strings incorrectly.
diff --git a/asterix-app/data/csv/sample_01.csv b/asterix-app/data/csv/sample_01.csv
index a77258c..4dd437a 100644
--- a/asterix-app/data/csv/sample_01.csv
+++ b/asterix-app/data/csv/sample_01.csv
@@ -1,4 +1,8 @@
1,0.899682764,5.6256,2013-08-07,07:22:35,1979-02-25T23:48:27.034
2,0.669052398,,-1923-03-29,19:33:34,-1979-02-25T23:48:27.002
-,,,,,,,,,,
-3,0.572733058,192674,-1923-03-29,19:33:34,-1979-02-25T23:48:27.001
\ No newline at end of file
+3,0.572733058,192674,-1923-03-28,19:33:34,-1979-02-25T23:48:27.001
+4,,192674,-1923-03-27,19:33:34,-1979-02-25T23:48:27.001
+5,0.572733058,192674,,19:33:34,-1979-02-25T23:48:27.001
+6,0.572733058,192674,-1923-03-25,,-1979-02-25T23:48:27.001
+7,0.572733058,192674,-1923-03-24,19:33:34,
+8,,,,,
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_01/csv_01.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_01/csv_01.1.ddl.aql
index 7a0e4b0..e890942 100644
--- a/asterix-app/src/test/resources/runtimets/queries/load/csv_01/csv_01.1.ddl.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_01/csv_01.1.ddl.aql
@@ -10,12 +10,12 @@
use dataverse temp;
create type test as closed {
- id: string,
- float: string,
- double: string,
- date: string,
- time: string,
- datetime: string
+ id: int32,
+ float: float?,
+ double: double?,
+ date: string?,
+ time: string?,
+ datetime: string?
};
create dataset testds (test)
diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_01/csv_01.3.query.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_01/csv_01.3.query.aql
index f798b20..efa6dbc 100644
--- a/asterix-app/src/test/resources/runtimets/queries/load/csv_01/csv_01.3.query.aql
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_01/csv_01.3.query.aql
@@ -8,10 +8,11 @@
use dataverse temp;
for $i in dataset testds
+order by $i.id
return { "id": $i.id,
"float": $i.float,
"double": $i.double,
- "date": date($i.date),
- "time": time($i.time),
- "datetime": datetime($i.datetime)
+ "date-before": $i.date, "date-after": date($i.date),
+ "time-before": $i.time, "time-after": time($i.time),
+ "datetime-before": $i.datetime, "datetime-after": datetime($i.datetime)
}
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_02/csv_02.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_02/csv_02.1.ddl.aql
new file mode 100644
index 0000000..b6884a8
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_02/csv_02.1.ddl.aql
@@ -0,0 +1,22 @@
+/**
+ *
+ * CSV file loading test
+ * Expected result: success
+ *
+ */
+
+drop dataverse temp if exists;
+create dataverse temp
+use dataverse temp;
+
+create type test as closed {
+ id: int32,
+ float: float?,
+ double: double?,
+ date: string,
+ time: string,
+ datetime: string
+};
+
+create dataset testds (test)
+primary key id;
diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_02/csv_02.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_02/csv_02.2.update.aql
new file mode 100644
index 0000000..c3161d5
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_02/csv_02.2.update.aql
@@ -0,0 +1,12 @@
+/**
+ *
+ * CSV file loading test
+ * Expected result: success
+ *
+ */
+
+use dataverse temp;
+
+load dataset testds
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/csv/sample_01.csv"),("format"="delimited-text"),("delimiter"=","));
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/load/csv_02/csv_02.3.query.aql b/asterix-app/src/test/resources/runtimets/queries/load/csv_02/csv_02.3.query.aql
new file mode 100644
index 0000000..1299235
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/load/csv_02/csv_02.3.query.aql
@@ -0,0 +1,18 @@
+/**
+ *
+ * CSV file loading test
+ * Expected result: success
+ *
+ */
+
+use dataverse temp;
+
+for $i in dataset testds
+order by $i.id
+return { "id": $i.id,
+ "float": $i.float,
+ "double": $i.double,
+ "date-string": $i.date,
+ "time-string": $i.time,
+ "datetime-string": $i.datetime
+ }
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/results/load/csv_01/csv_01.1.adm b/asterix-app/src/test/resources/runtimets/results/load/csv_01/csv_01.1.adm
index 1e28c78..b8d4151 100644
--- a/asterix-app/src/test/resources/runtimets/results/load/csv_01/csv_01.1.adm
+++ b/asterix-app/src/test/resources/runtimets/results/load/csv_01/csv_01.1.adm
@@ -1,3 +1,8 @@
-{ "id": "1", "float": "0.899682764", "double": "5.6256", "date": date("2013-08-07"), "time": time("07:22:35.000Z"), "datetime": datetime("1979-02-25T23:48:27.034Z") }
-{ "id": "3", "float": "0.572733058", "double": "192674", "date": date("-1923-03-29"), "time": time("19:33:34.000Z"), "datetime": datetime("-1979-02-25T23:48:27.001Z") }
-{ "id": "2", "float": "0.669052398", "double": "", "date": date("-1923-03-29"), "time": time("19:33:34.000Z"), "datetime": datetime("-1979-02-25T23:48:27.002Z") }
\ No newline at end of file
+{ "id": 1, "float": 0.89968276f, "double": 5.6256d, "date-before": "2013-08-07", "date-after": date("2013-08-07"), "time-before": "07:22:35", "time-after": time("07:22:35.000Z"), "datetime-before": "1979-02-25T23:48:27.034", "datetime-after": datetime("1979-02-25T23:48:27.034Z") }
+{ "id": 2, "float": 0.6690524f, "double": null, "date-before": "-1923-03-29", "date-after": date("-1923-03-29"), "time-before": "19:33:34", "time-after": time("19:33:34.000Z"), "datetime-before": "-1979-02-25T23:48:27.002", "datetime-after": datetime("-1979-02-25T23:48:27.002Z") }
+{ "id": 3, "float": 0.57273304f, "double": 192674.0d, "date-before": "-1923-03-28", "date-after": date("-1923-03-28"), "time-before": "19:33:34", "time-after": time("19:33:34.000Z"), "datetime-before": "-1979-02-25T23:48:27.001", "datetime-after": datetime("-1979-02-25T23:48:27.001Z") }
+{ "id": 4, "float": null, "double": 192674.0d, "date-before": "-1923-03-27", "date-after": date("-1923-03-27"), "time-before": "19:33:34", "time-after": time("19:33:34.000Z"), "datetime-before": "-1979-02-25T23:48:27.001", "datetime-after": datetime("-1979-02-25T23:48:27.001Z") }
+{ "id": 5, "float": 0.57273304f, "double": 192674.0d, "date-before": null, "date-after": null, "time-before": "19:33:34", "time-after": time("19:33:34.000Z"), "datetime-before": "-1979-02-25T23:48:27.001", "datetime-after": datetime("-1979-02-25T23:48:27.001Z") }
+{ "id": 6, "float": 0.57273304f, "double": 192674.0d, "date-before": "-1923-03-25", "date-after": date("-1923-03-25"), "time-before": null, "time-after": null, "datetime-before": "-1979-02-25T23:48:27.001", "datetime-after": datetime("-1979-02-25T23:48:27.001Z") }
+{ "id": 7, "float": 0.57273304f, "double": 192674.0d, "date-before": "-1923-03-24", "date-after": date("-1923-03-24"), "time-before": "19:33:34", "time-after": time("19:33:34.000Z"), "datetime-before": null, "datetime-after": null }
+{ "id": 8, "float": null, "double": null, "date-before": null, "date-after": null, "time-before": null, "time-after": null, "datetime-before": null, "datetime-after": null }
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/results/load/csv_02/csv_02.1.adm b/asterix-app/src/test/resources/runtimets/results/load/csv_02/csv_02.1.adm
new file mode 100644
index 0000000..44240dd
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/load/csv_02/csv_02.1.adm
@@ -0,0 +1,8 @@
+{ "id": 1, "float": 0.89968276f, "double": 5.6256d, "date-string": "2013-08-07", "time-string": "07:22:35", "datetime-string": "1979-02-25T23:48:27.034" }
+{ "id": 2, "float": 0.6690524f, "double": null, "date-string": "-1923-03-29", "time-string": "19:33:34", "datetime-string": "-1979-02-25T23:48:27.002" }
+{ "id": 3, "float": 0.57273304f, "double": 192674.0d, "date-string": "-1923-03-28", "time-string": "19:33:34", "datetime-string": "-1979-02-25T23:48:27.001" }
+{ "id": 4, "float": null, "double": 192674.0d, "date-string": "-1923-03-27", "time-string": "19:33:34", "datetime-string": "-1979-02-25T23:48:27.001" }
+{ "id": 5, "float": 0.57273304f, "double": 192674.0d, "date-string": "", "time-string": "19:33:34", "datetime-string": "-1979-02-25T23:48:27.001" }
+{ "id": 6, "float": 0.57273304f, "double": 192674.0d, "date-string": "-1923-03-25", "time-string": "", "datetime-string": "-1979-02-25T23:48:27.001" }
+{ "id": 7, "float": 0.57273304f, "double": 192674.0d, "date-string": "-1923-03-24", "time-string": "19:33:34", "datetime-string": "" }
+{ "id": 8, "float": null, "double": null, "date-string": "", "time-string": "", "datetime-string": "" }
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterix-app/src/test/resources/runtimets/testsuite.xml
index 1ec46dc..d65da69 100644
--- a/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -4258,6 +4258,11 @@
</compilation-unit>
</test-case>
<test-case FilePath="load">
+ <compilation-unit name="csv_02">
+ <output-dir compare="Text">csv_02</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="load">
<compilation-unit name="issue14_query">
<output-dir compare="Text">none</output-dir>
<expected-error>edu.uci.ics.asterix.common.exceptions.AsterixException</expected-error>
diff --git a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/FileSystemBasedAdapter.java b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/FileSystemBasedAdapter.java
index 38903ec..33ee11f 100644
--- a/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/FileSystemBasedAdapter.java
+++ b/asterix-external-data/src/main/java/edu/uci/ics/asterix/external/dataset/adapter/FileSystemBasedAdapter.java
@@ -16,6 +16,7 @@
import java.io.IOException;
import java.io.InputStream;
+import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -26,6 +27,7 @@
import edu.uci.ics.asterix.external.util.INodeResolverFactory;
import edu.uci.ics.asterix.om.types.ARecordType;
import edu.uci.ics.asterix.om.types.ATypeTag;
+import edu.uci.ics.asterix.om.types.AUnionType;
import edu.uci.ics.asterix.om.types.IAType;
import edu.uci.ics.asterix.runtime.operators.file.AdmSchemafullRecordParserFactory;
import edu.uci.ics.asterix.runtime.operators.file.NtDelimitedDataTupleParserFactory;
@@ -89,7 +91,7 @@
throw new IllegalArgumentException(" Unspecified data format");
} else if (FORMAT_DELIMITED_TEXT.equalsIgnoreCase(specifiedFormat)) {
parserFactory = getDelimitedDataTupleParserFactory((ARecordType) atype);
- } else if (FORMAT_ADM.equalsIgnoreCase((String)configuration.get(KEY_FORMAT))) {
+ } else if (FORMAT_ADM.equalsIgnoreCase((String) configuration.get(KEY_FORMAT))) {
parserFactory = getADMDataTupleParserFactory((ARecordType) atype);
} else {
throw new IllegalArgumentException(" format " + configuration.get(KEY_FORMAT) + " not supported");
@@ -104,7 +106,19 @@
int n = recordType.getFieldTypes().length;
IValueParserFactory[] fieldParserFactories = new IValueParserFactory[n];
for (int i = 0; i < n; i++) {
- ATypeTag tag = recordType.getFieldTypes()[i].getTypeTag();
+ // an optional type "T?" is a UNION of exactly [NULL, T]: use T's parser, reject any other union
+ IAType fieldType = recordType.getFieldTypes()[i];
+ ATypeTag tag = fieldType.getTypeTag();
+ if (tag == ATypeTag.UNION) {
+ List<IAType> unionTypes = ((AUnionType) fieldType).getUnionList();
+ if (unionTypes.size() != 2 || unionTypes.get(0).getTypeTag() != ATypeTag.NULL) {
+ throw new NotImplementedException("Non-optional UNION type is not supported.");
+ }
+ tag = unionTypes.get(1).getTypeTag();
+ }
+ if (tag == null) {
+ throw new NotImplementedException("Failed to get the type information for field " + i + ".");
+ }
IValueParserFactory vpf = typeToValueParserFactMap.get(tag);
if (vpf == null) {
throw new NotImplementedException("No value parser factory for delimited fields of type " + tag);
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/constructors/ADateConstructorDescriptor.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/constructors/ADateConstructorDescriptor.java
index 03c4140..f13f39c 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/constructors/ADateConstructorDescriptor.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/constructors/ADateConstructorDescriptor.java
@@ -83,6 +83,13 @@
int stringLength = (serString[1] & 0xff << 8) + (serString[2] & 0xff << 0);
+ // the string to be parsed should be at least 8 characters: YYYYMMDD
+ if (stringLength < 8) {
+ throw new AlgebricksException(errorMessage
+ + ": the string length should be at least 8 (YYYYMMDD) but it is "
+ + stringLength);
+ }
+
int startOffset = 3;
while (serString[startOffset] == ' ') {
startOffset++;
@@ -111,8 +118,6 @@
}
} catch (IOException e1) {
throw new AlgebricksException(errorMessage);
- } catch (Exception e2) {
- throw new AlgebricksException(e2);
}
}
};
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/constructors/ADateTimeConstructorDescriptor.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/constructors/ADateTimeConstructorDescriptor.java
index 986b158..880de44 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/constructors/ADateTimeConstructorDescriptor.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/constructors/ADateTimeConstructorDescriptor.java
@@ -82,7 +82,12 @@
if (serString[0] == SER_STRING_TYPE_TAG) {
int stringLength = (serString[1] & 0xff << 8) + (serString[2] & 0xff << 0);
-
+ // the string to be parsed should be at least 14 characters: YYYYMMDDhhmmss
+ if (stringLength < 14) {
+ throw new AlgebricksException(errorMessage
+ + ": the string length should be at least 14 (YYYYMMDDhhmmss) but it is "
+ + stringLength);
+ }
// +1 if it is negative (-)
short timeOffset = (short) ((serString[3] == '-') ? 1 : 0);
@@ -109,8 +114,6 @@
}
} catch (IOException e1) {
throw new AlgebricksException(errorMessage);
- } catch (Exception e2) {
- throw new AlgebricksException(e2);
}
}
};
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/constructors/ATimeConstructorDescriptor.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/constructors/ATimeConstructorDescriptor.java
index 1b3e7f6..d344d1c 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/constructors/ATimeConstructorDescriptor.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/constructors/ATimeConstructorDescriptor.java
@@ -83,6 +83,13 @@
int stringLength = (serString[1] & 0xff << 8) + (serString[2] & 0xff << 0);
+ // the string to be parsed should be at least 6 characters: hhmmss
+ if (stringLength < 6) {
+ throw new AlgebricksException(errorMessage
+ + ": the string length should be at least 6 (hhmmss) but it is "
+ + stringLength);
+ }
+
int chrononTimeInMs = ATimeParserFactory.parseTimePart(serString, 3, stringLength);
if (chrononTimeInMs < 0) {
@@ -99,8 +106,6 @@
}
} catch (IOException e1) {
throw new AlgebricksException(errorMessage);
- } catch (Exception e2) {
- throw new AlgebricksException(e2);
}
}
};
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java
index 85242ec..5a639dc 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java
@@ -24,7 +24,9 @@
import edu.uci.ics.asterix.builders.IARecordBuilder;
import edu.uci.ics.asterix.builders.RecordBuilder;
import edu.uci.ics.asterix.common.exceptions.AsterixException;
+import edu.uci.ics.asterix.dataflow.data.nontagged.serde.ANullSerializerDeserializer;
import edu.uci.ics.asterix.om.base.AMutableString;
+import edu.uci.ics.asterix.om.base.ANull;
import edu.uci.ics.asterix.om.types.ARecordType;
import edu.uci.ics.asterix.om.types.ATypeTag;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
@@ -109,17 +111,27 @@
}
fieldValueBuffer.reset();
- if (cursor.fStart != cursor.fEnd) {
+ if (cursor.fStart == cursor.fEnd && recordType.getFieldTypes()[i].getTypeTag() != ATypeTag.STRING
+ && recordType.getFieldTypes()[i].getTypeTag() != ATypeTag.NULL) {
+ // an empty field whose declared type is optional (UNION) is stored as NULL; an empty field of
+ // any other type reaching this branch is rejected. STRING is excluded above: it parses "" itself
+ if (recordType.getFieldTypes()[i].getTypeTag() != ATypeTag.UNION) {
+ throw new AsterixException("Field " + i + " cannot be NULL. ");
+ }
+ fieldValueBufferOutput.writeByte(ATypeTag.NULL.serialize());
+ ANullSerializerDeserializer.INSTANCE.serialize(ANull.NULL, out); // NOTE(review): targets 'out', not fieldValueBufferOutput - confirm intended
+ } else {
+ fieldValueBufferOutput.writeByte(fieldTypeTags[i]);
+ valueParsers[i].parse(cursor.buffer, cursor.fStart, cursor.fEnd - cursor.fStart,
+ fieldValueBufferOutput);
areAllNullFields = false;
}
- fieldValueBufferOutput.writeByte(fieldTypeTags[i]);
- valueParsers[i]
- .parse(cursor.buffer, cursor.fStart, cursor.fEnd - cursor.fStart, fieldValueBufferOutput);
if (fldIds[i] < 0) {
recBuilder.addField(nameBuffers[i], fieldValueBuffer);
} else {
recBuilder.addField(fldIds[i], fieldValueBuffer);
}
+
}
if (!areAllNullFields) {
recBuilder.write(out, true);