ASTERIXDB-1281 - Interval format update to AQL and ADM
The new interval format takes a more generic approach to representing intervals.
Here is an example for a date interval:
interval(date("2012-01-01"), date("2013-04-01"))
Note that the interval type is defined by the arguments to the interval expression.
Currently only date, time, and datetime types are supported for intervals. The new
format is used for ADM and AQL.
In addition to the format change, the internal byte structure of an interval has been
updated. The format looks like the following:
byte tag, T start, T end (where T is a date, time or datetime type)
Note how the tag has been moved to the front. Also with the new structure, an
interval is variable length, not fixed length as before.
Change-Id: I009c71b7a445d141e228ba15d56d0b6cf3c8a3f5
Reviewed-on: https://asterix-gerrit.ics.uci.edu/602
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Till Westmann <tillw@apache.org>
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ADMDataParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ADMDataParser.java
index d9a93ff..0fe2758 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ADMDataParser.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ADMDataParser.java
@@ -40,7 +40,9 @@
import org.apache.asterix.external.api.IStreamDataParser;
import org.apache.asterix.external.util.ExternalDataUtils;
import org.apache.asterix.om.base.ABoolean;
+import org.apache.asterix.om.base.AMutableInterval;
import org.apache.asterix.om.base.ANull;
+import org.apache.asterix.om.base.temporal.GregorianCalendarSystem;
import org.apache.asterix.om.types.AOrderedListType;
import org.apache.asterix.om.types.ARecordType;
import org.apache.asterix.om.types.ATypeTag;
@@ -53,6 +55,7 @@
import org.apache.asterix.om.util.container.IObjectPool;
import org.apache.asterix.om.util.container.ListObjectPool;
import org.apache.asterix.runtime.operators.file.adm.AdmLexer;
+import org.apache.asterix.runtime.operators.file.adm.AdmLexerException;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.api.IMutableValueStorage;
import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
@@ -76,6 +79,8 @@
private IObjectPool<IMutableValueStorage, ATypeTag> abvsBuilderPool = new ListObjectPool<IMutableValueStorage, ATypeTag>(
new AbvsBuilderFactory());
+ protected final AMutableInterval aInterval = new AMutableInterval(0L, 0L, (byte) 0);
+
private String mismatchErrorMessage = "Mismatch Type, expecting a value of type ";
private String mismatchErrorMessage2 = " got a value of type ";
private Map<String, String> configuration;
@@ -328,44 +333,14 @@
parseConstructor(ATypeTag.DATETIME, objectType, out);
break;
}
- case AdmLexer.TOKEN_INTERVAL_DATE_CONS: {
+ case AdmLexer.TOKEN_INTERVAL_CONS: {
if (checkType(ATypeTag.INTERVAL, objectType)) {
- if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
- if (admLexer.next() == AdmLexer.TOKEN_STRING_LITERAL) {
- parseDateInterval(admLexer.getLastTokenImage(), out);
- if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
- break;
- }
- }
- }
+ objectType = getComplexType(objectType, ATypeTag.INTERVAL);
+ parseInterval(ATypeTag.INTERVAL, objectType, out);
+ } else {
+ throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
}
- throw new ParseException("Wrong interval data parsing for date interval.");
- }
- case AdmLexer.TOKEN_INTERVAL_TIME_CONS: {
- if (checkType(ATypeTag.INTERVAL, objectType)) {
- if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
- if (admLexer.next() == AdmLexer.TOKEN_STRING_LITERAL) {
- parseTimeInterval(admLexer.getLastTokenImage(), out);
- if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
- break;
- }
- }
- }
- }
- throw new ParseException("Wrong interval data parsing for time interval.");
- }
- case AdmLexer.TOKEN_INTERVAL_DATETIME_CONS: {
- if (checkType(ATypeTag.INTERVAL, objectType)) {
- if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
- if (admLexer.next() == AdmLexer.TOKEN_STRING_LITERAL) {
- parseDateTimeInterval(admLexer.getLastTokenImage(), out);
- if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
- break;
- }
- }
- }
- }
- throw new ParseException("Wrong interval data parsing for datetime interval.");
+ break;
}
case AdmLexer.TOKEN_DURATION_CONS: {
parseConstructor(ATypeTag.DURATION, objectType, out);
@@ -408,17 +383,16 @@
objectType = getComplexType(objectType, ATypeTag.UNORDEREDLIST);
parseUnorderedList((AUnorderedListType) objectType, out);
} else {
- throw new ParseException(mismatchErrorMessage + objectType.getTypeTag());
+ throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
}
break;
}
-
case AdmLexer.TOKEN_START_ORDERED_LIST: {
if (checkType(ATypeTag.ORDEREDLIST, objectType)) {
objectType = getComplexType(objectType, ATypeTag.ORDEREDLIST);
parseOrderedList((AOrderedListType) objectType, out);
} else {
- throw new ParseException(mismatchErrorMessage + objectType.getTypeTag());
+ throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
}
break;
}
@@ -427,7 +401,7 @@
objectType = getComplexType(objectType, ATypeTag.RECORD);
parseRecord((ARecordType) objectType, out);
} else {
- throw new ParseException(mismatchErrorMessage + objectType.getTypeTag());
+ throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
}
break;
}
@@ -552,7 +526,6 @@
}
private void parseRecord(ARecordType recType, DataOutput out) throws IOException {
-
ArrayBackedValueStorage fieldValueBuffer = getTempBuffer();
ArrayBackedValueStorage fieldNameBuffer = getTempBuffer();
IARecordBuilder recBuilder = getRecordBuilder();
@@ -693,6 +666,100 @@
return -1;
}
+ private void parseInterval(ATypeTag typeTag, IAType objectType, DataOutput out) throws IOException {
+ long start = 0, end = 0;
+ byte tag = 0;
+ int token = admLexer.next();
+ if (token == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
+ ATypeTag intervalType;
+
+ token = admLexer.next();
+ switch (token) {
+ case AdmLexer.TOKEN_DATE_CONS:
+ intervalType = ATypeTag.DATE;
+ break;
+ case AdmLexer.TOKEN_TIME_CONS:
+ intervalType = ATypeTag.TIME;
+ break;
+ case AdmLexer.TOKEN_DATETIME_CONS:
+ intervalType = ATypeTag.DATETIME;
+ break;
+ default:
+ throw new ParseException("Unsupported interval type: " + AdmLexer.tokenKindToString(token) + ".");
+ }
+
+ // Interval
+ start = parseIntervalArgument(intervalType);
+ end = parseIntervalSecondArgument(token, intervalType);
+ tag = intervalType.serialize();
+ }
+
+ // Closing interval.
+ token = admLexer.next();
+ if (token == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
+ try {
+ aInterval.setValue(start, end, tag);
+ } catch (HyracksDataException e) {
+ throw new ParseException(e);
+ }
+ } else {
+ throw new ParseException("Interval was not closed.");
+ }
+ intervalSerde.serialize(aInterval, out);
+ }
+
+ private long parseIntervalSecondArgument(int startToken, ATypeTag parseType) throws IOException {
+ int token = admLexer.next();
+ if (token == AdmLexer.TOKEN_COMMA) {
+ token = admLexer.next();
+ if (token == startToken) {
+ return parseIntervalArgument(parseType);
+ } else {
+ throw new ParseException("The interval start and end point types do not match: "
+ + AdmLexer.tokenKindToString(startToken) + " != " + AdmLexer.tokenKindToString(token));
+ }
+ } else {
+ throw new ParseException("Missing COMMA before interval end point.");
+ }
+ }
+
+ private long parseIntervalArgument(ATypeTag tag) throws IOException {
+ int token = admLexer.next();
+ if (token == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
+ token = admLexer.next();
+ if (token == AdmLexer.TOKEN_STRING_LITERAL) {
+ long chrononTimeInMs = 0;
+ final String arg = admLexer.getLastTokenImage();
+ switch (tag) {
+ case DATE:
+ chrononTimeInMs += (parseDatePart(arg, 0, arg.length() - 1)
+ / GregorianCalendarSystem.CHRONON_OF_DAY);
+ break;
+ case TIME:
+ chrononTimeInMs += parseTimePart(arg, 0, arg.length() - 1);
+ break;
+ case DATETIME:
+ int timeSeperatorOffsetInDatetimeString = arg.indexOf('T');
+ if (timeSeperatorOffsetInDatetimeString < 0) {
+ throw new ParseException(
+ "This can not be an instance of interval: missing T for a datetime value.");
+ }
+ chrononTimeInMs += parseDatePart(arg, 0, timeSeperatorOffsetInDatetimeString - 1);
+ chrononTimeInMs += parseTimePart(arg, timeSeperatorOffsetInDatetimeString + 1,
+ arg.length() - 1);
+ break;
+ default:
+ throw new ParseException("Unsupported interval type: " + tag.name() + ".");
+ }
+ token = admLexer.next();
+ if (token == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
+ return chrononTimeInMs;
+ }
+ }
+ }
+ throw new ParseException("Interval argument not properly constructed.");
+ }
+
private void parseOrderedList(AOrderedListType oltype, DataOutput out) throws IOException {
ArrayBackedValueStorage itemBuffer = getTempBuffer();
OrderedListBuilder orderedListBuilder = (OrderedListBuilder) getOrderedListBuilder();
diff --git a/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractDataParser.java b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractDataParser.java
index 20b4124..b10b9e9 100644
--- a/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractDataParser.java
+++ b/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractDataParser.java
@@ -71,7 +71,6 @@
import org.apache.asterix.om.base.temporal.ADurationParserFactory.ADurationParseOption;
import org.apache.asterix.om.base.temporal.ATimeParserFactory;
import org.apache.asterix.om.base.temporal.GregorianCalendarSystem;
-import org.apache.asterix.om.types.ATypeTag;
import org.apache.asterix.om.types.BuiltinType;
import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
import org.apache.hyracks.api.exceptions.HyracksDataException;
@@ -187,7 +186,7 @@
protected final static ISerializerDeserializer<ALine> lineSerde = AqlSerializerDeserializerProvider.INSTANCE
.getSerializerDeserializer(BuiltinType.ALINE);
@SuppressWarnings("unchecked")
- private static final ISerializerDeserializer<AInterval> intervalSerde = AqlSerializerDeserializerProvider.INSTANCE
+ protected static final ISerializerDeserializer<AInterval> intervalSerde = AqlSerializerDeserializerProvider.INSTANCE
.getSerializerDeserializer(BuiltinType.AINTERVAL);
protected String filename;
@@ -333,104 +332,7 @@
binarySerde.serialize(aBinary, out);
}
- protected void parseDateTimeInterval(String interval, DataOutput out) throws HyracksDataException {
- // the starting point for parsing (so for the accessor)
- int startOffset = 0;
- int endOffset, timeSeperatorOffsetInDatetimeString;
-
- // Get the index for the comma
- int commaIndex = interval.indexOf(',');
- if (commaIndex < 1) {
- throw new HyracksDataException("comma is missing for a string of interval");
- }
-
- endOffset = commaIndex - 1;
- timeSeperatorOffsetInDatetimeString = interval.indexOf('T');
-
- if (timeSeperatorOffsetInDatetimeString < 0) {
- throw new HyracksDataException("This can not be an instance of interval: missing T for a datetime value.");
- }
-
- long chrononTimeInMsStart = parseDatePart(interval, startOffset, timeSeperatorOffsetInDatetimeString - 1);
-
- chrononTimeInMsStart += parseTimePart(interval, timeSeperatorOffsetInDatetimeString + 1, endOffset);
-
- // Interval End
- startOffset = commaIndex + 1;
- endOffset = interval.length() - 1;
-
- timeSeperatorOffsetInDatetimeString = interval.indexOf('T', startOffset);
-
- if (timeSeperatorOffsetInDatetimeString < 0) {
- throw new HyracksDataException("This can not be an instance of interval: missing T for a datetime value.");
- }
-
- long chrononTimeInMsEnd = parseDatePart(interval, startOffset, timeSeperatorOffsetInDatetimeString - 1);
-
- chrononTimeInMsEnd += parseTimePart(interval, timeSeperatorOffsetInDatetimeString + 1, endOffset);
-
- aInterval.setValue(chrononTimeInMsStart, chrononTimeInMsEnd, ATypeTag.DATETIME.serialize());
-
- intervalSerde.serialize(aInterval, out);
- }
-
- protected void parseTimeInterval(String interval, DataOutput out) throws HyracksDataException {
- int startOffset = 0;
- int endOffset;
-
- // Get the index for the comma
- int commaIndex = interval.indexOf(',');
- if (commaIndex < 0) {
- throw new HyracksDataException("comma is missing for a string of interval");
- }
-
- endOffset = commaIndex - 1;
- // Interval Start
- long chrononTimeInMsStart = parseTimePart(interval, startOffset, endOffset);
-
- if (chrononTimeInMsStart < 0) {
- chrononTimeInMsStart += GregorianCalendarSystem.CHRONON_OF_DAY;
- }
-
- // Interval End
- startOffset = commaIndex + 1;
- endOffset = interval.length() - 1;
-
- long chrononTimeInMsEnd = parseTimePart(interval, startOffset, endOffset);
- if (chrononTimeInMsEnd < 0) {
- chrononTimeInMsEnd += GregorianCalendarSystem.CHRONON_OF_DAY;
- }
-
- aInterval.setValue(chrononTimeInMsStart, chrononTimeInMsEnd, ATypeTag.TIME.serialize());
- intervalSerde.serialize(aInterval, out);
- }
-
- protected void parseDateInterval(String interval, DataOutput out) throws HyracksDataException {
- // the starting point for parsing (so for the accessor)
- int startOffset = 0;
- int endOffset;
-
- // Get the index for the comma
- int commaIndex = interval.indexOf(',');
- if (commaIndex < 1) {
- throw new HyracksDataException("comma is missing for a string of interval");
- }
-
- endOffset = commaIndex - 1;
- long chrononTimeInMsStart = parseDatePart(interval, startOffset, endOffset);
-
- // Interval End
- startOffset = commaIndex + 1;
- endOffset = interval.length() - 1;
-
- long chrononTimeInMsEnd = parseDatePart(interval, startOffset, endOffset);
-
- aInterval.setValue((chrononTimeInMsStart / GregorianCalendarSystem.CHRONON_OF_DAY),
- (chrononTimeInMsEnd / GregorianCalendarSystem.CHRONON_OF_DAY), ATypeTag.DATE.serialize());
- intervalSerde.serialize(aInterval, out);
- }
-
- private long parseDatePart(String interval, int startOffset, int endOffset) throws HyracksDataException {
+ protected long parseDatePart(String interval, int startOffset, int endOffset) throws HyracksDataException {
while (interval.charAt(endOffset) == '"' || interval.charAt(endOffset) == ' ') {
endOffset--;
@@ -443,7 +345,7 @@
return ADateParserFactory.parseDatePart(interval, startOffset, endOffset - startOffset + 1);
}
- private int parseTimePart(String interval, int startOffset, int endOffset) throws HyracksDataException {
+ protected int parseTimePart(String interval, int startOffset, int endOffset) throws HyracksDataException {
while (interval.charAt(endOffset) == '"' || interval.charAt(endOffset) == ' ') {
endOffset--;
diff --git a/asterix-external-data/src/main/resources/adm.grammar b/asterix-external-data/src/main/resources/adm.grammar
index 973c2b9..2626a31 100644
--- a/asterix-external-data/src/main/resources/adm.grammar
+++ b/asterix-external-data/src/main/resources/adm.grammar
@@ -47,9 +47,7 @@
RECTANGLE_CONS = string(rectangle)
CIRCLE_CONS = string(circle)
TIME_CONS = string(time)
-INTERVAL_TIME_CONS = string(interval-time)
-INTERVAL_DATE_CONS = string(interval-date)
-INTERVAL_DATETIME_CONS = string(interval-datetime)
+INTERVAL_CONS = string(interval)
YEAR_MONTH_DURATION_CONS = string(year-month-duration)
DAY_TIME_DURATION_CONS = string(day-time-duration)
UUID_CONS = string(uuid)
diff --git a/asterix-external-data/src/test/java/org/apache/asterix/runtime/operator/file/ADMDataParserTest.java b/asterix-external-data/src/test/java/org/apache/asterix/runtime/operator/file/ADMDataParserTest.java
index 698e414..c6939c9 100644
--- a/asterix-external-data/src/test/java/org/apache/asterix/runtime/operator/file/ADMDataParserTest.java
+++ b/asterix-external-data/src/test/java/org/apache/asterix/runtime/operator/file/ADMDataParserTest.java
@@ -24,7 +24,9 @@
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.asterix.external.parser.ADMDataParser;
-import org.apache.asterix.om.base.AMutableInterval;
+import org.apache.asterix.om.base.AMutableDate;
+import org.apache.asterix.om.base.AMutableDateTime;
+import org.apache.asterix.om.base.AMutableTime;
import org.junit.Assert;
import org.junit.Test;
@@ -34,25 +36,23 @@
@Test
public void test() {
- String[] dateIntervals = { "-9537-08-04, 9656-06-03", "-9537-04-04, 9656-06-04", "-9537-10-04, 9626-09-05" };
- AMutableInterval[] parsedDateIntervals = new AMutableInterval[] {
- new AMutableInterval(-4202630, 2807408, (byte) 17), new AMutableInterval(-4202752, 2807409, (byte) 17),
- new AMutableInterval(-4202569, 2796544, (byte) 17), };
+ String[] dates = { "-9537-08-04", "9656-06-03", "-9537-04-04", "9656-06-04", "-9537-10-04", "9626-09-05" };
+ AMutableDate[] parsedDates = new AMutableDate[] { new AMutableDate(-4202630), new AMutableDate(2807408),
+ new AMutableDate(-4202752), new AMutableDate(2807409), new AMutableDate(-4202569),
+ new AMutableDate(2796544), };
- String[] timeIntervals = { "12:04:45.689Z, 12:41:59.002Z", "12:10:45.169Z, 15:37:48.736Z",
- "04:16:42.321Z, 12:22:56.816Z" };
- AMutableInterval[] parsedTimeIntervals = new AMutableInterval[] {
- new AMutableInterval(43485689, 45719002, (byte) 18),
- new AMutableInterval(43845169, 56268736, (byte) 18),
- new AMutableInterval(15402321, 44576816, (byte) 18), };
+ String[] times = { "12:04:45.689Z", "12:41:59.002Z", "12:10:45.169Z", "15:37:48.736Z", "04:16:42.321Z",
+ "12:22:56.816Z" };
+ AMutableTime[] parsedTimes = new AMutableTime[] { new AMutableTime(43485689), new AMutableTime(45719002),
+ new AMutableTime(43845169), new AMutableTime(56268736), new AMutableTime(15402321),
+ new AMutableTime(44576816), };
- String[] dateTimeIntervals = { "-2640-10-11T17:32:15.675Z, 4104-02-01T05:59:11.902Z",
- "0534-12-08T08:20:31.487Z, 6778-02-16T22:40:21.653Z",
- "2129-12-12T13:18:35.758Z, 8647-07-01T13:10:19.691Z" };
- AMutableInterval[] parsedDateTimeIntervals = new AMutableInterval[] {
- new AMutableInterval(-145452954464325L, 67345192751902L, (byte) 16),
- new AMutableInterval(-45286270768513L, 151729886421653L, (byte) 16),
- new AMutableInterval(5047449515758L, 210721439419691L, (byte) 16) };
+ String[] dateTimes = { "-2640-10-11T17:32:15.675Z", "4104-02-01T05:59:11.902Z", "0534-12-08T08:20:31.487Z",
+ "6778-02-16T22:40:21.653Z", "2129-12-12T13:18:35.758Z", "8647-07-01T13:10:19.691Z" };
+ AMutableDateTime[] parsedDateTimes = new AMutableDateTime[] { new AMutableDateTime(-145452954464325L),
+ new AMutableDateTime(67345192751902L), new AMutableDateTime(-45286270768513L),
+ new AMutableDateTime(151729886421653L), new AMutableDateTime(5047449515758L),
+ new AMutableDateTime(210721439419691L) };
Thread[] threads = new Thread[16];
AtomicInteger errorCount = new AtomicInteger(0);
@@ -67,28 +67,28 @@
try {
int round = 0;
while (round++ < 10000) {
- // Test parseDateInterval.
- for (int index = 0; index < dateIntervals.length; ++index) {
- PA.invokeMethod(parser, "parseDateInterval(java.lang.String, java.io.DataOutput)",
- dateIntervals[index], dos);
- AMutableInterval aInterval = (AMutableInterval) PA.getValue(parser, "aInterval");
- Assert.assertTrue(aInterval.equals(parsedDateIntervals[index]));
+ // Test parseDate.
+ for (int index = 0; index < dates.length; ++index) {
+ PA.invokeMethod(parser, "parseDate(java.lang.String, java.io.DataOutput)",
+ dates[index], dos);
+ AMutableDate aDate = (AMutableDate) PA.getValue(parser, "aDate");
+ Assert.assertTrue(aDate.equals(parsedDates[index]));
}
- // Tests parseTimeInterval.
- for (int index = 0; index < timeIntervals.length; ++index) {
- PA.invokeMethod(parser, "parseTimeInterval(java.lang.String, java.io.DataOutput)",
- timeIntervals[index], dos);
- AMutableInterval aInterval = (AMutableInterval) PA.getValue(parser, "aInterval");
- Assert.assertTrue(aInterval.equals(parsedTimeIntervals[index]));
+ // Tests parseTime.
+ for (int index = 0; index < times.length; ++index) {
+ PA.invokeMethod(parser, "parseTime(java.lang.String, java.io.DataOutput)",
+ times[index], dos);
+ AMutableTime aTime = (AMutableTime) PA.getValue(parser, "aTime");
+ Assert.assertTrue(aTime.equals(parsedTimes[index]));
}
- // Tests parseDateTimeInterval.
- for (int index = 0; index < dateTimeIntervals.length; ++index) {
- PA.invokeMethod(parser, "parseDateTimeInterval(java.lang.String, java.io.DataOutput)",
- dateTimeIntervals[index], dos);
- AMutableInterval aInterval = (AMutableInterval) PA.getValue(parser, "aInterval");
- Assert.assertTrue(aInterval.equals(parsedDateTimeIntervals[index]));
+ // Tests parseDateTime.
+ for (int index = 0; index < dateTimes.length; ++index) {
+ PA.invokeMethod(parser, "parseDateTime(java.lang.String, java.io.DataOutput)",
+ dateTimes[index], dos);
+ AMutableDateTime aDateTime = (AMutableDateTime) PA.getValue(parser, "aDateTime");
+ Assert.assertTrue(aDateTime.equals(parsedDateTimes[index]));
}
}
} catch (Exception e) {