introduced new adm lexer into stabilization. Issue #215
git-svn-id: https://asterixdb.googlecode.com/svn/branches/asterix_stabilization@1205 eaa15691-b419-025a-1212-ee371bd00084
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java
index 8606088..2e64ad4 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java
@@ -22,10 +22,8 @@
import java.util.List;
import java.util.Queue;
-import edu.uci.ics.asterix.adm.parser.nontagged.AdmLexer;
-import edu.uci.ics.asterix.adm.parser.nontagged.AdmLexerConstants;
-import edu.uci.ics.asterix.adm.parser.nontagged.ParseException;
-import edu.uci.ics.asterix.adm.parser.nontagged.Token;
+import edu.uci.ics.asterix.runtime.operators.file.adm.AdmLexer;
+import edu.uci.ics.asterix.runtime.operators.file.adm.AdmLexerException;
import edu.uci.ics.asterix.builders.IARecordBuilder;
import edu.uci.ics.asterix.builders.IAsterixListBuilder;
import edu.uci.ics.asterix.builders.OrderedListBuilder;
@@ -36,6 +34,7 @@
import edu.uci.ics.asterix.dataflow.data.nontagged.serde.ADateSerializerDeserializer;
import edu.uci.ics.asterix.dataflow.data.nontagged.serde.ADateTimeSerializerDeserializer;
import edu.uci.ics.asterix.dataflow.data.nontagged.serde.ADurationSerializerDeserializer;
+import edu.uci.ics.asterix.dataflow.data.nontagged.serde.AIntervalSerializerDeserializer;
import edu.uci.ics.asterix.dataflow.data.nontagged.serde.ALineSerializerDeserializer;
import edu.uci.ics.asterix.dataflow.data.nontagged.serde.APoint3DSerializerDeserializer;
import edu.uci.ics.asterix.dataflow.data.nontagged.serde.APointSerializerDeserializer;
@@ -55,7 +54,7 @@
import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
/**
- * Parser for ADM formatted data.
+ * Parser for ADM formatted data.
*/
public class ADMDataParser extends AbstractDataParser implements IDataParser {
@@ -82,21 +81,25 @@
}
@Override
- public void initialize(InputStream in, ARecordType recordType, boolean datasetRec) {
- admLexer = new AdmLexer(in);
+ public void initialize(InputStream in, ARecordType recordType, boolean datasetRec) throws AsterixException { // binds the parser to a new input stream and target record type
this.recordType = recordType;
this.datasetRec = datasetRec;
+ try {
+ admLexer = new AdmLexer(new java.io.InputStreamReader(in, "UTF-8")); // explicit charset: do not decode with the platform default, which varies per host
+ } catch (IOException e) { // also covers UnsupportedEncodingException from the reader
+ throw new AsterixException(e);
+ }
}
protected boolean parseAdmInstance(IAType objectType, boolean datasetRec, DataOutput out) throws AsterixException,
IOException {
- Token token;
+ int token;
try {
token = admLexer.next();
- } catch (ParseException pe) {
- throw new AsterixException(pe);
+ } catch (AdmLexerException e) {
+ throw new AsterixException(e);
}
- if (token.kind == AdmLexerConstants.EOF) {
+ if (token == AdmLexer.TOKEN_EOF) {
return false;
} else {
admFromLexerStream(token, objectType, out, datasetRec);
@@ -104,157 +107,212 @@
}
}
- private void admFromLexerStream(Token token, IAType objectType, DataOutput out, Boolean datasetRec)
+ private void admFromLexerStream(int token, IAType objectType, DataOutput out, Boolean datasetRec)
throws AsterixException, IOException {
- switch (token.kind) {
- case AdmLexerConstants.NULL_LITERAL: {
+ switch (token) {
+ case AdmLexer.TOKEN_NULL_LITERAL: {
if (checkType(ATypeTag.NULL, objectType, out)) {
nullSerde.serialize(ANull.NULL, out);
} else
throw new AsterixException(" This field can not be null ");
break;
}
- case AdmLexerConstants.TRUE_LITERAL: {
+ case AdmLexer.TOKEN_TRUE_LITERAL: {
if (checkType(ATypeTag.BOOLEAN, objectType, out)) {
booleanSerde.serialize(ABoolean.TRUE, out);
} else
throw new AsterixException(mismatchErrorMessage + objectType.getTypeName());
break;
}
- case AdmLexerConstants.BOOLEAN_CONS: {
+ case AdmLexer.TOKEN_BOOLEAN_CONS: {
parseConstructor(ATypeTag.BOOLEAN, objectType, out);
break;
}
- case AdmLexerConstants.FALSE_LITERAL: {
+ case AdmLexer.TOKEN_FALSE_LITERAL: {
if (checkType(ATypeTag.BOOLEAN, objectType, out)) {
booleanSerde.serialize(ABoolean.FALSE, out);
} else
throw new AsterixException(mismatchErrorMessage + objectType.getTypeName());
break;
}
- case AdmLexerConstants.DOUBLE_LITERAL: {
+ case AdmLexer.TOKEN_DOUBLE_LITERAL: {
if (checkType(ATypeTag.DOUBLE, objectType, out)) {
- aDouble.setValue(Double.parseDouble(token.image));
+ aDouble.setValue(Double.parseDouble(admLexer.getLastTokenImage()));
doubleSerde.serialize(aDouble, out);
} else
throw new AsterixException(mismatchErrorMessage + objectType.getTypeName());
break;
}
- case AdmLexerConstants.DOUBLE_CONS: {
+ case AdmLexer.TOKEN_DOUBLE_CONS: {
parseConstructor(ATypeTag.DOUBLE, objectType, out);
break;
}
- case AdmLexerConstants.FLOAT_LITERAL: {
+ case AdmLexer.TOKEN_FLOAT_LITERAL: {
if (checkType(ATypeTag.FLOAT, objectType, out)) {
- aFloat.setValue(Float.parseFloat(token.image));
+ aFloat.setValue(Float.parseFloat(admLexer.getLastTokenImage()));
floatSerde.serialize(aFloat, out);
} else
throw new AsterixException(mismatchErrorMessage + objectType.getTypeName());
break;
}
- case AdmLexerConstants.FLOAT_CONS: {
+ case AdmLexer.TOKEN_FLOAT_CONS: {
parseConstructor(ATypeTag.FLOAT, objectType, out);
break;
}
- case AdmLexerConstants.INT8_LITERAL: {
+ case AdmLexer.TOKEN_INT8_LITERAL: {
if (checkType(ATypeTag.INT8, objectType, out)) {
- parseInt8(token.image, out);
+ parseInt8(admLexer.getLastTokenImage(), out);
} else
throw new AsterixException(mismatchErrorMessage + objectType.getTypeName());
break;
}
- case AdmLexerConstants.INT8_CONS: {
+ case AdmLexer.TOKEN_INT8_CONS: {
parseConstructor(ATypeTag.INT8, objectType, out);
break;
}
- case AdmLexerConstants.INT16_LITERAL: {
+ case AdmLexer.TOKEN_INT16_LITERAL: {
if (checkType(ATypeTag.INT16, objectType, out)) {
- parseInt16(token.image, out);
+ parseInt16(admLexer.getLastTokenImage(), out);
} else
throw new AsterixException(mismatchErrorMessage + objectType.getTypeName());
break;
}
- case AdmLexerConstants.INT16_CONS: {
+ case AdmLexer.TOKEN_INT16_CONS: {
parseConstructor(ATypeTag.INT16, objectType, out);
break;
}
- case AdmLexerConstants.INT_LITERAL:
- case AdmLexerConstants.INT32_LITERAL: {
+ case AdmLexer.TOKEN_INT_LITERAL:
+ case AdmLexer.TOKEN_INT32_LITERAL: {
if (checkType(ATypeTag.INT32, objectType, out)) {
- parseInt32(token.image, out);
+ parseInt32(admLexer.getLastTokenImage(), out);
} else
throw new AsterixException(mismatchErrorMessage + objectType.getTypeName());
break;
}
- case AdmLexerConstants.INT32_CONS: {
+ case AdmLexer.TOKEN_INT32_CONS: {
parseConstructor(ATypeTag.INT32, objectType, out);
break;
}
- case AdmLexerConstants.INT64_LITERAL: {
+ case AdmLexer.TOKEN_INT64_LITERAL: {
if (checkType(ATypeTag.INT64, objectType, out)) {
- parseInt64(token.image, out);
+ parseInt64(admLexer.getLastTokenImage(), out);
} else
throw new AsterixException(mismatchErrorMessage + objectType.getTypeName());
break;
}
- case AdmLexerConstants.INT64_CONS: {
+ case AdmLexer.TOKEN_INT64_CONS: {
parseConstructor(ATypeTag.INT64, objectType, out);
break;
}
- case AdmLexerConstants.STRING_LITERAL: {
+ case AdmLexer.TOKEN_STRING_LITERAL: {
if (checkType(ATypeTag.STRING, objectType, out)) {
- aString.setValue(token.image.substring(1, token.image.length() - 1));
+ aString.setValue(admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1));
stringSerde.serialize(aString, out);
} else
throw new AsterixException(mismatchErrorMessage + objectType.getTypeName());
break;
}
- case AdmLexerConstants.STRING_CONS: {
+ case AdmLexer.TOKEN_STRING_CONS: {
parseConstructor(ATypeTag.STRING, objectType, out);
break;
}
- case AdmLexerConstants.DATE_CONS: {
+ case AdmLexer.TOKEN_DATE_CONS: {
parseConstructor(ATypeTag.DATE, objectType, out);
break;
}
- case AdmLexerConstants.TIME_CONS: {
+ case AdmLexer.TOKEN_TIME_CONS: {
parseConstructor(ATypeTag.TIME, objectType, out);
break;
}
- case AdmLexerConstants.DATETIME_CONS: {
+ case AdmLexer.TOKEN_DATETIME_CONS: {
parseConstructor(ATypeTag.DATETIME, objectType, out);
break;
}
- case AdmLexerConstants.DURATION_CONS: {
+ case AdmLexer.TOKEN_INTERVAL_DATE_CONS: { // interval_date("...")
+ try {
+ if (checkType(ATypeTag.INTERVAL, objectType, out)) {
+ if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
+ if (admLexer.next() == AdmLexer.TOKEN_STRING_LITERAL) { // argument is a quoted literal; STRING_CONS is the "string" keyword
+ AIntervalSerializerDeserializer.parseDate(admLexer.getLastTokenImage(), out); // NOTE(review): image still carries its quotes (parseConstructor strips them) - confirm parseDate expects that
+
+ if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
+ break;
+ }
+ }
+ }
+ }
+ } catch (AdmLexerException ex) {
+ throw new AsterixException(ex);
+ }
+ throw new AsterixException("Wrong interval data parsing for date interval."); // reached on type mismatch or any unexpected token
+ }
+ case AdmLexer.TOKEN_INTERVAL_TIME_CONS: { // interval_time("...")
+ try {
+ if (checkType(ATypeTag.INTERVAL, objectType, out)) {
+ if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
+ if (admLexer.next() == AdmLexer.TOKEN_STRING_LITERAL) { // argument is a quoted literal; STRING_CONS is the "string" keyword
+ AIntervalSerializerDeserializer.parseTime(admLexer.getLastTokenImage(), out); // NOTE(review): image still carries its quotes - confirm parseTime expects that
+
+ if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
+ break;
+ }
+ }
+ }
+ }
+ } catch (AdmLexerException ex) {
+ throw new AsterixException(ex);
+ }
+ throw new AsterixException("Wrong interval data parsing for time interval."); // reached on type mismatch or any unexpected token
+ }
+ case AdmLexer.TOKEN_INTERVAL_DATETIME_CONS: { // interval_datetime("...")
+ try {
+ if (checkType(ATypeTag.INTERVAL, objectType, out)) {
+ if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
+ if (admLexer.next() == AdmLexer.TOKEN_STRING_LITERAL) { // argument is a quoted literal; STRING_CONS is the "string" keyword
+ AIntervalSerializerDeserializer.parseDatetime(admLexer.getLastTokenImage(), out); // NOTE(review): image still carries its quotes - confirm parseDatetime expects that
+
+ if (admLexer.next() == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE) {
+ break;
+ }
+ }
+ }
+ }
+ } catch (AdmLexerException ex) {
+ throw new AsterixException(ex);
+ }
+ throw new AsterixException("Wrong interval data parsing for datetime interval."); // reached on type mismatch or any unexpected token
+ }
+ case AdmLexer.TOKEN_DURATION_CONS: {
parseConstructor(ATypeTag.DURATION, objectType, out);
break;
}
- case AdmLexerConstants.POINT_CONS: {
+ case AdmLexer.TOKEN_POINT_CONS: {
parseConstructor(ATypeTag.POINT, objectType, out);
break;
}
- case AdmLexerConstants.POINT3D_CONS: {
+ case AdmLexer.TOKEN_POINT3D_CONS: {
parseConstructor(ATypeTag.POINT3D, objectType, out);
break;
}
- case AdmLexerConstants.CIRCLE_CONS: {
+ case AdmLexer.TOKEN_CIRCLE_CONS: {
parseConstructor(ATypeTag.CIRCLE, objectType, out);
break;
}
- case AdmLexerConstants.RECTANGLE_CONS: {
+ case AdmLexer.TOKEN_RECTANGLE_CONS: {
parseConstructor(ATypeTag.RECTANGLE, objectType, out);
break;
}
- case AdmLexerConstants.LINE_CONS: {
+ case AdmLexer.TOKEN_LINE_CONS: {
parseConstructor(ATypeTag.LINE, objectType, out);
break;
}
- case AdmLexerConstants.POLYGON_CONS: {
+ case AdmLexer.TOKEN_POLYGON_CONS: {
parseConstructor(ATypeTag.POLYGON, objectType, out);
break;
}
- case AdmLexerConstants.START_UNORDERED_LIST: {
+ case AdmLexer.TOKEN_START_UNORDERED_LIST: {
if (checkType(ATypeTag.UNORDEREDLIST, objectType, out)) {
objectType = getComplexType(objectType, ATypeTag.UNORDEREDLIST);
parseUnorderedList((AUnorderedListType) objectType, out);
@@ -263,7 +321,7 @@
break;
}
- case AdmLexerConstants.START_ORDERED_LIST: {
+ case AdmLexer.TOKEN_START_ORDERED_LIST: {
if (checkType(ATypeTag.ORDEREDLIST, objectType, out)) {
objectType = getComplexType(objectType, ATypeTag.ORDEREDLIST);
parseOrderedList((AOrderedListType) objectType, out);
@@ -271,7 +329,7 @@
throw new AsterixException(mismatchErrorMessage + objectType.getTypeTag());
break;
}
- case AdmLexerConstants.START_RECORD: {
+ case AdmLexer.TOKEN_START_RECORD: {
if (checkType(ATypeTag.RECORD, objectType, out)) {
objectType = getComplexType(objectType, ATypeTag.RECORD);
parseRecord((ARecordType) objectType, out, datasetRec);
@@ -279,11 +337,11 @@
throw new AsterixException(mismatchErrorMessage + objectType.getTypeTag());
break;
}
- case AdmLexerConstants.EOF: {
+ case AdmLexer.TOKEN_EOF: {
break;
}
default: {
- throw new AsterixException("Unexpected ADM token kind: " + admLexer.tokenKindToString(token.kind) + ".");
+ throw new AsterixException("Unexpected ADM token kind: " + AdmLexer.tokenKindToString(token) + ".");
}
}
}
@@ -365,7 +423,7 @@
recBuilder.reset(null);
recBuilder.init();
- Token token = null;
+ int token;
boolean inRecord = true;
boolean expectingRecordField = false;
boolean first = true;
@@ -375,15 +433,15 @@
IAType fieldType = null;
do {
token = nextToken();
- switch (token.kind) {
- case AdmLexerConstants.END_RECORD: {
+ switch (token) {
+ case AdmLexer.TOKEN_END_RECORD: {
if (expectingRecordField) {
throw new AsterixException("Found END_RECORD while expecting a record field.");
}
inRecord = false;
break;
}
- case AdmLexerConstants.STRING_LITERAL: {
+ case AdmLexer.TOKEN_STRING_LITERAL: {
// we've read the name of the field
// now read the content
fieldNameBuffer.reset();
@@ -391,12 +449,14 @@
expectingRecordField = false;
if (recType != null) {
- String fldName = token.image.substring(1, token.image.length() - 1);
+ String fldName = admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1);
fieldId = recBuilder.getFieldId(fldName);
if (fieldId < 0 && !recType.isOpen()) {
throw new AsterixException("This record is closed, you can not add extra fields !!");
} else if (fieldId < 0 && recType.isOpen()) {
- aStringFieldName.setValue(token.image.substring(1, token.image.length() - 1));
+ aStringFieldName.setValue(admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1));
stringSerde.serialize(aStringFieldName, fieldNameBuffer.getDataOutput());
openRecordField = true;
fieldType = null;
@@ -407,16 +467,17 @@
openRecordField = false;
}
} else {
- aStringFieldName.setValue(token.image.substring(1, token.image.length() - 1));
+ aStringFieldName.setValue(admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1));
stringSerde.serialize(aStringFieldName, fieldNameBuffer.getDataOutput());
openRecordField = true;
fieldType = null;
}
token = nextToken();
- if (token.kind != AdmLexerConstants.COLON) {
- throw new AsterixException("Unexpected ADM token kind: "
- + admLexer.tokenKindToString(token.kind) + " while expecting \":\".");
+ if (token != AdmLexer.TOKEN_COLON) {
+ throw new AsterixException("Unexpected ADM token kind: " + AdmLexer.tokenKindToString(token)
+ + " while expecting \":\".");
}
token = nextToken();
@@ -436,7 +497,7 @@
break;
}
- case AdmLexerConstants.COMMA: {
+ case AdmLexer.TOKEN_COMMA: {
if (first) {
throw new AsterixException("Found COMMA before any record field.");
}
@@ -447,7 +508,7 @@
break;
}
default: {
- throw new AsterixException("Unexpected ADM token kind: " + admLexer.tokenKindToString(token.kind)
+ throw new AsterixException("Unexpected ADM token kind: " + AdmLexer.tokenKindToString(token)
+ " while parsing record fields.");
}
}
@@ -498,18 +559,18 @@
itemType = oltype.getItemType();
orderedListBuilder.reset(oltype);
- Token token = null;
+ int token;
boolean inList = true;
boolean expectingListItem = false;
boolean first = true;
do {
token = nextToken();
- if (token.kind == AdmLexerConstants.END_ORDERED_LIST) {
+ if (token == AdmLexer.TOKEN_END_ORDERED_LIST) {
if (expectingListItem) {
throw new AsterixException("Found END_COLLECTION while expecting a list item.");
}
inList = false;
- } else if (token.kind == AdmLexerConstants.COMMA) {
+ } else if (token == AdmLexer.TOKEN_COMMA) {
if (first) {
throw new AsterixException("Found COMMA before any list item.");
}
@@ -542,18 +603,18 @@
itemType = uoltype.getItemType();
unorderedListBuilder.reset(uoltype);
- Token token = null;
+ int token;
boolean inList = true;
boolean expectingListItem = false;
boolean first = true;
do {
token = nextToken();
- if (token.kind == AdmLexerConstants.END_UNORDERED_LIST) {
+ if (token == AdmLexer.TOKEN_END_UNORDERED_LIST) {
if (expectingListItem) {
throw new AsterixException("Found END_COLLECTION while expecting a list item.");
}
inList = false;
- } else if (token.kind == AdmLexerConstants.COMMA) {
+ } else if (token == AdmLexer.TOKEN_COMMA) {
if (first) {
throw new AsterixException("Found COMMA before any list item.");
}
@@ -574,11 +635,13 @@
returnTempBuffer(itemBuffer);
}
- private Token nextToken() throws AsterixException {
+ private int nextToken() throws AsterixException { // returns the kind of the next token, compared by callers against AdmLexer.TOKEN_* constants
try {
return admLexer.next();
- } catch (ParseException pe) {
- throw new AsterixException(pe);
+ } catch (AdmLexerException e) { // lexer-level failure, rethrown with the cause preserved
+ throw new AsterixException(e);
+ } catch (IOException e) { // stream read failure is wrapped too, so callers only deal with AsterixException
+ throw new AsterixException(e);
}
}
@@ -633,73 +696,109 @@
private void parseConstructor(ATypeTag typeTag, IAType objectType, DataOutput out) throws AsterixException {
try {
- Token token = admLexer.next();
- if (token.kind == AdmLexerConstants.CONSTRUCTOR_OPEN) {
+ int token = admLexer.next();
+ if (token == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
if (checkType(typeTag, objectType, out)) {
token = admLexer.next();
- if (token.kind == AdmLexerConstants.STRING_LITERAL) {
+ if (token == AdmLexer.TOKEN_STRING_LITERAL) {
switch (typeTag) {
case BOOLEAN:
- parseBoolean(token.image.substring(1, token.image.length() - 1), out);
+ parseBoolean(
+ admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1), out);
break;
case INT8:
- parseInt8(token.image.substring(1, token.image.length() - 1), out);
+ parseInt8(
+ admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1), out);
break;
case INT16:
- parseInt16(token.image.substring(1, token.image.length() - 1), out);
+ parseInt16(
+ admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1), out);
break;
case INT32:
- parseInt32(token.image.substring(1, token.image.length() - 1), out);
+ parseInt32(
+ admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1), out);
break;
case INT64:
- parseInt64(token.image.substring(1, token.image.length() - 1), out);
+ parseInt64(
+ admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1), out);
break;
case FLOAT:
- aFloat.setValue(Float.parseFloat(token.image.substring(1, token.image.length() - 1)));
+ aFloat.setValue(Float.parseFloat(admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1)));
floatSerde.serialize(aFloat, out);
break;
case DOUBLE:
- aDouble.setValue(Double.parseDouble(token.image.substring(1, token.image.length() - 1)));
+ aDouble.setValue(Double.parseDouble(admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1)));
doubleSerde.serialize(aDouble, out);
break;
case STRING:
- aString.setValue(token.image.substring(1, token.image.length() - 1));
+ aString.setValue(admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1));
stringSerde.serialize(aString, out);
break;
case TIME:
- parseTime(token.image.substring(1, token.image.length() - 1), out);
+ parseTime(
+ admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1), out);
break;
case DATE:
- parseDate(token.image.substring(1, token.image.length() - 1), out);
+ parseDate(
+ admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1), out);
break;
case DATETIME:
- parseDatetime(token.image.substring(1, token.image.length() - 1), out);
+ parseDatetime(
+ admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1), out);
break;
case DURATION:
- parseDuration(token.image.substring(1, token.image.length() - 1), out);
+ parseDuration(
+ admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1), out);
break;
case POINT:
- parsePoint(token.image.substring(1, token.image.length() - 1), out);
+ parsePoint(
+ admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1), out);
break;
case POINT3D:
- parsePoint3d(token.image.substring(1, token.image.length() - 1), out);
+ parsePoint3d(
+ admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1), out);
break;
case CIRCLE:
- parseCircle(token.image.substring(1, token.image.length() - 1), out);
+ parseCircle(
+ admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1), out);
break;
case RECTANGLE:
- parseRectangle(token.image.substring(1, token.image.length() - 1), out);
+ parseRectangle(
+ admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1), out);
break;
case LINE:
- parseLine(token.image.substring(1, token.image.length() - 1), out);
+ parseLine(
+ admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1), out);
break;
case POLYGON:
- parsePolygon(token.image.substring(1, token.image.length() - 1), out);
+ parsePolygon(
+ admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1), out);
break;
+ default: // typeTag has no constructor parser in this switch
+ throw new AsterixException("Missing deserializer method for constructor: "
+ + typeTag + "."); // token is always STRING_LITERAL here, so report the constructor's type tag instead
}
token = admLexer.next();
- if (token.kind == AdmLexerConstants.CONSTRUCTOR_CLOSE)
+ if (token == AdmLexer.TOKEN_CONSTRUCTOR_CLOSE)
return;
}
}
diff --git a/asterix-runtime/src/main/javacc/AdmLexer.jj b/asterix-runtime/src/main/javacc/AdmLexer.jj
deleted file mode 100644
index fbab62f..0000000
--- a/asterix-runtime/src/main/javacc/AdmLexer.jj
+++ /dev/null
@@ -1,150 +0,0 @@
-options {
-
-
- STATIC = false;
-
-}
-
-PARSER_BEGIN(AdmLexer)
-
-package edu.uci.ics.asterix.adm.parser;
-
-import java.io.*;
-
-public class AdmLexer {
-
- public static void main(String args[]) throws ParseException, TokenMgrError, IOException, FileNotFoundException {
- File file = new File(args[0]);
- Reader freader = new BufferedReader(new InputStreamReader
- (new FileInputStream(file), "UTF-8"));
- AdmLexer flexer = new AdmLexer(freader);
- Token t = null;
- do {
- t = flexer.next();
- System.out.println(AdmLexerConstants.tokenImage[t.kind]);
- } while (t.kind != EOF);
- freader.close();
- }
-
- public Token next() throws ParseException {
- return getNextToken();
- }
-
- public String tokenKindToString(int tokenKind) {
- return AdmLexerConstants.tokenImage[tokenKind];
- }
-}
-
-PARSER_END(AdmLexer)
-
-<DEFAULT>
-TOKEN :
-{
- <NULL_LITERAL : "null">
-}
-
-<DEFAULT>
-TOKEN :
-{
- <TRUE_LITERAL : "true">
-}
-
-<DEFAULT>
-TOKEN :
-{
- <FALSE_LITERAL : "false">
-}
-
-
-<DEFAULT>
-TOKEN :
-{
- <INTEGER_LITERAL : ("-")? (<DIGIT>)+ >
-}
-
-
-<DEFAULT>
-TOKEN :
-{
- <#DIGIT : ["0" - "9"]>
-}
-
-
-TOKEN:
-{
- < DOUBLE_LITERAL:
- ("-")? <INTEGER> ( "." <INTEGER> )? (<EXPONENT>)?
- | ("-")? "." <INTEGER>
- >
- | < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
- | <INTEGER : (<DIGIT>)+ >
- | <FLOAT_LITERAL: <DOUBLE_LITERAL>("f"|"F")>
- }
-
-<DEFAULT>
-TOKEN :
-{
- <STRING_LITERAL : ("\"" (<EscapeQuot> | ~["\""])* "\"") >
- |
- < #EscapeQuot: "\\\"" >
-}
-
-<DEFAULT>
-TOKEN :
-{
- <START_RECORD : "{">
-}
-
-<DEFAULT>
-TOKEN :
-{
- <END_RECORD : "}">
-}
-
-<DEFAULT>
-TOKEN :
-{
- <COMMA : ",">
-}
-
-<DEFAULT>
-TOKEN :
-{
- <COLON : ":">
-}
-
-
-<DEFAULT>
-TOKEN :
-{
- <START_ORDERED_LIST : "[">
-}
-
-<DEFAULT>
-TOKEN :
-{
- <END_ORDERED_LIST : "]">
-}
-
-<DEFAULT>
-TOKEN :
-{
- <START_UNORDERED_LIST : "{{">
-}
-
-<DEFAULT>
-TOKEN :
-{
- <END_UNORDERED_LIST : "}}">
-}
-
-
-
-
-SKIP:
-{
- " "
-| "\t"
-| "\r"
-| "\n"
-}
diff --git a/asterix-runtime/src/main/javacc/nontagged/AdmLexer.jj b/asterix-runtime/src/main/javacc/nontagged/AdmLexer.jj
deleted file mode 100644
index d94033d..0000000
--- a/asterix-runtime/src/main/javacc/nontagged/AdmLexer.jj
+++ /dev/null
@@ -1,385 +0,0 @@
-options {
-
-
- STATIC = false;
-
-}
-
-PARSER_BEGIN(AdmLexer)
-
-package edu.uci.ics.asterix.adm.parser.nontagged;
-
-import java.io.*;
-
-public class AdmLexer {
-
- public static void main(String args[]) throws ParseException, TokenMgrError, IOException, FileNotFoundException {
- File file = new File(args[0]);
- Reader freader = new BufferedReader(new InputStreamReader
- (new FileInputStream(file), "UTF-8"));
- AdmLexer flexer = new AdmLexer(freader);
- Token t = null;
- do {
- t = flexer.next();
- System.out.println(AdmLexerConstants.tokenImage[t.kind]);
- } while (t.kind != EOF);
- freader.close();
- }
-
- public Token next() throws ParseException {
- return getNextToken();
- }
-
- public String tokenKindToString(int tokenKind) {
- return AdmLexerConstants.tokenImage[tokenKind];
- }
-}
-
-PARSER_END(AdmLexer)
-
-<DEFAULT>
-TOKEN :
-{
- <NULL_LITERAL : "null">
-}
-
-<DEFAULT>
-TOKEN :
-{
- <TRUE_LITERAL : "true">
-}
-
-<DEFAULT>
-TOKEN :
-{
- <FALSE_LITERAL : "false">
-}
-
-<DEFAULT>
-TOKEN :
-{
- <BOOLEAN_CONS : ("boolean") >
-}
-
-<DEFAULT>
-TOKEN :
-{
- <CONSTRUCTOR_OPEN : ("(")>
-}
-
-
-<DEFAULT>
-TOKEN :
-{
- <CONSTRUCTOR_CLOSE : (")")>
-}
-
-<DEFAULT>
-TOKEN:
-{
- <INT8_LITERAL : ("-" | "+")? (<DIGIT>)+ ("i8")>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <INT8_CONS : ("int8") >
-}
-
-<DEFAULT>
-TOKEN:
-{
- <INT16_LITERAL : ("-" | "+")? (<DIGIT>)+ ("i16")>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <INT16_CONS : ("int16") >
-}
-
-<DEFAULT>
-TOKEN :
-{
- <INT32_LITERAL : ("-" | "+")? (<DIGIT>)+ ("i32")>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <INT32_CONS : ("int32")>
-}
-
-<DEFAULT>
-TOKEN:
-{
- <INT64_LITERAL : ("-" | "+")? (<DIGIT>)+ ("i64")>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <INT64_CONS : ("int64") >
-}
-
-<DEFAULT>
-TOKEN:
-{
- <INT_LITERAL : ("-" | "+")? (<DIGIT>)+>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <CIRCLE_LITERAL : "P"<DOUBLE_LITERAL>(",") <DOUBLE_LITERAL> ("R") <DOUBLE_LITERAL> >
-}
-
-<DEFAULT>
-TOKEN :
-{
- <CIRCLE_CONS : ("circle") >
-}
-
-
-<DEFAULT>
-TOKEN :
-{
- <TIMEZONE_LITERAL : (("+"|"-")<DIGIT><DIGIT>(":")<DIGIT><DIGIT>) | (("+"|"-")<DIGIT><DIGIT><DIGIT><DIGIT>) | ("Z") >
-}
-
-<DEFAULT>
-TOKEN :
-{
- <DATE_LITERAL : (("-")?<DIGIT><DIGIT><DIGIT><DIGIT>("-")<DIGIT><DIGIT>("-")<DIGIT><DIGIT>) | (("-")?<DIGIT><DIGIT><DIGIT><DIGIT><DIGIT><DIGIT><DIGIT><DIGIT>) >
-}
-
-<DEFAULT>
-TOKEN :
-{
- <DATE_CONS : ("date")>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <TIME_LITERAL : (<DIGIT><DIGIT>(":")<DIGIT><DIGIT>(":")<DIGIT><DIGIT> ( (".")<DIGIT>(<DIGIT>((<DIGIT>)?))?)? ((("+"|"-")<DIGIT><DIGIT>(":")<DIGIT><DIGIT>) | ("Z"))?) | (<DIGIT><DIGIT><DIGIT><DIGIT><DIGIT><DIGIT> (<DIGIT>(<DIGIT>((<DIGIT>)?))?)? ((("+"|"-")<DIGIT><DIGIT><DIGIT><DIGIT>) | ("Z"))?) >
-}
-
-<DEFAULT>
-TOKEN :
-{
- <TIME_CONS : ("time")>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <DATETIME_LITERAL : (("-")?<DIGIT><DIGIT><DIGIT><DIGIT>("-")<DIGIT><DIGIT>("-")<DIGIT><DIGIT>("T")<DIGIT><DIGIT>(":")<DIGIT><DIGIT>(":")<DIGIT><DIGIT> ( (".")<DIGIT>(<DIGIT>((<DIGIT>)?))?)? ((("+"|"-")<DIGIT><DIGIT>(":")<DIGIT><DIGIT>) | ("Z"))?) | (("-")?<DIGIT><DIGIT><DIGIT><DIGIT><DIGIT><DIGIT><DIGIT><DIGIT>("T")<DIGIT><DIGIT><DIGIT><DIGIT><DIGIT><DIGIT> (<DIGIT>(<DIGIT>((<DIGIT>)?))?)? ((("+"|"-")<DIGIT><DIGIT><DIGIT><DIGIT>) | ("Z"))?)>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <DATETIME_CONS : ("datetime")>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <DURATION_LITERAL : ("-")? ("P")(<INTEGER>("Y"))?(<INTEGER>("M"))?(<INTEGER>("D"))?(("T")(((<INTEGER>("H"))(<INTEGER>("M"))?(<INTEGER>((".")<DIGIT>(<DIGIT>(<DIGIT>)?)?)?("S"))?) | ((<INTEGER>("M"))(<INTEGER>((".")<DIGIT>(<DIGIT>(<DIGIT>)?)?)?("S"))?) | ((<INTEGER>((".")<DIGIT>(<DIGIT>(<DIGIT>)?)?)?("S")))))?>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <DURATION_CONS : ("duration")>
-}
-
-<DEFAULT>
-TOKEN :
-{
<INTERVAL_CONS : ("interval")>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <TIME_INTERVAL_CONS : ("tinterval")>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <DATE_INTERVAL_CONS : ("dinterval")>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <DATETIME_INTERVAL_CONS : ("dtinterval")>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <#DIGIT : ["0" - "9"]>
-}
-
-TOKEN:
-{
- < DOUBLE_LITERAL:
- ("-" | "+")? <INTEGER> ( "." <INTEGER> )? (<EXPONENT>)?
- | ("-" | "+")? "." <INTEGER>
- >
- | < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
- | <INTEGER : (<DIGIT>)+ >
- | <FLOAT_LITERAL: <DOUBLE_LITERAL>("f"|"F")>
- }
-
-
-<DEFAULT>
-TOKEN :
-{
- <FLOAT_CONS : ("float")>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <DOUBLE_CONS : ("double")>
-}
-
-
-<DEFAULT>
-TOKEN :
-{
- <STRING_LITERAL : ("\"" (<EscapeQuot> | ~["\""])* "\"") >
- |
- < #EscapeQuot: "\\\"" >
-}
-
-<DEFAULT>
-TOKEN :
-{
- <STRING_CONS : ("string")>
-}
-
-
-<DEFAULT>
-TOKEN :
-{
- <POINT_LITERAL : "P"<DOUBLE_LITERAL>(",")<DOUBLE_LITERAL>>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <POINT_CONS : ("point")>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <POINT3D_LITERAL : "P" <DOUBLE_LITERAL>(",") <DOUBLE_LITERAL> (",") <DOUBLE_LITERAL>>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <POINT3D_CONS : ("point3d")>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <LINE_LITERAL : "P"<DOUBLE_LITERAL>(",") <DOUBLE_LITERAL> ("P") <DOUBLE_LITERAL> (",") <DOUBLE_LITERAL>>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <LINE_CONS : ("line")>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <POLYGON_LITERAL : "P"<DOUBLE_LITERAL>(",") <DOUBLE_LITERAL> ("P") <DOUBLE_LITERAL> (",") <DOUBLE_LITERAL> (("P") <DOUBLE_LITERAL> (",") <DOUBLE_LITERAL>)+>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <POLYGON_CONS : ("polygon")>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <RECTANGLE_CONS : ("rectangle")>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <RECTANGLE_LITERAL : "P"<DOUBLE_LITERAL>(",") <DOUBLE_LITERAL> ("P") <DOUBLE_LITERAL> (",") <DOUBLE_LITERAL>>
-}
-
-<DEFAULT>
-TOKEN :
-{
- <START_RECORD : "{">
-}
-
-<DEFAULT>
-TOKEN :
-{
- <END_RECORD : "}">
-}
-
-<DEFAULT>
-TOKEN :
-{
- <COMMA : ",">
-}
-
-<DEFAULT>
-TOKEN :
-{
- <COLON : ":">
-}
-
-
-<DEFAULT>
-TOKEN :
-{
- <START_ORDERED_LIST : "[">
-}
-
-<DEFAULT>
-TOKEN :
-{
- <END_ORDERED_LIST : "]">
-}
-
-<DEFAULT>
-TOKEN :
-{
- <START_UNORDERED_LIST : "{{">
-}
-
-<DEFAULT>
-TOKEN :
-{
- <END_UNORDERED_LIST : "}}">
-}
-
-
-
-
-SKIP:
-{
- " "
-| "\t"
-| "\r"
-| "\n"
-}
diff --git a/asterix-runtime/src/main/resources/adm.grammar b/asterix-runtime/src/main/resources/adm.grammar
new file mode 100644
index 0000000..56c7212
--- /dev/null
+++ b/asterix-runtime/src/main/resources/adm.grammar
@@ -0,0 +1,63 @@
+# LEXER GENERATOR configuration file
+# ---------------------------------------
+# Place *first* the generic configuration
+# then list your grammar.
+
+PACKAGE: edu.uci.ics.asterix.runtime.operators.file.adm
+LEXER_NAME: AdmLexer
+
+TOKENS:
+
+BOOLEAN_CONS = string(boolean)
+INT8_CONS = string(int8)
+INT16_CONS = string(int16)
+INT32_CONS = string(int32)
+INT64_CONS = string(int64)
+FLOAT_CONS = string(float)
+DOUBLE_CONS = string(double)
+DATE_CONS = string(date)
+DATETIME_CONS = string(datetime)
+DURATION_CONS = string(duration)
+STRING_CONS = string(string)
+POINT_CONS = string(point)
+POINT3D_CONS = string(point3d)
+LINE_CONS = string(line)
+POLYGON_CONS = string(polygon)
+RECTANGLE_CONS = string(rectangle)
+CIRCLE_CONS = string(circle)
+TIME_CONS = string(time)
+INTERVAL_TIME_CONS = string(interval_time)
+INTERVAL_DATE_CONS = string(interval_date)
+INTERVAL_DATETIME_CONS = string(interval_datetime)
+
+NULL_LITERAL = string(null)
+TRUE_LITERAL = string(true)
+FALSE_LITERAL = string(false)
+
+CONSTRUCTOR_OPEN = char(()
+CONSTRUCTOR_CLOSE = char())
+START_RECORD = char({)
+END_RECORD = char(})
+COMMA = char(\,)
+COLON = char(:)
+START_ORDERED_LIST = char([)
+END_ORDERED_LIST = char(])
+START_UNORDERED_LIST = string({{)
+END_UNORDERED_LIST = string(}})
+
+STRING_LITERAL = char("), anythingUntil(")
+
+INT_LITERAL = signOrNothing(), digitSequence()
+INT8_LITERAL = token(INT_LITERAL), string(i8)
+INT16_LITERAL = token(INT_LITERAL), string(i16)
+INT32_LITERAL = token(INT_LITERAL), string(i32)
+INT64_LITERAL = token(INT_LITERAL), string(i64)
+
+@EXPONENT = caseInsensitiveChar(e), signOrNothing(), digitSequence()
+
+DOUBLE_LITERAL = signOrNothing(), char(.), digitSequence()
+DOUBLE_LITERAL = signOrNothing(), digitSequence(), char(.), digitSequence()
+DOUBLE_LITERAL = signOrNothing(), digitSequence(), char(.), digitSequence(), token(@EXPONENT)
+DOUBLE_LITERAL = signOrNothing(), digitSequence(), token(@EXPONENT)
+
+FLOAT_LITERAL = token(DOUBLE_LITERAL), caseInsensitiveChar(f)