[ASTERIXDB-2357] ADMParser Improvements
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- The current ADMParser heavily relies on string operations,
which results in a lot of objects being created. This patch optimizes
this by directly operating on char[].
- Improved exception handling in ADMParser.
Change-Id: I106b58e79746b0a6f3d8b79473202653341a7009
Reviewed-on: https://asterix-gerrit.ics.uci.edu/2573
Reviewed-by: abdullah alamoudi <bamousaa@gmail.com>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Contrib: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
index 92aac98..ff3b253 100644
--- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
+++ b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
@@ -209,7 +209,7 @@
3071 = Found END_RECORD while expecting a list item
3072 = Cannot cast the %1$s type to the %2$s type
3073 = Missing deserializer method for constructor: %1$s
-3074 = This can not be an instance of %1$s
+3074 = %1$s cannot be an instance of %2$s
3075 = Closed field %1$s has null value
3076 = %1$s: no files found
3077 = %1$s: path not found
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ADMDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ADMDataParser.java
index 216cadb..f1eba4c 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ADMDataParser.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ADMDataParser.java
@@ -22,7 +22,6 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
-import java.io.Serializable;
import java.util.BitSet;
import java.util.List;
@@ -39,7 +38,6 @@
import org.apache.asterix.external.api.IRecordDataParser;
import org.apache.asterix.external.api.IStreamDataParser;
import org.apache.asterix.om.base.ABoolean;
-import org.apache.asterix.om.base.AGeometry;
import org.apache.asterix.om.base.ANull;
import org.apache.asterix.om.base.temporal.GregorianCalendarSystem;
import org.apache.asterix.om.types.AOrderedListType;
@@ -47,13 +45,13 @@
import org.apache.asterix.om.types.ATypeTag;
import org.apache.asterix.om.types.AUnionType;
import org.apache.asterix.om.types.AUnorderedListType;
-import org.apache.asterix.om.types.BuiltinType;
import org.apache.asterix.om.types.IAType;
import org.apache.asterix.om.types.hierachy.ATypeHierarchy;
import org.apache.asterix.om.types.hierachy.ITypeConvertComputer;
import org.apache.asterix.om.util.container.IObjectPool;
import org.apache.asterix.om.util.container.ListObjectPool;
import org.apache.asterix.runtime.operators.file.adm.AdmLexer;
+import org.apache.asterix.runtime.operators.file.adm.AdmLexer.TokenImage;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.api.IMutableValueStorage;
import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
@@ -74,64 +72,11 @@
private final IObjectPool<IMutableValueStorage, ATypeTag> abvsBuilderPool =
new ListObjectPool<IMutableValueStorage, ATypeTag>(new AbvsBuilderFactory());
+ private final TokenImage tmpTokenImage = new TokenImage();
+
private final String mismatchErrorMessage = "Mismatch Type, expecting a value of type ";
private final String mismatchErrorMessage2 = " got a value of type ";
- static class ParseException extends HyracksDataException {
- private static final long serialVersionUID = 1L;
- private String filename;
- private int line = -1;
- private int column = -1;
-
- public ParseException(String message) {
- super(message);
- }
-
- public ParseException(int errorCode, Serializable... param) {
- super(ErrorCode.ASTERIX, errorCode, ErrorCode.getErrorMessage(errorCode), param);
- }
-
- public ParseException(int errorCode, Throwable e, Serializable... param) {
- super(ErrorCode.ASTERIX, errorCode, e, ErrorCode.getErrorMessage(errorCode), param);
- addSuppressed(e);
- }
-
- public ParseException(Throwable cause) {
- super(cause);
- }
-
- public ParseException(String message, Throwable cause) {
- super(message, cause);
- }
-
- public ParseException(Throwable cause, String filename, int line, int column) {
- super(cause);
- setLocation(filename, line, column);
- }
-
- public void setLocation(String filename, int line, int column) {
- this.filename = filename;
- this.line = line;
- this.column = column;
- }
-
- @Override
- public String getMessage() {
- StringBuilder msg = new StringBuilder("Parse error");
- if (filename != null) {
- msg.append(" in file " + filename);
- }
- if (line >= 0) {
- if (column >= 0) {
- msg.append(" at (" + line + ", " + column + ")");
- } else {
- msg.append(" in line " + line);
- }
- }
- return msg.append(": " + super.getMessage()).toString();
- }
- }
-
public ADMDataParser(ARecordType recordType, boolean isStream) {
this(null, recordType, isStream);
}
@@ -256,15 +201,18 @@
break;
case AdmLexer.TOKEN_STRING_LITERAL:
if (checkType(ATypeTag.STRING, objectType)) {
- String tokenImage =
- admLexer.getLastTokenImage().substring(1, admLexer.getLastTokenImage().length() - 1);
- aString.setValue(admLexer.containsEscapes() ? replaceEscapes(tokenImage) : tokenImage);
- stringSerde.serialize(aString, out);
+ admLexer.getLastTokenImage(tmpTokenImage);
+ if (admLexer.containsEscapes()) {
+ replaceEscapes(tmpTokenImage);
+ }
+ int begin = tmpTokenImage.getBegin() + 1;
+ int len = tmpTokenImage.getLength() - 2;
+ parseString(tmpTokenImage.getBuffer(), begin, len, out);
} else if (checkType(ATypeTag.UUID, objectType)) {
// Dealing with UUID type that is represented by a string
- String tokenImage =
- admLexer.getLastTokenImage().substring(1, admLexer.getLastTokenImage().length() - 1);
- aUUID.parseUUIDString(tokenImage);
+ admLexer.getLastTokenImage(tmpTokenImage);
+ aUUID.parseUUIDString(tmpTokenImage.getBuffer(), tmpTokenImage.getBegin() + 1,
+ tmpTokenImage.getLength() - 2);
uuidSerde.serialize(aUUID, out);
} else if (checkType(ATypeTag.GEOMETRY, objectType)) {
// Parse the string as a WKT-encoded geometry
@@ -373,13 +321,14 @@
}
- private String replaceEscapes(String tokenImage) throws ParseException {
- char[] chars = tokenImage.toCharArray();
- int len = chars.length;
- int readpos = 0;
- int writepos = 0;
- int movemarker = 0;
- while (readpos < len) {
+ // TODO: This function should be optimized. Currently it has complexity of O(N*N)!
+ private void replaceEscapes(TokenImage tokenImage) throws ParseException {
+ char[] chars = tokenImage.getBuffer();
+ int end = tokenImage.getBegin() + tokenImage.getLength();
+ int readpos = tokenImage.getBegin();
+ int writepos = tokenImage.getBegin();
+ int movemarker = tokenImage.getBegin();
+ while (readpos < end) {
if (chars[readpos] == '\\') {
moveChars(chars, movemarker, readpos, readpos - writepos);
switch (chars[readpos + 1]) {
@@ -416,8 +365,8 @@
++writepos;
++readpos;
}
- moveChars(chars, movemarker, len, readpos - writepos);
- return new String(chars, 0, len - (readpos - writepos));
+ moveChars(chars, movemarker, end, readpos - writepos);
+ tokenImage.reset(chars, tokenImage.getBegin(), tokenImage.getLength() - (readpos - writepos));
}
private static void moveChars(char[] chars, int start, int end, int offset) {
@@ -517,16 +466,16 @@
expectingRecordField = false;
if (recType != null) {
- String fldName =
- admLexer.getLastTokenImage().substring(1, admLexer.getLastTokenImage().length() - 1);
+ admLexer.getLastTokenImage(tmpTokenImage);
+ String fldName = new String(tmpTokenImage.getBuffer(), tmpTokenImage.getBegin() + 1,
+ tmpTokenImage.getLength() - 2);
fieldId = recBuilder.getFieldId(fldName);
if ((fieldId < 0) && !recType.isOpen()) {
throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_EXTRA_FIELD_IN_CLOSED_RECORD,
fldName);
} else if ((fieldId < 0) && recType.isOpen()) {
- aStringFieldName.setValue(admLexer.getLastTokenImage().substring(1,
- admLexer.getLastTokenImage().length() - 1));
- stringSerde.serialize(aStringFieldName, fieldNameBuffer.getDataOutput());
+ parseString(tmpTokenImage.getBuffer(), tmpTokenImage.getBegin() + 1,
+ tmpTokenImage.getLength() - 2, fieldNameBuffer.getDataOutput());
openRecordField = true;
fieldType = null;
} else {
@@ -536,9 +485,9 @@
openRecordField = false;
}
} else {
- aStringFieldName.setValue(
- admLexer.getLastTokenImage().substring(1, admLexer.getLastTokenImage().length() - 1));
- stringSerde.serialize(aStringFieldName, fieldNameBuffer.getDataOutput());
+ admLexer.getLastTokenImage(tmpTokenImage);
+ parseString(tmpTokenImage.getBuffer(), tmpTokenImage.getBegin() + 1,
+ tmpTokenImage.getLength() - 2, fieldNameBuffer.getDataOutput());
openRecordField = true;
fieldType = null;
}
@@ -816,7 +765,13 @@
private void parseToNumericTarget(ATypeTag typeTag, IAType objectType, DataOutput out) throws IOException {
ATypeTag targetTypeTag = getTargetTypeTag(typeTag, objectType);
- if ((targetTypeTag == null) || !parseValue(admLexer.getLastTokenImage(), targetTypeTag, out)) {
+ boolean parsed = false;
+ if (targetTypeTag != null) {
+ admLexer.getLastTokenImage(tmpTokenImage);
+ parsed = parseValue(tmpTokenImage.getBuffer(), tmpTokenImage.getBegin(), tmpTokenImage.getLength(),
+ targetTypeTag, out);
+ }
+ if (!parsed) {
throw new ParseException(mismatchErrorMessage + objectType.getTypeName() + mismatchErrorMessage2 + typeTag);
}
}
@@ -828,8 +783,13 @@
castBuffer.reset();
dataOutput = castBuffer.getDataOutput();
}
-
- if ((targetTypeTag == null) || !parseValue(admLexer.getLastTokenImage(), typeTag, dataOutput)) {
+ boolean parsed = false;
+ if (targetTypeTag != null) {
+ admLexer.getLastTokenImage(tmpTokenImage);
+ parsed = parseValue(tmpTokenImage.getBuffer(), tmpTokenImage.getBegin(), tmpTokenImage.getLength(), typeTag,
+ dataOutput);
+ }
+ if (!parsed) {
throw new ParseException(mismatchErrorMessage + objectType.getTypeName() + mismatchErrorMessage2 + typeTag);
}
@@ -871,9 +831,11 @@
if (token == AdmLexer.TOKEN_CONSTRUCTOR_OPEN) {
token = admLexer.next();
if (token == AdmLexer.TOKEN_STRING_LITERAL) {
- String unquoted =
- admLexer.getLastTokenImage().substring(1, admLexer.getLastTokenImage().length() - 1);
- if (!parseValue(unquoted, typeTag, dataOutput)) {
+ admLexer.getLastTokenImage(tmpTokenImage);
+ int begin = tmpTokenImage.getBegin() + 1;
+ int len = tmpTokenImage.getLength() - 2;
+ // unquoted value
+ if (!parseValue(tmpTokenImage.getBuffer(), begin, len, typeTag, dataOutput)) {
throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_CONSTRUCTOR_MISSING_DESERIALIZER,
AdmLexer.tokenKindToString(token));
}
@@ -899,85 +861,86 @@
objectType.getTypeName() + " got " + typeTag);
}
- private boolean parseValue(String unquoted, ATypeTag typeTag, DataOutput out) throws HyracksDataException {
+ private boolean parseValue(char[] buffer, int begin, int len, ATypeTag typeTag, DataOutput out)
+ throws HyracksDataException {
switch (typeTag) {
case BOOLEAN:
- parseBoolean(unquoted, out);
+ parseBoolean(buffer, begin, len, out);
return true;
case TINYINT:
- parseInt8(unquoted, out);
+ parseInt8(buffer, begin, len, out);
return true;
case SMALLINT:
- parseInt16(unquoted, out);
+ parseInt16(buffer, begin, len, out);
return true;
case INTEGER:
- parseInt32(unquoted, out);
+ parseInt32(buffer, begin, len, out);
return true;
case BIGINT:
- parseInt64(unquoted, out);
+ parseInt64(buffer, begin, len, out);
return true;
case FLOAT:
- if ("INF".equals(unquoted)) {
+ if (matches("INF", buffer, begin, len)) {
aFloat.setValue(Float.POSITIVE_INFINITY);
- } else if ("-INF".equals(unquoted)) {
+ } else if (matches("-INF", buffer, begin, len)) {
aFloat.setValue(Float.NEGATIVE_INFINITY);
} else {
- aFloat.setValue(Float.parseFloat(unquoted));
+ aFloat.setValue(parseFloat(buffer, begin, len));
}
floatSerde.serialize(aFloat, out);
return true;
case DOUBLE:
- if ("INF".equals(unquoted)) {
+ if (matches("INF", buffer, begin, len)) {
aDouble.setValue(Double.POSITIVE_INFINITY);
- } else if ("-INF".equals(unquoted)) {
+ } else if (matches("-INF", buffer, begin, len)) {
aDouble.setValue(Double.NEGATIVE_INFINITY);
} else {
- aDouble.setValue(Double.parseDouble(unquoted));
+ aDouble.setValue(parseDouble(buffer, begin, len));
}
doubleSerde.serialize(aDouble, out);
return true;
case STRING:
- aString.setValue(unquoted);
- stringSerde.serialize(aString, out);
+ parseString(buffer, begin, len, out);
return true;
case TIME:
- parseTime(unquoted, out);
+ parseTime(buffer, begin, len, out);
return true;
case DATE:
- parseDate(unquoted, out);
+ parseDate(buffer, begin, len, out);
return true;
case DATETIME:
- parseDateTime(unquoted, out);
+ parseDateTime(buffer, begin, len, out);
return true;
case DURATION:
- parseDuration(unquoted, out);
+ parseDuration(buffer, begin, len, out);
return true;
case DAYTIMEDURATION:
- parseDateTimeDuration(unquoted, out);
+ parseDateTimeDuration(buffer, begin, len, out);
return true;
case YEARMONTHDURATION:
- parseYearMonthDuration(unquoted, out);
+ parseYearMonthDuration(buffer, begin, len, out);
return true;
case POINT:
- parsePoint(unquoted, out);
+ parsePoint(buffer, begin, len, out);
return true;
case POINT3D:
- parse3DPoint(unquoted, out);
+ parse3DPoint(buffer, begin, len, out);
return true;
case CIRCLE:
- parseCircle(unquoted, out);
+ parseCircle(buffer, begin, len, out);
return true;
case RECTANGLE:
- parseRectangle(unquoted, out);
+ parseRectangle(buffer, begin, len, out);
return true;
case LINE:
- parseLine(unquoted, out);
+ parseLine(buffer, begin, len, out);
return true;
case POLYGON:
- APolygonSerializerDeserializer.parse(unquoted, out);
+ //TODO: optimize
+ APolygonSerializerDeserializer.parse(new String(buffer, begin, len), out);
return true;
case UUID:
- aUUID.parseUUIDString(unquoted);
+ aUUID.parseUUIDString(buffer, begin, len);
uuidSerde.serialize(aUUID, out);
return true;
default:
@@ -985,39 +948,53 @@
}
}
- private void parseBoolean(String bool, DataOutput out) throws HyracksDataException {
- if (bool.equals("true")) {
+ private boolean matches(String value, char[] buffer, int begin, int len) {
+ if (len != value.length()) {
+ return false;
+ }
+ for (int i = 0; i < len; i++) {
+ if (value.charAt(i) != buffer[i + begin]) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private void parseBoolean(char[] buffer, int begin, int len, DataOutput out) throws HyracksDataException {
+ if (matches("true", buffer, begin, len)) {
booleanSerde.serialize(ABoolean.TRUE, out);
- } else if (bool.equals("false")) {
+ } else if (matches("false", buffer, begin, len)) {
booleanSerde.serialize(ABoolean.FALSE, out);
} else {
- throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, "boolean");
+ throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, new String(buffer, begin, len),
+ "boolean");
}
}
- private void parseInt8(String int8, DataOutput out) throws HyracksDataException {
+ private void parseInt8(char[] buffer, int begin, int len, DataOutput out) throws HyracksDataException {
boolean positive = true;
byte value = 0;
- int offset = 0;
+ int offset = begin;
- if (int8.charAt(offset) == '+') {
+ if (buffer[offset] == '+') {
offset++;
- } else if (int8.charAt(offset) == '-') {
+ } else if (buffer[offset] == '-') {
offset++;
positive = false;
}
- for (; offset < int8.length(); offset++) {
- if ((int8.charAt(offset) >= '0') && (int8.charAt(offset) <= '9')) {
- value = (byte) (((value * 10) + int8.charAt(offset)) - '0');
- } else if ((int8.charAt(offset) == 'i') && (int8.charAt(offset + 1) == '8')
- && ((offset + 2) == int8.length())) {
+ for (; offset < begin + len; offset++) {
+ if ((buffer[offset] >= '0') && (buffer[offset] <= '9')) {
+ value = (byte) (((value * 10) + buffer[offset]) - '0');
+ } else if (buffer[offset] == 'i' && buffer[offset + 1] == '8' && offset + 2 == begin + len) {
break;
} else {
- throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, "int8");
+ throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE,
+ new String(buffer, begin, len), "int8");
}
}
if (value < 0) {
- throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, "int8");
+ throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, new String(buffer, begin, len),
+ "int8");
}
if ((value > 0) && !positive) {
value *= -1;
@@ -1026,29 +1003,31 @@
int8Serde.serialize(aInt8, out);
}
- private void parseInt16(String int16, DataOutput out) throws HyracksDataException {
+ private void parseInt16(char[] buffer, int begin, int len, DataOutput out) throws HyracksDataException {
boolean positive = true;
short value = 0;
- int offset = 0;
+ int offset = begin;
- if (int16.charAt(offset) == '+') {
+ if (buffer[offset] == '+') {
offset++;
- } else if (int16.charAt(offset) == '-') {
+ } else if (buffer[offset] == '-') {
offset++;
positive = false;
}
- for (; offset < int16.length(); offset++) {
- if ((int16.charAt(offset) >= '0') && (int16.charAt(offset) <= '9')) {
- value = (short) (((value * 10) + int16.charAt(offset)) - '0');
- } else if ((int16.charAt(offset) == 'i') && (int16.charAt(offset + 1) == '1')
- && (int16.charAt(offset + 2) == '6') && ((offset + 3) == int16.length())) {
+ for (; offset < begin + len; offset++) {
+ if (buffer[offset] >= '0' && buffer[offset] <= '9') {
+ value = (short) ((value * 10) + buffer[offset] - '0');
+ } else if (buffer[offset] == 'i' && buffer[offset + 1] == '1' && buffer[offset + 2] == '6'
+ && offset + 3 == begin + len) {
break;
} else {
- throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, "int16");
+ throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE,
+ new String(buffer, begin, len), "int16");
}
}
if (value < 0) {
- throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, "int16");
+ throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, new String(buffer, begin, len),
+ "int16");
}
if ((value > 0) && !positive) {
value *= -1;
@@ -1057,29 +1036,31 @@
int16Serde.serialize(aInt16, out);
}
- private void parseInt32(String int32, DataOutput out) throws HyracksDataException {
+ private void parseInt32(char[] buffer, int begin, int len, DataOutput out) throws HyracksDataException {
boolean positive = true;
int value = 0;
- int offset = 0;
+ int offset = begin;
- if (int32.charAt(offset) == '+') {
+ if (buffer[offset] == '+') {
offset++;
- } else if (int32.charAt(offset) == '-') {
+ } else if (buffer[offset] == '-') {
offset++;
positive = false;
}
- for (; offset < int32.length(); offset++) {
- if ((int32.charAt(offset) >= '0') && (int32.charAt(offset) <= '9')) {
- value = (((value * 10) + int32.charAt(offset)) - '0');
- } else if ((int32.charAt(offset) == 'i') && (int32.charAt(offset + 1) == '3')
- && (int32.charAt(offset + 2) == '2') && ((offset + 3) == int32.length())) {
+ for (; offset < begin + len; offset++) {
+ if (buffer[offset] >= '0' && buffer[offset] <= '9') {
+ value = (value * 10) + buffer[offset] - '0';
+ } else if (buffer[offset] == 'i' && buffer[offset + 1] == '3' && buffer[offset + 2] == '2'
+ && offset + 3 == begin + len) {
break;
} else {
- throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, "int32");
+ throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE,
+ new String(buffer, begin, len), "int32");
}
}
if (value < 0) {
- throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, "int32");
+ throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, new String(buffer, begin, len),
+ "int32");
}
if ((value > 0) && !positive) {
value *= -1;
@@ -1089,29 +1070,31 @@
int32Serde.serialize(aInt32, out);
}
- private void parseInt64(String int64, DataOutput out) throws HyracksDataException {
+ private void parseInt64(char[] buffer, int begin, int len, DataOutput out) throws HyracksDataException {
boolean positive = true;
long value = 0;
- int offset = 0;
+ int offset = begin;
- if (int64.charAt(offset) == '+') {
+ if (buffer[offset] == '+') {
offset++;
- } else if (int64.charAt(offset) == '-') {
+ } else if (buffer[offset] == '-') {
offset++;
positive = false;
}
- for (; offset < int64.length(); offset++) {
- if ((int64.charAt(offset) >= '0') && (int64.charAt(offset) <= '9')) {
- value = (((value * 10) + int64.charAt(offset)) - '0');
- } else if ((int64.charAt(offset) == 'i') && (int64.charAt(offset + 1) == '6')
- && (int64.charAt(offset + 2) == '4') && ((offset + 3) == int64.length())) {
+ for (; offset < begin + len; offset++) {
+ if (buffer[offset] >= '0' && buffer[offset] <= '9') {
+ value = (value * 10) + buffer[offset] - '0';
+ } else if (buffer[offset] == 'i' && buffer[offset + 1] == '6' && buffer[offset + 2] == '4'
+ && offset + 3 == begin + len) {
break;
} else {
- throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, "int64");
+ throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE,
+ new String(buffer, begin, len), "int64");
}
}
if (value < 0) {
- throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, "int64");
+ throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, new String(buffer, begin, len),
+ "int64");
}
if ((value > 0) && !positive) {
value *= -1;
@@ -1136,4 +1119,4 @@
admLexer.reInit(new InputStreamReader(in));
return true;
}
-}
+}
\ No newline at end of file
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractDataParser.java
index 88fcc8d..859ac22 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractDataParser.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/AbstractDataParser.java
@@ -19,7 +19,10 @@
package org.apache.asterix.external.parser;
import java.io.DataOutput;
+import java.io.IOException;
+import org.apache.asterix.common.exceptions.ErrorCode;
+import org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
import org.apache.asterix.external.api.IDataParser;
import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider;
import org.apache.asterix.om.base.ABinary;
@@ -73,11 +76,14 @@
import org.apache.asterix.om.base.temporal.ADurationParserFactory.ADurationParseOption;
import org.apache.asterix.om.base.temporal.ATimeParserFactory;
import org.apache.asterix.om.base.temporal.GregorianCalendarSystem;
+import org.apache.asterix.om.types.ATypeTag;
import org.apache.asterix.om.types.BuiltinType;
import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.util.bytes.Base64Parser;
import org.apache.hyracks.util.bytes.HexParser;
+import org.apache.hyracks.util.string.UTF8StringReader;
+import org.apache.hyracks.util.string.UTF8StringWriter;
/**
* Base class for data parsers. Includes the common set of definitions for
@@ -143,6 +149,9 @@
protected ISerializerDeserializer<ANull> nullSerde =
SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.ANULL);
+ protected final AStringSerializerDeserializer untaggedStringSerde =
+ new AStringSerializerDeserializer(new UTF8StringWriter(), new UTF8StringReader());
+
protected final HexParser hexParser = new HexParser();
protected final Base64Parser base64Parser = new Base64Parser();
@@ -201,14 +210,14 @@
this.filename = filename;
}
- protected void parseTime(String time, DataOutput out) throws HyracksDataException {
- int chrononTimeInMs = ATimeParserFactory.parseTimePart(time, 0, time.length());
+ protected void parseTime(char[] buffer, int begin, int len, DataOutput out) throws HyracksDataException {
+ int chrononTimeInMs = ATimeParserFactory.parseTimePart(buffer, begin, len);
aTime.setValue(chrononTimeInMs);
timeSerde.serialize(aTime, out);
}
- protected void parseDate(String date, DataOutput out) throws HyracksDataException {
- long chrononTimeInMs = ADateParserFactory.parseDatePart(date, 0, date.length());
+ protected void parseDate(char[] buffer, int begin, int len, DataOutput out) throws HyracksDataException {
+ long chrononTimeInMs = ADateParserFactory.parseDatePart(buffer, begin, len);
short temp = 0;
if (chrononTimeInMs < 0 && chrononTimeInMs % GregorianCalendarSystem.CHRONON_OF_DAY != 0) {
temp = 1;
@@ -217,84 +226,93 @@
dateSerde.serialize(aDate, out);
}
- protected void parseDateTime(String datetime, DataOutput out) throws HyracksDataException {
+ protected void parseDateTime(char[] buffer, int begin, int len, DataOutput out) throws HyracksDataException {
// +1 if it is negative (-)
- short timeOffset = (short) ((datetime.charAt(0) == '-') ? 1 : 0);
- timeOffset += 8;
+ int timeOffset = (buffer[begin] == '-') ? 1 : 0;
- if (datetime.charAt(timeOffset) != 'T') {
+ timeOffset = timeOffset + 8 + begin;
+
+ if (buffer[timeOffset] != 'T') {
timeOffset += 2;
- if (datetime.charAt(timeOffset) != 'T') {
- throw new HyracksDataException("This can not be an instance of datetime: missing T");
+ if (buffer[timeOffset] != 'T') {
+ throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_INTERVAL_INVALID_DATETIME);
}
}
- long chrononTimeInMs = ADateParserFactory.parseDatePart(datetime, 0, timeOffset);
- chrononTimeInMs +=
- ATimeParserFactory.parseTimePart(datetime, timeOffset + 1, datetime.length() - timeOffset - 1);
+ long chrononTimeInMs = ADateParserFactory.parseDatePart(buffer, begin, timeOffset - begin);
+ chrononTimeInMs += ATimeParserFactory.parseTimePart(buffer, timeOffset + 1, begin + len - timeOffset - 1);
aDateTime.setValue(chrononTimeInMs);
datetimeSerde.serialize(aDateTime, out);
}
- protected void parseDuration(String duration, DataOutput out) throws HyracksDataException {
- ADurationParserFactory.parseDuration(duration, 0, duration.length(), aDuration, ADurationParseOption.All);
+ protected void parseDuration(char[] buffer, int begin, int len, DataOutput out) throws HyracksDataException {
+ ADurationParserFactory.parseDuration(buffer, begin, len, aDuration, ADurationParseOption.All);
durationSerde.serialize(aDuration, out);
}
- protected void parseDateTimeDuration(String durationString, DataOutput out) throws HyracksDataException {
- ADurationParserFactory.parseDuration(durationString, 0, durationString.length(), aDayTimeDuration,
- ADurationParseOption.All);
+ protected void parseDateTimeDuration(char[] buffer, int begin, int len, DataOutput out)
+ throws HyracksDataException {
+ ADurationParserFactory.parseDuration(buffer, begin, len, aDayTimeDuration, ADurationParseOption.All);
dayTimeDurationSerde.serialize(aDayTimeDuration, out);
}
- protected void parseYearMonthDuration(String durationString, DataOutput out) throws HyracksDataException {
- ADurationParserFactory.parseDuration(durationString, 0, durationString.length(), aYearMonthDuration,
- ADurationParseOption.All);
+ protected void parseYearMonthDuration(char[] buffer, int begin, int len, DataOutput out)
+ throws HyracksDataException {
+ ADurationParserFactory.parseDuration(buffer, begin, len, aYearMonthDuration, ADurationParseOption.All);
yearMonthDurationSerde.serialize(aYearMonthDuration, out);
}
- protected void parsePoint(String point, DataOutput out) throws HyracksDataException {
+ protected void parsePoint(char[] buffer, int begin, int len, DataOutput out) throws HyracksDataException {
try {
- aPoint.setValue(Double.parseDouble(point.substring(0, point.indexOf(','))),
- Double.parseDouble(point.substring(point.indexOf(',') + 1, point.length())));
+ int commaIndex = indexOf(buffer, begin, len, ',');
+ aPoint.setValue(parseDouble(buffer, begin, commaIndex - begin),
+ parseDouble(buffer, commaIndex + 1, begin + len - commaIndex - 1));
pointSerde.serialize(aPoint, out);
- } catch (HyracksDataException e) {
- throw new HyracksDataException(point + " can not be an instance of point");
+ } catch (Exception e) {
+ throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, e, new String(buffer, begin, len),
+ "point");
}
}
- protected void parse3DPoint(String point3d, DataOutput out) throws HyracksDataException {
+ protected void parse3DPoint(char[] buffer, int begin, int len, DataOutput out) throws HyracksDataException {
try {
- int firstCommaIndex = point3d.indexOf(',');
- int secondCommaIndex = point3d.indexOf(',', firstCommaIndex + 1);
- aPoint3D.setValue(Double.parseDouble(point3d.substring(0, firstCommaIndex)),
- Double.parseDouble(point3d.substring(firstCommaIndex + 1, secondCommaIndex)),
- Double.parseDouble(point3d.substring(secondCommaIndex + 1, point3d.length())));
+ int firstCommaIndex = indexOf(buffer, begin, len, ',');
+ int secondCommaIndex = indexOf(buffer, firstCommaIndex + 1, begin + len - firstCommaIndex - 1, ',');
+ aPoint3D.setValue(parseDouble(buffer, begin, firstCommaIndex - begin),
+ parseDouble(buffer, firstCommaIndex + 1, secondCommaIndex - firstCommaIndex - 1),
+ parseDouble(buffer, secondCommaIndex + 1, begin + len - secondCommaIndex - 1));
point3DSerde.serialize(aPoint3D, out);
- } catch (HyracksDataException e) {
- throw new HyracksDataException(point3d + " can not be an instance of point3d");
+ } catch (Exception e) {
+ throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, e, new String(buffer, begin, len),
+ "point3d");
}
}
- protected void parseCircle(String circle, DataOutput out) throws HyracksDataException {
+ protected void parseCircle(char[] buffer, int begin, int len, DataOutput out) throws HyracksDataException {
try {
- String[] parts = circle.split(" ");
- aPoint.setValue(Double.parseDouble(parts[0].split(",")[0]), Double.parseDouble(parts[0].split(",")[1]));
- aCircle.setValue(aPoint, Double.parseDouble(parts[1].substring(0, parts[1].length())));
+ int firstCommaIndex = indexOf(buffer, begin, len, ',');
+ int spaceIndex = indexOf(buffer, firstCommaIndex + 1, begin + len - firstCommaIndex - 1, ' ');
+ aPoint.setValue(parseDouble(buffer, begin, firstCommaIndex - begin),
+ parseDouble(buffer, firstCommaIndex + 1, spaceIndex - firstCommaIndex - 1));
+ aCircle.setValue(aPoint, parseDouble(buffer, spaceIndex + 1, begin + len - spaceIndex - 1));
circleSerde.serialize(aCircle, out);
- } catch (HyracksDataException e) {
- throw new HyracksDataException(circle + " can not be an instance of circle");
+ } catch (Exception e) {
+ throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, e, new String(buffer, begin, len),
+ "circle");
}
}
- protected void parseRectangle(String rectangle, DataOutput out) throws HyracksDataException {
+ protected void parseRectangle(char[] buffer, int begin, int len, DataOutput out) throws HyracksDataException {
try {
- String[] points = rectangle.split(" ");
- if (points.length != 2) {
- throw new HyracksDataException("rectangle consists of only 2 points.");
- }
- aPoint.setValue(Double.parseDouble(points[0].split(",")[0]), Double.parseDouble(points[0].split(",")[1]));
- aPoint2.setValue(Double.parseDouble(points[1].split(",")[0]), Double.parseDouble(points[1].split(",")[1]));
+ int spaceIndex = indexOf(buffer, begin, len, ' ');
+
+ int firstCommaIndex = indexOf(buffer, begin, len, ',');
+ aPoint.setValue(parseDouble(buffer, begin, firstCommaIndex - begin),
+ parseDouble(buffer, firstCommaIndex + 1, spaceIndex - firstCommaIndex - 1));
+
+ int secondCommaIndex = indexOf(buffer, spaceIndex + 1, begin + len - spaceIndex - 1, ',');
+ aPoint2.setValue(parseDouble(buffer, spaceIndex + 1, secondCommaIndex - spaceIndex - 1),
+ parseDouble(buffer, secondCommaIndex + 1, begin + len - secondCommaIndex - 1));
if (aPoint.getX() > aPoint2.getX() && aPoint.getY() > aPoint2.getY()) {
aRectangle.setValue(aPoint2, aPoint);
} else if (aPoint.getX() < aPoint2.getX() && aPoint.getY() < aPoint2.getY()) {
@@ -304,23 +322,26 @@
"Rectangle arugment must be either (bottom left point, top right point) or (top right point, bottom left point)");
}
rectangleSerde.serialize(aRectangle, out);
- } catch (HyracksDataException e) {
- throw new HyracksDataException(rectangle + " can not be an instance of rectangle");
+ } catch (Exception e) {
+ throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, e, new String(buffer, begin, len),
+ "rectangle");
}
}
- protected void parseLine(String line, DataOutput out) throws HyracksDataException {
+ /**
+ * Parses the text in {@code buffer[begin, begin + len)} as a line written as {@code "x1,y1 x2,y2"} and
+ * serializes it to {@code out} through {@code lineSerde}.
+ * <p>
+ * The two end points are loaded into the reusable {@code aPoint} / {@code aPoint2} instances and then
+ * into {@code aLine}.
+ *
+ * @param buffer characters holding the line text
+ * @param begin index of the first character of the line text
+ * @param len number of characters that make up the line text
+ * @param out destination of the serialized line
+ * @throws HyracksDataException a {@link ParseException} with code PARSER_ADM_DATA_PARSER_WRONG_INSTANCE,
+ * wrapping whatever exception was raised while locating the separators, parsing the numbers or serializing
+ */
+ protected void parseLine(char[] buffer, int begin, int len, DataOutput out) throws HyracksDataException {
try {
- String[] points = line.split(" ");
- if (points.length != 2) {
- throw new HyracksDataException("line consists of only 2 points.");
- }
- aPoint.setValue(Double.parseDouble(points[0].split(",")[0]), Double.parseDouble(points[0].split(",")[1]));
- aPoint2.setValue(Double.parseDouble(points[1].split(",")[0]), Double.parseDouble(points[1].split(",")[1]));
+ // The first space separates the two points; it also marks where the first point's y value ends.
+ int spaceIndex = indexOf(buffer, begin, len, ' ');
+ int firstCommaIndex = indexOf(buffer, begin, len, ',');
+ aPoint.setValue(parseDouble(buffer, begin, firstCommaIndex - begin),
+ parseDouble(buffer, firstCommaIndex + 1, spaceIndex - firstCommaIndex - 1));
+ // The second comma is searched only in the part after the space, i.e. inside the second point.
+ int secondCommaIndex = indexOf(buffer, spaceIndex + 1, begin + len - spaceIndex - 1, ',');
+ aPoint2.setValue(parseDouble(buffer, spaceIndex + 1, secondCommaIndex - spaceIndex - 1),
+ parseDouble(buffer, secondCommaIndex + 1, begin + len - secondCommaIndex - 1));
aLine.setValue(aPoint, aPoint2);
lineSerde.serialize(aLine, out);
- } catch (HyracksDataException e) {
- throw new HyracksDataException(line + " can not be an instance of line");
+ } catch (Exception e) {
+ // Broad catch on purpose: a missing separator surfaces as IllegalArgumentException from indexOf and a
+ // malformed number as NumberFormatException; all of them are reported as the same parse error.
+ throw new ParseException(ErrorCode.PARSER_ADM_DATA_PARSER_WRONG_INSTANCE, e, new String(buffer, begin, len),
+ "line");
}
}
@@ -363,4 +384,34 @@
return ATimeParserFactory.parseTimePart(interval, startOffset, endOffset - startOffset + 1);
}
-}
+
+    /**
+     * Parses the characters in {@code buffer[begin, begin + len)} as a {@code double}.
+     *
+     * @param buffer characters holding the number text
+     * @param begin index of the first character of the number
+     * @param len number of characters that make up the number
+     * @return the parsed primitive value
+     * @throws NumberFormatException if the slice is not a parsable double
+     * @throws IndexOutOfBoundsException if the slice lies outside {@code buffer}
+     */
+    protected double parseDouble(char[] buffer, int begin, int len) {
+        // TODO: parse double directly from char[]
+        // Double.parseDouble yields the primitive directly; Double.valueOf would box and then auto-unbox.
+        return Double.parseDouble(new String(buffer, begin, len));
+    }
+
+    /**
+     * Parses the characters in {@code buffer[begin, begin + len)} as a {@code float}.
+     *
+     * @param buffer characters holding the number text
+     * @param begin index of the first character of the number
+     * @param len number of characters that make up the number
+     * @return the parsed primitive value
+     * @throws NumberFormatException if the slice is not a parsable float
+     * @throws IndexOutOfBoundsException if the slice lies outside {@code buffer}
+     */
+    protected float parseFloat(char[] buffer, int begin, int len) {
+        // TODO: parse float directly from char[]
+        // Float.parseFloat yields the primitive directly; Float.valueOf would box and then auto-unbox.
+        return Float.parseFloat(new String(buffer, begin, len));
+    }
+
+    /**
+     * Finds the first occurrence of {@code target} within {@code buffer[begin, begin + len)}.
+     *
+     * @param buffer characters to search
+     * @param begin index at which the search starts
+     * @param len number of characters to examine
+     * @param target character to look for
+     * @return the absolute index into {@code buffer} of the first match
+     * @throws IllegalArgumentException if {@code target} does not occur in the examined range
+     */
+    protected int indexOf(char[] buffer, int begin, int len, char target) {
+        final int end = begin + len;
+        int pos = begin;
+        // Advance until either the target is hit or the range is exhausted.
+        while (pos < end && buffer[pos] != target) {
+            pos++;
+        }
+        if (pos < end) {
+            return pos;
+        }
+        throw new IllegalArgumentException("Cannot find " + target + " in " + new String(buffer, begin, len));
+    }
+
+    /**
+     * Writes the characters in {@code buffer[begin, begin + length)} to {@code out} as a string value:
+     * first the STRING type tag byte, then the payload produced by {@code untaggedStringSerde}.
+     *
+     * @param buffer characters holding the string content
+     * @param begin index of the first character to write
+     * @param length number of characters to write
+     * @param out destination of the serialized value
+     * @throws HyracksDataException a {@link ParseException} wrapping any {@link IOException} raised while writing
+     */
+    protected void parseString(char[] buffer, int begin, int length, DataOutput out) throws HyracksDataException {
+        final int stringTag = ATypeTag.STRING.serialize();
+        try {
+            out.writeByte(stringTag);
+            untaggedStringSerde.serialize(buffer, begin, length, out);
+        } catch (IOException ioe) {
+            throw new ParseException(ioe);
+        }
+    }
+}
\ No newline at end of file
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java
index ce8780d..209ba34 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/JSONDataParser.java
@@ -426,26 +426,27 @@
* @throws IOException
*/
private void serializeString(ATypeTag stringVariantType, DataOutput out) throws IOException {
- final String stringValue = jsonParser.getText();
+ // Work on the parser's character slice (array, offset, length) rather than materializing a String per value.
+ // NOTE(review): Jackson may hand back its internal buffer here; the slice is consumed below before this
+ // method advances the parser — confirm that none of the parse* callees retains the array.
+ char[] buffer = jsonParser.getTextCharacters();
+ int begin = jsonParser.getTextOffset();
+ int len = jsonParser.getTextLength();
+ // For ANY the target type is taken from the current token; otherwise the requested variant is used as is.
final ATypeTag typeToUse = stringVariantType == ATypeTag.ANY ? currentToken().getTypeTag() : stringVariantType;
switch (typeToUse) {
case STRING:
- aString.setValue(stringValue);
- stringSerde.serialize(aString, out);
+ parseString(buffer, begin, len, out);
break;
case DATE:
- parseDate(stringValue, out);
+ parseDate(buffer, begin, len, out);
break;
case DATETIME:
- parseDateTime(stringValue, out);
+ parseDateTime(buffer, begin, len, out);
break;
case TIME:
- parseTime(stringValue, out);
+ parseTime(buffer, begin, len, out);
break;
default:
+ // Only STRING, DATE, DATETIME and TIME are handled; any other type tag is reported as unsupported.
throw new RuntimeDataException(ErrorCode.TYPE_UNSUPPORTED, jsonParser.currentToken().toString());
}
}
-}
+}
\ No newline at end of file
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ParseException.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ParseException.java
new file mode 100644
index 0000000..34e3eef
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/ParseException.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.parser;
+
+import java.io.Serializable;
+
+import org.apache.asterix.common.exceptions.ErrorCode;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+/**
+ * A {@link HyracksDataException} raised while parsing external data. It can optionally carry the location
+ * (file name, line, column) at which parsing failed; {@link #getMessage()} prefixes the underlying message
+ * with that location.
+ */
+public class ParseException extends HyracksDataException {
+    private static final long serialVersionUID = 1L;
+    // Location of the failure; a null file name or a negative line/column means "unknown".
+    private String filename;
+    private int line = -1;
+    private int column = -1;
+
+    /**
+     * @param message description of the parse failure
+     */
+    public ParseException(String message) {
+        super(message);
+    }
+
+    /**
+     * @param errorCode ASTERIX error code whose message template is looked up through {@link ErrorCode}
+     * @param param values for the placeholders of the message template
+     */
+    public ParseException(int errorCode, Serializable... param) {
+        super(ErrorCode.ASTERIX, errorCode, ErrorCode.getErrorMessage(errorCode), param);
+    }
+
+    /**
+     * @param errorCode ASTERIX error code whose message template is looked up through {@link ErrorCode}
+     * @param e the exception that triggered this one; may be {@code null}
+     * @param param values for the placeholders of the message template
+     */
+    public ParseException(int errorCode, Throwable e, Serializable... param) {
+        super(ErrorCode.ASTERIX, errorCode, e, ErrorCode.getErrorMessage(errorCode), param);
+        // Throwable.addSuppressed rejects null with a NullPointerException, so only record a real cause.
+        if (e != null) {
+            addSuppressed(e);
+        }
+    }
+
+    /**
+     * @param cause the exception that triggered this one
+     */
+    public ParseException(Throwable cause) {
+        super(cause);
+    }
+
+    /**
+     * @param message description of the parse failure
+     * @param cause the exception that triggered this one
+     */
+    public ParseException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * @param cause the exception that triggered this one
+     * @param filename name of the file being parsed, or {@code null} if unknown
+     * @param line line of the failure, negative if unknown
+     * @param column column of the failure, negative if unknown
+     */
+    public ParseException(Throwable cause, String filename, int line, int column) {
+        super(cause);
+        setLocation(filename, line, column);
+    }
+
+    /**
+     * Records where the parse failure happened.
+     *
+     * @param filename name of the file being parsed, or {@code null} if unknown
+     * @param line line of the failure, negative if unknown
+     * @param column column of the failure, negative if unknown
+     */
+    public void setLocation(String filename, int line, int column) {
+        this.filename = filename;
+        this.line = line;
+        this.column = column;
+    }
+
+    /**
+     * Builds {@code "Parse error[ in file F][ at (L, C) | in line L]: <underlying message>"}; the bracketed
+     * parts appear only when the corresponding location information is known.
+     */
+    @Override
+    public String getMessage() {
+        StringBuilder msg = new StringBuilder("Parse error");
+        if (filename != null) {
+            msg.append(" in file ").append(filename);
+        }
+        if (line >= 0) {
+            if (column >= 0) {
+                msg.append(" at (").append(line).append(", ").append(column).append(")");
+            } else {
+                msg.append(" in line ").append(line);
+            }
+        }
+        return msg.append(": ").append(super.getMessage()).toString();
+    }
+}
\ No newline at end of file
diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ADMDataParserTest.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ADMDataParserTest.java
index db85e64..a4e4c33 100644
--- a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ADMDataParserTest.java
+++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ADMDataParserTest.java
@@ -27,8 +27,6 @@
import java.nio.ByteBuffer;
import java.util.concurrent.atomic.AtomicInteger;
-import com.esri.core.geometry.ogc.OGCGeometry;
-import com.esri.core.geometry.ogc.OGCPoint;
import org.apache.asterix.external.parser.ADMDataParser;
import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider;
import org.apache.asterix.om.base.AGeometry;
@@ -42,32 +40,32 @@
import org.apache.asterix.om.types.IAType;
import org.apache.hadoop.io.DataInputByteBuffer;
import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
-import org.apache.hyracks.dataflow.common.comm.io.FrameDeserializer;
-import org.apache.hyracks.dataflow.common.comm.io.FrameDeserializingDataReader;
-import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
import org.junit.Assert;
import org.junit.Test;
+import com.esri.core.geometry.ogc.OGCPoint;
+
import junit.extensions.PA;
public class ADMDataParserTest {
@Test
public void test() throws IOException {
- String[] dates = { "-9537-08-04", "9656-06-03", "-9537-04-04", "9656-06-04", "-9537-10-04", "9626-09-05" };
+ char[][] dates = toChars(
+ new String[] { "-9537-08-04", "9656-06-03", "-9537-04-04", "9656-06-04", "-9537-10-04", "9626-09-05" });
AMutableDate[] parsedDates =
new AMutableDate[] { new AMutableDate(-4202630), new AMutableDate(2807408), new AMutableDate(-4202752),
new AMutableDate(2807409), new AMutableDate(-4202569), new AMutableDate(2796544), };
- String[] times = { "12:04:45.689Z", "12:41:59.002Z", "12:10:45.169Z", "15:37:48.736Z", "04:16:42.321Z",
- "12:22:56.816Z" };
+ char[][] times = toChars(new String[] { "12:04:45.689Z", "12:41:59.002Z", "12:10:45.169Z", "15:37:48.736Z",
+ "04:16:42.321Z", "12:22:56.816Z" });
AMutableTime[] parsedTimes =
new AMutableTime[] { new AMutableTime(43485689), new AMutableTime(45719002), new AMutableTime(43845169),
new AMutableTime(56268736), new AMutableTime(15402321), new AMutableTime(44576816), };
- String[] dateTimes = { "-2640-10-11T17:32:15.675Z", "4104-02-01T05:59:11.902Z", "0534-12-08T08:20:31.487Z",
- "6778-02-16T22:40:21.653Z", "2129-12-12T13:18:35.758Z", "8647-07-01T13:10:19.691Z" };
+ char[][] dateTimes = toChars(
+ new String[] { "-2640-10-11T17:32:15.675Z", "4104-02-01T05:59:11.902Z", "0534-12-08T08:20:31.487Z",
+ "6778-02-16T22:40:21.653Z", "2129-12-12T13:18:35.758Z", "8647-07-01T13:10:19.691Z" });
AMutableDateTime[] parsedDateTimes =
new AMutableDateTime[] { new AMutableDateTime(-145452954464325L), new AMutableDateTime(67345192751902L),
new AMutableDateTime(-45286270768513L), new AMutableDateTime(151729886421653L),
@@ -88,24 +86,24 @@
while (round++ < 10000) {
// Test parseDate.
for (int index = 0; index < dates.length; ++index) {
- PA.invokeMethod(parser, "parseDate(java.lang.String, java.io.DataOutput)", dates[index],
- dos);
+ PA.invokeMethod(parser, "parseDate(char[], int, int, java.io.DataOutput)", dates[index],
+ 0, dates[index].length, dos);
AMutableDate aDate = (AMutableDate) PA.getValue(parser, "aDate");
Assert.assertTrue(aDate.equals(parsedDates[index]));
}
// Tests parseTime.
for (int index = 0; index < times.length; ++index) {
- PA.invokeMethod(parser, "parseTime(java.lang.String, java.io.DataOutput)", times[index],
- dos);
+ PA.invokeMethod(parser, "parseTime(char[], int, int, java.io.DataOutput)", times[index],
+ 0, times[index].length, dos);
AMutableTime aTime = (AMutableTime) PA.getValue(parser, "aTime");
Assert.assertTrue(aTime.equals(parsedTimes[index]));
}
// Tests parseDateTime.
for (int index = 0; index < dateTimes.length; ++index) {
- PA.invokeMethod(parser, "parseDateTime(java.lang.String, java.io.DataOutput)",
- dateTimes[index], dos);
+ PA.invokeMethod(parser, "parseDateTime(char[], int, int, java.io.DataOutput)",
+ dateTimes[index], 0, dateTimes[index].length, dos);
AMutableDateTime aDateTime = (AMutableDateTime) PA.getValue(parser, "aDateTime");
Assert.assertTrue(aDateTime.equals(parsedDateTimes[index]));
}
@@ -132,6 +130,14 @@
Assert.assertTrue(errorCount.get() == 0);
}
+    /**
+     * Converts every string of {@code strings} into its character array, preserving the order.
+     *
+     * @param strings the strings to convert
+     * @return an array with one {@code char[]} per input string
+     */
+    private char[][] toChars(String[] strings) {
+        final char[][] results = new char[strings.length][];
+        int slot = 0;
+        for (String text : strings) {
+            results[slot++] = text.toCharArray();
+        }
+        return results;
+    }
+
@Test
public void testWKTParser() {
try {
@@ -177,4 +183,4 @@
}
}
-}
+}
\ No newline at end of file
diff --git a/asterixdb/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java b/asterixdb/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java
index 3613166..044852b 100644
--- a/asterixdb/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java
+++ b/asterixdb/asterix-maven-plugins/lexer-generator-maven-plugin/src/main/resources/Lexer.java
@@ -124,6 +124,62 @@
}
}
+    /**
+     * Points {@code image} at the characters of the last token without creating a String.
+     *
+     * @param image holder that receives the token characters
+     */
+    public void getLastTokenImage(TokenImage image) {
+        if (bufpos < tokenBegin) {
+            // The token wraps around the end of the buffer: hand over the tail piece
+            // [tokenBegin, bufsize) followed by the head piece [0, bufpos).
+            image.reset(buffer, tokenBegin, bufsize - tokenBegin, buffer, 0, bufpos);
+            return;
+        }
+        // The token is one contiguous slice of the buffer.
+        image.reset(buffer, tokenBegin, bufpos - tokenBegin);
+    }
+
+    /**
+     * A view (array, start offset, length) over the characters of a token. A token made of a single slice is
+     * referenced in place; a token made of two pieces is copied into a private scratch array first.
+     */
+    public static class TokenImage {
+        // Factor by which the scratch array grows when it is too small.
+        private static final double TMP_BUFFER_GROWTH = 1.5;
+
+        private char[] buffer;
+        private int begin;
+        private int length;
+
+        // used to hold circular tokens
+        private char[] tmpBuffer;
+
+        /**
+         * Makes this image refer to {@code buffer[begin, begin + length)}; nothing is copied.
+         */
+        public void reset(char[] buffer, int begin, int length) {
+            this.buffer = buffer;
+            this.begin = begin;
+            this.length = length;
+        }
+
+        /**
+         * Makes this image refer to the concatenation of {@code buffer[begin, begin + length)} and
+         * {@code extraBuffer[extraBegin, extraBegin + extraLength)}, copied into the scratch array.
+         */
+        public void reset(char[] buffer, int begin, int length, char[] extraBuffer, int extraBegin, int extraLength) {
+            final int total = length + extraLength;
+            ensureTmpBufferSize(total);
+            System.arraycopy(buffer, begin, tmpBuffer, 0, length);
+            System.arraycopy(extraBuffer, extraBegin, tmpBuffer, length, extraLength);
+            this.buffer = tmpBuffer;
+            this.begin = 0;
+            this.length = total;
+        }
+
+        /** @return the array that holds the token characters */
+        public char[] getBuffer() {
+            return buffer;
+        }
+
+        /** @return index of the first token character within {@link #getBuffer()} */
+        public int getBegin() {
+            return begin;
+        }
+
+        /** @return number of token characters */
+        public int getLength() {
+            return length;
+        }
+
+        /**
+         * Replaces the scratch array with a larger one when it cannot hold {@code size} characters. The old
+         * content is not carried over.
+         */
+        private void ensureTmpBufferSize(int size) {
+            final int capacity = tmpBuffer == null ? 0 : tmpBuffer.length;
+            if (capacity >= size) {
+                return;
+            }
+            final int grown = (int) (capacity * TMP_BUFFER_GROWTH);
+            tmpBuffer = new char[Math.max(grown, size)];
+        }
+
+    }
+
public int getColumn() {
return column;
}
@@ -262,4 +318,4 @@
endOf_UNUSED_Buffer = bufsize;
tokenBegin = 0;
}
-}
+}
\ No newline at end of file
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/builders/RecordBuilder.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/builders/RecordBuilder.java
index 95aef79..40b42eb 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/builders/RecordBuilder.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/builders/RecordBuilder.java
@@ -69,7 +69,7 @@
private int[] openFieldNameLengths;
private int numberOfOpenFields;
- private RuntimeRecordTypeInfo recTypeInfo;
+ private final RuntimeRecordTypeInfo recTypeInfo;
public RecordBuilder() {
this.closedPartOutputStream = new ByteArrayAccessibleOutputStream();
@@ -302,12 +302,7 @@
@Override
public int getFieldId(String fieldName) {
- for (int i = 0; i < recType.getFieldNames().length; i++) {
- if (recType.getFieldNames()[i].equals(fieldName)) {
- return i;
- }
- }
- return -1;
+ // Delegates the name lookup to the record type rather than scanning getFieldNames() on every call.
+ // NOTE(review): the removed loop returned -1 for an unknown name — confirm getFieldIndex reports a miss
+ // the same way.
+ return recType.getFieldIndex(fieldName);
}
public IBinaryHashFunction getFieldNameHashFunction() {
@@ -317,4 +312,4 @@
public IBinaryComparator getFieldNameComparator() {
return utf8Comparator;
}
-}
+}
\ No newline at end of file
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/serde/AStringSerializerDeserializer.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/serde/AStringSerializerDeserializer.java
index 888b34c..4bb9f08 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/serde/AStringSerializerDeserializer.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/serde/AStringSerializerDeserializer.java
@@ -69,4 +69,8 @@
throw HyracksDataException.create(e);
}
}
-}
+
+ /**
+ * Serializes the characters in the given slice of {@code buffer} to {@code out} by delegating to
+ * {@code UTF8StringUtil.writeUTF8} with this instance's {@code utf8StringWriter}; no String or AString
+ * object is needed.
+ *
+ * @param buffer characters holding the string content
+ * @param start index of the first character to write
+ * @param length number of characters to write
+ * @param out destination of the serialized string
+ * @throws IOException propagated from the underlying write
+ */
+ public void serialize(char[] buffer, int start, int length, DataOutput out) throws IOException {
+ UTF8StringUtil.writeUTF8(buffer, start, length, out, utf8StringWriter);
+ }
+}
\ No newline at end of file
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/AMutableUUID.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/AMutableUUID.java
index 2fb69ab..9a097dc 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/AMutableUUID.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/AMutableUUID.java
@@ -23,15 +23,16 @@
public class AMutableUUID extends AUUID {
- public void parseUUIDString(String tokenImage) throws HyracksDataException {
- if (tokenImage.length() != UUID_CHARS) {
- throw new HyracksDataException("This is not a correct UUID value: " + tokenImage);
+ // Reused across calls so that parsing a UUID string does not allocate a new byte[] each time.
+ private final byte[] hexBytesBuffer = new byte[UUID_CHARS];
+
+ /**
+ * Parses the UUID text in {@code buffer[begin, begin + len)} by copying its characters into a byte array
+ * and handing that array to {@link #parseUUIDHexBytes(byte[], int)}.
+ *
+ * @param buffer characters holding the UUID text
+ * @param begin index of the first character of the UUID text
+ * @param len number of characters of the UUID text; must equal {@code UUID_CHARS}
+ * @throws HyracksDataException if the length is wrong, if a character is outside the ASCII range, or if
+ * {@code parseUUIDHexBytes} rejects the content
+ */
+ public void parseUUIDString(char[] buffer, int begin, int len) throws HyracksDataException {
+ if (len != UUID_CHARS) {
+ throw new HyracksDataException("This is not a correct UUID value: " + new String(buffer, begin, len));
}
- byte[] hexBytes = new byte[UUID_CHARS];
- for (int i = 0; i < tokenImage.length(); i++) {
- hexBytes[i] = (byte) tokenImage.charAt(i);
+ for (int i = 0; i < len; i++) {
+ char c = buffer[begin + i];
+ // Narrowing a char to byte keeps only the low 8 bits, so a non-ASCII character (e.g. U+0161) would
+ // silently turn into a different, possibly valid, character ('a'). Reject it before narrowing.
+ if (c > 0x7F) {
+ throw new HyracksDataException("This is not a correct UUID value: " + new String(buffer, begin, len));
+ }
+ hexBytesBuffer[i] = (byte) c;
}
- parseUUIDHexBytes(hexBytes, 0);
+ parseUUIDHexBytes(hexBytesBuffer, 0);
}
public void parseUUIDHexBytes(byte[] serString, int offset) throws HyracksDataException {
@@ -122,4 +123,4 @@
}
}
-}
+}
\ No newline at end of file
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ATimeParserFactory.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ATimeParserFactory.java
index 291d963..39f5b3a 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ATimeParserFactory.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/base/temporal/ATimeParserFactory.java
@@ -63,7 +63,6 @@
* @throws HyracksDataException
*/
public static int parseTimePart(String timeString, int start, int length) throws HyracksDataException {
-
int offset = 0;
int hour = 0, min = 0, sec = 0, millis = 0;
@@ -518,4 +517,4 @@
return timezone;
}
-}
+}
\ No newline at end of file
diff --git a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
index 11fb6c0..78fdff1 100644
--- a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
+++ b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
@@ -595,7 +595,7 @@
out.write(tempBytes, 0, count);
}
- static void writeUTF8(char[] buffer, int start, int length, DataOutput out, UTF8StringWriter writer)
+ public static void writeUTF8(char[] buffer, int start, int length, DataOutput out, UTF8StringWriter writer)
throws IOException {
int utflen = 0;
int count = 0;