Implement TokenizeOperator in addition to the changes made by Zachary Heilbron.
Now, the compiler builds the bulk-load plan.
For other details, please refer to the corresponding Hyracks changes.
Change-Id: I646539d88ea2bdc6da4fbf2b6e9460a6189125ff
Reviewed-on: http://fulliautomatix.ics.uci.edu:8443/79
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Young-Seok Kim <kisskys@gmail.com>
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/FieldAccessByIndexEvalFactory.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/FieldAccessByIndexEvalFactory.java
index 20bd366..207f3af 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/FieldAccessByIndexEvalFactory.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/FieldAccessByIndexEvalFactory.java
@@ -3,9 +3,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -54,10 +54,7 @@
this.recordEvalFactory = recordEvalFactory;
this.fieldIndexEvalFactory = fieldIndexEvalFactory;
this.recordType = recordType;
- if (NonTaggedFormatUtil.hasNullableField(recordType))
- this.nullBitmapSize = (int) Math.ceil(recordType.getFieldNames().length / 8.0);
- else
- this.nullBitmapSize = 0;
+ this.nullBitmapSize = ARecordType.computeNullBitmapSize(recordType);
}
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/formats/NonTaggedDataFormat.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/formats/NonTaggedDataFormat.java
index ed0a3f3..b477545 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/formats/NonTaggedDataFormat.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/formats/NonTaggedDataFormat.java
@@ -3,9 +3,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -946,9 +946,9 @@
}
fieldParserFactories[i] = vpf;
}
- return new NtDelimitedDataTupleParserFactory(recType, fieldParserFactories, delimiter);
+ return new NtDelimitedDataTupleParserFactory(recType, fieldParserFactories, delimiter, false, -1, null);
} else {
- return new AdmSchemafullRecordParserFactory(recType);
+ return new AdmSchemafullRecordParserFactory(recType, false, -1, null);
}
}
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java
index 1c06604..0cd3098 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/ADMDataParser.java
@@ -18,6 +18,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayDeque;
+import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import java.util.Queue;
@@ -31,7 +32,9 @@
import edu.uci.ics.asterix.dataflow.data.nontagged.serde.AIntervalSerializerDeserializer;
import edu.uci.ics.asterix.dataflow.data.nontagged.serde.APolygonSerializerDeserializer;
import edu.uci.ics.asterix.om.base.ABoolean;
+import edu.uci.ics.asterix.om.base.AMutableUUID;
import edu.uci.ics.asterix.om.base.ANull;
+import edu.uci.ics.asterix.om.base.AUUID;
import edu.uci.ics.asterix.om.types.AOrderedListType;
import edu.uci.ics.asterix.om.types.ARecordType;
import edu.uci.ics.asterix.om.types.ATypeTag;
@@ -45,6 +48,7 @@
import edu.uci.ics.asterix.runtime.operators.file.adm.AdmLexerException;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.Integer64SerializerDeserializer;
/**
* Parser for ADM formatted data.
@@ -66,6 +70,11 @@
private String mismatchErrorMessage = "Mismatch Type, expecting a value of type ";
private String mismatchErrorMessage2 = " got a value of type ";
+ private boolean isPKAutoGenerated;
+ private int primaryKeyPosition;
+ private ARecordType origRecTypeForAutoGeneratedPK;
+ private byte AUUIDTag = ATypeTag.UUID.serialize();
+
static class ParseException extends AsterixException {
private static final long serialVersionUID = 1L;
private String filename;
@@ -112,11 +121,16 @@
}
public ADMDataParser() {
- this(null);
+ this(null, false, -1, null);
}
- public ADMDataParser(String filename) {
+ // Constructor for dealing with auto-generated PK
+ public ADMDataParser(String filename, boolean isPKAutoGenerated,
+ int primaryKeyPosition, ARecordType origRecTypeForAutoGeneratedPK) {
this.filename = filename;
+ this.isPKAutoGenerated = isPKAutoGenerated;
+ this.primaryKeyPosition = primaryKeyPosition;
+ this.origRecTypeForAutoGeneratedPK = origRecTypeForAutoGeneratedPK;
}
@Override
@@ -135,7 +149,12 @@
@Override
public void initialize(InputStream in, ARecordType recordType, boolean datasetRec) throws AsterixException {
- this.recordType = recordType;
+ // When the PK is auto-generated, use origRecTypeForAutoGeneratedPK instead of the given recordType,
+ // because the given recordType does not contain the auto-generated PK field.
+ if (!isPKAutoGenerated)
+ this.recordType = recordType;
+ else
+ this.recordType = origRecTypeForAutoGeneratedPK;
this.datasetRec = datasetRec;
try {
admLexer = new AdmLexer(new java.io.InputStreamReader(in));
@@ -242,6 +261,12 @@
admLexer.getLastTokenImage().length() - 1);
aString.setValue(admLexer.containsEscapes() ? replaceEscapes(tokenImage) : tokenImage);
stringSerde.serialize(aString, out);
+ } else if (checkType(ATypeTag.UUID, objectType)) {
+ // Dealing with UUID type that is represented by a string
+ final String tokenImage = admLexer.getLastTokenImage().substring(1,
+ admLexer.getLastTokenImage().length() - 1);
+ aUUID.fromStringToAMuatbleUUID(tokenImage);
+ uuidSerde.serialize(aUUID, out);
} else
throw new ParseException(mismatchErrorMessage + objectType.getTypeName());
break;
@@ -365,6 +390,10 @@
throw new ParseException(mismatchErrorMessage + objectType.getTypeTag());
break;
}
+ case AdmLexer.TOKEN_UUID_CONS: {
+ parseConstructor(ATypeTag.UUID, objectType, out);
+ break;
+ }
case AdmLexer.TOKEN_EOF: {
break;
}
@@ -585,9 +614,28 @@
} while (inRecord);
if (recType != null) {
- nullableFieldId = checkNullConstraints(recType, nulls);
+ nullableFieldId = checkNullConstraints(recType, nulls, 0);
+ boolean insertedAutoGeneratedPK = false;
+
+ // For a dataset with an auto-generated PK, the input record is expected to omit the PK field,
+ // so the missing field reported by checkNullConstraints (nullableFieldId) is the PK field itself.
+ // In this case, we generate a new UUID and add it as the PK.
+ if (isPKAutoGenerated && nullableFieldId == primaryKeyPosition) {
+ fieldValueBuffer.reset();
+ aUUID.nextUUID();
+ fieldValueBuffer.getDataOutput().writeByte(AUUIDTag);
+ Integer64SerializerDeserializer.INSTANCE.serialize(aUUID.getMostSignificantBits(), fieldValueBuffer.getDataOutput());
+ Integer64SerializerDeserializer.INSTANCE.serialize(aUUID.getLeastSignificantBits(), fieldValueBuffer.getDataOutput());
+ recBuilder.addField(primaryKeyPosition, fieldValueBuffer);
+ insertedAutoGeneratedPK = true;
+ nulls.set(nullableFieldId);
+ // Check from the primary key field position to make sure no other field is missing
+ nullableFieldId = checkNullConstraints(recType, nulls, nullableFieldId);
+ }
if (nullableFieldId != -1)
- throw new ParseException("Field " + recType.getFieldNames()[nullableFieldId] + " can not be null");
+ throw new ParseException("Field: " + recType.getFieldNames()[nullableFieldId] + " can not be null");
+ if (isPKAutoGenerated && !insertedAutoGeneratedPK)
+ throw new ParseException("Auto-generated PK Field: " + recType.getFieldNames()[primaryKeyPosition] + " should not exist in the ADM file.");
}
recBuilder.write(out, true);
returnRecordBuilder(recBuilder);
@@ -595,9 +643,9 @@
returnTempBuffer(fieldValueBuffer);
}
- private int checkNullConstraints(ARecordType recType, BitSet nulls) {
+ private int checkNullConstraints(ARecordType recType, BitSet nulls, int startingPosition) {
boolean isNull = false;
- for (int i = 0; i < recType.getFieldTypes().length; i++)
+ for (int i = startingPosition; i < recType.getFieldTypes().length; i++)
if (nulls.get(i) == false) {
IAType type = recType.getFieldTypes()[i];
if (type.getTypeTag() != ATypeTag.NULL && type.getTypeTag() != ATypeTag.UNION)
@@ -892,6 +940,10 @@
case POLYGON:
APolygonSerializerDeserializer.parse(unquoted, out);
return true;
+ case UUID:
+ aUUID.fromStringToAMuatbleUUID(unquoted);
+ uuidSerde.serialize(aUUID, out);
+ return true;
default:
return false;
}
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AbstractDataParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AbstractDataParser.java
index 9820a3a..e7ae93d 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AbstractDataParser.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AbstractDataParser.java
@@ -47,6 +47,7 @@
import edu.uci.ics.asterix.om.base.AMutableRectangle;
import edu.uci.ics.asterix.om.base.AMutableString;
import edu.uci.ics.asterix.om.base.AMutableTime;
+import edu.uci.ics.asterix.om.base.AMutableUUID;
import edu.uci.ics.asterix.om.base.AMutableYearMonthDuration;
import edu.uci.ics.asterix.om.base.ANull;
import edu.uci.ics.asterix.om.base.APoint;
@@ -54,6 +55,7 @@
import edu.uci.ics.asterix.om.base.ARectangle;
import edu.uci.ics.asterix.om.base.AString;
import edu.uci.ics.asterix.om.base.ATime;
+import edu.uci.ics.asterix.om.base.AUUID;
import edu.uci.ics.asterix.om.base.AYearMonthDuration;
import edu.uci.ics.asterix.om.base.temporal.ADateParserFactory;
import edu.uci.ics.asterix.om.base.temporal.ADurationParserFactory;
@@ -62,8 +64,10 @@
import edu.uci.ics.asterix.om.base.temporal.GregorianCalendarSystem;
import edu.uci.ics.asterix.om.types.BuiltinType;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
/**
* Base class for data parsers. Includes the common set of definitions for
@@ -79,6 +83,7 @@
protected AMutableFloat aFloat = new AMutableFloat(0);
protected AMutableString aString = new AMutableString("");
protected AMutableString aStringFieldName = new AMutableString("");
+ protected AMutableUUID aUUID = new AMutableUUID(0, 0);
// For temporal and spatial data types
protected AMutableTime aTime = new AMutableTime(0);
protected AMutableDateTime aDateTime = new AMutableDateTime(0L);
@@ -121,6 +126,11 @@
@SuppressWarnings("unchecked")
protected ISerializerDeserializer<ANull> nullSerde = AqlSerializerDeserializerProvider.INSTANCE
.getSerializerDeserializer(BuiltinType.ANULL);
+ // For UUID, we assume that the format is the string representation of UUID
+ // (xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx) when parsing the data.
+ @SuppressWarnings("unchecked")
+ protected ISerializerDeserializer<AUUID> uuidSerde = AqlSerializerDeserializerProvider.INSTANCE
+ .getSerializerDeserializer(BuiltinType.AUUID_STRING);
// To avoid race conditions, the serdes for temporal and spatial data types needs to be one per parser
@SuppressWarnings("unchecked")
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AbstractTupleParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AbstractTupleParser.java
index 13305b1..e535cf4 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AbstractTupleParser.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AbstractTupleParser.java
@@ -47,12 +47,19 @@
protected final ARecordType recType;
protected final IHyracksTaskContext ctx;
protected String filename;
+ protected boolean isPKAutoGenerated;
+ protected int primaryKeyPosition;
+ protected final ARecordType origRecTypeForAutoGeneratedPK;
- public AbstractTupleParser(IHyracksTaskContext ctx, ARecordType recType) throws HyracksDataException {
+ public AbstractTupleParser(IHyracksTaskContext ctx, ARecordType recType,
+ boolean isPKAutoGenerated, int primaryKeyPosition, ARecordType origRecTypeForAutoGeneratedPK) throws HyracksDataException {
appender = new FrameTupleAppender(ctx.getFrameSize());
frame = ctx.allocateFrame();
this.recType = recType;
this.ctx = ctx;
+ this.isPKAutoGenerated = isPKAutoGenerated;
+ this.primaryKeyPosition = primaryKeyPosition;
+ this.origRecTypeForAutoGeneratedPK = origRecTypeForAutoGeneratedPK;
}
public void setFilename(String filename) {
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AdmSchemafullRecordParserFactory.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AdmSchemafullRecordParserFactory.java
index 5c84fe4..149fd0a 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AdmSchemafullRecordParserFactory.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AdmSchemafullRecordParserFactory.java
@@ -3,9 +3,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -30,13 +30,22 @@
protected ARecordType recType;
- public AdmSchemafullRecordParserFactory(ARecordType recType) {
+ // To deal with an auto-generated PK field
+ protected boolean isPKAutoGenerated;
+ protected int primaryKeyPosition;
+ protected ARecordType origRecordTypeForAutoGeneratedPK;
+
+ public AdmSchemafullRecordParserFactory(ARecordType recType, boolean isPKAutoGenerated, int primaryKeyPosition,
+ ARecordType origRecordTypeForAutoGeneratedPK) {
this.recType = recType;
+ this.isPKAutoGenerated = isPKAutoGenerated;
+ this.primaryKeyPosition = primaryKeyPosition;
+ this.origRecordTypeForAutoGeneratedPK = origRecordTypeForAutoGeneratedPK;
}
@Override
public ITupleParser createTupleParser(final IHyracksTaskContext ctx) throws HyracksDataException {
- return new AdmTupleParser(ctx, recType);
+ return new AdmTupleParser(ctx, recType, isPKAutoGenerated, primaryKeyPosition, origRecordTypeForAutoGeneratedPK);
}
}
\ No newline at end of file
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AdmTupleParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AdmTupleParser.java
index 8aab2db..074a2ca 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AdmTupleParser.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/AdmTupleParser.java
@@ -24,13 +24,14 @@
*/
public class AdmTupleParser extends AbstractTupleParser {
- public AdmTupleParser(IHyracksTaskContext ctx, ARecordType recType) throws HyracksDataException {
- super(ctx, recType);
+ public AdmTupleParser(IHyracksTaskContext ctx, ARecordType recType, boolean isPKAutoGenerated,
+ int primaryKeyPosition, ARecordType origRecordTypeForAutoGeneratedPK) throws HyracksDataException {
+ super(ctx, recType, isPKAutoGenerated, primaryKeyPosition, origRecordTypeForAutoGeneratedPK);
}
@Override
public IDataParser getDataParser() {
- return new ADMDataParser(filename);
+ return new ADMDataParser(filename, isPKAutoGenerated, primaryKeyPosition, origRecTypeForAutoGeneratedPK);
}
}
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java
index 23f7aab..f516075 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataParser.java
@@ -3,9 +3,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -33,6 +33,7 @@
import edu.uci.ics.asterix.om.util.NonTaggedFormatUtil;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.Integer64SerializerDeserializer;
import edu.uci.ics.hyracks.dataflow.common.data.parsers.IValueParser;
import edu.uci.ics.hyracks.dataflow.common.data.parsers.IValueParserFactory;
@@ -50,19 +51,35 @@
private byte[] fieldTypeTags;
private int[] fldIds;
private ArrayBackedValueStorage[] nameBuffers;
+ private byte AUUIDTag = ATypeTag.UUID.serialize();
+
+ // Variables used to set a UUID for the auto-generated PK field
+ private boolean isPKAutoGenerated;
+ private int primaryKeyPosition;
+ private final ARecordType origRecordTypeForAutoGeneratedPK;
private boolean areAllNullFields;
- public DelimitedDataParser(ARecordType recordType, IValueParserFactory[] valueParserFactories, char fieldDelimter) {
+ public DelimitedDataParser(ARecordType recordType, IValueParserFactory[] valueParserFactories, char fieldDelimter,
+ boolean isPKAutoGenerated, int primaryKeyPosition, ARecordType origRecordTypeForAutoGeneratedPK) {
this.recordType = recordType;
this.valueParserFactories = valueParserFactories;
this.fieldDelimiter = fieldDelimter;
+ this.isPKAutoGenerated = isPKAutoGenerated;
+ this.primaryKeyPosition = primaryKeyPosition;
+ this.origRecordTypeForAutoGeneratedPK = origRecordTypeForAutoGeneratedPK;
}
@Override
public void initialize(InputStream in, ARecordType recordType, boolean datasetRec) throws AsterixException,
IOException {
+ ARecordType recordTypeToApply = null;
+ if (isPKAutoGenerated)
+ recordTypeToApply = origRecordTypeForAutoGeneratedPK;
+ else
+ recordTypeToApply = recordType;
+
valueParsers = new IValueParser[valueParserFactories.length];
for (int i = 0; i < valueParserFactories.length; ++i) {
valueParsers[i] = valueParserFactories[i].createValueParser();
@@ -70,8 +87,12 @@
fieldValueBuffer = new ArrayBackedValueStorage();
fieldValueBufferOutput = fieldValueBuffer.getDataOutput();
+
+ // If the PK is auto-generated, the record builder must be reset with the
+ // record type that includes the PK field (recordTypeToApply),
+ // since the recordType parameter does not include the PK field.
recBuilder = new RecordBuilder();
- recBuilder.reset(recordType);
+ recBuilder.reset(recordTypeToApply);
recBuilder.init();
int n = recordType.getFieldNames().length;
@@ -104,9 +125,16 @@
@Override
public boolean parse(DataOutput out) throws AsterixException, IOException {
while (cursor.nextRecord()) {
- recBuilder.reset(recordType);
+ if (isPKAutoGenerated)
+ recBuilder.reset(origRecordTypeForAutoGeneratedPK);
+ else
+ recBuilder.reset(recordType);
+
recBuilder.init();
areAllNullFields = true;
+
+ int fieldCount = 0;
+
for (int i = 0; i < valueParsers.length; ++i) {
if (!cursor.nextField()) {
break;
@@ -115,11 +143,14 @@
if (cursor.fStart == cursor.fEnd && recordType.getFieldTypes()[i].getTypeTag() != ATypeTag.STRING
&& recordType.getFieldTypes()[i].getTypeTag() != ATypeTag.NULL) {
- // if the field is empty and the type is optional, insert NULL
- // note that string type can also process empty field as an empty string
+ // if the field is empty and the type is optional, insert
+ // NULL
+ // note that string type can also process empty field as an
+ // empty string
if (recordType.getFieldTypes()[i].getTypeTag() != ATypeTag.UNION
|| !NonTaggedFormatUtil.isOptionalField((AUnionType) recordType.getFieldTypes()[i])) {
- throw new AsterixException("Field " + i + " is not an optional type so it cannot accept null value. ");
+ throw new AsterixException("Field " + i
+ + " is not an optional type so it cannot accept null value. ");
}
fieldValueBufferOutput.writeByte(ATypeTag.NULL.serialize());
ANullSerializerDeserializer.INSTANCE.serialize(ANull.NULL, out);
@@ -135,7 +166,35 @@
recBuilder.addField(fldIds[i], fieldValueBuffer);
}
+ fieldCount++;
+
}
+
+ // Should not have any more fields now
+ if (cursor.nextField()) {
+ fieldCount++;
+ }
+
+ // Parsed all fields except an auto-generated PK at this point
+ // Create a new UUID and assign it as a PK.
+ if (isPKAutoGenerated && fieldCount == origRecordTypeForAutoGeneratedPK.getFieldTypes().length - 1) {
+ fieldValueBuffer.reset();
+ aUUID.nextUUID();
+ fieldValueBufferOutput.writeByte(AUUIDTag);
+ Integer64SerializerDeserializer.INSTANCE.serialize(aUUID.getMostSignificantBits(),
+ fieldValueBufferOutput);
+ Integer64SerializerDeserializer.INSTANCE.serialize(aUUID.getLeastSignificantBits(),
+ fieldValueBufferOutput);
+ recBuilder.addField(primaryKeyPosition, fieldValueBuffer);
+ areAllNullFields = false;
+ }
+ // If we have all fields in the file including auto-generated PK,
+ // throw an exception
+ else if (isPKAutoGenerated && fieldCount >= origRecordTypeForAutoGeneratedPK.getFieldTypes().length) {
+ throw new AsterixException(
+ "Check number of fields. Auto-generated PK field should not exist in the input data.");
+ }
+
if (!areAllNullFields) {
recBuilder.write(out, true);
return true;
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataTupleParser.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataTupleParser.java
index 9f5e6e9..be6c42a 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataTupleParser.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/DelimitedDataTupleParser.java
@@ -3,9 +3,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -28,9 +28,11 @@
private final DelimitedDataParser dataParser;
public DelimitedDataTupleParser(IHyracksTaskContext ctx, ARecordType recType,
- IValueParserFactory[] valueParserFactories, char fieldDelimter) throws HyracksDataException {
- super(ctx, recType);
- dataParser = new DelimitedDataParser(recType, valueParserFactories, fieldDelimter);
+ IValueParserFactory[] valueParserFactories, char fieldDelimter, boolean isPKAutoGenerated,
+ int primaryKeyPosition, ARecordType origRecordTypeForAutoGeneratedPK) throws HyracksDataException {
+ super(ctx, recType, isPKAutoGenerated, primaryKeyPosition, origRecordTypeForAutoGeneratedPK);
+ dataParser = new DelimitedDataParser(recType, valueParserFactories, fieldDelimter, isPKAutoGenerated,
+ primaryKeyPosition, origRecordTypeForAutoGeneratedPK);
}
@Override
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/NtDelimitedDataTupleParserFactory.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/NtDelimitedDataTupleParserFactory.java
index 6c56094..f88d39a 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/NtDelimitedDataTupleParserFactory.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/operators/file/NtDelimitedDataTupleParserFactory.java
@@ -3,9 +3,9 @@
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -30,17 +30,26 @@
protected ARecordType recordType;
protected IValueParserFactory[] valueParserFactories;
protected char fieldDelimiter;
+ // To deal with an auto-generated PK
+ protected boolean isPKAutoGenerated;
+ protected int primaryKeyPosition;
+ protected ARecordType origRecordTypeForAutoGeneratedPK;
public NtDelimitedDataTupleParserFactory(ARecordType recordType, IValueParserFactory[] valueParserFactories,
- char fieldDelimiter) {
+ char fieldDelimiter, boolean isPKAutoGenerated,
+ int primaryKeyposition, ARecordType origRecordTypeForAutoGeneratedPK) {
this.recordType = recordType;
this.valueParserFactories = valueParserFactories;
this.fieldDelimiter = fieldDelimiter;
+ this.isPKAutoGenerated = isPKAutoGenerated;
+ this.primaryKeyPosition = primaryKeyposition;
+ this.origRecordTypeForAutoGeneratedPK = origRecordTypeForAutoGeneratedPK;
}
@Override
public ITupleParser createTupleParser(final IHyracksTaskContext ctx) throws HyracksDataException {
- return new DelimitedDataTupleParser(ctx, recordType, valueParserFactories, fieldDelimiter);
+ return new DelimitedDataTupleParser(ctx, recordType, valueParserFactories, fieldDelimiter,
+ isPKAutoGenerated, primaryKeyPosition, origRecordTypeForAutoGeneratedPK);
}
}
diff --git a/asterix-runtime/src/main/resources/adm.grammar b/asterix-runtime/src/main/resources/adm.grammar
index 06614d0..b9db2e7 100644
--- a/asterix-runtime/src/main/resources/adm.grammar
+++ b/asterix-runtime/src/main/resources/adm.grammar
@@ -30,7 +30,8 @@
INTERVAL_DATE_CONS = string(interval-date)
INTERVAL_DATETIME_CONS = string(interval-datetime)
YEAR_MONTH_DURATION_CONS = string(year-month-duration)
-DAY_TIME_DURATION_CONS = string(day-time-duration)
+DAY_TIME_DURATION_CONS = string(day-time-duration)
+UUID_CONS = string(uuid)
NULL_LITERAL = string(null)
TRUE_LITERAL = string(true)