[ASTERIXDB-3392] Add new FieldNamesDictionary
Change-Id: I798fcb4a02f017ad1b31e0cdb556e0dda1403b5e
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18663
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Wail Alkowaileet <wael.y.k@gmail.com>
Tested-by: Wail Alkowaileet <wael.y.k@gmail.com>
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
index 6480c30..cb447c8 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
@@ -18,7 +18,7 @@
*/
package org.apache.asterix.column.assembler;
-import static org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
+import static org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
import java.util.ArrayList;
import java.util.BitSet;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
index 0bea188..1f74fb3 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
@@ -18,7 +18,7 @@
*/
package org.apache.asterix.column.metadata.schema;
-import static org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
+import static org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
import java.io.DataInput;
import java.io.DataInputStream;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java
index 05c4eda..c7d3df1 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaBuilderFromIATypeVisitor.java
@@ -22,12 +22,12 @@
import java.util.List;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
import org.apache.asterix.column.metadata.schema.collection.AbstractCollectionSchemaNode;
import org.apache.asterix.column.metadata.schema.primitive.MissingFieldSchemaNode;
import org.apache.asterix.column.operation.lsm.flush.FlushColumnMetadata;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
import org.apache.asterix.om.types.ARecordType;
import org.apache.asterix.om.types.AUnionType;
import org.apache.asterix.om.types.AbstractCollectionType;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
index afe8368..ff05568 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
@@ -21,13 +21,13 @@
import java.io.IOException;
import java.util.Map;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
import org.apache.asterix.column.metadata.schema.UnionSchemaNode;
import org.apache.asterix.column.metadata.schema.collection.AbstractCollectionSchemaNode;
import org.apache.asterix.column.metadata.schema.primitive.MissingFieldSchemaNode;
import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
import org.apache.asterix.om.types.ARecordType;
import org.apache.asterix.om.types.ATypeTag;
import org.apache.asterix.om.types.AUnionType;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
index f514638..87f9ff3 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
@@ -33,9 +33,7 @@
import java.util.Map;
import org.apache.asterix.column.metadata.AbstractColumnMetadata;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
import org.apache.asterix.column.metadata.PathInfoSerializer;
-import org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNestedNode;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
@@ -51,6 +49,8 @@
import org.apache.asterix.column.values.IColumnValuesWriter;
import org.apache.asterix.column.values.IColumnValuesWriterFactory;
import org.apache.asterix.column.values.writer.AbstractColumnValuesWriter;
+import org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
import org.apache.asterix.om.types.ARecordType;
import org.apache.asterix.om.types.ATypeTag;
import org.apache.commons.lang3.mutable.Mutable;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
index e507d53..15a6277 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
@@ -41,8 +41,6 @@
import org.apache.asterix.column.filter.range.IColumnRangeFilterEvaluatorFactory;
import org.apache.asterix.column.filter.range.IColumnRangeFilterValueAccessor;
import org.apache.asterix.column.metadata.AbstractColumnImmutableReadMetadata;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
-import org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor;
@@ -50,6 +48,8 @@
import org.apache.asterix.column.values.IColumnValuesReader;
import org.apache.asterix.column.values.IColumnValuesReaderFactory;
import org.apache.asterix.column.values.reader.PrimitiveColumnValuesReader;
+import org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
import org.apache.asterix.om.types.ARecordType;
import org.apache.asterix.runtime.projection.FunctionCallInformation;
import org.apache.hyracks.api.context.IHyracksTaskContext;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
index 356ddaa..d931242 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
@@ -38,14 +38,14 @@
import org.apache.asterix.column.filter.iterable.IColumnIterableFilterEvaluatorFactory;
import org.apache.asterix.column.filter.range.IColumnRangeFilterEvaluatorFactory;
import org.apache.asterix.column.filter.range.IColumnRangeFilterValueAccessor;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
-import org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor;
import org.apache.asterix.column.values.IColumnValuesReader;
import org.apache.asterix.column.values.IColumnValuesReaderFactory;
import org.apache.asterix.column.values.reader.PrimitiveColumnValuesReader;
+import org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
import org.apache.asterix.om.types.ARecordType;
import org.apache.asterix.runtime.projection.FunctionCallInformation;
import org.apache.hyracks.api.context.IHyracksTaskContext;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java
index b425a26..a4b9240 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaJSONBuilderVisitor.java
@@ -21,7 +21,6 @@
import java.util.ArrayList;
import java.util.List;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ISchemaNodeVisitor;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
@@ -30,6 +29,7 @@
import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode;
import org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
import org.apache.asterix.om.base.AString;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.api.IValueReference;
import org.apache.hyracks.data.std.util.ByteArrayAccessibleDataInputStream;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
index 6d991cf..ad0e460 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
@@ -21,7 +21,6 @@
import java.util.ArrayList;
import java.util.List;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ISchemaNodeVisitor;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
@@ -30,6 +29,7 @@
import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode;
import org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
import org.apache.asterix.om.base.AString;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.api.IValueReference;
import org.apache.hyracks.data.std.util.ByteArrayAccessibleDataInputStream;
diff --git a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
index 63c2b22..4b33e19 100644
--- a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
+++ b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
@@ -20,11 +20,11 @@
import java.util.concurrent.TimeUnit;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
-import org.apache.asterix.column.metadata.dictionary.FieldNamesHashDictionary;
-import org.apache.asterix.column.metadata.dictionary.FieldNamesTrieDictionary;
import org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
import org.apache.asterix.om.base.AMutableString;
+import org.apache.asterix.om.dictionary.FieldNamesHashDictionary;
+import org.apache.asterix.om.dictionary.FieldNamesTrieDictionary;
+import org.apache.asterix.om.dictionary.IFieldNamesDictionary;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.api.IValueReference;
diff --git a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
index c9f58d5..1ec468c 100644
--- a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
+++ b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
@@ -28,8 +28,8 @@
import java.util.Collections;
import java.util.List;
-import org.apache.asterix.column.metadata.dictionary.FieldNameTrie;
-import org.apache.asterix.column.metadata.dictionary.FieldNamesTrieDictionary;
+import org.apache.asterix.om.dictionary.FieldNameTrie;
+import org.apache.asterix.om.dictionary.FieldNamesTrieDictionary;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.api.IValueReference;
import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
index b07e857..ca2ad55 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
@@ -79,7 +79,7 @@
.withType(schema).withTypeInfo(typeInfo).withRowGroupSize(rowGroupSize).withPageSize(pageSize)
.withDictionaryPageSize(ExternalDataConstants.PARQUET_DICTIONARY_PAGE_SIZE)
.enableDictionaryEncoding().withValidation(false)
- .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_2_0).withConf(conf).build();
+ .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_1_0).withConf(conf).build();
} catch (IOException e) {
throw HyracksDataException.create(e);
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java
index cc54676..5db600f 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java
@@ -31,6 +31,14 @@
public class ParquetOutputFile implements OutputFile {
private final FSDataOutputStream fs;
+ /*
+ This class wraps OutputStream as a file that Parquet SDK supports writing to.
+ By default, this assumes output stream doesn't support block size which distributed file systems use.
+ Hadoop File System Library use this as a default block size
+ Ref : https://github.com/apache/hadoop/blob/74ff00705cf67911f1ff8320c6c97354350d6952/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java#L2756
+ */
+ private static final long DEFAULT_BLOCK_SIZE = 33554432L;
+
public ParquetOutputFile(OutputStream os) {
this.fs = new FSDataOutputStream(os, new FileSystem.Statistics("test"));
}
@@ -52,6 +60,6 @@
@Override
public long defaultBlockSize() {
- return 33554432L;
+ return DEFAULT_BLOCK_SIZE;
}
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
index f36df1a..7058bf6 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
@@ -18,42 +18,35 @@
*/
package org.apache.asterix.external.writer.printer.parquet;
-import org.apache.hyracks.api.dataflow.value.IBinaryHashFunction;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.asterix.om.dictionary.FieldNamesTrieDictionary;
import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory;
import org.apache.hyracks.data.std.api.IValueReference;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
import org.apache.hyracks.util.string.UTF8StringUtil;
-import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
-import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
-
public class FieldNamesDictionary {
- private final IBinaryHashFunction fieldNameHashFunction;
- private final Int2ObjectMap<String> hashToFieldNameIndexMap;
+ private final FieldNamesTrieDictionary trie;
+ private final List<String> fieldNames;
+ private final StringBuilder builder;
public FieldNamesDictionary() {
- fieldNameHashFunction =
- new PointableBinaryHashFunctionFactory(UTF8StringPointable.FACTORY).createBinaryHashFunction();
- hashToFieldNameIndexMap = new Int2ObjectOpenHashMap<>();
+ trie = new FieldNamesTrieDictionary();
+ fieldNames = new ArrayList<>();
+ builder = new StringBuilder();
}
- //TODO solve collision (they're so rare that I haven't seen any)
public String getOrCreateFieldNameIndex(IValueReference pointable) throws HyracksDataException {
-
- int hash = getHash(pointable);
- if (!hashToFieldNameIndexMap.containsKey(hash)) {
- String fieldName = UTF8StringUtil.toString(pointable.getByteArray(), pointable.getStartOffset());
- hashToFieldNameIndexMap.put(hash, fieldName);
- return fieldName;
+ int index = trie.getOrCreateFieldNameIndex(pointable);
+ if (index < fieldNames.size()) {
+ return fieldNames.get(index);
}
- return hashToFieldNameIndexMap.get(hash);
+
+ builder.setLength(0);
+ String fieldName = UTF8StringUtil.toString(pointable.getByteArray(), pointable.getStartOffset(), builder);
+ fieldNames.add(fieldName);
+ return fieldName;
}
- private int getHash(IValueReference fieldName) throws HyracksDataException {
- byte[] object = fieldName.getByteArray();
- int start = fieldName.getStartOffset();
- int length = fieldName.getLength();
- return fieldNameHashFunction.hash(object, start, length);
- }
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
index 6ad9608..cf46cc6 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
@@ -18,10 +18,10 @@
*/
package org.apache.asterix.external.writer.printer.parquet;
-import static org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.ELEMENT_FIELD;
-import static org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.GROUP_TYPE_ERROR_FIELD;
-import static org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.LIST_FIELD;
-import static org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.PRIMITIVE_TYPE_ERROR_FIELD;
+import static org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.ELEMENT_FIELD;
+import static org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.GROUP_TYPE_ERROR_FIELD;
+import static org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.LIST_FIELD;
+import static org.apache.asterix.external.writer.printer.parquet.ParquetValueWriter.PRIMITIVE_TYPE_ERROR_FIELD;
import org.apache.asterix.om.lazy.AbstractLazyVisitablePointable;
import org.apache.asterix.om.lazy.AbstractListLazyVisitablePointable;
@@ -48,7 +48,7 @@
private RecordConsumer recordConsumer;
private FieldNamesDictionary fieldNamesDictionary;
- private final ParquetRecordVisitorUtils parquetRecordVisitorUtils;
+ private final ParquetValueWriter parquetValueWriter;
public ParquetRecordLazyVisitor(MessageType schema, IAType typeInfo) {
this.schema = schema;
@@ -61,7 +61,7 @@
throw new RuntimeException("Type Unsupported for parquet printing");
}
this.fieldNamesDictionary = new FieldNamesDictionary();
- this.parquetRecordVisitorUtils = new ParquetRecordVisitorUtils();
+ this.parquetValueWriter = new ParquetValueWriter();
}
public MessageType getSchema() {
@@ -150,7 +150,7 @@
throw new HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA, PRIMITIVE_TYPE_ERROR_FIELD,
GROUP_TYPE_ERROR_FIELD, type.getName());
}
- parquetRecordVisitorUtils.addValueToColumn(recordConsumer, pointable, type.asPrimitiveType());
+ parquetValueWriter.addValueToColumn(recordConsumer, pointable, type.asPrimitiveType());
return null;
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordVisitorUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
similarity index 95%
rename from asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordVisitorUtils.java
rename to asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
index 54978c3..206a3c9 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordVisitorUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
@@ -43,24 +43,19 @@
import org.apache.parquet.io.api.RecordConsumer;
import org.apache.parquet.schema.PrimitiveType;
-public class ParquetRecordVisitorUtils {
-
+public class ParquetValueWriter {
public static final String LIST_FIELD = "list";
public static final String ELEMENT_FIELD = "element";
public static final String GROUP_TYPE_ERROR_FIELD = "group";
public static final String PRIMITIVE_TYPE_ERROR_FIELD = "primitive";
- private VoidPointable voidPointable;
- private ATypeTag typeTag;
+ private final VoidPointable voidPointable;
+ private final ResettableByteArrayOutputStream byteArrayOutputStream;
- private byte[] b;
- int s, l;
- private ResettableByteArrayOutputStream byteArrayOutputStream;
-
- public ParquetRecordVisitorUtils() {
+ ParquetValueWriter() {
this.voidPointable = VoidPointable.FACTORY.createPointable();
- byteArrayOutputStream = new ResettableByteArrayOutputStream();
+ this.byteArrayOutputStream = new ResettableByteArrayOutputStream();
}
private void addIntegerType(long value, PrimitiveType.PrimitiveTypeName primitiveTypeName, ATypeTag typeTag,
@@ -86,8 +81,9 @@
public void addValueToColumn(RecordConsumer recordConsumer, FlatLazyVisitablePointable pointable,
PrimitiveType type) throws HyracksDataException {
- typeTag = pointable.getTypeTag();
- b = pointable.getByteArray();
+ ATypeTag typeTag = pointable.getTypeTag();
+ byte[] b = pointable.getByteArray();
+ int s, l;
if (pointable.isTagged()) {
s = pointable.getStartOffset() + 1;
@@ -196,7 +192,5 @@
default:
throw RuntimeDataException.create(ErrorCode.TYPE_MISMATCH_GENERIC, typeTag, primitiveTypeName);
}
-
}
-
}
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/AbstractFieldNamesDictionary.java
similarity index 96%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java
rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/AbstractFieldNamesDictionary.java
index bffdb33..b2cd223 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/AbstractFieldNamesDictionary.java
@@ -16,14 +16,12 @@
* specific language governing permissions and limitations
* under the License.
*/
-
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;
import java.io.DataInput;
import java.io.IOException;
import java.util.List;
-import org.apache.asterix.column.metadata.IFieldNamesDictionary;
import org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
import org.apache.asterix.om.base.AMutableString;
import org.apache.hyracks.api.exceptions.HyracksDataException;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/ByteToNodeMap.java
similarity index 97%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java
rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/ByteToNodeMap.java
index 73c034b..7bfae0b 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/ByteToNodeMap.java
@@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;
import java.io.DataInput;
import java.io.DataOutput;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNameTrie.java
similarity index 98%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java
rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNameTrie.java
index 4a19cd6..5333af5 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNameTrie.java
@@ -16,9 +16,9 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;
-import static org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.deserializeFieldNames;
+import static org.apache.asterix.om.dictionary.AbstractFieldNamesDictionary.deserializeFieldNames;
import java.io.DataInput;
import java.io.DataOutput;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesHashDictionary.java
similarity index 98%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java
rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesHashDictionary.java
index 73c9a73..591f322 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesHashDictionary.java
@@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;
import java.io.DataInput;
import java.io.DataInputStream;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesTrieDictionary.java
similarity index 97%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java
rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesTrieDictionary.java
index 10de829..30e10d6 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/FieldNamesTrieDictionary.java
@@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;
import java.io.DataInput;
import java.io.DataInputStream;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/IFieldNamesDictionary.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/IFieldNamesDictionary.java
similarity index 97%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/IFieldNamesDictionary.java
rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/IFieldNamesDictionary.java
index 8aa0e88..ec494fa 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/IFieldNamesDictionary.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/IFieldNamesDictionary.java
@@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.asterix.column.metadata;
+package org.apache.asterix.om.dictionary;
import java.io.DataInputStream;
import java.io.DataOutput;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/TrieNode.java
similarity index 98%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java
rename to asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/TrieNode.java
index 32e902b..9ea2978 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/dictionary/TrieNode.java
@@ -16,8 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
-
-package org.apache.asterix.column.metadata.dictionary;
+package org.apache.asterix.om.dictionary;
import java.io.DataInput;
import java.io.DataOutput;
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
index 5045c86..4022961 100644
--- a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
@@ -332,6 +332,10 @@
public static String toString(byte[] bytes, int start) {
StringBuilder builder = new StringBuilder();
+ return toString(bytes, start, builder);
+ }
+
+ public static String toString(byte[] bytes, int start, StringBuilder builder) {
return toString(builder, bytes, start).toString();
}