[ASTERIXDB-3432][STO] Improve Trie-based field name dictionary
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
Avoid using Byte2ObjectArrayMap
Change-Id: If0a47d7f140f367f59560e695b1b93416adab8a1
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18376
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Wail Alkowaileet <wael.y.k@gmail.com>
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
index 764b1b9..6480c30 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/assembler/AssemblerBuilderVisitor.java
@@ -18,7 +18,7 @@
*/
package org.apache.asterix.column.assembler;
-import static org.apache.asterix.column.metadata.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
+import static org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
import java.util.ArrayList;
import java.util.BitSet;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/AbstractFieldNamesDictionary.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java
similarity index 75%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/AbstractFieldNamesDictionary.java
rename to asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java
index f22631b..bffdb33 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/AbstractFieldNamesDictionary.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/AbstractFieldNamesDictionary.java
@@ -17,8 +17,13 @@
* under the License.
*/
-package org.apache.asterix.column.metadata;
+package org.apache.asterix.column.metadata.dictionary;
+import java.io.DataInput;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.asterix.column.metadata.IFieldNamesDictionary;
import org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
import org.apache.asterix.om.base.AMutableString;
import org.apache.hyracks.api.exceptions.HyracksDataException;
@@ -50,6 +55,14 @@
stringSerDer = new AStringSerializerDeserializer(new UTF8StringWriter(), new UTF8StringReader());
}
+ public static IFieldNamesDictionary create() {
+ return new FieldNamesTrieDictionary();
+ }
+
+ public static IFieldNamesDictionary deserialize(DataInput input) throws IOException {
+ return FieldNamesTrieDictionary.deserialize(input);
+ }
+
static ArrayBackedValueStorage creatFieldName(IValueReference fieldName) throws HyracksDataException {
ArrayBackedValueStorage copy = new ArrayBackedValueStorage(fieldName.getLength());
copy.append(fieldName);
@@ -66,4 +79,15 @@
mutableString.setValue(fieldName);
stringSerDer.serialize(mutableString, storage.getDataOutput());
}
+
+ static void deserializeFieldNames(DataInput input, List<IValueReference> fieldNames, int numberOfFieldNames)
+ throws IOException {
+ for (int i = 0; i < numberOfFieldNames; i++) {
+ int length = input.readInt();
+ ArrayBackedValueStorage fieldName = new ArrayBackedValueStorage(length);
+ fieldName.setSize(length);
+ input.readFully(fieldName.getByteArray(), 0, length);
+ fieldNames.add(fieldName);
+ }
+ }
}
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java
new file mode 100644
index 0000000..73c034b
--- /dev/null
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/ByteToNodeMap.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.column.metadata.dictionary;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Collection;
+
+import it.unimi.dsi.fastutil.objects.ObjectArrays;
+
+/**
+ * A map from a byte key to a {@link TrieNode} child, backed by an array that is indexed by the
+ * unsigned value of the key. Slots without a child hold {@code null}.
+ */
+final class ByteToNodeMap {
+    private static final TrieNode[] EMPTY = new TrieNode[0];
+    private TrieNode[] children;
+    private int numberOfChildren;
+
+    ByteToNodeMap() {
+        children = EMPTY;
+        numberOfChildren = 0;
+    }
+
+    private ByteToNodeMap(TrieNode[] children, int numberOfChildren) {
+        this.children = children;
+        this.numberOfChildren = numberOfChildren;
+    }
+
+    /**
+     * Stores {@code node} under {@code key}, growing the backing array if needed.
+     * Replacing an existing child leaves the child count unchanged.
+     * NOTE(review): assumes {@code node} is non-null -- confirm against callers.
+     */
+    void put(byte key, TrieNode node) {
+        int index = Byte.toUnsignedInt(key);
+        ensure(index);
+        // Count only newly occupied slots: serialize() writes numberOfChildren and deserialize() reads
+        // exactly that many entries, so over-counting on an overwrite would corrupt the stream.
+        if (children[index] == null) {
+            numberOfChildren++;
+        }
+        children[index] = node;
+    }
+
+    /**
+     * @return the child stored under {@code key}, or {@code null} if there is none
+     */
+    TrieNode get(byte key) {
+        int index = Byte.toUnsignedInt(key);
+        if (index < children.length) {
+            return children[index];
+        }
+
+        return null;
+    }
+
+    /** Grows the backing array, if necessary, so that {@code index} is addressable. */
+    private void ensure(int index) {
+        if (index >= children.length) {
+            children = ObjectArrays.grow(children, index + 1, children.length);
+        }
+    }
+
+    /**
+     * Adds every non-null child to {@code collection} in ascending unsigned-key order.
+     */
+    void addAllChildren(Collection<TrieNode> collection) {
+        int addedChildren = 0;
+        for (int i = 0; i < children.length && addedChildren < numberOfChildren; i++) {
+            TrieNode child = children[i];
+            if (child != null) {
+                collection.add(child);
+                addedChildren++;
+            }
+        }
+    }
+
+    /**
+     * Writes the child count, the backing array length, then each child as an (index, node) pair.
+     */
+    void serialize(DataOutput out) throws IOException {
+        out.writeInt(numberOfChildren);
+        out.writeInt(children.length);
+        int addedChildren = 0;
+        for (int i = 0; i < children.length && addedChildren < numberOfChildren; i++) {
+            TrieNode child = children[i];
+            if (child != null) {
+                out.writeInt(i);
+                child.serialize(out);
+                addedChildren++;
+            }
+        }
+    }
+
+    /**
+     * Reads a map in the layout written by {@link #serialize(DataOutput)}.
+     */
+    static ByteToNodeMap deserialize(DataInput in) throws IOException {
+        int numberOfChildren = in.readInt();
+        int length = in.readInt();
+        TrieNode[] children = length == 0 ? EMPTY : new TrieNode[length];
+        for (int i = 0; i < numberOfChildren; i++) {
+            int index = in.readInt();
+            children[index] = TrieNode.deserialize(in);
+        }
+
+        return new ByteToNodeMap(children, numberOfChildren);
+    }
+}
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/FieldNameTrie.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java
similarity index 88%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/FieldNameTrie.java
rename to asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java
index e31026e..4a19cd6 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/FieldNameTrie.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNameTrie.java
@@ -16,19 +16,20 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.asterix.column.metadata;
+package org.apache.asterix.column.metadata.dictionary;
+
+import static org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.deserializeFieldNames;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import java.util.ArrayDeque;
import java.util.ArrayList;
-import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.api.IValueReference;
-import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
import org.apache.hyracks.util.string.UTF8StringUtil;
public class FieldNameTrie {
@@ -129,11 +130,10 @@
// resume from the stored node.
int bytesToStoreLength = UTF8StringUtil.getNumBytesToStoreLength(len);
- int start = bytesToStoreLength;
int byteIndex = lookupState.getRelativeOffsetFromStart() + bytesToStoreLength;
byte[] bytes = fieldName.getByteArray();
- int lastIndex = (start + len - 1);
+ int lastIndex = (bytesToStoreLength + len - 1);
while (byteIndex <= lastIndex) {
byte b = bytes[byteIndex];
TrieNode nextNode = searchNode.getChild(b);
@@ -191,7 +191,7 @@
// find absolute starting point in the current fieldName
int diff = searchNode.getStart() - searchNode.getBytesToStoreLength();
// since hookup happens on a new fieldName, hence start will be bytesToStoreLength
- searchNode.setIndex(fieldNames.size(), start + diff, searchNode.getLength(), bytesToStoreLength);
+ searchNode.setIndex(fieldNames.size(), bytesToStoreLength + diff, searchNode.getLength(), bytesToStoreLength);
searchNode.setIsEndOfField(true);
fieldNames.add(fieldName);
return searchNode.getIndex();
@@ -210,14 +210,25 @@
rootNode.serialize(out);
}
+ public List<IValueReference> getFieldNames() {
+ return fieldNames;
+ }
+
+ public IValueReference getFieldName(int fieldIndex) {
+ return fieldNames.get(fieldIndex);
+ }
+
+ public void clear() {
+ rootNode = null;
+ fieldNames.clear();
+ }
+
public static FieldNameTrie deserialize(DataInput in) throws IOException {
int version = in.readInt();
- switch (version) {
- case VERSION:
- return deserializeV1(in);
- default:
- throw new IllegalStateException("Unsupported version: " + version);
+ if (version == VERSION) {
+ return deserializeV1(in);
}
+ throw new IllegalStateException("Unsupported version: " + version);
}
private static FieldNameTrie deserializeV1(DataInput in) throws IOException {
@@ -232,37 +243,11 @@
return newTrie;
}
- private static void deserializeFieldNames(DataInput input, List<IValueReference> fieldNames, int numberOfFieldNames)
- throws IOException {
- for (int i = 0; i < numberOfFieldNames; i++) {
- int length = input.readInt();
- ArrayBackedValueStorage fieldName = new ArrayBackedValueStorage(length);
- fieldName.setSize(length);
- input.readFully(fieldName.getByteArray(), 0, length);
- fieldNames.add(fieldName);
- }
- }
-
- public List<IValueReference> getFieldNames() {
- return fieldNames;
- }
-
- public IValueReference getFieldName(int fieldIndex) {
- return fieldNames.get(fieldIndex);
- }
-
- public void clear() {
- rootNode = null;
- fieldNames.clear();
- }
-
@Override
public String toString() {
TrieNode currentNode = rootNode;
- Queue<TrieNode> queue = new LinkedList<>();
- for (TrieNode node : currentNode.getChildren().values()) {
- queue.offer(node);
- }
+ Queue<TrieNode> queue = new ArrayDeque<>();
+ currentNode.getChildren().addAllChildren(queue);
StringBuilder treeBuilder = new StringBuilder();
while (!queue.isEmpty()) {
int len = queue.size();
@@ -278,16 +263,14 @@
treeBuilder.append(" | ");
}
- for (TrieNode child : node.getChildren().values()) {
- queue.offer(child);
- }
+ node.getChildren().addAllChildren(queue);
}
treeBuilder.append("\n");
}
return treeBuilder.toString();
}
- class LookupState {
+ private static class LookupState {
private TrieNode lastNode;
private int relativeOffsetFromStart;
private int fieldLength;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/FieldNamesHashDictionary.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java
similarity index 93%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/FieldNamesHashDictionary.java
rename to asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java
index c83b289..73c9a73 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/FieldNamesHashDictionary.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesHashDictionary.java
@@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.asterix.column.metadata;
+package org.apache.asterix.column.metadata.dictionary;
import java.io.DataInput;
import java.io.DataInputStream;
@@ -37,6 +37,10 @@
import it.unimi.dsi.fastutil.objects.Object2IntMap;
import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
+/**
+ * @deprecated Use {@link FieldNamesTrieDictionary}
+ */
+@Deprecated
public class FieldNamesHashDictionary extends AbstractFieldNamesDictionary {
//For both declared and inferred fields
private final List<IValueReference> fieldNames;
@@ -174,18 +178,6 @@
deserializeHashToFieldNameIndex(input, hashToFieldNameIndexMap, numberOfFieldNames);
}
- private static void deserializeFieldNames(DataInput input, List<IValueReference> fieldNames, int numberOfFieldNames)
- throws IOException {
-
- for (int i = 0; i < numberOfFieldNames; i++) {
- int length = input.readInt();
- ArrayBackedValueStorage fieldName = new ArrayBackedValueStorage(length);
- fieldName.setSize(length);
- input.readFully(fieldName.getByteArray(), 0, length);
- fieldNames.add(fieldName);
- }
- }
-
private static void deserializeDeclaredFieldNames(DataInput input,
Object2IntMap<String> declaredFieldNamesToIndexMap) throws IOException {
int numberOfDeclaredFieldNames = input.readInt();
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/FieldNamesTrieDictionary.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java
similarity index 95%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/FieldNamesTrieDictionary.java
rename to asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java
index 8b2d548..10de829 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/FieldNamesTrieDictionary.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/FieldNamesTrieDictionary.java
@@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.asterix.column.metadata;
+package org.apache.asterix.column.metadata.dictionary;
import java.io.DataInput;
import java.io.DataInputStream;
@@ -37,7 +37,7 @@
this(new FieldNameTrie());
}
- public FieldNamesTrieDictionary(FieldNameTrie dictionary) {
+ private FieldNamesTrieDictionary(FieldNameTrie dictionary) {
super();
this.dictionary = dictionary;
lookupStorage = new ArrayBackedValueStorage();
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/TrieNode.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java
similarity index 79%
rename from asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/TrieNode.java
rename to asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java
index 18e645b..32e902b 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/TrieNode.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/dictionary/TrieNode.java
@@ -17,22 +17,18 @@
* under the License.
*/
-package org.apache.asterix.column.metadata;
+package org.apache.asterix.column.metadata.dictionary;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
-import java.util.Map;
import org.apache.hyracks.data.std.api.IValueReference;
-import it.unimi.dsi.fastutil.bytes.Byte2ObjectArrayMap;
-import it.unimi.dsi.fastutil.bytes.Byte2ObjectMap;
-
class TrieNode {
public static final int NOT_FOUND_INDEX = -1;
- private Byte2ObjectMap<TrieNode> children;
+ private ByteToNodeMap children;
private boolean isEndOfField;
private int index;
private int start; // includes the edges' byte
@@ -40,11 +36,11 @@
private int bytesToStoreLength;
TrieNode() {
- children = new Byte2ObjectArrayMap<>();
+ this.children = new ByteToNodeMap();
index = NOT_FOUND_INDEX;
}
- TrieNode(Byte2ObjectMap<TrieNode> children) {
+ TrieNode(ByteToNodeMap children) {
this.children = children;
index = NOT_FOUND_INDEX;
}
@@ -60,10 +56,6 @@
this.isEndOfField = isEndOfField;
}
- public boolean containsKey(byte key) {
- return children.containsKey(key);
- }
-
public TrieNode getChild(byte key) {
return children.get(key);
}
@@ -72,7 +64,7 @@
children.put(key, child);
}
- public Byte2ObjectMap<TrieNode> getChildren() {
+ public ByteToNodeMap getChildren() {
return children;
}
@@ -98,7 +90,7 @@
public void reset() {
// since this object went to the new node.
- children = new Byte2ObjectArrayMap<>();
+ children = new ByteToNodeMap();
}
public void split(IValueReference fieldName, int splitIndex) {
@@ -107,6 +99,7 @@
// something to be split, have to create a new node
// and do the linking.
TrieNode childNode = new TrieNode(children);
+
int leftToSplit = length - splitIndex;
childNode.setIndex(index, start + splitIndex, leftToSplit, bytesToStoreLength);
childNode.setIsEndOfField(isEndOfField);
@@ -120,33 +113,25 @@
}
public void serialize(DataOutput out) throws IOException {
+ // Serialize child first
+ children.serialize(out);
// serialize fields
out.writeBoolean(isEndOfField);
out.writeInt(index);
out.writeInt(start);
out.writeInt(length);
out.writeInt(bytesToStoreLength);
-
- out.writeInt(children.size());
- for (Map.Entry<Byte, TrieNode> entry : children.byte2ObjectEntrySet()) {
- out.writeByte(entry.getKey());
- entry.getValue().serialize(out);
- }
}
public static TrieNode deserialize(DataInput in) throws IOException {
- TrieNode node = new TrieNode();
+ ByteToNodeMap children = ByteToNodeMap.deserialize(in);
+ TrieNode node = new TrieNode(children);
node.isEndOfField = in.readBoolean();
node.index = in.readInt();
node.start = in.readInt();
node.length = in.readInt();
node.bytesToStoreLength = in.readInt();
- int childrenSize = in.readInt();
- for (int i = 0; i < childrenSize; i++) {
- byte b = in.readByte();
- node.children.put(b, TrieNode.deserialize(in));
- }
return node;
}
}
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
index 6014bf6..0bea188 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/ObjectSchemaNode.java
@@ -18,7 +18,7 @@
*/
package org.apache.asterix.column.metadata.schema;
-import static org.apache.asterix.column.metadata.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
+import static org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary.DUMMY_FIELD_NAME_INDEX;
import java.io.DataInput;
import java.io.DataInputStream;
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
index 04334a3..a966d61 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
@@ -31,9 +31,9 @@
import java.util.Map;
import org.apache.asterix.column.metadata.AbstractColumnMetadata;
-import org.apache.asterix.column.metadata.FieldNamesTrieDictionary;
import org.apache.asterix.column.metadata.IFieldNamesDictionary;
import org.apache.asterix.column.metadata.PathInfoSerializer;
+import org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNestedNode;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
@@ -95,7 +95,7 @@
columnWriters = new ArrayList<>();
level = -1;
repeated = 0;
- fieldNamesDictionary = new FieldNamesTrieDictionary();
+ fieldNamesDictionary = AbstractFieldNamesDictionary.create();
root = new ObjectSchemaNode();
metaRoot = metaType != null ? new ObjectSchemaNode() : null;
pathInfoSerializer = new PathInfoSerializer();
@@ -250,7 +250,7 @@
deserializeWriters(input, writers, columnWriterFactory);
//FieldNames
- IFieldNamesDictionary fieldNamesDictionary = FieldNamesTrieDictionary.deserialize(input);
+ IFieldNamesDictionary fieldNamesDictionary = AbstractFieldNamesDictionary.deserialize(input);
//Schema
Map<AbstractSchemaNestedNode, RunLengthIntArray> definitionLevels = new HashMap<>();
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
index 967369f..67631cd 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
@@ -39,8 +39,8 @@
import org.apache.asterix.column.filter.range.IColumnRangeFilterEvaluatorFactory;
import org.apache.asterix.column.filter.range.IColumnRangeFilterValueAccessor;
import org.apache.asterix.column.metadata.AbstractColumnImmutableReadMetadata;
-import org.apache.asterix.column.metadata.FieldNamesTrieDictionary;
import org.apache.asterix.column.metadata.IFieldNamesDictionary;
+import org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor;
@@ -189,7 +189,7 @@
DataInput input = new DataInputStream(new ByteArrayInputStream(bytes, fieldNamesStart, length));
//FieldNames
- IFieldNamesDictionary fieldNamesDictionary = FieldNamesTrieDictionary.deserialize(input);
+ IFieldNamesDictionary fieldNamesDictionary = AbstractFieldNamesDictionary.deserialize(input);
//Schema
ObjectSchemaNode root = (ObjectSchemaNode) AbstractSchemaNode.deserialize(input, null);
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
index cda492c..1869415 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
@@ -35,8 +35,8 @@
import org.apache.asterix.column.filter.iterable.IColumnIterableFilterEvaluatorFactory;
import org.apache.asterix.column.filter.range.IColumnRangeFilterEvaluatorFactory;
import org.apache.asterix.column.filter.range.IColumnRangeFilterValueAccessor;
-import org.apache.asterix.column.metadata.FieldNamesTrieDictionary;
import org.apache.asterix.column.metadata.IFieldNamesDictionary;
+import org.apache.asterix.column.metadata.dictionary.AbstractFieldNamesDictionary;
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor;
@@ -133,7 +133,7 @@
DataInput input = new DataInputStream(new ByteArrayInputStream(bytes, fieldNamesStart, length));
//FieldNames
- IFieldNamesDictionary fieldNamesDictionary = FieldNamesTrieDictionary.deserialize(input);
+ IFieldNamesDictionary fieldNamesDictionary = AbstractFieldNamesDictionary.deserialize(input);
//Schema
ObjectSchemaNode root = (ObjectSchemaNode) AbstractSchemaNode.deserialize(input, null);
diff --git a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
new file mode 100644
index 0000000..63c2b22
--- /dev/null
+++ b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameDictionaryPerfTest.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.column.metadata.trie;
+
+import java.util.concurrent.TimeUnit;
+import java.util.function.Supplier;
+
+import org.apache.asterix.column.metadata.IFieldNamesDictionary;
+import org.apache.asterix.column.metadata.dictionary.FieldNamesHashDictionary;
+import org.apache.asterix.column.metadata.dictionary.FieldNamesTrieDictionary;
+import org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
+import org.apache.asterix.om.base.AMutableString;
+import org.apache.commons.lang3.RandomStringUtils;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.api.IValueReference;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.util.string.UTF8StringReader;
+import org.apache.hyracks.util.string.UTF8StringWriter;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * Manual micro-benchmark that times {@link FieldNamesHashDictionary} against {@link FieldNamesTrieDictionary}
+ * on repeated {@code getOrCreateFieldNameIndex} calls. It only prints elapsed times and asserts nothing.
+ */
+@Ignore("Manual benchmark: prints timings and asserts nothing")
+public class FieldNameDictionaryPerfTest {
+    private static final int NUM_RECORDS = 1000000;
+    private static final int NUMBER_OF_RANDOM_FIELD_NAMES = 1000;
+    private static final int NUM_ITER = 5;
+    private static final String[] FIELD_NAMES = { "country", "address", "free_parking", "city", "type", "url",
+            "reviews", "date", "author", "ratings", "Value", "Cleanliness", "Overall", "Check in / front desk", "Rooms",
+            "date", "author", "ratings", "Value", "Cleanliness", "Overall", "Check in / front desk", "Rooms", "date",
+            "author", "ratings", "Value", "Cleanliness", "Overall", "Check in / front desk", "Rooms", "phone", "price",
+            "avg_rating", "free_breakfast", "name", "public_likes", "email" };
+    private static final Supplier<IFieldNamesDictionary> HASH = FieldNamesHashDictionary::new;
+    private static final Supplier<IFieldNamesDictionary> TRIE = FieldNamesTrieDictionary::new;
+
+    private final AStringSerializerDeserializer stringSerDer =
+            new AStringSerializerDeserializer(new UTF8StringWriter(), new UTF8StringReader());
+    private final AMutableString string = new AMutableString("");
+
+    @Test
+    public void benchmarkRandom() throws HyracksDataException {
+        IValueReference[] fieldNames = new IValueReference[NUMBER_OF_RANDOM_FIELD_NAMES];
+        for (int i = 0; i < NUMBER_OF_RANDOM_FIELD_NAMES; i++) {
+            fieldNames[i] = getRandomString();
+        }
+        runAndReportTime(fieldNames);
+    }
+
+    @Test
+    public void benchmarkRepeated() throws HyracksDataException {
+        IValueReference[] fieldNames = new IValueReference[FIELD_NAMES.length];
+        for (int i = 0; i < FIELD_NAMES.length; i++) {
+            fieldNames[i] = serialize(FIELD_NAMES[i]);
+        }
+
+        runAndReportTime(fieldNames);
+    }
+
+    /** Times the hash dictionary, then the trie dictionary, and prints each elapsed time in milliseconds. */
+    private void runAndReportTime(IValueReference[] fieldNames) throws HyracksDataException {
+        System.out.println("HASH: " + timeMillis(HASH, fieldNames));
+        System.out.println("TRIE: " + timeMillis(TRIE, fieldNames));
+    }
+
+    /** Returns the wall-clock milliseconds taken by {@code NUM_ITER} consecutive {@code createAndRun} calls. */
+    private long timeMillis(Supplier<IFieldNamesDictionary> factory, IValueReference[] fieldNames)
+            throws HyracksDataException {
+        long start = System.nanoTime();
+        for (int i = 0; i < NUM_ITER; i++) {
+            createAndRun(factory, fieldNames);
+        }
+        return TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
+    }
+
+    /** Creates a fresh dictionary and requests the index of every field name {@code NUM_RECORDS} times. */
+    private void createAndRun(Supplier<IFieldNamesDictionary> factory, IValueReference[] fieldNames)
+            throws HyracksDataException {
+        IFieldNamesDictionary dictionary = factory.get();
+        for (int i = 0; i < NUM_RECORDS; i++) {
+            for (IValueReference fieldName : fieldNames) {
+                dictionary.getOrCreateFieldNameIndex(fieldName);
+            }
+        }
+    }
+
+    private IValueReference getRandomString() throws HyracksDataException {
+        return serialize(RandomStringUtils.randomAlphanumeric(5, 20));
+    }
+
+    /** Serializes {@code value} with the shared string serializer into a newly allocated storage. */
+    private IValueReference serialize(String value) throws HyracksDataException {
+        ArrayBackedValueStorage storage = new ArrayBackedValueStorage();
+        storage.reset();
+        string.setValue(value);
+        stringSerDer.serialize(string, storage.getDataOutput());
+        return storage;
+    }
+}
diff --git a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
index d996963..c9f58d5 100644
--- a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
+++ b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/metadata/trie/FieldNameTrieTest.java
@@ -28,8 +28,8 @@
import java.util.Collections;
import java.util.List;
-import org.apache.asterix.column.metadata.FieldNameTrie;
-import org.apache.asterix.column.metadata.FieldNamesTrieDictionary;
+import org.apache.asterix.column.metadata.dictionary.FieldNameTrie;
+import org.apache.asterix.column.metadata.dictionary.FieldNamesTrieDictionary;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.api.IValueReference;
import org.apache.hyracks.data.std.primitive.UTF8StringPointable;