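Address Yingyi's review comments: add license headers, include the item type tag when hashing list items, and bound list iteration by the list's own length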
diff --git a/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/comparators/ListItemBinaryComparatorFactory.java b/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/comparators/ListItemBinaryComparatorFactory.java
index 379a7e3..01e34b0 100644
--- a/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/comparators/ListItemBinaryComparatorFactory.java
+++ b/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/comparators/ListItemBinaryComparatorFactory.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.asterix.dataflow.data.nontagged.comparators;
import edu.uci.ics.asterix.formats.nontagged.UTF8StringLowercasePointable;
diff --git a/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/hash/ListItemBinaryHashFunctionFactory.java b/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/hash/ListItemBinaryHashFunctionFactory.java
index 1c0773c..67878a3 100644
--- a/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/hash/ListItemBinaryHashFunctionFactory.java
+++ b/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/hash/ListItemBinaryHashFunctionFactory.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.asterix.dataflow.data.nontagged.hash;
import edu.uci.ics.asterix.formats.nontagged.UTF8StringLowercasePointable;
@@ -7,10 +22,13 @@
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
import edu.uci.ics.hyracks.data.std.accessors.MurmurHash3BinaryHashFunctionFamily;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory;
-import edu.uci.ics.hyracks.data.std.primitive.FloatPointable;
-import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
-import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+/**
+ * Hash function factory for the items of heterogeneous lists.
+ * The item type tag is included in the hash computation so that values of different
+ * types that share the same raw bytes can be distinguished.
+ *
+ */
public class ListItemBinaryHashFunctionFactory implements IBinaryHashFunctionFactory {
private static final long serialVersionUID = 1L;
@@ -27,19 +45,9 @@
public IBinaryHashFunction createBinaryHashFunction(final ATypeTag itemTypeTag, final boolean ignoreCase) {
return new IBinaryHashFunction() {
-
- private IBinaryHashFunction boolHash = BooleanBinaryHashFunctionFactory.INSTANCE.createBinaryHashFunction();
- private IBinaryHashFunction intHash = new PointableBinaryHashFunctionFactory(IntegerPointable.FACTORY)
- .createBinaryHashFunction();
- private IBinaryHashFunction longHash = LongBinaryHashFunctionFactory.INSTANCE.createBinaryHashFunction();
- private IBinaryHashFunction floatHash = new PointableBinaryHashFunctionFactory(FloatPointable.FACTORY)
- .createBinaryHashFunction();
- private IBinaryHashFunction stringHash = new PointableBinaryHashFunctionFactory(UTF8StringPointable.FACTORY)
- .createBinaryHashFunction();
+
private IBinaryHashFunction lowerCaseStringHash = new PointableBinaryHashFunctionFactory(UTF8StringLowercasePointable.FACTORY)
.createBinaryHashFunction();
- private IBinaryHashFunction doubleHash = DoubleBinaryHashFunctionFactory.INSTANCE
- .createBinaryHashFunction();
private IBinaryHashFunction genericBinaryHash = MurmurHash3BinaryHashFunctionFamily.INSTANCE
.createBinaryHashFunction(0);
@@ -52,38 +60,21 @@
skip = 1;
}
switch (tag) {
- case BOOLEAN: {
- return boolHash.hash(bytes, offset + skip, length - skip);
- }
- case TIME:
- case DATE:
- case YEARMONTHDURATION:
- case INT32: {
- return intHash.hash(bytes, offset + skip, length - skip);
- }
- case DATETIME:
- case DAYTIMEDURATION:
- case INT64: {
- return longHash.hash(bytes, offset + skip, length - skip);
- }
- case FLOAT: {
- return floatHash.hash(bytes, offset + skip, length - skip);
- }
- case DOUBLE: {
- return doubleHash.hash(bytes, offset + skip, length - skip);
- }
case STRING: {
if (ignoreCase) {
return lowerCaseStringHash.hash(bytes, offset + skip, length - skip);
- } else {
- return stringHash.hash(bytes, offset + skip, length - skip);
}
}
- case NULL: {
- return 0;
- }
default: {
- return genericBinaryHash.hash(bytes, offset + skip, length - skip);
+ if (itemTypeTag != ATypeTag.ANY) {
+ // add the type tag
+ byte[] taggedBytes = new byte[length + 1];
+ taggedBytes[0] = itemTypeTag.serialize();
+ System.arraycopy(bytes, offset, taggedBytes, 1, length);
+ return genericBinaryHash.hash(taggedBytes, 0, length + 1);
+ } else {
+ return genericBinaryHash.hash(bytes, offset, length);
+ }
}
}
}
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AbstractAsterixListIterator.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AbstractAsterixListIterator.java
index 84eef1d..2f9cfc7 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AbstractAsterixListIterator.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AbstractAsterixListIterator.java
@@ -15,7 +15,8 @@
protected int pos = -1;
protected int nextPos = -1;
protected int itemLen = -1;
- protected int size = -1;
+ protected int numberOfItems = -1;
+ protected int listLength = -1;
protected int startOff = -1;
protected IBinaryComparator cmp;
@@ -29,12 +30,12 @@
@Override
public boolean hasNext() {
- return count < size;
+ return count < numberOfItems;
}
@Override
public int size() {
- return size;
+ return numberOfItems;
}
@Override
@@ -56,8 +57,8 @@
try {
pos = nextPos;
++count;
- nextPos = data.length;
- if (count + 1 < size) {
+ nextPos = startOff + listLength;
+ if (count + 1 < numberOfItems) {
nextPos = getItemOffset(data, startOff, count + 1);
}
itemLen = nextPos - pos;
@@ -71,8 +72,8 @@
count = 0;
try {
pos = getItemOffset(data, startOff, count);
- nextPos = data.length;
- if (count + 1 < size) {
+ nextPos = startOff + listLength;
+ if (count + 1 < numberOfItems) {
nextPos = getItemOffset(data, startOff, count + 1);
}
itemLen = nextPos - pos;
@@ -84,7 +85,8 @@
public void reset(byte[] data, int startOff) {
this.data = data;
this.startOff = startOff;
- size = getNumberOfItems(data, startOff);
+ this.numberOfItems = getNumberOfItems(data, startOff);
+ this.listLength = getListLength(data, startOff);
ATypeTag tag = EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(data[startOff + 1]);
switch (tag) {
case INT32: {
@@ -119,4 +121,6 @@
protected abstract int getItemOffset(byte[] serOrderedList, int offset, int itemIndex) throws AsterixException;
protected abstract int getNumberOfItems(byte[] serOrderedList, int offset);
+
+ protected abstract int getListLength(byte[] serOrderedList, int offset);
}
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AsterixOrderedListIterator.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AsterixOrderedListIterator.java
index d3714c1..6a73b6d 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AsterixOrderedListIterator.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AsterixOrderedListIterator.java
@@ -14,4 +14,9 @@
protected int getNumberOfItems(byte[] serOrderedList, int offset) {
return AOrderedListSerializerDeserializer.getNumberOfItems(serOrderedList, offset);
}
+
+ @Override
+ protected int getListLength(byte[] serOrderedList, int offset) {
+ return AOrderedListSerializerDeserializer.getOrderedListLength(serOrderedList, offset + 1);
+ }
}
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AsterixUnorderedListIterator.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AsterixUnorderedListIterator.java
index de7742b..90d9ae3 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AsterixUnorderedListIterator.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AsterixUnorderedListIterator.java
@@ -14,4 +14,9 @@
protected int getNumberOfItems(byte[] serOrderedList, int offset) {
return AUnorderedListSerializerDeserializer.getNumberOfItems(serOrderedList, offset);
}
+
+ @Override
+ protected int getListLength(byte[] serOrderedList, int offset) {
+ return AUnorderedListSerializerDeserializer.getUnorderedListLength(serOrderedList, offset + 1);
+ }
}