Merge branch 'master' of https://code.google.com/p/asterixdb into icetindil/issue_378
diff --git a/asterix-app/data/tinysocial/twm.adm b/asterix-app/data/tinysocial/twm.adm
new file mode 100644
index 0000000..d18c70f
--- /dev/null
+++ b/asterix-app/data/tinysocial/twm.adm
@@ -0,0 +1,12 @@
+{"tweetid":"1","user":{"screen-name":"NathanGiesen@211","lang":"en","friends_count":39339,"statuses_count":473,"name":"Nathan Giesen","followers_count":49416},"sender-location":point("47.44,80.65"),"send-time":datetime("2008-04-26T10:10:00"),"referred-topics":{{"t-mobile","customization"}},"message-text":" love t-mobile its customization is good:)"}
+{"tweetid":"2","user":{"screen-name":"ColineGeyer@63","lang":"en","friends_count":121,"statuses_count":362,"name":"Coline Geyer","followers_count":17159},"sender-location":point("32.84,67.14"),"send-time":datetime("2010-05-13T10:10:00"),"referred-topics":{{"verizon","shortcut-menu"}},"message-text":" like verizon its shortcut-menu is awesome:)"}
+{"tweetid":"3","user":{"screen-name":"NathanGiesen@211","lang":"en","friends_count":39339,"statuses_count":473,"name":"Nathan Giesen","followers_count":49416},"sender-location":point("29.72,75.8"),"send-time":datetime("2006-11-04T10:10:00"),"referred-topics":{{"motorola","speed"}},"message-text":" like motorola the speed is good:)"}
+{"tweetid":"4","user":{"screen-name":"NathanGiesen@211","lang":"en","friends_count":39339,"statuses_count":473,"name":"Nathan Giesen","followers_count":49416},"sender-location":point("39.28,70.48"),"send-time":datetime("2011-12-26T10:10:00"),"referred-topics":{{"sprint","voice-command"}},"message-text":" like sprint the voice-command is mind-blowing:)"}
+{"tweetid":"5","user":{"screen-name":"NathanGiesen@211","lang":"en","friends_count":39339,"statuses_count":473,"name":"Nathan Giesen","followers_count":49416},"sender-location":point("40.09,92.69"),"send-time":datetime("2006-08-04T10:10:00"),"referred-topics":{{"motorola","speed"}},"message-text":" can't stand motorola its speed is terrible:("}
+{"tweetid":"6","user":{"screen-name":"ColineGeyer@63","lang":"en","friends_count":121,"statuses_count":362,"name":"Coline Geyer","followers_count":17159},"sender-location":point("47.51,83.99"),"send-time":datetime("2010-05-07T10:10:00"),"referred-topics":{{"iphone","voice-clarity"}},"message-text":" like iphone the voice-clarity is good:)"}
+{"tweetid":"7","user":{"screen-name":"ChangEwing_573","lang":"en","friends_count":182,"statuses_count":394,"name":"Chang Ewing","followers_count":32136},"sender-location":point("36.21,72.6"),"send-time":datetime("2011-08-25T10:10:00"),"referred-topics":{{"samsung","platform"}},"message-text":" like samsung the platform is good"}
+{"tweetid":"8","user":{"screen-name":"NathanGiesen@211","lang":"en","friends_count":39339,"statuses_count":473,"name":"Nathan Giesen","followers_count":49416},"sender-location":point("46.05,93.34"),"send-time":datetime("2005-10-14T10:10:00"),"referred-topics":{{"t-mobile","shortcut-menu"}},"message-text":" like t-mobile the shortcut-menu is awesome:)"}
+{"tweetid":"9","user":{"screen-name":"NathanGiesen@211","lang":"en","friends_count":39339,"statuses_count":473,"name":"Nathan Giesen","followers_count":49416},"sender-location":point("36.86,74.62"),"send-time":datetime("2012-07-21T10:10:00"),"referred-topics":{{"verizon","voicemail-service"}},"message-text":" love verizon its voicemail-service is awesome"}
+{"tweetid":"10","user":{"screen-name":"ColineGeyer@63","lang":"en","friends_count":121,"statuses_count":362,"name":"Coline Geyer","followers_count":17159},"sender-location":point("29.15,76.53"),"send-time":datetime("2008-01-26T10:10:00"),"referred-topics":{{"verizon","voice-clarity"}},"message-text":" hate verizon its voice-clarity is OMG:("}
+{"tweetid":"11","user":{"screen-name":"NilaMilliron_tw","lang":"en","friends_count":445,"statuses_count":164,"name":"Nila Milliron","followers_count":22649},"sender-location":point("37.59,68.42"),"send-time":datetime("2008-03-09T10:10:00"),"referred-topics":{{"iphone","platform"}},"message-text":" can't stand iphone its platform is terrible"}
+{"tweetid":"12","user":{"screen-name":"OliJackson_512","lang":"en","friends_count":445,"statuses_count":164,"name":"Oli Jackson","followers_count":22649},"sender-location":point("24.82,94.63"),"send-time":datetime("2010-02-13T10:10:00"),"referred-topics":{{"samsung","voice-command"}},"message-text":" like samsung the voice-command is amazing:)"}
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/opentype/opentype.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/opentype/opentype.1.ddl.aql
new file mode 100644
index 0000000..17da24b
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/opentype/opentype.1.ddl.aql
@@ -0,0 +1,13 @@
+drop dataverse TinySocial if exists;
+create dataverse TinySocial;
+use dataverse TinySocial;
+/* Open type: only tweetid is declared; every other field of a loaded tweet is an undeclared field. */
+create type TweetMessageType as open {
+        tweetid: string
+}
+
+create dataset TweetMessages(TweetMessageType)
+primary key tweetid
+hints(cardinality=100); 
+
+
diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/opentype/opentype.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/opentype/opentype.2.update.aql
new file mode 100644
index 0000000..627623a
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/opentype/opentype.2.update.aql
@@ -0,0 +1,5 @@
+use dataverse TinySocial;
+/* Loads the tweets in data/tinysocial/twm.adm (ADM format) into TweetMessages. */
+load dataset TweetMessages
+using "edu.uci.ics.asterix.external.dataset.adapter.NCFileSystemAdapter"
+(("path"="nc1://data/tinysocial/twm.adm"),("format"="adm"));
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/opentype/opentype.3.query.aql b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/opentype/opentype.3.query.aql
new file mode 100644
index 0000000..957de80
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/fuzzyjoin/opentype/opentype.3.query.aql
@@ -0,0 +1,14 @@
+use dataverse TinySocial;
+
+set simfunction "jaccard";
+set simthreshold "0.3";
+/* For each tweet, list the referred-topics of every other tweet matched by ~= (presumably jaccard >= 0.3 as set above). */
+for $t in dataset TweetMessages
+order by $t.tweetid
+return {                     
+    "tweet": $t,               
+    "similar-tweets": for $t2 in dataset TweetMessages
+                        where  $t2.referred-topics ~= $t.referred-topics
+                        and $t2.tweetid != $t.tweetid
+                        return $t2.referred-topics
+};
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/opentype/opentype.1.adm b/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/opentype/opentype.1.adm
new file mode 100644
index 0000000..af8d0a7
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/fuzzyjoin/opentype/opentype.1.adm
@@ -0,0 +1,12 @@
+{ "tweet": { "tweetid": "1", "user": { "screen-name": "NathanGiesen@211", "lang": "en", "friends_count": 39339, "statuses_count": 473, "name": "Nathan Giesen", "followers_count": 49416 }, "sender-location": point("47.44,80.65"), "send-time": datetime("2008-04-26T10:10:00.000Z"), "referred-topics": {{ "t-mobile", "customization" }}, "message-text": " love t-mobile its customization is good:)" }, "similar-tweets": [ {{ "t-mobile", "shortcut-menu" }} ] }
+{ "tweet": { "tweetid": "10", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("29.15,76.53"), "send-time": datetime("2008-01-26T10:10:00.000Z"), "referred-topics": {{ "verizon", "voice-clarity" }}, "message-text": " hate verizon its voice-clarity is OMG:(" }, "similar-tweets": [ {{ "iphone", "voice-clarity" }}, {{ "verizon", "voicemail-service" }}, {{ "verizon", "shortcut-menu" }} ] }
+{ "tweet": { "tweetid": "11", "user": { "screen-name": "NilaMilliron_tw", "lang": "en", "friends_count": 445, "statuses_count": 164, "name": "Nila Milliron", "followers_count": 22649 }, "sender-location": point("37.59,68.42"), "send-time": datetime("2008-03-09T10:10:00.000Z"), "referred-topics": {{ "iphone", "platform" }}, "message-text": " can't stand iphone its platform is terrible" }, "similar-tweets": [ {{ "iphone", "voice-clarity" }}, {{ "samsung", "platform" }} ] }
+{ "tweet": { "tweetid": "12", "user": { "screen-name": "OliJackson_512", "lang": "en", "friends_count": 445, "statuses_count": 164, "name": "Oli Jackson", "followers_count": 22649 }, "sender-location": point("24.82,94.63"), "send-time": datetime("2010-02-13T10:10:00.000Z"), "referred-topics": {{ "samsung", "voice-command" }}, "message-text": " like samsung the voice-command is amazing:)" }, "similar-tweets": [ {{ "samsung", "platform" }}, {{ "sprint", "voice-command" }} ] }
+{ "tweet": { "tweetid": "2", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("32.84,67.14"), "send-time": datetime("2010-05-13T10:10:00.000Z"), "referred-topics": {{ "verizon", "shortcut-menu" }}, "message-text": " like verizon its shortcut-menu is awesome:)" }, "similar-tweets": [ {{ "verizon", "voicemail-service" }}, {{ "verizon", "voice-clarity" }}, {{ "t-mobile", "shortcut-menu" }} ] }
+{ "tweet": { "tweetid": "3", "user": { "screen-name": "NathanGiesen@211", "lang": "en", "friends_count": 39339, "statuses_count": 473, "name": "Nathan Giesen", "followers_count": 49416 }, "sender-location": point("29.72,75.8"), "send-time": datetime("2006-11-04T10:10:00.000Z"), "referred-topics": {{ "motorola", "speed" }}, "message-text": " like motorola the speed is good:)" }, "similar-tweets": [ {{ "motorola", "speed" }} ] }
+{ "tweet": { "tweetid": "4", "user": { "screen-name": "NathanGiesen@211", "lang": "en", "friends_count": 39339, "statuses_count": 473, "name": "Nathan Giesen", "followers_count": 49416 }, "sender-location": point("39.28,70.48"), "send-time": datetime("2011-12-26T10:10:00.000Z"), "referred-topics": {{ "sprint", "voice-command" }}, "message-text": " like sprint the voice-command is mind-blowing:)" }, "similar-tweets": [ {{ "samsung", "voice-command" }} ] }
+{ "tweet": { "tweetid": "5", "user": { "screen-name": "NathanGiesen@211", "lang": "en", "friends_count": 39339, "statuses_count": 473, "name": "Nathan Giesen", "followers_count": 49416 }, "sender-location": point("40.09,92.69"), "send-time": datetime("2006-08-04T10:10:00.000Z"), "referred-topics": {{ "motorola", "speed" }}, "message-text": " can't stand motorola its speed is terrible:(" }, "similar-tweets": [ {{ "motorola", "speed" }} ] }
+{ "tweet": { "tweetid": "6", "user": { "screen-name": "ColineGeyer@63", "lang": "en", "friends_count": 121, "statuses_count": 362, "name": "Coline Geyer", "followers_count": 17159 }, "sender-location": point("47.51,83.99"), "send-time": datetime("2010-05-07T10:10:00.000Z"), "referred-topics": {{ "iphone", "voice-clarity" }}, "message-text": " like iphone the voice-clarity is good:)" }, "similar-tweets": [ {{ "verizon", "voice-clarity" }}, {{ "iphone", "platform" }} ] }
+{ "tweet": { "tweetid": "7", "user": { "screen-name": "ChangEwing_573", "lang": "en", "friends_count": 182, "statuses_count": 394, "name": "Chang Ewing", "followers_count": 32136 }, "sender-location": point("36.21,72.6"), "send-time": datetime("2011-08-25T10:10:00.000Z"), "referred-topics": {{ "samsung", "platform" }}, "message-text": " like samsung the platform is good" }, "similar-tweets": [ {{ "iphone", "platform" }}, {{ "samsung", "voice-command" }} ] }
+{ "tweet": { "tweetid": "8", "user": { "screen-name": "NathanGiesen@211", "lang": "en", "friends_count": 39339, "statuses_count": 473, "name": "Nathan Giesen", "followers_count": 49416 }, "sender-location": point("46.05,93.34"), "send-time": datetime("2005-10-14T10:10:00.000Z"), "referred-topics": {{ "t-mobile", "shortcut-menu" }}, "message-text": " like t-mobile the shortcut-menu is awesome:)" }, "similar-tweets": [ {{ "t-mobile", "customization" }}, {{ "verizon", "shortcut-menu" }} ] }
+{ "tweet": { "tweetid": "9", "user": { "screen-name": "NathanGiesen@211", "lang": "en", "friends_count": 39339, "statuses_count": 473, "name": "Nathan Giesen", "followers_count": 49416 }, "sender-location": point("36.86,74.62"), "send-time": datetime("2012-07-21T10:10:00.000Z"), "referred-topics": {{ "verizon", "voicemail-service" }}, "message-text": " love verizon its voicemail-service is awesome" }, "similar-tweets": [ {{ "verizon", "voice-clarity" }}, {{ "verizon", "shortcut-menu" }} ] }
\ No newline at end of file
diff --git a/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterix-app/src/test/resources/runtimets/testsuite.xml
index 9e6c308..8585570 100644
--- a/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -1790,6 +1790,11 @@
       </compilation-unit>
     </test-case>
     -->
+    <test-case FilePath="fuzzyjoin">
+      <compilation-unit name="opentype">
+        <output-dir compare="Text">opentype</output-dir>
+      </compilation-unit>
+    </test-case>
   </test-group>
   <test-group name="index-join">
     <test-case FilePath="index-join">
diff --git a/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/comparators/ListItemBinaryComparatorFactory.java b/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/comparators/ListItemBinaryComparatorFactory.java
new file mode 100644
index 0000000..379a7e3
--- /dev/null
+++ b/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/comparators/ListItemBinaryComparatorFactory.java
@@ -0,0 +1,149 @@
+package edu.uci.ics.asterix.dataflow.data.nontagged.comparators;
+
+import edu.uci.ics.asterix.formats.nontagged.UTF8StringLowercasePointable;
+import edu.uci.ics.asterix.om.types.ATypeTag;
+import edu.uci.ics.asterix.om.types.EnumDeserializer;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
+import edu.uci.ics.hyracks.data.std.primitive.DoublePointable;
+import edu.uci.ics.hyracks.data.std.primitive.FloatPointable;
+import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+
+/**
+ * Factory for comparators over the serialized items of two lists. The comparator dispatches on the
+ * items' type tags and delegates to a type-specific comparator.
+ */
+public class ListItemBinaryComparatorFactory implements IBinaryComparatorFactory {
+
+    private static final long serialVersionUID = 1L;
+
+    public static final ListItemBinaryComparatorFactory INSTANCE = new ListItemBinaryComparatorFactory();
+
+    private ListItemBinaryComparatorFactory() {
+    }
+
+    /** Creates a case-sensitive comparator whose two item type tags are both {@link ATypeTag#NULL}. */
+    @Override
+    public IBinaryComparator createBinaryComparator() {
+        return createBinaryComparator(ATypeTag.NULL, ATypeTag.NULL, false);
+    }
+
+    /**
+     * Creates a comparator for one item of a first list against one item of a second list.
+     *
+     * @param firstItemTypeTag item type tag of the first list; for {@link ATypeTag#ANY} the tag is read
+     *            from the first byte of each item and that byte is excluded from the comparison
+     * @param secondItemTypeTag item type tag of the second list, handled the same way
+     * @param ignoreCase if true, STRING items are compared via {@link UTF8StringLowercasePointable}
+     * @return a comparator that delegates to a per-type comparator when both tags match and to a raw
+     *         byte comparator otherwise
+     */
+    public IBinaryComparator createBinaryComparator(final ATypeTag firstItemTypeTag,
+            final ATypeTag secondItemTypeTag, final boolean ignoreCase) {
+        return new IBinaryComparator() {
+            final IBinaryComparator booleanCmp = BooleanBinaryComparatorFactory.INSTANCE.createBinaryComparator();
+            final IBinaryComparator int32Cmp =
+                    new PointableBinaryComparatorFactory(IntegerPointable.FACTORY).createBinaryComparator();
+            final IBinaryComparator int64Cmp = LongBinaryComparatorFactory.INSTANCE.createBinaryComparator();
+            final IBinaryComparator stringCmp =
+                    new PointableBinaryComparatorFactory(UTF8StringPointable.FACTORY).createBinaryComparator();
+            final IBinaryComparator lowercaseStringCmp =
+                    new PointableBinaryComparatorFactory(UTF8StringLowercasePointable.FACTORY).createBinaryComparator();
+            final IBinaryComparator floatCmp =
+                    new PointableBinaryComparatorFactory(FloatPointable.FACTORY).createBinaryComparator();
+            final IBinaryComparator doubleCmp =
+                    new PointableBinaryComparatorFactory(DoublePointable.FACTORY).createBinaryComparator();
+            final IBinaryComparator rectangleCmp =
+                    ARectanglePartialBinaryComparatorFactory.INSTANCE.createBinaryComparator();
+            final IBinaryComparator circleCmp =
+                    ACirclePartialBinaryComparatorFactory.INSTANCE.createBinaryComparator();
+            final IBinaryComparator durationCmp =
+                    ADurationPartialBinaryComparatorFactory.INSTANCE.createBinaryComparator();
+            final IBinaryComparator intervalCmp =
+                    AIntervalPartialBinaryComparatorFactory.INSTANCE.createBinaryComparator();
+            final IBinaryComparator lineCmp = ALinePartialBinaryComparatorFactory.INSTANCE.createBinaryComparator();
+            final IBinaryComparator pointCmp =
+                    APointPartialBinaryComparatorFactory.INSTANCE.createBinaryComparator();
+            final IBinaryComparator point3DCmp =
+                    APoint3DPartialBinaryComparatorFactory.INSTANCE.createBinaryComparator();
+            final IBinaryComparator polygonCmp =
+                    APolygonPartialBinaryComparatorFactory.INSTANCE.createBinaryComparator();
+            final IBinaryComparator rawCmp = RawBinaryComparatorFactory.INSTANCE.createBinaryComparator();
+
+            @Override
+            public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+                // An item starting with the NULL tag sorts first and equals another such item.
+                // NOTE(review): the first byte is inspected even when the item type tag is not ANY and
+                // no tag byte is skipped below -- confirm that such items can never start with it.
+                final boolean leftNull = startsWithNullTag(b1, s1);
+                final boolean rightNull = startsWithNullTag(b2, s2);
+                if (leftNull || rightNull) {
+                    return leftNull ? (rightNull ? 0 : -1) : 1;
+                }
+
+                // For ANY the concrete tag is the item's first byte, which is then left out of the payload.
+                final boolean leftTagged = firstItemTypeTag == ATypeTag.ANY;
+                final ATypeTag leftTag =
+                        leftTagged ? EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(b1[s1]) : firstItemTypeTag;
+                final int leftSkip = leftTagged ? 1 : 0;
+
+                final boolean rightTagged = secondItemTypeTag == ATypeTag.ANY;
+                final ATypeTag rightTag =
+                        rightTagged ? EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(b2[s2]) : secondItemTypeTag;
+                final int rightSkip = rightTagged ? 1 : 0;
+
+                // Items of different types are ordered by their raw bytes.
+                final IBinaryComparator delegate = (leftTag == rightTag) ? comparatorFor(leftTag) : rawCmp;
+                return delegate.compare(b1, s1 + leftSkip, l1 - leftSkip, b2, s2 + rightSkip, l2 - rightSkip);
+            }
+
+            /** Tells whether {@code bytes[start]} equals the serialized NULL type tag. */
+            private boolean startsWithNullTag(byte[] bytes, int start) {
+                return bytes[start] == ATypeTag.NULL.serialize();
+            }
+
+            /** Selects the comparator for two items sharing {@code tag}; unlisted tags compare raw. */
+            private IBinaryComparator comparatorFor(ATypeTag tag) {
+                switch (tag) {
+                    case BOOLEAN:
+                        return booleanCmp;
+                    case TIME:
+                    case DATE:
+                    case YEARMONTHDURATION:
+                    case INT32:
+                        return int32Cmp;
+                    case DATETIME:
+                    case DAYTIMEDURATION:
+                    case INT64:
+                        return int64Cmp;
+                    case FLOAT:
+                        return floatCmp;
+                    case DOUBLE:
+                        return doubleCmp;
+                    case STRING:
+                        return ignoreCase ? lowercaseStringCmp : stringCmp;
+                    case RECTANGLE:
+                        return rectangleCmp;
+                    case CIRCLE:
+                        return circleCmp;
+                    case POINT:
+                        return pointCmp;
+                    case POINT3D:
+                        return point3DCmp;
+                    case LINE:
+                        return lineCmp;
+                    case POLYGON:
+                        return polygonCmp;
+                    case DURATION:
+                        return durationCmp;
+                    case INTERVAL:
+                        return intervalCmp;
+                    default:
+                        return rawCmp;
+                }
+            }
+        };
+    }
+}
diff --git a/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/hash/ListItemBinaryHashFunctionFactory.java b/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/hash/ListItemBinaryHashFunctionFactory.java
new file mode 100644
index 0000000..1c0773c
--- /dev/null
+++ b/asterix-om/src/main/java/edu/uci/ics/asterix/dataflow/data/nontagged/hash/ListItemBinaryHashFunctionFactory.java
@@ -0,0 +1,92 @@
+package edu.uci.ics.asterix.dataflow.data.nontagged.hash;
+
+import edu.uci.ics.asterix.formats.nontagged.UTF8StringLowercasePointable;
+import edu.uci.ics.asterix.om.types.ATypeTag;
+import edu.uci.ics.asterix.om.types.EnumDeserializer;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
+import edu.uci.ics.hyracks.data.std.accessors.MurmurHash3BinaryHashFunctionFamily;
+import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory;
+import edu.uci.ics.hyracks.data.std.primitive.FloatPointable;
+import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+
+/** Factory for hash functions over serialized list items; the hash function dispatches on the item's type tag. */
+public class ListItemBinaryHashFunctionFactory implements IBinaryHashFunctionFactory {
+
+    private static final long serialVersionUID = 1L;
+
+    public static final ListItemBinaryHashFunctionFactory INSTANCE = new ListItemBinaryHashFunctionFactory();
+
+    private ListItemBinaryHashFunctionFactory() {
+    }
+
+    /** Creates a case-sensitive hash function whose item type tag is {@link ATypeTag#ANY}. */
+    @Override
+    public IBinaryHashFunction createBinaryHashFunction() {
+        return createBinaryHashFunction(ATypeTag.ANY, false);
+    }
+
+    /**
+     * Creates a hash function for the items of one list.
+     *
+     * @param itemTypeTag item type tag of the list; for {@link ATypeTag#ANY} the tag is read from the
+     *            first byte of each item and that byte is excluded from the hashed range
+     * @param ignoreCase if true, STRING items are hashed via {@link UTF8StringLowercasePointable}
+     * @return a hash function that yields 0 for NULL and otherwise delegates to a per-type hash function
+     */
+    public IBinaryHashFunction createBinaryHashFunction(final ATypeTag itemTypeTag, final boolean ignoreCase) {
+        return new IBinaryHashFunction() {
+
+            private final IBinaryHashFunction boolHash = BooleanBinaryHashFunctionFactory.INSTANCE.createBinaryHashFunction();
+            private final IBinaryHashFunction intHash =
+                    new PointableBinaryHashFunctionFactory(IntegerPointable.FACTORY).createBinaryHashFunction();
+            private final IBinaryHashFunction longHash = LongBinaryHashFunctionFactory.INSTANCE.createBinaryHashFunction();
+            private final IBinaryHashFunction floatHash =
+                    new PointableBinaryHashFunctionFactory(FloatPointable.FACTORY).createBinaryHashFunction();
+            private final IBinaryHashFunction stringHash =
+                    new PointableBinaryHashFunctionFactory(UTF8StringPointable.FACTORY).createBinaryHashFunction();
+            private final IBinaryHashFunction lowerCaseStringHash =
+                    new PointableBinaryHashFunctionFactory(UTF8StringLowercasePointable.FACTORY).createBinaryHashFunction();
+            private final IBinaryHashFunction doubleHash = DoubleBinaryHashFunctionFactory.INSTANCE.createBinaryHashFunction();
+            private final IBinaryHashFunction genericBinaryHash =
+                    MurmurHash3BinaryHashFunctionFamily.INSTANCE.createBinaryHashFunction(0);
+
+            @Override
+            public int hash(byte[] bytes, int offset, int length) {
+                // For ANY the concrete tag is the item's first byte, which is then left out of the hash.
+                final boolean tagged = itemTypeTag == ATypeTag.ANY;
+                final ATypeTag tag =
+                        tagged ? EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(bytes[offset]) : itemTypeTag;
+                final int skip = tagged ? 1 : 0;
+                // NULL items always hash to 0; every other type delegates to its hash function.
+                return (tag == ATypeTag.NULL) ? 0 : hashFunctionFor(tag).hash(bytes, offset + skip, length - skip);
+            }
+
+            /** Selects the hash function for {@code tag}; unlisted tags use the generic binary hash. */
+            private IBinaryHashFunction hashFunctionFor(ATypeTag tag) {
+                switch (tag) {
+                    case BOOLEAN:
+                        return boolHash;
+                    case TIME:
+                    case DATE:
+                    case YEARMONTHDURATION:
+                    case INT32:
+                        return intHash;
+                    case DATETIME:
+                    case DAYTIMEDURATION:
+                    case INT64:
+                        return longHash;
+                    case FLOAT:
+                        return floatHash;
+                    case DOUBLE:
+                        return doubleHash;
+                    case STRING:
+                        return ignoreCase ? lowerCaseStringHash : stringHash;
+                    default:
+                        return genericBinaryHash;
+                }
+            }
+        };
+    }
+}
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AbstractAsterixListIterator.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AbstractAsterixListIterator.java
index efae5f9..84eef1d 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AbstractAsterixListIterator.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/AbstractAsterixListIterator.java
@@ -13,6 +13,8 @@
     protected byte[] data;
     protected int count = 0;
     protected int pos = -1;
+    protected int nextPos = -1;
+    protected int itemLen = -1;
     protected int size = -1;
     protected int startOff = -1;
     protected IBinaryComparator cmp;
@@ -44,11 +46,21 @@
     public int getPos() {
         return pos;
     }
+    
+    public int getItemLen() {
+    	return itemLen;
+    }
 
     @Override
     public void next() {
         try {
-            pos = getItemOffset(data, startOff, ++count);
+        	pos = nextPos;
+        	++count;
+            nextPos = data.length; // NOTE(review): end of the whole backing array, not necessarily of this list; SimilarityJaccardEvaluator reads both lists from one array, so the last item's itemLen may overshoot -- confirm
+            if (count + 1 < size) {
+            	nextPos = getItemOffset(data, startOff, count + 1);
+            }
+            itemLen = nextPos - pos;
         } catch (AsterixException e) {
             throw new AsterixRuntimeException(e);
         }
@@ -59,6 +71,11 @@
         count = 0;
         try {
             pos = getItemOffset(data, startOff, count);
+            nextPos = data.length; // NOTE(review): end of the whole backing array, not necessarily of this list; for a single-item list itemLen may overshoot when data holds more than this list -- confirm
+            if (count + 1 < size) {
+            	nextPos = getItemOffset(data, startOff, count + 1);
+            }
+            itemLen = nextPos - pos;
         } catch (AsterixException e) {
             throw new AsterixRuntimeException(e);
         }
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardCheckEvaluator.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardCheckEvaluator.java
index ab73df2..11c4d6b 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardCheckEvaluator.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardCheckEvaluator.java
@@ -62,7 +62,7 @@
             probeListCount++;
             byte[] buf = probeIter.getData();
             int off = probeIter.getPos();
-            int len = getItemLen(buf, off);
+            int len = probeIter.getItemLen();
             keyEntry.set(buf, off, len);            
             BinaryEntry entry = hashMap.get(keyEntry);
             if (entry != null) {
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java
index 9f5c9c8..c610bde 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java
@@ -4,8 +4,6 @@
 import java.io.IOException;
 import java.util.Arrays;
 
-import edu.uci.ics.asterix.formats.nontagged.AqlBinaryComparatorFactoryProvider;
-import edu.uci.ics.asterix.formats.nontagged.AqlBinaryHashFunctionFactoryProvider;
 import edu.uci.ics.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
 import edu.uci.ics.asterix.om.base.AFloat;
 import edu.uci.ics.asterix.om.base.AMutableFloat;
@@ -22,9 +20,10 @@
 import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
 import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
 import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
-import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
 import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
 import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+import edu.uci.ics.asterix.dataflow.data.nontagged.comparators.ListItemBinaryComparatorFactory;
+import edu.uci.ics.asterix.dataflow.data.nontagged.hash.ListItemBinaryHashFunctionFactory;
 
 public class SimilarityJaccardEvaluator implements ICopyEvaluator {
 
@@ -58,7 +57,8 @@
     protected int firstStart = -1;
     protected int secondStart = -1;
     protected float jaccSim = 0.0f;
-    protected ATypeTag itemTypeTag;
+    protected ATypeTag firstItemTypeTag;
+    protected ATypeTag secondItemTypeTag;
 
     protected BinaryHashMap hashMap;
     protected BinaryEntry keyEntry = new BinaryEntry();
@@ -105,6 +105,9 @@
 
         firstTypeTag = EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argOut.getByteArray()[firstStart]);
         secondTypeTag = EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argOut.getByteArray()[secondStart]);
+        
+        firstItemTypeTag = EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argOut.getByteArray()[firstStart + 1]);
+        secondItemTypeTag = EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argOut.getByteArray()[secondStart + 1]);
     }
 
     protected boolean prepareLists(byte[] bytes, int firstStart, int secondStart, ATypeTag argType)
@@ -116,17 +119,12 @@
         if (firstListIter.size() == 0 || secondListIter.size() == 0) {
             return false;
         }
-        if (firstTypeTag == ATypeTag.ANY || secondTypeTag == ATypeTag.ANY) {
-            throw new AlgebricksException("\n Jaccard can only be called on homogenous lists");
-        }
         // TODO: Check item types are compatible.
-        itemTypeTag = EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(bytes[firstStart + 1]);
         return true;
     }
 
     protected float computeResult(byte[] bytes, int firstStart, int secondStart, ATypeTag argType)
             throws AlgebricksException {
-        setHashMap(bytes, firstStart, secondStart);
         // We will subtract the intersection size later to get the real union size.
         int firstListSize = firstListIter.size();
         int secondListSize = secondListIter.size();
@@ -136,7 +134,10 @@
         AbstractAsterixListIterator probeList = (buildList == firstListIter) ? secondListIter : firstListIter;
         int buildListSize = (buildList == firstListIter) ? firstListSize : secondListSize;
         int probeListSize = (probeList == firstListIter) ? firstListSize : secondListSize;
+        ATypeTag buildItemTypeTag = (buildList == firstListIter) ? firstItemTypeTag : secondItemTypeTag;
+        ATypeTag probeItemTypeTag = (probeList == firstListIter) ? firstItemTypeTag : secondItemTypeTag;
 
+        setHashMap(bytes, buildItemTypeTag, probeItemTypeTag);
         buildHashMap(buildList);
         int intersectionSize = probeHashMap(probeList, buildListSize, probeListSize);
         // Special indicator for the "check" version of jaccard.
@@ -154,7 +155,7 @@
         while (buildIter.hasNext()) {
             byte[] buf = buildIter.getData();
             int off = buildIter.getPos();
-            int len = getItemLen(buf, off);
+            int len = buildIter.getItemLen();
             keyEntry.set(buf, off, len);
             BinaryEntry entry = hashMap.put(keyEntry, valEntry);
             if (entry != null) {
@@ -172,7 +173,7 @@
         while (probeIter.hasNext()) {
             byte[] buf = probeIter.getData();
             int off = probeIter.getPos();
-            int len = getItemLen(buf, off);
+            int len = probeIter.getItemLen();
             keyEntry.set(buf, off, len);
             BinaryEntry entry = hashMap.get(keyEntry);
             if (entry != null) {
@@ -195,69 +196,19 @@
         return intersectionSize;
     }
 
-    protected void setHashMap(byte[] bytes, int firstStart, int secondStart) {
+    protected void setHashMap(byte[] bytes, ATypeTag buildItemTypeTag, ATypeTag probeItemTypeTag) {
         if (hashMap != null) {
             hashMap.clear();
             return;
         }
-        IBinaryHashFunction hashFunc = null;
-        IBinaryComparator cmp = null;
-        switch (itemTypeTag) {
-            case INT32: {
-                hashFunc = AqlBinaryHashFunctionFactoryProvider.INTEGER_POINTABLE_INSTANCE.createBinaryHashFunction();
-                cmp = AqlBinaryComparatorFactoryProvider.INTEGER_POINTABLE_INSTANCE.createBinaryComparator();
-                break;
-            }
-            case FLOAT: {
-                hashFunc = AqlBinaryHashFunctionFactoryProvider.FLOAT_POINTABLE_INSTANCE.createBinaryHashFunction();
-                cmp = AqlBinaryComparatorFactoryProvider.FLOAT_POINTABLE_INSTANCE.createBinaryComparator();
-                break;
-            }
-            case DOUBLE: {
-                hashFunc = AqlBinaryHashFunctionFactoryProvider.DOUBLE_POINTABLE_INSTANCE.createBinaryHashFunction();
-                cmp = AqlBinaryComparatorFactoryProvider.DOUBLE_POINTABLE_INSTANCE.createBinaryComparator();
-                break;
-            }
-            case STRING: {
-                if (ignoreCase) {
-                    // Ignore case in comparisons and hashing.
-                    hashFunc = AqlBinaryHashFunctionFactoryProvider.UTF8STRING_LOWERCASE_POINTABLE_INSTANCE
-                            .createBinaryHashFunction();
-                    cmp = AqlBinaryComparatorFactoryProvider.UTF8STRING_LOWERCASE_POINTABLE_INSTANCE
-                            .createBinaryComparator();
-                } else {
-                    hashFunc = AqlBinaryHashFunctionFactoryProvider.UTF8STRING_POINTABLE_INSTANCE
-                            .createBinaryHashFunction();
-                    cmp = AqlBinaryComparatorFactoryProvider.UTF8STRING_POINTABLE_INSTANCE.createBinaryComparator();
-                }
-                break;
-            }
-            default: {
-                break;
-            }
-        }
-        hashMap = new BinaryHashMap(TABLE_SIZE, TABLE_FRAME_SIZE, hashFunc, cmp);
-    }
-
-    protected int getItemLen(byte[] bytes, int itemOff) {
-        switch (itemTypeTag) {
-            case INT32: {
-                return 4;
-            }
-            case FLOAT: {
-                return 4;
-            }
-            case DOUBLE: {
-                return 8;
-            }
-            case STRING: {
-                // 2 bytes for the UTF8 len, plus the string data.
-                return 2 + UTF8StringPointable.getUTFLength(bytes, itemOff);
-            }
-            default: {
-                return -1;
-            }
-        }
+      
+        IBinaryHashFunction putHashFunc = ListItemBinaryHashFunctionFactory.INSTANCE
+				.createBinaryHashFunction(buildItemTypeTag, ignoreCase);
+        IBinaryHashFunction getHashFunc = ListItemBinaryHashFunctionFactory.INSTANCE
+				.createBinaryHashFunction(probeItemTypeTag, ignoreCase);
+        IBinaryComparator cmp = ListItemBinaryComparatorFactory.INSTANCE
+				.createBinaryComparator(buildItemTypeTag, probeItemTypeTag, ignoreCase);
+        hashMap = new BinaryHashMap(TABLE_SIZE, TABLE_FRAME_SIZE, putHashFunc, getHashFunc, cmp);
     }
 
     protected boolean checkArgTypes(ATypeTag typeTag1, ATypeTag typeTag2) throws AlgebricksException {
diff --git a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/BinaryHashMap.java b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/BinaryHashMap.java
index 240f8c7..f69a54e 100644
--- a/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/BinaryHashMap.java
+++ b/asterix-runtime/src/main/java/edu/uci/ics/asterix/runtime/evaluators/functions/BinaryHashMap.java
@@ -33,7 +33,8 @@
 	private static final int PTR_SIZE = 8;
 	private static final int SLOT_SIZE = 2;
 	private static final int ENTRY_HEADER_SIZE = PTR_SIZE + 2 * SLOT_SIZE;
-	private final IBinaryHashFunction hashFunc;
+	private final IBinaryHashFunction putHashFunc;
+	private final IBinaryHashFunction getHashFunc;
 	private final IBinaryComparator cmp;
 	private final BinaryEntry returnValue = new BinaryEntry();
 	
@@ -65,10 +66,11 @@
 		}
 	}
 	
-	public BinaryHashMap(int tableSize, int frameSize, IBinaryHashFunction hashFunc, IBinaryComparator cmp) {
+	public BinaryHashMap(int tableSize, int frameSize, IBinaryHashFunction putHashFunc, IBinaryHashFunction getHashFunc, IBinaryComparator cmp) {
 		listHeads = new long[tableSize];
 		this.frameSize = frameSize;		
-		this.hashFunc = hashFunc;
+		this.putHashFunc = putHashFunc;
+		this.getHashFunc = getHashFunc;
 		this.cmp = cmp;
 		frames.add(ByteBuffer.allocate(frameSize));
 		clear();
@@ -98,7 +100,12 @@
 	}
 	
 	private BinaryEntry getPutInternal(BinaryEntry key, BinaryEntry value, boolean put) {
-		int bucket = Math.abs(hashFunc.hash(key.buf, key.off, key.len) % listHeads.length);
+		int bucket;
+		if (put) {
+			bucket = Math.abs(putHashFunc.hash(key.buf, key.off, key.len) % listHeads.length);
+		} else {
+			bucket = Math.abs(getHashFunc.hash(key.buf, key.off, key.len) % listHeads.length);
+		}
 		long headPtr = listHeads[bucket];
 		if (headPtr == NULL_PTR) {
 			// Key definitely doesn't exist yet.