Fix ASTERIXDB-1566 fix UTF8 comparator and hash function.
Change-Id: I187bf1243abf143b3b265fa8098614b9a72c65ad
Reviewed-on: https://asterix-gerrit.ics.uci.edu/1054
Sonar-Qube: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Yingyi Bu <buyingyi@gmail.com>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/string-equal-public/string-equal-public.1.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/string-equal-public/string-equal-public.1.query.aql
new file mode 100644
index 0000000..d5c2dff
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/string-equal-public/string-equal-public.1.query.aql
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+"的"="离"
+"و"="ن"
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-equal-public/string-equal-public.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-equal-public/string-equal-public.1.adm
new file mode 100644
index 0000000..4b095fd
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-equal-public/string-equal-public.1.adm
@@ -0,0 +1,2 @@
+false
+false
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
index 82f5071..f410cbe 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -5411,6 +5411,11 @@
</compilation-unit>
</test-case>
<test-case FilePath="string">
+ <compilation-unit name="string-equal-public">
+ <output-dir compare="Text">string-equal-public</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="string">
<compilation-unit name="string-join1">
<output-dir compare="Text">string-join1</output-dir>
</compilation-unit>
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlBinaryComparatorFactoryProvider.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlBinaryComparatorFactoryProvider.java
index feb3228..a8c6eea 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlBinaryComparatorFactoryProvider.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlBinaryComparatorFactoryProvider.java
@@ -18,8 +18,6 @@
*/
package org.apache.asterix.formats.nontagged;
-import java.io.Serializable;
-
import org.apache.asterix.dataflow.data.nontagged.comparators.ABinaryComparator;
import org.apache.asterix.dataflow.data.nontagged.comparators.ACirclePartialBinaryComparatorFactory;
import org.apache.asterix.dataflow.data.nontagged.comparators.ADurationPartialBinaryComparatorFactory;
@@ -48,9 +46,11 @@
import org.apache.hyracks.data.std.primitive.FloatPointable;
import org.apache.hyracks.data.std.primitive.IntegerPointable;
import org.apache.hyracks.data.std.primitive.LongPointable;
-import org.apache.hyracks.data.std.primitive.RawUTF8StringPointable;
import org.apache.hyracks.data.std.primitive.ShortPointable;
import org.apache.hyracks.data.std.primitive.UTF8StringLowercasePointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+
+import java.io.Serializable;
public class AqlBinaryComparatorFactoryProvider implements IBinaryComparatorFactoryProvider, Serializable {
@@ -69,7 +69,7 @@
public static final PointableBinaryComparatorFactory DOUBLE_POINTABLE_INSTANCE = new PointableBinaryComparatorFactory(
DoublePointable.FACTORY);
public static final PointableBinaryComparatorFactory UTF8STRING_POINTABLE_INSTANCE = new PointableBinaryComparatorFactory(
- RawUTF8StringPointable.FACTORY);
+ UTF8StringPointable.FACTORY);
// Equivalent to UTF8STRING_POINTABLE_INSTANCE but all characters are considered lower case to implement case-insensitive comparisons.
public static final PointableBinaryComparatorFactory UTF8STRING_LOWERCASE_POINTABLE_INSTANCE = new PointableBinaryComparatorFactory(
UTF8StringLowercasePointable.FACTORY);
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlBinaryHashFunctionFactoryProvider.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlBinaryHashFunctionFactoryProvider.java
index 8cfe51e..23c245f 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlBinaryHashFunctionFactoryProvider.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/AqlBinaryHashFunctionFactoryProvider.java
@@ -28,8 +28,8 @@
import org.apache.hyracks.data.std.primitive.DoublePointable;
import org.apache.hyracks.data.std.primitive.FloatPointable;
import org.apache.hyracks.data.std.primitive.IntegerPointable;
-import org.apache.hyracks.data.std.primitive.RawUTF8StringPointable;
import org.apache.hyracks.data.std.primitive.UTF8StringLowercasePointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
public class AqlBinaryHashFunctionFactoryProvider implements IBinaryHashFunctionFactoryProvider, Serializable {
@@ -42,7 +42,7 @@
public static final PointableBinaryHashFunctionFactory DOUBLE_POINTABLE_INSTANCE = new PointableBinaryHashFunctionFactory(
DoublePointable.FACTORY);
public static final PointableBinaryHashFunctionFactory UTF8STRING_POINTABLE_INSTANCE = new PointableBinaryHashFunctionFactory(
- RawUTF8StringPointable.FACTORY);
+ UTF8StringPointable.FACTORY);
// Equivalent to UTF8STRING_POINTABLE_INSTANCE but all characters are considered lower case to implement case-insensitive hashing.
public static final PointableBinaryHashFunctionFactory UTF8STRING_LOWERCASE_POINTABLE_INSTANCE = new PointableBinaryHashFunctionFactory(
UTF8StringLowercasePointable.FACTORY);
diff --git a/hyracks-fullstack/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java b/hyracks-fullstack/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java
index f101ab1..f200384 100644
--- a/hyracks-fullstack/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java
+++ b/hyracks-fullstack/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java
@@ -38,11 +38,13 @@
import static org.apache.hyracks.util.string.UTF8StringUtil.normalize;
import static org.apache.hyracks.util.string.UTF8StringUtil.rawByteCompareTo;
import static org.apache.hyracks.util.string.UTF8StringUtil.hash;
+import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
+import org.junit.Assert;
import org.junit.Test;
public class UTF8StringUtilTest {
@@ -66,6 +68,14 @@
}
@Test
+ public void testChinese() {
+ byte[] bufferDe = writeStringToBytes("的");
+ byte[] bufferLi = writeStringToBytes("离");
+ int ret = compareTo(bufferDe, 0, bufferLi, 0);
+ assertTrue(ret != 0);
+ }
+
+ @Test
public void testCompareToAndNormolize() throws Exception {
testCompare(STRING_UTF8_MIX, STRING_UTF8_MIX, OPTION.STANDARD);
testCompare(STRING_UTF8_3, STRING_UTF8_MIX, OPTION.STANDARD);