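Bump asterix-fuzzyjoin snapshot version to 0.8.4-SNAPSHOT and fix issue 703: the edit-distance length filter now compares character counts (getStrLen) instead of UTF lengths (getUTFLen); add a Unicode regression test.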
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_unicode/edit-distance-check_unicode.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_unicode/edit-distance-check_unicode.1.ddl.aql
new file mode 100644
index 0000000..754ea81
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_unicode/edit-distance-check_unicode.1.ddl.aql
@@ -0,0 +1,3 @@
+drop dataverse test if exists;
+create dataverse test;
+
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_unicode/edit-distance-check_unicode.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_unicode/edit-distance-check_unicode.2.update.aql
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_unicode/edit-distance-check_unicode.2.update.aql
diff --git a/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_unicode/edit-distance-check_unicode.3.query.aql b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_unicode/edit-distance-check_unicode.3.query.aql
new file mode 100644
index 0000000..ee84ba4
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/similarity/edit-distance-check_unicode/edit-distance-check_unicode.3.query.aql
@@ -0,0 +1,15 @@
+use dataverse test;
+
+let $a := "사랑"
+let $b := "사랑해"
+let $c := "사과"
+
+let $results :=
+[
+  edit-distance-check($a, $b, 1), // expected [ true, 1 ]
+  edit-distance-check($b, $a, 1), // expected [ true, 1 ] (same pair, arguments swapped)
+  edit-distance-check($b, $c, 1), // expected [ false, 2147483647 ]: distance exceeds the threshold of 1
+  edit-distance-check($c, $b, 2) // expected [ true, 2 ]
+]
+for $i in $results
+return $i
diff --git a/asterix-app/src/test/resources/runtimets/results/similarity/edit-distance-check_unicode/edit-distance-check_unicode.1.adm b/asterix-app/src/test/resources/runtimets/results/similarity/edit-distance-check_unicode/edit-distance-check_unicode.1.adm
new file mode 100644
index 0000000..56de037
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/similarity/edit-distance-check_unicode/edit-distance-check_unicode.1.adm
@@ -0,0 +1,4 @@
+[ true, 1 ]
+[ true, 1 ]
+[ false, 2147483647 ]
+[ true, 2 ]
diff --git a/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterix-app/src/test/resources/runtimets/testsuite.xml
index d7b4c75..067fddb 100644
--- a/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -3160,6 +3160,11 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="similarity">
+      <compilation-unit name="edit-distance-check_unicode">
+        <output-dir compare="Text">edit-distance-check_unicode</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="similarity">
       <compilation-unit name="edit-distance-list-is-filterable">
         <output-dir compare="Text">edit-distance-list-is-filterable</output-dir>
       </compilation-unit>
diff --git a/asterix-fuzzyjoin/pom.xml b/asterix-fuzzyjoin/pom.xml
index 42dd773..95ecfd8 100644
--- a/asterix-fuzzyjoin/pom.xml
+++ b/asterix-fuzzyjoin/pom.xml
@@ -3,11 +3,11 @@
   <parent>
           <artifactId>asterix</artifactId>
           <groupId>edu.uci.ics.asterix</groupId>
-          <version>0.8.1-SNAPSHOT</version>
+          <version>0.8.4-SNAPSHOT</version>
   </parent>
   <groupId>edu.uci.ics.asterix</groupId>
   <artifactId>asterix-fuzzyjoin</artifactId>
-  <version>0.8.1-SNAPSHOT</version>
+  <version>0.8.4-SNAPSHOT</version>
 
   <build>
     <plugins>
diff --git a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
index b99d6f7..247bbd0 100644
--- a/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
+++ b/asterix-fuzzyjoin/src/main/java/edu/uci/ics/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
@@ -155,11 +155,11 @@
 
     public int UTF8StringEditDistance(byte[] bytes, int fsStart, int ssStart, int edThresh) {
 
-        int fsUtfLen = StringUtils.getUTFLen(bytes, fsStart);
-        int ssUtfLen = StringUtils.getUTFLen(bytes, ssStart);
+        int fsStrLen = StringUtils.getStrLen(bytes, fsStart);
+        int ssStrLen = StringUtils.getStrLen(bytes, ssStart);
 
         // length filter
-        if (Math.abs(fsUtfLen - ssUtfLen) > edThresh) {
+        if (Math.abs(fsStrLen - ssStrLen) > edThresh) {
             return -1;
         }
 
@@ -169,7 +169,7 @@
 
         // compute letter counts for first string
         int fsPos = fsStart + utf8SizeIndicatorSize;
-        int fsEnd = fsPos + fsUtfLen;
+        int fsEnd = fsPos + StringUtils.getUTFLen(bytes, fsStart); // scan bound indexes into bytes, so it uses the UTF length, not the character count
         while (fsPos < fsEnd) {
             char c = StringUtils.toLowerCase(StringUtils.charAt(bytes, fsPos));
             if (c < 128) {
@@ -180,7 +180,7 @@
 
         // compute letter counts for second string
         int ssPos = ssStart + utf8SizeIndicatorSize;
-        int ssEnd = ssPos + ssUtfLen;
+        int ssEnd = ssPos + StringUtils.getUTFLen(bytes, ssStart);
         while (ssPos < ssEnd) {
             char c = StringUtils.toLowerCase(StringUtils.charAt(bytes, ssPos));
             if (c < 128) {
diff --git a/asterix-runtime/pom.xml b/asterix-runtime/pom.xml
index 06ac7fa..8e29c7d 100644
--- a/asterix-runtime/pom.xml
+++ b/asterix-runtime/pom.xml
@@ -139,7 +139,7 @@
 		<dependency>
 			<groupId>edu.uci.ics.asterix</groupId>
 			<artifactId>asterix-fuzzyjoin</artifactId>
-			<version>0.8.1-SNAPSHOT</version>
+			<version>0.8.4-SNAPSHOT</version>
 			<scope>compile</scope>
 		</dependency>
 		<dependency>