[ASTERIXDB-2762] Use code point as the unit in trim()
This commit aims to use code point as the unit in trim().
Currently, Java char (2 bytes) is used as the unit in trim().
However, for non-English characters such as Emoji and Korean,
one character may span multiple bytes and thus can be trimmed
incorrectly if we use Java char as the unit.
Instead, a code point is a more natural unit for trimming.
Change-Id: If14092be9c2a654dba392bb2b773db81c9e47ae6
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/7283
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Dmitry Lychagin <dmitry.lychagin@couchbase.com>
diff --git a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/pom.xml b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/pom.xml
index c27d884..df1f94e 100644
--- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/pom.xml
+++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/pom.xml
@@ -32,10 +32,6 @@
</properties>
<dependencies>
<dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-lang3</artifactId>
- </dependency>
- <dependency>
<groupId>org.apache.hyracks</groupId>
<artifactId>hyracks-util</artifactId>
<version>${project.version}</version>
@@ -56,5 +52,9 @@
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
+ <dependency>
+ <groupId>it.unimi.dsi</groupId>
+ <artifactId>fastutil</artifactId>
+ </dependency>
</dependencies>
</project>