Provide a way to create a UTF8 serde with a caller-supplied reader and writer

Change-Id: Ia1a0a2f240ab780e21e7170e03767d8e9981899c
Reviewed-on: https://asterix-gerrit.ics.uci.edu/757
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Murtadha Hubail <hubailmor@gmail.com>
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
index b36ee5a..8dccf70 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
@@ -24,19 +24,30 @@
 
 import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.util.string.UTF8StringReader;
 import org.apache.hyracks.util.string.UTF8StringUtil;
+import org.apache.hyracks.util.string.UTF8StringWriter;
 
 public class UTF8StringSerializerDeserializer implements ISerializerDeserializer<String> {
 
     private static final long serialVersionUID = 1L;
+    private final UTF8StringWriter utf8StringWriter;
+    private final UTF8StringReader utf8StringReader;
 
     public UTF8StringSerializerDeserializer() {
+        this.utf8StringWriter = null;
+        this.utf8StringReader = null;
+    }
+
+    public UTF8StringSerializerDeserializer(UTF8StringWriter utf8StringWriter, UTF8StringReader utf8StringReader) {
+        this.utf8StringWriter = utf8StringWriter;
+        this.utf8StringReader = utf8StringReader;
     }
 
     @Override
     public String deserialize(DataInput in) throws HyracksDataException {
         try {
-            return UTF8StringUtil.readUTF8(in);
+            return UTF8StringUtil.readUTF8(in, utf8StringReader);
         } catch (IOException e) {
             throw new HyracksDataException(e);
         }
@@ -45,7 +56,7 @@
     @Override
     public void serialize(String instance, DataOutput out) throws HyracksDataException {
         try {
-            UTF8StringUtil.writeUTF8(instance, out);
+            UTF8StringUtil.writeUTF8(instance, out, utf8StringWriter);
         } catch (IOException e) {
             throw new HyracksDataException(e);
         }
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
index ee05b03..e867ecc 100644
--- a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
@@ -448,7 +448,7 @@
         return readUTF8(in, null);
     }
 
-    static String readUTF8(DataInput in, UTF8StringReader reader) throws IOException {
+    public static String readUTF8(DataInput in, UTF8StringReader reader) throws IOException {
         int utflen = VarLenIntEncoderDecoder.decode(in);
         byte[] bytearr;
         char[] chararr;
@@ -473,8 +473,9 @@
 
         while (count < utflen) {
             c = bytearr[count] & 0xff;
-            if (c > 127)
+            if (c > 127) {
                 break;
+            }
             count++;
             chararr[chararr_count++] = (char) c;
         }
@@ -498,22 +499,26 @@
                 case 13:
                     /* 110x xxxx   10xx xxxx*/
                     count += 2;
-                    if (count > utflen)
+                    if (count > utflen) {
                         throw new UTFDataFormatException("malformed input: partial character at end");
+                    }
                     char2 = bytearr[count - 1];
-                    if ((char2 & 0xC0) != 0x80)
+                    if ((char2 & 0xC0) != 0x80) {
                         throw new UTFDataFormatException("malformed input around byte " + count);
+                    }
                     chararr[chararr_count++] = (char) (((c & 0x1F) << 6) | (char2 & 0x3F));
                     break;
                 case 14:
                     /* 1110 xxxx  10xx xxxx  10xx xxxx */
                     count += 3;
-                    if (count > utflen)
+                    if (count > utflen) {
                         throw new UTFDataFormatException("malformed input: partial character at end");
+                    }
                     char2 = bytearr[count - 2];
                     char3 = bytearr[count - 1];
-                    if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
+                    if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) {
                         throw new UTFDataFormatException("malformed input around byte " + (count - 1));
+                    }
                     chararr[chararr_count++] = (char) (((c & 0x0F) << 12) | ((char2 & 0x3F) << 6)
                             | ((char3 & 0x3F) << 0));
                     break;
@@ -539,7 +544,7 @@
         writeUTF8(str, out, null);
     }
 
-    static void writeUTF8(CharSequence str, DataOutput out, UTF8StringWriter writer) throws IOException {
+    public static void writeUTF8(CharSequence str, DataOutput out, UTF8StringWriter writer) throws IOException {
         int strlen = str.length();
         int utflen = 0;
         char c;