Provide a way to create a UTF8StringSerializerDeserializer with a caller-supplied UTF8StringWriter and UTF8StringReader
Change-Id: Ia1a0a2f240ab780e21e7170e03767d8e9981899c
Reviewed-on: https://asterix-gerrit.ics.uci.edu/757
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Murtadha Hubail <hubailmor@gmail.com>
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
index b36ee5a..8dccf70 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
@@ -24,19 +24,30 @@
import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.util.string.UTF8StringReader;
import org.apache.hyracks.util.string.UTF8StringUtil;
+import org.apache.hyracks.util.string.UTF8StringWriter;
public class UTF8StringSerializerDeserializer implements ISerializerDeserializer<String> {
private static final long serialVersionUID = 1L;
+ private final UTF8StringWriter utf8StringWriter;
+ private final UTF8StringReader utf8StringReader;
public UTF8StringSerializerDeserializer() {
+ this.utf8StringWriter = null;
+ this.utf8StringReader = null;
+ }
+
+ public UTF8StringSerializerDeserializer(UTF8StringWriter utf8StringWriter, UTF8StringReader utf8StringReader) {
+ this.utf8StringWriter = utf8StringWriter;
+ this.utf8StringReader = utf8StringReader;
}
@Override
public String deserialize(DataInput in) throws HyracksDataException {
try {
- return UTF8StringUtil.readUTF8(in);
+ return UTF8StringUtil.readUTF8(in, utf8StringReader);
} catch (IOException e) {
throw new HyracksDataException(e);
}
@@ -45,7 +56,7 @@
@Override
public void serialize(String instance, DataOutput out) throws HyracksDataException {
try {
- UTF8StringUtil.writeUTF8(instance, out);
+ UTF8StringUtil.writeUTF8(instance, out, utf8StringWriter);
} catch (IOException e) {
throw new HyracksDataException(e);
}
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
index ee05b03..e867ecc 100644
--- a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
@@ -448,7 +448,7 @@
return readUTF8(in, null);
}
- static String readUTF8(DataInput in, UTF8StringReader reader) throws IOException {
+ public static String readUTF8(DataInput in, UTF8StringReader reader) throws IOException {
int utflen = VarLenIntEncoderDecoder.decode(in);
byte[] bytearr;
char[] chararr;
@@ -473,8 +473,9 @@
while (count < utflen) {
c = bytearr[count] & 0xff;
- if (c > 127)
+ if (c > 127) {
break;
+ }
count++;
chararr[chararr_count++] = (char) c;
}
@@ -498,22 +499,26 @@
case 13:
/* 110x xxxx 10xx xxxx*/
count += 2;
- if (count > utflen)
+ if (count > utflen) {
throw new UTFDataFormatException("malformed input: partial character at end");
+ }
char2 = bytearr[count - 1];
- if ((char2 & 0xC0) != 0x80)
+ if ((char2 & 0xC0) != 0x80) {
throw new UTFDataFormatException("malformed input around byte " + count);
+ }
chararr[chararr_count++] = (char) (((c & 0x1F) << 6) | (char2 & 0x3F));
break;
case 14:
/* 1110 xxxx 10xx xxxx 10xx xxxx */
count += 3;
- if (count > utflen)
+ if (count > utflen) {
throw new UTFDataFormatException("malformed input: partial character at end");
+ }
char2 = bytearr[count - 2];
char3 = bytearr[count - 1];
- if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
+ if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) {
throw new UTFDataFormatException("malformed input around byte " + (count - 1));
+ }
chararr[chararr_count++] = (char) (((c & 0x0F) << 12) | ((char2 & 0x3F) << 6)
| ((char3 & 0x3F) << 0));
break;
@@ -539,7 +544,7 @@
writeUTF8(str, out, null);
}
- static void writeUTF8(CharSequence str, DataOutput out, UTF8StringWriter writer) throws IOException {
+ public static void writeUTF8(CharSequence str, DataOutput out, UTF8StringWriter writer) throws IOException {
int strlen = str.length();
int utflen = 0;
char c;