ASTERIXDB-1198: make ISerializerDeserializer implementations in Hyracks stateless.
Change-Id: I1ec86d0a93d8f15d88d68fab24dbe858c5ba8842
Reviewed-on: https://asterix-gerrit.ics.uci.edu/521
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Jianfeng Jia <jianfeng.jia@gmail.com>
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializer.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializer.java
index d16fca7..7b4f7b6 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializer.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializer.java
@@ -31,16 +31,16 @@
public class ByteArraySerializerDeserializer implements ISerializerDeserializer<byte[]> {
private static final long serialVersionUID = 1L;
+ public static ByteArraySerializerDeserializer INSTANCE = new ByteArraySerializerDeserializer();
- public ByteArraySerializerDeserializer() {
+ private ByteArraySerializerDeserializer() {
}
- private byte[] metaBuffer = new byte[5];
-
/**
* Return a pure byte array which doesn't have the length encoding prefix
*
- * @param in - Stream to read instance from.
+ * @param in
+ * - Stream to read instance from.
* @return
* @throws HyracksDataException
*/
@@ -63,6 +63,7 @@
@Override
public void serialize(byte[] instance, DataOutput out) throws HyracksDataException {
try {
+ byte[] metaBuffer = new byte[5];
int metaLength = VarLenIntEncoderDecoder.encode(instance.length, metaBuffer, 0);
out.write(metaBuffer, 0, metaLength);
out.write(instance);
@@ -81,6 +82,7 @@
// A pure byte array, which doesn't have the length information encoded at the beginning
public void serialize(byte[] instance, int start, int length, DataOutput out) throws HyracksDataException {
+ byte[] metaBuffer = new byte[5];
int metaLength = VarLenIntEncoderDecoder.encode(length, metaBuffer, 0);
try {
out.write(metaBuffer, 0, metaLength);
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
index aee11bc..0d10f13 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
@@ -24,21 +24,19 @@
import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.util.string.UTF8StringReader;
-import org.apache.hyracks.util.string.UTF8StringWriter;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public class UTF8StringSerializerDeserializer implements ISerializerDeserializer<String> {
private static final long serialVersionUID = 1L;
- private UTF8StringReader reader = new UTF8StringReader();
- private UTF8StringWriter writer = new UTF8StringWriter();
- public UTF8StringSerializerDeserializer() {}
+ public UTF8StringSerializerDeserializer() {
+ }
@Override
public String deserialize(DataInput in) throws HyracksDataException {
try {
- return reader.readUTF(in);
+ return UTF8StringUtil.readUTF8(in);
} catch (IOException e) {
throw new HyracksDataException(e);
}
@@ -47,7 +45,7 @@
@Override
public void serialize(String instance, DataOutput out) throws HyracksDataException {
try {
- writer.writeUTF8(instance, out);
+ UTF8StringUtil.writeUTF8(instance, out);
} catch (IOException e) {
throw new HyracksDataException(e);
}
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java
index c85d1b2..91fb2eb 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java
@@ -28,6 +28,7 @@
public class ByteArrayBase64ParserFactory implements IValueParserFactory {
+ private static final long serialVersionUID = 1L;
public static final ByteArrayBase64ParserFactory INSTANCE = new ByteArrayBase64ParserFactory();
private ByteArrayBase64ParserFactory() {
@@ -37,11 +38,10 @@
public IValueParser createValueParser() {
return new IValueParser() {
Base64Parser parser = new Base64Parser();
- ByteArraySerializerDeserializer serializer = new ByteArraySerializerDeserializer();
+ ByteArraySerializerDeserializer serializer = ByteArraySerializerDeserializer.INSTANCE;
@Override
- public void parse(char[] input, int start, int length, DataOutput out)
- throws HyracksDataException {
+ public void parse(char[] input, int start, int length, DataOutput out) throws HyracksDataException {
parser.generatePureByteArrayFromBase64String(input, start, length);
try {
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java
index f1f1eb1..289dc6d 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java
@@ -27,6 +27,7 @@
import org.apache.hyracks.util.bytes.HexParser;
public class ByteArrayHexParserFactory implements IValueParserFactory {
+ private static final long serialVersionUID = 1L;
public static ByteArrayHexParserFactory INSTANCE = new ByteArrayHexParserFactory();
private ByteArrayHexParserFactory() {
@@ -36,11 +37,10 @@
public IValueParser createValueParser() {
return new IValueParser() {
HexParser parser = new HexParser();
- ByteArraySerializerDeserializer serializer = new ByteArraySerializerDeserializer();
+ ByteArraySerializerDeserializer serializer = ByteArraySerializerDeserializer.INSTANCE;
@Override
- public void parse(char[] input, int start, int length, DataOutput out)
- throws HyracksDataException {
+ public void parse(char[] input, int start, int length, DataOutput out) throws HyracksDataException {
try {
parser.generateByteArrayFromHexString(input, start, length);
serializer.serialize(parser.getByteArray(), 0, parser.getLength(), out);
diff --git a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializerTest.java b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializerTest.java
index f0e831a..f8e9c96 100644
--- a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializerTest.java
+++ b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializerTest.java
@@ -35,7 +35,7 @@
public class ByteArraySerializerDeserializerTest {
ByteArrayPointable bytePtr = new ByteArrayPointable();
- ByteArraySerializerDeserializer serder = new ByteArraySerializerDeserializer();
+ ByteArraySerializerDeserializer serder = ByteArraySerializerDeserializer.INSTANCE;
@Test
public void testSerializeDeserializeRandomBytes() throws Exception {
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringReader.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringReader.java
index 3cd0300..27d6008 100644
--- a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringReader.java
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringReader.java
@@ -22,110 +22,39 @@
import java.io.DataInput;
import java.io.EOFException;
import java.io.IOException;
-import java.io.Serializable;
import java.io.UTFDataFormatException;
-import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
+public class UTF8StringReader {
-public class UTF8StringReader implements Serializable{
-
- private byte[] bytearr = null;
- private char[] chararr = null;
+ byte[] bytearr = null;
+ char[] chararr = null;
/**
* Reads from the
* stream <code>in</code> a representation
- * of a Unicode character string encoded in
+ * of a Unicode character string encoded in
* <a href="DataInput.html#modified-utf-8">modified UTF-8</a> format;
* this string of characters is then returned as a <code>String</code>.
* The details of the modified UTF-8 representation
- * are exactly the same as for the <code>readUTF</code>
+ * are exactly the same as for the <code>readUTF</code>
* method of <code>DataInput</code>.
*
- * @param in a data input stream.
+ * @param in
+ * a data input stream.
* @return a Unicode string.
- * @throws EOFException if the input stream reaches the end
- * before all the bytes.
- * @throws IOException the stream has been closed and the contained
- * input stream does not support reading after close, or
- * another I/O error occurs.
- * @throws UTFDataFormatException if the bytes do not represent a
- * valid modified UTF-8 encoding of a Unicode string.
+ * @throws EOFException
+ * if the input stream reaches the end
+ * before all the bytes.
+ * @throws IOException
+ * the stream has been closed and the contained
+ * input stream does not support reading after close, or
+ * another I/O error occurs.
+ * @throws UTFDataFormatException
+ * if the bytes do not represent a
+ * valid modified UTF-8 encoding of a Unicode string.
* @see java.io.DataInputStream#readUnsignedShort()
*/
public final String readUTF(DataInput in) throws IOException {
- int utflen = VarLenIntEncoderDecoder.decode(in);
-
- if (bytearr == null || bytearr.length < utflen) {
- bytearr = new byte[utflen * 2];
- chararr = new char[utflen * 2];
- }
-
- int c, char2, char3;
- int count = 0;
- int chararr_count = 0;
-
- in.readFully(bytearr, 0, utflen);
-
- while (count < utflen) {
- c = (int) bytearr[count] & 0xff;
- if (c > 127)
- break;
- count++;
- chararr[chararr_count++] = (char) c;
- }
-
- while (count < utflen) {
- c = (int) bytearr[count] & 0xff;
- switch (c >> 4) {
- case 0:
- case 1:
- case 2:
- case 3:
- case 4:
- case 5:
- case 6:
- case 7:
- /* 0xxxxxxx*/
- count++;
- chararr[chararr_count++] = (char) c;
- break;
- case 12:
- case 13:
- /* 110x xxxx 10xx xxxx*/
- count += 2;
- if (count > utflen)
- throw new UTFDataFormatException(
- "malformed input: partial character at end");
- char2 = (int) bytearr[count - 1];
- if ((char2 & 0xC0) != 0x80)
- throw new UTFDataFormatException(
- "malformed input around byte " + count);
- chararr[chararr_count++] = (char) (((c & 0x1F) << 6) |
- (char2 & 0x3F));
- break;
- case 14:
- /* 1110 xxxx 10xx xxxx 10xx xxxx */
- count += 3;
- if (count > utflen)
- throw new UTFDataFormatException(
- "malformed input: partial character at end");
- char2 = (int) bytearr[count - 2];
- char3 = (int) bytearr[count - 1];
- if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
- throw new UTFDataFormatException(
- "malformed input around byte " + (count - 1));
- chararr[chararr_count++] = (char) (((c & 0x0F) << 12) |
- ((char2 & 0x3F) << 6) |
- ((char3 & 0x3F) << 0));
- break;
- default:
- /* 10xx xxxx, 1111 xxxx */
- throw new UTFDataFormatException(
- "malformed input around byte " + count);
- }
- }
- // The number of chars produced may be less than utflen
- return new String(chararr, 0, chararr_count);
+ return UTF8StringUtil.readUTF8(in, this);
}
}
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
index 7929691..1c2ac8e 100644
--- a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
@@ -18,10 +18,13 @@
package org.apache.hyracks.util.string;
import java.io.ByteArrayOutputStream;
+import java.io.DataInput;
import java.io.DataOutput;
import java.io.DataOutputStream;
+import java.io.EOFException;
import java.io.IOException;
import java.io.OutputStream;
+import java.io.UTFDataFormatException;
import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
@@ -149,26 +152,22 @@
return b[s];
} else if ((b[s] & 0xe0) == 0xc0) { /*0xe0 = 0b1110000*/
// 2 bytes
- return ((int) (b[s] & 0x1f)) << 6 | /*0x3f = 0b00111111*/
- ((int) (b[s + 1] & 0x3f));
+ return (b[s] & 0x1f) << 6 | /*0x3f = 0b00111111*/
+ (b[s + 1] & 0x3f);
} else if ((b[s] & 0xf0) == 0xe0) {
// 3bytes
- return ((int) (b[s] & 0xf)) << 12 | ((int) (b[s + 1] & 0x3f)) << 6
- | ((int) (b[s + 2] & 0x3f));
+ return (b[s] & 0xf) << 12 | (b[s + 1] & 0x3f) << 6 | (b[s + 2] & 0x3f);
} else if ((b[s] & 0xf8) == 0xf0) {
// 4bytes
- return ((int) (b[s] & 0x7)) << 18 | ((int) (b[s + 1] & 0x3f)) << 12
- | ((int) (b[s + 2] & 0x3f)) << 6 | ((int) (b[s + 3] & 0x3f));
+ return (b[s] & 0x7) << 18 | (b[s + 1] & 0x3f) << 12 | (b[s + 2] & 0x3f) << 6 | (b[s + 3] & 0x3f);
} else if ((b[s] & 0xfc) == 0xf8) {
// 5bytes
- return ((int) (b[s] & 0x3)) << 24 | ((int) (b[s + 1] & 0x3f)) << 18
- | ((int) (b[s + 2] & 0x3f)) << 12 | ((int) (b[s + 3] & 0x3f)) << 6
- | ((int) (b[s + 4] & 0x3f));
+ return (b[s] & 0x3) << 24 | (b[s + 1] & 0x3f) << 18 | (b[s + 2] & 0x3f) << 12 | (b[s + 3] & 0x3f) << 6
+ | (b[s + 4] & 0x3f);
} else if ((b[s] & 0xfe) == 0xfc) {
// 6bytes
- return ((int) (b[s] & 0x1)) << 30 | ((int) (b[s + 1] & 0x3f)) << 24
- | ((int) (b[s + 2] & 0x3f)) << 18 | ((int) (b[s + 3] & 0x3f)) << 12
- | ((int) (b[s + 4] & 0x3f)) << 6 | ((int) (b[s + 5] & 0x3f));
+ return (b[s] & 0x1) << 30 | (b[s + 1] & 0x3f) << 24 | (b[s + 2] & 0x3f) << 18 | (b[s + 3] & 0x3f) << 12
+ | (b[s + 4] & 0x3f) << 6 | (b[s + 5] & 0x3f);
}
return 0;
}
@@ -226,7 +225,7 @@
for (int i = 0; i < 2; ++i) {
nk <<= 16;
if (i < len) {
- nk += ((int) charAt(bytes, offset)) & 0xffff;
+ nk += (charAt(bytes, offset)) & 0xffff;
offset += charSize(bytes, offset);
}
}
@@ -351,8 +350,8 @@
}
}
- private static int compareTo(byte[] thisBytes, int thisStart, byte[] thatBytes, int thatStart,
- boolean useLowerCase, boolean useRawByte) {
+ private static int compareTo(byte[] thisBytes, int thisStart, byte[] thatBytes, int thatStart, boolean useLowerCase,
+ boolean useRawByte) {
int utflen1 = getUTFLength(thisBytes, thisStart);
int utflen2 = getUTFLength(thatBytes, thatStart);
@@ -419,4 +418,211 @@
}
return bos.toByteArray();
}
+
+ /**
+ * Reads from the
+ * stream <code>in</code> a representation
+ * of a Unicode character string encoded in
+ * <a href="DataInput.html#modified-utf-8">modified UTF-8</a> format;
+ * this string of characters is then returned as a <code>String</code>.
+ * The character encoding is the same as for the <code>readUTF</code>
+ * method of <code>DataInput</code>, but the byte length prefix is decoded
+ * with <code>VarLenIntEncoderDecoder</code> instead of a two-byte length.
+ *
+ * @param in
+ * a data input stream.
+ * @return a Unicode string.
+ * @throws EOFException
+ * if the input stream reaches the end
+ * before all the bytes.
+ * @throws IOException
+ * the stream has been closed and the contained
+ * input stream does not support reading after close, or
+ * another I/O error occurs.
+ * @throws UTFDataFormatException
+ * if the bytes do not represent a
+ * valid modified UTF-8 encoding of a Unicode string.
+ * @see java.io.DataInputStream#readUnsignedShort()
+ */
+ public static String readUTF8(DataInput in) throws IOException {
+ return readUTF8(in, null);
+ }
+
+ static String readUTF8(DataInput in, UTF8StringReader reader) throws IOException {
+ int utflen = VarLenIntEncoderDecoder.decode(in);
+ byte[] bytearr;
+ char[] chararr;
+
+ if (reader == null) {
+ bytearr = new byte[utflen * 2];
+ chararr = new char[utflen * 2];
+ } else {
+ if (reader.bytearr == null || reader.bytearr.length < utflen) {
+ reader.bytearr = new byte[utflen * 2];
+ reader.chararr = new char[utflen * 2];
+ }
+ bytearr = reader.bytearr;
+ chararr = reader.chararr;
+ }
+
+ int c, char2, char3;
+ int count = 0;
+ int chararr_count = 0;
+
+ in.readFully(bytearr, 0, utflen);
+
+ while (count < utflen) {
+ c = bytearr[count] & 0xff;
+ if (c > 127)
+ break;
+ count++;
+ chararr[chararr_count++] = (char) c;
+ }
+
+ while (count < utflen) {
+ c = bytearr[count] & 0xff;
+ switch (c >> 4) {
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ /* 0xxxxxxx*/
+ count++;
+ chararr[chararr_count++] = (char) c;
+ break;
+ case 12:
+ case 13:
+ /* 110x xxxx 10xx xxxx*/
+ count += 2;
+ if (count > utflen)
+ throw new UTFDataFormatException("malformed input: partial character at end");
+ char2 = bytearr[count - 1];
+ if ((char2 & 0xC0) != 0x80)
+ throw new UTFDataFormatException("malformed input around byte " + count);
+ chararr[chararr_count++] = (char) (((c & 0x1F) << 6) | (char2 & 0x3F));
+ break;
+ case 14:
+ /* 1110 xxxx 10xx xxxx 10xx xxxx */
+ count += 3;
+ if (count > utflen)
+ throw new UTFDataFormatException("malformed input: partial character at end");
+ char2 = bytearr[count - 2];
+ char3 = bytearr[count - 1];
+ if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
+ throw new UTFDataFormatException("malformed input around byte " + (count - 1));
+ chararr[chararr_count++] = (char) (((c & 0x0F) << 12) | ((char2 & 0x3F) << 6)
+ | ((char3 & 0x3F) << 0));
+ break;
+ default:
+ /* 10xx xxxx, 1111 xxxx */
+ throw new UTFDataFormatException("malformed input around byte " + count);
+ }
+ }
+ // The number of chars produced may be less than utflen
+ return new String(chararr, 0, chararr_count);
+ }
+
+ /**
+ * Write a UTF8 String <code>str</code> into the DataOutput <code>out</code>
+ *
+ * @param str
+ * a Unicode string.
+ * @param out
+ * a data output stream.
+ * @throws IOException if writing to <code>out</code> fails.
+ */
+ public static void writeUTF8(CharSequence str, DataOutput out) throws IOException {
+ writeUTF8(str, out, null);
+ }
+
+ static void writeUTF8(CharSequence str, DataOutput out, UTF8StringWriter writer) throws IOException {
+ int strlen = str.length();
+ int utflen = 0;
+ char c;
+ int count = 0;
+
+ for (int i = 0; i < strlen; i++) {
+ c = str.charAt(i);
+ utflen += UTF8StringUtil.getModifiedUTF8Len(c);
+ }
+
+ byte[] tempBytes = getTempBytes(writer, utflen);
+ count += VarLenIntEncoderDecoder.encode(utflen, tempBytes, count);
+ int i = 0;
+ for (; i < strlen; i++) {
+ c = str.charAt(i);
+ if (!((c >= 0x0001) && (c <= 0x007F))) {
+ break;
+ }
+ tempBytes[count++] = (byte) c;
+ }
+
+ for (; i < strlen; i++) {
+ c = str.charAt(i);
+ count += writeToBytes(tempBytes, count, c);
+ }
+ out.write(tempBytes, 0, count);
+ }
+
+ static void writeUTF8(char[] buffer, int start, int length, DataOutput out, UTF8StringWriter writer)
+ throws IOException {
+ int utflen = 0;
+ int count = 0;
+ char c;
+
+ for (int i = 0; i < length; i++) {
+ c = buffer[i + start];
+ utflen += UTF8StringUtil.getModifiedUTF8Len(c);
+ }
+
+ byte[] tempBytes = getTempBytes(writer, utflen);
+ count += VarLenIntEncoderDecoder.encode(utflen, tempBytes, count);
+
+ int i = 0;
+ for (; i < length; i++) {
+ c = buffer[i + start];
+ if (!((c >= 0x0001) && (c <= 0x007F))) {
+ break;
+ }
+ tempBytes[count++] = (byte) c;
+ }
+
+ for (; i < length; i++) {
+ c = buffer[i + start];
+ count += writeToBytes(tempBytes, count, c);
+ }
+ out.write(tempBytes, 0, count);
+ }
+
+ private static int writeToBytes(byte[] tempBytes, int count, char c) {
+ int orig = count;
+ if ((c >= 0x0001) && (c <= 0x007F)) {
+ tempBytes[count++] = (byte) c;
+ } else if (c > 0x07FF) {
+ tempBytes[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
+ tempBytes[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
+ tempBytes[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
+ } else {
+ tempBytes[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
+ tempBytes[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
+ }
+ return count - orig;
+ }
+
+ private static byte[] getTempBytes(UTF8StringWriter writer, int utflen) {
+ byte[] tempBytes;
+ if (writer == null) {
+ tempBytes = new byte[utflen + 5];
+ } else {
+ if (writer.tempBytes == null || writer.tempBytes.length < utflen + 5) {
+ writer.tempBytes = new byte[utflen + 5];
+ }
+ tempBytes = writer.tempBytes;
+ }
+ return tempBytes;
+ }
}
\ No newline at end of file
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringWriter.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringWriter.java
index 021c02f..3a78075 100644
--- a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringWriter.java
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringWriter.java
@@ -20,94 +20,16 @@
import java.io.DataOutput;
import java.io.IOException;
-import java.io.Serializable;
-import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
-
-public class UTF8StringWriter implements Serializable{
- private byte[] tempBytes;
+public class UTF8StringWriter {
+ byte[] tempBytes;
public final void writeUTF8(CharSequence str, DataOutput out) throws IOException {
- int strlen = str.length();
- int utflen = 0;
- char c;
- int count = 0;
-
- for (int i = 0; i < strlen; i++) {
- c = str.charAt(i);
- utflen += UTF8StringUtil.getModifiedUTF8Len(c);
- }
-
- ensureTempSize(utflen);
-
- count += VarLenIntEncoderDecoder.encode(utflen, tempBytes, count);
-
- int i = 0;
- for (; i < strlen; i++) {
- c = str.charAt(i);
- if (!((c >= 0x0001) && (c <= 0x007F))) {
- break;
- }
- tempBytes[count++] = (byte) c;
- }
-
- for (; i < strlen; i++) {
- c = str.charAt(i);
- count += writeToBytes(tempBytes, count, c);
- }
- out.write(tempBytes, 0, count);
+ UTF8StringUtil.writeUTF8(str, out, this);
}
public final void writeUTF8(char[] buffer, int start, int length, DataOutput out) throws IOException {
- int utflen = 0;
- int count = 0;
- char c;
-
- for (int i = 0; i < length; i++) {
- c = buffer[i + start];
- utflen += UTF8StringUtil.getModifiedUTF8Len(c);
- }
-
- ensureTempSize(utflen);
-
- count += VarLenIntEncoderDecoder.encode(utflen, tempBytes, count);
-
- int i = 0;
- for (; i < length; i++) {
- c = buffer[i + start];
- if (!((c >= 0x0001) && (c <= 0x007F))) {
- break;
- }
- tempBytes[count++] = (byte) c;
- }
-
- for (; i < length; i++) {
- c = buffer[i + start];
- count += writeToBytes(tempBytes, count, c);
- }
- out.write(tempBytes, 0, count);
- }
-
- private static int writeToBytes(byte[] tempBytes, int count, char c) {
- int orig = count;
- if ((c >= 0x0001) && (c <= 0x007F)) {
- tempBytes[count++] = (byte) c;
- } else if (c > 0x07FF) {
- tempBytes[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
- tempBytes[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
- tempBytes[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
- } else {
- tempBytes[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
- tempBytes[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
- }
- return count - orig;
- }
-
- private void ensureTempSize(int utflen) {
- if (tempBytes == null || tempBytes.length < utflen + 5) {
- tempBytes = new byte[utflen + 5];
- }
-
+ UTF8StringUtil.writeUTF8(buffer, start, length, out, this);
}
}
\ No newline at end of file