ASTERIXDB-1102: Variable-size encoding to store the length of String and ByteArray
This patch changes the encoding that stores the length of the
variable-length types (e.g. String, ByteArray) from a fixed-size
encoding (2 bytes) to a variable-size encoding (1 to 5 bytes).
It resolves issue 1102 by making it possible to store a String longer
than 64KB. For the common case of short strings (length <= 127), it
also saves one byte per string.
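As an illustration, here is a minimal sketch of one possible 1-to-5-byte
scheme that uses 7 payload bits per byte plus a continuation bit; the
exact byte layout implemented by the new VarLenIntEncoderDecoder may
differ.

    // Hypothetical varint sketch; not necessarily the Hyracks wire format.
    public final class VarLenSketch {
        // Writes 'value' at 'start' and returns the number of bytes used (1..5).
        static int encode(int value, byte[] dest, int start) {
            int pos = start;
            while ((value & ~0x7F) != 0) {
                dest[pos++] = (byte) ((value & 0x7F) | 0x80); // continuation bit set
                value >>>= 7;
            }
            dest[pos++] = (byte) value; // final byte, high bit clear
            return pos - start;
        }

        // Reads back a value previously written by encode().
        static int decode(byte[] src, int start) {
            int value = 0;
            int shift = 0;
            byte b;
            do {
                b = src[start++];
                value |= (b & 0x7F) << shift;
                shift += 7;
            } while ((b & 0x80) != 0);
            return value;
        }
    }

Under such a scheme any length <= 127 fits in a single byte, which is
where the one-byte saving for short strings comes from.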
Some important changes include:
1. Add a hyracks-util package to consolidate the Hyracks-independent
utility functions. This reduces the chance of having duplicate
utilities in different packages.
2. Move parts of the AsterixDB string functions down to the Hyracks
UTF8StringPointable object, which will benefit other dependents,
such as VXQuery (see the usage sketch below).
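As a usage sketch of the relocated string functions (these methods are
the ones added to UTF8StringPointable by this patch; the demo class and
string literals are illustrative only):

    import org.apache.hyracks.data.std.primitive.UTF8StringPointable;

    public class Utf8PointableDemo {
        public static void main(String[] args) {
            UTF8StringPointable src = UTF8StringPointable.generateUTF8Pointable("Hello, VXQuery");
            UTF8StringPointable pattern = UTF8StringPointable.generateUTF8Pointable("vxquery");

            System.out.println(src.getUTF8Length());         // encoded byte length, meta bytes excluded
            System.out.println(src.getStringLength());       // character count, cached after the first call
            System.out.println(src.contains(pattern, true)); // case-insensitive match -> true
        }
    }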
Change-Id: I7e95df0f06984b784ebac2c84b97e56a50207d27
Reviewed-on: https://asterix-gerrit.ics.uci.edu/449
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Taewoo Kim <wangsaeu@gmail.com>
Reviewed-by: Jianfeng Jia <jianfeng.jia@gmail.com>
diff --git a/algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/impl/UTF8StringPrinterFactory.java b/algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/impl/UTF8StringPrinterFactory.java
index 8aa646e..1aa3370 100644
--- a/algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/impl/UTF8StringPrinterFactory.java
+++ b/algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/impl/UTF8StringPrinterFactory.java
@@ -18,12 +18,13 @@
*/
package org.apache.hyracks.algebricks.data.impl;
+import java.io.IOException;
import java.io.PrintStream;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.algebricks.data.IPrinter;
import org.apache.hyracks.algebricks.data.IPrinterFactory;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public class UTF8StringPrinterFactory implements IPrinterFactory {
@@ -40,22 +41,11 @@
@Override
public void print(byte[] b, int s, int l, PrintStream ps) throws AlgebricksException {
- int strlen = UTF8StringPointable.getUTFLength(b, s);
- int pos = s + 2;
- int maxPos = pos + strlen;
- ps.print("\"");
- while (pos < maxPos) {
- char c = UTF8StringPointable.charAt(b, pos);
- switch (c) {
- case '\\':
- case '"':
- ps.print('\\');
- break;
- }
- ps.print(c);
- pos += UTF8StringPointable.charSize(b, pos);
+ try {
+ UTF8StringUtil.printUTF8StringWithQuotes(b, s, l, ps);
+ } catch (IOException e) {
+ throw new AlgebricksException(e);
}
- ps.print("\"");
}
@Override
diff --git a/algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/utils/WriteValueTools.java b/algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/utils/WriteValueTools.java
index 8a96ea6..97e7d95 100644
--- a/algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/utils/WriteValueTools.java
+++ b/algebricks/algebricks-data/src/main/java/org/apache/hyracks/algebricks/data/utils/WriteValueTools.java
@@ -20,14 +20,16 @@
import java.io.IOException;
import java.io.OutputStream;
+import java.io.PrintStream;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public final class WriteValueTools {
private final static int[] INT_INTERVALS = { 9, 99, 999, 9999, 99999, 999999, 9999999, 99999999, 999999999,
Integer.MAX_VALUE };
- private final static int[] INT_DIVIDERS = { 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 };
+ private final static int[] INT_DIVIDERS = { 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000,
+ 1000000000 };
private final static int[] DIGITS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
public static void writeInt(int i, OutputStream os) throws IOException {
@@ -75,50 +77,11 @@
os.write(DIGITS[(int) (d % 10)]);
}
- public static void writeUTF8String(byte[] b, int s, int l, OutputStream os) throws IOException {
- int stringLength = UTF8StringPointable.getUTFLength(b, s);
- int position = s + 2;
- int maxPosition = position + stringLength;
- os.write('\"');
- while (position < maxPosition) {
- char c = UTF8StringPointable.charAt(b, position);
- switch (c) {
- // escape
- case '\\':
- case '"':
- os.write('\\');
- break;
- }
- int sz = UTF8StringPointable.charSize(b, position);
- while (sz > 0) {
- os.write(b[position]);
- position++;
- sz--;
- }
- }
- os.write('\"');
+ public static void writeUTF8StringWithQuotes(String string, OutputStream ps) throws IOException {
+ UTF8StringUtil.printUTF8StringWithQuotes(string, ps);
}
- public static void writeUTF8StringNoQuotes(byte[] b, int s, int l, OutputStream os) throws IOException {
- int stringLength = UTF8StringPointable.getUTFLength(b, s);
- int position = s + 2;
- int maxPosition = position + stringLength;
- while (position < maxPosition) {
- char c = UTF8StringPointable.charAt(b, position);
- switch (c) {
- // escape
- case '\\':
- case '"':
- os.write('\\');
- break;
- }
- int sz = UTF8StringPointable.charSize(b, position);
- while (sz > 0) {
- os.write(b[position]);
- position++;
- sz--;
- }
- }
+ public static void writeUTF8StringNoQuotes(String string, OutputStream ps) throws IOException {
+ UTF8StringUtil.printUTF8StringNoQuotes(string, ps);
}
-
}
diff --git a/algebricks/algebricks-examples/piglet-example/pom.xml b/algebricks/algebricks-examples/piglet-example/pom.xml
index a037db5..ae2ec51 100644
--- a/algebricks/algebricks-examples/piglet-example/pom.xml
+++ b/algebricks/algebricks-examples/piglet-example/pom.xml
@@ -111,5 +111,10 @@
<artifactId>algebricks-compiler</artifactId>
<version>0.2.17-SNAPSHOT</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-util</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ </dependency>
</dependencies>
</project>
diff --git a/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/compiler/PigletPrinterFactoryProvider.java b/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/compiler/PigletPrinterFactoryProvider.java
index 6d64741..8049594 100644
--- a/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/compiler/PigletPrinterFactoryProvider.java
+++ b/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/compiler/PigletPrinterFactoryProvider.java
@@ -29,7 +29,9 @@
import org.apache.hyracks.algebricks.data.utils.WriteValueTools;
import org.apache.hyracks.algebricks.examples.piglet.types.Type;
import org.apache.hyracks.data.std.primitive.FloatPointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
import org.apache.hyracks.dataflow.common.data.marshalling.FloatSerializerDeserializer;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public class PigletPrinterFactoryProvider implements IPrinterFactoryProvider {
@@ -73,7 +75,7 @@
@Override
public void print(byte[] b, int s, int l, PrintStream ps) throws AlgebricksException {
try {
- WriteValueTools.writeUTF8String(b, s, l, ps);
+ UTF8StringUtil.printUTF8StringWithQuotes(b, s, l, ps);
} catch (IOException e) {
throw new AlgebricksException(e);
}
diff --git a/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/metadata/PigletMetadataProvider.java b/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/metadata/PigletMetadataProvider.java
index 7d9b3db..8f9ab9f 100644
--- a/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/metadata/PigletMetadataProvider.java
+++ b/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/metadata/PigletMetadataProvider.java
@@ -110,7 +110,7 @@
case CHAR_ARRAY:
vpf = UTF8StringParserFactory.INSTANCE;
- serDeser = UTF8StringSerializerDeserializer.INSTANCE;
+ serDeser = new UTF8StringSerializerDeserializer();
break;
case FLOAT:
diff --git a/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/runtime/PigletExpressionJobGen.java b/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/runtime/PigletExpressionJobGen.java
index 6c173b2..1c3f9b8 100644
--- a/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/runtime/PigletExpressionJobGen.java
+++ b/algebricks/algebricks-examples/piglet-example/src/main/java/org/apache/hyracks/algebricks/examples/piglet/runtime/PigletExpressionJobGen.java
@@ -53,6 +53,8 @@
import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
public class PigletExpressionJobGen implements ILogicalExpressionJobGen {
+ private final UTF8StringSerializerDeserializer utf8SerDer = new UTF8StringSerializerDeserializer();
+
@Override
public ICopyEvaluatorFactory createEvaluatorFactory(ILogicalExpression expr, IVariableTypeEnvironment env,
IOperatorSchema[] inputSchemas, JobGenContext context) throws AlgebricksException {
@@ -74,7 +76,7 @@
case CHAR_ARRAY:
try {
- UTF8StringSerializerDeserializer.INSTANCE.serialize(image, dos);
+ utf8SerDer.serialize(image, dos);
} catch (Exception e) {
throw new AlgebricksException(e);
}
diff --git a/algebricks/algebricks-examples/pom.xml b/algebricks/algebricks-examples/pom.xml
index 7ba1b5b..968db33 100644
--- a/algebricks/algebricks-examples/pom.xml
+++ b/algebricks/algebricks-examples/pom.xml
@@ -22,8 +22,15 @@
<artifactId>algebricks-examples</artifactId>
<packaging>pom</packaging>
<name>algebricks-examples</name>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>algebricks-core</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ </dependency>
+ </dependencies>
- <parent>
+ <parent>
<groupId>org.apache.hyracks</groupId>
<artifactId>algebricks</artifactId>
<version>0.2.17-SNAPSHOT</version>
diff --git a/algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/pushruntime/PushRuntimeTest.java b/algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/pushruntime/PushRuntimeTest.java
index 3c97878..7fcab17 100644
--- a/algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/pushruntime/PushRuntimeTest.java
+++ b/algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/pushruntime/PushRuntimeTest.java
@@ -275,10 +275,10 @@
IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), FloatSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE,
UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE,
UTF8StringParserFactory.INSTANCE, FloatParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
@@ -355,10 +355,10 @@
"data/tpch0.001/customer-part1.tbl")));
IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), FloatSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE,
UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE,
UTF8StringParserFactory.INSTANCE, FloatParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
@@ -407,10 +407,10 @@
"data/tpch0.001/customer.tbl")));
IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), FloatSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE,
UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE,
UTF8StringParserFactory.INSTANCE, FloatParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
@@ -492,10 +492,10 @@
"data/tpch0.001/customer.tbl")));
IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), FloatSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE,
UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE,
UTF8StringParserFactory.INSTANCE, FloatParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
@@ -663,7 +663,7 @@
DelimitedDataTupleParserFactory stringParser = new DelimitedDataTupleParserFactory(
new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, '\u0000');
RecordDescriptor stringRec = new RecordDescriptor(
- new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE, });
+ new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), });
FileScanOperatorDescriptor scanOp = new FileScanOperatorDescriptor(spec, new ConstantFileSplitProvider(
inputSplits), stringParser, stringRec);
@@ -709,8 +709,8 @@
"data/tpch0.001/nation.tbl")));
IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE,
UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE };
FileScanOperatorDescriptor scanner = new FileScanOperatorDescriptor(spec, splitProvider,
@@ -817,10 +817,10 @@
"data/tpch0.001/customer.tbl")));
IFileSplitProvider splitProvider = new ConstantFileSplitProvider(fileSplits);
RecordDescriptor scannerDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), FloatSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
IValueParserFactory[] valueParsers = new IValueParserFactory[] { IntegerParserFactory.INSTANCE,
UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE,
UTF8StringParserFactory.INSTANCE, FloatParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
diff --git a/algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/tools/WriteValueTest.java b/algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/tools/WriteValueTest.java
index 0968478..6770494 100644
--- a/algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/tools/WriteValueTest.java
+++ b/algebricks/algebricks-tests/src/test/java/org/apache/hyracks/algebricks/tests/tools/WriteValueTest.java
@@ -97,7 +97,7 @@
interm.reset();
dout.writeUTF(str);
baaos.reset();
- WriteValueTools.writeUTF8String(interm.getByteArray(), 0, interm.size(), baaos);
+ WriteValueTools.writeUTF8StringWithQuotes(str, baaos);
byte[] b = str.getBytes("UTF-8");
if (baaos.size() != b.length + 2) {
throw new Exception("Expecting to write " + b + " in " + b.length + " bytes, but found " + baaos.size()
diff --git a/hyracks/hyracks-data/hyracks-data-std/pom.xml b/hyracks/hyracks-data/hyracks-data-std/pom.xml
index 8546bdb..20c30ef 100644
--- a/hyracks/hyracks-data/hyracks-data-std/pom.xml
+++ b/hyracks/hyracks-data/hyracks-data-std/pom.xml
@@ -17,23 +17,35 @@
! under the License.
!-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <artifactId>hyracks-data-std</artifactId>
- <name>hyracks-data-std</name>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>hyracks-data-std</artifactId>
+ <name>hyracks-data-std</name>
- <parent>
- <groupId>org.apache.hyracks</groupId>
- <artifactId>hyracks-data</artifactId>
- <version>0.2.17-SNAPSHOT</version>
- </parent>
+ <parent>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-data</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ </parent>
-
- <dependencies>
- <dependency>
- <groupId>org.apache.hyracks</groupId>
- <artifactId>hyracks-api</artifactId>
- <version>0.2.17-SNAPSHOT</version>
- </dependency>
- </dependencies>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-util</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-api</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-util</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
</project>
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/accessors/UTF8StringBinaryHashFunctionFamily.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/accessors/UTF8StringBinaryHashFunctionFamily.java
index affafea..ea661e3 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/accessors/UTF8StringBinaryHashFunctionFamily.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/accessors/UTF8StringBinaryHashFunctionFamily.java
@@ -20,7 +20,7 @@
import org.apache.hyracks.api.dataflow.value.IBinaryHashFunction;
import org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFamily;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public class UTF8StringBinaryHashFunctionFamily implements IBinaryHashFunctionFamily {
public static final IBinaryHashFunctionFamily INSTANCE = new UTF8StringBinaryHashFunctionFamily();
@@ -40,17 +40,7 @@
return new IBinaryHashFunction() {
@Override
public int hash(byte[] bytes, int offset, int length) {
- int h = 0;
- int utflen = UTF8StringPointable.getUTFLength(bytes, offset);
- int sStart = offset + 2;
- int c = 0;
-
- while (c < utflen) {
- char ch = UTF8StringPointable.charAt(bytes, sStart + c);
- h = (coefficient * h + ch) % r;
- c += UTF8StringPointable.charSize(bytes, sStart + c);
- }
- return h;
+ return UTF8StringUtil.hash(bytes, offset, coefficient, r);
}
};
}
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/api/AbstractPointable.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/api/AbstractPointable.java
index a10b0da..549a136 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/api/AbstractPointable.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/api/AbstractPointable.java
@@ -30,6 +30,7 @@
this.bytes = bytes;
this.start = start;
this.length = length;
+ afterReset();
}
@Override
@@ -37,6 +38,13 @@
set(pointer.getByteArray(), pointer.getStartOffset(), pointer.getLength());
}
+ /**
+ * This method will be called after the new byte values have been set.
+ * It can be used to reset the state of an inheriting Pointable object.
+ */
+ protected void afterReset() {
+ }
+
@Override
public byte[] getByteArray() {
return bytes;
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/ByteArrayPointable.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/ByteArrayPointable.java
index 0a2a723..af54c7e 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/ByteArrayPointable.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/ByteArrayPointable.java
@@ -19,10 +19,33 @@
package org.apache.hyracks.data.std.primitive;
-import org.apache.hyracks.api.dataflow.value.ITypeTraits;
-import org.apache.hyracks.data.std.api.*;
+import java.io.Serializable;
+import java.util.Arrays;
-public class ByteArrayPointable extends AbstractPointable implements IHashable, IComparable {
+import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.data.std.api.AbstractPointable;
+import org.apache.hyracks.data.std.api.IComparable;
+import org.apache.hyracks.data.std.api.IHashable;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.api.IPointableFactory;
+import org.apache.hyracks.data.std.api.IValueReference;
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
+
+public class ByteArrayPointable extends AbstractPointable implements IHashable, IComparable, Serializable {
+
+ // These three values are cached to speed up access to the length data.
+ // Since we are using variable-length encoding, caching saves repeated decoding effort.
+ // WARNING: these cached values must be refreshed after each reset(); see afterReset().
+ private int contentLength = -1;
+ private int metaLength = -1;
+ private int hash = 0;
+
+ @Override
+ protected void afterReset() {
+ contentLength = getContentLength(getByteArray(), getStartOffset());
+ metaLength = getNumberBytesToStoreMeta(contentLength);
+ hash = 0;
+ }
public static final ITypeTraits TYPE_TRAITS = new ITypeTraits() {
private static final long serialVersionUID = 1L;
@@ -58,48 +81,106 @@
}
@Override
- public int compareTo(byte[] bytes, int start, int length) {
- int thislen = getLength(this.bytes, this.start);
- int thatlen = getLength(bytes, start);
+ public int compareTo(byte[] thatBytes, int thatStart, int thatLength) {
+ int thisArrayLen = getContentLength(this.bytes, this.start);
+ int thatArrayLen = getContentLength(thatBytes, thatStart);
- for (int thisIndex = 0, thatIndex = 0; thisIndex < thislen && thatIndex < thatlen; ++thisIndex, ++thatIndex) {
- if (this.bytes[this.start + SIZE_OF_LENGTH + thisIndex] != bytes[start + SIZE_OF_LENGTH + thatIndex]) {
- return (0xff & this.bytes[this.start + SIZE_OF_LENGTH + thisIndex]) - (0xff & bytes[start + SIZE_OF_LENGTH
- + thatIndex]);
+ int thisArrayStart = this.getContentStartOffset();
+ int thatArrayStart = thatStart + getNumberBytesToStoreMeta(thatArrayLen);
+
+ for (int thisIndex = 0, thatIndex = 0;
+ thisIndex < thisArrayLen && thatIndex < thatArrayLen; ++thisIndex, ++thatIndex) {
+ if (this.bytes[thisArrayStart + thisIndex] != thatBytes[thatArrayStart + thatIndex]) {
+ return (0xff & this.bytes[thisArrayStart + thisIndex]) - (0xff & thatBytes[thatArrayStart + thatIndex]);
}
}
- return thislen - thatlen;
+ return thisArrayLen - thatArrayLen;
+ }
+
+ public int getContentLength() {
+ return contentLength;
+ }
+
+ public int getMetaLength() {
+ return metaLength;
}
@Override
public int hash() {
- int h = 0;
- int realLength = getLength(bytes, start);
- for (int i = 0; i < realLength; ++i) {
- h = 31 * h + bytes[start + SIZE_OF_LENGTH + i];
+ if (hash == 0) {
+ int h = 0;
+ int realLength = getContentLength();
+ int startOffset = getContentStartOffset();
+ for (int i = 0; i < realLength; ++i) {
+ h = 31 * h + bytes[startOffset + i];
+ }
+ hash = h;
}
- return h;
+ return hash;
}
@Override
- public int getLength(){
- return getFullLength(getByteArray(), getStartOffset());
+ public int getLength() {
+ return getContentLength() + getMetaLength();
}
- public static final int SIZE_OF_LENGTH = 2;
- public static final int MAX_LENGTH = 65535;
-
- public static int getLength(byte[] bytes, int offset) {
- return ((0xFF & bytes[offset]) << 8) + (0xFF & bytes[offset + 1]);
+ public int getContentStartOffset() {
+ return getStartOffset() + getMetaLength();
}
- public static int getFullLength(byte[] bytes, int offset){
- return getLength(bytes, offset) + SIZE_OF_LENGTH;
+ ///////////////// helper functions ////////////////////////////////
+ public static byte[] copyContent(ByteArrayPointable bytePtr) {
+ return Arrays.copyOfRange(bytePtr.getByteArray(), bytePtr.getContentStartOffset(),
+ bytePtr.getContentStartOffset() + bytePtr.getContentLength());
}
- public static void putLength(int length, byte[] bytes, int offset) {
- bytes[offset] = (byte) ((length >>> 8) & 0xFF);
- bytes[offset + 1] = (byte) ((length >>> 0) & 0xFF);
+ public static ByteArrayPointable generatePointableFromPureBytes(byte[] bytes) {
+ return generatePointableFromPureBytes(bytes, 0, bytes.length);
+ }
+
+ public static ByteArrayPointable generatePointableFromPureBytes(byte[] bytes, int start, int length) {
+ int metaLen = getNumberBytesToStoreMeta(length);
+ byte[] ret = new byte[length + metaLen];
+ VarLenIntEncoderDecoder.encode(length, ret, 0);
+ for (int i = 0; i < length; ++i) {
+ ret[i + metaLen] = bytes[start + i];
+ }
+ ByteArrayPointable ptr = new ByteArrayPointable();
+ ptr.set(ret, 0, ret.length);
+ return ptr;
+ }
+
+ public static int getContentLength(byte[] bytes, int offset) {
+ return VarLenIntEncoderDecoder.decode(bytes, offset);
+ }
+
+ public static int getNumberBytesToStoreMeta(int length) {
+ return VarLenIntEncoderDecoder.getBytesRequired(length);
+ }
+
+ /**
+ * Compute the normalized key of the byte array.
+ * The normalized key in Hyracks is mainly used to speed up comparisons between pointable data.
+ * In the ByteArray case, we compute an integer value from the first 4 bytes.
+ * The comparator first uses this integer to get the result (<, >, or =) and inspects
+ * the actual bytes only when the normalized keys are equal. Thus the normalized key must be
+ * consistent with the comparison result.
+ *
+ * @param bytesPtr
+ * @param start
+ * @return
+ */
+ public static int normalize(byte[] bytesPtr, int start) {
+ int len = getContentLength(bytesPtr, start);
+ long nk = 0;
+ start = start + getNumberBytesToStoreMeta(len);
+ for (int i = 0; i < 4; ++i) {
+ nk <<= 8;
+ if (i < len) {
+ nk |= bytesPtr[start + i] & 0xff;
+ }
+ }
+ return (int) (nk >> 1); // make it always positive.
}
}
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/RawUTF8StringPointable.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/RawUTF8StringPointable.java
index 2b1f557..70bac4d 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/RawUTF8StringPointable.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/RawUTF8StringPointable.java
@@ -24,6 +24,7 @@
import org.apache.hyracks.data.std.api.IHashable;
import org.apache.hyracks.data.std.api.IPointable;
import org.apache.hyracks.data.std.api.IPointableFactory;
+import org.apache.hyracks.util.string.UTF8StringUtil;
/**
* This class provides the raw bytes-based comparison and hash function for UTF8 strings.
@@ -66,44 +67,16 @@
@Override
public int compareTo(byte[] bytes, int start, int length) {
- int utflen1 = UTF8StringPointable.getUTFLength(this.bytes, this.start);
- int utflen2 = UTF8StringPointable.getUTFLength(bytes, start);
-
- int c1 = 0;
- int c2 = 0;
-
- int s1Start = this.start + 2;
- int s2Start = start + 2;
-
- while (c1 < utflen1 && c2 < utflen2) {
- char ch1 = (char) this.bytes[s1Start + c1];
- char ch2 = (char) bytes[s2Start + c2];
-
- if (ch1 != ch2) {
- return ch1 - ch2;
- }
- c1++;
- c2++;
- }
- return utflen1 - utflen2;
+ return UTF8StringUtil.rawByteCompareTo(this.bytes, this.start, bytes, start);
}
@Override
public int hash() {
- int h = 0;
- int utflen = UTF8StringPointable.getUTFLength(bytes, start);
- int sStart = start + 2;
- int c = 0;
-
- while (c < utflen) {
- char ch = (char) bytes[sStart + c];
- h = 31 * h + ch;
- c++;
- }
- return h;
+ return UTF8StringUtil.rawBytehash(this.bytes, this.start);
}
public void toString(StringBuilder buffer) {
- UTF8StringPointable.toString(buffer, bytes, start);
+ UTF8StringUtil.toString(buffer, bytes, start);
}
+
}
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringLowercasePointable.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringLowercasePointable.java
new file mode 100644
index 0000000..6e4810c
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringLowercasePointable.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hyracks.data.std.primitive;
+
+import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.data.std.api.AbstractPointable;
+import org.apache.hyracks.data.std.api.IComparable;
+import org.apache.hyracks.data.std.api.IHashable;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.api.IPointableFactory;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+
+public final class UTF8StringLowercasePointable extends AbstractPointable implements IHashable, IComparable {
+ public static final ITypeTraits TYPE_TRAITS = new ITypeTraits() {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public boolean isFixedLength() {
+ return false;
+ }
+
+ @Override
+ public int getFixedLength() {
+ return 0;
+ }
+ };
+
+ public static final IPointableFactory FACTORY = new IPointableFactory() {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public IPointable createPointable() {
+ return new UTF8StringLowercasePointable();
+ }
+
+ @Override
+ public ITypeTraits getTypeTraits() {
+ return TYPE_TRAITS;
+ }
+ };
+
+ @Override
+ public int compareTo(IPointable pointer) {
+ return compareTo(pointer.getByteArray(), pointer.getStartOffset(), pointer.getLength());
+ }
+
+ @Override
+ public int compareTo(byte[] bytes, int start, int length) {
+ return UTF8StringUtil.lowerCaseCompareTo(this.bytes, this.start, bytes, start);
+ }
+
+ @Override
+ public int hash() {
+ return UTF8StringUtil.lowerCaseHash(bytes, start);
+ }
+
+}
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
index 8b41206..e311fa6 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
@@ -18,14 +18,42 @@
*/
package org.apache.hyracks.data.std.primitive;
+import java.io.IOException;
+import java.nio.charset.Charset;
+
import org.apache.hyracks.api.dataflow.value.ITypeTraits;
import org.apache.hyracks.data.std.api.AbstractPointable;
import org.apache.hyracks.data.std.api.IComparable;
import org.apache.hyracks.data.std.api.IHashable;
import org.apache.hyracks.data.std.api.IPointable;
import org.apache.hyracks.data.std.api.IPointableFactory;
+import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.data.std.util.UTF8StringBuilder;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public final class UTF8StringPointable extends AbstractPointable implements IHashable, IComparable {
+
+ // These values are cached to speed up access to the length data.
+ // Since we are using variable-length encoding, caching saves repeated decoding effort.
+ // WARNING: these cached values must be refreshed after each reset(); see afterReset().
+ private int utf8Length;
+ private int metaLength;
+ private int hashValue;
+ private int stringLength;
+
+ /**
+ * Resets the cached length metadata.
+ * Since {@code utf8Length} and {@code metaLength} are used frequently, those two values are computed eagerly.
+ * {@code stringLength} and {@code hashValue} are initialized lazily on first use.
+ */
+ @Override
+ protected void afterReset() {
+ utf8Length = UTF8StringUtil.getUTFLength(bytes, start);
+ metaLength = UTF8StringUtil.getNumBytesToStoreLength(getUTF8Length());
+ hashValue = 0;
+ stringLength = -1;
+ }
+
public static final ITypeTraits TYPE_TRAITS = new ITypeTraits() {
private static final long serialVersionUID = 1L;
@@ -54,111 +82,57 @@
}
};
+ public static UTF8StringPointable generateUTF8Pointable(String string) {
+ byte[] bytes;
+ bytes = UTF8StringUtil.writeStringToBytes(string);
+ UTF8StringPointable ptr = new UTF8StringPointable();
+ ptr.set(bytes, 0, bytes.length);
+ return ptr;
+ }
+
/**
* Returns the character at the given byte offset. The caller is responsible for making sure that
* the provided offset is within bounds and points to the beginning of a valid UTF8 character.
- *
- * @param offset
- * - Byte offset
+ *
+ * @param offset - Byte offset
* @return Character at the given offset.
*/
public char charAt(int offset) {
- return charAt(bytes, start + offset);
- }
-
- public static char charAt(byte[] b, int s) {
- int c = b[s] & 0xff;
- switch (c >> 4) {
- case 0:
- case 1:
- case 2:
- case 3:
- case 4:
- case 5:
- case 6:
- case 7:
- return (char) c;
-
- case 12:
- case 13:
- return (char) (((c & 0x1F) << 6) | ((b[s + 1]) & 0x3F));
-
- case 14:
- return (char) (((c & 0x0F) << 12) | (((b[s + 1]) & 0x3F) << 6) | (((b[s + 2]) & 0x3F) << 0));
-
- default:
- throw new IllegalArgumentException();
- }
+ return UTF8StringUtil.charAt(bytes, start + offset);
}
public int charSize(int offset) {
- return charSize(bytes, start + offset);
- }
-
- public static int charSize(byte[] b, int s) {
- int c = b[s] & 0xff;
- switch (c >> 4) {
- case 0:
- case 1:
- case 2:
- case 3:
- case 4:
- case 5:
- case 6:
- case 7:
- return 1;
-
- case 12:
- case 13:
- return 2;
-
- case 14:
- return 3;
- }
- throw new IllegalStateException();
- }
-
- public static int getModifiedUTF8Len(char c) {
- if (c >= 0x0000 && c <= 0x007F) {
- return 1;
- } else if (c <= 0x07FF) {
- return 2;
- } else {
- return 3;
- }
+ return UTF8StringUtil.charSize(bytes, start + offset);
}
/**
* Gets the length of the string in characters.
- *
+ * The first call scans the entire string; subsequent calls return the cached result.
+ *
* @return length of string in characters
*/
public int getStringLength() {
- return getStringLength(bytes, start);
- }
-
- public static int getStringLength(byte[] b, int s) {
- int pos = s + 2;
- int end = pos + getUTFLength(b, s);
- int charCount = 0;
- while (pos < end) {
- charCount++;
- pos += charSize(b, pos);
+ if (stringLength < 0) {
+ stringLength = UTF8StringUtil.getStringLength(bytes, start);
}
- return charCount;
+ return stringLength;
}
/**
* Gets the length of the UTF-8 encoded string in bytes.
- *
+ *
* @return length of UTF-8 encoded string in bytes
*/
- public int getUTFLength() {
- return getUTFLength(bytes, start);
+ public int getUTF8Length() {
+ return utf8Length;
}
- public static int getUTFLength(byte[] b, int s) {
- return ((b[s] & 0xff) << 8) + ((b[s + 1] & 0xff) << 0);
+ public int getMetaDataLength() {
+ return metaLength;
+ }
+
+ public int getCharStartOffset() {
+ return getStartOffset() + getMetaDataLength();
}
@Override
@@ -168,56 +142,307 @@
@Override
public int compareTo(byte[] bytes, int start, int length) {
- int utflen1 = getUTFLength(this.bytes, this.start);
- int utflen2 = getUTFLength(bytes, start);
-
- int c1 = 0;
- int c2 = 0;
-
- int s1Start = this.start + 2;
- int s2Start = start + 2;
-
- while (c1 < utflen1 && c2 < utflen2) {
- char ch1 = charAt(this.bytes, s1Start + c1);
- char ch2 = charAt(bytes, s2Start + c2);
-
- if (ch1 != ch2) {
- return ch1 - ch2;
- }
- c1 += charSize(this.bytes, s1Start + c1);
- c2 += charSize(bytes, s2Start + c2);
- }
- return utflen1 - utflen2;
+ return UTF8StringUtil.compareTo(this.bytes, this.start, bytes, start);
}
@Override
public int hash() {
- int h = 0;
- int utflen = getUTFLength(bytes, start);
- int sStart = start + 2;
- int c = 0;
-
- while (c < utflen) {
- char ch = charAt(bytes, sStart + c);
- h = 31 * h + ch;
- c += charSize(bytes, sStart + c);
+ if (hashValue == 0) {
+ hashValue = UTF8StringUtil.hash(this.bytes, this.start);
}
- return h;
- }
-
- public static void toString(StringBuilder buffer, byte[] bytes, int start) {
- int utfLen = getUTFLength(bytes, start);
- int offset = 2;
- while (utfLen > 0) {
- char c = charAt(bytes, start + offset);
- buffer.append(c);
- int cLen = UTF8StringPointable.getModifiedUTF8Len(c);
- offset += cLen;
- utfLen -= cLen;
- }
+ return hashValue;
}
public void toString(StringBuilder buffer) {
- toString(buffer, bytes, start);
+ UTF8StringUtil.toString(buffer, bytes, start);
}
+
+ public String toString() {
+ return new String(this.bytes, this.getCharStartOffset(), this.getUTF8Length(), Charset.forName("UTF-8"));
+ }
+
+ /****
+ * String functions
+ */
+
+ public int ignoreCaseCompareTo(UTF8StringPointable other) {
+ return UTF8StringUtil.lowerCaseCompareTo(this.getByteArray(), this.getStartOffset(),
+ other.getByteArray(), other.getStartOffset());
+ }
+
+ public int find(UTF8StringPointable pattern, boolean ignoreCase) {
+ return find(this, pattern, ignoreCase);
+ }
+
+ /**
+ * Returns the byte offset of the first character of the matching string, excluding the meta length.
+ *
+ * @param src
+ * @param pattern
+ * @param ignoreCase
+ * @return
+ */
+ public static int find(UTF8StringPointable src, UTF8StringPointable pattern, boolean ignoreCase) {
+ final int srcUtfLen = src.getUTF8Length();
+ final int pttnUtfLen = pattern.getUTF8Length();
+ final int srcStart = src.getMetaDataLength();
+ final int pttnStart = pattern.getMetaDataLength();
+
+ int startMatch = 0;
+ int maxStart = srcUtfLen - pttnUtfLen;
+ while (startMatch <= maxStart) {
+ int c1 = startMatch;
+ int c2 = 0;
+ while (c1 < srcUtfLen && c2 < pttnUtfLen) {
+ char ch1 = src.charAt(srcStart + c1);
+ char ch2 = pattern.charAt(pttnStart + c2);
+
+ if (ch1 != ch2) {
+ if (!ignoreCase || ignoreCase && Character.toLowerCase(ch1) != Character.toLowerCase(ch2)) {
+ break;
+ }
+ }
+ c1 += src.charSize(srcStart + c1);
+ c2 += pattern.charSize(pttnStart + c2);
+ }
+ if (c2 == pttnUtfLen) {
+ return startMatch;
+ }
+ startMatch += src.charSize(srcStart + startMatch);
+ }
+ return -1;
+ }
+
+ public boolean contains(UTF8StringPointable pattern, boolean ignoreCase) {
+ return contains(this, pattern, ignoreCase);
+ }
+
+ public static boolean contains(UTF8StringPointable src, UTF8StringPointable pattern, boolean ignoreCase) {
+ return find(src, pattern, ignoreCase) >= 0;
+ }
+
+ public boolean startsWith(UTF8StringPointable pattern, boolean ignoreCase) {
+ return startsWith(this, pattern, ignoreCase);
+ }
+
+ public static boolean startsWith(UTF8StringPointable src, UTF8StringPointable pattern, boolean ignoreCase) {
+ int utflen1 = src.getUTF8Length();
+ int utflen2 = pattern.getUTF8Length();
+ if (utflen2 > utflen1)
+ return false;
+
+ int s1Start = src.getMetaDataLength();
+ int s2Start = pattern.getMetaDataLength();
+
+ int c1 = 0;
+ int c2 = 0;
+ while (c1 < utflen1 && c2 < utflen2) {
+ char ch1 = src.charAt(s1Start + c1);
+ char ch2 = pattern.charAt(s2Start + c2);
+ if (ch1 != ch2) {
+ if (!ignoreCase || ignoreCase && Character.toLowerCase(ch1) != Character.toLowerCase(ch2)) {
+ break;
+ }
+ }
+ c1 += src.charSize(s1Start + c1);
+ c2 += pattern.charSize(s2Start + c2);
+ }
+ return (c2 == utflen2);
+ }
+
+ public boolean endsWith(UTF8StringPointable pattern, boolean ignoreCase) {
+ return endsWith(this, pattern, ignoreCase);
+ }
+
+ public static boolean endsWith(UTF8StringPointable src, UTF8StringPointable pattern, boolean ignoreCase) {
+ int len1 = src.getUTF8Length();
+ int len2 = pattern.getUTF8Length();
+ if (len2 > len1)
+ return false;
+
+ int s1Start = src.getMetaDataLength();
+ int s2Start = pattern.getMetaDataLength();
+
+ int c1 = len1 - len2;
+ int c2 = 0;
+ while (c1 < len1 && c2 < len2) {
+ char ch1 = src.charAt(s1Start + c1);
+ char ch2 = pattern.charAt(s2Start + c2);
+
+ if (ch1 != ch2) {
+ if (!ignoreCase || ignoreCase && Character.toLowerCase(ch1) != Character.toLowerCase(ch2)) {
+ break;
+ }
+ }
+ c1 += src.charSize(s1Start + c1);
+ c2 += pattern.charSize(s2Start + c2);
+ }
+ return (c2 == len2);
+ }
+
+ public void concat(UTF8StringPointable next, UTF8StringBuilder builder, GrowableArray out) throws IOException {
+ concat(this, next, builder, out);
+ }
+
+ public static void concat(UTF8StringPointable first, UTF8StringPointable next, UTF8StringBuilder builder,
+ GrowableArray out) throws IOException {
+ int firstUtfLen = first.getUTF8Length();
+ int nextUtfLen = next.getUTF8Length();
+
+ builder.reset(out, firstUtfLen + nextUtfLen);
+ builder.appendUtf8StringPointable(first);
+ builder.appendUtf8StringPointable(next);
+ builder.finish();
+ }
+
+ public void substr(int charOffset, int charLength, UTF8StringBuilder builder, GrowableArray out)
+ throws IOException {
+ substr(this, charOffset, charLength, builder, out);
+ }
+
+ public static void substr(UTF8StringPointable src, int charOffset, int charLength, UTF8StringBuilder builder,
+ GrowableArray out) throws IOException {
+ // Supporting the charOffset < 0 case is questionable: it usually indicates a mistake on the
+ // caller's side, and a warning would arguably be more appropriate.
+ // assert charOffset >= 0;
+ if (charOffset < 0) {
+ charOffset = 0;
+ }
+ if (charLength < 0) {
+ charLength = 0;
+ }
+
+ int utfLen = src.getUTF8Length();
+ int chIdx = 0;
+ int byteIdx = 0;
+ while (byteIdx < utfLen && chIdx < charOffset) {
+ byteIdx += src.charSize(src.getMetaDataLength() + byteIdx);
+ chIdx++;
+ }
+ if (byteIdx >= utfLen) {
+ // Again, tolerating this kind of mistake is questionable.
+ // throw new StringIndexOutOfBoundsException(charOffset);
+ builder.reset(out, 0);
+ builder.finish();
+ return;
+ }
+
+ builder.reset(out, Math.min(utfLen - byteIdx, (int) (charLength * 1.0 * byteIdx / chIdx)));
+ chIdx = 0;
+ while (byteIdx < utfLen && chIdx < charLength) {
+ builder.appendChar(src.charAt(src.getMetaDataLength() + byteIdx));
+ chIdx++;
+ byteIdx += src.charSize(src.getMetaDataLength() + byteIdx);
+ }
+ builder.finish();
+ }
+
+ public void substrBefore(UTF8StringPointable match, UTF8StringBuilder builder, GrowableArray out)
+ throws IOException {
+ substrBefore(this, match, builder, out);
+ }
+
+ /**
+ * Writes the substring before the given pattern. It writes an empty string if no match is found.
+ *
+ * @param src
+ * @param match
+ * @param builder
+ * @param out
+ * @throws IOException
+ */
+ public static void substrBefore(
+ UTF8StringPointable src,
+ UTF8StringPointable match,
+ UTF8StringBuilder builder,
+ GrowableArray out) throws IOException {
+
+ int byteOffset = find(src, match, false);
+ if (byteOffset < 0) {
+ builder.reset(out, 0);
+ builder.finish();
+ return;
+ }
+
+ final int srcMetaLen = src.getMetaDataLength();
+
+ builder.reset(out, byteOffset);
+ for (int idx = 0; idx < byteOffset; ) {
+ builder.appendChar(src.charAt(srcMetaLen + idx));
+ idx += src.charSize(srcMetaLen + idx);
+ }
+ builder.finish();
+ }
+
+ public void substrAfter(UTF8StringPointable match, UTF8StringBuilder builder, GrowableArray out)
+ throws IOException {
+ substrAfter(this, match, builder, out);
+ }
+
+ /**
+ * Writes the substring after the given pattern. It writes an empty string if no match is found.
+ *
+ * @param src
+ * @param match
+ * @param builder
+ * @param out
+ */
+ public static void substrAfter(
+ UTF8StringPointable src,
+ UTF8StringPointable match,
+ UTF8StringBuilder builder,
+ GrowableArray out) throws IOException {
+
+ int byteOffset = find(src, match, false);
+ if (byteOffset < 0) {
+ builder.reset(out, 0);
+ builder.finish();
+ return;
+ }
+
+ final int srcUtfLen = src.getUTF8Length();
+ final int matchUtfLen = match.getUTF8Length();
+
+ final int resultLen = srcUtfLen - byteOffset - matchUtfLen;
+ builder.reset(out, resultLen);
+ builder.appendUtf8StringPointable(src, src.getCharStartOffset() + byteOffset + matchUtfLen, resultLen);
+ builder.finish();
+ }
+
+ public void lowercase(UTF8StringBuilder builder, GrowableArray out) throws IOException {
+ lowercase(this, builder, out);
+ }
+
+ public static void lowercase(UTF8StringPointable src, UTF8StringBuilder builder, GrowableArray out)
+ throws IOException {
+ final int srcUtfLen = src.getUTF8Length();
+ final int srcStart = src.getMetaDataLength();
+
+ builder.reset(out, srcUtfLen);
+ int byteIndex = 0;
+ while (byteIndex < srcUtfLen) {
+ builder.appendChar(Character.toLowerCase(src.charAt(srcStart + byteIndex)));
+ byteIndex += src.charSize(srcStart + byteIndex);
+ }
+ builder.finish();
+ }
+
+ public void uppercase(UTF8StringBuilder builder, GrowableArray out) throws IOException {
+ uppercase(this, builder, out);
+ }
+
+ public static void uppercase(UTF8StringPointable src, UTF8StringBuilder builder, GrowableArray out)
+ throws IOException {
+ final int srcUtfLen = src.getUTF8Length();
+ final int srcStart = src.getMetaDataLength();
+
+ builder.reset(out, srcUtfLen);
+ int byteIndex = 0;
+ while (byteIndex < srcUtfLen) {
+ builder.appendChar(Character.toUpperCase(src.charAt(srcStart + byteIndex)));
+ byteIndex += src.charSize(srcStart + byteIndex);
+ }
+ builder.finish();
+ }
+
}
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringWriter.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringWriter.java
deleted file mode 100644
index ae7e903..0000000
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringWriter.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.hyracks.data.std.primitive;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.UTFDataFormatException;
-
-public class UTF8StringWriter {
- private byte[] tempBytes;
-
- public void writeUTF8String(CharSequence str, DataOutput out) throws IOException {
- int strlen = str.length();
- int utflen = 0;
- int c, count = 0;
-
- for (int i = 0; i < strlen; i++) {
- c = str.charAt(i);
- if ((c >= 0x0001) && (c <= 0x007F)) {
- utflen++;
- } else if (c > 0x07FF) {
- utflen += 3;
- } else {
- utflen += 2;
- }
- }
-
- if (utflen > 65535) {
- throw new UTFDataFormatException("encoded string too long: " + utflen + " bytes");
- }
-
- if (tempBytes == null || tempBytes.length < utflen + 2) {
- tempBytes = new byte[utflen + 2];
- }
-
- tempBytes[count++] = (byte) ((utflen >>> 8) & 0xFF);
- tempBytes[count++] = (byte) ((utflen >>> 0) & 0xFF);
-
- int i = 0;
- for (i = 0; i < strlen; i++) {
- c = str.charAt(i);
- if (!((c >= 0x0001) && (c <= 0x007F))) {
- break;
- }
- tempBytes[count++] = (byte) c;
- }
-
- for (; i < strlen; i++) {
- c = str.charAt(i);
- if ((c >= 0x0001) && (c <= 0x007F)) {
- tempBytes[count++] = (byte) c;
- } else if (c > 0x07FF) {
- tempBytes[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
- tempBytes[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
- tempBytes[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
- } else {
- tempBytes[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
- tempBytes[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
- }
- }
- out.write(tempBytes, 0, utflen + 2);
- }
-}
\ No newline at end of file
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/AbstractVarLenObjectBuilder.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/AbstractVarLenObjectBuilder.java
new file mode 100644
index 0000000..452710e
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/AbstractVarLenObjectBuilder.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.util;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
+
+/**
+ * This builder is used to build variable-length encoded objects (e.g. UTF8String or ByteArray).
+ * The caller provides an estimated length when calling {@link #reset(GrowableArray, int)}
+ * and can then append the content byte by byte.
+ * Since the number of bytes needed to store the content length is not known precisely up front, the caller needs
+ * to explicitly call {@link #finish()} to signal that the object is complete.
+ * The builder then takes care of storing the actual length field at the beginning of the
+ * given storage array.
+ */
+public abstract class AbstractVarLenObjectBuilder {
+ protected GrowableArray ary;
+ protected DataOutput out;
+ protected int startOffset;
+ protected int estimateMetaLen;
+
+ /**
+ * Start building a variable-length object.
+ *
+ * @param ary the destination storage array
+ * @param estimateLength the estimated length of this object
+ * @throws IOException
+ */
+ public void reset(GrowableArray ary, int estimateLength) throws IOException {
+ this.ary = ary;
+ this.out = ary.getDataOutput();
+ this.startOffset = ary.getLength();
+ this.estimateMetaLen = VarLenIntEncoderDecoder.getBytesRequired(estimateLength);
+
+ // increase the offset
+ for (int i = 0; i < estimateMetaLen; i++) {
+ out.writeByte(0);
+ }
+ }
+
+ /**
+ * Finish building a variable-length object.
+ * It writes the correct length of the object at the beginning of the storage array.
+ * Since the number of bytes needed to store the length can change (if the given estimated length differs too
+ * much from the actual length), the data may need to be shifted.
+ * Specifically, if varlength(actual length) > varlength(estimated length), we grow the storage and
+ * shift the content rightward. Otherwise we shift the data leftward and tell the storage to rewind by the
+ * difference to mark the correct position.
+ *
+ * @throws IOException
+ */
+ public void finish() throws IOException {
+ int actualDataLength = ary.getLength() - startOffset - estimateMetaLen;
+ int actualMetaLen = VarLenIntEncoderDecoder.getBytesRequired(actualDataLength);
+ if (actualMetaLen != estimateMetaLen) { // ugly but rare case where the estimate varied a lot
+ int diff = estimateMetaLen - actualMetaLen;
+ int actualDataStart = startOffset + actualMetaLen;
+ if (diff > 0) { // shrink
+ for (int i = 0; i < actualDataLength; i++) {
+ ary.getByteArray()[actualDataStart + i] = ary.getByteArray()[actualDataStart + i + diff];
+ }
+ ary.rewindPositionBy(diff);
+ } else { // increase space
+ diff = -diff;
+ for (int i = 0; i < diff; i++) {
+ out.writeByte(0);
+ }
+ for (int i = ary.getLength() - 1; i >= actualDataStart + diff; i--) {
+ ary.getByteArray()[i] = ary.getByteArray()[i - diff];
+ }
+ }
+ }
+ VarLenIntEncoderDecoder.encode(actualDataLength, ary.getByteArray(), startOffset);
+ }
+
+}
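To illustrate how a concrete subclass drives this reset/append/finish lifecycle, here is a minimal, hypothetical sketch. It is not part of the patch; it assumes the ByteArrayBuilder and GrowableArray classes introduced below, and the 127/128 one-byte/two-byte length boundary exercised by the tests later in this change.

    import java.io.IOException;

    import org.apache.hyracks.data.std.util.ByteArrayBuilder;
    import org.apache.hyracks.data.std.util.GrowableArray;

    public class VarLenBuilderSketch {
        public static void main(String[] args) throws IOException {
            GrowableArray storage = new GrowableArray();
            ByteArrayBuilder builder = new ByteArrayBuilder();

            // The estimate only decides how many placeholder length bytes are reserved up front.
            builder.reset(storage, 200);      // an estimate of 200 reserves a 2-byte length field (assumption)
            builder.appendByte((byte) 0x0A);
            builder.appendByte((byte) 0x0B);
            builder.appendByte((byte) 0x0C);
            builder.finish();                 // actual length is 3, so the content is shifted left and rewound

            // storage now holds: [1-byte length field = 3][0x0A, 0x0B, 0x0C]
            System.out.println("total encoded bytes: " + storage.getLength());
        }
    }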
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ByteArrayAccessibleOutputStream.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ByteArrayAccessibleOutputStream.java
index 2f1ad1d..287e2f2 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ByteArrayAccessibleOutputStream.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ByteArrayAccessibleOutputStream.java
@@ -41,6 +41,26 @@
count += 1;
}
+    /**
+     * Rewind the current position by {@code delta} bytes to a previous position.
+     * This drops the last {@code delta} bytes that were already written.
+     * It is useful when, after writing, we discover that more bytes were written than expected and the
+     * position has to be moved back to the expected one.
+     *
+     * Currently it is used by the {@link AbstractVarLenObjectBuilder}, which may reserve more space than
+     * required at the beginning and later shifts the data and fixes the position as needed.
+     *
+     * It throws {@link IndexOutOfBoundsException} if {@code delta} is negative or exceeds the current count.
+     * Use with caution.
+     * @param delta
+     */
+ public void rewindPositionBy(int delta) {
+ if (delta < 0 || count < delta) {
+ throw new IndexOutOfBoundsException();
+ }
+ count -= delta;
+ }
+
@Override
public void write(byte[] b, int off, int len) {
if ((off < 0) || (off > b.length) || (len < 0) || ((off + len) - b.length > 0)) {
@@ -60,9 +80,8 @@
/**
* Increases the capacity to ensure that it can hold at least the
* number of elements specified by the minimum capacity argument.
- *
- * @param minCapacity
- * the desired minimum capacity
+ *
+ * @param minCapacity the desired minimum capacity
*/
private void grow(int minCapacity) {
// overflow-conscious code
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ByteArrayBuilder.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ByteArrayBuilder.java
new file mode 100644
index 0000000..61b15d4
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ByteArrayBuilder.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.util;
+
+import java.io.IOException;
+
+public class ByteArrayBuilder extends AbstractVarLenObjectBuilder {
+
+ public void appendByte(byte b) throws IOException {
+ out.writeByte(b);
+ }
+
+ public void appendBytes(byte[] bytes, int start, int length) throws IOException {
+ out.write(bytes, start, length);
+ }
+}
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/GrowableArray.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/GrowableArray.java
index d08412e..6e329ab 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/GrowableArray.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/GrowableArray.java
@@ -20,7 +20,6 @@
package org.apache.hyracks.data.std.util;
import java.io.DataOutput;
-import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hyracks.data.std.api.IDataOutputProvider;
@@ -28,7 +27,7 @@
public class GrowableArray implements IDataOutputProvider {
private final ByteArrayAccessibleOutputStream baaos = new ByteArrayAccessibleOutputStream();
- private final DataOutputStream dos = new DataOutputStream(baaos);
+ private final RewindableDataOutputStream dos = new RewindableDataOutputStream(baaos);
@Override
public DataOutput getDataOutput() {
@@ -39,6 +38,24 @@
baaos.reset();
}
+    /**
+     * Rewind the current position by {@code delta} bytes to a previous position.
+     * This drops the last {@code delta} bytes that were already written.
+     * It is useful when, after writing, we discover that more bytes were written than expected and the
+     * position has to be moved back to the expected one.
+     *
+     * Currently it is used by the {@link AbstractVarLenObjectBuilder}, which may reserve more space than
+     * required at the beginning and later shifts the data and fixes the position as needed.
+     * It throws {@link IndexOutOfBoundsException} if {@code delta} is negative.
+     * Use with caution.
+     *
+     * @param delta
+     */
+ public void rewindPositionBy(int delta) {
+ baaos.rewindPositionBy(delta);
+ dos.rewindWrittenBy(delta);
+ }
+
public byte[] getByteArray() {
return baaos.getByteArray();
}
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ICharIterator.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ICharIterator.java
new file mode 100644
index 0000000..118893b
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/ICharIterator.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.util;
+
+public interface ICharIterator {
+
+ boolean hasNext();
+
+ char next();
+}
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/RewindableDataOutputStream.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/RewindableDataOutputStream.java
new file mode 100644
index 0000000..dcd5458
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/RewindableDataOutputStream.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.util;
+
+import java.io.DataOutputStream;
+import java.io.FilterOutputStream;
+import java.io.OutputStream;
+
+public class RewindableDataOutputStream extends DataOutputStream {
+ /**
+ * Creates a new data output stream to write data to the specified
+ * underlying output stream. The counter <code>written</code> is
+ * set to zero.
+ *
+ * @param out the underlying output stream, to be saved for later
+ * use.
+ * @see FilterOutputStream#out
+ */
+ public RewindableDataOutputStream(OutputStream out) {
+ super(out);
+ }
+
+    /**
+     * Rewind the {@code written} counter by {@code delta} bytes to a previous position.
+     * This drops the last {@code delta} bytes from the count of what has been written.
+     * It is useful when, after writing, we discover that more bytes were written than expected and the
+     * counter has to be moved back to the expected value.
+     * Currently it is used by the {@link AbstractVarLenObjectBuilder}, which may reserve more space than
+     * required at the beginning and later shifts the data and fixes the position as needed.
+     *
+     * @param delta
+     */
+ public void rewindWrittenBy(int delta) {
+ if (written < delta) {
+ throw new IndexOutOfBoundsException();
+ }
+ written -= delta;
+ }
+}
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java
new file mode 100644
index 0000000..9dafef1
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8CharSequence.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hyracks.data.std.util;
+
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+
+public class UTF8CharSequence implements CharSequence {
+
+ private char[] buf;
+ private int length;
+
+ @Override
+ public char charAt(int index) {
+ if (index >= length || index < 0) {
+ throw new IndexOutOfBoundsException("No index " + index + " for string of length " + length);
+ }
+ return buf[index];
+ }
+
+ @Override
+ public int length() {
+ return length;
+ }
+
+ @Override
+ public CharSequence subSequence(int start, int end) {
+ UTF8CharSequence carSeq = new UTF8CharSequence();
+ carSeq.length = end - start;
+ if (end != start) {
+ carSeq.buf = new char[carSeq.length];
+ System.arraycopy(buf, start, carSeq.buf, 0, carSeq.length);
+ }
+ return carSeq;
+ }
+
+ public void reset(UTF8StringPointable valuePtr) {
+ int utfLen = valuePtr.getUTF8Length();
+ if (buf == null || buf.length < utfLen) {
+ buf = new char[utfLen];
+ }
+ int bytePos = 0;
+ int charPos = 0;
+ while (bytePos < utfLen) {
+ buf[charPos++] = valuePtr.charAt(valuePtr.getMetaDataLength() + bytePos);
+ bytePos += valuePtr.charSize(valuePtr.getMetaDataLength() + bytePos);
+ }
+ this.length = charPos;
+ }
+
+ @Override
+ public String toString() {
+ return new String(buf, 0, length);
+ }
+
+}
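A likely use of this adapter is to hand a UTF8StringPointable to JDK APIs that accept a CharSequence, such as java.util.regex, without first materializing a String. A minimal sketch, assuming the generateUTF8Pointable factory used in the tests below:

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
    import org.apache.hyracks.data.std.util.UTF8CharSequence;

    public class UTF8CharSequenceSketch {
        public static void main(String[] args) {
            UTF8StringPointable ptr = UTF8StringPointable.generateUTF8Pointable("hello world");
            UTF8CharSequence seq = new UTF8CharSequence();
            seq.reset(ptr);                             // decode the pointable into the reusable char buffer

            Matcher matcher = Pattern.compile("wor..").matcher(seq);
            System.out.println(matcher.find());         // true
        }
    }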
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringBuilder.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringBuilder.java
new file mode 100644
index 0000000..eb29a98
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringBuilder.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hyracks.data.std.util;
+
+import java.io.IOException;
+
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+
+public class UTF8StringBuilder extends AbstractVarLenObjectBuilder {
+
+ public void appendChar(char ch) throws IOException {
+ UTF8StringUtil.writeCharAsModifiedUTF8(ch, out);
+ }
+
+ public void appendString(String string) throws IOException {
+ for (int i = 0; i < string.length(); i++) {
+ appendChar(string.charAt(i));
+ }
+ }
+
+ public void appendUtf8StringPointable(UTF8StringPointable src, int byteStartOffset, int byteLength) throws IOException {
+ out.write(src.getByteArray(), byteStartOffset, byteLength);
+ }
+
+ public void appendUtf8StringPointable(UTF8StringPointable src) throws IOException {
+ appendUtf8StringPointable(src, src.getCharStartOffset(), src.getUTF8Length());
+ }
+}
\ No newline at end of file
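A minimal usage sketch (not part of the patch) that builds one UTF8 string and reads it back with the UTF8StringUtil.toString helper used in the tests below; the estimate value is arbitrary.

    import java.io.IOException;

    import org.apache.hyracks.data.std.util.GrowableArray;
    import org.apache.hyracks.data.std.util.UTF8StringBuilder;
    import org.apache.hyracks.util.string.UTF8StringUtil;

    public class UTF8StringBuilderSketch {
        public static void main(String[] args) throws IOException {
            GrowableArray storage = new GrowableArray();
            UTF8StringBuilder builder = new UTF8StringBuilder();

            builder.reset(storage, 16);                 // rough estimate of the encoded byte length
            builder.appendString("no more 64K ");
            builder.appendChar('l');
            builder.appendString("imit");
            builder.finish();                           // back-patches the variable-length length field

            System.out.println(UTF8StringUtil.toString(new StringBuilder(), storage.getByteArray(), 0));
        }
    }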
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIterator.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIterator.java
new file mode 100644
index 0000000..317527e
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIterator.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.util;
+
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+
+public class UTF8StringCharacterIterator implements ICharIterator {
+
+ private UTF8StringPointable utf8Ptr;
+ private int pos;
+
+ public UTF8StringCharacterIterator reset(UTF8StringPointable utf8Ptr) {
+ this.utf8Ptr = utf8Ptr;
+ return reset();
+ }
+
+ public UTF8StringCharacterIterator reset() {
+ this.pos = utf8Ptr.getMetaDataLength();
+ return this;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return pos < utf8Ptr.getMetaDataLength() + utf8Ptr.getUTF8Length();
+ }
+
+ @Override
+ public char next() {
+ char ret = utf8Ptr.charAt(pos);
+ pos += utf8Ptr.charSize(pos);
+ return ret;
+ }
+}
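For completeness, a small sketch of how the iterator is expected to be driven; the dedicated test later in this patch exercises the same pattern.

    import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
    import org.apache.hyracks.data.std.util.UTF8StringCharacterIterator;

    public class CharIteratorSketch {
        public static void main(String[] args) {
            UTF8StringCharacterIterator it = new UTF8StringCharacterIterator();
            it.reset(UTF8StringPointable.generateUTF8Pointable("中文abc"));
            while (it.hasNext()) {
                System.out.println(it.next());   // one decoded char per call, advancing by that char's byte size
            }
        }
    }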
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/ByteArrayPointableTest.java b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/ByteArrayPointableTest.java
index f58c8da..1713467 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/ByteArrayPointableTest.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/ByteArrayPointableTest.java
@@ -19,51 +19,39 @@
package org.apache.hyracks.data.std.primitive;
+import static org.junit.Assert.assertTrue;
+
import org.junit.Test;
-import javax.xml.bind.DatatypeConverter;
-
-import static org.junit.Assert.*;
-
public class ByteArrayPointableTest {
- public static byte[] generatePointableBytes(byte[] bytes){
- byte[] ret = new byte[bytes.length + ByteArrayPointable.SIZE_OF_LENGTH];
- for (int i = 0; i < bytes.length; ++i){
- ret[i+ ByteArrayPointable.SIZE_OF_LENGTH] = bytes[i];
- }
- ByteArrayPointable.putLength(bytes.length, ret, 0);
- return ret;
- }
-
@Test
public void testCompareTo() throws Exception {
- byte [] bytes = generatePointableBytes(new byte[] { 1, 2, 3, 4});
- ByteArrayPointable byteArrayPointable = new ByteArrayPointable();
- byteArrayPointable.set(bytes, 0, bytes.length);
+ ByteArrayPointable byteArrayPointable = ByteArrayPointable
+ .generatePointableFromPureBytes(new byte[] { 1, 2, 3, 4 });
- testEqual(byteArrayPointable, generatePointableBytes(new byte[] { 1,2 ,3,4}));
+ testEqual(byteArrayPointable, ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 1, 2, 3, 4 }));
- testLessThan(byteArrayPointable, generatePointableBytes(new byte[] {2}));
- testLessThan(byteArrayPointable, generatePointableBytes(new byte[] {1,2,3,5}));
- testLessThan(byteArrayPointable, generatePointableBytes(new byte[] {1,2,3,4,5}));
+ testLessThan(byteArrayPointable, ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 2 }, 0, 1));
+ testLessThan(byteArrayPointable, ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 1, 2, 3, 5 }));
+ testLessThan(byteArrayPointable,
+ ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 1, 2, 3, 4, 5 }));
- testGreaterThan(byteArrayPointable, generatePointableBytes(new byte[] { }));
- testGreaterThan(byteArrayPointable, generatePointableBytes(new byte[] { 0}));
- testGreaterThan(byteArrayPointable, generatePointableBytes(new byte[] { 1,2,3}));
+ testGreaterThan(byteArrayPointable, ByteArrayPointable.generatePointableFromPureBytes(new byte[] {}));
+ testGreaterThan(byteArrayPointable, ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 0 }));
+ testGreaterThan(byteArrayPointable, ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 1, 2, 3 }));
}
-
- void testEqual(ByteArrayPointable pointable, byte [] bytes){
- assertTrue(pointable.compareTo(bytes, 0, bytes.length) == 0);
+ void testEqual(ByteArrayPointable pointable, ByteArrayPointable bytes) {
+ assertTrue(pointable.compareTo(bytes) == 0);
}
- void testLessThan(ByteArrayPointable pointable, byte[] bytes){
- assertTrue(pointable.compareTo(bytes, 0, bytes.length) < 0);
+ void testLessThan(ByteArrayPointable pointable, ByteArrayPointable bytes) {
+ assertTrue(pointable.compareTo(bytes) < 0);
}
- void testGreaterThan(ByteArrayPointable pointable, byte[] bytes){
- assertTrue(pointable.compareTo(bytes, 0, bytes.length) > 0);
+ void testGreaterThan(ByteArrayPointable pointable, ByteArrayPointable bytes) {
+ assertTrue(pointable.compareTo(bytes) > 0);
}
}
\ No newline at end of file
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
new file mode 100644
index 0000000..f134718
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.primitive;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.data.std.util.UTF8StringBuilder;
+import org.apache.hyracks.util.string.UTF8StringSample;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+import org.junit.Test;
+
+public class UTF8StringPointableTest {
+ public static UTF8StringPointable STRING_EMPTY = UTF8StringPointable
+ .generateUTF8Pointable(UTF8StringSample.EMPTY_STRING);
+ public static UTF8StringPointable STRING_UTF8_MIX = UTF8StringPointable
+ .generateUTF8Pointable(UTF8StringSample.STRING_UTF8_MIX);
+ public static UTF8StringPointable STRING_UTF8_MIX_LOWERCASE = UTF8StringPointable.generateUTF8Pointable(
+ UTF8StringSample.STRING_UTF8_MIX_LOWERCASE);
+
+ public static UTF8StringPointable STRING_LEN_127 = UTF8StringPointable
+ .generateUTF8Pointable(UTF8StringSample.STRING_LEN_127);
+ public static UTF8StringPointable STRING_LEN_128 = UTF8StringPointable
+ .generateUTF8Pointable(UTF8StringSample.STRING_LEN_128);
+
+ @Test
+ public void testGetStringLength() throws Exception {
+ UTF8StringPointable utf8Ptr = UTF8StringPointable.generateUTF8Pointable(UTF8StringSample.STRING_LEN_127);
+ assertEquals(127, utf8Ptr.getUTF8Length());
+ assertEquals(1, utf8Ptr.getMetaDataLength());
+ assertEquals(127, utf8Ptr.getStringLength());
+
+ byte[] bytes = UTF8StringUtil.writeStringToBytes(UTF8StringSample.STRING_LEN_128);
+ utf8Ptr.set(bytes, 0, bytes.length);
+ assertEquals(128, utf8Ptr.getUTF8Length());
+ assertEquals(2, utf8Ptr.getMetaDataLength());
+ assertEquals(128, utf8Ptr.getStringLength());
+ }
+
+ @Test
+ public void testContains() throws Exception {
+ assertTrue(STRING_UTF8_MIX.contains(STRING_UTF8_MIX, false));
+ assertTrue(STRING_UTF8_MIX.contains(STRING_UTF8_MIX, true));
+ assertTrue(STRING_UTF8_MIX.contains(STRING_EMPTY, true));
+
+ assertTrue(STRING_UTF8_MIX.contains(STRING_UTF8_MIX_LOWERCASE, true));
+ assertTrue(STRING_UTF8_MIX_LOWERCASE.contains(STRING_UTF8_MIX, true));
+ }
+
+ @Test
+ public void testStartsWith() throws Exception {
+ assertTrue(STRING_LEN_128.startsWith(STRING_LEN_127, true));
+ assertFalse(STRING_LEN_127.startsWith(STRING_LEN_128, true));
+
+ assertTrue(STRING_LEN_127.startsWith(STRING_EMPTY, true));
+ }
+
+ @Test
+ public void testEndsWith() throws Exception {
+ assertTrue(STRING_LEN_128.endsWith(STRING_LEN_127, true));
+ assertFalse(STRING_LEN_127.endsWith(STRING_LEN_128, true));
+
+        assertTrue(STRING_LEN_127.endsWith(STRING_EMPTY, true));
+ }
+
+ @Test
+ public void testConcat() throws Exception {
+ UTF8StringPointable expected = UTF8StringPointable.generateUTF8Pointable(
+ UTF8StringSample.generateStringRepeatBy(UTF8StringSample.ONE_ASCII_CHAR, 127 + 128));
+
+ GrowableArray storage = new GrowableArray();
+ UTF8StringBuilder builder = new UTF8StringBuilder();
+ STRING_LEN_127.concat(STRING_LEN_128, builder, storage);
+
+ UTF8StringPointable actual = new UTF8StringPointable();
+ actual.set(storage.getByteArray(), 0, storage.getLength());
+
+ assertEquals(0, expected.compareTo(actual));
+
+ storage.reset();
+ STRING_LEN_127.concat(STRING_EMPTY, builder, storage);
+ actual.set(storage.getByteArray(), 0, storage.getLength());
+
+ assertEquals(0, STRING_LEN_127.compareTo(actual));
+ }
+
+ @Test
+ public void testSubstr() throws Exception {
+ GrowableArray storage = new GrowableArray();
+ UTF8StringBuilder builder = new UTF8StringBuilder();
+
+ STRING_LEN_128.substr(1, 127, builder, storage);
+ UTF8StringPointable result = new UTF8StringPointable();
+ result.set(storage.getByteArray(), 0, storage.getLength());
+
+ assertEquals(0, STRING_LEN_127.compareTo(result));
+
+ storage.reset();
+ STRING_UTF8_MIX.substr(0, UTF8StringSample.STRING_UTF8_MIX.length(), builder, storage);
+ result.set(storage.getByteArray(), 0, storage.getLength());
+ assertEquals(0, STRING_UTF8_MIX.compareTo(result));
+ }
+
+ @Test
+ public void testSubstrBefore() throws Exception {
+ UTF8StringBuilder builder = new UTF8StringBuilder();
+ GrowableArray storage = new GrowableArray();
+
+ STRING_LEN_128.substrBefore(STRING_LEN_127, builder, storage);
+ UTF8StringPointable result = new UTF8StringPointable();
+ result.set(storage.getByteArray(), 0, storage.getLength());
+
+ assertEquals(0, STRING_EMPTY.compareTo(result));
+
+ storage.reset();
+ UTF8StringPointable testPtr = UTF8StringPointable.generateUTF8Pointable("Mix中文123");
+ UTF8StringPointable pattern = UTF8StringPointable.generateUTF8Pointable("文");
+ UTF8StringPointable expect = UTF8StringPointable.generateUTF8Pointable("Mix中");
+ testPtr.substrBefore(pattern, builder, storage);
+ result.set(storage.getByteArray(), 0, storage.getLength());
+ assertEquals(0, expect.compareTo(result));
+ }
+
+ @Test
+ public void testSubstrAfter() throws Exception {
+ UTF8StringBuilder builder = new UTF8StringBuilder();
+ GrowableArray storage = new GrowableArray();
+
+ STRING_LEN_128.substrAfter(STRING_LEN_127, builder, storage);
+ UTF8StringPointable result = new UTF8StringPointable();
+ result.set(storage.getByteArray(), 0, storage.getLength());
+
+ UTF8StringPointable expect = UTF8StringPointable
+ .generateUTF8Pointable(Character.toString(UTF8StringSample.ONE_ASCII_CHAR));
+ assertEquals(0, expect.compareTo(result));
+
+ storage.reset();
+ UTF8StringPointable testPtr = UTF8StringPointable.generateUTF8Pointable("Mix中文123");
+ UTF8StringPointable pattern = UTF8StringPointable.generateUTF8Pointable("文");
+ expect = UTF8StringPointable.generateUTF8Pointable("123");
+ testPtr.substrAfter(pattern, builder, storage);
+ result.set(storage.getByteArray(), 0, storage.getLength());
+ assertEquals(0, expect.compareTo(result));
+ }
+
+ @Test
+ public void testLowercase() throws Exception {
+ UTF8StringBuilder builder = new UTF8StringBuilder();
+ GrowableArray storage = new GrowableArray();
+
+ UTF8StringPointable result = new UTF8StringPointable();
+ STRING_UTF8_MIX.lowercase(builder, storage);
+
+ result.set(storage.getByteArray(), 0, storage.getLength());
+
+ assertEquals(0, STRING_UTF8_MIX_LOWERCASE.compareTo(result));
+ }
+
+ @Test
+ public void testUppercase() throws Exception {
+ UTF8StringBuilder builder = new UTF8StringBuilder();
+ GrowableArray storage = new GrowableArray();
+
+ UTF8StringPointable result = new UTF8StringPointable();
+ STRING_UTF8_MIX_LOWERCASE.uppercase(builder, storage);
+
+ result.set(storage.getByteArray(), 0, storage.getLength());
+
+ UTF8StringPointable expected = UTF8StringPointable
+ .generateUTF8Pointable(UTF8StringSample.STRING_UTF8_MIX_LOWERCASE.toUpperCase());
+ assertEquals(0, expected.compareTo(result));
+
+ }
+}
\ No newline at end of file
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringBuilderTest.java b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringBuilderTest.java
new file mode 100644
index 0000000..bc0c629
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringBuilderTest.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.util;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
+import org.apache.hyracks.util.string.UTF8StringSample;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+import org.junit.Test;
+
+public class UTF8StringBuilderTest {
+
+ UTF8StringBuilder utf8StringBuilder = new UTF8StringBuilder();
+ GrowableArray storage = new GrowableArray();
+
+ @Test
+ public void testNormalBuilder() throws IOException {
+ testOneString(UTF8StringSample.EMPTY_STRING, 0);
+ testOneString(UTF8StringSample.EMPTY_STRING, 127);
+
+ testOneString(UTF8StringSample.STRING_UTF8_MIX, 127);
+ testOneString(UTF8StringSample.STRING_LEN_128, 128);
+
+ testOneString(UTF8StringSample.STRING_LEN_MEDIUM, VarLenIntEncoderDecoder.BOUND_TWO_BYTE);
+ testOneString(UTF8StringSample.STRING_LEN_LARGE, VarLenIntEncoderDecoder.BOUND_THREE_BYTE);
+ }
+
+ @Test
+ public void testShrinkAfterFinish() throws IOException {
+ testOneString(UTF8StringSample.STRING_LEN_127, VarLenIntEncoderDecoder.BOUND_TWO_BYTE);
+ testOneString(UTF8StringSample.STRING_LEN_127, VarLenIntEncoderDecoder.BOUND_THREE_BYTE);
+ testOneString(UTF8StringSample.STRING_LEN_127, VarLenIntEncoderDecoder.BOUND_FOUR_BYTE);
+ }
+
+ @Test
+ public void testIncreaseAfterFinish() throws IOException {
+ testOneString(UTF8StringSample.STRING_LEN_128, VarLenIntEncoderDecoder.BOUND_ONE_BYTE);
+ testOneString(UTF8StringSample.STRING_LEN_MEDIUM, VarLenIntEncoderDecoder.BOUND_ONE_BYTE);
+ testOneString(UTF8StringSample.STRING_LEN_LARGE, VarLenIntEncoderDecoder.BOUND_TWO_BYTE);
+ }
+
+ public void testOneString(String testString, int estimateLength) throws IOException {
+ storage.reset();
+ utf8StringBuilder.reset(storage, estimateLength);
+ for (char c : testString.toCharArray()) {
+ utf8StringBuilder.appendChar(c);
+ }
+ utf8StringBuilder.finish();
+ assertEquals(testString, UTF8StringUtil.toString(new StringBuilder(), storage.getByteArray(), 0).toString());
+
+ UTF8StringPointable hyracksUtf = new UTF8StringPointable();
+ hyracksUtf.set(storage.getByteArray(), 0, storage.getLength());
+
+ GrowableArray storage2 = new GrowableArray();
+ utf8StringBuilder.reset(storage2, estimateLength);
+ utf8StringBuilder.appendUtf8StringPointable(hyracksUtf);
+ utf8StringBuilder.finish();
+        assertEquals(testString, UTF8StringUtil.toString(new StringBuilder(), storage2.getByteArray(), 0).toString());
+ }
+}
\ No newline at end of file
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIteratorTest.java b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIteratorTest.java
new file mode 100644
index 0000000..5268c82
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/util/UTF8StringCharacterIteratorTest.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.data.std.util;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringSample;
+import org.junit.Test;
+
+public class UTF8StringCharacterIteratorTest {
+
+ private UTF8StringCharacterIterator iterator = new UTF8StringCharacterIterator();
+
+ private void testEachIterator(String testString) {
+ UTF8StringPointable ptr = UTF8StringPointable.generateUTF8Pointable(testString);
+ iterator.reset(ptr);
+ for (char ch : testString.toCharArray()) {
+ assertTrue(iterator.hasNext());
+ assertEquals(ch, iterator.next());
+ }
+ assertFalse(iterator.hasNext());
+
+ iterator.reset();
+ for (char ch : testString.toCharArray()) {
+ assertTrue(iterator.hasNext());
+ assertEquals(ch, iterator.next());
+ }
+ assertFalse(iterator.hasNext());
+ }
+
+ @Test
+ public void testIterator(){
+ testEachIterator(UTF8StringSample.EMPTY_STRING);
+ testEachIterator(UTF8StringSample.STRING_UTF8_MIX);
+ testEachIterator(UTF8StringSample.STRING_LEN_128);
+ testEachIterator(UTF8StringSample.STRING_LEN_128);
+ }
+}
\ No newline at end of file
diff --git a/hyracks/hyracks-dataflow-common/pom.xml b/hyracks/hyracks-dataflow-common/pom.xml
index c6e85cd..ad4dfa7 100644
--- a/hyracks/hyracks-dataflow-common/pom.xml
+++ b/hyracks/hyracks-dataflow-common/pom.xml
@@ -17,48 +17,61 @@
! under the License.
!-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <artifactId>hyracks-dataflow-common</artifactId>
- <name>hyracks-dataflow-common</name>
- <parent>
- <groupId>org.apache.hyracks</groupId>
- <artifactId>hyracks</artifactId>
- <version>0.2.17-SNAPSHOT</version>
- </parent>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>hyracks-dataflow-common</artifactId>
+ <name>hyracks-dataflow-common</name>
+ <parent>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ </parent>
- <licenses>
- <license>
- <name>Apache License, Version 2.0</name>
- <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
- <distribution>repo</distribution>
- <comments>A business-friendly OSS license</comments>
- </license>
- </licenses>
+ <licenses>
+ <license>
+ <name>Apache License, Version 2.0</name>
+ <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+ <distribution>repo</distribution>
+ <comments>A business-friendly OSS license</comments>
+ </license>
+ </licenses>
- <dependencies>
- <dependency>
- <groupId>org.apache.hyracks</groupId>
- <artifactId>hyracks-api</artifactId>
- <version>0.2.17-SNAPSHOT</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hyracks</groupId>
- <artifactId>hyracks-data-std</artifactId>
- <version>0.2.17-SNAPSHOT</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hyracks</groupId>
- <artifactId>hyracks-control-nc</artifactId>
- <version>0.2.17-SNAPSHOT</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>commons-io</groupId>
- <artifactId>commons-io</artifactId>
- </dependency>
- </dependencies>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-util</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-util</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-api</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-data-std</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-control-nc</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </dependency>
+ </dependencies>
</project>
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializer.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializer.java
index 4c8bc1e..d16fca7 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializer.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializer.java
@@ -19,57 +19,71 @@
package org.apache.hyracks.dataflow.common.data.marshalling;
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
-
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
+import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
+
public class ByteArraySerializerDeserializer implements ISerializerDeserializer<byte[]> {
private static final long serialVersionUID = 1L;
- public final static ByteArraySerializerDeserializer INSTANCE = new ByteArraySerializerDeserializer();
-
- private ByteArraySerializerDeserializer() {
+ public ByteArraySerializerDeserializer() {
}
+ private byte[] metaBuffer = new byte[5];
+
+    /**
+     * Returns a pure byte array which does not carry the length-encoding prefix.
+     *
+     * @param in the stream to read the instance from
+     * @return the content bytes, without the length prefix
+     * @throws HyracksDataException
+     */
@Override
public byte[] deserialize(DataInput in) throws HyracksDataException {
try {
- int length = in.readUnsignedShort();
- byte[] bytes = new byte[length + ByteArrayPointable.SIZE_OF_LENGTH];
- in.readFully(bytes, ByteArrayPointable.SIZE_OF_LENGTH, length);
- ByteArrayPointable.putLength(length, bytes, 0);
+ int contentLength = VarLenIntEncoderDecoder.decode(in);
+ byte[] bytes = new byte[contentLength];
+ in.readFully(bytes, 0, contentLength);
return bytes;
} catch (IOException e) {
throw new HyracksDataException(e);
}
}
+    /**
+     * Serializes a pure, content-only byte array (one that does not carry the encoded length at the beginning).
+     * The variable-length length prefix is written first, followed by the entire array.
+     */
@Override
public void serialize(byte[] instance, DataOutput out) throws HyracksDataException {
-
- if (instance.length > ByteArrayPointable.MAX_LENGTH) {
- throw new HyracksDataException(
- "encoded byte array too long: " + instance.length + " bytes");
- }
try {
- int realLength = ByteArrayPointable.getFullLength(instance, 0);
- out.write(instance, 0, realLength);
+ int metaLength = VarLenIntEncoderDecoder.encode(instance.length, metaBuffer, 0);
+ out.write(metaBuffer, 0, metaLength);
+ out.write(instance);
} catch (IOException e) {
throw new HyracksDataException(e);
}
}
- public void serialize(byte[] instance, int start, int length, DataOutput out) throws HyracksDataException {
- if (length > ByteArrayPointable.MAX_LENGTH) {
- throw new HyracksDataException(
- "encoded byte array too long: " + instance.length + " bytes");
- }
+ public void serialize(ByteArrayPointable byteArrayPtr, DataOutput out) throws HyracksDataException {
try {
+ out.write(byteArrayPtr.getByteArray(), byteArrayPtr.getStartOffset(), byteArrayPtr.getLength());
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+    // Serializes a pure byte array that does not carry the length information encoded at the beginning
+ public void serialize(byte[] instance, int start, int length, DataOutput out) throws HyracksDataException {
+ int metaLength = VarLenIntEncoderDecoder.encode(length, metaBuffer, 0);
+ try {
+ out.write(metaBuffer, 0, metaLength);
out.write(instance, start, length);
} catch (IOException e) {
throw new HyracksDataException(e);
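To make the new length prefix concrete, here is a hedged sketch of the encoder calls the serializer above relies on. The printed sizes are assumptions based on the one-byte boundary at 127 exercised elsewhere in this patch.

    import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;

    public class VarLenPrefixSketch {
        public static void main(String[] args) {
            byte[] metaBuffer = new byte[5];                    // the prefix never needs more than 5 bytes

            int oneByte = VarLenIntEncoderDecoder.encode(127, metaBuffer, 0);
            int twoBytes = VarLenIntEncoderDecoder.encode(128, metaBuffer, 0);
            int forLarge = VarLenIntEncoderDecoder.encode(1 << 20, metaBuffer, 0);

            System.out.println(oneByte);                        // expected: 1 (short values keep a one-byte prefix)
            System.out.println(twoBytes);                       // expected: 2
            System.out.println(forLarge);                       // expected: 3, so lengths beyond 64K are representable
            System.out.println(VarLenIntEncoderDecoder.getBytesRequired(128));   // expected: 2
        }
    }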
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
index 2435672..aee11bc 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java
@@ -24,19 +24,21 @@
import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.util.string.UTF8StringReader;
+import org.apache.hyracks.util.string.UTF8StringWriter;
public class UTF8StringSerializerDeserializer implements ISerializerDeserializer<String> {
- public static final UTF8StringSerializerDeserializer INSTANCE = new UTF8StringSerializerDeserializer();
private static final long serialVersionUID = 1L;
+ private UTF8StringReader reader = new UTF8StringReader();
+ private UTF8StringWriter writer = new UTF8StringWriter();
- private UTF8StringSerializerDeserializer() {
- }
+ public UTF8StringSerializerDeserializer() {}
@Override
public String deserialize(DataInput in) throws HyracksDataException {
try {
- return in.readUTF();
+ return reader.readUTF(in);
} catch (IOException e) {
throw new HyracksDataException(e);
}
@@ -45,7 +47,7 @@
@Override
public void serialize(String instance, DataOutput out) throws HyracksDataException {
try {
- out.writeUTF(instance);
+ writer.writeUTF8(instance, out);
} catch (IOException e) {
throw new HyracksDataException(e);
}
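A round-trip sketch of the new serializer/deserializer pair; the in-memory streams are only for illustration, any DataInput/DataOutput will do.

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;

    import org.apache.hyracks.api.exceptions.HyracksDataException;
    import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;

    public class Utf8SerdeRoundTrip {
        public static void main(String[] args) throws HyracksDataException {
            UTF8StringSerializerDeserializer serde = new UTF8StringSerializerDeserializer();

            ByteArrayOutputStream bos = new ByteArrayOutputStream();
            serde.serialize("strings longer than 64K are now storable", new DataOutputStream(bos));

            String back = serde.deserialize(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
            System.out.println(back.equals("strings longer than 64K are now storable"));   // true
        }
    }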
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactory.java
index b7d302b..3d081af 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactory.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactory.java
@@ -26,23 +26,12 @@
public class ByteArrayNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
public static ByteArrayNormalizedKeyComputerFactory INSTANCE = new ByteArrayNormalizedKeyComputerFactory();
- @Override public INormalizedKeyComputer createNormalizedKeyComputer() {
+ @Override
+ public INormalizedKeyComputer createNormalizedKeyComputer() {
return new INormalizedKeyComputer() {
- @Override public int normalize(byte[] bytes, int start, int length) {
- int normalizedKey = 0;
- int realLength = ByteArrayPointable.getLength(bytes, start);
- for (int i = 0; i < 3; ++i) {
- normalizedKey <<= 8;
- if (i < realLength) {
- normalizedKey += bytes[start + ByteArrayPointable.SIZE_OF_LENGTH + i] & 0xff;
- }
- }
- // last byte, shift 7 instead of 8 to avoid negative number
- normalizedKey <<= 7;
- if (3 < realLength) {
- normalizedKey += (bytes[start + ByteArrayPointable.SIZE_OF_LENGTH + 3] & 0xfe) >> 1;
- }
- return normalizedKey;
+ @Override
+ public int normalize(byte[] bytes, int start, int length) {
+ return ByteArrayPointable.normalize(bytes, start);
}
};
}
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/UTF8StringNormalizedKeyComputerFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/UTF8StringNormalizedKeyComputerFactory.java
index 941afda..79936de 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/UTF8StringNormalizedKeyComputerFactory.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/normalizers/UTF8StringNormalizedKeyComputerFactory.java
@@ -20,7 +20,7 @@
import org.apache.hyracks.api.dataflow.value.INormalizedKeyComputer;
import org.apache.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public class UTF8StringNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
private static final long serialVersionUID = 1L;
@@ -30,17 +30,7 @@
return new INormalizedKeyComputer() {
@Override
public int normalize(byte[] bytes, int start, int length) {
- int len = UTF8StringPointable.getUTFLength(bytes, start);
- int nk = 0;
- int offset = start + 2;
- for (int i = 0; i < 2; ++i) {
- nk <<= 16;
- if (i < len) {
- nk += ((int) UTF8StringPointable.charAt(bytes, offset)) & 0xffff;
- offset += UTF8StringPointable.charSize(bytes, offset);
- }
- }
- return nk;
+ return UTF8StringUtil.normalize(bytes, start);
}
};
}
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java
index c71950b..c85d1b2 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactory.java
@@ -19,12 +19,12 @@
package org.apache.hyracks.dataflow.common.data.parsers;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
-
import java.io.DataOutput;
import java.io.IOException;
-import java.util.Arrays;
+
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.data.marshalling.ByteArraySerializerDeserializer;
+import org.apache.hyracks.util.bytes.Base64Parser;
public class ByteArrayBase64ParserFactory implements IValueParserFactory {
@@ -33,20 +33,19 @@
private ByteArrayBase64ParserFactory() {
}
- @Override public IValueParser createValueParser() {
+ @Override
+ public IValueParser createValueParser() {
return new IValueParser() {
- private byte[] buffer;
- private byte[] quadruplet = new byte[4];
+ Base64Parser parser = new Base64Parser();
+ ByteArraySerializerDeserializer serializer = new ByteArraySerializerDeserializer();
- @Override public void parse(char[] input, int start, int length, DataOutput out)
+ @Override
+ public void parse(char[] input, int start, int length, DataOutput out)
throws HyracksDataException {
- if (length % 4 != 0) {
- throw new HyracksDataException(
- "Invalid Base64 string, the length of the string should be a multiple of 4");
- }
- buffer = extractPointableArrayFromBase64String(input, start, length, buffer, quadruplet);
+
+ parser.generatePureByteArrayFromBase64String(input, start, length);
try {
- out.write(buffer, 0, ByteArrayPointable.getFullLength(buffer, 0));
+ serializer.serialize(parser.getByteArray(), 0, parser.getLength(), out);
} catch (IOException e) {
throw new HyracksDataException(e);
}
@@ -54,194 +53,4 @@
};
}
- // The following base64 related implementation is copied/changed base on javax.xml.bind.DatatypeConverterImpl.java
- private static final byte[] decodeMap = initDecodeMap();
- private static final byte PADDING = 127;
-
- private static byte[] initDecodeMap() {
- byte[] map = new byte[128];
- Arrays.fill(map, (byte) -1);
-
- int i;
- for (i = 'A'; i <= 'Z'; i++) {
- map[i] = (byte) (i - 'A');
- }
- for (i = 'a'; i <= 'z'; i++) {
- map[i] = (byte) (i - 'a' + 26);
- }
- for (i = '0'; i <= '9'; i++) {
- map[i] = (byte) (i - '0' + 52);
- }
- map['+'] = 62;
- map['/'] = 63;
- map['='] = PADDING;
-
- return map;
- }
-
- /**
- * computes the length of binary data speculatively.
- * Our requirement is to create byte[] of the exact length to store the binary data.
- * If we do this in a straight-forward way, it takes two passes over the data.
- * Experiments show that this is a non-trivial overhead (35% or so is spent on
- * the first pass in calculating the length.)
- * So the approach here is that we compute the length speculatively, without looking
- * at the whole contents. The obtained speculative value is never less than the
- * actual length of the binary data, but it may be bigger. So if the speculation
- * goes wrong, we'll pay the cost of reallocation and buffer copying.
- * If the base64 text is tightly packed with no indentation nor illegal char
- * (like what most web services produce), then the speculation of this method
- * will be correct, so we get the performance benefit.
- */
- private static int guessLength(char[] chars, int start, int length) {
-
- // compute the tail '=' chars
- int j = length - 1;
- for (; j >= 0; j--) {
- byte code = decodeMap[chars[start + j]];
- if (code == PADDING) {
- continue;
- }
- if (code == -1) // most likely this base64 text is indented. go with the upper bound
- {
- return length / 4 * 3;
- }
- break;
- }
-
- j++; // text.charAt(j) is now at some base64 char, so +1 to make it the size
- int padSize = length - j;
- if (padSize > 2) // something is wrong with base64. be safe and go with the upper bound
- {
- return length / 4 * 3;
- }
-
- // so far this base64 looks like it's unindented tightly packed base64.
- // take a chance and create an array with the expected size
- return length / 4 * 3 - padSize;
- }
-
- private static int guessLength(byte[] chars, int start, int length) {
-
- // compute the tail '=' chars
- int j = length - 1;
- for (; j >= 0; j--) {
- byte code = decodeMap[chars[start + j]];
- if (code == PADDING) {
- continue;
- }
- if (code == -1) // most likely this base64 text is indented. go with the upper bound
- {
- return length / 4 * 3;
- }
- break;
- }
-
- j++; // text.charAt(j) is now at some base64 char, so +1 to make it the size
- int padSize = length - j;
- if (padSize > 2) // something is wrong with base64. be safe and go with the upper bound
- {
- return length / 4 * 3;
- }
-
- // so far this base64 looks like it's unindented tightly packed base64.
- // take a chance and create an array with the expected size
- return length / 4 * 3 - padSize;
- }
-
- public static byte[] extractPointableArrayFromBase64String(byte[] input, int start, int length,
- byte[] bufferNeedToReset, byte[] quadruplet)
- throws HyracksDataException {
- int contentOffset = ByteArrayPointable.SIZE_OF_LENGTH;
- final int buflen = guessLength(input, start, length) + contentOffset;
- bufferNeedToReset = ByteArrayHexParserFactory.ensureCapacity(buflen, bufferNeedToReset);
- int byteArrayLength = parseBase64String(input, start, length, bufferNeedToReset, contentOffset,
- quadruplet);
- if (byteArrayLength > ByteArrayPointable.MAX_LENGTH) {
- throw new HyracksDataException("The decoded byte array is too long.");
- }
- ByteArrayPointable.putLength(byteArrayLength, bufferNeedToReset, 0);
- return bufferNeedToReset;
- }
-
- public static byte[] extractPointableArrayFromBase64String(char[] input, int start, int length,
- byte[] bufferNeedToReset, byte[] quadruplet)
- throws HyracksDataException {
- int contentOffset = ByteArrayPointable.SIZE_OF_LENGTH;
- final int buflen = guessLength(input, start, length) + contentOffset;
- bufferNeedToReset = ByteArrayHexParserFactory.ensureCapacity(buflen, bufferNeedToReset);
- int byteArrayLength = parseBase64String(input, start, length, bufferNeedToReset, contentOffset,
- quadruplet);
- if (byteArrayLength > ByteArrayPointable.MAX_LENGTH) {
- throw new HyracksDataException("The decoded byte array is too long.");
- }
- ByteArrayPointable.putLength(byteArrayLength, bufferNeedToReset, 0);
- return bufferNeedToReset;
- }
-
- static int parseBase64String(char[] input, int start, int length, byte[] out, int offset,
- byte[] quadruplet) throws HyracksDataException {
- int outLength = 0;
-
- int i;
- int q = 0;
-
- // convert each quadruplet to three bytes.
- for (i = 0; i < length; i++) {
- char ch = input[start + i];
- byte v = decodeMap[ch];
-
- if (v == -1) {
- throw new HyracksDataException("Invalid Base64 character");
- }
- quadruplet[q++] = v;
-
- if (q == 4) {
- // quadruplet is now filled.
- out[offset + outLength++] = (byte) ((quadruplet[0] << 2) | (quadruplet[1] >> 4));
- if (quadruplet[2] != PADDING) {
- out[offset + outLength++] = (byte) ((quadruplet[1] << 4) | (quadruplet[2] >> 2));
- }
- if (quadruplet[3] != PADDING) {
- out[offset + outLength++] = (byte) ((quadruplet[2] << 6) | (quadruplet[3]));
- }
- q = 0;
- }
- }
-
- return outLength;
- }
-
- static int parseBase64String(byte[] input, int start, int length, byte[] out, int offset,
- byte[] quadruplet) throws HyracksDataException {
- int outLength = 0;
-
- int i;
- int q = 0;
-
- // convert each quadruplet to three bytes.
- for (i = 0; i < length; i++) {
- char ch = (char)input[start + i];
- byte v = decodeMap[ch];
-
- if (v == -1) {
- throw new HyracksDataException("Invalid Base64 character");
- }
- quadruplet[q++] = v;
-
- if (q == 4) {
- // quadruplet is now filled.
- out[offset + outLength++] = (byte) ((quadruplet[0] << 2) | (quadruplet[1] >> 4));
- if (quadruplet[2] != PADDING) {
- out[offset + outLength++] = (byte) ((quadruplet[1] << 4) | (quadruplet[2] >> 2));
- }
- if (quadruplet[3] != PADDING) {
- out[offset + outLength++] = (byte) ((quadruplet[2] << 6) | (quadruplet[3]));
- }
- q = 0;
- }
- }
-
- return outLength;
- }
}
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java
index ec249f3..f1f1eb1 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactory.java
@@ -19,12 +19,12 @@
package org.apache.hyracks.dataflow.common.data.parsers;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
-
import java.io.DataOutput;
import java.io.IOException;
-import java.util.Arrays;
+
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.dataflow.common.data.marshalling.ByteArraySerializerDeserializer;
+import org.apache.hyracks.util.bytes.HexParser;
public class ByteArrayHexParserFactory implements IValueParserFactory {
public static ByteArrayHexParserFactory INSTANCE = new ByteArrayHexParserFactory();
@@ -32,15 +32,18 @@
private ByteArrayHexParserFactory() {
}
- @Override public IValueParser createValueParser() {
+ @Override
+ public IValueParser createValueParser() {
return new IValueParser() {
- private byte[] buffer = new byte[] { };
+ HexParser parser = new HexParser();
+ ByteArraySerializerDeserializer serializer = new ByteArraySerializerDeserializer();
- @Override public void parse(char[] input, int start, int length, DataOutput out)
+ @Override
+ public void parse(char[] input, int start, int length, DataOutput out)
throws HyracksDataException {
try {
- buffer = extractPointableArrayFromHexString(input, start, length, buffer);
- out.write(buffer, 0, ByteArrayPointable.getFullLength(buffer, 0));
+ parser.generateByteArrayFromHexString(input, start, length);
+ serializer.serialize(parser.getByteArray(), 0, parser.getLength(), out);
} catch (IOException e) {
throw new HyracksDataException(e);
}
@@ -48,85 +51,4 @@
};
}
- public static boolean isValidHexChar(char c) {
- if (c >= '0' && c <= '9'
- || c >= 'a' && c <= 'f'
- || c >= 'A' && c <= 'F') {
- return true;
- }
- return false;
- }
-
- public static byte[] extractPointableArrayFromHexString(char[] input, int start, int length,
- byte[] bufferNeedToReset) throws HyracksDataException {
- if (length % 2 != 0) {
- throw new HyracksDataException(
- "Invalid hex string for binary type: the string length should be a muliple of 2.");
- }
- int byteLength = length / 2;
- bufferNeedToReset = ensureCapacity(byteLength + ByteArrayPointable.SIZE_OF_LENGTH, bufferNeedToReset);
- extractByteArrayFromHexString(input, start, length, bufferNeedToReset,
- ByteArrayPointable.SIZE_OF_LENGTH);
- if (byteLength > ByteArrayPointable.MAX_LENGTH) {
- throw new HyracksDataException("The decoded byte array is too long.");
- }
- ByteArrayPointable.putLength(byteLength, bufferNeedToReset, 0);
- return bufferNeedToReset;
- }
-
- public static byte[] extractPointableArrayFromHexString(byte[] input, int start, int length,
- byte[] bufferNeedToReset) throws HyracksDataException {
- if (length % 2 != 0) {
- throw new HyracksDataException(
- "Invalid hex string for binary type: the string length should be a muliple of 2.");
- }
- int byteLength = length / 2;
- bufferNeedToReset = ensureCapacity(byteLength + ByteArrayPointable.SIZE_OF_LENGTH, bufferNeedToReset);
- extractByteArrayFromHexString(input, start, length, bufferNeedToReset,
- ByteArrayPointable.SIZE_OF_LENGTH);
- if (byteLength > ByteArrayPointable.MAX_LENGTH) {
- throw new HyracksDataException("The decoded byte array is too long.");
- }
- ByteArrayPointable.putLength(byteLength, bufferNeedToReset, 0);
- return bufferNeedToReset;
- }
-
- static byte[] ensureCapacity(int capacity, byte[] original) {
- if (original == null) {
- return new byte[capacity];
- }
- if (original.length < capacity) {
- return Arrays.copyOf(original, capacity);
- }
- return original;
- }
-
- private static int getValueFromValidHexChar(char c) throws HyracksDataException {
- if (!isValidHexChar(c)) {
- throw new HyracksDataException("Invalid hex character : " + c);
- }
- if (c >= '0' && c <= '9') {
- return c - '0';
- }
- if (c >= 'a' && c <= 'f') {
- return 10 + c - 'a';
- }
- return 10 + c - 'A';
- }
-
- private static void extractByteArrayFromHexString(char[] input, int start, int length, byte[] output,
- int offset) throws HyracksDataException {
- for (int i = 0; i < length; i += 2) {
- output[offset + i / 2] = (byte) ((getValueFromValidHexChar(input[start + i]) << 4) +
- getValueFromValidHexChar(input[start + i + 1]));
- }
- }
-
- private static void extractByteArrayFromHexString(byte[] input, int start, int length, byte[] output,
- int offset) throws HyracksDataException {
- for (int i = 0; i < length; i += 2) {
- output[offset + i / 2] = (byte) ((getValueFromValidHexChar((char)input[start + i]) << 4) +
- getValueFromValidHexChar((char)input[start + i + 1]));
- }
- }
}
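
The hex helpers deleted here (isValidHexChar, getValueFromValidHexChar, extractByteArrayFromHexString) reduce to a nibble lookup plus a shift, which HexParser now encapsulates. A minimal sketch of that mapping, mirroring the deleted helpers:

    final class HexDecodeSketch {
        static int hexDigit(char c) {
            if (c >= '0' && c <= '9') return c - '0';
            if (c >= 'a' && c <= 'f') return 10 + c - 'a';
            if (c >= 'A' && c <= 'F') return 10 + c - 'A';
            throw new IllegalArgumentException("Invalid hex character: " + c);
        }

        static byte[] decodeHex(char[] input, int start, int length) {
            if (length % 2 != 0) {
                throw new IllegalArgumentException("Hex string length must be a multiple of 2");
            }
            byte[] out = new byte[length / 2];
            for (int i = 0; i < length; i += 2) {
                // high nibble from the first char, low nibble from the second
                out[i / 2] = (byte) ((hexDigit(input[start + i]) << 4) + hexDigit(input[start + i + 1]));
            }
            return out;
        }
    }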
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java
index 7294e2d..58ee687 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/parsers/UTF8StringParserFactory.java
@@ -22,6 +22,7 @@
import java.io.IOException;
import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.util.string.UTF8StringWriter;
public class UTF8StringParserFactory implements IValueParserFactory {
public static final IValueParserFactory INSTANCE = new UTF8StringParserFactory();
@@ -34,53 +35,12 @@
@Override
public IValueParser createValueParser() {
return new IValueParser() {
- private byte[] utf8;
+ private UTF8StringWriter writer = new UTF8StringWriter();
@Override
public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
- int utflen = 0;
- for (int i = 0; i < length; i++) {
- char ch = buffer[i + start];
- if ((ch >= 0x0001) && (ch <= 0x007F)) {
- utflen++;
- } else if (ch > 0x07ff) {
- utflen += 3;
- } else {
- utflen += 2;
- }
- }
-
- if (utf8 == null || utf8.length < utflen + 2) {
- utf8 = new byte[utflen + 2];
- }
-
- int count = 0;
- utf8[count++] = (byte) ((utflen >>> 8) & 0xff);
- utf8[count++] = (byte) ((utflen >>> 0) & 0xff);
-
- int i = 0;
- for (i = 0; i < length; i++) {
- char ch = buffer[i + start];
- if (!((ch >= 0x0001) && (ch <= 0x007F)))
- break;
- utf8[count++] = (byte) ch;
- }
-
- for (; i < length; i++) {
- char ch = buffer[i + start];
- if ((ch >= 0x0001) && (ch <= 0x007F)) {
- utf8[count++] = (byte) ch;
- } else if (ch > 0x07FF) {
- utf8[count++] = (byte) (0xE0 | ((ch >> 12) & 0x0F));
- utf8[count++] = (byte) (0x80 | ((ch >> 6) & 0x3F));
- utf8[count++] = (byte) (0x80 | ((ch >> 0) & 0x3F));
- } else {
- utf8[count++] = (byte) (0xC0 | ((ch >> 6) & 0x1F));
- utf8[count++] = (byte) (0x80 | ((ch >> 0) & 0x3F));
- }
- }
try {
- out.write(utf8, 0, utflen + 2);
+ writer.writeUTF8(buffer, start, length, out);
} catch (IOException e) {
throw new HyracksDataException(e);
}
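
The inline encoder deleted from this parser first sized the output by walking the chars (1 byte for U+0001..U+007F, 3 bytes above U+07FF, 2 bytes otherwise, as in modified UTF-8) before writing the old fixed 2-byte length header; UTF8StringWriter now handles both steps. A sketch of just the sizing pass, copied from the removed logic:

    final class Utf8LengthSketch {
        static int utf8Length(char[] buffer, int start, int length) {
            int utflen = 0;
            for (int i = 0; i < length; i++) {
                char ch = buffer[start + i];
                if (ch >= 0x0001 && ch <= 0x007F) {
                    utflen += 1;          // ASCII (except NUL)
                } else if (ch > 0x07FF) {
                    utflen += 3;          // rest of the BMP, surrogates counted individually
                } else {
                    utflen += 2;          // U+0080..U+07FF and U+0000
                }
            }
            return utflen;
        }
    }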
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/util/StringUtils.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/util/StringUtils.java
deleted file mode 100644
index 3b05824..0000000
--- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/util/StringUtils.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.hyracks.dataflow.common.data.util;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-public class StringUtils {
- public static int writeCharAsModifiedUTF8(char c, DataOutput dos) throws IOException {
- if (c >= 0x0000 && c <= 0x007F) {
- dos.writeByte(c);
- return 1;
- } else if (c <= 0x07FF) {
- dos.writeByte((byte) (0xC0 | ((c >> 6) & 0x3F)));
- dos.writeByte((byte) (0x80 | (c & 0x3F)));
- return 2;
- } else {
- dos.writeByte((byte) (0xE0 | ((c >> 12) & 0x0F)));
- dos.writeByte((byte) (0x80 | ((c >> 6) & 0x3F)));
- dos.writeByte((byte) (0x80 | (c & 0x3F)));
- return 3;
- }
- }
-
- public static void writeUTF8Len(int len, DataOutput dos) throws IOException {
- dos.write((len >>> 8) & 0xFF);
- dos.write((len >>> 0) & 0xFF);
- }
-
-
-}
\ No newline at end of file
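
The deleted StringUtils duplicated the per-char modified-UTF-8 emission now consolidated in the hyracks-util writers. A minimal sketch of that single-char encoding, equivalent to the removed writeCharAsModifiedUTF8:

    import java.io.DataOutput;
    import java.io.IOException;

    final class ModifiedUtf8CharSketch {
        static int writeChar(char c, DataOutput out) throws IOException {
            if (c >= 0x0001 && c <= 0x007F) {
                out.writeByte(c);                                  // 1 byte
                return 1;
            } else if (c <= 0x07FF) {
                out.writeByte((byte) (0xC0 | ((c >> 6) & 0x1F)));  // 2 bytes
                out.writeByte((byte) (0x80 | (c & 0x3F)));
                return 2;
            } else {
                out.writeByte((byte) (0xE0 | ((c >> 12) & 0x0F))); // 3 bytes
                out.writeByte((byte) (0x80 | ((c >> 6) & 0x3F)));
                out.writeByte((byte) (0x80 | (c & 0x3F)));
                return 3;
            }
        }
    }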
diff --git a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/comm/io/largeobject/FrameFixedFieldTupleAppenderTest.java b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/comm/io/largeobject/FrameFixedFieldTupleAppenderTest.java
index ad4461d..05710ad 100644
--- a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/comm/io/largeobject/FrameFixedFieldTupleAppenderTest.java
+++ b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/comm/io/largeobject/FrameFixedFieldTupleAppenderTest.java
@@ -53,9 +53,9 @@
FrameFixedFieldAppender appender;
static ISerializerDeserializer[] fields = new ISerializerDeserializer[] {
IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(),
IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(),
};
static RecordDescriptor recordDescriptor = new RecordDescriptor(fields);
static ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(recordDescriptor.getFieldCount());
diff --git a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializerTest.java b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializerTest.java
index 8534388..f0e831a 100644
--- a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializerTest.java
+++ b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/marshalling/ByteArraySerializerDeserializerTest.java
@@ -19,58 +19,44 @@
package org.apache.hyracks.dataflow.common.data.marshalling;
-import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
-import org.junit.Test;
+import static org.junit.Assert.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.util.Arrays;
-import java.util.Random;
-import static org.junit.Assert.assertTrue;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
+import org.apache.hyracks.util.string.UTF8StringSample;
+import org.junit.Test;
public class ByteArraySerializerDeserializerTest {
- Random random = new Random();
- public static byte[] generateRandomBytes(int maxSize, Random random) {
- int size = random.nextInt(maxSize);
- byte[] bytes = new byte[size + ByteArrayPointable.SIZE_OF_LENGTH];
- random.nextBytes(bytes);
- ByteArrayPointable.putLength(size, bytes, 0);
- return bytes;
- }
+ ByteArrayPointable bytePtr = new ByteArrayPointable();
+ ByteArraySerializerDeserializer serder = new ByteArraySerializerDeserializer();
@Test
public void testSerializeDeserializeRandomBytes() throws Exception {
- for (int i = 0; i < 10; ++i) {
- ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
- byte[] randomBytes = generateRandomBytes(ByteArrayPointable.MAX_LENGTH + 1, random);
-
- ByteArraySerializerDeserializer.INSTANCE.serialize(randomBytes, new DataOutputStream(outputStream));
- byte[] result = outputStream.toByteArray();
- assertTrue(Arrays.equals(randomBytes, result));
-
- ByteArrayInputStream inputStream = new ByteArrayInputStream(result);
- assertTrue(Arrays.equals(randomBytes,
- ByteArraySerializerDeserializer.INSTANCE.deserialize(new DataInputStream(inputStream))));
- }
-
+ testOneByteArray(UTF8StringSample.EMPTY_STRING.getBytes());
+ testOneByteArray(UTF8StringSample.STRING_UTF8_MIX.getBytes());
+ testOneByteArray(UTF8StringSample.STRING_LEN_128.getBytes());
+ testOneByteArray(UTF8StringSample.STRING_LEN_MEDIUM.getBytes());
+ testOneByteArray(UTF8StringSample.STRING_LEN_LARGE.getBytes());
}
- @Test
- public void testPutGetLength() throws Exception {
- final int size = 5;
- byte[] newBytes = new byte[size];
- for (int i = 0; i < 10; ++i) {
- int length = random.nextInt(ByteArrayPointable.MAX_LENGTH +1);
- for (int j = 0; j < size - 1; ++j) {
- ByteArrayPointable.putLength(length, newBytes, j);
- int result = ByteArrayPointable.getLength(newBytes, j);
- assertTrue(result == length);
- }
- }
+ void testOneByteArray(byte[] testBytes) throws HyracksDataException {
+ ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+
+ serder.serialize(testBytes, new DataOutputStream(outputStream));
+
+ bytePtr.set(outputStream.toByteArray(), 0, outputStream.size());
+ assertTrue(Arrays.equals(testBytes, ByteArrayPointable.copyContent(bytePtr)));
+
+ ByteArrayInputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray());
+ assertTrue(Arrays.equals(testBytes, serder.deserialize(new DataInputStream(inputStream))));
+
}
}
\ No newline at end of file
diff --git a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactoryTest.java b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactoryTest.java
index 1645631..4d3eb49 100644
--- a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactoryTest.java
+++ b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/normalizers/ByteArrayNormalizedKeyComputerFactoryTest.java
@@ -19,14 +19,13 @@
package org.apache.hyracks.dataflow.common.data.normalizers;
-import org.apache.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
-import org.apache.hyracks.dataflow.common.data.marshalling.ByteArraySerializerDeserializerTest;
-import org.junit.Test;
+import static junit.framework.Assert.assertTrue;
import java.util.Random;
-import static junit.framework.Assert.assertTrue;
+import org.apache.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
+import org.junit.Test;
public class ByteArrayNormalizedKeyComputerFactoryTest {
@@ -34,33 +33,21 @@
INormalizedKeyComputer computer = ByteArrayNormalizedKeyComputerFactory.INSTANCE.createNormalizedKeyComputer();
- public static ByteArrayPointable generateRandomByteArrayPointable(int maxSize, Random random) {
- byte[] bytes = ByteArraySerializerDeserializerTest
- .generateRandomBytes(maxSize, random);
- ByteArrayPointable pointable = new ByteArrayPointable();
- pointable.set(bytes, 0, bytes.length);
- return pointable;
- }
-
@Test
public void testRandomNormalizedKey() {
for (int i = 0; i < 10; ++i) {
- ByteArrayPointable pointable1 = generateRandomByteArrayPointable(ByteArrayPointable.MAX_LENGTH + 1,
- random);
-
- ByteArrayPointable pointable2 = generateRandomByteArrayPointable(ByteArrayPointable.MAX_LENGTH + 1,
- random);
+ ByteArrayPointable pointable1 = generateRandomByteArrayPointableWithFixLength(
+ Math.abs(random.nextInt((i + 1) * 10)), random);
+ ByteArrayPointable pointable2 = generateRandomByteArrayPointableWithFixLength(
+ Math.abs(random.nextInt((i + 1) * 10)), random);
assertNormalizeValue(pointable1, pointable2, computer);
}
}
public static ByteArrayPointable generateRandomByteArrayPointableWithFixLength(int length, Random random) {
- byte[] bytes = new byte[length + ByteArrayPointable.SIZE_OF_LENGTH];
+ byte[] bytes = new byte[length];
random.nextBytes(bytes);
- ByteArrayPointable pointable = new ByteArrayPointable();
- ByteArrayPointable.putLength(length, bytes, 0);
- pointable.set(bytes, 0, bytes.length);
- return pointable;
+ return ByteArrayPointable.generatePointableFromPureBytes(bytes);
}
public static void assertNormalizeValue(ByteArrayPointable pointable1, ByteArrayPointable pointable2,
@@ -82,11 +69,12 @@
assertNormalizeValue(pointable1, pointable2, computer);
}
- byte[] bytes1 = new byte[] { 0, 4, 0, 25, 34, 42 };
- byte[] bytes2 = new byte[] { 0, 4, (byte) 130, 25, 34, 42 };
+ ByteArrayPointable ptr1 = ByteArrayPointable.generatePointableFromPureBytes(new byte[] { 0, 25, 34, 42 });
+ ByteArrayPointable ptr2 = ByteArrayPointable.generatePointableFromPureBytes(
+ new byte[] { (byte) 130, 25, 34, 42 });
- int n1 = computer.normalize(bytes1, 0, bytes1.length);
- int n2 = computer.normalize(bytes2, 0, bytes2.length);
+ int n1 = computer.normalize(ptr1.getByteArray(), ptr1.getStartOffset(), ptr1.getLength());
+ int n2 = computer.normalize(ptr2.getByteArray(), ptr2.getStartOffset(), ptr2.getLength());
assertTrue(n1 < n2);
}
diff --git a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java
index fe8b03b..cec6add 100644
--- a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java
+++ b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayBase64ParserFactoryTest.java
@@ -19,32 +19,25 @@
package org.apache.hyracks.dataflow.common.data.parsers;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
-import junit.framework.TestCase;
-import org.junit.Test;
+import static org.apache.hyracks.data.std.primitive.ByteArrayPointable.copyContent;
-import javax.xml.bind.DatatypeConverter;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.util.Arrays;
-import static org.apache.hyracks.dataflow.common.data.parsers.ByteArrayHexParserFactoryTest.subArray;
+import javax.xml.bind.DatatypeConverter;
+
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
+import org.junit.Test;
+
+import junit.framework.TestCase;
public class ByteArrayBase64ParserFactoryTest extends TestCase {
@Test
public void testParseBase64String() throws HyracksDataException {
- IValueParser parser = ByteArrayBase64ParserFactory.INSTANCE.createValueParser();
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- DataOutputStream outputStream = new DataOutputStream(bos);
- String empty = "";
-
- parser.parse(empty.toCharArray(), 0, empty.length(), outputStream);
-
- byte[] cache = bos.toByteArray();
- assertTrue(ByteArrayPointable.getLength(cache, 0) == 0);
- assertTrue(DatatypeConverter.printBase64Binary(subArray(cache, 2)).equalsIgnoreCase(empty));
+ testOneString("");
StringBuilder everyChar = new StringBuilder();
for (char c = 'a'; c <= 'z'; c++) {
@@ -58,21 +51,26 @@
}
everyChar.append("+/");
- bos.reset();
- parser.parse(everyChar.toString().toCharArray(), 0, everyChar.length(), outputStream);
- cache = bos.toByteArray();
- byte[] answer = DatatypeConverter.parseBase64Binary(everyChar.toString());
- assertTrue(ByteArrayPointable.getLength(cache, 0) == answer.length);
- assertTrue(Arrays.equals(answer, subArray(cache, 2)));
+ testOneString(everyChar.toString());
- byte[] maxBytes = new byte[ByteArrayPointable.MAX_LENGTH];
- Arrays.fill(maxBytes, (byte) 0xff);
- String maxString = DatatypeConverter.printBase64Binary(maxBytes);
- bos.reset();
- parser.parse(maxString.toCharArray(), 0, maxString.length(), outputStream);
- cache = bos.toByteArray();
- assertTrue(ByteArrayPointable.getLength(cache, 0) == maxBytes.length);
- assertTrue(Arrays.equals(maxBytes, subArray(cache, 2)));
+ byte[] longBytes = new byte[65536];
+ Arrays.fill(longBytes, (byte) 0xff);
+ String maxString = DatatypeConverter.printBase64Binary(longBytes);
+
+ testOneString(maxString);
}
+ void testOneString(String test) throws HyracksDataException {
+ IValueParser parser = ByteArrayBase64ParserFactory.INSTANCE.createValueParser();
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ DataOutputStream outputStream = new DataOutputStream(bos);
+ ByteArrayPointable bytePtr = new ByteArrayPointable();
+
+ parser.parse(test.toCharArray(), 0, test.length(), outputStream);
+ bytePtr.set(bos.toByteArray(), 0, bos.size());
+
+ byte[] answer = DatatypeConverter.parseBase64Binary(test);
+ assertTrue(bytePtr.getContentLength() == answer.length);
+ assertTrue(Arrays.equals(answer, copyContent(bytePtr)));
+ }
}
\ No newline at end of file
diff --git a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactoryTest.java b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactoryTest.java
index 3f25c3c..3e12837 100644
--- a/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactoryTest.java
+++ b/hyracks/hyracks-dataflow-common/src/test/java/org/apache/hyracks/dataflow/common/data/parsers/ByteArrayHexParserFactoryTest.java
@@ -19,50 +19,48 @@
package org.apache.hyracks.dataflow.common.data.parsers;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.util.Arrays;
+
+import javax.xml.bind.DatatypeConverter;
+
+import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.primitive.ByteArrayPointable;
import org.junit.Test;
-import javax.xml.bind.DatatypeConverter;
-import java.util.Arrays;
-
-import static org.junit.Assert.assertTrue;
-
public class ByteArrayHexParserFactoryTest {
- public static byte[] subArray(byte[] bytes, int start) {
- return Arrays.copyOfRange(bytes, start, bytes.length);
- }
-
@Test
public void testExtractPointableArrayFromHexString() throws Exception {
- byte[] cache = new byte[] { };
+ testOneString("");
+ testOneString("ABCDEF0123456789");
- String empty = "";
- cache = ByteArrayHexParserFactory
- .extractPointableArrayFromHexString(empty.toCharArray(), 0, empty.length(), cache);
+ testOneString("0123456789abcdef");
- assertTrue(ByteArrayPointable.getLength(cache, 0) == 0);
- assertTrue(DatatypeConverter.printHexBinary(subArray(cache, 2)).equalsIgnoreCase(empty));
-
- String everyChar = "ABCDEF0123456789";
- cache = ByteArrayHexParserFactory
- .extractPointableArrayFromHexString(everyChar.toCharArray(), 0, everyChar.length(), cache);
- assertTrue(ByteArrayPointable.getLength(cache, 0) == everyChar.length() / 2);
- assertTrue(DatatypeConverter.printHexBinary(subArray(cache, 2)).equalsIgnoreCase(everyChar));
-
- String lowercase = "0123456789abcdef";
- cache = ByteArrayHexParserFactory
- .extractPointableArrayFromHexString(lowercase.toCharArray(), 0, lowercase.length(), cache);
- assertTrue(ByteArrayPointable.getLength(cache, 0) == lowercase.length() / 2);
- assertTrue(DatatypeConverter.printHexBinary(subArray(cache, 2)).equalsIgnoreCase(lowercase));
-
- char[] maxChars = new char[ByteArrayPointable.MAX_LENGTH * 2];
+ char[] maxChars = new char[65540 * 2];
Arrays.fill(maxChars, 'f');
String maxString = new String(maxChars);
- cache = ByteArrayHexParserFactory
- .extractPointableArrayFromHexString(maxString.toCharArray(), 0, maxString.length(), cache);
- assertTrue(ByteArrayPointable.getLength(cache, 0) == maxString.length() / 2);
- assertTrue(DatatypeConverter.printHexBinary(subArray(cache, 2)).equalsIgnoreCase(maxString));
+
+ testOneString(maxString);
+ }
+
+ void testOneString(String test) throws HyracksDataException {
+ IValueParser parser = ByteArrayHexParserFactory.INSTANCE.createValueParser();
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ DataOutputStream outputStream = new DataOutputStream(bos);
+ ByteArrayPointable bytePtr = new ByteArrayPointable();
+
+ parser.parse(test.toCharArray(), 0, test.length(), outputStream);
+
+ bytePtr.set(bos.toByteArray(), 0, bos.size());
+
+ assertTrue(bytePtr.getContentLength() == test.length() / 2);
+ assertEquals(DatatypeConverter.printHexBinary(ByteArrayPointable.copyContent(bytePtr)).toLowerCase(),
+ test.toLowerCase());
}
}
\ No newline at end of file
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/aggregators/MinMaxStringFieldAggregatorFactory.java b/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/aggregators/MinMaxStringFieldAggregatorFactory.java
index 53c60a3..4e69437 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/aggregators/MinMaxStringFieldAggregatorFactory.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/group/aggregators/MinMaxStringFieldAggregatorFactory.java
@@ -29,7 +29,6 @@
import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.primitive.IntegerPointable;
-import org.apache.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
import org.apache.hyracks.dataflow.std.group.AggregateState;
import org.apache.hyracks.dataflow.std.group.IFieldAggregateDescriptor;
@@ -68,6 +67,8 @@
RecordDescriptor outRecordDescriptor) throws HyracksDataException {
return new IFieldAggregateDescriptor() {
+ UTF8StringSerializerDeserializer utf8SerializerDeserializer = new UTF8StringSerializerDeserializer();
+
@Override
public void reset() {
}
@@ -112,7 +113,7 @@
int tupleOffset = accessor.getTupleStartOffset(tIndex);
int fieldStart = accessor.getFieldStartOffset(tIndex, aggField);
int fieldLength = accessor.getFieldLength(tIndex, aggField);
- String strField = UTF8StringSerializerDeserializer.INSTANCE.deserialize(new DataInputStream(
+ String strField = utf8SerializerDeserializer.deserialize(new DataInputStream(
new ByteArrayInputStream(accessor.getBuffer().array(), tupleOffset
+ accessor.getFieldSlotsLength() + fieldStart, fieldLength)));
if (hasBinaryState) {
@@ -157,7 +158,7 @@
int tupleOffset = accessor.getTupleStartOffset(tIndex);
int fieldStart = accessor.getFieldStartOffset(tIndex, aggField);
int fieldLength = accessor.getFieldLength(tIndex, aggField);
- String strField = UTF8StringSerializerDeserializer.INSTANCE.deserialize(new DataInputStream(
+ String strField = utf8SerializerDeserializer.deserialize(new DataInputStream(
new ByteArrayInputStream(accessor.getBuffer().array(), tupleOffset
+ accessor.getFieldSlotsLength() + fieldStart, fieldLength)));
if (hasBinaryState) {
diff --git a/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/buffermanager/VariableTupleMemoryManagerTest.java b/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/buffermanager/VariableTupleMemoryManagerTest.java
index 61b29fd..9651529 100644
--- a/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/buffermanager/VariableTupleMemoryManagerTest.java
+++ b/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/buffermanager/VariableTupleMemoryManagerTest.java
@@ -46,7 +46,7 @@
public class VariableTupleMemoryManagerTest {
ISerializerDeserializer[] fieldsSerDer = new ISerializerDeserializer[] {
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() };
RecordDescriptor recordDescriptor = new RecordDescriptor(fieldsSerDer);
ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(recordDescriptor.getFieldCount());
VariableTupleMemoryManager tupleMemoryManager;
diff --git a/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/util/DeletableFrameTupleAppenderTest.java b/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/util/DeletableFrameTupleAppenderTest.java
index cc52b78..44a082f 100644
--- a/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/util/DeletableFrameTupleAppenderTest.java
+++ b/hyracks/hyracks-dataflow-std/src/test/java/org/apache/hyracks/dataflow/std/sort/util/DeletableFrameTupleAppenderTest.java
@@ -25,9 +25,6 @@
import java.nio.ByteBuffer;
import org.apache.commons.lang3.ArrayUtils;
-import org.junit.Before;
-import org.junit.Test;
-
import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
import org.apache.hyracks.api.exceptions.HyracksDataException;
@@ -36,12 +33,15 @@
import org.apache.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
import org.apache.hyracks.dataflow.common.util.IntSerDeUtils;
import org.apache.hyracks.dataflow.std.sort.Utility;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+import org.junit.Before;
+import org.junit.Test;
public class DeletableFrameTupleAppenderTest {
DeletableFrameTupleAppender appender;
ISerializerDeserializer[] fields = new ISerializerDeserializer[] {
IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(),
};
RecordDescriptor recordDescriptor = new RecordDescriptor(fields);
ArrayTupleBuilder builder = new ArrayTupleBuilder(recordDescriptor.getFieldCount());
@@ -90,7 +90,8 @@
}
int assertTupleIsExpected(int i, int dataOffset) {
- int tupleLength = 2 * 4 + 4 + 2 + i + 1;
+ int lenStrMeta = UTF8StringUtil.getNumBytesToStoreLength(i);
+ int tupleLength = 2 * 4 + 4 + lenStrMeta + i + 1;
assertEquals(dataOffset, appender.getTupleStartOffset(i));
assertEquals(tupleLength, appender.getTupleLength(i));
@@ -99,7 +100,7 @@
assertEquals(i + 1,
IntSerDeUtils.getInt(appender.getBuffer().array(), appender.getAbsoluteFieldStartOffset(i, 0)));
assertEquals(dataOffset + 2 * 4 + 4, appender.getAbsoluteFieldStartOffset(i, 1));
- assertEquals(2 + i + 1, appender.getFieldLength(i, 1));
+ assertEquals(lenStrMeta + i + 1, appender.getFieldLength(i, 1));
return tupleLength;
}
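
The test now derives the expected field length from UTF8StringUtil.getNumBytesToStoreLength(i) instead of a hard-coded 2-byte header. As a rough sketch only, assuming a VLQ-style header that stores 7 payload bits per byte with a continuation bit (the authoritative wire format is defined in hyracks-util):

    final class VarLenHeaderSketch {
        static int numBytesToStoreLength(int length) {
            int bytes = 1;                    // lengths <= 127 fit in a single header byte
            while ((length >>>= 7) != 0) {
                bytes++;                      // up to 5 bytes for a full 32-bit length
            }
            return bytes;
        }
    }

Under that assumption, the small field sizes used in this test keep lenStrMeta at 1, one byte less than the old fixed header accounted for in the previous tupleLength arithmetic.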
diff --git a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/InsertPipelineExample.java b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/InsertPipelineExample.java
index 202096a..ac521f5 100644
--- a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/InsertPipelineExample.java
+++ b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/InsertPipelineExample.java
@@ -107,15 +107,15 @@
// string
// we will use field 2 as primary key to fill a clustered index
RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, // this field will
+ new UTF8StringSerializerDeserializer(), // this field will
// not go into B-Tree
- UTF8StringSerializerDeserializer.INSTANCE, // we will use this
+ new UTF8StringSerializerDeserializer(), // we will use this
// as payload
IntegerSerializerDeserializer.INSTANCE, // we will use this
// field as key
IntegerSerializerDeserializer.INSTANCE, // we will use this as
// payload
- UTF8StringSerializerDeserializer.INSTANCE // we will use this as
+ new UTF8StringSerializerDeserializer() // we will use this as
// payload
});
diff --git a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexBulkLoadExample.java b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexBulkLoadExample.java
index a493aec..734feb4 100644
--- a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexBulkLoadExample.java
+++ b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexBulkLoadExample.java
@@ -105,15 +105,15 @@
// int, string
// we will use field-index 2 as primary key to fill a clustered index
RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, // this field will
+ new UTF8StringSerializerDeserializer(), // this field will
// not go into B-Tree
- UTF8StringSerializerDeserializer.INSTANCE, // we will use this
+ new UTF8StringSerializerDeserializer(), // we will use this
// as payload
IntegerSerializerDeserializer.INSTANCE, // we will use this
// field as key
IntegerSerializerDeserializer.INSTANCE, // we will use this as
// payload
- UTF8StringSerializerDeserializer.INSTANCE // we will use this as
+ new UTF8StringSerializerDeserializer() // we will use this as
// payload
});
diff --git a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexSearchExample.java b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexSearchExample.java
index 48b9942..df33132 100644
--- a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexSearchExample.java
+++ b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/PrimaryIndexSearchExample.java
@@ -111,8 +111,8 @@
// schema of tuples coming out of primary index
RecordDescriptor recDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE, });
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), });
// build tuple containing low and high search keys
ArrayTupleBuilder tb = new ArrayTupleBuilder(comparatorFactories.length * 2); // high
@@ -129,8 +129,8 @@
// high key
tb.addFieldEndOffset();
- ISerializerDeserializer[] keyRecDescSers = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,
diff --git a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexBulkLoadExample.java b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexBulkLoadExample.java
index e8e2281..8d68021 100644
--- a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexBulkLoadExample.java
+++ b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexBulkLoadExample.java
@@ -105,10 +105,10 @@
IntegerSerializerDeserializer.INSTANCE, // we will use this as
// payload in secondary
// index
- UTF8StringSerializerDeserializer.INSTANCE, // we will use this
+ new UTF8StringSerializerDeserializer(), // we will use this
// ask key in
// secondary index
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
int primaryFieldCount = 4;
ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];
diff --git a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexSearchExample.java b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexSearchExample.java
index 1a1559f..0e80272 100644
--- a/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexSearchExample.java
+++ b/hyracks/hyracks-examples/btree-example/btreeclient/src/main/java/org/apache/hyracks/examples/btree/client/SecondaryIndexSearchExample.java
@@ -102,7 +102,7 @@
// schema of tuples coming out of secondary index
RecordDescriptor secondaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
int secondaryFieldCount = 2;
ITypeTraits[] secondaryTypeTraits = new ITypeTraits[secondaryFieldCount];
@@ -120,8 +120,8 @@
// schema of tuples coming out of primary index
RecordDescriptor primaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE, });
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(),
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer(), });
int primaryFieldCount = 4;
ITypeTraits[] primaryTypeTraits = new ITypeTraits[primaryFieldCount];
@@ -145,15 +145,15 @@
DataOutput dos = tb.getDataOutput();
tb.reset();
- UTF8StringSerializerDeserializer.INSTANCE.serialize("0", dos); // low
+ new UTF8StringSerializerDeserializer().serialize("0", dos); // low
// key
tb.addFieldEndOffset();
- UTF8StringSerializerDeserializer.INSTANCE.serialize("f", dos); // high
+ new UTF8StringSerializerDeserializer().serialize("f", dos); // high
// key
tb.addFieldEndOffset();
- ISerializerDeserializer[] keyRecDescSers = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/AbstractBTreeOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/AbstractBTreeOperatorTest.java
index 4aeebe7..837a8a4 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/AbstractBTreeOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/AbstractBTreeOperatorTest.java
@@ -82,9 +82,9 @@
protected final int[] primaryBloomFilterKeyFields = new int[primaryKeyFieldCount];
protected final RecordDescriptor primaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
// to be set by subclasses
protected String primaryFileName;
@@ -98,7 +98,7 @@
protected final int[] secondaryBloomFilterKeyFields = new int[secondaryKeyFieldCount];
protected final RecordDescriptor secondaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
protected String secondaryFileName;
protected IFileSplitProvider secondarySplitProvider;
@@ -160,11 +160,11 @@
"data/tpch0.001/orders-part1.tbl"))) };
IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -217,11 +217,11 @@
DataOutput dos = tb.getDataOutput();
tb.reset();
- UTF8StringSerializerDeserializer.INSTANCE.serialize("0", dos);
+ new UTF8StringSerializerDeserializer().serialize("0", dos);
tb.addFieldEndOffset();
- ISerializerDeserializer[] keyRecDescSers = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,
@@ -272,11 +272,11 @@
"data/tpch0.001/orders-part2.tbl"))) };
IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexScanOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexScanOperatorTest.java
index c4068a7..d4fb56d 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexScanOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexScanOperatorTest.java
@@ -59,11 +59,11 @@
DataOutput dos = tb.getDataOutput();
tb.reset();
- UTF8StringSerializerDeserializer.INSTANCE.serialize("0", dos);
+ new UTF8StringSerializerDeserializer().serialize("0", dos);
tb.addFieldEndOffset();
- ISerializerDeserializer[] keyRecDescSers = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexSearchOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexSearchOperatorTest.java
index 5429135..e690423 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexSearchOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreePrimaryIndexSearchOperatorTest.java
@@ -61,14 +61,14 @@
tb.reset();
// low key
- UTF8StringSerializerDeserializer.INSTANCE.serialize("100", dos);
+ new UTF8StringSerializerDeserializer().serialize("100", dos);
tb.addFieldEndOffset();
// high key
- UTF8StringSerializerDeserializer.INSTANCE.serialize("200", dos);
+ new UTF8StringSerializerDeserializer().serialize("200", dos);
tb.addFieldEndOffset();
- ISerializerDeserializer[] keyRecDescSers = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexInsertOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexInsertOperatorTest.java
index 97175f2..6ebc177 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexInsertOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexInsertOperatorTest.java
@@ -64,14 +64,14 @@
tb.reset();
// low key
- UTF8StringSerializerDeserializer.INSTANCE.serialize("1998-07-21", dos);
+ new UTF8StringSerializerDeserializer().serialize("1998-07-21", dos);
tb.addFieldEndOffset();
// high key
- UTF8StringSerializerDeserializer.INSTANCE.serialize("2000-10-18", dos);
+ new UTF8StringSerializerDeserializer().serialize("2000-10-18", dos);
tb.addFieldEndOffset();
- ISerializerDeserializer[] keyRecDescSers = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexSearchOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexSearchOperatorTest.java
index 11c060b..2bba010 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexSearchOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexSearchOperatorTest.java
@@ -63,14 +63,14 @@
tb.reset();
// low key
- UTF8StringSerializerDeserializer.INSTANCE.serialize("1998-07-21", dos);
+ new UTF8StringSerializerDeserializer().serialize("1998-07-21", dos);
tb.addFieldEndOffset();
// high key
- UTF8StringSerializerDeserializer.INSTANCE.serialize("2000-10-18", dos);
+ new UTF8StringSerializerDeserializer().serialize("2000-10-18", dos);
tb.addFieldEndOffset();
- ISerializerDeserializer[] keyRecDescSers = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexUpsertOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexUpsertOperatorTest.java
index 7abd14c..031ef76 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexUpsertOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/btree/BTreeSecondaryIndexUpsertOperatorTest.java
@@ -63,14 +63,14 @@
tb.reset();
// low key
- UTF8StringSerializerDeserializer.INSTANCE.serialize("1998-07-21", dos);
+ new UTF8StringSerializerDeserializer().serialize("1998-07-21", dos);
tb.addFieldEndOffset();
// high key
- UTF8StringSerializerDeserializer.INSTANCE.serialize("2000-10-18", dos);
+ new UTF8StringSerializerDeserializer().serialize("2000-10-18", dos);
tb.addFieldEndOffset();
- ISerializerDeserializer[] keyRecDescSers = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/rtree/AbstractRTreeOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/rtree/AbstractRTreeOperatorTest.java
index 7100920..9381727 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/rtree/AbstractRTreeOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/am/rtree/AbstractRTreeOperatorTest.java
@@ -100,9 +100,9 @@
protected final IBinaryComparatorFactory[] primaryComparatorFactories = new IBinaryComparatorFactory[primaryKeyFieldCount];
protected final RecordDescriptor primaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE,
DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE });
@@ -119,7 +119,7 @@
protected final RecordDescriptor secondaryRecDesc = new RecordDescriptor(new ISerializerDeserializer[] {
DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE,
DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer() });
// This is only used for the LSMRTree. We need a comparator Factories for
// the BTree component of the LSMRTree.
@@ -220,11 +220,11 @@
"data/orders-with-locations-part1.txt"))) };
IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), DoubleSerializerDeserializer.INSTANCE,
DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE,
DoubleSerializerDeserializer.INSTANCE });
@@ -283,11 +283,11 @@
DataOutput dos = tb.getDataOutput();
tb.reset();
- UTF8StringSerializerDeserializer.INSTANCE.serialize("0", dos);
+ new UTF8StringSerializerDeserializer().serialize("0", dos);
tb.addFieldEndOffset();
- ISerializerDeserializer[] keyRecDescSers = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] keyRecDescSers = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
ConstantTupleSourceOperatorDescriptor keyProviderOp = new ConstantTupleSourceOperatorDescriptor(spec,
@@ -331,11 +331,11 @@
"data/orders-with-locations-part2.txt"))) };
IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), DoubleSerializerDeserializer.INSTANCE,
DoubleSerializerDeserializer.INSTANCE, DoubleSerializerDeserializer.INSTANCE,
DoubleSerializerDeserializer.INSTANCE });
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/comm/SerializationDeserializationTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/comm/SerializationDeserializationTest.java
index 8baf9e0..41e63a0 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/comm/SerializationDeserializationTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/comm/SerializationDeserializationTest.java
@@ -144,7 +144,7 @@
@Test
public void serdeser01() throws Exception {
RecordDescriptor rDes = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
LineProcessor processor = new LineProcessor() {
@Override
public void process(String line, IDataWriter<Object[]> writer) throws Exception {
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/AggregationTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/AggregationTest.java
index 2457ee9..c330f8e 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/AggregationTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/AggregationTest.java
@@ -77,14 +77,14 @@
new FileReference(new File("data/tpch0.001/lineitem.tbl"))) });
final RecordDescriptor desc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
FloatSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
final ITupleParserFactory tupleParserFactory = new DelimitedDataTupleParserFactory(new IValueParserFactory[] {
UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE, IntegerParserFactory.INSTANCE,
@@ -115,7 +115,7 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 0 };
@@ -159,7 +159,7 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 0 };
@@ -199,7 +199,7 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 0 };
@@ -248,7 +248,7 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 0 };
@@ -292,7 +292,7 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 0 };
@@ -332,7 +332,7 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 0 };
@@ -381,8 +381,8 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer() });
int[] keyFields = new int[] { 0 };
int tableSize = 8;
@@ -425,8 +425,8 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer() });
int[] keyFields = new int[] { 0 };
@@ -465,8 +465,8 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer() });
int[] keyFields = new int[] { 0 };
int frameLimits = 4;
@@ -513,7 +513,7 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 8, 0 };
@@ -558,7 +558,7 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 8, 0 };
@@ -599,7 +599,7 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 8, 0 };
@@ -648,7 +648,7 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
FloatSerializerDeserializer.INSTANCE });
@@ -694,7 +694,7 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
FloatSerializerDeserializer.INSTANCE });
@@ -736,7 +736,7 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
FloatSerializerDeserializer.INSTANCE });
@@ -788,8 +788,8 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
int[] keyFields = new int[] { 8, 0 };
int tableSize = 8;
@@ -833,8 +833,8 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
int[] keyFields = new int[] { 8, 0 };
@@ -874,8 +874,8 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, csvScanner, NC2_ID);
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() });
int[] keyFields = new int[] { 8, 0 };
int frameLimits = 4;
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/CountOfCountsTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/CountOfCountsTest.java
index bb8627a..f3721e6 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/CountOfCountsTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/CountOfCountsTest.java
@@ -66,7 +66,7 @@
FileSplit[] splits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File("data/words.txt"))) };
IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
RecordDescriptor desc = new RecordDescriptor(
- new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+ new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
spec,
@@ -81,7 +81,7 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID);
RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 },
new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
new MultiFieldsAggregatorFactory(
@@ -138,7 +138,7 @@
FileSplit[] splits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File("data/words.txt"))) };
IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
RecordDescriptor desc = new RecordDescriptor(
- new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+ new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
spec,
@@ -153,7 +153,7 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 },
new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
new MultiFieldsAggregatorFactory(
@@ -211,7 +211,7 @@
FileSplit[] splits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File("data/words.txt"))) };
IFileSplitProvider splitProvider = new ConstantFileSplitProvider(splits);
RecordDescriptor desc = new RecordDescriptor(
- new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+ new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
spec,
@@ -226,7 +226,7 @@
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, sorter, NC1_ID, NC2_ID, NC1_ID, NC2_ID);
RecordDescriptor desc2 = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
PreclusteredGroupOperatorDescriptor group = new PreclusteredGroupOperatorDescriptor(spec, new int[] { 0 },
new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY) },
new MultiFieldsAggregatorFactory(
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/LocalityAwareConnectorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/LocalityAwareConnectorTest.java
index f8fbfc6..55689e9 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/LocalityAwareConnectorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/LocalityAwareConnectorTest.java
@@ -73,14 +73,14 @@
new FileSplit("asterix-004", new FileReference(new File("data/tpch0.001/lineitem.tbl"))) });
final RecordDescriptor desc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
FloatSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
final ITupleParserFactory tupleParserFactory = new DelimitedDataTupleParserFactory(new IValueParserFactory[] {
UTF8StringParserFactory.INSTANCE, IntegerParserFactory.INSTANCE, IntegerParserFactory.INSTANCE,
@@ -110,7 +110,7 @@
"asterix-003", "asterix-004");
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 0 };
@@ -170,7 +170,7 @@
"asterix-003", "asterix-004");
RecordDescriptor outputRec = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE });
int[] keyFields = new int[] { 0 };
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/OptimizedSortMergeTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/OptimizedSortMergeTest.java
index 04194d8..c574ec8 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/OptimizedSortMergeTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/OptimizedSortMergeTest.java
@@ -63,11 +63,11 @@
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -120,11 +120,11 @@
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/ScanPrintTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/ScanPrintTest.java
index a7612e9..b19b47f 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/ScanPrintTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/ScanPrintTest.java
@@ -60,7 +60,7 @@
new FileSplit(NC1_ID, new FileReference(new File("data/words.txt"))) });
RecordDescriptor desc = new RecordDescriptor(
- new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+ new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor csvScanner = new FileScanOperatorDescriptor(
spec,
@@ -91,11 +91,11 @@
"data/tpch0.001/orders.tbl"))) };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -131,10 +131,10 @@
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { IntegerParserFactory.INSTANCE,
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SortMergeTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SortMergeTest.java
index 40b1687..0902da2 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SortMergeTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SortMergeTest.java
@@ -61,11 +61,11 @@
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -109,11 +109,11 @@
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SplitOperatorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SplitOperatorTest.java
index c104ec5..d1e9c7a 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SplitOperatorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/SplitOperatorTest.java
@@ -80,7 +80,7 @@
DelimitedDataTupleParserFactory stringParser = new DelimitedDataTupleParserFactory(
new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE }, '\u0000');
RecordDescriptor stringRec = new RecordDescriptor(
- new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE, });
+ new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(), });
FileScanOperatorDescriptor scanOp = new FileScanOperatorDescriptor(spec, new ConstantFileSplitProvider(
inputSplits), stringParser, stringRec);
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOptimizedHybridHashJoinTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOptimizedHybridHashJoinTest.java
index 6fdc797..a10513a 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOptimizedHybridHashJoinTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOptimizedHybridHashJoinTest.java
@@ -59,32 +59,32 @@
"data/tpch0.001/customer4.tbl"))) };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
"data/tpch0.001/orders4.tbl"))) };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -137,32 +137,32 @@
"data/tpch0.001/customer3.tbl"))) };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
"data/tpch0.001/orders4.tbl"))) };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -216,32 +216,32 @@
"data/tpch0.001/customer3.tbl"))) };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
"data/tpch0.001/orders1.tbl"))) };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderHashJoinTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderHashJoinTest.java
index a9cf4ad..7ee3682 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderHashJoinTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderHashJoinTest.java
@@ -78,31 +78,31 @@
"data/tpch0.001/customer.tbl"))) };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
"data/tpch0.001/orders.tbl"))) };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -157,31 +157,31 @@
"data/tpch0.001/customer.tbl"))) };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
"data/tpch0.001/orders.tbl"))) };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -240,31 +240,31 @@
"data/tpch0.001/customer.tbl"))) };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
"data/tpch0.001/orders.tbl"))) };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -323,31 +323,31 @@
"data/tpch0.001/customer.tbl"))) };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
"data/tpch0.001/orders.tbl"))) };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -407,31 +407,31 @@
"data/tpch0.001/customer.tbl"))) };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
"data/tpch0.001/orders.tbl"))) };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -495,31 +495,31 @@
"data/tpch0.001/customer.tbl"))) };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC2_ID, new FileReference(new File(
"data/tpch0.001/orders.tbl"))) };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -584,32 +584,32 @@
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/customer-part2.tbl"))) };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileSplit[] ordersSplits = new FileSplit[] {
new FileSplit(NC1_ID, new FileReference(new File("data/tpch0.001/orders-part1.tbl"))),
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -671,32 +671,32 @@
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/customer-part2.tbl"))) };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileSplit[] ordersSplits = new FileSplit[] {
new FileSplit(NC1_ID, new FileReference(new File("data/tpch0.001/orders-part1.tbl"))),
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -762,32 +762,32 @@
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/customer-part2.tbl"))) };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileSplit[] ordersSplits = new FileSplit[] {
new FileSplit(NC1_ID, new FileReference(new File("data/tpch0.001/orders-part1.tbl"))),
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -853,32 +853,32 @@
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/customer-part2.tbl"))) };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileSplit[] ordersSplits = new FileSplit[] {
new FileSplit(NC1_ID, new FileReference(new File("data/tpch0.001/orders-part1.tbl"))),
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -940,32 +940,32 @@
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/customer-part2.tbl"))) };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileSplit[] ordersSplits = new FileSplit[] {
new FileSplit(NC1_ID, new FileReference(new File("data/tpch0.001/orders-part1.tbl"))),
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
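
The hunks above, and the similar ones in the test files that follow, all make the same mechanical change: UTF8StringSerializerDeserializer no longer exposes a shared INSTANCE singleton, so every RecordDescriptor builds its own instances. A plausible reason, not spelled out in the diff itself, is that the serializer now carries reusable per-instance UTF8 reader/writer state for the variable-length format and is therefore no longer safe to share across operators. A minimal sketch of the new construction pattern (the field layout is illustrative, not taken from any one test):

    // Each descriptor owns its serializer instances instead of referencing a static singleton.
    RecordDescriptor twoStringFields = new RecordDescriptor(new ISerializerDeserializer[] {
            new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
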
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderNestedLoopJoinTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderNestedLoopJoinTest.java
index 039d936..985f974 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderNestedLoopJoinTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/TPCHCustomerOrderNestedLoopJoinTest.java
@@ -132,31 +132,31 @@
"data/tpch0.001/customer.tbl"))) };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileSplit[] ordersSplits = new FileSplit[] { new FileSplit(NC1_ID, new FileReference(new File(
"data/tpch0.001/orders.tbl"))) };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -208,32 +208,32 @@
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/customer-part2.tbl"))) };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileSplit[] ordersSplits = new FileSplit[] {
new FileSplit(NC1_ID, new FileReference(new File("data/tpch0.001/orders-part1.tbl"))),
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -285,32 +285,32 @@
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/customer-part2.tbl"))) };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileSplit[] ordersSplits = new FileSplit[] {
new FileSplit(NC1_ID, new FileReference(new File("data/tpch0.001/orders-part1.tbl"))),
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
@@ -362,32 +362,32 @@
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/customer-part2.tbl"))) };
IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileSplit[] ordersSplits = new FileSplit[] {
new FileSplit(NC1_ID, new FileReference(new File("data/tpch0.001/orders-part1.tbl"))),
new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/UnionTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/UnionTest.java
index 57bfad0..508db7c 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/UnionTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/UnionTest.java
@@ -52,7 +52,7 @@
new FileSplit(NC1_ID, new FileReference(new File("data/words.txt"))) });
RecordDescriptor desc = new RecordDescriptor(
- new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+ new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor csvScanner01 = new FileScanOperatorDescriptor(
spec,
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/VSizeFrameSortMergeTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/VSizeFrameSortMergeTest.java
index d019f16..b774e0e 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/VSizeFrameSortMergeTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/integration/VSizeFrameSortMergeTest.java
@@ -58,11 +58,11 @@
new FileSplit(NC2_ID, new FileReference(new File(INPUTS[1]))) };
IFileSplitProvider ordersSplitProvider = new ConstantFileSplitProvider(ordersSplits);
RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
@Test
public void sortNormalMergeTest() throws Exception {
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/AbstractRunGeneratorTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/AbstractRunGeneratorTest.java
index 1f339152..3cc2a23 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/AbstractRunGeneratorTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/AbstractRunGeneratorTest.java
@@ -54,8 +54,8 @@
public abstract class AbstractRunGeneratorTest {
static TestUtils testUtils = new TestUtils();
- static ISerializerDeserializer[] SerDers = new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ static ISerializerDeserializer[] SerDers = new ISerializerDeserializer[] {
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() };
static RecordDescriptor RecordDesc = new RecordDescriptor(SerDers);
static Random GRandom = new Random(System.currentTimeMillis());
static int[] SortFields = new int[] { 0, 1 };
@@ -153,7 +153,7 @@
for (Map.Entry<Integer, String> entry : specialData.entrySet()) {
tb.reset();
tb.addField(IntegerSerializerDeserializer.INSTANCE, entry.getKey());
- tb.addField(UTF8StringSerializerDeserializer.INSTANCE, entry.getValue());
+ tb.addField(new UTF8StringSerializerDeserializer(), entry.getValue());
VSizeFrame frame = new VSizeFrame(ctx, FrameHelper.calcAlignedFrameSizeToStore(
tb.getFieldEndOffsets().length, tb.getSize(), ctx.getInitialFrameSize()));
@@ -173,7 +173,7 @@
if (!keyValuePair.containsKey(key)) {
String value = generateRandomRecord(minRecordSize, maxRecordSize);
tb.addField(IntegerSerializerDeserializer.INSTANCE, key);
- tb.addField(UTF8StringSerializerDeserializer.INSTANCE, value);
+ tb.addField(new UTF8StringSerializerDeserializer(), value);
if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
frameList.add(frame);
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/RunMergingFrameReaderTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/RunMergingFrameReaderTest.java
index ca0a6bb..e6d10f2 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/RunMergingFrameReaderTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/org/apache/hyracks/tests/unit/RunMergingFrameReaderTest.java
@@ -109,7 +109,7 @@
while (true) {
tb.reset();
tb.addField(IntegerSerializerDeserializer.INSTANCE, lastEntry.getKey());
- tb.addField(UTF8StringSerializerDeserializer.INSTANCE, lastEntry.getValue());
+ tb.addField(new UTF8StringSerializerDeserializer(), lastEntry.getValue());
if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
break;
} else {
@@ -148,7 +148,7 @@
for (Map.Entry<Integer, String> entry : specialData.entrySet()) {
tb.reset();
tb.addField(IntegerSerializerDeserializer.INSTANCE, entry.getKey());
- tb.addField(UTF8StringSerializerDeserializer.INSTANCE, entry.getValue());
+ tb.addField(new UTF8StringSerializerDeserializer(), entry.getValue());
int size = tb.getSize() + tb.getFieldEndOffsets().length * 4;
datasize += size;
if (size > maxtuple) {
@@ -164,7 +164,7 @@
int key = GRandom.nextInt(datasize + 1);
if (!result.containsKey(key)) {
tb.addField(IntegerSerializerDeserializer.INSTANCE, key);
- tb.addField(UTF8StringSerializerDeserializer.INSTANCE, value);
+ tb.addField(new UTF8StringSerializerDeserializer(), value);
int size = tb.getSize() + tb.getFieldEndOffsets().length * 4;
datasize += size;
if (size > maxtuple) {
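
In these run-generator and merge tests the per-tuple construction path changes too: ArrayTupleBuilder.addField takes a serializer per field, and the string field now gets a freshly constructed serializer. A small illustrative fragment of that pattern (the builder contents and values are placeholders, not from the tests):

    // Two-field tuple: an integer key serialized by the shared singleton, and a string value
    // serialized by a per-use UTF8StringSerializerDeserializer instance.
    ArrayTupleBuilder tb = new ArrayTupleBuilder(2);
    tb.reset();
    tb.addField(IntegerSerializerDeserializer.INSTANCE, 42);
    tb.addField(new UTF8StringSerializerDeserializer(), "forty-two");
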
diff --git a/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/ExternalGroupClient.java b/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/ExternalGroupClient.java
index 3f9b0e9..965e194 100644
--- a/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/ExternalGroupClient.java
+++ b/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/ExternalGroupClient.java
@@ -168,10 +168,10 @@
IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
FloatSerializerDeserializer.INSTANCE, FloatSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor fileScanner = new FileScanOperatorDescriptor(spec, splitsProvider,
new DelimitedDataTupleParserFactory(new IValueParserFactory[] { IntegerParserFactory.INSTANCE,
diff --git a/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/WordCountMain.java b/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/WordCountMain.java
index a3e1ee0..cb6006b 100644
--- a/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/WordCountMain.java
+++ b/hyracks/hyracks-examples/text-example/textclient/src/main/java/org/apache/hyracks/examples/text/client/WordCountMain.java
@@ -132,14 +132,14 @@
IFileSplitProvider splitsProvider = new ConstantFileSplitProvider(inSplits);
RecordDescriptor wordDesc = new RecordDescriptor(
- new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+ new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
FileScanOperatorDescriptor wordScanner = new FileScanOperatorDescriptor(spec, splitsProvider,
new WordTupleParserFactory(), wordDesc);
createPartitionConstraint(spec, wordScanner, inSplits);
RecordDescriptor groupResultDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
IOperatorDescriptor gBy;
int[] keys = new int[] { 0 };
diff --git a/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Common.java b/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Common.java
index 28b62a7..ac172fd 100644
--- a/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Common.java
+++ b/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Common.java
@@ -34,27 +34,27 @@
public class Common {
static RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() });
static RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
static RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() });
static IValueParserFactory[] orderParserFactories = new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
diff --git a/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Join.java b/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Join.java
index dadb935..b20d2b8 100644
--- a/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Join.java
+++ b/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/org/apache/hyracks/examples/tpch/client/Join.java
@@ -229,7 +229,7 @@
if (hasGroupBy) {
RecordDescriptor groupResultDesc = new RecordDescriptor(new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE });
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE });
HashGroupOperatorDescriptor gby = new HashGroupOperatorDescriptor(
spec,
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs/dataflow/DataflowTest.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs/dataflow/DataflowTest.java
index e36dd06..1515037 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs/dataflow/DataflowTest.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs/dataflow/DataflowTest.java
@@ -141,7 +141,7 @@
String[] readSchedule = scheduler.getLocationConstraints(splits);
JobSpecification jobSpec = new JobSpecification();
RecordDescriptor recordDesc = new RecordDescriptor(
- new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+ new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
String[] locations = new String[] { HyracksUtils.NC1_ID, HyracksUtils.NC1_ID, HyracksUtils.NC2_ID,
HyracksUtils.NC2_ID };
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs2/dataflow/DataflowTest.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs2/dataflow/DataflowTest.java
index a703e57..0b41b07 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs2/dataflow/DataflowTest.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/org/apache/hyracks/hdfs2/dataflow/DataflowTest.java
@@ -148,7 +148,7 @@
String[] readSchedule = scheduler.getLocationConstraints(splits);
JobSpecification jobSpec = new JobSpecification();
RecordDescriptor recordDesc = new RecordDescriptor(
- new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+ new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
String[] locations = new String[] { HyracksUtils.NC1_ID, HyracksUtils.NC1_ID, HyracksUtils.NC2_ID,
HyracksUtils.NC2_ID };
diff --git a/hyracks/hyracks-storage-am-common/pom.xml b/hyracks/hyracks-storage-am-common/pom.xml
index 727da41..86b0eac 100644
--- a/hyracks/hyracks-storage-am-common/pom.xml
+++ b/hyracks/hyracks-storage-am-common/pom.xml
@@ -68,5 +68,10 @@
<type>jar</type>
<scope>compile</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-util</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ </dependency>
</dependencies>
</project>
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorDescriptor.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorDescriptor.java
index c99b1e5..554cd3f 100644
--- a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorDescriptor.java
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorDescriptor.java
@@ -38,7 +38,7 @@
private static final long serialVersionUID = 1L;
private static final RecordDescriptor recDesc = new RecordDescriptor(
- new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+ new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
public TreeIndexStatsOperatorDescriptor(IOperatorDescriptorRegistry spec, IStorageManagerInterface storageManager,
IIndexLifecycleManagerProvider lifecycleManagerProvider, IFileSplitProvider fileSplitProvider,
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorNodePushable.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorNodePushable.java
index 51ca3c2..584418c 100644
--- a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorNodePushable.java
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/dataflow/TreeIndexStatsOperatorNodePushable.java
@@ -38,6 +38,7 @@
private final AbstractTreeIndexOperatorDescriptor opDesc;
private final IHyracksTaskContext ctx;
private final TreeIndexDataflowHelper treeIndexHelper;
+ private final UTF8StringSerializerDeserializer utf8SerDer = new UTF8StringSerializerDeserializer();
private TreeIndexStatsGatherer statsGatherer;
public TreeIndexStatsOperatorNodePushable(AbstractTreeIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx,
@@ -77,7 +78,7 @@
ArrayTupleBuilder tb = new ArrayTupleBuilder(1);
DataOutput dos = tb.getDataOutput();
tb.reset();
- UTF8StringSerializerDeserializer.INSTANCE.serialize(stats.toString(), dos);
+ utf8SerDer.serialize(stats.toString(), dos);
tb.addFieldEndOffset();
if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
throw new HyracksDataException(
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleReference.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleReference.java
index 1a17a5a..c6a0035 100644
--- a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleReference.java
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleReference.java
@@ -19,11 +19,14 @@
package org.apache.hyracks.storage.am.common.tuples;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.VarLenIntDecoder;
+
import java.nio.ByteBuffer;
import org.apache.hyracks.api.dataflow.value.ITypeTraits;
import org.apache.hyracks.storage.am.common.api.ITreeIndexFrame;
import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleReference;
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
public class TypeAwareTupleReference implements ITreeIndexTupleReference {
protected ByteBuffer buf;
@@ -34,7 +37,7 @@
protected int dataStartOff;
protected ITypeTraits[] typeTraits;
- protected VarLenIntEncoderDecoder encDec = new VarLenIntEncoderDecoder();
+ protected VarLenIntDecoder encDec = VarLenIntEncoderDecoder.createDecoder();
protected int[] decodedFieldSlots;
public TypeAwareTupleReference(ITypeTraits[] typeTraits) {
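
For the read path, a sketch of how the new hyracks-util decoder is expected to be driven, assuming it keeps the reset/decode protocol of the per-package class it replaces (see the deleted VarLenIntEncoderDecoder below); variable names are illustrative:

    // Decode the variable-length field-size slots of a tuple.
    VarLenIntEncoderDecoder.VarLenIntDecoder decoder = VarLenIntEncoderDecoder.createDecoder();
    decoder.reset(tupleBytes, fieldSlotsStartOffset);
    int firstVarFieldLength = decoder.decode();   // consumes 1 to 5 bytes and advances internally
    int secondVarFieldLength = decoder.decode();
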
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleWriter.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleWriter.java
index 73e6e3e..c44cb6b 100644
--- a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleWriter.java
+++ b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/TypeAwareTupleWriter.java
@@ -25,11 +25,12 @@
import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference;
import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleReference;
import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleWriter;
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
public class TypeAwareTupleWriter implements ITreeIndexTupleWriter {
protected ITypeTraits[] typeTraits;
- protected VarLenIntEncoderDecoder encDec = new VarLenIntEncoderDecoder();
+ protected VarLenIntEncoderDecoder.VarLenIntDecoder decoder = VarLenIntEncoderDecoder.createDecoder();
public TypeAwareTupleWriter(ITypeTraits[] typeTraits) {
this.typeTraits = typeTraits;
@@ -73,13 +74,11 @@
}
// write field slots for variable length fields
- encDec.reset(targetBuf, runner);
for (int i = 0; i < tuple.getFieldCount(); i++) {
if (!typeTraits[i].isFixedLength()) {
- encDec.encode(tuple.getFieldLength(i));
+ runner += VarLenIntEncoderDecoder.encode(tuple.getFieldLength(i), targetBuf, runner);
}
}
- runner = encDec.getPos();
// write data fields
for (int i = 0; i < tuple.getFieldCount(); i++) {
@@ -100,13 +99,11 @@
}
// write field slots for variable length fields
- encDec.reset(targetBuf, runner);
for (int i = startField; i < startField + numFields; i++) {
if (!typeTraits[i].isFixedLength()) {
- encDec.encode(tuple.getFieldLength(i));
+ runner += VarLenIntEncoderDecoder.encode(tuple.getFieldLength(i), targetBuf, runner);
}
}
- runner = encDec.getPos();
for (int i = startField; i < startField + numFields; i++) {
System.arraycopy(tuple.getFieldData(i), tuple.getFieldStart(i), targetBuf, runner, tuple.getFieldLength(i));
@@ -124,7 +121,7 @@
int fieldSlotBytes = 0;
for (int i = 0; i < tuple.getFieldCount(); i++) {
if (!typeTraits[i].isFixedLength()) {
- fieldSlotBytes += encDec.getBytesRequired(tuple.getFieldLength(i));
+ fieldSlotBytes += VarLenIntEncoderDecoder.getBytesRequired(tuple.getFieldLength(i));
}
}
return fieldSlotBytes;
@@ -138,7 +135,7 @@
int fieldSlotBytes = 0;
for (int i = startField; i < startField + numFields; i++) {
if (!typeTraits[i].isFixedLength()) {
- fieldSlotBytes += encDec.getBytesRequired(tuple.getFieldLength(i));
+ fieldSlotBytes += VarLenIntEncoderDecoder.getBytesRequired(tuple.getFieldLength(i));
}
}
return fieldSlotBytes;
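
The stateful reset/encode/getPos sequence is replaced by static helpers: encode(value, buf, offset) evidently returns the number of bytes written (the caller advances the offset itself, as in the hunks above), and getBytesRequired(value) sizes the slot ahead of time. A minimal sketch:

    byte[] buf = new byte[16];
    int offset = 0;
    offset += VarLenIntEncoderDecoder.encode(5, buf, offset);       // small value: 1 byte
    offset += VarLenIntEncoderDecoder.encode(70000, buf, offset);   // larger value: 3 bytes
    assert offset == VarLenIntEncoderDecoder.getBytesRequired(5)
            + VarLenIntEncoderDecoder.getBytesRequired(70000);
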
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/VarLenIntEncoderDecoder.java b/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/VarLenIntEncoderDecoder.java
deleted file mode 100644
index cd3d366..0000000
--- a/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/tuples/VarLenIntEncoderDecoder.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.hyracks.storage.am.common.tuples;
-
-// encodes positive integers in a variable-byte format
-
-public class VarLenIntEncoderDecoder {
- public static final int ENCODE_MASK = 0x0000007F;
- public static final byte CONTINUE_CHUNK = (byte) 0x80;
- public static final byte DECODE_MASK = (byte) 0x7F;
-
- private byte[] encTmp = new byte[5];
-
- private int pos;
- private byte[] bytes;
-
- public void reset(byte[] bytes, int pos) {
- this.bytes = bytes;
- this.pos = pos;
- }
-
- public int encode(int val) {
- int origPos = 0;
- int tmpPos = 0;
- while (val > ENCODE_MASK) {
- encTmp[tmpPos++] = (byte) (val & ENCODE_MASK);
- val = val >>> 7;
- }
- encTmp[tmpPos++] = (byte) (val);
-
- // reverse order to optimize for decoding speed
- for (int i = 0; i < tmpPos - 1; i++) {
- bytes[pos++] = (byte) (encTmp[tmpPos - 1 - i] | CONTINUE_CHUNK);
- }
- bytes[pos++] = encTmp[0];
-
- return pos - origPos;
- }
-
- public int decode() {
- int sum = 0;
- while ((bytes[pos] & CONTINUE_CHUNK) == CONTINUE_CHUNK) {
- sum = (sum + (bytes[pos] & DECODE_MASK)) << 7;
- pos++;
- }
- sum += bytes[pos++];
- return sum;
- }
-
- // calculate the number of bytes needed for encoding
- public int getBytesRequired(int val) {
- int byteCount = 0;
- while (val > ENCODE_MASK) {
- val = val >>> 7;
- byteCount++;
- }
- return byteCount + 1;
- }
-
- public int getPos() {
- return pos;
- }
-
- // fast encoding, slow decoding version
- /*
- * public void encode(int val) { while(val > ENCODE_MASK) { bytes[pos++] =
- * (byte)(((byte)(val & ENCODE_MASK)) | CONTINUE_CHUNK); val = val >>> 7; }
- * bytes[pos++] = (byte)(val); }
- *
- * public int decode() { int sum = 0; int shift = 0; while( (bytes[pos] &
- * CONTINUE_CHUNK) == CONTINUE_CHUNK) { sum = (sum + (bytes[pos] &
- * DECODE_MASK)) << 7 * shift++; pos++; } sum += bytes[pos++] << 7 * shift;
- * return sum; }
- */
-}
\ No newline at end of file
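
The encoder itself now lives in the new hyracks-util module as org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder; the format remains a 7-bits-per-byte variable-length integer with the continuation bit 0x80 set on every byte except the last. A worked example under the scheme shown in the deleted code:

    // value 300 = 0b1_0010_1100
    // encoded   = { (byte) 0x82, (byte) 0x2C }   // high chunk 2 with 0x80 set, then low chunk 44
    // decoded   = ((0 + 2) << 7) + 44 = 300
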
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/pom.xml b/hyracks/hyracks-storage-am-lsm-invertedindex/pom.xml
index af70253..dc5282e 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/pom.xml
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/pom.xml
@@ -17,41 +17,46 @@
! under the License.
!-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
- <parent>
- <artifactId>hyracks</artifactId>
- <groupId>org.apache.hyracks</groupId>
- <version>0.2.17-SNAPSHOT</version>
- <relativePath>..</relativePath>
- </parent>
+ <parent>
+ <artifactId>hyracks</artifactId>
+ <groupId>org.apache.hyracks</groupId>
+ <version>0.2.17-SNAPSHOT</version>
+ <relativePath>..</relativePath>
+ </parent>
- <licenses>
- <license>
- <name>Apache License, Version 2.0</name>
- <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
- <distribution>repo</distribution>
- <comments>A business-friendly OSS license</comments>
- </license>
- </licenses>
+ <licenses>
+ <license>
+ <name>Apache License, Version 2.0</name>
+ <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+ <distribution>repo</distribution>
+ <comments>A business-friendly OSS license</comments>
+ </license>
+ </licenses>
-
- <dependencies>
- <dependency>
- <groupId>org.apache.hyracks</groupId>
- <artifactId>hyracks-storage-am-btree</artifactId>
- <version>0.2.17-SNAPSHOT</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hyracks</groupId>
- <artifactId>hyracks-storage-am-lsm-common</artifactId>
- <version>0.2.17-SNAPSHOT</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- </dependencies>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-util</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-storage-am-btree</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-storage-am-lsm-common</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
</project>
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/AbstractTOccurrenceSearcher.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/AbstractTOccurrenceSearcher.java
index f536a67..7d34198 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/AbstractTOccurrenceSearcher.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/AbstractTOccurrenceSearcher.java
@@ -50,7 +50,7 @@
public abstract class AbstractTOccurrenceSearcher implements IInvertedIndexSearcher {
protected static final RecordDescriptor QUERY_TOKEN_REC_DESC = new RecordDescriptor(
- new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
+ new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer() });
protected final int OBJECT_CACHE_INIT_SIZE = 10;
protected final int OBJECT_CACHE_EXPAND_SIZE = 10;
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8StringBinaryTokenizer.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8StringBinaryTokenizer.java
index 1460857..9d4446f 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8StringBinaryTokenizer.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8StringBinaryTokenizer.java
@@ -19,19 +19,16 @@
package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public abstract class AbstractUTF8StringBinaryTokenizer implements IBinaryTokenizer {
- protected byte[] data;
- protected int start;
- protected int length;
- protected int tokenLength;
- protected int index;
- protected int originalIndex;
- protected int utf8Length;
- protected boolean tokenCountCalculated = false;
- protected short tokenCount;
+ protected byte[] sentenceBytes;
+ protected int sentenceStartOffset;
+ protected int sentenceEndOffset;
+ protected int sentenceUtf8Length;
+
+ protected int byteIndex;
protected final IntArray tokensStart;
protected final IntArray tokensLength;
@@ -59,27 +56,27 @@
return token;
}
+ //TODO: This UTF8Tokenizer strongly relies on the Asterix data format,
+ // i.e. both the TypeTag handling and the byteIndex advancing assume the given byte[] sentence
+ // is an AString object. A better way (if we want to keep the byte[] interface) would be
+ // to give this tokenizer the pure UTF8 character sequence whose {@code start} is the start
+ // of the first character, and to move the offset shifting to the caller.
@Override
- public void reset(byte[] data, int start, int length) {
- this.start = start;
- index = this.start;
- if (sourceHasTypeTag) {
- index++; // skip type tag
- }
- utf8Length = UTF8StringPointable.getUTFLength(data, index);
- index += 2; // skip utf8 length indicator
- this.data = data;
- this.length = length + start;
+ public void reset(byte[] sentenceData, int start, int length) {
+ this.sentenceBytes = sentenceData;
+ this.sentenceStartOffset = start;
+ this.sentenceEndOffset = length + start;
- tokenLength = 0;
+ byteIndex = this.sentenceStartOffset;
+ if (sourceHasTypeTag) {
+ byteIndex++; // skip type tag
+ }
+ sentenceUtf8Length = UTF8StringUtil.getUTFLength(sentenceData, byteIndex);
+ byteIndex += UTF8StringUtil.getNumBytesToStoreLength(sentenceUtf8Length); // skip utf8 length indicator
+
if (!ignoreTokenCount) {
tokensStart.reset();
tokensLength.reset();
}
-
- // Needed for calculating the number of tokens
- originalIndex = index;
- tokenCountCalculated = false;
- tokenCount = 0;
}
}
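
The tokenizer reset above shows the central consequence of the new format: the length indicator before the character data is no longer a fixed 2 bytes but 1 to 5 bytes, so callers must ask UTF8StringUtil how wide it is. A sketch of locating the first character of a serialized string (variable names are illustrative):

    int off = start;
    if (sourceHasTypeTag) {
        off++;                                                   // skip the AString type tag
    }
    int utfLen = UTF8StringUtil.getUTFLength(bytes, off);        // decode the var-size length
    off += UTF8StringUtil.getNumBytesToStoreLength(utfLen);      // skip 1..5 length bytes (was a fixed 2)
    // 'off' now points at the first character; 'utfLen' is the byte length of the character data
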
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8Token.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8Token.java
index 7aeb6fa..9613fb9 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8Token.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/AbstractUTF8Token.java
@@ -21,17 +21,18 @@
import java.io.DataOutput;
import java.io.IOException;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.data.std.util.UTF8StringBuilder;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public abstract class AbstractUTF8Token implements IToken {
public static final int GOLDEN_RATIO_32 = 0x09e3779b9;
- protected int length;
- protected int tokenLength;
- protected int start;
- protected int tokenCount;
protected byte[] data;
+ protected int startOffset;
+ protected int endOffset;
+ protected int tokenLength;
+ protected int tokenCount;
protected final byte tokenTypeTag;
protected final byte countTypeTag;
@@ -51,24 +52,24 @@
}
@Override
- public int getLength() {
- return length;
+ public int getEndOffset() {
+ return endOffset;
}
- public int getLowerCaseUTF8Len(int size) {
+ public int getLowerCaseUTF8Len(int limit) {
int lowerCaseUTF8Len = 0;
- int pos = start;
- for (int i = 0; i < size; i++) {
- char c = Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
- lowerCaseUTF8Len += UTF8StringPointable.getModifiedUTF8Len(c);
- pos += UTF8StringPointable.charSize(data, pos);
+ int pos = startOffset;
+ for (int i = 0; i < limit; i++) {
+ char c = Character.toLowerCase(UTF8StringUtil.charAt(data, pos));
+ lowerCaseUTF8Len += UTF8StringUtil.getModifiedUTF8Len(c);
+ pos += UTF8StringUtil.charSize(data, pos);
}
return lowerCaseUTF8Len;
}
@Override
- public int getStart() {
- return start;
+ public int getStartOffset() {
+ return startOffset;
}
@Override
@@ -88,11 +89,20 @@
}
}
+ /**
+ * Note: the {@code startOffset} is the offset of the first character, not the offset of the string length field
+ *
+ * @param data
+ * @param startOffset
+ * @param endOffset
+ * @param tokenLength
+ * @param tokenCount the count of this token in a document, a record, or another unit.
+ */
@Override
- public void reset(byte[] data, int start, int length, int tokenLength, int tokenCount) {
+ public void reset(byte[] data, int startOffset, int endOffset, int tokenLength, int tokenCount) {
this.data = data;
- this.start = start;
- this.length = length;
+ this.startOffset = startOffset;
+ this.endOffset = endOffset;
this.tokenLength = tokenLength;
this.tokenCount = tokenCount;
}
@@ -102,4 +112,38 @@
handleCountTypeTag(out.getDataOutput());
out.getDataOutput().writeInt(tokenCount);
}
+
+ // The preChar and postChar are required to be single-byte UTF-8 characters, e.g. ASCII characters.
+ protected void serializeToken(UTF8StringBuilder builder, GrowableArray out, int numPreChars, int numPostChars,
+ char preChar, char postChar)
+ throws IOException {
+
+ handleTokenTypeTag(out.getDataOutput());
+
+ assert UTF8StringUtil.getModifiedUTF8Len(preChar) == 1 && UTF8StringUtil.getModifiedUTF8Len(postChar) == 1;
+ int actualUtfLen = endOffset - startOffset;
+
+ builder.reset(out, actualUtfLen + numPreChars + numPostChars);
+ // pre chars
+ for (int i = 0; i < numPreChars; i++) {
+ builder.appendChar(preChar);
+ }
+
+ // regular chars
+ int numRegChars = tokenLength - numPreChars - numPostChars;
+ int pos = startOffset;
+ for (int i = 0; i < numRegChars; i++) {
+ char c = Character.toLowerCase(UTF8StringUtil.charAt(data, pos));
+ builder.appendChar(c);
+ pos += UTF8StringUtil.charSize(data, pos);
+ }
+
+ // post chars
+ for (int i = 0; i < numPostChars; i++) {
+ builder.appendChar(postChar);
+ }
+
+ builder.finish();
+ }
+
}
\ No newline at end of file
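
The shared serializeToken helper delegates the length bookkeeping to UTF8StringBuilder, which is assumed to write and finalize the variable-size length header itself; that is why the old writeShort(0)-then-patch dance disappears from the concrete tokens (see UTF8NGramToken and UTF8WordToken below). A sketch of the builder protocol, run in a context that handles IOException:

    UTF8StringBuilder builder = new UTF8StringBuilder();
    GrowableArray out = new GrowableArray();
    builder.reset(out, 3);        // estimated UTF-8 byte length of the content
    builder.appendChar('f');
    builder.appendChar('o');
    builder.appendChar('o');
    builder.finish();             // the var-size length header is finalized here
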
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
index ddf3a43..f6d6be4 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
@@ -19,54 +19,66 @@
package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public class DelimitedUTF8StringBinaryTokenizer extends AbstractUTF8StringBinaryTokenizer {
+ protected short tokenCount;
+ private boolean tokenCountCalculated;
+ private int originalIndex;
+
public DelimitedUTF8StringBinaryTokenizer(boolean ignoreTokenCount, boolean sourceHasTypeTag,
ITokenFactory tokenFactory) {
super(ignoreTokenCount, sourceHasTypeTag, tokenFactory);
}
@Override
- public boolean hasNext() {
- // skip delimiters
- while (index < length && isSeparator(UTF8StringPointable.charAt(data, index))) {
- index += UTF8StringPointable.charSize(data, index);
- }
- return index < length;
+ public void reset(byte[] sentenceData, int start, int length) {
+ super.reset(sentenceData, start, length);
+ // Needed for calculating the number of tokens
+ tokenCount = 0;
+ tokenCountCalculated = false;
+ originalIndex = byteIndex;
}
- private boolean isSeparator(char c) {
- return !(Character.isLetterOrDigit(c) || Character.getType(c) == Character.OTHER_LETTER || Character.getType(c) == Character.OTHER_NUMBER);
+ @Override
+ public boolean hasNext() {
+ // skip delimiters
+ while (byteIndex < sentenceEndOffset && isSeparator(UTF8StringUtil.charAt(sentenceBytes, byteIndex))) {
+ byteIndex += UTF8StringUtil.charSize(sentenceBytes, byteIndex);
+ }
+ return byteIndex < sentenceEndOffset;
+ }
+
+ private static boolean isSeparator(char c) {
+ return !(Character.isLetterOrDigit(c) || Character.getType(c) == Character.OTHER_LETTER
+ || Character.getType(c) == Character.OTHER_NUMBER);
}
@Override
public void next() {
- tokenLength = 0;
- int currentTokenStart = index;
- while (index < length && !isSeparator(UTF8StringPointable.charAt(data, index))) {
- index += UTF8StringPointable.charSize(data, index);
+ int tokenLength = 0;
+ int currentTokenStart = byteIndex;
+ while (byteIndex < sentenceEndOffset && !isSeparator(UTF8StringUtil.charAt(sentenceBytes, byteIndex))) {
+ byteIndex += UTF8StringUtil.charSize(sentenceBytes, byteIndex);
tokenLength++;
}
- int tokenCount = 1;
+ int curTokenCount = 1;
if (tokenLength > 0 && !ignoreTokenCount) {
// search if we got the same token before
for (int i = 0; i < tokensStart.length(); ++i) {
if (tokenLength == tokensLength.get(i)) {
int tokenStart = tokensStart.get(i);
- tokenCount++; // assume we found it
+ curTokenCount++; // assume we found it
int offset = 0;
- int currLength = 0;
- while (currLength < tokenLength) {
+ for (int charPos = 0; charPos < tokenLength; charPos++) {
// case insensitive comparison
- if (Character.toLowerCase(UTF8StringPointable.charAt(data, currentTokenStart + offset)) != Character
- .toLowerCase(UTF8StringPointable.charAt(data, tokenStart + offset))) {
- tokenCount--;
+ if (Character.toLowerCase(UTF8StringUtil.charAt(sentenceBytes, currentTokenStart + offset))
+ != Character.toLowerCase(UTF8StringUtil.charAt(sentenceBytes, tokenStart + offset))) {
+ curTokenCount--;
break;
}
- offset += UTF8StringPointable.charSize(data, currentTokenStart + offset);
- currLength++;
+ offset += UTF8StringUtil.charSize(sentenceBytes, currentTokenStart + offset);
}
}
}
@@ -76,16 +88,19 @@
}
// set token
- token.reset(data, currentTokenStart, index, tokenLength, tokenCount);
+ token.reset(sentenceBytes, currentTokenStart, byteIndex, tokenLength, curTokenCount);
+ tokenCount++;
}
+
+ // TODO: Why do we compute the tokenCount in advance at all? It seems to be the caller's concern.
@Override
public short getTokensCount() {
if (!tokenCountCalculated) {
tokenCount = 0;
boolean previousCharIsSeparator = true;
- while (originalIndex < length) {
- if (isSeparator(UTF8StringPointable.charAt(data, originalIndex))) {
+ while (originalIndex < sentenceEndOffset) {
+ if (isSeparator(UTF8StringUtil.charAt(sentenceBytes, originalIndex))) {
previousCharIsSeparator = true;
} else {
if (previousCharIsSeparator) {
@@ -93,7 +108,7 @@
previousCharIsSeparator = false;
}
}
- originalIndex += UTF8StringPointable.charSize(data, originalIndex);
+ originalIndex += UTF8StringUtil.charSize(sentenceBytes, originalIndex);
}
}
return tokenCount;
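
For orientation, the typical driver loop for these tokenizers, as exercised by the tokenizer tests later in this patch (getToken() is assumed from the IBinaryTokenizer interface, and the storage array name is illustrative):

    tokenizer.reset(inputBuffer, 0, inputBuffer.length);
    while (tokenizer.hasNext()) {
        tokenizer.next();
        IToken token = tokenizer.getToken();
        token.serializeToken(tokenStorage);   // emits the lower-cased token as a var-size UTF-8 string
    }
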
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8NGramToken.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8NGramToken.java
index 43ee3c0..8ffd355 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8NGramToken.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8NGramToken.java
@@ -21,8 +21,8 @@
import java.io.IOException;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public class HashedUTF8NGramToken extends UTF8NGramToken {
public HashedUTF8NGramToken(byte tokenTypeTag, byte countTypeTag) {
@@ -43,11 +43,11 @@
// regular chars
int numRegGrams = tokenLength - numPreChars - numPostChars;
- int pos = start;
+ int pos = startOffset;
for (int i = 0; i < numRegGrams; i++) {
- hash ^= Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
+ hash ^= Character.toLowerCase(UTF8StringUtil.charAt(data, pos));
hash *= GOLDEN_RATIO_32;
- pos += UTF8StringPointable.charSize(data, pos);
+ pos += UTF8StringUtil.charSize(data, pos);
}
// post chars
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8WordToken.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8WordToken.java
index 18f958d..150ffd6 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8WordToken.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/HashedUTF8WordToken.java
@@ -21,8 +21,8 @@
import java.io.IOException;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public class HashedUTF8WordToken extends UTF8WordToken {
@@ -46,11 +46,11 @@
}
int offset = 0;
for (int i = 0; i < tokenLength; i++) {
- if (UTF8StringPointable.charAt(t.getData(), t.getStart() + offset) != UTF8StringPointable.charAt(data,
- start + offset)) {
+ if (UTF8StringUtil.charAt(t.getData(), t.getStartOffset() + offset) != UTF8StringUtil.charAt(data,
+ startOffset + offset)) {
return false;
}
- offset += UTF8StringPointable.charSize(data, start + offset);
+ offset += UTF8StringUtil.charSize(data, startOffset + offset);
}
return true;
}
@@ -61,16 +61,16 @@
}
@Override
- public void reset(byte[] data, int start, int length, int tokenLength, int tokenCount) {
- super.reset(data, start, length, tokenLength, tokenCount);
+ public void reset(byte[] data, int startOffset, int endOffset, int tokenLength, int tokenCount) {
+ super.reset(data, startOffset, endOffset, tokenLength, tokenCount);
// pre-compute hash value using JAQL-like string hashing
- int pos = start;
+ int pos = startOffset;
hash = GOLDEN_RATIO_32;
for (int i = 0; i < tokenLength; i++) {
- hash ^= Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
+ hash ^= Character.toLowerCase(UTF8StringUtil.charAt(data, pos));
hash *= GOLDEN_RATIO_32;
- pos += UTF8StringPointable.charSize(data, pos);
+ pos += UTF8StringUtil.charSize(data, pos);
}
hash += tokenCount;
}
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/IToken.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/IToken.java
index d48af44..cb1b098 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/IToken.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/IToken.java
@@ -24,18 +24,26 @@
import org.apache.hyracks.data.std.util.GrowableArray;
public interface IToken {
- public byte[] getData();
+ public byte[] getData();
- public int getLength();
+ public int getEndOffset();
- public int getStart();
+ public int getStartOffset();
- public int getTokenLength();
+ public int getTokenLength();
- public void reset(byte[] data, int start, int length, int tokenLength,
- int tokenCount);
+ /**
+ * Reset this token to point at the given storage byte array.
+ *
+ * @param data
+ * @param startOffset
+ * @param endOffset
+ * @param tokenLength
+ * @param tokenCount the count of this token in a document, a record, or another unit.
+ */
+ public void reset(byte[] data, int startOffset, int endOffset, int tokenLength, int tokenCount);
- public void serializeToken(GrowableArray out) throws IOException;
+ public void serializeToken(GrowableArray out) throws IOException;
- public void serializeTokenCount(GrowableArray out) throws IOException;
+ public void serializeTokenCount(GrowableArray out) throws IOException;
}
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
index def7ad2..9161a54 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.java
@@ -19,7 +19,7 @@
package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
public class NGramUTF8StringBinaryTokenizer extends AbstractUTF8StringBinaryTokenizer {
@@ -50,7 +50,7 @@
@Override
public void next() {
- int currentTokenStart = index;
+ int currentTokenStart = byteIndex;
int tokenCount = 1;
int numPreChars = 0;
int numPostChars = 0;
@@ -62,46 +62,48 @@
concreteToken.setNumPrePostChars(numPreChars, numPostChars);
if (numPreChars == 0) {
- index += UTF8StringPointable.charSize(data, index);
+ byteIndex += UTF8StringUtil.charSize(sentenceBytes, byteIndex);
}
// compute token count
// ignore pre and post grams for duplicate detection
if (!ignoreTokenCount && numPreChars == 0 && numPostChars == 0) {
- int tmpIndex = start + 2; // skip utf8 length indicator
+ int tmpIndex = sentenceStartOffset;
if (sourceHasTypeTag) {
tmpIndex++; // skip type tag
}
+ int utfLength = UTF8StringUtil.getUTFLength(sentenceBytes, tmpIndex);
+ tmpIndex += UTF8StringUtil.getNumBytesToStoreLength(utfLength); // skip utf8 length indicator
while (tmpIndex < currentTokenStart) {
tokenCount++; // assume found
int offset = 0;
for (int j = 0; j < gramLength; j++) {
- if (Character.toLowerCase(UTF8StringPointable.charAt(data, currentTokenStart + offset)) != Character
- .toLowerCase(UTF8StringPointable.charAt(data, tmpIndex + offset))) {
+ if (Character.toLowerCase(UTF8StringUtil.charAt(sentenceBytes, currentTokenStart + offset))
+ != Character.toLowerCase(UTF8StringUtil.charAt(sentenceBytes, tmpIndex + offset))) {
tokenCount--;
break;
}
- offset += UTF8StringPointable.charSize(data, tmpIndex + offset);
+ offset += UTF8StringUtil.charSize(sentenceBytes, tmpIndex + offset);
}
- tmpIndex += UTF8StringPointable.charSize(data, tmpIndex);
+ tmpIndex += UTF8StringUtil.charSize(sentenceBytes, tmpIndex);
}
}
// set token
- token.reset(data, currentTokenStart, length, gramLength, tokenCount);
+ token.reset(sentenceBytes, currentTokenStart, sentenceEndOffset, gramLength, tokenCount);
}
@Override
- public void reset(byte[] data, int start, int length) {
- super.reset(data, start, length);
+ public void reset(byte[] sentenceData, int start, int length) {
+ super.reset(sentenceData, start, length);
gramNum = 0;
int numChars = 0;
- int pos = index;
- int end = pos + utf8Length;
+ int pos = byteIndex;
+ int end = pos + sentenceUtf8Length;
while (pos < end) {
numChars++;
- pos += UTF8StringPointable.charSize(data, pos);
+ pos += UTF8StringUtil.charSize(sentenceData, pos);
}
if (usePrePost) {
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8NGramToken.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8NGramToken.java
index 7d68d6f..259288c 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8NGramToken.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8NGramToken.java
@@ -21,9 +21,8 @@
import java.io.IOException;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
import org.apache.hyracks.data.std.util.GrowableArray;
-import org.apache.hyracks.dataflow.common.data.util.StringUtils;
+import org.apache.hyracks.data.std.util.UTF8StringBuilder;
public class UTF8NGramToken extends AbstractUTF8Token implements INGramToken {
@@ -34,6 +33,8 @@
protected int numPreChars;
protected int numPostChars;
+ private UTF8StringBuilder builder = new UTF8StringBuilder();
+
public UTF8NGramToken(byte tokenTypeTag, byte countTypeTag) {
super(tokenTypeTag, countTypeTag);
}
@@ -50,38 +51,7 @@
@Override
public void serializeToken(GrowableArray out) throws IOException {
- handleTokenTypeTag(out.getDataOutput());
- int tokenUTF8LenOff = out.getLength();
-
- // regular chars
- int numRegChars = tokenLength - numPreChars - numPostChars;
-
- // assuming pre and post char need 1-byte each in utf8
- int tokenUTF8Len = numPreChars + numPostChars;
-
- // Write dummy UTF length which will be correctly set later.
- out.getDataOutput().writeShort(0);
-
- // pre chars
- for (int i = 0; i < numPreChars; i++) {
- StringUtils.writeCharAsModifiedUTF8(PRECHAR, out.getDataOutput());
- }
-
- int pos = start;
- for (int i = 0; i < numRegChars; i++) {
- char c = Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
- tokenUTF8Len += StringUtils.writeCharAsModifiedUTF8(c, out.getDataOutput());
- pos += UTF8StringPointable.charSize(data, pos);
- }
-
- // post chars
- for (int i = 0; i < numPostChars; i++) {
- StringUtils.writeCharAsModifiedUTF8(POSTCHAR, out.getDataOutput());
- }
-
- // Set UTF length of token.
- out.getByteArray()[tokenUTF8LenOff] = (byte) ((tokenUTF8Len >>> 8) & 0xFF);
- out.getByteArray()[tokenUTF8LenOff + 1] = (byte) ((tokenUTF8Len >>> 0) & 0xFF);
+ super.serializeToken(builder, out, numPreChars, numPostChars, PRECHAR, POSTCHAR);
}
public void setNumPrePostChars(int numPreChars, int numPostChars) {
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8WordToken.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8WordToken.java
index caaa682..bc7085c 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8WordToken.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/UTF8WordToken.java
@@ -21,31 +21,21 @@
import java.io.IOException;
-import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
import org.apache.hyracks.data.std.util.GrowableArray;
-import org.apache.hyracks.dataflow.common.data.util.StringUtils;
+import org.apache.hyracks.data.std.util.UTF8StringBuilder;
public class UTF8WordToken extends AbstractUTF8Token {
+ private static char NULL_PLACEHOLDER = 1; // cannot be 0, because modified UTF-8 encodes the character 0 in two bytes
+
+ private UTF8StringBuilder builder = new UTF8StringBuilder();
+
public UTF8WordToken(byte tokenTypeTag, byte countTypeTag) {
super(tokenTypeTag, countTypeTag);
}
@Override
public void serializeToken(GrowableArray out) throws IOException {
- handleTokenTypeTag(out.getDataOutput());
- int tokenUTF8LenOff = out.getLength();
- int tokenUTF8Len = 0;
- // Write dummy UTF length which will be correctly set later.
- out.getDataOutput().writeShort(0);
- int pos = start;
- for (int i = 0; i < tokenLength; i++) {
- char c = Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
- tokenUTF8Len += StringUtils.writeCharAsModifiedUTF8(c, out.getDataOutput());
- pos += UTF8StringPointable.charSize(data, pos);
- }
- // Set UTF length of token.
- out.getByteArray()[tokenUTF8LenOff] = (byte) ((tokenUTF8Len >>> 8) & 0xFF);
- out.getByteArray()[tokenUTF8LenOff + 1] = (byte) ((tokenUTF8Len >>> 0) & 0xFF);
+ super.serializeToken(builder, out, 0, 0, NULL_PLACEHOLDER, NULL_PLACEHOLDER);
}
}
diff --git a/hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/tuples/RTreeTypeAwareTupleWriter.java b/hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/tuples/RTreeTypeAwareTupleWriter.java
index d2332f0..40b0481 100644
--- a/hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/tuples/RTreeTypeAwareTupleWriter.java
+++ b/hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/tuples/RTreeTypeAwareTupleWriter.java
@@ -24,6 +24,7 @@
import org.apache.hyracks.api.dataflow.value.ITypeTraits;
import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleReference;
import org.apache.hyracks.storage.am.common.tuples.TypeAwareTupleWriter;
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
public class RTreeTypeAwareTupleWriter extends TypeAwareTupleWriter {
@@ -41,13 +42,11 @@
// write field slots for variable length fields
// since the r-tree has fixed length keys, we don't actually need this?
- encDec.reset(targetBuf.array(), runner);
for (int i = startField; i < startField + refs.length; i++) {
if (!typeTraits[i].isFixedLength()) {
- encDec.encode(refs[i].getFieldLength(i));
+ runner += VarLenIntEncoderDecoder.encode(refs[i].getFieldLength(i), targetBuf.array(), runner);
}
}
- runner = encDec.getPos();
// write data
for (int i = 0; i < refs.length; i++) {
diff --git a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexExamplesTest.java b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
index f79997b..b8f2166 100644
--- a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
+++ b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
@@ -164,8 +164,8 @@
typeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
// Declare field serdes.
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
// Declare keys.
int keyFieldCount = 1;
@@ -324,8 +324,8 @@
typeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
// Declare field serdes.
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
// Declare keys.
int keyFieldCount = 1;
@@ -408,8 +408,8 @@
typeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
// Declare field serdes.
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
// Declare keys.
int keyFieldCount = 1;
@@ -514,8 +514,8 @@
typeTraits[0] = UTF8StringPointable.TYPE_TRAITS;
typeTraits[1] = UTF8StringPointable.TYPE_TRAITS;
// Declare field serdes.
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
// Declare keys.
int keyFieldCount = 1;
diff --git a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java
index 160f9bf..e181710 100644
--- a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java
+++ b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexMultiThreadTest.java
@@ -120,7 +120,7 @@
@Test
public void oneStringKeyAndValue() throws InterruptedException, TreeIndexException, HyracksException {
ISerializerDeserializer[] fieldSerdes = new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
int numKeys = 1;
String dataMsg = "One String Key And Value";
diff --git a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexTestDriver.java b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexTestDriver.java
index 0ec313b..b1e8a8c 100644
--- a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexTestDriver.java
+++ b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/btree/OrderedIndexTestDriver.java
@@ -124,8 +124,8 @@
LOGGER.info("BTree " + getTestOpName() + " Test With One String Key And Value.");
}
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
// Range search in ["cbf", cc7"]
ITupleReference lowKey = TupleUtils.createTuple(fieldSerdes, "cbf");
@@ -142,8 +142,8 @@
LOGGER.info("BTree " + getTestOpName() + " Test With Two String Keys.");
}
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
// Range search in ["cbf", "ddd", cc7", "eee"]
ITupleReference lowKey = TupleUtils.createTuple(fieldSerdes, "cbf", "ddd");
@@ -164,9 +164,9 @@
LOGGER.info("BTree " + getTestOpName() + " Test With Two String Keys And Values.");
}
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
// Range search in ["cbf", "ddd", cc7", "eee"]
ITupleReference lowKey = TupleUtils.createTuple(fieldSerdes, "cbf", "ddd");
diff --git a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java
index 6cd81c3..a3029f8 100644
--- a/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java
+++ b/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java
@@ -210,7 +210,7 @@
// Declare field serdes.
ISerializerDeserializer[] fieldSerdes = { IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() };
// Declare RTree keys.
int rtreeKeyFieldCount = 4;
@@ -350,7 +350,7 @@
// Declare field serdes.
ISerializerDeserializer[] fieldSerdes = { IntegerSerializerDeserializer.INSTANCE,
IntegerSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
- IntegerSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
+ IntegerSerializerDeserializer.INSTANCE, new UTF8StringSerializerDeserializer() };
// Declare RTree keys.
int rtreeKeyFieldCount = 4;
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java b/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java
index 49df30f..80a69c4 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/BloomFilterTest.java
@@ -135,9 +135,9 @@
bloomFilterSpec.getNumBucketsPerElements());
int fieldCount = 5;
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/MurmurHashForITupleReferenceTest.java b/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/MurmurHashForITupleReferenceTest.java
index 1d7aa90..3284f8d 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/MurmurHashForITupleReferenceTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/src/test/java/org/apache/hyracks/storage/am/bloomfilter/MurmurHashForITupleReferenceTest.java
@@ -109,7 +109,7 @@
}
int fieldCount = 2;
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer() };
ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
String s = randomString(100, rnd);
@@ -137,8 +137,8 @@
}
int fieldCount = 3;
- ISerializerDeserializer[] fieldSerdes = { UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE };
+ ISerializerDeserializer[] fieldSerdes = { new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer() };
ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(fieldCount);
ArrayTupleReference tuple = new ArrayTupleReference();
String s1 = randomString(40, rnd);
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTuplesTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTuplesTest.java
index a7215a5..d537bf9 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTuplesTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/tuples/LSMBTreeTuplesTest.java
@@ -161,14 +161,14 @@
testLSMBTreeTuple(intFields);
ISerializerDeserializer[] stringFields = new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE };
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
+ new UTF8StringSerializerDeserializer() };
testLSMBTreeTuple(stringFields);
ISerializerDeserializer[] mixedFields = new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE,
- UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE,
+ new UTF8StringSerializerDeserializer(), new UTF8StringSerializerDeserializer(),
IntegerSerializerDeserializer.INSTANCE };
testLSMBTreeTuple(mixedFields);
}
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
index f2896cb..11a57a2 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
@@ -17,40 +17,47 @@
! under the License.
!-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <artifactId>hyracks-storage-am-lsm-invertedindex-test</artifactId>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>hyracks-storage-am-lsm-invertedindex-test</artifactId>
- <parent>
- <artifactId>hyracks-tests</artifactId>
- <groupId>org.apache.hyracks</groupId>
- <version>0.2.17-SNAPSHOT</version>
- <relativePath>..</relativePath>
- </parent>
+ <parent>
+ <artifactId>hyracks-tests</artifactId>
+ <groupId>org.apache.hyracks</groupId>
+ <version>0.2.17-SNAPSHOT</version>
+ <relativePath>..</relativePath>
+ </parent>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-test-support</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-data-std</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>hyracks-util</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
- <dependencies>
- <dependency>
- <groupId>org.apache.hyracks</groupId>
- <artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
- <version>0.2.17-SNAPSHOT</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hyracks</groupId>
- <artifactId>hyracks-test-support</artifactId>
- <version>0.2.17-SNAPSHOT</version>
- <type>jar</type>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hyracks</groupId>
- <artifactId>hyracks-data-std</artifactId>
- <version>0.2.17-SNAPSHOT</version>
- <type>jar</type>
- <scope>test</scope>
- </dependency>
- </dependencies>
-
-</project>
+</project>
\ No newline at end of file
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramTokenizerTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramTokenizerTest.java
index f372dbe..6e764c3 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramTokenizerTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramTokenizerTest.java
@@ -20,21 +20,19 @@
package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
-import java.io.DataOutput;
-import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
+import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.util.string.UTF8StringReader;
+import org.apache.hyracks.util.string.UTF8StringUtil;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
-import org.apache.hyracks.data.std.util.GrowableArray;
-
public class NGramTokenizerTest {
private char PRECHAR = '#';
@@ -72,11 +70,7 @@
@Before
public void init() throws Exception {
- // serialize string into bytes
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- DataOutput dos = new DataOutputStream(baos);
- dos.writeUTF(str);
- inputBuffer = baos.toByteArray();
+ inputBuffer = UTF8StringUtil.writeStringToBytes(str);
}
void runTestNGramTokenizerWithCountedHashedUTF8Tokens(boolean prePost) throws IOException {
@@ -192,7 +186,8 @@
ByteArrayInputStream bais = new ByteArrayInputStream(tokenData.getByteArray());
DataInput in = new DataInputStream(bais);
- String strGram = in.readUTF();
+ UTF8StringReader reader = new UTF8StringReader();
+ String strGram = reader.readUTF(in);
// System.out.println("\"" + strGram + "\"");
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/WordTokenizerTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/WordTokenizerTest.java
index c42022e..78ba6a3 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/WordTokenizerTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/WordTokenizerTest.java
@@ -20,21 +20,19 @@
package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;
import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
-import java.io.DataOutput;
-import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
-import junit.framework.Assert;
-
+import org.apache.hyracks.data.std.util.GrowableArray;
+import org.apache.hyracks.util.string.UTF8StringReader;
+import org.apache.hyracks.util.string.UTF8StringUtil;
import org.junit.Before;
import org.junit.Test;
-import org.apache.hyracks.data.std.util.GrowableArray;
+import junit.framework.Assert;
public class WordTokenizerTest {
@@ -46,7 +44,8 @@
private ArrayList<Integer> expectedCountedHashedUTF8Tokens = new ArrayList<Integer>();
private boolean isSeparator(char c) {
- return !(Character.isLetterOrDigit(c) || Character.getType(c) == Character.OTHER_LETTER || Character.getType(c) == Character.OTHER_NUMBER);
+ return !(Character.isLetterOrDigit(c) || Character.getType(c) == Character.OTHER_LETTER
+ || Character.getType(c) == Character.OTHER_NUMBER);
}
private void tokenize(String text, ArrayList<String> tokens) {
@@ -78,10 +77,7 @@
@Before
public void init() throws IOException {
// serialize text into bytes
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- DataOutput dos = new DataOutputStream(baos);
- dos.writeUTF(text);
- inputBuffer = baos.toByteArray();
+ inputBuffer = UTF8StringUtil.writeStringToBytes(text);
// init expected string tokens
tokenize(text, expectedUTF8Tokens);
@@ -144,7 +140,8 @@
public void testWordTokenizerWithHashedUTF8Tokens() throws IOException {
HashedUTF8WordTokenFactory tokenFactory = new HashedUTF8WordTokenFactory();
- DelimitedUTF8StringBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, false, tokenFactory);
+ DelimitedUTF8StringBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, false,
+ tokenFactory);
tokenizer.reset(inputBuffer, 0, inputBuffer.length);
@@ -175,7 +172,8 @@
public void testWordTokenizerWithUTF8Tokens() throws IOException {
UTF8WordTokenFactory tokenFactory = new UTF8WordTokenFactory();
- DelimitedUTF8StringBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, false, tokenFactory);
+ DelimitedUTF8StringBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, false,
+ tokenFactory);
tokenizer.reset(inputBuffer, 0, inputBuffer.length);
@@ -194,7 +192,8 @@
ByteArrayInputStream bais = new ByteArrayInputStream(tokenData.getByteArray());
DataInput in = new DataInputStream(bais);
- String strToken = in.readUTF();
+ UTF8StringReader reader = new UTF8StringReader();
+ String strToken = reader.readUTF(in);
Assert.assertEquals(expectedUTF8Tokens.get(tokenCount), strToken);
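
Both tokenizer tests now serialize their input through the new hyracks-util helpers instead of DataOutputStream.writeUTF(). A minimal sketch of that round trip (illustrative only, not part of the patch; the sample string is arbitrary):

    import java.io.ByteArrayInputStream;
    import java.io.DataInputStream;
    import java.io.IOException;
    import org.apache.hyracks.util.string.UTF8StringReader;
    import org.apache.hyracks.util.string.UTF8StringUtil;

    static String roundTrip(String text) throws IOException {
        // variable-length prefix instead of the fixed 2-byte writeUTF() prefix
        byte[] inputBuffer = UTF8StringUtil.writeStringToBytes(text);
        return new UTF8StringReader().readUTF(new DataInputStream(new ByteArrayInputStream(inputBuffer)));
    }
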
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
index 36f615f..fd94870 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
@@ -88,7 +88,7 @@
fieldGens[0] = new DocumentStringFieldValueGenerator(2, 10, 10000, rnd);
fieldGens[1] = new SortedIntegerFieldValueGenerator(0);
ISerializerDeserializer[] fieldSerdes = new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE };
TupleGenerator tupleGen = new TupleGenerator(fieldGens, fieldSerdes, 0);
return tupleGen;
}
@@ -98,7 +98,7 @@
fieldGens[0] = new PersonNameFieldValueGenerator(rnd, 0.5f);
fieldGens[1] = new SortedIntegerFieldValueGenerator(0);
ISerializerDeserializer[] fieldSerdes = new ISerializerDeserializer[] {
- UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
+ new UTF8StringSerializerDeserializer(), IntegerSerializerDeserializer.INSTANCE };
TupleGenerator tupleGen = new TupleGenerator(fieldGens, fieldSerdes, 0);
return tupleGen;
}
@@ -110,7 +110,7 @@
case INMEMORY:
case ONDISK:
case LSM: {
- fieldSerdes = new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE,
+ fieldSerdes = new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(),
IntegerSerializerDeserializer.INSTANCE };
break;
}
@@ -118,7 +118,7 @@
case PARTITIONED_ONDISK:
case PARTITIONED_LSM: {
// Such indexes also include the set-size for partitioning.
- fieldSerdes = new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE,
+ fieldSerdes = new ISerializerDeserializer[] { new UTF8StringSerializerDeserializer(),
ShortSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
break;
}
diff --git a/hyracks/hyracks-util/pom.xml b/hyracks/hyracks-util/pom.xml
new file mode 100644
index 0000000..ca38040
--- /dev/null
+++ b/hyracks/hyracks-util/pom.xml
@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied. See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>hyracks</artifactId>
+ <groupId>org.apache.hyracks</groupId>
+ <version>0.2.17-SNAPSHOT</version>
+ </parent>
+
+ <modelVersion>4.0.0</modelVersion>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <version>2.6</version>
+ <executions>
+ <execution>
+ <goals>
+ <goal>test-jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+ <artifactId>hyracks-util</artifactId>
+ <dependencies>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ </dependency>
+ </dependencies>
+
+
+</project>
\ No newline at end of file
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/Base64Parser.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/Base64Parser.java
new file mode 100644
index 0000000..257daee
--- /dev/null
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/Base64Parser.java
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.bytes;
+
+import java.util.Arrays;
+
+public class Base64Parser {
+ private static final byte[] DECODE_MAP = initDecodeMap();
+ private static final byte PADDING = 127;
+
+ private static byte[] initDecodeMap() {
+ byte[] map = new byte[128];
+ Arrays.fill(map, (byte) -1);
+
+ int i;
+ for (i = 'A'; i <= 'Z'; i++) {
+ map[i] = (byte) (i - 'A');
+ }
+ for (i = 'a'; i <= 'z'; i++) {
+ map[i] = (byte) (i - 'a' + 26);
+ }
+ for (i = '0'; i <= '9'; i++) {
+ map[i] = (byte) (i - '0' + 52);
+ }
+ map['+'] = 62;
+ map['/'] = 63;
+ map['='] = PADDING;
+
+ return map;
+ }
+
+ private byte[] quadruplet = new byte[4];
+ private byte[] storage;
+ private int length = 0;
+
+ /**
+ * Parse the Base64 sequence from {@code input} into {@code out}
+ * Note: the caller should make sure {@code out} has enough space, e.g. by checking {@link #guessLength(char[], int, int)} first.
+ *
+ * @param input
+ * @param start
+ * @param length
+ * @param out
+ * @param offset
+ * @return the number of written bytes
+ */
+ public int parseBase64String(char[] input, int start, int length, byte[] out, int offset) {
+ int outLength = 0;
+
+ int i;
+ int q = 0;
+
+ // convert each quadruplet to three bytes.
+ for (i = 0; i < length; i++) {
+ char ch = input[start + i];
+ byte v = DECODE_MAP[ch];
+
+ if (v == -1) {
+ throw new IllegalArgumentException("Invalid Base64 character");
+ }
+ quadruplet[q++] = v;
+
+ if (q == 4) {
+ outLength += dumpQuadruplet(out, offset + outLength);
+ q = 0;
+ }
+ }
+
+ return outLength;
+ }
+
+ /**
+ * Parse the Base64 sequence from {@code input} into {@code out}
+ * Note: the caller should make sure {@code out} has enough space, e.g. by checking {@link #guessLength(byte[], int, int)} first.
+ *
+ * @param input
+ * @param start
+ * @param length
+ * @param out
+ * @param offset
+ * @return the number of written bytes
+ */
+ public int parseBase64String(byte[] input, int start, int length, byte[] out, int offset) {
+ int outLength = 0;
+
+ int i;
+ int q = 0;
+
+ // convert each quadruplet to three bytes.
+ for (i = 0; i < length; i++) {
+ char ch = (char) input[start + i];
+ byte v = DECODE_MAP[ch];
+
+ if (v == -1) {
+ throw new IllegalArgumentException("Invalid Base64 character");
+ }
+ quadruplet[q++] = v;
+
+ if (q == 4) {
+ outLength += dumpQuadruplet(out, offset + outLength);
+ q = 0;
+ }
+ }
+
+ return outLength;
+ }
+
+ /**
+ * Computes the length of the binary data speculatively.
+ * Our requirement is to create byte[] of the exact length to store the binary data.
+ * If we do this in a straightforward way, it takes two passes over the data.
+ * Experiments show that this is a non-trivial overhead (35% or so is spent on
+ * the first pass in calculating the length.)
+ * So the approach here is that we compute the length speculatively, without looking
+ * at the whole contents. The obtained speculative value is never less than the
+ * actual length of the binary data, but it may be bigger. So if the speculation
+ * goes wrong, we'll pay the cost of reallocation and buffer copying.
+ * If the Base64 text is tightly packed with no indentation or illegal characters
+ * (like what most web services produce), then the speculation of this method
+ * will be correct, so we get the performance benefit.
+ */
+ public static int guessLength(char[] chars, int start, int length) {
+
+ // compute the tail '=' chars
+ int j = length - 1;
+ for (; j >= 0; j--) {
+ byte code = DECODE_MAP[chars[start + j]];
+ if (code == PADDING) {
+ continue;
+ }
+ if (code == -1) // most likely this base64 text is indented. go with the upper bound
+ {
+ return length / 4 * 3;
+ }
+ break;
+ }
+
+ j++; // text.charAt(j) is now at some base64 char, so +1 to make it the size
+ int padSize = length - j;
+ if (padSize > 2) // something is wrong with base64. be safe and go with the upper bound
+ {
+ return length / 4 * 3;
+ }
+
+ // so far this base64 looks like it's unindented tightly packed base64.
+ // take a chance and create an array with the expected size
+ return length / 4 * 3 - padSize;
+ }
+
+ public static int guessLength(byte[] chars, int start, int length) {
+
+ // compute the tail '=' chars
+ int j = length - 1;
+ for (; j >= 0; j--) {
+ byte code = DECODE_MAP[chars[start + j]];
+ if (code == PADDING) {
+ continue;
+ }
+ if (code == -1) // most likely this base64 text is indented. go with the upper bound
+ {
+ return length / 4 * 3;
+ }
+ break;
+ }
+
+ j++; // text.charAt(j) is now at some base64 char, so +1 to make it the size
+ int padSize = length - j;
+ if (padSize > 2) // something is wrong with base64. be safe and go with the upper bound
+ {
+ return length / 4 * 3;
+ }
+
+ // so far this base64 looks like it's unindented tightly packed base64.
+ // take a chance and create an array with the expected size
+ return length / 4 * 3 - padSize;
+ }
+
+ public byte[] getByteArray() {
+ return storage;
+ }
+
+ public int getLength() {
+ return length;
+ }
+
+ /**
+ * Same as {@link #parseBase64String(byte[], int, int, byte[], int)}, but the storage is provided for the caller.
+ *
+ * @param input
+ * @param start
+ * @param length
+ */
+ public void generatePureByteArrayFromBase64String(byte[] input, int start, int length) {
+ // The Base64 character count equals the UTF-8 byte length
+ if (length % 4 != 0) {
+ throw new IllegalArgumentException(
+ "Invalid Base64 string, the length of the string should be a multiple of 4");
+ }
+ final int buflen = guessLength(input, start, length);
+ ensureCapacity(buflen);
+ this.length = parseBase64String(input, start, length, storage, 0);
+ }
+
+ public void generatePureByteArrayFromBase64String(char[] input, int start, int length) {
+ if (length % 4 != 0) {
+ throw new IllegalArgumentException(
+ "Invalid Base64 string, the length of the string should be a multiple of 4");
+ }
+ final int buflen = guessLength(input, start, length);
+ ensureCapacity(buflen);
+ this.length = parseBase64String(input, start, length, storage, 0);
+ }
+
+ private void ensureCapacity(int length) {
+ if (storage == null || storage.length < length) {
+ storage = new byte[length];
+ }
+ }
+
+ private int dumpQuadruplet(byte[] out, int offset) {
+ int outLength = 0;
+ // quadruplet is now filled.
+ out[offset + outLength++] = (byte) ((quadruplet[0] << 2) | (quadruplet[1] >> 4));
+ if (quadruplet[2] != PADDING) {
+ out[offset + outLength++] = (byte) ((quadruplet[1] << 4) | (quadruplet[2] >> 2));
+ }
+ if (quadruplet[3] != PADDING) {
+ out[offset + outLength++] = (byte) ((quadruplet[2] << 6) | (quadruplet[3]));
+ }
+ return outLength;
+ }
+
+}
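
As a usage sketch for the parser above (illustrative only, not part of the patch; the sample Base64 text is arbitrary): "aGVsbG8=" decodes to the five bytes of "hello". The backing buffer may be larger than the decoded data, so getLength() must be consulted.

    import java.nio.charset.StandardCharsets;
    import org.apache.hyracks.util.bytes.Base64Parser;

    Base64Parser parser = new Base64Parser();
    byte[] base64 = "aGVsbG8=".getBytes(StandardCharsets.US_ASCII); // "hello"
    parser.generatePureByteArrayFromBase64String(base64, 0, base64.length);
    byte[] decoded = parser.getByteArray(); // backing buffer, possibly larger than the data
    int len = parser.getLength();           // 5 valid bytes: 'h', 'e', 'l', 'l', 'o'
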
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/Base64Printer.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/Base64Printer.java
new file mode 100644
index 0000000..0e1c078
--- /dev/null
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/Base64Printer.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.bytes;
+
+import java.io.IOException;
+
+public class Base64Printer {
+ /**
+ * Encodes a byte array into an {@code Appendable} by doing Base64 encoding.
+ *
+ * @return the same {@code Appendable} that was passed in.
+ */
+ public static Appendable printBase64Binary(byte[] input, int offset, int len, Appendable appendable)
+ throws IOException {
+ // encode elements until only 1 or 2 elements are left to encode
+ int remaining = len;
+ int i;
+ for (i = offset; remaining >= 3; remaining -= 3, i += 3) {
+ appendable.append(encode(input[i] >> 2));
+ appendable.append(encode(
+ ((input[i] & 0x3) << 4)
+ | ((input[i + 1] >> 4) & 0xF)));
+ appendable.append(encode(
+ ((input[i + 1] & 0xF) << 2)
+ | ((input[i + 2] >> 6) & 0x3)));
+ appendable.append(encode(input[i + 2] & 0x3F));
+ }
+ // encode when exactly 1 element (left) to encode
+ if (remaining == 1) {
+ appendable.append(encode(input[i] >> 2));
+ appendable.append(encode(((input[i]) & 0x3) << 4));
+ appendable.append('=');
+ appendable.append('=');
+ }
+ // encode when exactly 2 elements (left) to encode
+ if (remaining == 2) {
+ appendable.append(encode(input[i] >> 2));
+ appendable.append(encode(((input[i] & 0x3) << 4)
+ | ((input[i + 1] >> 4) & 0xF)));
+ appendable.append(encode((input[i + 1] & 0xF) << 2));
+ appendable.append('=');
+ }
+ return appendable;
+ }
+
+ /**
+ * Encodes a byte array into a char array by doing base64 encoding.
+ * The caller must supply a big enough buffer.
+ *
+ * @return the value of {@code ptr+((len+2)/3)*4}, which is the new offset
+ * in the output buffer where the further bytes should be placed.
+ */
+ public static int printBase64Binary(byte[] input, int offset, int len, char[] buf, int ptr) {
+ // encode elements until only 1 or 2 elements are left to encode
+ int remaining = len;
+ int i;
+ for (i = offset; remaining >= 3; remaining -= 3, i += 3) {
+ buf[ptr++] = encode(input[i] >> 2);
+ buf[ptr++] = encode(
+ ((input[i] & 0x3) << 4)
+ | ((input[i + 1] >> 4) & 0xF));
+ buf[ptr++] = encode(
+ ((input[i + 1] & 0xF) << 2)
+ | ((input[i + 2] >> 6) & 0x3));
+ buf[ptr++] = encode(input[i + 2] & 0x3F);
+ }
+ // encode when exactly 1 element (left) to encode
+ if (remaining == 1) {
+ buf[ptr++] = encode(input[i] >> 2);
+ buf[ptr++] = encode(((input[i]) & 0x3) << 4);
+ buf[ptr++] = '=';
+ buf[ptr++] = '=';
+ }
+ // encode when exactly 2 elements (left) to encode
+ if (remaining == 2) {
+ buf[ptr++] = encode(input[i] >> 2);
+ buf[ptr++] = encode(((input[i] & 0x3) << 4)
+ | ((input[i + 1] >> 4) & 0xF));
+ buf[ptr++] = encode((input[i + 1] & 0xF) << 2);
+ buf[ptr++] = '=';
+ }
+ return ptr;
+ }
+
+ private static final char[] encodeMap = initEncodeMap();
+
+ private static char[] initEncodeMap() {
+ char[] map = new char[64];
+ int i;
+ for (i = 0; i < 26; i++) {
+ map[i] = (char) ('A' + i);
+ }
+ for (i = 26; i < 52; i++) {
+ map[i] = (char) ('a' + (i - 26));
+ }
+ for (i = 52; i < 62; i++) {
+ map[i] = (char) ('0' + (i - 52));
+ }
+ map[62] = '+';
+ map[63] = '/';
+
+ return map;
+ }
+
+ public static char encode(int i) {
+ return encodeMap[i & 0x3F];
+ }
+}
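
A matching sketch for the printer (illustrative only, not part of the patch; the sample bytes are arbitrary): encoding the five bytes of "hello" reproduces the padded Base64 text from the parser example above.

    import java.io.IOException;
    import org.apache.hyracks.util.bytes.Base64Printer;

    static String toBase64(byte[] data) throws IOException {
        StringBuilder sb = new StringBuilder();
        Base64Printer.printBase64Binary(data, 0, data.length, sb);
        return sb.toString(); // for the bytes of "hello" this yields "aGVsbG8="
    }
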
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/HexParser.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/HexParser.java
new file mode 100644
index 0000000..ba7276b
--- /dev/null
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/HexParser.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.bytes;
+
+public class HexParser {
+ public static boolean isValidHexChar(char c) {
+ if (c >= '0' && c <= '9'
+ || c >= 'a' && c <= 'f'
+ || c >= 'A' && c <= 'F') {
+ return true;
+ }
+ return false;
+ }
+
+ public static int getValueFromValidHexChar(char c) {
+ if (c >= '0' && c <= '9') {
+ return c - '0';
+ }
+ if (c >= 'a' && c <= 'f') {
+ return 10 + c - 'a';
+ }
+ if (c >= 'A' && c <= 'F') {
+ return 10 + c - 'A';
+ }
+ throw new IllegalArgumentException("Invalid hex character : " + c);
+ }
+
+ private byte[] storage;
+ private int length;
+
+ public byte[] getByteArray() {
+ return storage;
+ }
+
+ public int getLength() {
+ return length;
+ }
+
+ public void generateByteArrayFromHexString(char[] input, int start, int length) {
+ if (length % 2 != 0) {
+ throw new IllegalArgumentException(
+ "Invalid hex string for binary type: the string length should be a muliple of 2.");
+ }
+ this.length = length / 2;
+ ensureCapacity(this.length);
+ generateByteArrayFromHexString(input, start, length, storage, 0);
+ }
+
+ public void generateByteArrayFromHexString(byte[] input, int start, int length) {
+ if (length % 2 != 0) {
+ throw new IllegalArgumentException(
+ "Invalid hex string for binary type: the string length should be a muliple of 2.");
+ }
+ this.length = length / 2;
+ ensureCapacity(this.length);
+ generateByteArrayFromHexString(input, start, length, storage, 0);
+ }
+
+ private void ensureCapacity(int capacity) {
+ if (storage == null || storage.length < capacity) {
+ storage = new byte[capacity];
+ }
+ }
+
+ public static void generateByteArrayFromHexString(char[] input, int start, int length, byte[] output,
+ int offset) {
+ for (int i = 0; i < length; i += 2) {
+ output[offset + i / 2] = (byte) ((getValueFromValidHexChar(input[start + i]) << 4) +
+ getValueFromValidHexChar(input[start + i + 1]));
+ }
+ }
+
+ public static void generateByteArrayFromHexString(byte[] input, int start, int length, byte[] output,
+ int offset) {
+ for (int i = 0; i < length; i += 2) {
+ output[offset + i / 2] = (byte) ((getValueFromValidHexChar((char) input[start + i]) << 4) +
+ getValueFromValidHexChar((char) input[start + i + 1]));
+ }
+ }
+}
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/HexPrinter.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/HexPrinter.java
new file mode 100644
index 0000000..5a9c064
--- /dev/null
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/bytes/HexPrinter.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.bytes;
+
+import java.io.IOException;
+
+public class HexPrinter {
+ public enum CASE {
+ LOWER_CASE,
+ UPPER_CASE,
+ }
+
+ public static byte hex(int i, CASE c) {
+ switch (c) {
+ case LOWER_CASE:
+ return (byte) (i < 10 ? i + '0' : i + ('a' - 10));
+ case UPPER_CASE:
+ return (byte) (i < 10 ? i + '0' : i + ('A' - 10));
+ }
+ return Byte.parseByte(null);
+ }
+
+ public static Appendable printHexString(byte[] bytes, int start, int length, Appendable appendable)
+ throws IOException {
+ for (int i = 0; i < length; ++i) {
+ appendable.append((char) hex((bytes[start + i] >>> 4) & 0x0f, CASE.UPPER_CASE));
+ appendable.append((char) hex((bytes[start + i] & 0x0f), CASE.UPPER_CASE));
+ }
+ return appendable;
+ }
+}
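
A quick round trip through the two hex helpers (illustrative only, not part of the patch; the sample bytes are arbitrary):

    import java.io.IOException;
    import org.apache.hyracks.util.bytes.HexParser;
    import org.apache.hyracks.util.bytes.HexPrinter;

    static void hexRoundTrip() throws IOException {
        byte[] data = { (byte) 0xCA, (byte) 0xFE };
        StringBuilder sb = new StringBuilder();
        HexPrinter.printHexString(data, 0, data.length, sb); // appends "CAFE" (upper case)

        HexParser parser = new HexParser();
        char[] hex = sb.toString().toCharArray();
        parser.generateByteArrayFromHexString(hex, 0, hex.length);
        // parser.getByteArray() now starts with 0xCA, 0xFE; parser.getLength() == 2
    }
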
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/encoding/VarLenIntEncoderDecoder.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/encoding/VarLenIntEncoderDecoder.java
new file mode 100644
index 0000000..5a716b4
--- /dev/null
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/encoding/VarLenIntEncoderDecoder.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.encoding;
+
+import java.io.DataInput;
+import java.io.IOException;
+
+/**
+ * Encodes non-negative integers in a variable-length byte format.
+ *
+ * Each byte stores seven bits of the number. The highest bit of each byte flags whether more bytes follow.
+ * Specifically, if the highest bit is set, we shift the accumulated value by seven and
+ * continue to read the next byte, until we meet a byte whose highest bit is unset.
+ *
+ * e.g. if the number is < 128, it is stored in one byte and the byte keeps its original value.
+ * To store the number 255 (0xff), it is encoded as [0x81, 0x7f]. To decode that value, the reader sees 0x81,
+ * knows that the current chunk is (0x81 & 0x7f) = 0x01, and that the set highest bit means more bytes follow.
+ * When it meets 0x7f, whose highest bit is unset, it knows that it is the final byte to decode.
+ * Finally it returns (0x01 << 7) + 0x7f = 255.
+ *
+ */
+public class VarLenIntEncoderDecoder {
+ // the decimal values below make it easier to get a sense of how big each bound is.
+ public static final int BOUND_ONE_BYTE = 128; // 1 << 7
+ public static final int BOUND_TWO_BYTE = 16384; // 1 << 14
+ public static final int BOUND_THREE_BYTE = 2097152; // 1 << 21
+ public static final int BOUND_FOUR_BYTE = 268435456; // 1 << 28
+ public static final int BOUND_FIVE_BYTE = Integer.MAX_VALUE;
+
+ public static final int ENCODE_MASK = 0x0000007F;
+ public static final byte CONTINUE_CHUNK = (byte) 0x80;
+ public static final byte DECODE_MASK = 0x7F;
+
+ // calculate the number of bytes needed for encoding
+ public static int getBytesRequired(int length) {
+ if (length < 0) {
+ throw new IllegalArgumentException("The length must be an non-negative value");
+ }
+
+ int byteCount = 0;
+ while (length > ENCODE_MASK) {
+ length = length >>> 7;
+ byteCount++;
+ }
+ return byteCount + 1;
+ }
+
+ public static int decode(DataInput in) throws IOException {
+ int sum = 0;
+ byte b = in.readByte();
+ while ((b & CONTINUE_CHUNK) == CONTINUE_CHUNK) {
+ sum = (sum + (b & DECODE_MASK)) << 7;
+ b = in.readByte();
+ }
+ sum += b;
+ return sum;
+ }
+
+ public static int decode(byte[] srcBytes, int startPos) {
+ int sum = 0;
+ while ((srcBytes[startPos] & CONTINUE_CHUNK) == CONTINUE_CHUNK) {
+ sum = (sum + (srcBytes[startPos] & DECODE_MASK)) << 7;
+ startPos++;
+ }
+ sum += srcBytes[startPos++];
+ return sum;
+ }
+
+ public static int encode(int lengthVal, byte[] destBytes, int startPos) {
+ if (lengthVal < 0) {
+ throw new IllegalArgumentException("The length must be an non-negative value");
+ }
+ int nextPos = startPos;
+ while (lengthVal > ENCODE_MASK) {
+ destBytes[nextPos++] = (byte) (lengthVal & ENCODE_MASK);
+ lengthVal = lengthVal >>> 7;
+ }
+ destBytes[nextPos++] = (byte) lengthVal;
+
+ // reverse order to optimize for decoding speed
+ int length = nextPos - startPos;
+ int i = 0;
+ for (; i < length / 2; i++) {
+ byte b = destBytes[startPos + i];
+ destBytes[startPos + i] = (byte) (destBytes[startPos + length - 1 - i] | CONTINUE_CHUNK);
+ destBytes[startPos + length - 1 - i] = (byte) (b | CONTINUE_CHUNK);
+ }
+ destBytes[startPos + i] |= CONTINUE_CHUNK;
+ destBytes[nextPos - 1] &= ENCODE_MASK;
+ return length;
+ }
+
+ public static VarLenIntDecoder createDecoder() {
+ return new VarLenIntDecoder();
+ }
+
+ // keep the stateful version for the ease of the continuously decoding behaviors.
+ public static class VarLenIntDecoder {
+
+ private byte[] bytes = null;
+ private int pos = 0;
+
+ public VarLenIntDecoder reset(byte[] bytes, int pos) {
+ this.bytes = bytes;
+ this.pos = pos;
+ return this;
+ }
+
+ /**
+ * @return the int value
+ */
+ public int decode() {
+ int sum = 0;
+ while ((bytes[pos] & CONTINUE_CHUNK) == CONTINUE_CHUNK) {
+ sum = (sum + (bytes[pos] & DECODE_MASK)) << 7;
+ pos++;
+ }
+ sum += bytes[pos++];
+ return sum;
+ }
+
+ public int getPos() {
+ return pos;
+ }
+
+ }
+
+}
\ No newline at end of file
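
The example from the class comment, spelled out (illustrative only, not part of the patch): 255 needs two bytes and is written as [0x81, 0x7f].

    import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;

    int value = 255;
    byte[] buf = new byte[VarLenIntEncoderDecoder.getBytesRequired(value)]; // 2
    VarLenIntEncoderDecoder.encode(value, buf, 0);        // buf == { (byte) 0x81, 0x7f }
    int decoded = VarLenIntEncoderDecoder.decode(buf, 0); // 255
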
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringReader.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringReader.java
new file mode 100644
index 0000000..3cd0300
--- /dev/null
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringReader.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.string;
+
+import java.io.DataInput;
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.Serializable;
+import java.io.UTFDataFormatException;
+
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
+
+public class UTF8StringReader implements Serializable {
+
+ private byte[] bytearr = null;
+ private char[] chararr = null;
+
+ /**
+ * Reads from the
+ * stream <code>in</code> a representation
+ * of a Unicode character string encoded in
+ * <a href="DataInput.html#modified-utf-8">modified UTF-8</a> format;
+ * this string of characters is then returned as a <code>String</code>.
+ * The details of the modified UTF-8 representation
+ * are exactly the same as for the <code>readUTF</code>
+ * method of <code>DataInput</code>.
+ *
+ * @param in a data input stream.
+ * @return a Unicode string.
+ * @throws EOFException if the input stream reaches the end
+ * before all the bytes.
+ * @throws IOException the stream has been closed and the contained
+ * input stream does not support reading after close, or
+ * another I/O error occurs.
+ * @throws UTFDataFormatException if the bytes do not represent a
+ * valid modified UTF-8 encoding of a Unicode string.
+ * @see java.io.DataInputStream#readUnsignedShort()
+ */
+ public final String readUTF(DataInput in) throws IOException {
+ int utflen = VarLenIntEncoderDecoder.decode(in);
+
+ if (bytearr == null || bytearr.length < utflen) {
+ bytearr = new byte[utflen * 2];
+ chararr = new char[utflen * 2];
+ }
+
+ int c, char2, char3;
+ int count = 0;
+ int chararr_count = 0;
+
+ in.readFully(bytearr, 0, utflen);
+
+ while (count < utflen) {
+ c = (int) bytearr[count] & 0xff;
+ if (c > 127)
+ break;
+ count++;
+ chararr[chararr_count++] = (char) c;
+ }
+
+ while (count < utflen) {
+ c = (int) bytearr[count] & 0xff;
+ switch (c >> 4) {
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ /* 0xxxxxxx*/
+ count++;
+ chararr[chararr_count++] = (char) c;
+ break;
+ case 12:
+ case 13:
+ /* 110x xxxx 10xx xxxx*/
+ count += 2;
+ if (count > utflen)
+ throw new UTFDataFormatException(
+ "malformed input: partial character at end");
+ char2 = (int) bytearr[count - 1];
+ if ((char2 & 0xC0) != 0x80)
+ throw new UTFDataFormatException(
+ "malformed input around byte " + count);
+ chararr[chararr_count++] = (char) (((c & 0x1F) << 6) |
+ (char2 & 0x3F));
+ break;
+ case 14:
+ /* 1110 xxxx 10xx xxxx 10xx xxxx */
+ count += 3;
+ if (count > utflen)
+ throw new UTFDataFormatException(
+ "malformed input: partial character at end");
+ char2 = (int) bytearr[count - 2];
+ char3 = (int) bytearr[count - 1];
+ if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
+ throw new UTFDataFormatException(
+ "malformed input around byte " + (count - 1));
+ chararr[chararr_count++] = (char) (((c & 0x0F) << 12) |
+ ((char2 & 0x3F) << 6) |
+ ((char3 & 0x3F) << 0));
+ break;
+ default:
+ /* 10xx xxxx, 1111 xxxx */
+ throw new UTFDataFormatException(
+ "malformed input around byte " + count);
+ }
+ }
+ // The number of chars produced may be less than utflen
+ return new String(chararr, 0, chararr_count);
+ }
+}
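
A sketch of decoding a hand-built buffer with the reader above (illustrative only, not part of the patch): a short ASCII string carries a single-byte length prefix.

    import java.io.ByteArrayInputStream;
    import java.io.DataInputStream;
    import java.io.IOException;
    import org.apache.hyracks.util.string.UTF8StringReader;

    static String decodeHello() throws IOException {
        byte[] encoded = { 0x05, 'h', 'e', 'l', 'l', 'o' }; // 1-byte length prefix (5) + "hello"
        return new UTF8StringReader().readUTF(new DataInputStream(new ByteArrayInputStream(encoded)));
    }
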
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
new file mode 100644
index 0000000..7929691
--- /dev/null
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
@@ -0,0 +1,422 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.string;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
+
+/**
+ * A helper class for operating on UTF8String bytes in Hyracks.
+ * Most of the code was migrated from asterix-fuzzyjoin and hyracks-storage-am-invertedindex.
+ */
+public class UTF8StringUtil {
+ public static char charAt(byte[] b, int s) {
+ if (s >= b.length) {
+ throw new ArrayIndexOutOfBoundsException("Position " + s + " is out of bounds (length " + b.length + ")");
+ }
+ int c = b[s] & 0xff;
+ switch (c >> 4) {
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ return (char) c;
+
+ case 12:
+ case 13:
+ return (char) (((c & 0x1F) << 6) | ((b[s + 1]) & 0x3F));
+
+ case 14:
+ return (char) (((c & 0x0F) << 12) | (((b[s + 1]) & 0x3F) << 6) | (((b[s + 2]) & 0x3F) << 0));
+
+ default:
+ throw new IllegalArgumentException();
+ }
+ }
+
+ public static int charSize(byte[] b, int s) {
+ int c = b[s] & 0xff;
+ switch (c >> 4) {
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ return 1;
+
+ case 12:
+ case 13:
+ return 2;
+
+ case 14:
+ return 3;
+ }
+ throw new IllegalStateException();
+ }
+
+ public static int getModifiedUTF8Len(char c) {
+ if (c >= 0x0001 && c <= 0x007F) {
+ return 1;
+ } else if (c <= 0x07FF) {
+ return 2;
+ } else {
+ return 3;
+ }
+ }
+
+ public static int writeCharAsModifiedUTF8(char c, DataOutput dos) throws IOException {
+ if (c >= 0x0001 && c <= 0x007F) {
+ dos.writeByte(c);
+ return 1;
+ } else if (c <= 0x07FF) {
+ dos.writeByte((byte) (0xC0 | ((c >> 6) & 0x3F)));
+ dos.writeByte((byte) (0x80 | (c & 0x3F)));
+ return 2;
+ } else {
+ dos.writeByte((byte) (0xE0 | ((c >> 12) & 0x0F)));
+ dos.writeByte((byte) (0x80 | ((c >> 6) & 0x3F)));
+ dos.writeByte((byte) (0x80 | (c & 0x3F)));
+ return 3;
+ }
+ }
+
+ public static int writeCharAsModifiedUTF8(char c, OutputStream dos) throws IOException {
+ if (c >= 0x0001 && c <= 0x007F) {
+ dos.write(c);
+ return 1;
+ } else if (c <= 0x07FF) {
+ dos.write((byte) (0xC0 | ((c >> 6) & 0x3F)));
+ dos.write((byte) (0x80 | (c & 0x3F)));
+ return 2;
+ } else {
+ dos.write((byte) (0xE0 | ((c >> 12) & 0x0F)));
+ dos.write((byte) (0x80 | ((c >> 6) & 0x3F)));
+ dos.write((byte) (0x80 | (c & 0x3F)));
+ return 3;
+ }
+ }
+
+ public static int getStringLength(byte[] b, int s) {
+ int len = getUTFLength(b, s);
+ int pos = s + getNumBytesToStoreLength(len);
+ int end = pos + len;
+ int charCount = 0;
+ while (pos < end) {
+ charCount++;
+ pos += charSize(b, pos);
+ }
+ return charCount;
+ }
+
+ public static int getUTFLength(byte[] b, int s) {
+ return VarLenIntEncoderDecoder.decode(b, s);
+ }
+
+ public static int getNumBytesToStoreLength(int strlen) {
+ return VarLenIntEncoderDecoder.getBytesRequired(strlen);
+ }
+
+ public static int UTF8ToCodePoint(byte[] b, int s) {
+ if (b[s] >> 7 == 0) {
+ // 1 byte
+ return b[s];
+ } else if ((b[s] & 0xe0) == 0xc0) { /*0xe0 = 0b1110000*/
+ // 2 bytes
+ return ((int) (b[s] & 0x1f)) << 6 | /*0x3f = 0b00111111*/
+ ((int) (b[s + 1] & 0x3f));
+ } else if ((b[s] & 0xf0) == 0xe0) {
+ // 3bytes
+ return ((int) (b[s] & 0xf)) << 12 | ((int) (b[s + 1] & 0x3f)) << 6
+ | ((int) (b[s + 2] & 0x3f));
+ } else if ((b[s] & 0xf8) == 0xf0) {
+ // 4bytes
+ return ((int) (b[s] & 0x7)) << 18 | ((int) (b[s + 1] & 0x3f)) << 12
+ | ((int) (b[s + 2] & 0x3f)) << 6 | ((int) (b[s + 3] & 0x3f));
+ } else if ((b[s] & 0xfc) == 0xf8) {
+ // 5bytes
+ return ((int) (b[s] & 0x3)) << 24 | ((int) (b[s + 1] & 0x3f)) << 18
+ | ((int) (b[s + 2] & 0x3f)) << 12 | ((int) (b[s + 3] & 0x3f)) << 6
+ | ((int) (b[s + 4] & 0x3f));
+ } else if ((b[s] & 0xfe) == 0xfc) {
+ // 6bytes
+ return ((int) (b[s] & 0x1)) << 30 | ((int) (b[s + 1] & 0x3f)) << 24
+ | ((int) (b[s + 2] & 0x3f)) << 18 | ((int) (b[s + 3] & 0x3f)) << 12
+ | ((int) (b[s + 4] & 0x3f)) << 6 | ((int) (b[s + 5] & 0x3f));
+ }
+ return 0;
+ }
+
+ public static int codePointToUTF8(int c, byte[] outputUTF8) {
+ if (c < 0x80) {
+ outputUTF8[0] = (byte) (c & 0x7F /* mask 7 lsb: 0b1111111 */);
+ return 1;
+ } else if (c < 0x0800) {
+ outputUTF8[0] = (byte) (c >> 6 & 0x1F | 0xC0);
+ outputUTF8[1] = (byte) (c & 0x3F | 0x80);
+ return 2;
+ } else if (c < 0x010000) {
+ outputUTF8[0] = (byte) (c >> 12 & 0x0F | 0xE0);
+ outputUTF8[1] = (byte) (c >> 6 & 0x3F | 0x80);
+ outputUTF8[2] = (byte) (c & 0x3F | 0x80);
+ return 3;
+ } else if (c < 0x200000) {
+ outputUTF8[0] = (byte) (c >> 18 & 0x07 | 0xF0);
+ outputUTF8[1] = (byte) (c >> 12 & 0x3F | 0x80);
+ outputUTF8[2] = (byte) (c >> 6 & 0x3F | 0x80);
+ outputUTF8[3] = (byte) (c & 0x3F | 0x80);
+ return 4;
+ } else if (c < 0x4000000) {
+ outputUTF8[0] = (byte) (c >> 24 & 0x03 | 0xF8);
+ outputUTF8[1] = (byte) (c >> 18 & 0x3F | 0x80);
+ outputUTF8[2] = (byte) (c >> 12 & 0x3F | 0x80);
+ outputUTF8[3] = (byte) (c >> 6 & 0x3F | 0x80);
+ outputUTF8[4] = (byte) (c & 0x3F | 0x80);
+ return 5;
+ } else if (c < 0x80000000) {
+ outputUTF8[0] = (byte) (c >> 30 & 0x01 | 0xFC);
+ outputUTF8[1] = (byte) (c >> 24 & 0x3F | 0x80);
+ outputUTF8[2] = (byte) (c >> 18 & 0x3F | 0x80);
+ outputUTF8[3] = (byte) (c >> 12 & 0x3F | 0x80);
+ outputUTF8[4] = (byte) (c >> 6 & 0x3F | 0x80);
+ outputUTF8[5] = (byte) (c & 0x3F | 0x80);
+ return 6;
+ }
+ return 0;
+ }
+
+ /**
+ * Compute the normalized key of the UTF8 string.
+ * The normalized key in Hyracks is mainly used to speed up the comparison between pointable data.
+ * In the UTF8StringPointable case, we compute the integer value from the first two characters.
+ * The comparator first uses this integer to get the result (<, >, or =); it checks
+ * the actual bytes only if the normalized keys are equal. Thus this normalized key must be
+ * consistent with the comparison result.
+ */
+ public static int normalize(byte[] bytes, int start) {
+ int len = getUTFLength(bytes, start);
+ long nk = 0;
+ int offset = start + getNumBytesToStoreLength(len);
+ for (int i = 0; i < 2; ++i) {
+ nk <<= 16;
+ if (i < len) {
+ nk += ((int) charAt(bytes, offset)) & 0xffff;
+ offset += charSize(bytes, offset);
+ }
+ }
+ return (int) (nk >> 1); // make it always positive.
+ }
+
+ public static int compareTo(byte[] thisBytes, int thisStart, byte[] thatBytes, int thatStart) {
+ return compareTo(thisBytes, thisStart, thatBytes, thatStart, false, false);
+ }
+
+ /**
+ * This function provides the raw bytes-based comparison for UTF8 strings.
+ * Note that the comparison may not deliver the correct ordering for languages whose characters take 2 or 3 bytes.
+ * But it works for single-byte character languages.
+ */
+ public static int rawByteCompareTo(byte[] thisBytes, int thisStart, byte[] thatBytes, int thatStart) {
+ return compareTo(thisBytes, thisStart, thatBytes, thatStart, false, true);
+ }
+
+ public static int lowerCaseCompareTo(byte[] thisBytes, int thisStart, byte[] thatBytes, int thatStart) {
+ return compareTo(thisBytes, thisStart, thatBytes, thatStart, true, false);
+ }
+
+ public static int hash(byte[] bytes, int start, int coefficient, int r) {
+ return hash(bytes, start, false, false, coefficient, r);
+ }
+
+ public static int hash(byte[] bytes, int start) {
+ return hash(bytes, start, false, false, 31, Integer.MAX_VALUE);
+ }
+
+ /**
+ * This function provides the raw bytes-based hash function for UTF8 strings.
+ * Note that for languages whose characters take 2 or 3 bytes, the raw-byte hash may differ from the character-based hash.
+ * But it works for single-byte character languages.
+ */
+ public static int rawBytehash(byte[] bytes, int start) {
+ return hash(bytes, start, false, true, 31, Integer.MAX_VALUE);
+ }
+
+ public static int lowerCaseHash(byte[] bytes, int start) {
+ return hash(bytes, start, true, false, 31, Integer.MAX_VALUE);
+ }
+
+ public static StringBuilder toString(StringBuilder builder, byte[] bytes, int start) {
+ int utfLen = getUTFLength(bytes, start);
+ int offset = getNumBytesToStoreLength(utfLen);
+ while (utfLen > 0) {
+ char c = charAt(bytes, start + offset);
+ builder.append(c);
+ int cLen = getModifiedUTF8Len(c);
+ offset += cLen;
+ utfLen -= cLen;
+ }
+ return builder;
+ }
+
+ public static void printUTF8StringWithQuotes(byte[] b, int s, int l, OutputStream os) throws IOException {
+ printUTF8String(b, s, l, os, true);
+ }
+
+ public static void printUTF8StringNoQuotes(byte[] b, int s, int l, OutputStream os) throws IOException {
+ printUTF8String(b, s, l, os, false);
+ }
+
+ public static void printUTF8StringWithQuotes(String str, OutputStream os) throws IOException {
+ printUTF8String(str, os, true);
+ }
+
+ public static void printUTF8StringNoQuotes(String str, OutputStream os) throws IOException {
+ printUTF8String(str, os, false);
+ }
+
+ public static int encodeUTF8Length(int length, byte[] bytes, int start) {
+ return VarLenIntEncoderDecoder.encode(length, bytes, start);
+ }
+
+ public static int writeUTF8Length(int length, byte[] bytes, DataOutput out) throws IOException {
+ int nbytes = encodeUTF8Length(length, bytes, 0);
+ out.write(bytes, 0, nbytes);
+ return nbytes;
+ }
+
+ private static void printUTF8String(byte[] b, int s, int l, OutputStream os, boolean useQuotes) throws IOException {
+ int stringLength = getUTFLength(b, s);
+ int position = s + getNumBytesToStoreLength(stringLength);
+ int maxPosition = position + stringLength;
+ if (useQuotes) {
+ os.write('\"');
+ }
+ while (position < maxPosition) {
+ char c = charAt(b, position);
+ switch (c) {
+ // escape
+ case '\\':
+ case '"':
+ os.write('\\');
+ break;
+ }
+ int sz = charSize(b, position);
+ while (sz > 0) {
+ os.write(b[position]);
+ position++;
+ sz--;
+ }
+ }
+ if (useQuotes) {
+ os.write('\"');
+ }
+ }
+
+ private static void printUTF8String(String string, OutputStream os, boolean useQuotes) throws IOException {
+ if (useQuotes) {
+ os.write('\"');
+ }
+ for (int i = 0; i < string.length(); i++) {
+ char ch = string.charAt(i);
+ writeCharAsModifiedUTF8(ch, os);
+ }
+ if (useQuotes) {
+ os.write('\"');
+ }
+ }
+
+ private static int compareTo(byte[] thisBytes, int thisStart, byte[] thatBytes, int thatStart,
+ boolean useLowerCase, boolean useRawByte) {
+ int utflen1 = getUTFLength(thisBytes, thisStart);
+ int utflen2 = getUTFLength(thatBytes, thatStart);
+
+ int c1 = 0;
+ int c2 = 0;
+
+ int s1Start = thisStart + getNumBytesToStoreLength(utflen1);
+ int s2Start = thatStart + getNumBytesToStoreLength(utflen2);
+
+ while (c1 < utflen1 && c2 < utflen2) {
+ char ch1, ch2;
+ if (useRawByte) {
+ ch1 = (char) thisBytes[s1Start + c1];
+ ch2 = (char) thatBytes[s2Start + c2];
+ } else {
+ ch1 = (charAt(thisBytes, s1Start + c1));
+ ch2 = (charAt(thatBytes, s2Start + c2));
+
+ if (useLowerCase) {
+ ch1 = Character.toLowerCase(ch1);
+ ch2 = Character.toLowerCase(ch2);
+ }
+ }
+
+ if (ch1 != ch2) {
+ return ch1 - ch2;
+ }
+ c1 += charSize(thisBytes, s1Start + c1);
+ c2 += charSize(thatBytes, s2Start + c2);
+ }
+ return utflen1 - utflen2;
+ }
+
+ private static int hash(byte[] bytes, int start, boolean useLowerCase, boolean useRawByte, int coefficient, int r) {
+ int h = 0;
+ int utflen = getUTFLength(bytes, start);
+ int sStart = start + getNumBytesToStoreLength(utflen);
+ int c = 0;
+
+ while (c < utflen) {
+ char ch;
+ if (useRawByte) {
+ ch = (char) bytes[sStart + c];
+ } else {
+ ch = charAt(bytes, sStart + c);
+ if (useLowerCase) {
+ ch = Character.toLowerCase(ch);
+ }
+ }
+ h = (coefficient * h + ch) % r;
+ c += charSize(bytes, sStart + c);
+ }
+ return h;
+ }
+
+ public static byte[] writeStringToBytes(String string) {
+ UTF8StringWriter writer = new UTF8StringWriter();
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ DataOutputStream dos = new DataOutputStream(bos);
+ try {
+ writer.writeUTF8(string, dos);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ return bos.toByteArray();
+ }
+}
\ No newline at end of file
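
A short sketch of the length accessors above (illustrative only, not part of the patch): for a 5-character ASCII string the variable-length prefix occupies a single byte, one byte less than the old fixed 2-byte prefix.

    import org.apache.hyracks.util.string.UTF8StringUtil;

    byte[] bytes = UTF8StringUtil.writeStringToBytes("hello");
    int utfLen = UTF8StringUtil.getUTFLength(bytes, 0);            // 5
    int metaLen = UTF8StringUtil.getNumBytesToStoreLength(utfLen); // 1 (2 for UTF-8 lengths >= 128)
    String back = UTF8StringUtil.toString(new StringBuilder(), bytes, 0).toString(); // "hello"
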
diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringWriter.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringWriter.java
new file mode 100644
index 0000000..021c02f
--- /dev/null
+++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringWriter.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hyracks.util.string;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.Serializable;
+
+import org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder;
+
+public class UTF8StringWriter implements Serializable {
+ private byte[] tempBytes;
+
+ public final void writeUTF8(CharSequence str, DataOutput out) throws IOException {
+ int strlen = str.length();
+ int utflen = 0;
+ char c;
+ int count = 0;
+
+ for (int i = 0; i < strlen; i++) {
+ c = str.charAt(i);
+ utflen += UTF8StringUtil.getModifiedUTF8Len(c);
+ }
+
+ ensureTempSize(utflen);
+
+ count += VarLenIntEncoderDecoder.encode(utflen, tempBytes, count);
+
+ int i = 0;
+ for (; i < strlen; i++) {
+ c = str.charAt(i);
+ if (!((c >= 0x0001) && (c <= 0x007F))) {
+ break;
+ }
+ tempBytes[count++] = (byte) c;
+ }
+
+ for (; i < strlen; i++) {
+ c = str.charAt(i);
+ count += writeToBytes(tempBytes, count, c);
+ }
+ out.write(tempBytes, 0, count);
+ }
+
+ public final void writeUTF8(char[] buffer, int start, int length, DataOutput out) throws IOException {
+ int utflen = 0;
+ int count = 0;
+ char c;
+
+ for (int i = 0; i < length; i++) {
+ c = buffer[i + start];
+ utflen += UTF8StringUtil.getModifiedUTF8Len(c);
+ }
+
+ ensureTempSize(utflen);
+
+ count += VarLenIntEncoderDecoder.encode(utflen, tempBytes, count);
+
+ int i = 0;
+ for (; i < length; i++) {
+ c = buffer[i + start];
+ if (!((c >= 0x0001) && (c <= 0x007F))) {
+ break;
+ }
+ tempBytes[count++] = (byte) c;
+ }
+
+ for (; i < length; i++) {
+ c = buffer[i + start];
+ count += writeToBytes(tempBytes, count, c);
+ }
+ out.write(tempBytes, 0, count);
+ }
+
+ private static int writeToBytes(byte[] tempBytes, int count, char c) {
+ int orig = count;
+ if ((c >= 0x0001) && (c <= 0x007F)) {
+ tempBytes[count++] = (byte) c;
+ } else if (c > 0x07FF) {
+ tempBytes[count++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
+ tempBytes[count++] = (byte) (0x80 | ((c >> 6) & 0x3F));
+ tempBytes[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
+ } else {
+ tempBytes[count++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
+ tempBytes[count++] = (byte) (0x80 | ((c >> 0) & 0x3F));
+ }
+ return count - orig;
+ }
+
+ private void ensureTempSize(int utflen) {
+ if (tempBytes == null || tempBytes.length < utflen + 5) {
+ tempBytes = new byte[utflen + 5];
+ }
+
+ }
+
+}
\ No newline at end of file
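
The writer pairs with UTF8StringReader; a round trip that exercises the new variable-length prefix (illustrative only, not part of the patch; the sample string is arbitrary):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;
    import org.apache.hyracks.util.string.UTF8StringReader;
    import org.apache.hyracks.util.string.UTF8StringWriter;

    static void roundTrip() throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        new UTF8StringWriter().writeUTF8("strings longer than 64K are now legal", new DataOutputStream(bos));
        DataInputStream in = new DataInputStream(new ByteArrayInputStream(bos.toByteArray()));
        String back = new UTF8StringReader().readUTF(in); // equals the original string
    }
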
diff --git a/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/encoding/VarLenIntEncoderDecoderTest.java b/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/encoding/VarLenIntEncoderDecoderTest.java
new file mode 100644
index 0000000..193dca6
--- /dev/null
+++ b/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/encoding/VarLenIntEncoderDecoderTest.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.encoding;
+
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.BOUND_FIVE_BYTE;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.BOUND_FOUR_BYTE;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.BOUND_ONE_BYTE;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.BOUND_THREE_BYTE;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.BOUND_TWO_BYTE;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.VarLenIntDecoder;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.createDecoder;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.decode;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.encode;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.getBytesRequired;
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+
+import org.junit.Test;
+
+public class VarLenIntEncoderDecoderTest {
+
+ int[] bounds = new int[] { 0, BOUND_ONE_BYTE, BOUND_TWO_BYTE, BOUND_THREE_BYTE, BOUND_FOUR_BYTE, BOUND_FIVE_BYTE };
+
+ @Test
+ public void testGetBytesRequired() throws Exception {
+ for (int bound = 0; bound < bounds.length - 1; bound++) {
+ assertEquals(bound + 1, getBytesRequired(bounds[bound]));
+ assertEquals(bound + 1, getBytesRequired(bounds[bound + 1] - 1));
+ }
+ }
+
+ @Test
+ public void testEncodeDecode() throws Exception {
+ byte[] bytes = new byte[10];
+ int startPos = 3;
+ for (int i = 1; i < bounds.length - 1; i++) {
+ testEncodeDecode(i, bounds[i] - 1, bytes, startPos);
+ testEncodeDecode(i + 1, bounds[i], bytes, startPos);
+ testEncodeDecode(i + 1, bounds[i] + 1, bytes, startPos);
+ }
+ // Integer.Max
+ testEncodeDecode(5, BOUND_FIVE_BYTE, bytes, startPos);
+ }
+
+ @Test
+ public void testCreateDecoder() throws Exception {
+ VarLenIntDecoder decoder = createDecoder();
+ byte[] bytes = new byte[100];
+ int pos = 1;
+ for (int b : bounds) {
+ pos += encode(b, bytes, pos);
+ }
+ decoder.reset(bytes, 1);
+ for (int b : bounds) {
+ assertEquals(b, decoder.decode());
+ }
+ }
+
+ protected void testEncodeDecode(int expectedBytes, int value, byte[] bytes, int startPos) throws IOException {
+ assertEquals(expectedBytes, encode(value, bytes, startPos));
+ assertEquals(value, decode(bytes, startPos));
+
+ ByteArrayInputStream bis = new ByteArrayInputStream(bytes, startPos, bytes.length - startPos);
+ DataInputStream dis = new DataInputStream(bis);
+ assertEquals(value, decode(dis));
+ }
+}
\ No newline at end of file
diff --git a/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringReaderWriterTest.java b/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringReaderWriterTest.java
new file mode 100644
index 0000000..bfc1fa8
--- /dev/null
+++ b/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringReaderWriterTest.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.string;
+
+import static org.apache.hyracks.util.string.UTF8StringSample.EMPTY_STRING;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_127;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_128;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_3;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_LARGE;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_LARGE_SUB_1;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_MEDIUM;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_MEDIUM_SUB_1;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_UTF8_3;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_UTF8_MIX;
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+import org.junit.Test;
+
+public class UTF8StringReaderWriterTest {
+
+ UTF8StringWriter writer = new UTF8StringWriter();
+ UTF8StringReader reader = new UTF8StringReader();
+
+ @Test
+ public void testWriterReader() throws IOException {
+ writeAndReadOneString(EMPTY_STRING);
+ writeAndReadOneString(STRING_LEN_3);
+
+ writeAndReadOneString(STRING_LEN_127);
+ writeAndReadOneString(STRING_LEN_128);
+ writeAndReadOneString(STRING_LEN_MEDIUM_SUB_1);
+ }
+
+ @Test
+ public void testMedium() throws IOException {
+ writeAndReadOneString(STRING_LEN_MEDIUM);
+ writeAndReadOneString(STRING_LEN_LARGE_SUB_1);
+ }
+
+ @Test
+ public void testLarge() throws IOException {
+ writeAndReadOneString(STRING_LEN_LARGE);
+ }
+
+ @Test
+ public void testUTF8() throws IOException {
+ writeAndReadOneString(STRING_UTF8_3);
+ writeAndReadOneString(STRING_UTF8_MIX);
+ }
+
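+    // Writes the string twice (once from a String, once from a char[] slice), then reads each
+    // encoding back and compares it with the original.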
+ private void writeAndReadOneString(String testString) throws IOException {
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ DataOutputStream dos = new DataOutputStream(bos);
+ writer.writeUTF8(testString, dos);
+
+ ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray(), 0, bos.size());
+ assertEquals(testString, reader.readUTF(new DataInputStream(bis)));
+
+ int lastOffset = bos.size();
+ char[] charArray = testString.toCharArray();
+ writer.writeUTF8(charArray, 0, charArray.length, dos);
+
+        bis = new ByteArrayInputStream(bos.toByteArray(), lastOffset, bos.size() - lastOffset);
+ assertEquals(testString, reader.readUTF(new DataInputStream(bis)));
+ }
+
+}
\ No newline at end of file
diff --git a/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringSample.java b/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringSample.java
new file mode 100644
index 0000000..3e6e984
--- /dev/null
+++ b/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringSample.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.string;
+
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.BOUND_THREE_BYTE;
+import static org.apache.hyracks.util.encoding.VarLenIntEncoderDecoder.BOUND_TWO_BYTE;
+
+import java.util.Arrays;
+
+/**
+ * Util class to provide the sample test string
+ */
+public class UTF8StringSample {
+ public static String EMPTY_STRING = "";
+
+ public static char ONE_ASCII_CHAR = 'x';
+ public static char ONE_UTF8_CHAR = 'à';
+
+ public static String STRING_LEN_3 = "xyz";
+ public static String STRING_UTF8_3 = "锟斤拷";
+ public static String STRING_UTF8_MIX = "\uD841\uDF0E\uD841\uDF31锟X斤Y拷Zà"; // one, two, three, and four bytes
+ public static String STRING_UTF8_MIX_LOWERCASE = "\uD841\uDF0E\uD841\uDF31锟x斤y拷zà";
+
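+    // Lengths chosen to straddle the var-size length-encoding boundaries: the one-byte header
+    // limit (127/128) and the two- and three-byte bounds from VarLenIntEncoderDecoder.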
+ public static String STRING_LEN_127 = generateStringRepeatBy(ONE_ASCII_CHAR, 127);
+ public static String STRING_LEN_128 = generateStringRepeatBy(ONE_ASCII_CHAR, 128);
+
+ public static String STRING_LEN_MEDIUM_SUB_1 = generateStringRepeatBy(ONE_ASCII_CHAR, BOUND_TWO_BYTE - 1);
+ public static String STRING_LEN_MEDIUM = generateStringRepeatBy(ONE_ASCII_CHAR, BOUND_TWO_BYTE);
+
+ public static String STRING_LEN_LARGE_SUB_1 = generateStringRepeatBy(ONE_ASCII_CHAR, BOUND_THREE_BYTE - 1);
+ public static String STRING_LEN_LARGE = generateStringRepeatBy(ONE_ASCII_CHAR, BOUND_THREE_BYTE);
+
+ public static String generateStringRepeatBy(char c, int times) {
+ char[] chars = new char[times];
+ Arrays.fill(chars, c);
+ return new String(chars);
+ }
+
+}
diff --git a/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java b/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java
new file mode 100644
index 0000000..0e3ed5c
--- /dev/null
+++ b/hyracks/hyracks-util/src/test/java/org/apache/hyracks/util/string/UTF8StringUtilTest.java
@@ -0,0 +1,144 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hyracks.util.string;
+
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_127;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_128;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_LEN_MEDIUM;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_UTF8_3;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_UTF8_MIX;
+import static org.apache.hyracks.util.string.UTF8StringSample.STRING_UTF8_MIX_LOWERCASE;
+import static org.apache.hyracks.util.string.UTF8StringUtil.charAt;
+import static org.apache.hyracks.util.string.UTF8StringUtil.charSize;
+import static org.apache.hyracks.util.string.UTF8StringUtil.compareTo;
+import static org.apache.hyracks.util.string.UTF8StringUtil.getModifiedUTF8Len;
+import static org.apache.hyracks.util.string.UTF8StringUtil.getNumBytesToStoreLength;
+import static org.apache.hyracks.util.string.UTF8StringUtil.getStringLength;
+import static org.apache.hyracks.util.string.UTF8StringUtil.getUTFLength;
+import static org.apache.hyracks.util.string.UTF8StringUtil.hash;
+import static org.apache.hyracks.util.string.UTF8StringUtil.lowerCaseCompareTo;
+import static org.apache.hyracks.util.string.UTF8StringUtil.lowerCaseHash;
+import static org.apache.hyracks.util.string.UTF8StringUtil.normalize;
+import static org.apache.hyracks.util.string.UTF8StringUtil.rawByteCompareTo;
+import static org.apache.hyracks.util.string.UTF8StringUtil.writeStringToBytes;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+
+import org.junit.Test;
+
+public class UTF8StringUtilTest {
+
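+    // Walks the encoded string char by char, starting just past the var-size length header
+    // (getNumBytesToStoreLength), and compares against the Java char array.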
+ @Test
+ public void testCharAtCharSizeGetLen() throws Exception {
+ char[] utf8Mix = STRING_UTF8_MIX.toCharArray();
+ byte[] buffer = writeStringToBytes(STRING_UTF8_MIX);
+ int pos = getNumBytesToStoreLength(getUTFLength(buffer, 0));
+ for (char c : utf8Mix) {
+ assertEquals(c, charAt(buffer, pos));
+ assertEquals(getModifiedUTF8Len(c), charSize(buffer, pos));
+ pos += charSize(buffer, pos);
+ }
+ }
+
+ @Test
+ public void testGetStringLength() throws Exception {
+ byte[] buffer = writeStringToBytes(STRING_UTF8_MIX);
+ assertEquals(STRING_UTF8_MIX.length(), getStringLength(buffer, 0));
+ }
+
+ @Test
+    public void testCompareToAndNormalize() throws Exception {
+ testCompare(STRING_UTF8_MIX, STRING_UTF8_MIX, OPTION.STANDARD);
+ testCompare(STRING_UTF8_3, STRING_UTF8_MIX, OPTION.STANDARD);
+ testCompare(STRING_LEN_MEDIUM, STRING_UTF8_MIX, OPTION.STANDARD);
+ }
+
+ public boolean isSameSign(int r1, int r2) {
+ if (r1 > 0) {
+ return r2 > 0;
+ }
+ if (r1 < 0) {
+ return r2 < 0;
+ }
+ return r2 == 0;
+ }
+
+ enum OPTION {STANDARD, RAW_BYTE, LOWERCASE}
+
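+    // Compares the byte-level result with String.compareTo / compareToIgnoreCase; for STANDARD
+    // it also checks that the normalized keys order the strings consistently.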
+ public void testCompare(String str1, String str2, OPTION option) throws IOException {
+ byte[] buffer1 = writeStringToBytes(str1);
+ byte[] buffer2 = writeStringToBytes(str2);
+
+ switch (option) {
+ case STANDARD:
+ assertEquals(str1.compareTo(str2), compareTo(buffer1, 0, buffer2, 0));
+ int n1 = normalize(buffer1, 0);
+ int n2 = normalize(buffer2, 0);
+ assertTrue(isSameSign(str1.compareTo(str2), n1 - n2));
+ break;
+ case RAW_BYTE:
+ assertEquals(str1.compareTo(str2), rawByteCompareTo(buffer1, 0, buffer2, 0));
+ break;
+ case LOWERCASE:
+ assertEquals(str1.compareToIgnoreCase(str2), lowerCaseCompareTo(buffer1, 0, buffer2, 0));
+ break;
+ }
+
+ }
+
+ @Test
+ public void testRawByteCompareTo() throws Exception {
+ testCompare(STRING_LEN_MEDIUM, STRING_LEN_MEDIUM, OPTION.RAW_BYTE);
+ testCompare(STRING_LEN_127, STRING_LEN_128, OPTION.RAW_BYTE);
+ }
+
+ @Test
+ public void testLowerCaseCompareTo() throws Exception {
+ testCompare(STRING_LEN_127, STRING_LEN_128, OPTION.LOWERCASE);
+ testCompare(STRING_LEN_127, STRING_UTF8_MIX, OPTION.LOWERCASE);
+ testCompare(STRING_UTF8_MIX, STRING_UTF8_MIX_LOWERCASE, OPTION.LOWERCASE);
+ testCompare(STRING_UTF8_MIX_LOWERCASE, STRING_UTF8_MIX, OPTION.LOWERCASE);
+ }
+
+ @Test
+ public void testToString() throws Exception {
+
+ StringBuilder sb = new StringBuilder();
+ byte[] buffer = writeStringToBytes(STRING_UTF8_MIX);
+ assertEquals(STRING_UTF8_MIX, UTF8StringUtil.toString(sb, buffer, 0).toString());
+ }
+
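+    // lowerCaseHash of the mixed-case sample should equal hash of its lowercase form, and
+    // different coefficient arguments should place the same string in different hash families.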
+ @Test
+ public void testHash() throws IOException {
+ byte[] buffer = writeStringToBytes(STRING_UTF8_MIX_LOWERCASE);
+ int lowerHash = hash(buffer, 0);
+
+        buffer = writeStringToBytes(STRING_UTF8_MIX);
+ int upperHash = lowerCaseHash(buffer, 0);
+ assertEquals(lowerHash, upperHash);
+
+ int familyOne = hash(buffer, 0, 7, 297);
+ int familyTwo = hash(buffer, 0, 8, 297);
+ assertTrue(familyOne != familyTwo);
+ }
+
+}
\ No newline at end of file
diff --git a/hyracks/pom.xml b/hyracks/pom.xml
index c1af7b9..61e06e4 100644
--- a/hyracks/pom.xml
+++ b/hyracks/pom.xml
@@ -96,6 +96,7 @@
</pluginRepositories>
<modules>
+ <module>hyracks-util</module>
<module>hyracks-ipc</module>
<module>hyracks-api</module>
<module>hyracks-comm</module>
diff --git a/pom.xml b/pom.xml
index 8f00aac..368ba2a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -18,87 +18,88 @@
!-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <groupId>org.apache.hyracks</groupId>
- <artifactId>fullstack</artifactId>
- <version>0.2.17-SNAPSHOT</version>
- <packaging>pom</packaging>
- <name>hyracks-ecosystem-full-stack</name>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>org.apache.hyracks</groupId>
+ <artifactId>fullstack</artifactId>
+ <version>0.2.17-SNAPSHOT</version>
+ <packaging>pom</packaging>
+ <name>hyracks-ecosystem-full-stack</name>
- <parent>
- <groupId>org.apache</groupId>
- <artifactId>apache</artifactId>
- <version>LATEST</version>
- </parent>
+ <parent>
+ <groupId>org.apache</groupId>
+ <artifactId>apache</artifactId>
+ <version>LATEST</version>
+ </parent>
- <licenses>
- <license>
- <name>Apache License, Version 2.0</name>
- <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
- <distribution>repo</distribution>
- <comments>A business-friendly OSS license</comments>
- </license>
- </licenses>
+ <licenses>
+ <license>
+ <name>Apache License, Version 2.0</name>
+ <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+ <distribution>repo</distribution>
+ <comments>A business-friendly OSS license</comments>
+ </license>
+ </licenses>
- <properties>
- <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
- <jvm.extraargs />
- <!-- Definition of tests in various categories which may be excluded -->
- <hanging.pregelix.tests>**/pregelix/**/FailureRecovery*.java</hanging.pregelix.tests>
- <jdk.version>1.8</jdk.version>
- <hivesterix.perf.tests>**/hivesterix/perf/PerfTestSuite.java</hivesterix.perf.tests>
- <global.test.includes>**/*TestSuite.java,**/*Test.java</global.test.includes>
- <global.test.excludes>**/Abstract*.java,${hanging.pregelix.tests},${hivesterix.perf.tests}</global.test.excludes>
- <!-- Versions under dependencymanagement or used in many projects via properties -->
- <hadoop.version>2.2.0</hadoop.version>
- <junit.version>4.8.1</junit.version>
- <commons.io.version>2.4</commons.io.version>
- </properties>
- <dependencyManagement>
- <dependencies>
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <version>${junit.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-yarn-client</artifactId>
- <version>${hadoop.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <version>${hadoop.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-common</artifactId>
- <version>${hadoop.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-hdfs</artifactId>
- <version>${hadoop.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-minicluster</artifactId>
- <version>${hadoop.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-mapreduce-client-core</artifactId>
- <version>${hadoop.version}</version>
- </dependency>
- <dependency>
- <groupId>commons-io</groupId>
- <artifactId>commons-io</artifactId>
- <version>${commons.io.version}</version>
- </dependency>
- </dependencies>
- </dependencyManagement>
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ <jvm.extraargs/>
+ <!-- Definition of tests in various categories which may be excluded -->
+ <hanging.pregelix.tests>**/pregelix/**/FailureRecovery*.java</hanging.pregelix.tests>
+ <hivesterix.perf.tests>**/hivesterix/perf/PerfTestSuite.java</hivesterix.perf.tests>
+ <global.test.includes>**/*TestSuite.java,**/*Test.java</global.test.includes>
+        <global.test.excludes>**/Abstract*.java,${hanging.pregelix.tests},${hivesterix.perf.tests}</global.test.excludes>
+ <!-- Versions under dependencymanagement or used in many projects via properties -->
+ <hadoop.version>2.2.0</hadoop.version>
+ <junit.version>4.8.1</junit.version>
+ <commons.io.version>2.4</commons.io.version>
+ </properties>
+ <dependencyManagement>
+ <dependencies>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>${junit.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-yarn-client</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-client</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-core</artifactId>
+ <version>${hadoop.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ <version>${commons.io.version}</version>
+ </dependency>
+ </dependencies>
+ </dependencyManagement>
<build>
<plugins>
@@ -167,6 +168,7 @@
<exclude>**/*.conf</exclude>
<exclude>**/src/main/resources/*.cleaned</exclude>
<exclude>**/ClusterControllerService/**</exclude>
+ <exclude>**/target/**</exclude>
<exclude>**/output/**</exclude>
<exclude>**/*.iml</exclude>
</excludes>