split hivesterix into several modules
git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_release_cleanup@3074 123451ca-8445-de46-9d55-352943316053
diff --git a/hivesterix-serde/pom.xml b/hivesterix-serde/pom.xml
new file mode 100644
index 0000000..0ba73bd
--- /dev/null
+++ b/hivesterix-serde/pom.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0"?>
+<project
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+ xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <artifactId>hivesterix</artifactId>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <version>0.2.3-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>hivesterix-serde</artifactId>
+ <name>hivesterix-serde</name>
+ <!-- Build: compile as Java 1.7 from UTF-8 sources, running javac in a forked process. -->
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <encoding>UTF-8</encoding>
+ <fork>true</fork>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ <!-- Dependencies: Hive, Algebricks and Hadoop at compile scope; JUnit at test scope only. -->
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-serde</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-common</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>algebricks-compiler</artifactId>
+ <version>0.2.3-SNAPSHOT</version> <!-- NOTE(review): repeats the parent version literal above; keep the two in sync -->
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ <version>0.20.2</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>3.8.1</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/ByteArrayRef.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/ByteArrayRef.java
new file mode 100644
index 0000000..92415f9
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/ByteArrayRef.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+/**
+ * ByteArrayRef stores a reference to a byte array, so that switching to the
+ * next row only swaps the reference and the old row can be garbage collected.
+ * NOTE(review): the Lazy* init(byte[], int, int) methods visible in this
+ * package take the array directly - confirm ByteArrayRef is still used.
+ */
+public class ByteArrayRef {
+
+ /**
+ * Stores the actual data; null until setData is called.
+ */
+ byte[] data;
+ /** Returns the stored array itself (no copy is made). */
+ public byte[] getData() {
+ return data;
+ }
+ /** Replaces the stored reference; the given array is not copied. */
+ public void setData(byte[] data) {
+ this.data = data;
+ }
+
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyArray.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyArray.java
new file mode 100644
index 0000000..33b20bf
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyArray.java
@@ -0,0 +1,229 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.RecordInfo;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyListObjectInspector;
+
+/**
+ * LazyArray is serialized as follows: start A b b b b b b end bytes[] ->
+ * |--------|---|---|---|---| ... |---|---|
+ * The bytes begin with a VInt holding the element count N (read by parse()).
+ * Section A is the null-bytes: (N+7)/8 bytes with one bit per element, where a
+ * bit is 0 when that element is null and 1 when it is not null.
+ * After A, all b(s) represent the elements of the list. Each of them is again a
+ * LazyObject; null elements take up no bytes in this section.
+ */
+
+public class LazyArray extends LazyNonPrimitive<LazyListObjectInspector> {
+
+ /**
+ * Whether the data is already parsed or not. Cleared by every init call.
+ */
+ boolean parsed = false;
+ /**
+ * The length of the array. Only valid when the data is parsed.
+ */
+ int arraySize = 0;
+
+ /**
+ * The start positions and lengths of array elements. Only valid when the
+ * data is parsed, and only for elements that are not null.
+ */
+ int[] elementStart;
+ int[] elementLength;
+
+ /**
+ * Whether an element is initialized or not.
+ */
+ boolean[] elementInited;
+
+ /**
+ * Whether an element is null or not. A length of 0 does not mean the
+ * element is null. In particular, a 0-length string is not null.
+ */
+ boolean[] elementIsNull;
+
+ /**
+ * The elements of the array. Note that we call arrayElements[i].init(bytes,
+ * begin, length) only when that element is accessed.
+ */
+ @SuppressWarnings("rawtypes")
+ LazyObject[] arrayElements;
+
+ /**
+ * Construct a LazyArray object with the ObjectInspector.
+ *
+ * @param oi
+ * the oi representing the type of this LazyArray
+ */
+ protected LazyArray(LazyListObjectInspector oi) {
+ super(oi);
+ }
+
+ /**
+ * Set the row data for this LazyArray; parsing is deferred to first access.
+ *
+ * @see LazyNonPrimitive#init(byte[], int, int)
+ */
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ super.init(bytes, start, length);
+ parsed = false;
+ }
+
+ /**
+ * Enlarge the arrays storing per-element information when they cannot hold
+ * newSize entries. Old contents are discarded rather than copied.
+ */
+ private void adjustArraySize(int newSize) {
+ if (elementStart == null || elementStart.length < newSize) {
+ elementStart = new int[newSize];
+ elementLength = new int[newSize];
+ elementInited = new boolean[newSize];
+ elementIsNull = new boolean[newSize];
+ arrayElements = new LazyObject[newSize];
+ }
+ }
+ // Scratch holders reused by parse().
+ VInt vInt = new LazyUtils.VInt();
+ RecordInfo recordInfo = new LazyUtils.RecordInfo();
+
+ /**
+ * Parse the bytes and fill elementStart, elementLength, elementInited and
+ * elementIsNull.
+ */
+ private void parse() {
+
+ // read the VInt that holds the number of elements
+ LazyUtils.readVInt(bytes, start, vInt);
+ arraySize = vInt.value;
+ if (0 == arraySize) {
+ parsed = true;
+ return;
+ }
+
+ // adjust arrays
+ adjustArraySize(arraySize);
+ // find out the null-bytes
+ int arryByteStart = start + vInt.length;
+ int nullByteCur = arryByteStart;
+ int nullByteEnd = arryByteStart + (arraySize + 7) / 8;
+ // element data begins right after the null-bytes
+ int lastElementByteEnd = nullByteEnd;
+ // the list element object inspector
+ ObjectInspector listEleObjectInspector = ((ListObjectInspector) oi).getListElementObjectInspector();
+ // parsing elements one by one
+ for (int i = 0; i < arraySize; i++) {
+ elementIsNull[i] = true;
+ if ((bytes[nullByteCur] & (1 << (i % 8))) != 0) { // bit (i % 8) of the current null-byte set => element present
+ elementIsNull[i] = false;
+ LazyUtils.checkObjectByteInfo(listEleObjectInspector, bytes, lastElementByteEnd, recordInfo);
+ elementStart[i] = lastElementByteEnd + recordInfo.elementOffset;
+ elementLength[i] = recordInfo.elementSize;
+ lastElementByteEnd = elementStart[i] + elementLength[i];
+ }
+ // move onto the next null byte after every 8th element
+ if (7 == (i % 8)) {
+ nullByteCur++;
+ }
+ }
+ // every element must be re-initialized against the newly parsed bytes
+ Arrays.fill(elementInited, 0, arraySize, false);
+ parsed = true;
+ }
+
+ /**
+ * Returns the object at the index position inside the array represented by
+ * this LazyObject, or null if the index is out of range or the element is null.
+ */
+ public Object getListElementObject(int index) {
+ if (!parsed) {
+ parse();
+ }
+ if (index < 0 || index >= arraySize) {
+ return null;
+ }
+ return uncheckedGetElement(index);
+ }
+
+ /**
+ * Get the element without checking out-of-bound index.
+ *
+ * @param index
+ * index to the array element
+ */
+ private Object uncheckedGetElement(int index) {
+
+ if (elementIsNull[index]) {
+ return null;
+ } else {
+ if (!elementInited[index]) {
+ elementInited[index] = true;
+ if (arrayElements[index] == null) { // create the element's LazyObject once, then reuse it
+ arrayElements[index] = LazyFactory.createLazyObject((oi).getListElementObjectInspector());
+ }
+ arrayElements[index].init(bytes, elementStart[index], elementLength[index]);
+ }
+ }
+ return arrayElements[index].getObject();
+ }
+
+ /**
+ * Returns the array size, parsing the data first if necessary.
+ */
+ public int getListLength() {
+ if (!parsed) {
+ parse();
+ }
+ return arraySize;
+ }
+
+ /**
+ * cachedList is reused every time getList is called. Different
+ * LazyArray instances cannot share the same cachedList.
+ */
+ ArrayList<Object> cachedList;
+
+ /**
+ * Returns the List of element objects; null elements appear as null entries.
+ * The same list instance is cleared and refilled by the next getList call.
+ */
+ public List<Object> getList() {
+ if (!parsed) {
+ parse();
+ }
+ if (cachedList == null) {
+ cachedList = new ArrayList<Object>(arraySize);
+ } else {
+ cachedList.clear();
+ }
+ for (int index = 0; index < arraySize; index++) {
+ cachedList.add(uncheckedGetElement(index));
+ }
+ return cachedList;
+ }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyBoolean.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyBoolean.java
new file mode 100644
index 0000000..5a48525
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyBoolean.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.io.BooleanWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyBooleanObjectInspector;
+
+/**
+ * Lazily decoded boolean value, exposed through a reusable {@link BooleanWritable}.
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project. As with the specification, this implementation relied on code laid out in <a href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's Delight, (Addison Wesley, 2002)</a> as well as <a href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ */
+public class LazyBoolean extends LazyPrimitive<LazyBooleanObjectInspector, BooleanWritable> {
+    /** Creates an empty instance bound to the given object inspector. */
+    public LazyBoolean(LazyBooleanObjectInspector oi) {
+        super(oi);
+        data = new BooleanWritable();
+    }
+
+    /** Copy constructor: duplicates the value currently held by {@code copy}. */
+    public LazyBoolean(LazyBoolean copy) {
+        super(copy);
+        data = new BooleanWritable(copy.data.get());
+    }
+
+    /**
+     * Binds this object to one serialized byte; a zero-length field is treated as null.
+     * <p>
+     * A byte of 0 decodes to {@code false} and any other byte to {@code true}, so a value other than 0 or 1
+     * can no longer leave the result of a previous {@code init} call in place.
+     */
+    @Override
+    public void init(byte[] bytes, int start, int length) {
+        isNull = (length == 0);
+        if (isNull) {
+            return;
+        }
+
+        assert (1 == length);
+        // Always overwrite: the same BooleanWritable is handed out after every init call.
+        data.set(bytes[start] != 0);
+    }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyByte.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyByte.java
new file mode 100644
index 0000000..bf4ff04
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyByte.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.io.ByteWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyByteObjectInspector;
+
+/**
+ * Lazily decoded byte value, exposed through a reusable {@link ByteWritable}.
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project. As with the specification, this implementation relied on code laid out in <a href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's Delight, (Addison Wesley, 2002)</a> as well as <a href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ */
+public class LazyByte extends LazyPrimitive<LazyByteObjectInspector, ByteWritable> {
+    /** Creates an empty instance bound to the given object inspector. */
+    public LazyByte(LazyByteObjectInspector oi) {
+        super(oi);
+        data = new ByteWritable();
+    }
+
+    /** Copy constructor: duplicates the value currently held by {@code copy}. */
+    public LazyByte(LazyByte copy) {
+        super(copy);
+        data = new ByteWritable(copy.data.get());
+    }
+
+    /** Binds this object to one serialized byte; a zero-length field is treated as null. */
+    @Override
+    public void init(byte[] bytes, int start, int length) {
+        isNull = (length == 0);
+        if (isNull) {
+            return;
+        }
+
+        assert (1 == length);
+        data.set(bytes[start]);
+    }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyColumnar.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyColumnar.java
new file mode 100644
index 0000000..d73fea7
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyColumnar.java
@@ -0,0 +1,207 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyColumnarObjectInspector;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+/**
+ * LazyObject for storing a struct whose field bytes come from an
+ * IFrameTupleReference. The field of a struct can be primitive or non-primitive.
+ * This class does not deal with the case of a NULL struct; presumably that is
+ * handled by the parent LazyObject (wording carried over from LazyStruct - confirm).
+ */
+@SuppressWarnings("rawtypes")
+public class LazyColumnar extends LazyNonPrimitive<LazyColumnarObjectInspector> {
+
+ /**
+ * IFrameTupleReference: the backend of the struct
+ */
+ IFrameTupleReference tuple;
+
+ /**
+ * Whether parse() has run since the last init(...) call.
+ */
+ boolean reset;
+
+ /**
+ * The fields of the struct. Created once, on the first parse().
+ */
+ LazyObject[] fields;
+
+ /**
+ * Whether init() has been called on the field since the last parse().
+ */
+ boolean[] fieldVisited;
+
+ /**
+ * True until the first parse(), which creates fields and fieldVisited.
+ */
+ boolean start = true;
+
+ /**
+ * Construct a LazyColumnar object with the ObjectInspector.
+ */
+ public LazyColumnar(LazyColumnarObjectInspector oi) {
+ super(oi);
+ }
+
+ /**
+ * Set the row data for this LazyColumnar and force a re-parse on next access.
+ * Field bytes themselves are read from the tuple bound by init(IFrameTupleReference).
+ * @see LazyNonPrimitive#init(byte[], int, int)
+ */
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ super.init(bytes, start, length);
+ reset = false;
+ }
+
+ /**
+ * Create the per-field LazyObjects on the first call, then mark every field as not yet visited.
+ */
+ private void parse() {
+
+ if (start) {
+ // initialize field array and reusable objects
+ List<? extends StructField> fieldRefs = ((StructObjectInspector) oi).getAllStructFieldRefs();
+
+ fields = new LazyObject[fieldRefs.size()];
+ for (int i = 0; i < fields.length; i++) {
+ fields[i] = LazyFactory.createLazyObject(fieldRefs.get(i).getFieldObjectInspector());
+ }
+ fieldVisited = new boolean[fields.length];
+ start = false;
+ }
+
+ Arrays.fill(fieldVisited, false);
+ reset = true;
+ }
+
+ /**
+ * Get one field out of the struct.
+ * If the field is a primitive field, return the actual object. Otherwise
+ * return the LazyObject. This is because PrimitiveObjectInspector does not
+ * have control over the object used by the user - the user simply directly
+ * use the Object instead of going through Object
+ * PrimitiveObjectInspector.get(Object).
+ *
+ * @param fieldID
+ * The field ID
+ * @return The result of getObject() on the field's LazyObject
+ */
+ public Object getField(int fieldID) {
+ if (!reset) {
+ parse();
+ }
+ return uncheckedGetField(fieldID);
+ }
+
+ /**
+ * Get the field out of the row without checking parsed. This is called by
+ * both getField and getFieldsAsList.
+ * The field's position and length are read from the bound tuple on every call,
+ * but the field's LazyObject is init()ed only on its first access after parse().
+ *
+ * @param fieldID
+ * The id of the field starting from 0.
+ * @return The value of the field
+ */
+ private Object uncheckedGetField(int fieldID) {
+ // get the buffer
+ byte[] buffer = tuple.getFieldData(fieldID);
+ // get the offset of the field
+ int s1 = tuple.getFieldStart(fieldID);
+ int l1 = tuple.getFieldLength(fieldID);
+
+ if (!fieldVisited[fieldID]) {
+ fieldVisited[fieldID] = true;
+ fields[fieldID].init(buffer, s1, l1);
+ }
+ // if (fields[fieldID].getObject() == null) {
+ // throw new IllegalStateException("illegal field " + fieldID);
+ // }
+ return fields[fieldID].getObject();
+ }
+ // Result list reused by every getFieldsAsList() call.
+ ArrayList<Object> cachedList;
+
+ /**
+ * Get the values of the fields as an ArrayList.
+ *
+ * @return The shared cachedList, cleared and refilled on each call.
+ */
+ public ArrayList<Object> getFieldsAsList() {
+ if (!reset) {
+ parse();
+ }
+ if (cachedList == null) {
+ cachedList = new ArrayList<Object>();
+ } else {
+ cachedList.clear();
+ }
+ for (int i = 0; i < fields.length; i++) {
+ cachedList.add(uncheckedGetField(i));
+ }
+ return cachedList;
+ }
+
+ @Override
+ public Object getObject() {
+ return this;
+ }
+
+ protected boolean getParsed() { // backed by the reset flag
+ return reset;
+ }
+
+ protected void setParsed(boolean parsed) { // writes the reset flag
+ this.reset = parsed;
+ }
+
+ protected LazyObject[] getFields() {
+ return fields;
+ }
+
+ protected void setFields(LazyObject[] fields) {
+ this.fields = fields;
+ }
+
+ protected boolean[] getFieldInited() { // backed by fieldVisited
+ return fieldVisited;
+ }
+
+ protected void setFieldInited(boolean[] fieldInited) { // writes fieldVisited
+ this.fieldVisited = fieldInited;
+ }
+
+ /**
+ * rebind a frametuplereference to the struct and force a re-parse on next access
+ */
+ public void init(IFrameTupleReference r) {
+ this.tuple = r;
+ reset = false;
+ }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyDouble.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyDouble.java
new file mode 100644
index 0000000..1b2cc5a
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyDouble.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.io.DoubleWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyDoubleObjectInspector;
+
+/**
+ * Lazily decoded double value, exposed through a reusable {@link DoubleWritable}.
+ */
+public class LazyDouble extends LazyPrimitive<LazyDoubleObjectInspector, DoubleWritable> {
+    /** Creates an empty instance bound to the given object inspector. */
+    public LazyDouble(LazyDoubleObjectInspector oi) {
+        super(oi);
+        data = new DoubleWritable();
+    }
+
+    /** Copy constructor: duplicates the value currently held by {@code copy}. */
+    public LazyDouble(LazyDouble copy) {
+        super(copy);
+        data = new DoubleWritable(copy.data.get());
+    }
+
+    /** Binds this object to an 8-byte serialized double; a zero-length field is treated as null. */
+    @Override
+    public void init(byte[] bytes, int start, int length) {
+        isNull = (length == 0);
+        if (isNull) {
+            return;
+        }
+        assert (8 == length);
+        data.set(Double.longBitsToDouble(LazyUtils.byteArrayToLong(bytes, start)));
+    }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFactory.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFactory.java
new file mode 100644
index 0000000..7caa9ed
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFactory.java
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyColumnarObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyListObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyMapObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyStructObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyBooleanObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyByteObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyDoubleObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyFloatObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyIntObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyLongObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyShortObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyStringObjectInspector;
+
+/**
+ * LazyFactory.
+ */
+public final class LazyFactory {
+
+ /**
+ * Create a lazy binary primitive class given the type name.
+ */
+ public static LazyPrimitive<?, ?> createLazyPrimitiveClass(PrimitiveObjectInspector oi) {
+ PrimitiveCategory p = oi.getPrimitiveCategory();
+ switch (p) {
+ case BOOLEAN:
+ return new LazyBoolean((LazyBooleanObjectInspector) oi);
+ case BYTE:
+ return new LazyByte((LazyByteObjectInspector) oi);
+ case SHORT:
+ return new LazyShort((LazyShortObjectInspector) oi);
+ case INT:
+ return new LazyInteger((LazyIntObjectInspector) oi);
+ case LONG:
+ return new LazyLong((LazyLongObjectInspector) oi);
+ case FLOAT:
+ return new LazyFloat((LazyFloatObjectInspector) oi);
+ case DOUBLE:
+ return new LazyDouble((LazyDoubleObjectInspector) oi);
+ case STRING:
+ return new LazyString((LazyStringObjectInspector) oi);
+ default:
+ throw new RuntimeException("Internal error: no LazyObject for " + p);
+ }
+ }
+
+ /**
+ * Create a hierarchical LazyObject based on the given typeInfo.
+ */
+ public static LazyObject<? extends ObjectInspector> createLazyObject(ObjectInspector oi) {
+ ObjectInspector.Category c = oi.getCategory();
+ switch (c) {
+ case PRIMITIVE:
+ return createLazyPrimitiveClass((PrimitiveObjectInspector) oi);
+ case MAP:
+ return new LazyMap((LazyMapObjectInspector) oi);
+ case LIST:
+ return new LazyArray((LazyListObjectInspector) oi);
+ case STRUCT: // check whether it is a top-level struct
+ if (oi instanceof LazyStructObjectInspector)
+ return new LazyStruct((LazyStructObjectInspector) oi);
+ else
+ return new LazyColumnar((LazyColumnarObjectInspector) oi);
+ default:
+ throw new RuntimeException("Hive LazySerDe Internal error.");
+ }
+ }
+
+ private LazyFactory() {
+ // prevent instantiation
+ }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFloat.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFloat.java
new file mode 100644
index 0000000..430ac2e
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFloat.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.io.FloatWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyFloatObjectInspector;
+
+/**
+ * Lazily decoded float value, exposed through a reusable {@link FloatWritable}.
+ */
+public class LazyFloat extends LazyPrimitive<LazyFloatObjectInspector, FloatWritable> {
+    /** Creates an empty instance bound to the given object inspector. */
+    public LazyFloat(LazyFloatObjectInspector oi) {
+        super(oi);
+        data = new FloatWritable();
+    }
+
+    /** Copy constructor: duplicates the value currently held by {@code copy}. */
+    public LazyFloat(LazyFloat copy) {
+        super(copy);
+        data = new FloatWritable(copy.data.get());
+    }
+
+    /** Binds this object to a 4-byte serialized float; a zero-length field is treated as null. */
+    @Override
+    public void init(byte[] bytes, int start, int length) {
+        isNull = (length == 0);
+        if (isNull) {
+            return;
+        }
+        assert (4 == length);
+        final int rawBits = LazyUtils.byteArrayToInt(bytes, start);
+        data.set(Float.intBitsToFloat(rawBits));
+    }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyInteger.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyInteger.java
new file mode 100644
index 0000000..0765c4f
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyInteger.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.io.IntWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyIntObjectInspector;
+
+/**
+ * Lazily decoded int value, exposed through a reusable {@link IntWritable}.
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project. As with the specification, this implementation relied on code laid out in <a href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's Delight, (Addison Wesley, 2002)</a> as well as <a href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ */
+public class LazyInteger extends LazyPrimitive<LazyIntObjectInspector, IntWritable> {
+    /** Creates an empty instance bound to the given object inspector. */
+    public LazyInteger(LazyIntObjectInspector oi) {
+        super(oi);
+        data = new IntWritable();
+    }
+
+    /** Copy constructor: duplicates the value currently held by {@code copy}. */
+    public LazyInteger(LazyInteger copy) {
+        super(copy);
+        data = new IntWritable(copy.data.get());
+    }
+
+    /** Scratch holder reused across init calls when decoding the value. */
+    VInt vInt = new LazyUtils.VInt();
+
+    /**
+     * Binds this object to a VInt-encoded int; a zero-length field is treated as null.
+     * @throws IllegalStateException if the decoded VInt does not span exactly {@code length} bytes
+     */
+    @Override
+    public void init(byte[] bytes, int start, int length) {
+        isNull = (length == 0);
+        if (isNull) {
+            return;
+        }
+        LazyUtils.readVInt(bytes, start, vInt);
+        if (length != vInt.length) {
+            throw new IllegalStateException("parse int: length mismatch, expected " + vInt.length + " but get " + length);
+        }
+        data.set(vInt.value);
+    }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyLong.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyLong.java
new file mode 100644
index 0000000..e6b56c3
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyLong.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.io.LongWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyLongObjectInspector;
+
+/**
+ * LazyObject for storing a value of Long.
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project. As with the specification, this implementation relied on code laid out in <a href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's Delight, (Addison Wesley, 2002)</a> as well as <a href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ */
+public class LazyLong extends LazyPrimitive<LazyLongObjectInspector, LongWritable> {
+
+    /**
+     * Creates a LazyLong whose value holder is a fresh {@link LongWritable}.
+     *
+     * @param oi
+     *            the object inspector associated with this lazy object
+     */
+    public LazyLong(LazyLongObjectInspector oi) {
+        super(oi);
+        data = new LongWritable();
+    }
+
+    /**
+     * Copy constructor. The new instance gets its own {@link LongWritable}
+     * holding the value currently stored in {@code copy}.
+     *
+     * @param copy
+     *            the LazyLong to copy from
+     */
+    public LazyLong(LazyLong copy) {
+        super(copy);
+        data = new LongWritable(copy.data.get());
+    }
+
+    /**
+     * The reusable vLong for decoding the long.
+     */
+    VLong vLong = new LazyUtils.VLong();
+
+    /**
+     * Decodes the VLong found at {@code start} and stores it as the value of
+     * this object. A {@code length} of zero marks the value as null.
+     *
+     * @param bytes
+     *            the array holding the serialized long
+     * @param start
+     *            the offset of the first byte of the VLong
+     * @param length
+     *            the number of bytes the caller attributes to this value
+     * @throws IllegalStateException
+     *             if the decoded VLong does not occupy exactly {@code length}
+     *             bytes
+     */
+    @Override
+    public void init(byte[] bytes, int start, int length) {
+        if (length == 0) {
+            isNull = true;
+            return;
+        }
+        isNull = false;
+
+        LazyUtils.readVLong(bytes, start, vLong);
+        // Checked explicitly instead of with an assert so that the same,
+        // descriptive exception is raised whether or not assertions are enabled.
+        if (length != vLong.length) {
+            throw new IllegalStateException("parse long: length mismatch, expected " + vLong.length + " but get "
+                    + length);
+        }
+        data.set(vLong.value);
+    }
+
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyMap.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyMap.java
new file mode 100644
index 0000000..9c7af2e
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyMap.java
@@ -0,0 +1,327 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.RecordInfo;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyMapObjectInspector;
+
+/**
+ * LazyMap reads a map from a byte range that is laid out as follows:
+ *
+ * <pre>
+ * start                                          end
+ * | N | A | b | c | b | c | ... | b | c |
+ * </pre>
+ *
+ * N is a VInt holding the number of key-value pairs. Section A is the
+ * null-bytes. Suppose the map has N key-value pairs, then there are (N*2+7)/8
+ * bytes used as null-bytes. Each bit corresponds to a key or a value and it
+ * indicates whether that key or value is null (0) or not null (1). When N is
+ * 0, nothing after the VInt is read.
+ * After A, all the bytes are actual serialized data of the map, which are
+ * key-value pairs. b represents a key and c represents a value. Each of them
+ * is again a LazyObject. A null key or value occupies no bytes in this part.
+ */
+
+@SuppressWarnings("rawtypes")
+public class LazyMap extends LazyNonPrimitive<LazyMapObjectInspector> {
+
+ private static Log LOG = LogFactory.getLog(LazyMap.class.getName());
+
+ /**
+ * Whether the data is already parsed or not.
+ */
+ boolean parsed;
+
+ /**
+ * The size of the map. Only valid when the data is parsed. -1 when the map
+ * is NULL.
+ */
+ int mapSize = 0;
+
+ /**
+ * The beginning position and length of key[i] and value[i]. Only valid when
+ * the data is parsed.
+ */
+ int[] keyStart;
+ int[] keyLength;
+ int[] valueStart;
+ int[] valueLength;
+ /**
+ * Whether valueObjects[i]/keyObjects[i] is initialized or not.
+ */
+ boolean[] keyInited;
+ boolean[] valueInited;
+
+ /**
+ * Whether valueObjects[i]/keyObjects[i] is null or not This could not be
+ * inferred from the length of the object. In particular, a 0-length string
+ * is not null.
+ */
+ boolean[] keyIsNull;
+ boolean[] valueIsNull;
+
+ /**
+ * The keys are stored in an array of LazyPrimitives.
+ */
+ LazyPrimitive<?, ?>[] keyObjects;
+ /**
+ * The values are stored in an array of LazyObjects. value[index] will start
+ * from KeyEnd[index] + 1, and ends before KeyStart[index+1] - 1.
+ */
+ LazyObject[] valueObjects;
+
+ /**
+ * Create a LazyMap bound to the given object inspector. No bytes are
+ * attached until init(byte[], int, int) is called.
+ *
+ * @param oi
+ * the object inspector describing this map; the key and value
+ * object inspectors are obtained from it while parsing
+ */
+ protected LazyMap(LazyMapObjectInspector oi) {
+ super(oi);
+ }
+
+ /**
+ * Set the row data for this LazyMap. The byte range is only recorded here
+ * and the parsed flag is cleared, so the bytes are parsed again on the next
+ * access.
+ *
+ * @param bytes
+ * the array holding the serialized map
+ * @param start
+ * the offset inside bytes where this map begins
+ * @param length
+ * the number of bytes that belong to this map
+ * @see LazyObject#init(byte[], int, int)
+ */
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ super.init(bytes, start, length);
+ parsed = false;
+ }
+
+ /**
+ * Adjust the size of arrays: keyStart, keyLength valueStart, valueLength
+ * keyInited, keyIsNull valueInited, valueIsNull.
+ */
+ protected void adjustArraySize(int newSize) {
+ if (keyStart == null || keyStart.length < newSize) {
+ keyStart = new int[newSize];
+ keyLength = new int[newSize];
+ valueStart = new int[newSize];
+ valueLength = new int[newSize];
+ keyInited = new boolean[newSize];
+ keyIsNull = new boolean[newSize];
+ valueInited = new boolean[newSize];
+ valueIsNull = new boolean[newSize];
+ keyObjects = new LazyPrimitive<?, ?>[newSize];
+ valueObjects = new LazyObject[newSize];
+ }
+ }
+
+ boolean nullMapKey = false;
+ VInt vInt = new LazyUtils.VInt();
+ RecordInfo recordInfo = new LazyUtils.RecordInfo();
+
+ /**
+ * Parse the byte[] and fill keyStart, keyLength, keyIsNull valueStart,
+ * valueLength and valueIsNull.
+ */
+ private void parse() {
+
+ // get the VInt that represents the map size
+ LazyUtils.readVInt(bytes, start, vInt);
+ mapSize = vInt.value;
+ if (0 == mapSize) {
+ parsed = true;
+ return;
+ }
+
+ // adjust arrays
+ adjustArraySize(mapSize);
+
+ // find out the null-bytes
+ int mapByteStart = start + vInt.length;
+ int nullByteCur = mapByteStart;
+ int nullByteEnd = mapByteStart + (mapSize * 2 + 7) / 8;
+ int lastElementByteEnd = nullByteEnd;
+
+ // parsing the keys and values one by one
+ for (int i = 0; i < mapSize; i++) {
+ // parse a key
+ keyIsNull[i] = true;
+ if ((bytes[nullByteCur] & (1 << ((i * 2) % 8))) != 0) {
+ keyIsNull[i] = false;
+ LazyUtils.checkObjectByteInfo(((MapObjectInspector) oi).getMapKeyObjectInspector(), bytes,
+ lastElementByteEnd, recordInfo);
+ keyStart[i] = lastElementByteEnd + recordInfo.elementOffset;
+ keyLength[i] = recordInfo.elementSize;
+ lastElementByteEnd = keyStart[i] + keyLength[i];
+ } else if (!nullMapKey) {
+ nullMapKey = true;
+ LOG.warn("Null map key encountered! Ignoring similar problems.");
+ }
+
+ // parse a value
+ valueIsNull[i] = true;
+ if ((bytes[nullByteCur] & (1 << ((i * 2 + 1) % 8))) != 0) {
+ valueIsNull[i] = false;
+ LazyUtils.checkObjectByteInfo(((MapObjectInspector) oi).getMapValueObjectInspector(), bytes,
+ lastElementByteEnd, recordInfo);
+ valueStart[i] = lastElementByteEnd + recordInfo.elementOffset;
+ valueLength[i] = recordInfo.elementSize;
+ lastElementByteEnd = valueStart[i] + valueLength[i];
+ }
+
+ // move onto the next null byte
+ if (3 == (i % 4)) {
+ nullByteCur++;
+ }
+ }
+
+ Arrays.fill(keyInited, 0, mapSize, false);
+ Arrays.fill(valueInited, 0, mapSize, false);
+ parsed = true;
+ }
+
+    /**
+     * Return the lazy value object of the entry at {@code index}. The caller
+     * is responsible for having parsed the bytes already.
+     *
+     * @param index
+     *            The index into the array starting from 0
+     * @return the value object, initialized on the bytes of that entry, or
+     *         null when the value is null
+     */
+    private LazyObject uncheckedGetValue(int index) {
+        if (valueIsNull[index]) {
+            return null;
+        }
+        if (valueInited[index]) {
+            return valueObjects[index];
+        }
+
+        valueInited[index] = true;
+        // the holder object is created once per slot and then reused
+        LazyObject holder = valueObjects[index];
+        if (holder == null) {
+            holder = LazyFactory.createLazyObject(((MapObjectInspector) oi).getMapValueObjectInspector());
+            valueObjects[index] = holder;
+        }
+        holder.init(bytes, valueStart[index], valueLength[index]);
+        return holder;
+    }
+
+    /**
+     * Look up the value stored under {@code key}.
+     * The entries are scanned linearly in serialized order and the first entry
+     * whose writable key equals {@code key} wins, so if the serialized data
+     * contains the same key several times only the first one is returned.
+     * A linear scan is used because callers typically fetch only one or two
+     * values, which is cheaper than building a HashMap first.
+     *
+     * @param key
+     *            The key object that we are looking for.
+     * @return The corresponding value object, or NULL if not found
+     */
+    public Object getMapValueElement(Object key) {
+        if (!parsed) {
+            parse();
+        }
+        for (int i = 0; i < mapSize; i++) {
+            LazyPrimitive<?, ?> candidate = uncheckedGetKey(i);
+            // getWritableObject() turns the LazyPrimitive into the actual
+            // primitive writable object
+            Object writableKey = (candidate == null) ? null : candidate.getWritableObject();
+            if (writableKey == null || !writableKey.equals(key)) {
+                continue;
+            }
+            // first match: hand back its value
+            LazyObject match = uncheckedGetValue(i);
+            return (match == null) ? null : match.getObject();
+        }
+        return null;
+    }
+
+    /**
+     * Return the lazy key object of the entry at {@code index}. The caller is
+     * responsible for having parsed the bytes already.
+     *
+     * @param index
+     *            The index into the array starting from 0
+     * @return the key object, initialized on the bytes of that entry, or null
+     *         when the key is null
+     */
+    private LazyPrimitive<?, ?> uncheckedGetKey(int index) {
+        if (keyIsNull[index]) {
+            return null;
+        }
+        if (keyInited[index]) {
+            return keyObjects[index];
+        }
+
+        keyInited[index] = true;
+        // the holder object is created once per slot and then reused
+        LazyPrimitive<?, ?> holder = keyObjects[index];
+        if (holder == null) {
+            // Keys are always primitive
+            PrimitiveObjectInspector keyInspector = (PrimitiveObjectInspector) ((MapObjectInspector) oi)
+                    .getMapKeyObjectInspector();
+            holder = LazyFactory.createLazyPrimitiveClass(keyInspector);
+            keyObjects[index] = holder;
+        }
+        holder.init(bytes, keyStart[index], keyLength[index]);
+        return holder;
+    }
+
+ /**
+ * cachedMap is reused for different calls to getMap(). But each LazyMap has
+ * a separate cachedMap so we won't overwrite the data by accident.
+ */
+ LinkedHashMap<Object, Object> cachedMap;
+
+    /**
+     * Return the map object representing this LazyMap. The same
+     * LinkedHashMap instance is cleared and refilled on every call, so a
+     * previously returned map changes with the next call.
+     * <p>
+     * Each key is the object returned by getObject() on the key's
+     * LazyPrimitive and each value is the object returned by getObject() on
+     * the value's LazyObject (null for a null value). Entries whose key is
+     * null are skipped, and an entry is not added when the map already
+     * contains its key.
+     *
+     * @return the map object
+     */
+    public Map<Object, Object> getMap() {
+        if (!parsed) {
+            parse();
+        }
+        if (cachedMap != null) {
+            cachedMap.clear();
+        } else {
+            // Use LinkedHashMap to provide deterministic order
+            cachedMap = new LinkedHashMap<Object, Object>();
+        }
+
+        for (int i = 0; i < mapSize; i++) {
+            LazyPrimitive<?, ?> lazyKey = uncheckedGetKey(i);
+            Object key = (lazyKey == null) ? null : lazyKey.getObject();
+            // skip null keys, and never overwrite an entry that is already there
+            if (key == null || cachedMap.containsKey(key)) {
+                continue;
+            }
+            LazyObject lazyValue = uncheckedGetValue(i);
+            cachedMap.put(key, (lazyValue == null) ? null : lazyValue.getObject());
+        }
+        return cachedMap;
+    }
+
+ /**
+ * Get the size of the map represented by this LazyMap. The bytes are parsed
+ * first if they have not been parsed since the last init.
+ *
+ * @return The number of key-value pairs recorded in the serialized map;
+ * this is the raw count, so entries that getMap() skips (null or
+ * repeated keys) are included
+ */
+ public int getMapSize() {
+ if (!parsed) {
+ parse();
+ }
+ return mapSize;
+ }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyNonPrimitive.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyNonPrimitive.java
new file mode 100644
index 0000000..f7ae1e3
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyNonPrimitive.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+/**
+ * LazyNonPrimitive is the base class of lazy objects that are not primitives.
+ * It remembers the byte range (array, start offset and length) the object was
+ * initialized with, so that subclasses can parse it on demand.
+ */
+public abstract class LazyNonPrimitive<OI extends ObjectInspector> extends LazyObject<OI> {
+
+    /** The array holding the serialized object; null until init is called. */
+    protected byte[] bytes;
+    /** The offset of the first byte of this object inside {@link #bytes}. */
+    protected int start;
+    /** The number of bytes that belong to this object. */
+    protected int length;
+
+    /**
+     * Create a LazyNonPrimitive object with the specified ObjectInspector.
+     * The byte range starts out empty (null array, offset 0, length 0).
+     *
+     * @param oi
+     *            The ObjectInspector would have to have a hierarchy of
+     *            LazyObjectInspectors with the leaf nodes being
+     *            WritableObjectInspectors. It's used both for accessing the
+     *            type hierarchy of the complex object, as well as getting meta
+     *            information (separator, nullSequence, etc) when parsing the
+     *            lazy object.
+     */
+    protected LazyNonPrimitive(OI oi) {
+        super(oi);
+        // bytes, start and length keep their default values: null, 0 and 0
+    }
+
+    /**
+     * Record the byte range this object is backed by. Nothing is parsed here.
+     *
+     * @param bytes
+     *            the array holding the serialized object, must not be null
+     * @param start
+     *            the offset of the first byte of the object
+     * @param length
+     *            the number of bytes that belong to the object
+     * @throws RuntimeException
+     *             if {@code bytes} is null
+     */
+    @Override
+    public void init(byte[] bytes, int start, int length) {
+        if (null == bytes) {
+            throw new RuntimeException("bytes cannot be null!");
+        }
+        this.bytes = bytes;
+        this.start = start;
+        this.length = length;
+        // the range is only sanity-checked when assertions are enabled
+        assert start >= 0;
+        assert start + length <= bytes.length;
+    }
+
+    /**
+     * Initialization from a frame tuple does nothing in this base class.
+     */
+    @Override
+    public void init(IFrameTupleReference tuple) {
+    }
+
+    /**
+     * A non-primitive lazy object represents itself.
+     *
+     * @return this object
+     */
+    @Override
+    public Object getObject() {
+        return this;
+    }
+
+    /**
+     * @return the hash that LazyUtils.hashBytes computes over the current
+     *         byte range
+     */
+    @Override
+    public int hashCode() {
+        return LazyUtils.hashBytes(bytes, start, length);
+    }
+
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyObject.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyObject.java
new file mode 100644
index 0000000..dc1dc60
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyObject.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+/**
+ * LazyObject stores an object in a range of bytes in a byte[].
+ * A LazyObject can represent any primitive object or hierarchical object like
+ * array, map or struct.
+ */
+public abstract class LazyObject<OI extends ObjectInspector> {
+
+ // The object inspector this lazy object was created with (or was last
+ // given through setInspector).
+ OI oi;
+
+ /**
+ * Create a LazyObject.
+ *
+ * @param oi
+ * The object inspector for this object. Derived classes can read
+ * it to obtain meta information about this Lazy Object.
+ */
+ protected LazyObject(OI oi) {
+ this.oi = oi;
+ }
+
+ /**
+ * Set the data for this LazyObject as a range inside a byte array.
+ *
+ * @param bytes
+ * The byte array that contains the data.
+ * @param start
+ * The start position inside the bytes.
+ * @param length
+ * The length of the data, starting from "start"
+ */
+ public abstract void init(byte[] bytes, int start, int length);
+
+ /**
+ * Initialize this LazyObject from a frame tuple reference. What is read
+ * from the tuple is defined by each subclass.
+ *
+ * @param tuple
+ * The tuple reference handed to the object.
+ */
+ public abstract void init(IFrameTupleReference tuple);
+
+ /**
+ * Return the object this LazyObject stands for. Each subclass decides what
+ * that is and whether null is returned for null data.
+ */
+ public abstract Object getObject();
+
+ /**
+ * Subclasses must provide their own hash code.
+ */
+ @Override
+ public abstract int hashCode();
+
+ /**
+ * @return the object inspector currently associated with this object
+ */
+ protected OI getInspector() {
+ return oi;
+ }
+
+ /**
+ * Replace the object inspector associated with this object.
+ *
+ * @param oi
+ * the new object inspector
+ */
+ protected void setInspector(OI oi) {
+ this.oi = oi;
+ }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyPrimitive.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyPrimitive.java
new file mode 100644
index 0000000..8139c65
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyPrimitive.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Writable;
+
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+/**
+ * LazyPrimitive stores a primitive Object in a LazyObject.
+ */
+public abstract class LazyPrimitive<OI extends ObjectInspector, T extends Writable> extends LazyObject<OI> {
+
+ LazyPrimitive(OI oi) {
+ super(oi);
+ }
+
+ LazyPrimitive(LazyPrimitive<OI, T> copy) {
+ super(copy.oi);
+ isNull = copy.isNull;
+ }
+
+ T data;
+ boolean isNull = false;
+
+ /**
+ * Returns the primitive object represented by this LazyObject. This is
+ * useful because it can make sure we have "null" for null objects.
+ */
+ @Override
+ public Object getObject() {
+ return isNull ? null : this;
+ }
+
+ public T getWritableObject() {
+ return isNull ? null : data;
+ }
+
+ @Override
+ public String toString() {
+ return isNull ? "null" : data.toString();
+ }
+
+ @Override
+ public int hashCode() {
+ return isNull ? 0 : data.hashCode();
+ }
+
+ @Override
+ public void init(IFrameTupleReference tuple) {
+ }
+
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java
new file mode 100644
index 0000000..05b82ba
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java
@@ -0,0 +1,460 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.ByteStream;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * The LazySerDe class combines the lazy property of LazySimpleSerDe class and
+ * the binary property of BinarySortable class. Lazy means a field is not
+ * deserialized until required. Binary means a field is serialized in binary
+ * compact format.
+ */
+public class LazySerDe implements SerDe {
+
+ public static final Log LOG = LogFactory.getLog(LazySerDe.class.getName());
+
+ /**
+ * Create a LazySerDe. Column names, column types and the cached row
+ * objects are set up later by initialize(Configuration, Properties).
+ */
+ public LazySerDe() {
+ }
+
+ List<String> columnNames;
+ List<TypeInfo> columnTypes;
+
+ TypeInfo rowTypeInfo;
+ ObjectInspector cachedObjectInspector;
+
+ // The object for storing row data
+ LazyColumnar cachedLazyStruct;
+
+    /**
+     * Initialize the SerDe with configuration and table information. The
+     * column names and column types are read from the table properties
+     * {@code Constants.LIST_COLUMNS} (comma-separated) and
+     * {@code Constants.LIST_COLUMN_TYPES}; an empty property means no columns.
+     * From them the row type, the row object inspector and the reusable lazy
+     * row object are created.
+     *
+     * @param conf
+     *            the configuration (not used by this SerDe)
+     * @param tbl
+     *            the table properties
+     * @throws SerDeException
+     *             if either of the two column properties is not set
+     */
+    @Override
+    public void initialize(Configuration conf, Properties tbl) throws SerDeException {
+        // Get column names and types
+        String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
+        String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
+        // Fail with a message that names the missing property instead of a
+        // bare NullPointerException further down.
+        if (columnNameProperty == null) {
+            throw new SerDeException(getClass().toString() + ": table property " + Constants.LIST_COLUMNS
+                    + " is not set!");
+        }
+        if (columnTypeProperty == null) {
+            throw new SerDeException(getClass().toString() + ": table property " + Constants.LIST_COLUMN_TYPES
+                    + " is not set!");
+        }
+
+        if (columnNameProperty.length() == 0) {
+            columnNames = new ArrayList<String>();
+        } else {
+            columnNames = Arrays.asList(columnNameProperty.split(","));
+        }
+        if (columnTypeProperty.length() == 0) {
+            columnTypes = new ArrayList<TypeInfo>();
+        } else {
+            columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+        }
+        assert (columnNames.size() == columnTypes.size());
+        // Create row related objects
+        rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
+        // Create the object inspector and the lazy binary struct object
+        cachedObjectInspector = LazyUtils.getLazyObjectInspectorFromTypeInfo(rowTypeInfo, true);
+        cachedLazyStruct = (LazyColumnar) LazyFactory.createLazyObject(cachedObjectInspector);
+        // output debug info
+        LOG.debug("LazySerDe initialized with: columnNames=" + columnNames + " columnTypes=" + columnTypes);
+    }
+
+ /**
+ * Returns the cached ObjectInspector for the row, which is created in
+ * initialize(Configuration, Properties).
+ */
+ @Override
+ public ObjectInspector getObjectInspector() throws SerDeException {
+ return cachedObjectInspector;
+ }
+
+ /**
+ * Returns the Writable Class after serialization, which is BytesWritable:
+ * serialize(Object, ObjectInspector) returns its result in a BytesWritable.
+ */
+ @Override
+ public Class<? extends Writable> getSerializedClass() {
+ return BytesWritable.class;
+ }
+
+ // The wrapper for byte array
+ ByteArrayRef byteArrayRef;
+
+    /**
+     * Deserialize a table record to a Lazy struct. Only the byte range is
+     * handed to the cached lazy struct; the same struct instance is returned
+     * on every call.
+     *
+     * @param field
+     *            the serialized record, either a BytesWritable or a Text
+     * @return the cached lazy struct initialized on the record's bytes, or
+     *         null when the record is empty
+     * @throws SerDeException
+     *             if {@code field} is neither a BytesWritable nor a Text
+     */
+    @SuppressWarnings("deprecation")
+    @Override
+    public Object deserialize(Writable field) throws SerDeException {
+        if (byteArrayRef == null) {
+            byteArrayRef = new ByteArrayRef();
+        }
+
+        final byte[] raw;
+        final int rawLength;
+        if (field instanceof BytesWritable) {
+            BytesWritable blob = (BytesWritable) field;
+            rawLength = blob.getSize();
+            if (rawLength == 0) {
+                return null;
+            }
+            // For backward-compatibility with hadoop 0.17
+            raw = blob.get();
+        } else if (field instanceof Text) {
+            Text text = (Text) field;
+            rawLength = text.getLength();
+            if (rawLength == 0) {
+                return null;
+            }
+            raw = text.getBytes();
+        } else {
+            throw new SerDeException(getClass().toString() + ": expects either BytesWritable or Text object!");
+        }
+
+        byteArrayRef.setData(raw);
+        cachedLazyStruct.init(byteArrayRef.getData(), 0, rawLength);
+        return cachedLazyStruct;
+    }
+
+ /**
+ * The reusable output buffer and serialize byte buffer.
+ */
+ BytesWritable serializeBytesWritable = new BytesWritable();
+ ByteStream.Output serializeByteStream = new ByteStream.Output();
+
+ /**
+ * Serialize an object to a byte buffer in a binary compact way.
+ */
+ @Override
+ public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
+ // make sure it is a struct record or not
+ serializeByteStream.reset();
+
+ if (objInspector.getCategory() != Category.STRUCT) {
+ // serialize the primitive object
+ serialize(serializeByteStream, obj, objInspector);
+ } else {
+ // serialize the row as a struct
+ serializeStruct(serializeByteStream, obj, (StructObjectInspector) objInspector);
+ }
+ // return the serialized bytes
+ serializeBytesWritable.set(serializeByteStream.getData(), 0, serializeByteStream.getCount());
+ return serializeBytesWritable;
+ }
+
+ boolean nullMapKey = false;
+
+    /**
+     * Serialize a struct object without writing the byte size. This function is
+     * shared by both row serialization and struct serialization.
+     * <p>
+     * The fields are written in groups of up to eight: one null byte (bit set
+     * to 1 for every non-null field of the group) followed by the serialized
+     * fields of that group.
+     *
+     * @param byteStream
+     *            the byte stream storing the serialization data
+     * @param obj
+     *            the struct object to serialize
+     * @param soi
+     *            the struct object inspector
+     */
+    private void serializeStruct(Output byteStream, Object obj, StructObjectInspector soi) {
+        // do nothing for null struct
+        if (null == obj) {
+            return;
+        }
+        /*
+         * Interleave serializing one null byte and 8 struct fields in each
+         * round, in order to support data deserialization with different table
+         * schemas
+         */
+        List<? extends StructField> fields = soi.getAllStructFieldRefs();
+        int size = fields.size();
+        int lasti = 0;
+        byte nullByte = 0;
+        for (int i = 0; i < size; i++) {
+            // set bit to 1 if a field is not null
+            if (null != soi.getStructFieldData(obj, fields.get(i))) {
+                nullByte |= 1 << (i % 8);
+            }
+            // write the null byte every eight elements or
+            // if this is the last element and serialize the
+            // corresponding 8 struct fields at the same time
+            if (7 == i % 8 || i == size - 1) {
+                // write to the stream that was passed in, not to the
+                // serializeByteStream field, so the parameter is honored
+                byteStream.write(nullByte);
+                for (int j = lasti; j <= i; j++) {
+                    StructField field = fields.get(j);
+                    serialize(byteStream, soi.getStructFieldData(obj, field), field.getFieldObjectInspector());
+                }
+                lasti = i + 1;
+                nullByte = 0;
+            }
+        }
+    }
+
+ /**
+ * A recursive function that serialize an object to a byte buffer based on
+ * its object inspector.
+ *
+ * @param byteStream
+ * the byte stream storing the serialization data
+ * @param obj
+ * the object to serialize
+ * @param objInspector
+ * the object inspector
+ */
+ private void serialize(Output byteStream, Object obj, ObjectInspector objInspector) {
+
+ // do nothing for null object
+ if (null == obj) {
+ return;
+ }
+
+ switch (objInspector.getCategory()) {
+ case PRIMITIVE: {
+ PrimitiveObjectInspector poi = (PrimitiveObjectInspector) objInspector;
+ switch (poi.getPrimitiveCategory()) {
+ case VOID: {
+ return;
+ }
+ case BOOLEAN: {
+ boolean v = ((BooleanObjectInspector) poi).get(obj);
+ byteStream.write((byte) (v ? 1 : 0));
+ return;
+ }
+ case BYTE: {
+ ByteObjectInspector boi = (ByteObjectInspector) poi;
+ byte v = boi.get(obj);
+ byteStream.write(v);
+ return;
+ }
+ case SHORT: {
+ ShortObjectInspector spoi = (ShortObjectInspector) poi;
+ short v = spoi.get(obj);
+ byteStream.write((byte) (v >> 8));
+ byteStream.write((byte) (v));
+ return;
+ }
+ case INT: {
+ IntObjectInspector ioi = (IntObjectInspector) poi;
+ int v = ioi.get(obj);
+ LazyUtils.writeVInt(byteStream, v);
+ return;
+ }
+ case LONG: {
+ LongObjectInspector loi = (LongObjectInspector) poi;
+ long v = loi.get(obj);
+ LazyUtils.writeVLong(byteStream, v);
+ return;
+ }
+ case FLOAT: {
+ FloatObjectInspector foi = (FloatObjectInspector) poi;
+ int v = Float.floatToIntBits(foi.get(obj));
+ byteStream.write((byte) (v >> 24));
+ byteStream.write((byte) (v >> 16));
+ byteStream.write((byte) (v >> 8));
+ byteStream.write((byte) (v));
+ return;
+ }
+ case DOUBLE: {
+ DoubleObjectInspector doi = (DoubleObjectInspector) poi;
+ long v = Double.doubleToLongBits(doi.get(obj));
+ byteStream.write((byte) (v >> 56));
+ byteStream.write((byte) (v >> 48));
+ byteStream.write((byte) (v >> 40));
+ byteStream.write((byte) (v >> 32));
+ byteStream.write((byte) (v >> 24));
+ byteStream.write((byte) (v >> 16));
+ byteStream.write((byte) (v >> 8));
+ byteStream.write((byte) (v));
+ return;
+ }
+ case STRING: {
+ StringObjectInspector soi = (StringObjectInspector) poi;
+ Text t = soi.getPrimitiveWritableObject(obj);
+ /* write byte size of the string which is a vint */
+ int length = t.getLength();
+ LazyUtils.writeVInt(byteStream, length);
+ /* write string itself */
+ byte[] data = t.getBytes();
+ byteStream.write(data, 0, length);
+ return;
+ }
+ default: {
+ throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
+ }
+ }
+ }
+ case LIST: {
+ ListObjectInspector loi = (ListObjectInspector) objInspector;
+ ObjectInspector eoi = loi.getListElementObjectInspector();
+
+ // 1/ reserve spaces for the byte size of the list
+ // which is a integer and takes four bytes
+ int byteSizeStart = byteStream.getCount();
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ int listStart = byteStream.getCount();
+
+ // 2/ write the size of the list as a VInt
+ int size = loi.getListLength(obj);
+ LazyUtils.writeVInt(byteStream, size);
+
+ // 3/ write the null bytes
+ byte nullByte = 0;
+ for (int eid = 0; eid < size; eid++) {
+ // set the bit to 1 if an element is not null
+ if (null != loi.getListElement(obj, eid)) {
+ nullByte |= 1 << (eid % 8);
+ }
+ // store the byte every eight elements or
+ // if this is the last element
+ if (7 == eid % 8 || eid == size - 1) {
+ byteStream.write(nullByte);
+ nullByte = 0;
+ }
+ }
+
+ // 4/ write element by element from the list
+ for (int eid = 0; eid < size; eid++) {
+ serialize(byteStream, loi.getListElement(obj, eid), eoi);
+ }
+
+ // 5/ update the list byte size
+ int listEnd = byteStream.getCount();
+ int listSize = listEnd - listStart;
+ byte[] bytes = byteStream.getData();
+ bytes[byteSizeStart] = (byte) (listSize >> 24);
+ bytes[byteSizeStart + 1] = (byte) (listSize >> 16);
+ bytes[byteSizeStart + 2] = (byte) (listSize >> 8);
+ bytes[byteSizeStart + 3] = (byte) (listSize);
+
+ return;
+ }
+ case MAP: {
+ MapObjectInspector moi = (MapObjectInspector) objInspector;
+ ObjectInspector koi = moi.getMapKeyObjectInspector();
+ ObjectInspector voi = moi.getMapValueObjectInspector();
+ Map<?, ?> map = moi.getMap(obj);
+
+ // 1/ reserve spaces for the byte size of the map
+ // which is a integer and takes four bytes
+ int byteSizeStart = byteStream.getCount();
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ int mapStart = byteStream.getCount();
+
+ // 2/ write the size of the map which is a VInt
+ int size = map.size();
+ LazyUtils.writeVInt(byteStream, size);
+
+ // 3/ write the null bytes
+ int b = 0;
+ byte nullByte = 0;
+ for (Map.Entry<?, ?> entry : map.entrySet()) {
+ // set the bit to 1 if a key is not null
+ if (null != entry.getKey()) {
+ nullByte |= 1 << (b % 8);
+ } else if (!nullMapKey) {
+ nullMapKey = true;
+ LOG.warn("Null map key encountered! Ignoring similar problems.");
+ }
+ b++;
+ // set the bit to 1 if a value is not null
+ if (null != entry.getValue()) {
+ nullByte |= 1 << (b % 8);
+ }
+ b++;
+ // write the byte to stream every 4 key-value pairs
+ // or if this is the last key-value pair
+ if (0 == b % 8 || b == size * 2) {
+ byteStream.write(nullByte);
+ nullByte = 0;
+ }
+ }
+
+ // 4/ write key-value pairs one by one
+ for (Map.Entry<?, ?> entry : map.entrySet()) {
+ serialize(byteStream, entry.getKey(), koi);
+ serialize(byteStream, entry.getValue(), voi);
+ }
+
+ // 5/ update the byte size of the map
+ int mapEnd = byteStream.getCount();
+ int mapSize = mapEnd - mapStart;
+ byte[] bytes = byteStream.getData();
+ bytes[byteSizeStart] = (byte) (mapSize >> 24);
+ bytes[byteSizeStart + 1] = (byte) (mapSize >> 16);
+ bytes[byteSizeStart + 2] = (byte) (mapSize >> 8);
+ bytes[byteSizeStart + 3] = (byte) (mapSize);
+
+ return;
+ }
+ case STRUCT: {
+ // 1/ reserve spaces for the byte size of the struct
+ // which is a integer and takes four bytes
+ int byteSizeStart = byteStream.getCount();
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ int structStart = byteStream.getCount();
+
+ // 2/ serialize the struct
+ serializeStruct(byteStream, obj, (StructObjectInspector) objInspector);
+
+ // 3/ update the byte size of the struct
+ int structEnd = byteStream.getCount();
+ int structSize = structEnd - structStart;
+ byte[] bytes = byteStream.getData();
+ bytes[byteSizeStart] = (byte) (structSize >> 24);
+ bytes[byteSizeStart + 1] = (byte) (structSize >> 16);
+ bytes[byteSizeStart + 2] = (byte) (structSize >> 8);
+ bytes[byteSizeStart + 3] = (byte) (structSize);
+
+ return;
+ }
+ default: {
+ throw new RuntimeException("Unrecognized type: " + objInspector.getCategory());
+ }
+ }
+ }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyShort.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyShort.java
new file mode 100644
index 0000000..f493b37
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyShort.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyShortObjectInspector;
+
+/**
+ * LazyObject holding a Short value that is decoded from its two-byte serialized form.
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project. As with the specification, this implementation relied on code laid out in <a href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's Delight, (Addison Wesley, 2002)</a> as well as <a href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ */
+public class LazyShort extends LazyPrimitive<LazyShortObjectInspector, ShortWritable> {
+
+    /** Creates an empty instance bound to the given object inspector. */
+    public LazyShort(LazyShortObjectInspector oi) {
+        super(oi);
+        data = new ShortWritable();
+    }
+
+    /** Copy constructor: duplicates the short currently held by {@code copy}. */
+    public LazyShort(LazyShort copy) {
+        super(copy);
+        data = new ShortWritable(copy.data.get());
+    }
+
+    /** Decodes the short at {@code bytes[start]}; a zero {@code length} marks the value as null. */
+    @Override
+    public void init(byte[] bytes, int start, int length) {
+        isNull = (length == 0);
+        if (isNull) {
+            return;
+        }
+        assert (length == 2);
+        data.set(LazyUtils.byteArrayToShort(bytes, start));
+    }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyString.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyString.java
new file mode 100644
index 0000000..0293af8
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyString.java
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.io.Text;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyStringObjectInspector;
+
+/**
+ * LazyObject for storing a value of String.
+ */
+public class LazyString extends LazyPrimitive<LazyStringObjectInspector, Text> {
+
+ public LazyString(LazyStringObjectInspector oi) {
+ super(oi);
+ data = new Text();
+ }
+
+ public LazyString(LazyString copy) {
+ super(copy);
+ data = new Text(copy.data);
+ }
+
+ VInt vInt = new LazyUtils.VInt();
+
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ if (length == 0) {
+ isNull = true;
+ return;
+ } else
+ isNull = false;
+
+ // get the byte length of the string
+ LazyUtils.readVInt(bytes, start, vInt);
+ if (vInt.value + vInt.length != length)
+ throw new IllegalStateException("parse string: length mismatch, expected " + (vInt.value + vInt.length)
+ + " but get " + length);
+ assert (length - vInt.length > -1);
+ data.set(bytes, start + vInt.length, length - vInt.length);
+ }
+
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyStruct.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyStruct.java
new file mode 100644
index 0000000..47e95e4
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyStruct.java
@@ -0,0 +1,234 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.RecordInfo;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyStructObjectInspector;
+
+/**
+ * A struct whose fields are decoded on demand from a shared byte range.
+ * <p>
+ * Serialized layout: {@code A B A B ... A B}, where every section A is a single
+ * null byte describing the (up to) eight fields of the section B that follows
+ * it. Bit i of the null byte is 1 when the i-th of those fields carries data
+ * and 0 when it is null. Each field is itself a LazyObject. The pattern
+ * repeats until all struct fields are covered.
+ */
+public class LazyStruct extends LazyNonPrimitive<LazyStructObjectInspector> {
+
+    private static Log LOG = LogFactory.getLog(LazyStruct.class.getName());
+
+    /**
+     * True once parse() has run for the current byte range.
+     */
+    boolean parsed;
+
+    /**
+     * One lazy object per struct field, created on the first parse.
+     */
+    @SuppressWarnings("rawtypes")
+    LazyObject[] fields;
+
+    /**
+     * Per field: whether fields[i] was initialized for the current byte range.
+     */
+    boolean[] fieldInited;
+
+    /**
+     * Per field: whether it is null. This cannot be derived from the length,
+     * because a zero-length value (e.g. an empty string) is not null.
+     */
+    boolean[] fieldIsNull;
+
+    /**
+     * Start offset and byte length of every non-null field. Only meaningful
+     * after the data has been parsed.
+     */
+    int[] fieldStart;
+    int[] fieldLength;
+
+    /**
+     * Creates a LazyStruct bound to the given object inspector.
+     */
+    protected LazyStruct(LazyStructObjectInspector oi) {
+        super(oi);
+    }
+
+    @Override
+    public void init(byte[] bytes, int start, int length) {
+        super.init(bytes, start, length);
+        parsed = false; // forces a re-parse on the next field access
+    }
+
+    RecordInfo recordInfo = new LazyUtils.RecordInfo(); // scratch result of checkObjectByteInfo
+    boolean missingFieldWarned = false; // the "missing fields" warning was already logged
+    boolean extraFieldWarned = false; // the "extra bytes" warning was already logged
+
+    /**
+     * Walks the serialized struct once and records, for every field, whether
+     * it is null and, if it is not, where its bytes start and how long they
+     * are. Fills fieldIsNull, fieldStart and fieldLength and resets
+     * fieldInited, so no field object is initialized until it is requested.
+     */
+    private void parse() {
+        List<? extends StructField> refs = ((StructObjectInspector) oi).getAllStructFieldRefs();
+
+        // first call: allocate the per-field lazy objects and bookkeeping arrays
+        if (fields == null) {
+            int count = refs.size();
+            fields = new LazyObject[count];
+            for (int f = 0; f < count; f++) {
+                ObjectInspector fieldOi = refs.get(f).getFieldObjectInspector();
+                fields[f] = (fieldOi != null) ? LazyFactory.createLazyObject(fieldOi) : null;
+            }
+            fieldInited = new boolean[count];
+            fieldIsNull = new boolean[count];
+            fieldStart = new int[count];
+            fieldLength = new int[count];
+        }
+
+        // Layout reminder: one null byte, up to eight fields, another null
+        // byte, the next eight fields, and so on.
+        final int structEnd = start + length;
+        // number of fields that fit inside the struct; only used in the warning
+        int seen = 0;
+        byte nullBits = bytes[start];
+        int cursor = start + 1;
+
+        // visit the fields in declaration order
+        for (int f = 0; f < fields.length; f++) {
+            int bit = f % 8;
+            // a set bit in the null byte marks a field that carries data
+            boolean present = (nullBits & (1 << bit)) != 0;
+            fieldIsNull[f] = !present;
+            if (present) {
+                LazyUtils.checkObjectByteInfo(refs.get(f).getFieldObjectInspector(), bytes, cursor,
+                        recordInfo);
+                fieldStart[f] = cursor + recordInfo.elementOffset;
+                fieldLength[f] = recordInfo.elementSize;
+                cursor = fieldStart[f] + fieldLength[f];
+            }
+
+            // a field only counts as seen while the cursor stays inside the struct
+            if (cursor <= structEnd) {
+                seen++;
+            }
+
+            // after every eighth field comes the next null byte; past the end
+            // of the struct the remaining fields are all treated as null
+            if (bit == 7) {
+                nullBits = (cursor < structEnd) ? bytes[cursor] : 0;
+                cursor++;
+            }
+        }
+
+        // trailing bytes that no field consumed (warn once per instance)
+        if (!extraFieldWarned && cursor < structEnd) {
+            extraFieldWarned = true;
+            LOG.warn("Extra bytes detected at the end of the row! Ignoring similar problems.");
+        }
+
+        // fields ran past the end of the struct (warn once per instance)
+        if (!missingFieldWarned && cursor > structEnd) {
+            missingFieldWarned = true;
+            LOG.warn("Missing fields! Expected " + fields.length + " fields but only got " + seen
+                    + "! Ignoring similar problems.");
+        }
+
+        // nothing has been materialized for this byte range yet
+        Arrays.fill(fieldInited, false);
+        parsed = true;
+    }
+
+    /**
+     * Returns one field of the struct, parsing the byte range first if that
+     * has not happened yet.
+     * <p>
+     * The value handed back is whatever the field's LazyObject returns from
+     * getObject() (a LazyStruct, for instance, returns itself), or null when
+     * the field is null.
+     *
+     * @param fieldID
+     *            the field index, starting from 0
+     * @return the field value, or null if the field is null
+     */
+    public Object getField(int fieldID) {
+        if (!parsed) {
+            parse();
+        }
+        return uncheckedGetField(fieldID);
+    }
+
+    /**
+     * Fetches a field assuming parse() already ran; shared by getField and
+     * getFieldsAsList.
+     *
+     * @param fieldID
+     *            the field index, starting from 0
+     * @return the field value, or null if the field is null
+     */
+    private Object uncheckedGetField(int fieldID) {
+        if (fieldIsNull[fieldID]) {
+            return null;
+        }
+        // bind the lazy object to its bytes the first time it is requested
+        // after a parse; later calls reuse the initialized object
+        if (!fieldInited[fieldID]) {
+            fieldInited[fieldID] = true;
+            fields[fieldID].init(bytes, fieldStart[fieldID], fieldLength[fieldID]);
+        }
+        return fields[fieldID].getObject();
+    }
+
+    ArrayList<Object> cachedList;
+
+    /**
+     * Returns all field values in order, reusing one list instance across calls.
+     *
+     * @return the field values; a null entry stands for a null field
+     */
+    public ArrayList<Object> getFieldsAsList() {
+        if (!parsed) {
+            parse();
+        }
+        if (cachedList != null) {
+            cachedList.clear();
+        } else {
+            cachedList = new ArrayList<Object>();
+        }
+        for (int idx = 0; idx < fields.length; idx++) {
+            cachedList.add(uncheckedGetField(idx));
+        }
+        return cachedList;
+    }
+
+    @Override
+    public Object getObject() {
+        return this; // the struct itself is the lazily decoded object
+    }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyUtils.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyUtils.java
new file mode 100644
index 0000000..6554ccc
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyUtils.java
@@ -0,0 +1,503 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.WritableUtils;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyObjectInspectorFactory;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+/**
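+ * Static helpers for the lazy serde: integer decoding/encoding, record size inspection and lazy object inspector lookup.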
+ */
+public final class LazyUtils {
+
+ /**
+  * Decodes the four bytes starting at the given offset into an int, most
+  * significant byte first. Adapted from code by aeden on DZone Snippets.
+  *
+  * @param b
+  *            the byte array
+  * @param offset
+  *            index of the most significant byte
+  * @return the decoded int
+  */
+ public static int byteArrayToInt(byte[] b, int offset) {
+     // each masked byte lands in its own 8-bit lane, so OR-ing the lanes
+     // yields the same bits as adding them
+     return ((b[offset] & 0xFF) << 24)
+             | ((b[offset + 1] & 0xFF) << 16)
+             | ((b[offset + 2] & 0xFF) << 8)
+             | (b[offset + 3] & 0xFF);
+ }
+
+ /**
+  * Decodes eight bytes, most significant byte first, into a long.
+  *
+  * @param b
+  *            the byte array
+  * @param offset
+  *            index of the most significant byte
+  * @return the decoded long
+  */
+ public static long byteArrayToLong(byte[] b, int offset) {
+     long result = 0L;
+     for (int i = 0; i < 8; i++) {
+         // make room for the next byte, then append it unsigned
+         result = (result << 8) | (b[offset + i] & 0xFFL);
+     }
+     return result;
+ }
+
+ /**
+  * Decodes two bytes, most significant byte first, into a short.
+  *
+  * @param b
+  *            the byte array
+  * @param offset
+  *            index of the most significant byte
+  * @return the decoded short
+  */
+ public static short byteArrayToShort(byte[] b, int offset) {
+     int high = b[offset] & 0xFF;
+     int low = b[offset + 1] & 0xFF;
+     // the narrowing cast keeps the low 16 bits, so 0xFFFF comes back as -1
+     return (short) ((high << 8) | low);
+ }
+
+ /**
+  * Describes one serialized record. A record has two parts, an optional size
+  * prefix followed by the element itself:
+  * <pre>
+  * |size|-------- element --------|
+  * </pre>
+  * elementOffset is the length of the size prefix (where the element starts)
+  * and elementSize is the length of the element part.
+  */
+ public static class RecordInfo {
+     /** Size in bytes of the "size" prefix, i.e. the offset of the element. */
+     public byte elementOffset = 0;
+     /** Size in bytes of the element part. */
+     public int elementSize = 0;
+
+     public RecordInfo() {
+     }
+
+     @Override
+     public String toString() {
+         return new StringBuilder("(").append(elementOffset).append(", ").append(elementSize).append(")").toString();
+     }
+ }
+
+ /** Legacy shared scratch VInt; not thread-safe and no longer used by checkObjectByteInfo. */
+ static VInt vInt = new LazyUtils.VInt();
+
+ /**
+  * Determines, from the field type and the serialized bytes, how many bytes
+  * of size prefix precede a field (elementOffset) and how many bytes the
+  * field itself occupies (elementSize).
+  * <ul>
+  * <li>void, boolean, byte, short, float, double: no prefix, fixed size.</li>
+  * <li>int, long: no prefix; the encoded size is derived from the first byte.</li>
+  * <li>string: a vint prefix holding the byte length of the string.</li>
+  * <li>list, map, struct: a four-byte prefix holding the size.</li>
+  * </ul>
+  *
+  * @param objectInspector
+  *            object inspector of the field
+  * @param bytes
+  *            byte array holding the serialized row
+  * @param offset
+  *            position in bytes where the field, including any prefix, starts
+  * @param recordInfo
+  *            receives the computed elementOffset and elementSize
+  * @throws RuntimeException
+  *             if the category or primitive category is not recognized
+  */
+ public static void checkObjectByteInfo(ObjectInspector objectInspector, byte[] bytes, int offset,
+         RecordInfo recordInfo) {
+     Category category = objectInspector.getCategory();
+     switch (category) {
+         case PRIMITIVE:
+             PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector) objectInspector).getPrimitiveCategory();
+             switch (primitiveCategory) {
+                 case VOID:
+                     recordInfo.elementOffset = 0;
+                     recordInfo.elementSize = 0;
+                     break;
+                 case BOOLEAN:
+                 case BYTE:
+                     recordInfo.elementOffset = 0;
+                     recordInfo.elementSize = 1;
+                     break;
+                 case SHORT:
+                     recordInfo.elementOffset = 0;
+                     recordInfo.elementSize = 2;
+                     break;
+                 case FLOAT:
+                     recordInfo.elementOffset = 0;
+                     recordInfo.elementSize = 4;
+                     break;
+                 case DOUBLE:
+                     recordInfo.elementOffset = 0;
+                     recordInfo.elementSize = 8;
+                     break;
+                 case INT:
+                 case LONG:
+                     recordInfo.elementOffset = 0;
+                     recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]);
+                     break;
+                 case STRING:
+                     // call-local VInt: a shared static one is overwritten by concurrent callers
+                     VInt prefix = new VInt();
+                     LazyUtils.readVInt(bytes, offset, prefix);
+                     recordInfo.elementOffset = prefix.length;
+                     recordInfo.elementSize = prefix.value;
+                     break;
+                 default:
+                     throw new RuntimeException("Unrecognized primitive type: " + primitiveCategory);
+             }
+             break;
+         case LIST:
+         case MAP:
+         case STRUCT:
+             recordInfo.elementOffset = 4;
+             recordInfo.elementSize = LazyUtils.byteArrayToInt(bytes, offset);
+             break;
+         default:
+             throw new RuntimeException("Unrecognized non-primitive type: " + category);
+     }
+ }
+
+ /**
+  * A zero-compressed encoded long together with the size of its encoding.
+  */
+ public static class VLong {
+     /** The decoded value. */
+     public long value = 0L;
+     /** Number of bytes the encoding occupies. */
+     public byte length = 0;
+
+     public VLong() {
+     }
+ }
+
+ /**
+  * Decodes a zero-compressed encoded long from a byte array.
+  *
+  * @param bytes
+  *            the byte array
+  * @param offset
+  *            index of the first byte of the encoding
+  * @param vlong
+  *            receives the decoded long and the encoded size in bytes
+  */
+ public static void readVLong(byte[] bytes, int offset, VLong vlong) {
+     final byte head = bytes[offset];
+     final int size = WritableUtils.decodeVIntSize(head);
+     vlong.length = (byte) size;
+     if (size == 1) {
+         vlong.value = head;
+         return;
+     }
+     long magnitude = 0L;
+     for (int idx = 1; idx < size; idx++) {
+         magnitude = (magnitude << 8) | (bytes[offset + idx] & 0xFF);
+     }
+     // negative numbers are stored as their one's complement
+     vlong.value = WritableUtils.isNegativeVInt(head) ? ~magnitude : magnitude;
+ }
+
+ /**
+  * A zero-compressed encoded integer together with the size of its encoding.
+  */
+ public static class VInt implements Serializable {
+     private static final long serialVersionUID = 1L;
+
+     /** The decoded value. */
+     public int value = 0;
+     /** Number of bytes the encoding occupies. */
+     public byte length = 0;
+
+     public VInt() {
+     }
+ }
+
+ /**
+  * Decodes a zero-compressed encoded int from a byte array.
+  *
+  * @param bytes
+  *            the byte array
+  * @param offset
+  *            index of the first byte of the encoding
+  * @param vInt
+  *            receives the decoded int and the encoded size in bytes
+  */
+ public static void readVInt(byte[] bytes, int offset, VInt vInt) {
+     final byte head = bytes[offset];
+     final int size = WritableUtils.decodeVIntSize(head);
+     vInt.length = (byte) size;
+     if (size == 1) {
+         vInt.value = head;
+         return;
+     }
+     int magnitude = 0;
+     for (int idx = 1; idx < size; idx++) {
+         magnitude = (magnitude << 8) | (bytes[offset + idx] & 0xFF);
+     }
+     // negative numbers are stored as their one's complement
+     vInt.value = WritableUtils.isNegativeVInt(head) ? ~magnitude : magnitude;
+ }
+
+ /**
+  * Writes a zero-compressed encoded int to the given hive byte stream.
+  *
+  * @param byteStream
+  *            the stream to append to
+  * @param i
+  *            the int; it is widened and encoded exactly like a long
+  */
+ public static void writeVInt(Output byteStream, int i) {
+     writeVLong(byteStream, (long) i);
+ }
+
+ /**
+  * Writes a zero-compressed encoded long to the given hive byte stream.
+  * <p>
+  * Values between -112 and 127 take a single byte. Any other value is
+  * written as a marker byte followed by the one to eight significant bytes
+  * of the value (of its one's complement when it is negative), most
+  * significant byte first. The marker encodes the sign and the number of
+  * payload bytes; readVLong recovers both from it through WritableUtils.
+  * </p>
+  *
+  * @param byteStream
+  *            the stream to append to
+  * @param l
+  *            the long
+  */
+ public static void writeVLong(Output byteStream, long l) {
+     // values in [-112, 127] are stored as-is in a single byte
+     if (l >= -112 && l <= 127) {
+         byteStream.write((byte) l);
+         return;
+     }
+
+     // markers -113..-120 announce positive values, -121..-128 negative ones
+     final boolean negative = l < 0;
+     final long magnitude = negative ? ~l : l;
+     int payloadBytes = 0;
+     for (long rest = magnitude; rest != 0; rest >>= 8) {
+         payloadBytes++;
+     }
+     byteStream.write((byte) ((negative ? -120 : -112) - payloadBytes));
+
+     // payload follows, most significant byte first
+     for (int shift = (payloadBytes - 1) * 8; shift >= 0; shift -= 8) {
+         byteStream.write((byte) (magnitude >> shift));
+     }
+ }
+
+ static Map<TypeInfo, ObjectInspector> cachedLazyObjectInspector = new ConcurrentHashMap<TypeInfo, ObjectInspector>();
+
+ /**
+ * Returns the lazy binary object inspector that can be used to inspect an
+ * lazy binary object of that typeInfo
+ * For primitive types, we use the standard writable object inspector.
+ */
+ public static ObjectInspector getLazyObjectInspectorFromTypeInfo(TypeInfo typeInfo, boolean topLevel) {
+ if (typeInfo == null)
+ throw new IllegalStateException("illegal type null ");
+ ObjectInspector result = cachedLazyObjectInspector.get(typeInfo);
+ if (result == null) {
+ switch (typeInfo.getCategory()) {
+ case PRIMITIVE: {
+ result = PrimitiveObjectInspectorFactory
+ .getPrimitiveLazyObjectInspector(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
+ break;
+ }
+ case LIST: {
+ ObjectInspector elementObjectInspector = getLazyObjectInspectorFromTypeInfo(
+ ((ListTypeInfo) typeInfo).getListElementTypeInfo(), false);
+ result = LazyObjectInspectorFactory.getLazyListObjectInspector(elementObjectInspector);
+ break;
+ }
+ case MAP: {
+ MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
+ ObjectInspector keyObjectInspector = getLazyObjectInspectorFromTypeInfo(
+ mapTypeInfo.getMapKeyTypeInfo(), false);
+ ObjectInspector valueObjectInspector = getLazyObjectInspectorFromTypeInfo(
+ mapTypeInfo.getMapValueTypeInfo(), false);
+ result = LazyObjectInspectorFactory.getLazyMapObjectInspector(keyObjectInspector,
+ valueObjectInspector);
+ break;
+ }
+ case STRUCT: {
+ StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
+ List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
+ List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+ List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size());
+
+ for (int i = 0; i < fieldTypeInfos.size(); i++) {
+ fieldObjectInspectors.add(getLazyObjectInspectorFromTypeInfo(fieldTypeInfos.get(i), false));
+ }
+
+ // if it is top level then create columnar
+ if (topLevel)
+ result = LazyObjectInspectorFactory.getLazyColumnarObjectInspector(fieldNames,
+ fieldObjectInspectors);
+ // if it is not top level then create struct
+ else
+ result = LazyObjectInspectorFactory.getLazyStructObjectInspector(fieldNames,
+ fieldObjectInspectors);
+
+ break;
+ }
+ default: {
+ result = null;
+ }
+ }
+ cachedLazyObjectInspector.put(typeInfo, result);
+ }
+ return result;
+ }
+
+ /**
+  * Builds the top-level (columnar) lazy object inspector for a row.
+  *
+  * @param fieldNames     names of the row's columns
+  * @param fieldTypeInfos types of the row's columns
+  * @return the columnar inspector over the per-column lazy inspectors
+  */
+ public static ObjectInspector getLazyObjectInspector(List<String> fieldNames, List<TypeInfo> fieldTypeInfos) {
+     List<ObjectInspector> columnInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size());
+     for (TypeInfo columnType : fieldTypeInfos) {
+         // columns are nested values, so they are never top-level themselves
+         columnInspectors.add(getLazyObjectInspectorFromTypeInfo(columnType, false));
+     }
+     return LazyObjectInspectorFactory.getLazyColumnarObjectInspector(fieldNames, columnInspectors);
+ }
+
+ private LazyUtils() {
+ // static-only utility class: keep it from being instantiated
+ }
+
+ /**
+  * Compares two byte ranges lexicographically, treating bytes as signed.
+  * When one range is a prefix of the other, the shorter range is the smaller.
+  *
+  * @param b1      first byte array
+  * @param start1  offset of the first range in b1
+  * @param length1 length of the first range
+  * @param b2      second byte array
+  * @param start2  offset of the second range in b2
+  * @param length2 length of the second range
+  * @return -1, 0 or +1 as the first range is less than, equal to or greater than the second
+  */
+ public static int compare(byte[] b1, int start1, int length1, byte[] b2, int start2, int length2) {
+     final int common = Math.min(length1, length2);
+     for (int k = 0; k < common; k++) {
+         final byte left = b1[start1 + k];
+         final byte right = b2[start2 + k];
+         if (left != right) {
+             // the first differing byte decides
+             return left < right ? -1 : 1;
+         }
+     }
+     // one range is a prefix of the other: the shorter one sorts first
+     if (length1 == length2) {
+         return 0;
+     }
+     return length1 < length2 ? -1 : 1;
+ }
+
+ /** Hashes the bytes data[start .. start + len) with seed 1 and multiplier 31. */
+ public static int hashBytes(byte[] data, int start, int len) {
+     int hash = 1;
+     for (int i = start, end = start + len; i < end; i++) { // end is start + len, not len
+         hash = (31 * hash) + data[i];
+     }
+     return hash;
+ }
+ /**
+  * Writes a zero-compressed encoded int to the given DataOutput. The int is
+  * widened and encoded exactly like a long.
+  *
+  * @param byteStream the output to write to
+  * @param i          the int
+  * @throws IOException if the underlying output fails
+  */
+ public static void writeVInt(DataOutput byteStream, int i) throws IOException {
+     writeVLong(byteStream, (long) i);
+ }
+
+ /**
+  * Writes a zero-compressed encoded long to the given DataOutput.
+  * <p>
+  * Values between -112 and 127 take a single byte. Any other value is
+  * written as a marker byte followed by the one to eight significant bytes
+  * of the value (of its one's complement when it is negative), most
+  * significant byte first. The marker encodes the sign and payload length.
+  *
+  * @param byteStream
+  *            the output to write to
+  * @param l
+  *            the long
+  * @throws IOException
+  *             if the underlying output fails
+  */
+ public static void writeVLong(DataOutput byteStream, long l) throws IOException {
+     // values in [-112, 127] are stored as-is in a single byte
+     if (l >= -112 && l <= 127) {
+         byteStream.write((byte) l);
+         return;
+     }
+
+     // markers -113..-120 announce positive values, -121..-128 negative ones
+     final boolean negative = l < 0;
+     final long magnitude = negative ? ~l : l;
+     int payloadBytes = 0;
+     for (long rest = magnitude; rest != 0; rest >>= 8) {
+         payloadBytes++;
+     }
+     byteStream.write((byte) ((negative ? -120 : -112) - payloadBytes));
+
+     // payload follows, most significant byte first
+     for (int shift = (payloadBytes - 1) * 8; shift >= 0; shift -= 8) {
+         byteStream.write((byte) (magnitude >> shift));
+     }
+ }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyColumnarObjectInspector.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyColumnarObjectInspector.java
new file mode 100644
index 0000000..b1ca622
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyColumnarObjectInspector.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector;
+
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyColumnar;
+
+/**
+ * ObjectInspector that reads columns out of a {@link LazyColumnar} row. The
+ * field metadata itself is kept by the standard struct inspector it extends.
+ *
+ * @see LazyColumnar
+ */
+public class LazyColumnarObjectInspector extends StandardStructObjectInspector implements Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+    /** Creates an inspector from parallel lists of field names and field inspectors. */
+    public LazyColumnarObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
+        super(structFieldNames, structFieldObjectInspectors);
+    }
+
+    /** Creates an inspector from already built struct fields. */
+    public LazyColumnarObjectInspector(List<StructField> fields) {
+        super(fields);
+    }
+
+    /** Returns the column referenced by {@code fieldRef}, or null when {@code data} is null. */
+    @Override
+    public Object getStructFieldData(Object data, StructField fieldRef) {
+        if (data == null) {
+            return null;
+        }
+        LazyColumnar row = (LazyColumnar) data;
+        int fieldID = ((MyField) fieldRef).getFieldID();
+        assert (fieldID >= 0 && fieldID < fields.size());
+        return row.getField(fieldID);
+    }
+
+    /** Returns all columns of the row as a list, or null when {@code data} is null. */
+    @Override
+    public List<Object> getStructFieldsDataAsList(Object data) {
+        return (data == null) ? null : ((LazyColumnar) data).getFieldsAsList();
+    }
+
+    /** Renders every field as {@code name:type} followed by a single space. */
+    @Override
+    public String toString() {
+        StringBuilder text = new StringBuilder();
+        for (MyField field : fields) {
+            text.append(field.getFieldName()).append(":")
+                    .append(field.getFieldObjectInspector().getTypeName()).append(" ");
+        }
+        return text.toString();
+    }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyListObjectInspector.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyListObjectInspector.java
new file mode 100644
index 0000000..aaa5d66
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyListObjectInspector.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyArray;
+
+/**
+ * ObjectInspector for lists that are backed by a {@link LazyArray}.
+ */
+public class LazyListObjectInspector extends StandardListObjectInspector {
+
+    protected LazyListObjectInspector(ObjectInspector listElementObjectInspector) {
+        super(listElementObjectInspector);
+    }
+
+    /** Returns the decoded list, or null when {@code data} is null. */
+    @Override
+    public List<?> getList(Object data) {
+        if (data != null) {
+            return ((LazyArray) data).getList();
+        }
+        return null;
+    }
+
+    /** Returns the element at {@code index}, or null when {@code data} is null. */
+    @Override
+    public Object getListElement(Object data, int index) {
+        if (data != null) {
+            return ((LazyArray) data).getListElementObject(index);
+        }
+        return null;
+    }
+
+    /** Returns the number of elements, or -1 when {@code data} is null. */
+    @Override
+    public int getListLength(Object data) {
+        if (data != null) {
+            return ((LazyArray) data).getListLength();
+        }
+        return -1;
+    }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyMapObjectInspector.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyMapObjectInspector.java
new file mode 100644
index 0000000..1b0c412
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyMapObjectInspector.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector;
+
+import java.util.Map;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyMap;
+
+/**
+ * ObjectInspector for LazyMap.
+ *
+ * @see LazyMap
+ */
+public class LazyMapObjectInspector extends StandardMapObjectInspector {
+
+    protected LazyMapObjectInspector(ObjectInspector mapKeyObjectInspector, ObjectInspector mapValueObjectInspector) {
+        super(mapKeyObjectInspector, mapValueObjectInspector);
+    }
+
+    /**
+     * Returns {@code ((LazyMap) data).getMap()}, or {@code null} when {@code data} is {@code null}.
+     */
+    @Override
+    public Map<?, ?> getMap(Object data) {
+        return data == null ? null : ((LazyMap) data).getMap();
+    }
+
+    /**
+     * Returns {@code ((LazyMap) data).getMapSize()}, or {@code -1} when {@code data} is {@code null}.
+     */
+    @Override
+    public int getMapSize(Object data) {
+        return data == null ? -1 : ((LazyMap) data).getMapSize();
+    }
+
+    /**
+     * Returns the value the LazyMap holds for {@code key}; a {@code null} map yields {@code null}, never a -1 sentinel.
+     */
+    @Override
+    public Object getMapValueElement(Object data, Object key) {
+        return data == null ? null : ((LazyMap) data).getMapValueElement(key);
+    }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyObjectInspectorFactory.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyObjectInspectorFactory.java
new file mode 100644
index 0000000..8093c94
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyObjectInspectorFactory.java
@@ -0,0 +1,82 @@
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+/**
+ * ObjectInspectorFactory is the primary way to create new ObjectInspector
+ * instances.
+ * SerDe classes should call the static functions in this library to create an
+ * ObjectInspector to return to the caller of SerDe2.getObjectInspector().
+ * The reason for having caches here is that ObjectInspectors do not have an
+ * internal state - so ObjectInspectors with the same construction parameters
+ * should result in exactly the same ObjectInspector.
+ */
+
+public final class LazyObjectInspectorFactory {
+
+    // Caches keyed by the construction-parameter signature. The getters publish new
+    // instances with putIfAbsent and then re-read the map, so callers racing on the
+    // same signature all receive one instance. Nothing in this class removes entries.
+    static ConcurrentHashMap<ArrayList<Object>, LazyColumnarObjectInspector> cachedLazyColumnarObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyColumnarObjectInspector>();
+    static ConcurrentHashMap<ArrayList<Object>, LazyStructObjectInspector> cachedLazyStructObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyStructObjectInspector>();
+    static ConcurrentHashMap<ArrayList<Object>, LazyListObjectInspector> cachedLazyListObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyListObjectInspector>();
+    static ConcurrentHashMap<ArrayList<Object>, LazyMapObjectInspector> cachedLazyMapObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyMapObjectInspector>();
+
+    public static LazyColumnarObjectInspector getLazyColumnarObjectInspector(List<String> structFieldNames,
+            List<ObjectInspector> structFieldObjectInspectors) {
+        ArrayList<Object> signature = new ArrayList<Object>();
+        signature.add(structFieldNames);
+        signature.add(structFieldObjectInspectors);
+        LazyColumnarObjectInspector result = cachedLazyColumnarObjectInspector.get(signature);
+        if (result == null) {
+            cachedLazyColumnarObjectInspector.putIfAbsent(signature, new LazyColumnarObjectInspector(structFieldNames, structFieldObjectInspectors));
+            result = cachedLazyColumnarObjectInspector.get(signature);
+        }
+        return result;
+    }
+
+    public static LazyStructObjectInspector getLazyStructObjectInspector(List<String> structFieldNames,
+            List<ObjectInspector> structFieldObjectInspectors) {
+        ArrayList<Object> signature = new ArrayList<Object>();
+        signature.add(structFieldNames);
+        signature.add(structFieldObjectInspectors);
+        LazyStructObjectInspector result = cachedLazyStructObjectInspector.get(signature);
+        if (result == null) {
+            cachedLazyStructObjectInspector.putIfAbsent(signature, new LazyStructObjectInspector(structFieldNames, structFieldObjectInspectors));
+            result = cachedLazyStructObjectInspector.get(signature);
+        }
+        return result;
+    }
+
+    public static LazyListObjectInspector getLazyListObjectInspector(ObjectInspector listElementInspector) {
+        ArrayList<Object> signature = new ArrayList<Object>();
+        signature.add(listElementInspector);
+        LazyListObjectInspector result = cachedLazyListObjectInspector.get(signature);
+        if (result == null) {
+            cachedLazyListObjectInspector.putIfAbsent(signature, new LazyListObjectInspector(listElementInspector));
+            result = cachedLazyListObjectInspector.get(signature);
+        }
+        return result;
+    }
+
+    public static LazyMapObjectInspector getLazyMapObjectInspector(ObjectInspector keyInspector,
+            ObjectInspector valueInspector) {
+        ArrayList<Object> signature = new ArrayList<Object>();
+        signature.add(keyInspector);
+        signature.add(valueInspector);
+        LazyMapObjectInspector result = cachedLazyMapObjectInspector.get(signature);
+        if (result == null) {
+            cachedLazyMapObjectInspector.putIfAbsent(signature, new LazyMapObjectInspector(keyInspector, valueInspector));
+            result = cachedLazyMapObjectInspector.get(signature);
+        }
+        return result;
+    }
+
+    private LazyObjectInspectorFactory() {
+        // prevent instantiation
+    }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyStructObjectInspector.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyStructObjectInspector.java
new file mode 100644
index 0000000..ad70d4c
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyStructObjectInspector.java
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyStruct;
+
+/**
+ * ObjectInspector for LazyStruct.
+ *
+ * @see LazyStruct
+ */
+public class LazyStructObjectInspector extends StandardStructObjectInspector {
+
+    protected LazyStructObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
+        super(structFieldNames, structFieldObjectInspectors);
+    }
+
+    protected LazyStructObjectInspector(List<StructField> fields) {
+        super(fields);
+    }
+
+    /**
+     * Returns the LazyStruct field at the field ID carried by {@code fieldRef}; a {@code null} struct yields {@code null}.
+     */
+    @Override
+    public Object getStructFieldData(Object data, StructField fieldRef) {
+        if (data == null) {
+            return null;
+        }
+        LazyStruct lazyStruct = (LazyStruct) data;
+        int position = ((MyField) fieldRef).getFieldID();
+        assert (position >= 0 && position < fields.size());
+        return lazyStruct.getField(position);
+    }
+
+    /**
+     * Delegates to {@code LazyStruct.getFieldsAsList()};
+     * a {@code null} input yields {@code null}.
+     */
+    @Override
+    public List<Object> getStructFieldsDataAsList(Object data) {
+        return data == null ? null : ((LazyStruct) data).getFieldsAsList();
+    }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/AbstractPrimitiveLazyObjectInspector.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/AbstractPrimitiveLazyObjectInspector.java
new file mode 100644
index 0000000..eaa2bbc
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/AbstractPrimitiveLazyObjectInspector.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
+import org.apache.hadoop.io.Writable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyPrimitive;
+
+/**
+ * An AbstractPrimitiveLazyObjectInspector for a LazyPrimitive object.
+ */
+public abstract class AbstractPrimitiveLazyObjectInspector<T extends Writable> extends AbstractPrimitiveObjectInspector {
+
+    protected AbstractPrimitiveLazyObjectInspector(PrimitiveTypeEntry typeEntry) {
+        super(typeEntry);
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public T getPrimitiveWritableObject(Object o) {
+        // A null input maps to a null writable. No diagnostic is printed here:
+        // library code must not write debug output to the host process's stdout.
+        return o == null ? null : ((LazyPrimitive<?, T>) o).getWritableObject();
+    }
+
+    @Override
+    public boolean preferWritable() {
+        return true;
+    }
+
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyBooleanObjectInspector.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyBooleanObjectInspector.java
new file mode 100644
index 0000000..7927c1e
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyBooleanObjectInspector.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.io.BooleanWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyBoolean;
+
+/**
+ * A WritableBooleanObjectInspector inspects a BooleanWritable Object.
+ */
+public class LazyBooleanObjectInspector extends AbstractPrimitiveLazyObjectInspector<BooleanWritable> implements
+ BooleanObjectInspector {
+
+ LazyBooleanObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.booleanTypeEntry);
+ }
+
+ @Override
+ public boolean get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyBoolean((LazyBoolean) o);
+ }
+
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Boolean.valueOf(get(o));
+ }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyByteObjectInspector.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyByteObjectInspector.java
new file mode 100644
index 0000000..10a881c
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyByteObjectInspector.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.io.ByteWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyByte;
+
+/**
+ * A WritableByteObjectInspector inspects a ByteWritable Object.
+ */
+public class LazyByteObjectInspector extends AbstractPrimitiveLazyObjectInspector<ByteWritable> implements
+ ByteObjectInspector {
+
+ LazyByteObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.byteTypeEntry);
+ }
+
+ @Override
+ public byte get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyByte((LazyByte) o);
+ }
+
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Byte.valueOf(get(o));
+ }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyDoubleObjectInspector.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyDoubleObjectInspector.java
new file mode 100644
index 0000000..9f98b56
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyDoubleObjectInspector.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.io.DoubleWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyDouble;
+
+/**
+ * A WritableDoubleObjectInspector inspects a DoubleWritable Object.
+ */
+public class LazyDoubleObjectInspector extends AbstractPrimitiveLazyObjectInspector<DoubleWritable> implements
+ DoubleObjectInspector {
+
+ LazyDoubleObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.doubleTypeEntry);
+ }
+
+ @Override
+ public double get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyDouble((LazyDouble) o);
+ }
+
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Double.valueOf(get(o));
+ }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyFloatObjectInspector.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyFloatObjectInspector.java
new file mode 100644
index 0000000..bf3e9a2
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyFloatObjectInspector.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.io.FloatWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyFloat;
+
+/**
+ * A FloatObjectInspector inspects a FloatWritable Object.
+ */
+public class LazyFloatObjectInspector extends AbstractPrimitiveLazyObjectInspector<FloatWritable> implements
+ FloatObjectInspector {
+
+ LazyFloatObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.floatTypeEntry);
+ }
+
+ @Override
+ public float get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyFloat((LazyFloat) o);
+ }
+
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Float.valueOf(get(o));
+ }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyIntObjectInspector.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyIntObjectInspector.java
new file mode 100644
index 0000000..87bcb0d
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyIntObjectInspector.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.io.IntWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyInteger;
+
+/**
+ * A WritableIntObjectInspector inspects a IntWritable Object.
+ */
+public class LazyIntObjectInspector extends AbstractPrimitiveLazyObjectInspector<IntWritable> implements
+ IntObjectInspector {
+
+ LazyIntObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.intTypeEntry);
+ }
+
+ @Override
+ public int get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyInteger((LazyInteger) o);
+ }
+
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Integer.valueOf(get(o));
+ }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyLongObjectInspector.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyLongObjectInspector.java
new file mode 100644
index 0000000..06b5d3c
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyLongObjectInspector.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.io.LongWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyLong;
+
+/**
+ * A WritableLongObjectInspector inspects a LongWritable Object.
+ */
+public class LazyLongObjectInspector extends AbstractPrimitiveLazyObjectInspector<LongWritable> implements
+ LongObjectInspector {
+
+ LazyLongObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.longTypeEntry);
+ }
+
+ @Override
+ public long get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyLong((LazyLong) o);
+ }
+
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Long.valueOf(get(o));
+ }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
new file mode 100644
index 0000000..5d7ef48
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
+
+import java.util.ArrayList;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+
+/**
+ * LazyPrimitiveObjectInspectorFactory is the primary way to create new
+ * ObjectInspector instances.
+ * SerDe classes should call the static functions in this library to create an
+ * ObjectInspector to return to the caller of SerDe2.getObjectInspector().
+ * The reason for having caches here is that ObjectInspectors do not have
+ * an internal state - so ObjectInspectors created with the same
+ * construction parameters should result in exactly the same
+ * ObjectInspector.
+ */
+public final class LazyPrimitiveObjectInspectorFactory {
+
+    public static final LazyBooleanObjectInspector LAZY_BOOLEAN_OBJECT_INSPECTOR = new LazyBooleanObjectInspector();
+    public static final LazyByteObjectInspector LAZY_BYTE_OBJECT_INSPECTOR = new LazyByteObjectInspector();
+    public static final LazyShortObjectInspector LAZY_SHORT_OBJECT_INSPECTOR = new LazyShortObjectInspector();
+    public static final LazyIntObjectInspector LAZY_INT_OBJECT_INSPECTOR = new LazyIntObjectInspector();
+    public static final LazyLongObjectInspector LAZY_LONG_OBJECT_INSPECTOR = new LazyLongObjectInspector();
+    public static final LazyFloatObjectInspector LAZY_FLOAT_OBJECT_INSPECTOR = new LazyFloatObjectInspector();
+    public static final LazyDoubleObjectInspector LAZY_DOUBLE_OBJECT_INSPECTOR = new LazyDoubleObjectInspector();
+    public static final LazyVoidObjectInspector LAZY_VOID_OBJECT_INSPECTOR = new LazyVoidObjectInspector();
+
+    // String inspectors are built from (escaped, escapeChar), so they are cached per parameter signature.
+    static ConcurrentHashMap<ArrayList<Object>, LazyStringObjectInspector> cachedLazyStringObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyStringObjectInspector>();
+
+    public static LazyStringObjectInspector getLazyStringObjectInspector(boolean escaped, byte escapeChar) {
+        ArrayList<Object> signature = new ArrayList<Object>();
+        signature.add(Boolean.valueOf(escaped));
+        signature.add(Byte.valueOf(escapeChar));
+        LazyStringObjectInspector result = cachedLazyStringObjectInspector.get(signature);
+        if (result == null) {
+            // putIfAbsent then re-read: callers racing on one signature all return the instance that won the insert.
+            cachedLazyStringObjectInspector.putIfAbsent(signature, new LazyStringObjectInspector(escaped, escapeChar));
+            result = cachedLazyStringObjectInspector.get(signature);
+        }
+        return result;
+    }
+
+    public static AbstractPrimitiveLazyObjectInspector<?> getLazyObjectInspector(PrimitiveCategory primitiveCategory,
+            boolean escaped, byte escapeChar) {
+        switch (primitiveCategory) {
+            case BOOLEAN:
+                return LAZY_BOOLEAN_OBJECT_INSPECTOR;
+            case BYTE:
+                return LAZY_BYTE_OBJECT_INSPECTOR;
+            case SHORT:
+                return LAZY_SHORT_OBJECT_INSPECTOR;
+            case INT:
+                return LAZY_INT_OBJECT_INSPECTOR;
+            case LONG:
+                return LAZY_LONG_OBJECT_INSPECTOR;
+            case FLOAT:
+                return LAZY_FLOAT_OBJECT_INSPECTOR;
+            case DOUBLE:
+                return LAZY_DOUBLE_OBJECT_INSPECTOR;
+            case STRING:
+                return getLazyStringObjectInspector(escaped, escapeChar);
+            case VOID:
+                return LAZY_VOID_OBJECT_INSPECTOR;
+            default:
+                throw new RuntimeException("Internal error: Cannot find ObjectInspector " + " for " + primitiveCategory);
+        }
+    }
+
+    private LazyPrimitiveObjectInspectorFactory() {
+        // prevent instantiation
+    }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyShortObjectInspector.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyShortObjectInspector.java
new file mode 100644
index 0000000..b02d9bc
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyShortObjectInspector.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
+
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyShort;
+
+/**
+ * A WritableShortObjectInspector inspects a ShortWritable Object.
+ */
+public class LazyShortObjectInspector extends AbstractPrimitiveLazyObjectInspector<ShortWritable> implements
+ ShortObjectInspector {
+
+ LazyShortObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.shortTypeEntry);
+ }
+
+ @Override
+ public short get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyShort((LazyShort) o);
+ }
+
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Short.valueOf(get(o));
+ }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyStringObjectInspector.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyStringObjectInspector.java
new file mode 100644
index 0000000..4d649dc
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyStringObjectInspector.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.io.Text;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyString;
+
+/**
+ * A WritableStringObjectInspector inspects a Text Object.
+ */
+public class LazyStringObjectInspector extends AbstractPrimitiveLazyObjectInspector<Text> implements
+ StringObjectInspector {
+
+ boolean escaped;
+ byte escapeChar;
+
+ LazyStringObjectInspector(boolean escaped, byte escapeChar) {
+ super(PrimitiveObjectInspectorUtils.stringTypeEntry);
+ this.escaped = escaped;
+ this.escapeChar = escapeChar;
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyString((LazyString) o);
+ }
+
+ @Override
+ public Text getPrimitiveWritableObject(Object o) {
+ return o == null ? null : ((LazyString) o).getWritableObject();
+ }
+
+ @Override
+ public String getPrimitiveJavaObject(Object o) {
+ return o == null ? null : ((LazyString) o).getWritableObject().toString();
+ }
+
+ public boolean isEscaped() {
+ return escaped;
+ }
+
+ public byte getEscapeChar() {
+ return escapeChar;
+ }
+
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyVoidObjectInspector.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyVoidObjectInspector.java
new file mode 100644
index 0000000..c916191
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyVoidObjectInspector.java
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
+import org.apache.hadoop.io.NullWritable;
+
+/**
+ * A WritableVoidObjectInspector inspects a NullWritable Object.
+ */
+public class LazyVoidObjectInspector extends AbstractPrimitiveLazyObjectInspector<NullWritable> implements
+ VoidObjectInspector {
+
+ LazyVoidObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.voidTypeEntry);
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o;
+ }
+
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ throw new RuntimeException("Internal error: cannot create Void object.");
+ }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/PrimitiveObjectInspectorFactory.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/PrimitiveObjectInspectorFactory.java
new file mode 100644
index 0000000..33f0e51
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/PrimitiveObjectInspectorFactory.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
+
+import java.util.HashMap;
+
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+
+/**
+ * Factory for the lazy primitive ObjectInspectors. One inspector per supported primitive
+ * category is created when this class is initialized and registered in a lookup table, so
+ * every request for the same category returns the same instance. The shared string inspector
+ * is constructed with escaping disabled and a backslash as its escape byte.
+ */
+public final class PrimitiveObjectInspectorFactory {
+
+    public static final LazyBooleanObjectInspector LazyBooleanObjectInspector = new LazyBooleanObjectInspector();
+    public static final LazyByteObjectInspector LazyByteObjectInspector = new LazyByteObjectInspector();
+    public static final LazyShortObjectInspector LazyShortObjectInspector = new LazyShortObjectInspector();
+    public static final LazyIntObjectInspector LazyIntObjectInspector = new LazyIntObjectInspector();
+    public static final LazyLongObjectInspector LazyLongObjectInspector = new LazyLongObjectInspector();
+    public static final LazyFloatObjectInspector LazyFloatObjectInspector = new LazyFloatObjectInspector();
+    public static final LazyDoubleObjectInspector LazyDoubleObjectInspector = new LazyDoubleObjectInspector();
+    public static final LazyStringObjectInspector LazyStringObjectInspector = new LazyStringObjectInspector(false,
+            (byte) '\\');
+    public static final LazyVoidObjectInspector LazyVoidObjectInspector = new LazyVoidObjectInspector();
+
+    private static final HashMap<PrimitiveCategory, AbstractPrimitiveLazyObjectInspector<?>> INSPECTORS =
+            new HashMap<PrimitiveCategory, AbstractPrimitiveLazyObjectInspector<?>>();
+
+    static {
+        INSPECTORS.put(PrimitiveCategory.BOOLEAN, LazyBooleanObjectInspector);
+        INSPECTORS.put(PrimitiveCategory.BYTE, LazyByteObjectInspector);
+        INSPECTORS.put(PrimitiveCategory.SHORT, LazyShortObjectInspector);
+        INSPECTORS.put(PrimitiveCategory.INT, LazyIntObjectInspector);
+        INSPECTORS.put(PrimitiveCategory.LONG, LazyLongObjectInspector);
+        INSPECTORS.put(PrimitiveCategory.FLOAT, LazyFloatObjectInspector);
+        INSPECTORS.put(PrimitiveCategory.DOUBLE, LazyDoubleObjectInspector);
+        INSPECTORS.put(PrimitiveCategory.STRING, LazyStringObjectInspector);
+        INSPECTORS.put(PrimitiveCategory.VOID, LazyVoidObjectInspector);
+    }
+
+    /**
+     * Returns the shared lazy inspector registered for the given primitive category.
+     * @param primitiveCategory category to look up
+     * @return the registered inspector, never null
+     * @throws RuntimeException if no inspector is registered for the category
+     */
+    public static AbstractPrimitiveLazyObjectInspector<?> getPrimitiveLazyObjectInspector(
+            PrimitiveCategory primitiveCategory) {
+        AbstractPrimitiveLazyObjectInspector<?> inspector = INSPECTORS.get(primitiveCategory);
+        if (inspector == null) {
+            throw new RuntimeException("Internal error: Cannot find ObjectInspector " + " for " + primitiveCategory);
+        }
+        return inspector;
+    }
+
+    private PrimitiveObjectInspectorFactory() {
+        // prevent instantiation
+    }
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/parser/IHiveParser.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/parser/IHiveParser.java
new file mode 100644
index 0000000..7830c52
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/parser/IHiveParser.java
@@ -0,0 +1,16 @@
+package edu.uci.ics.hivesterix.serde.parser;
+
+import java.io.IOException;
+
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+
+public interface IHiveParser {
+    /**
+     * Parses one Hive row held in {@code data} and writes the parsed fields into {@code tb}.
+     * @param data   buffer containing the row
+     * @param start  offset of the row's first byte within data
+     * @param length number of bytes the row occupies
+     * @param tb     tuple builder that receives the parsed output
+     */
+    void parse(byte[] data, int start, int length, ArrayTupleBuilder tb) throws IOException;
+}
diff --git a/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/parser/TextToBinaryTupleParser.java b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/parser/TextToBinaryTupleParser.java
new file mode 100644
index 0000000..38e1b36
--- /dev/null
+++ b/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/parser/TextToBinaryTupleParser.java
@@ -0,0 +1,174 @@
+package edu.uci.ics.hivesterix.serde.parser;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.lazy.LazyInteger;
+import org.apache.hadoop.hive.serde2.lazy.LazyLong;
+import org.apache.hadoop.hive.serde2.lazy.LazyShort;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.io.Text;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.NotImplementedException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+
+public class TextToBinaryTupleParser implements IHiveParser {
+ private int[] invertedIndex;
+ private int[] fieldEnds;
+ private int lastNecessaryFieldIndex;
+ private LazySimpleStructObjectInspector inputObjectInspector;
+ private List<? extends StructField> fieldRefs;
+
+ public TextToBinaryTupleParser(int[] outputColumnsOffset, ObjectInspector structInspector) {
+ int size = 0;
+ for (int i = 0; i < outputColumnsOffset.length; i++)
+ if (outputColumnsOffset[i] >= 0)
+ size++;
+ invertedIndex = new int[size];
+ for (int i = 0; i < outputColumnsOffset.length; i++)
+ if (outputColumnsOffset[i] >= 0) {
+ invertedIndex[outputColumnsOffset[i]] = i;
+ lastNecessaryFieldIndex = i;
+ }
+ fieldEnds = new int[outputColumnsOffset.length];
+ for (int i = 0; i < fieldEnds.length; i++)
+ fieldEnds[i] = 0;
+ inputObjectInspector = (LazySimpleStructObjectInspector) structInspector;
+ fieldRefs = inputObjectInspector.getAllStructFieldRefs();
+ }
+
+ @Override
+ public void parse(byte[] bytes, int start, int length, ArrayTupleBuilder tb) throws IOException {
+ byte separator = inputObjectInspector.getSeparator();
+ boolean lastColumnTakesRest = inputObjectInspector.getLastColumnTakesRest();
+ boolean isEscaped = inputObjectInspector.isEscaped();
+ byte escapeChar = inputObjectInspector.getEscapeChar();
+ DataOutput output = tb.getDataOutput();
+
+ int structByteEnd = start + length - 1;
+ int fieldId = 0;
+ int fieldByteEnd = start;
+
+ // Go through all bytes in the byte[]
+ while (fieldByteEnd <= structByteEnd && fieldId <= lastNecessaryFieldIndex) {
+ if (fieldByteEnd == structByteEnd || bytes[fieldByteEnd] == separator) {
+ // Reached the end of a field?
+ if (lastColumnTakesRest && fieldId == fieldEnds.length - 1) {
+ fieldByteEnd = structByteEnd;
+ }
+ fieldEnds[fieldId] = fieldByteEnd;
+ if (fieldId == fieldEnds.length - 1 || fieldByteEnd == structByteEnd) {
+ // for the case of null fields
+ for (int i = fieldId; i < fieldEnds.length; i++) {
+ fieldEnds[i] = fieldByteEnd;
+ }
+ break;
+ }
+ fieldByteEnd++;
+ fieldId++;
+ } else {
+ if (isEscaped && bytes[fieldByteEnd] == escapeChar && fieldByteEnd + 1 < structByteEnd) {
+ // ignore the char after escape_char
+ fieldByteEnd += 2;
+ } else {
+ fieldByteEnd++;
+ }
+ }
+ }
+
+ for (int i = 0; i < invertedIndex.length; i++) {
+ int index = invertedIndex[i];
+ StructField fieldRef = fieldRefs.get(index);
+ ObjectInspector inspector = fieldRef.getFieldObjectInspector();
+ Category category = inspector.getCategory();
+ int fieldStart = index == 0 ? 0 : fieldEnds[index - 1] + 1;
+ int fieldEnd = fieldEnds[index];
+ if (bytes[fieldEnd] == separator)
+ fieldEnd--;
+ int fieldLen = fieldEnd - fieldStart + 1;
+ switch (category) {
+ case PRIMITIVE:
+ PrimitiveObjectInspector poi = (PrimitiveObjectInspector) inspector;
+ switch (poi.getPrimitiveCategory()) {
+ case VOID: {
+ break;
+ }
+ case BOOLEAN: {
+ output.write(bytes[fieldStart]);
+ break;
+ }
+ case BYTE: {
+ output.write(bytes[fieldStart]);
+ break;
+ }
+ case SHORT: {
+ short v = LazyShort.parseShort(bytes, fieldStart, fieldLen);
+ output.write((byte) (v >> 8));
+ output.write((byte) (v));
+ break;
+ }
+ case INT: {
+ int v = LazyInteger.parseInt(bytes, fieldStart, fieldLen);
+ LazyUtils.writeVInt(output, v);
+ break;
+ }
+ case LONG: {
+ long v = LazyLong.parseLong(bytes, fieldStart, fieldLen);
+ LazyUtils.writeVLong(output, v);
+ break;
+ }
+ case FLOAT: {
+ float value = Float.parseFloat(Text.decode(bytes, fieldStart, fieldLen));
+ int v = Float.floatToIntBits(value);
+ output.write((byte) (v >> 24));
+ output.write((byte) (v >> 16));
+ output.write((byte) (v >> 8));
+ output.write((byte) (v));
+ break;
+ }
+ case DOUBLE: {
+ try {
+ double value = Double.parseDouble(Text.decode(bytes, fieldStart, fieldLen));
+ long v = Double.doubleToLongBits(value);
+ output.write((byte) (v >> 56));
+ output.write((byte) (v >> 48));
+ output.write((byte) (v >> 40));
+ output.write((byte) (v >> 32));
+ output.write((byte) (v >> 24));
+ output.write((byte) (v >> 16));
+ output.write((byte) (v >> 8));
+ output.write((byte) (v));
+ } catch (NumberFormatException e) {
+ throw e;
+ }
+ break;
+ }
+ case STRING: {
+ LazyUtils.writeVInt(output, fieldLen);
+ output.write(bytes, fieldStart, fieldLen);
+ break;
+ }
+ default: {
+ throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
+ }
+ }
+ break;
+ case STRUCT:
+ throw new NotImplementedException("Unrecognized type: struct ");
+ case LIST:
+ throw new NotImplementedException("Unrecognized type: struct ");
+ case MAP:
+ throw new NotImplementedException("Unrecognized type: struct ");
+ case UNION:
+ throw new NotImplementedException("Unrecognized type: struct ");
+ }
+ tb.addFieldEndOffset();
+ }
+ }
+}
diff --git a/hivesterix-serde/src/test/java/edu/uci/ics/hyracks/AppTest.java b/hivesterix-serde/src/test/java/edu/uci/ics/hyracks/AppTest.java
new file mode 100644
index 0000000..0c701c8
--- /dev/null
+++ b/hivesterix-serde/src/test/java/edu/uci/ics/hyracks/AppTest.java
@@ -0,0 +1,38 @@
+package edu.uci.ics.hyracks;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ * Unit test for simple App.
+ */
+public class AppTest
+ extends TestCase
+{
+ /**
+ * Create the test case
+ *
+ * @param testName name of the test case
+ */
+ public AppTest( String testName )
+ {
+ super( testName );
+ }
+
+ /**
+ * @return the suite of tests being tested
+ */
+ public static Test suite()
+ {
+ return new TestSuite( AppTest.class );
+ }
+
+ /**
+ * Rigourous Test :-)
+ */
+ public void testApp()
+ {
+ assertTrue( true );
+ }
+}