refactor the kmer hash/partition/normalized code
git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_genomix@3134 123451ca-8445-de46-9d55-352943316053
diff --git a/genomix/genomix-data/pom.xml b/genomix/genomix-data/pom.xml
index 651beb6..0d99c4a 100644
--- a/genomix/genomix-data/pom.xml
+++ b/genomix/genomix-data/pom.xml
@@ -22,8 +22,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
</plugins>
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/normalizers/Integer64NormalizedKeyComputerFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/normalizers/Integer64NormalizedKeyComputerFactory.java
deleted file mode 100644
index e69de29..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/normalizers/Integer64NormalizedKeyComputerFactory.java
+++ /dev/null
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/normalizers/VLongNormalizedKeyComputerFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/normalizers/VLongNormalizedKeyComputerFactory.java
deleted file mode 100644
index 17ca8cb..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/normalizers/VLongNormalizedKeyComputerFactory.java
+++ /dev/null
@@ -1,52 +0,0 @@
-package edu.uci.ics.genomix.data.normalizers;
-
-import edu.uci.ics.genomix.data.partition.KmerHashPartitioncomputerFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-
-public class VLongNormalizedKeyComputerFactory implements
- INormalizedKeyComputerFactory {
- private static final long serialVersionUID = 8735044913496854551L;
-
- @Override
- public INormalizedKeyComputer createNormalizedKeyComputer() {
- return new INormalizedKeyComputer() {
- private static final int POSTIVE_LONG_MASK = (3 << 30);
- private static final int NON_NEGATIVE_INT_MASK = (2 << 30);
- private static final int NEGATIVE_LONG_MASK = (0 << 30);
-
- /**
- * one kmer
- */
- @Override
- public int normalize(byte[] bytes, int start, int length) {
- long value = KmerHashPartitioncomputerFactory.getLong(bytes, start);
-
- int highValue = (int) (value >> 32);
- if (highValue > 0) {
- /** * larger than Integer.MAX */
- int highNmk = getKey(highValue);
- highNmk >>= 2;
- highNmk |= POSTIVE_LONG_MASK;
- return highNmk;
- } else if (highValue == 0) {
- /** * smaller than Integer.MAX but >=0 */
- int lowNmk = (int) value;
- lowNmk >>= 2;
- lowNmk |= NON_NEGATIVE_INT_MASK;
- return lowNmk;
- } else {
- /** * less than 0: have not optimized for that */
- int highNmk = getKey(highValue);
- highNmk >>= 2;
- highNmk |= NEGATIVE_LONG_MASK;
- return highNmk;
- }
- }
-
- private int getKey(int value) {
- return value ^ Integer.MIN_VALUE;
- }
- };
- }
-}
\ No newline at end of file
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/serde/ByteSerializerDeserializer.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/ByteSerializerDeserializer.java
similarity index 95%
rename from genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/serde/ByteSerializerDeserializer.java
rename to genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/ByteSerializerDeserializer.java
index d88c0a0..98159c0 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/serde/ByteSerializerDeserializer.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/ByteSerializerDeserializer.java
@@ -1,4 +1,4 @@
-package edu.uci.ics.genomix.data.serde;
+package edu.uci.ics.genomix.data.std.accessors;
import java.io.DataInput;
import java.io.DataOutput;
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/KmerBinaryHashFunctionFamily.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/KmerBinaryHashFunctionFamily.java
new file mode 100644
index 0000000..b34772d
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/KmerBinaryHashFunctionFamily.java
@@ -0,0 +1,21 @@
+package edu.uci.ics.genomix.data.std.accessors;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFamily;
+
+public class KmerBinaryHashFunctionFamily implements IBinaryHashFunctionFamily {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public IBinaryHashFunction createBinaryHashFunction(final int seed) {
+
+ return new IBinaryHashFunction() {
+
+ @Override
+ public int hash(byte[] bytes, int offset, int length) {
+ return KmerHashPartitioncomputerFactory.hashBytes(bytes,
+ offset, length);
+ }
+ };
+ }
+}
\ No newline at end of file
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/partition/KmerHashPartitioncomputerFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/KmerHashPartitioncomputerFactory.java
similarity index 70%
rename from genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/partition/KmerHashPartitioncomputerFactory.java
rename to genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/KmerHashPartitioncomputerFactory.java
index ce60917..d136ef3 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/partition/KmerHashPartitioncomputerFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/KmerHashPartitioncomputerFactory.java
@@ -1,4 +1,4 @@
-package edu.uci.ics.genomix.data.partition;
+package edu.uci.ics.genomix.data.std.accessors;
import java.nio.ByteBuffer;
@@ -19,14 +19,6 @@
return hash;
}
- public static long getLong(byte[] bytes, int offset) {
- return (((long) (bytes[offset] & 0xff)) << 56) + (((long) (bytes[offset + 1] & 0xff)) << 48)
- + (((long) (bytes[offset + 2] & 0xff)) << 40) + (((long) (bytes[offset + 3] & 0xff)) << 32)
- + (((long) (bytes[offset + 4] & 0xff)) << 24) + (((long) (bytes[offset + 5] & 0xff)) << 16)
- + (((long) (bytes[offset + 6] & 0xff)) << 8) + (((long) (bytes[offset + 7] & 0xff)) << 0);
- }
-
-
@Override
public ITuplePartitionComputer createPartitioner() {
return new ITuplePartitionComputer() {
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/KmerNormarlizedComputerFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/KmerNormarlizedComputerFactory.java
new file mode 100644
index 0000000..1ca90c2
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/KmerNormarlizedComputerFactory.java
@@ -0,0 +1,23 @@
+package edu.uci.ics.genomix.data.std.accessors;
+
+import edu.uci.ics.genomix.data.std.primitive.KmerPointable;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+
+public class KmerNormarlizedComputerFactory implements
+ INormalizedKeyComputerFactory {
+ private static final long serialVersionUID = 8735044913496854551L;
+
+ @Override
+ public INormalizedKeyComputer createNormalizedKeyComputer() {
+ return new INormalizedKeyComputer() {
+ /**
+ * read one int from Kmer, make sure this int is consistent with Kmer comparator
+ */
+ @Override
+ public int normalize(byte[] bytes, int start, int length) {
+ return KmerPointable.getIntReverse(bytes, start, length);
+ }
+ };
+ }
+}
\ No newline at end of file
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/LongBinaryHashFunctionFamily.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/LongBinaryHashFunctionFamily.java
deleted file mode 100644
index e69de29..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/LongBinaryHashFunctionFamily.java
+++ /dev/null
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/LongHashFunctionFamily.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/LongHashFunctionFamily.java
deleted file mode 100644
index e69de29..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/LongHashFunctionFamily.java
+++ /dev/null
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/MurmurHash3BinaryHashFunctionFamily.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/MurmurHash3BinaryHashFunctionFamily.java
deleted file mode 100644
index e69de29..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/MurmurHash3BinaryHashFunctionFamily.java
+++ /dev/null
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/VLongBinaryHashFunctionFamily.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/VLongBinaryHashFunctionFamily.java
deleted file mode 100644
index 7ead93e..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/accessors/VLongBinaryHashFunctionFamily.java
+++ /dev/null
@@ -1,30 +0,0 @@
-package edu.uci.ics.genomix.data.std.accessors;
-
-import edu.uci.ics.genomix.data.std.primitive.VLongKmerPointable;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFamily;
-import edu.uci.ics.hyracks.data.std.api.IHashable;
-
-public class VLongBinaryHashFunctionFamily implements IBinaryHashFunctionFamily {
- private static final long serialVersionUID = 1L;
-
- @Override
- public IBinaryHashFunction createBinaryHashFunction(final int seed) {
-
- return new IBinaryHashFunction() {
- private VLongKmerPointable p = new VLongKmerPointable();
-
- @Override
- public int hash(byte[] bytes, int offset, int length) {
- if (length + offset >= bytes.length)
- throw new IllegalStateException("out of bound");
- p.set(bytes, offset, length);
- int hash = Math.abs( p.hash() * (seed + 1));
- if (hash < 0) {
- hash = Math.abs(hash + 1);
- }
- return hash;
- }
- };
- }
-}
\ No newline at end of file
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/primitive/KmerPointable.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/primitive/KmerPointable.java
new file mode 100644
index 0000000..7864830
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/primitive/KmerPointable.java
@@ -0,0 +1,131 @@
+package edu.uci.ics.genomix.data.std.primitive;
+
+import edu.uci.ics.genomix.data.std.accessors.KmerHashPartitioncomputerFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.data.std.api.AbstractPointable;
+import edu.uci.ics.hyracks.data.std.api.IComparable;
+import edu.uci.ics.hyracks.data.std.api.IHashable;
+import edu.uci.ics.hyracks.data.std.api.INumeric;
+import edu.uci.ics.hyracks.data.std.api.IPointable;
+import edu.uci.ics.hyracks.data.std.api.IPointableFactory;
+
+public final class KmerPointable extends AbstractPointable implements
+ IHashable, IComparable, INumeric {
+ public static final ITypeTraits TYPE_TRAITS = new ITypeTraits() {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public boolean isFixedLength() {
+ return false;
+ }
+
+ @Override
+ public int getFixedLength() {
+ return -1;
+ }
+ };
+
+ public static final IPointableFactory FACTORY = new IPointableFactory() {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public IPointable createPointable() {
+ return new KmerPointable();
+ }
+
+ @Override
+ public ITypeTraits getTypeTraits() {
+ return TYPE_TRAITS;
+ }
+ };
+
+ public static short getShortReverse(byte[] bytes, int offset, int length) {
+ if (length < 2) {
+ return (short) (bytes[offset]);
+ }
+ return (short) (((bytes[offset + length - 1] & 0xff) << 8) + (bytes[offset
+ + length - 2] & 0xff));
+ }
+
+ public static int getIntReverse(byte[] bytes, int offset, int length) {
+ if (length < 4) {
+ return getShortReverse(bytes, offset, length);
+ }
+ return ((bytes[offset + length - 1] & 0xff) << 24)
+ + ((bytes[offset + length - 2] & 0xff) << 16)
+ + ((bytes[offset + length - 3] & 0xff) << 8)
+ + ((bytes[offset + length - 4] & 0xff) << 0);
+ }
+
+ public static long getLongReverse(byte[] bytes, int offset, int length) {
+ if (length < 8) {
+ return getIntReverse(bytes, offset, length);
+ }
+ return (((long) (bytes[offset + length - 1] & 0xff)) << 56)
+ + (((long) (bytes[offset + length - 2] & 0xff)) << 48)
+ + (((long) (bytes[offset + length - 3] & 0xff)) << 40)
+ + (((long) (bytes[offset + length - 4] & 0xff)) << 32)
+ + (((long) (bytes[offset + length - 5] & 0xff)) << 24)
+ + (((long) (bytes[offset + length - 6] & 0xff)) << 16)
+ + (((long) (bytes[offset + length - 7] & 0xff)) << 8)
+ + (((long) (bytes[offset + length - 8] & 0xff)) << 0);
+ }
+
+ @Override
+ public int compareTo(IPointable pointer) {
+ return compareTo(pointer.getByteArray(), pointer.getStartOffset(),
+ pointer.getLength());
+ }
+
+ @Override
+ public int compareTo(byte[] bytes, int offset, int length) {
+
+ if (this.length != length) {
+ return this.length - length;
+ }
+
+ for (int i = length - 1; i >= 0; i--) {
+ if (this.bytes[this.start + i] < bytes[offset + i]) {
+ return -1;
+ } else if (this.bytes[this.start + i] > bytes[offset + i]) {
+ return 1;
+ }
+ }
+ return 0;
+ }
+
+ @Override
+ public int hash() {
+ return KmerHashPartitioncomputerFactory.hashBytes(bytes, start, length);
+ }
+
+ @Override
+ public byte byteValue() {
+ return bytes[start + length - 1];
+ }
+
+ @Override
+ public short shortValue() {
+ return getShortReverse(bytes, start, length);
+ }
+
+ @Override
+ public int intValue() {
+ return getIntReverse(bytes, start, length);
+ }
+
+ @Override
+ public long longValue() {
+ return getLongReverse(bytes, start, length);
+ }
+
+ @Override
+ public float floatValue() {
+ return Float.intBitsToFloat(intValue());
+ }
+
+ @Override
+ public double doubleValue() {
+ return Double.longBitsToDouble(longValue());
+ }
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/primitive/VLongKmerPointable.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/primitive/VLongKmerPointable.java
deleted file mode 100644
index cb71310..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/primitive/VLongKmerPointable.java
+++ /dev/null
@@ -1,111 +0,0 @@
-package edu.uci.ics.genomix.data.std.primitive;
-
-import edu.uci.ics.genomix.data.partition.KmerHashPartitioncomputerFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
-import edu.uci.ics.hyracks.data.std.api.AbstractPointable;
-import edu.uci.ics.hyracks.data.std.api.IComparable;
-import edu.uci.ics.hyracks.data.std.api.IHashable;
-import edu.uci.ics.hyracks.data.std.api.INumeric;
-import edu.uci.ics.hyracks.data.std.api.IPointable;
-import edu.uci.ics.hyracks.data.std.api.IPointableFactory;
-
-public final class VLongKmerPointable extends AbstractPointable implements
- IHashable, IComparable, INumeric {
- public static final ITypeTraits TYPE_TRAITS = new ITypeTraits() {
- private static final long serialVersionUID = 1L;
-
- @Override
- public boolean isFixedLength() {
- return false;
- }
-
- @Override
- public int getFixedLength() {
- return -1;
- }
- };
-
- public static final IPointableFactory FACTORY = new IPointableFactory() {
- private static final long serialVersionUID = 1L;
-
- @Override
- public IPointable createPointable() {
- return new VLongKmerPointable();
- }
-
- @Override
- public ITypeTraits getTypeTraits() {
- return TYPE_TRAITS;
- }
- };
-
-
- public long getLong() {
- return KmerHashPartitioncomputerFactory.getLong(bytes, start);
- }
-
- @Override
- public int compareTo(IPointable pointer) {
- return compareTo(pointer.getByteArray(), pointer.getStartOffset(),
- pointer.getLength());
- }
-
- @Override
- public int compareTo(byte[] bytes, int start, int length) {
-
- if (this.length != length) {
- return this.length - length;
- }
-
- for (int i = 0; i < length; i++) {
- if (this.bytes[this.start + i] < bytes[start + i]) {
- return -1;
- } else if (this.bytes[this.start + i] > bytes[start + i]) {
- return 1;
- }
- }
- return 0;
- }
-
- @Override
- public int hash() {// BKDRHash
- int hash = 1;
- for (int i = start + 1; i <= start + length; i++)
- hash = (31 * hash) + (int) bytes[i];
- if (hash < 0) {
- hash = -(hash + 1);
- }
- return hash;
- }
-
- @Override
- public byte byteValue() {
- return (byte) bytes[start + 1];
- }
-
- @Override
- public short shortValue() {
-
- return (short) ((bytes[start + 2] & 0xff) << 8 + bytes[start + 1] & 0xff);
- }
-
- @Override
- public int intValue() {
- return (int) ((bytes[start + 4] & 0xff) << 24 + (bytes[start + 3] & 0xff) << 16 + (bytes[start + 2] & 0xff) << 8 + bytes[start + 1] & 0xff);
- }
-
- @Override
- public long longValue() {
- return getLong();
- }
-
- @Override
- public float floatValue() {
- return getLong();
- }
-
- @Override
- public double doubleValue() {
- return getLong();
- }
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/primitive/VLongPointable.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/primitive/VLongPointable.java
deleted file mode 100644
index e69de29..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/data/std/primitive/VLongPointable.java
+++ /dev/null
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/FileScanDescriptor.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/FileScanDescriptor.java
deleted file mode 100644
index e69de29..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/FileScanDescriptor.java
+++ /dev/null
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/PrinterOperatorDescriptor.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/PrinterOperatorDescriptor.java
deleted file mode 100644
index e69de29..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/PrinterOperatorDescriptor.java
+++ /dev/null
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/ReadsKeyValueParserFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/ReadsKeyValueParserFactory.java
index fe22251..5764d3f 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/ReadsKeyValueParserFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/ReadsKeyValueParserFactory.java
@@ -7,7 +7,7 @@
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
-import edu.uci.ics.genomix.data.serde.ByteSerializerDeserializer;
+import edu.uci.ics.genomix.data.std.accessors.ByteSerializerDeserializer;
import edu.uci.ics.genomix.type.Kmer;
import edu.uci.ics.genomix.type.Kmer.GENE_CODE;
import edu.uci.ics.hyracks.api.comm.IFrameWriter;
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/Tester.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/Tester.java
deleted file mode 100644
index e69de29..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/Tester.java
+++ /dev/null
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/DistributedMergeLmerAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/DistributedMergeLmerAggregateFactory.java
index 00f4256..98a0cd9 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/DistributedMergeLmerAggregateFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/DistributedMergeLmerAggregateFactory.java
@@ -3,7 +3,7 @@
import java.io.DataOutput;
import java.io.IOException;
-import edu.uci.ics.genomix.data.serde.ByteSerializerDeserializer;
+import edu.uci.ics.genomix.data.std.accessors.ByteSerializerDeserializer;
import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/MergeKmerAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/MergeKmerAggregateFactory.java
index 32c50bb..08ff462 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/MergeKmerAggregateFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/MergeKmerAggregateFactory.java
@@ -3,7 +3,7 @@
import java.io.DataOutput;
import java.io.IOException;
-import edu.uci.ics.genomix.data.serde.ByteSerializerDeserializer;
+import edu.uci.ics.genomix.data.std.accessors.ByteSerializerDeserializer;
import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/util/NonSyncWriter.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/util/NonSyncWriter.java
deleted file mode 100644
index 24c4113..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/dataflow/util/NonSyncWriter.java
+++ /dev/null
@@ -1,26 +0,0 @@
-package edu.uci.ics.genomix.dataflow.util;
-
-import java.io.IOException;
-
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.io.SequenceFile.ValueBytes;
-
-public class NonSyncWriter {
- private FSDataOutputStream out;
-
- public NonSyncWriter(FSDataOutputStream output) {
- out = output;
- }
-
- public void appendRaw(byte[] keyData, int keyOffset, int keyLength,
- ValueBytes val) throws IOException {
- out.writeInt(keyLength + val.getSize()); // total record length
-
- out.writeInt(keyLength); // key portion length
-
- out.write(keyData, keyOffset, keyLength); // key
-
- val.writeUncompressedBytes(out); // value
- }
-
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/job/JobGenBrujinGraph.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/job/JobGenBrujinGraph.java
index 7c4dcf0..c4a6300 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/job/JobGenBrujinGraph.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/job/JobGenBrujinGraph.java
@@ -7,11 +7,11 @@
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
-import edu.uci.ics.genomix.data.normalizers.VLongNormalizedKeyComputerFactory;
-import edu.uci.ics.genomix.data.partition.KmerHashPartitioncomputerFactory;
-import edu.uci.ics.genomix.data.serde.ByteSerializerDeserializer;
-import edu.uci.ics.genomix.data.std.accessors.VLongBinaryHashFunctionFamily;
-import edu.uci.ics.genomix.data.std.primitive.VLongKmerPointable;
+import edu.uci.ics.genomix.data.std.accessors.ByteSerializerDeserializer;
+import edu.uci.ics.genomix.data.std.accessors.KmerHashPartitioncomputerFactory;
+import edu.uci.ics.genomix.data.std.accessors.KmerNormarlizedComputerFactory;
+import edu.uci.ics.genomix.data.std.accessors.KmerBinaryHashFunctionFamily;
+import edu.uci.ics.genomix.data.std.primitive.KmerPointable;
import edu.uci.ics.genomix.dataflow.ConnectorPolicyAssignmentPolicy;
import edu.uci.ics.genomix.dataflow.KMerSequenceWriterFactory;
import edu.uci.ics.genomix.dataflow.KMerTextWriterFactory;
@@ -110,8 +110,8 @@
keyFields,
frameLimits,
new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
- .of(VLongKmerPointable.FACTORY) },
- new VLongNormalizedKeyComputerFactory(),
+ .of(KmerPointable.FACTORY) },
+ new KmerNormarlizedComputerFactory(),
aggeragater,
new DistributedMergeLmerAggregateFactory(),
combineOutputRec,
@@ -119,7 +119,7 @@
new FieldHashPartitionComputerFactory(
keyFields,
new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory
- .of(VLongKmerPointable.FACTORY) }),
+ .of(KmerPointable.FACTORY) }),
tableSize), true);
}
@@ -137,9 +137,9 @@
recordSizeInBytes,
tableSize,
new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
- .of(VLongKmerPointable.FACTORY) },
- new IBinaryHashFunctionFamily[] { new VLongBinaryHashFunctionFamily() },
- hashfuncStartLevel, new VLongNormalizedKeyComputerFactory(),
+ .of(KmerPointable.FACTORY) },
+ new IBinaryHashFunctionFamily[] { new KmerBinaryHashFunctionFamily() },
+ hashfuncStartLevel, new KmerNormarlizedComputerFactory(),
new MergeKmerAggregateFactory(),
new DistributedMergeLmerAggregateFactory(), combineOutputRec,
true);
@@ -166,12 +166,12 @@
new KmerHashPartitioncomputerFactory(),
keyFields,
new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
- .of(VLongKmerPointable.FACTORY) });
+ .of(KmerPointable.FACTORY) });
crossGrouper = new PreclusteredGroupOperatorDescriptor(
jobSpec,
keyFields,
new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
- .of(VLongKmerPointable.FACTORY) },
+ .of(KmerPointable.FACTORY) },
new DistributedMergeLmerAggregateFactory(),
combineOutputRec);
break;
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/job/JobGenContigsGeneration.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/job/JobGenContigsGeneration.java
deleted file mode 100644
index e69de29..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/job/JobGenContigsGeneration.java
+++ /dev/null
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/job/JobGenGraphCleanning.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/job/JobGenGraphCleanning.java
deleted file mode 100644
index e69de29..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/job/JobGenGraphCleanning.java
+++ /dev/null
diff --git a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/example/jobrun/JobRunTest.java b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/example/jobrun/JobRunTest.java
index 12ca9c9..3e80ab7 100644
--- a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/example/jobrun/JobRunTest.java
+++ b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/example/jobrun/JobRunTest.java
@@ -118,8 +118,16 @@
}
@Test
+ public void TestAll() throws Exception{
+ cleanUpReEntry();
+ TestExternalGroupby();
+ cleanUpReEntry();
+ TestPreClusterGroupby();
+ cleanUpReEntry();
+ TestHybridGroupby();
+ }
+
public void TestExternalGroupby() throws Exception {
- //cleanUpReEntry();
conf.set(GenomixJob.GROUPBY_TYPE, "external");
conf.set(GenomixJob.OUTPUT_FORMAT, "binary");
System.err.println("Testing ExternalGroupBy");
@@ -127,9 +135,7 @@
Assert.assertEquals(true, checkResults());
}
- @Test
public void TestPreClusterGroupby() throws Exception {
- cleanUpReEntry();
conf.set(GenomixJob.GROUPBY_TYPE, "precluster");
conf.set(GenomixJob.OUTPUT_FORMAT, "binary");
System.err.println("Testing PreClusterGroupBy");
@@ -137,9 +143,7 @@
Assert.assertEquals(true, checkResults());
}
- @Test
public void TestHybridGroupby() throws Exception {
- cleanUpReEntry();
conf.set(GenomixJob.GROUPBY_TYPE, "hybrid");
conf.set(GenomixJob.OUTPUT_FORMAT, "binary");
System.err.println("Testing HybridGroupBy");