Add oldtype package so that the old hadoop package builds successfully
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java
index 03e2fd9..5be5f83 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java
@@ -50,10 +50,96 @@
}
public static byte getSymbolFromCode(byte code) {
- if (code > 3 || code < 0) {
- throw new IllegalArgumentException("Not such gene code");
+ if (code < 0 || code > 3) { // byte is signed: negative codes must not index GENE_SYMBOL
+ return '!'; // sentinel symbol for an invalid gene code (no exception thrown)
}
return GENE_SYMBOL[code];
}
+ public static byte getAdjBit(byte t) {
+ byte r = 0;
+ switch (t) {
+ case 'A':
+ case 'a':
+ r = 1 << A;
+ break;
+ case 'C':
+ case 'c':
+ r = 1 << C;
+ break;
+ case 'G':
+ case 'g':
+ r = 1 << G;
+ break;
+ case 'T':
+ case 't':
+ r = 1 << T;
+ break;
+ }
+ return r;
+ }
+
+ /**
+ * Used by path merge, where a kmer is merged with its next neighbor. The
+ * bitmap {@code t} must have exactly one neighbor bit set.
+ *
+ * @param t
+ * the neighbor code in BitMap
+ * @return the genecode, or -1 if t is not a single-neighbor bitmap
+ */
+ public static byte getGeneCodeFromBitMap(byte t) {
+ switch (t) {
+ case 1 << A:
+ return A;
+ case 1 << C:
+ return C;
+ case 1 << G:
+ return G;
+ case 1 << T:
+ return T;
+ }
+ return -1;
+ }
+
+ public static byte getBitMapFromGeneCode(byte t) {
+ return (byte) (1 << t);
+ }
+
+ public static int countNumberOfBitSet(int i) {
+ int c = 0;
+ for (; i != 0; c++) {
+ i &= i - 1;
+ }
+ return c;
+ }
+
+ public static int inDegree(byte bitmap) {
+ return countNumberOfBitSet((bitmap >> 4) & 0x0f);
+ }
+
+ public static int outDegree(byte bitmap) {
+ return countNumberOfBitSet(bitmap & 0x0f);
+ }
+
+ public static byte mergePreNextAdj(byte pre, byte next) {
+ return (byte) (pre << 4 | (next & 0x0f));
+ }
+
+ public static String getSymbolFromBitMap(byte code) {
+ int left = (code >> 4) & 0x0F;
+ int right = code & 0x0F;
+ StringBuilder str = new StringBuilder();
+ for (int i = A; i <= T; i++) {
+ if ((left & (1 << i)) != 0) {
+ str.append((char) GENE_SYMBOL[i]);
+ }
+ }
+ str.append('|');
+ for (int i = A; i <= T; i++) {
+ if ((right & (1 << i)) != 0) {
+ str.append((char) GENE_SYMBOL[i]);
+ }
+ }
+ return str.toString();
+ }
}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingMapper.java
index b1e5e59..1b158ed 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingMapper.java
@@ -26,9 +26,7 @@
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.KmerCountValue;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
@SuppressWarnings({ "unused", "deprecation" })
public class ResultsCheckingMapper extends MapReduceBase implements Mapper<KmerBytesWritable, KmerCountValue, Text, Text> {
KmerBytesWritable valWriter;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixCombiner.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixCombiner.java
index 8a4cdc9..76515f3 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixCombiner.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixCombiner.java
@@ -23,9 +23,7 @@
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.KmerCountValue;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
/**
* This class implement the combiner operator of Mapreduce model
*/
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixDriver.java
index cd0ee2d..e3f3fb2 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixDriver.java
@@ -28,9 +28,7 @@
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.KmerCountValue;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
/**
* This class implement driver which start the mapreduce program for graphbuilding
*/
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixMapper.java
index e9fa3f0..868c40f 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixMapper.java
@@ -27,10 +27,7 @@
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.GeneCode;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.KmerCountValue;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
/**
* This class implement mapper operator of mapreduce model
*/
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixReducer.java
index 9b284df..7f9b2bf 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixReducer.java
@@ -22,9 +22,7 @@
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.KmerCountValue;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
/**
* This class implement reducer operator of mapreduce model
*/
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterDriver.java
index a2eafeb..1db513f 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterDriver.java
@@ -27,8 +27,7 @@
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
@SuppressWarnings("deprecation")
public class CountFilterDriver {
private static class Options {
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterMapper.java
index 4c25597..da0c42e 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterMapper.java
@@ -23,9 +23,7 @@
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.KmerCountValue;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
@SuppressWarnings({ "deprecation" })
public class CountFilterMapper extends MapReduceBase implements
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterReducer.java
index 58be646..c241b52 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterReducer.java
@@ -22,8 +22,7 @@
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
@SuppressWarnings("deprecation")
public class CountFilterReducer extends MapReduceBase implements
Reducer<KmerBytesWritable, ByteWritable, KmerBytesWritable, ByteWritable> {
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/GeneCode.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/GeneCode.java
new file mode 100644
index 0000000..4b95e04
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/GeneCode.java
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hadoop.oldtype;
+
+public class GeneCode {
+ public final static byte[] GENE_SYMBOL = { 'A', 'C', 'G', 'T' };
+ /**
+ * Make sure these 4 ids equal the index of the corresponding char in
+ * {@link #GENE_SYMBOL}.
+ */
+ public static final byte A = 0;
+ public static final byte C = 1;
+ public static final byte G = 2;
+ public static final byte T = 3;
+
+ public static byte getCodeFromSymbol(byte ch) {
+ byte r = 0;
+ switch (ch) {
+ case 'A':
+ case 'a':
+ r = A;
+ break;
+ case 'C':
+ case 'c':
+ r = C;
+ break;
+ case 'G':
+ case 'g':
+ r = G;
+ break;
+ case 'T':
+ case 't':
+ r = T;
+ break;
+ }
+ return r;
+ }
+
+ public static byte getSymbolFromCode(byte code) {
+ if (code < 0 || code > 3) { // byte is signed: negative codes must not index GENE_SYMBOL
+ return '!'; // sentinel symbol for an invalid gene code
+ }
+ return GENE_SYMBOL[code];
+ }
+
+ public static byte getAdjBit(byte t) {
+ byte r = 0;
+ switch (t) {
+ case 'A':
+ case 'a':
+ r = 1 << A;
+ break;
+ case 'C':
+ case 'c':
+ r = 1 << C;
+ break;
+ case 'G':
+ case 'g':
+ r = 1 << G;
+ break;
+ case 'T':
+ case 't':
+ r = 1 << T;
+ break;
+ }
+ return r;
+ }
+
+ /**
+ * Used by path merge, where a kmer is merged with its next neighbor. The
+ * bitmap {@code t} must have exactly one neighbor bit set.
+ *
+ * @param t
+ * the neighbor code in BitMap
+ * @return the genecode, or -1 if t is not a single-neighbor bitmap
+ */
+ public static byte getGeneCodeFromBitMap(byte t) {
+ switch (t) {
+ case 1 << A:
+ return A;
+ case 1 << C:
+ return C;
+ case 1 << G:
+ return G;
+ case 1 << T:
+ return T;
+ }
+ return -1;
+ }
+
+ public static byte getBitMapFromGeneCode(byte t) {
+ return (byte) (1 << t);
+ }
+
+ public static int countNumberOfBitSet(int i) {
+ int c = 0;
+ for (; i != 0; c++) {
+ i &= i - 1;
+ }
+ return c;
+ }
+
+ public static int inDegree(byte bitmap) {
+ return countNumberOfBitSet((bitmap >> 4) & 0x0f);
+ }
+
+ public static int outDegree(byte bitmap) {
+ return countNumberOfBitSet(bitmap & 0x0f);
+ }
+
+ public static byte mergePreNextAdj(byte pre, byte next) {
+ return (byte) (pre << 4 | (next & 0x0f));
+ }
+
+ public static String getSymbolFromBitMap(byte code) {
+ int left = (code >> 4) & 0x0F;
+ int right = code & 0x0F;
+ StringBuilder str = new StringBuilder();
+ for (int i = A; i <= T; i++) {
+ if ((left & (1 << i)) != 0) {
+ str.append((char) GENE_SYMBOL[i]);
+ }
+ }
+ str.append('|');
+ for (int i = A; i <= T; i++) {
+ if ((right & (1 << i)) != 0) {
+ str.append((char) GENE_SYMBOL[i]);
+ }
+ }
+ return str.toString();
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerBytesWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerBytesWritable.java
new file mode 100644
index 0000000..6d53649
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerBytesWritable.java
@@ -0,0 +1,313 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hadoop.oldtype;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.Serializable;
+
+import org.apache.hadoop.io.BinaryComparable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableComparator;
+
+/**
+ * Fix kmer length byteswritable
+ * It was used to generate the graph in which phase the kmer length doesn't change.
+ * Thus the size of bytes doesn't change either.
+ */
+public class KmerBytesWritable extends BinaryComparable implements Serializable, WritableComparable<BinaryComparable> {
+ /**
+ *
+ */
+ private static final long serialVersionUID = 1L;
+ private static final byte[] EMPTY_BYTES = {};
+
+ protected int size;
+ protected byte[] bytes;
+ protected int kmerlength;
+
+ @Deprecated
+ public KmerBytesWritable() {
+ this(0, EMPTY_BYTES);
+ }
+
+ public KmerBytesWritable(int k, byte[] storage) {
+ this.kmerlength = k;
+ if (k > 0) {
+ this.size = KmerUtil.getByteNumFromK(kmerlength);
+ this.bytes = storage;
+ if (this.bytes.length < size) {
+ throw new ArrayIndexOutOfBoundsException("Storage is smaller than required space for kmerlength:k");
+ }
+ } else {
+ this.bytes = storage;
+ this.size = 0;
+ }
+ }
+
+ /**
+ * Initial Kmer space by kmerlength
+ *
+ * @param k
+ * kmerlength
+ */
+ public KmerBytesWritable(int k) {
+ this.kmerlength = k;
+ this.size = KmerUtil.getByteNumFromK(kmerlength);
+ if (k > 0) {
+ this.bytes = new byte[this.size];
+ } else {
+ this.bytes = EMPTY_BYTES;
+ }
+ }
+
+ public KmerBytesWritable(KmerBytesWritable right) {
+ if (right != null) {
+ this.kmerlength = right.kmerlength;
+ this.size = right.size;
+ this.bytes = new byte[right.size];
+ set(right);
+ }else{
+ this.kmerlength = 0;
+ this.size = 0;
+ this.bytes = EMPTY_BYTES;
+ }
+ }
+
+ public byte getGeneCodeAtPosition(int pos) {
+ if (pos < 0 || pos >= kmerlength) { // out of range on either side yields -1
+ return -1;
+ }
+ int posByte = pos / 4; // four 2-bit gene codes are packed per byte
+ int shift = (pos % 4) << 1; // bit offset of the code inside its byte
+ return (byte) ((bytes[size - 1 - posByte] >> shift) & 0x3); // position 0 lives in the last used byte
+ }
+
+ public int getKmerLength() {
+ return this.kmerlength;
+ }
+
+ @Override
+ public byte[] getBytes() {
+ return bytes;
+ }
+
+ @Override
+ public int getLength() {
+ return size;
+ }
+
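+ /**
+ * Read Kmer from read text into bytes array e.g. AATAG will compress as
+ * [0x000G, 0xATAA]. The number of symbols read is this object's
+ * kmerlength (there is no k parameter).
+ *
+ * @param array the read text holding the gene symbols
+ * @param start index in array of the first symbol of the kmer
+ */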
+ public void setByRead(byte[] array, int start) {
+ byte l = 0;
+ int bytecount = 0;
+ int bcount = this.size - 1;
+ for (int i = start; i < start + kmerlength && i < array.length; i++) {
+ byte code = GeneCode.getCodeFromSymbol(array[i]);
+ l |= (byte) (code << bytecount);
+ bytecount += 2;
+ if (bytecount == 8) {
+ bytes[bcount--] = l;
+ l = 0;
+ bytecount = 0;
+ }
+ }
+ if (bcount >= 0) {
+ bytes[0] = l;
+ }
+ }
+
+ /**
+ * Packs the kmer that begins at {@code start} in reversed symbol order, e.g.
+ * AATAG is stored as [0x000A,0xATAG]. Only indices start..start+kmerlength-1 are read.
+ *
+ * @param array
+ * read text holding the gene symbols
+ * @param start
+ * index in array of the first symbol of the kmer
+ */
+ public void setByReadReverse(byte[] array, int start) {
+ byte l = 0;
+ int bytecount = 0;
+ int bcount = size - 1;
+ for (int i = start + kmerlength - 1; i >= start && i >= 0 && i < array.length; i--) { // stop at start, not at 0
+ byte code = GeneCode.getCodeFromSymbol(array[i]);
+ l |= (byte) (code << bytecount);
+ bytecount += 2;
+ if (bytecount == 8) { // a full byte (4 codes) is ready: store it and start the next one
+ bytes[bcount--] = l;
+ l = 0;
+ bytecount = 0;
+ }
+ }
+ if (bcount >= 0) { // leftover partial byte becomes the leading byte
+ bytes[0] = l;
+ }
+ }
+
+ /**
+ * Shift Kmer to accept new char input
+ *
+ * @param c
+ * Input new gene character
+ * @return the shift out gene, in gene code format
+ */
+ public byte shiftKmerWithNextChar(byte c) {
+ return shiftKmerWithNextCode(GeneCode.getCodeFromSymbol(c));
+ }
+
+ /**
+ * Shift Kmer to accept new gene code
+ *
+ * @param c
+ * Input new gene code
+ * @return the shift out gene, in gene code format
+ */
+ public byte shiftKmerWithNextCode(byte c) {
+ byte output = (byte) (bytes[size - 1] & 0x03);
+ for (int i = size - 1; i > 0; i--) {
+ byte in = (byte) (bytes[i - 1] & 0x03);
+ bytes[i] = (byte) (((bytes[i] >>> 2) & 0x3f) | (in << 6));
+ }
+ int pos = ((kmerlength - 1) % 4) << 1;
+ byte code = (byte) (c << pos);
+ bytes[0] = (byte) (((bytes[0] >>> 2) & 0x3f) | code);
+ clearLeadBit();
+ return output;
+ }
+
+ /**
+ * Shift Kmer to accept new input char
+ *
+ * @param c
+ * Input new gene character
+ * @return the shiftout gene, in gene code format
+ */
+ public byte shiftKmerWithPreChar(byte c) {
+ return shiftKmerWithPreCode(GeneCode.getCodeFromSymbol(c));
+ }
+
+ /**
+ * Shift Kmer to accept new gene code
+ *
+ * @param c
+ * Input new gene code
+ * @return the shiftout gene, in gene code format
+ */
+ public byte shiftKmerWithPreCode(byte c) {
+ int pos = ((kmerlength - 1) % 4) << 1;
+ byte output = (byte) ((bytes[0] >> pos) & 0x03);
+ for (int i = 0; i < size - 1; i++) {
+ byte in = (byte) ((bytes[i + 1] >> 6) & 0x03);
+ bytes[i] = (byte) ((bytes[i] << 2) | in);
+ }
+ bytes[size - 1] = (byte) ((bytes[size - 1] << 2) | c);
+ clearLeadBit();
+ return output;
+ }
+
+ protected void clearLeadBit() {
+ if (kmerlength % 4 != 0) {
+ bytes[0] &= (1 << ((kmerlength % 4) << 1)) - 1;
+ }
+ }
+
+ public void set(KmerBytesWritable newData) {
+ if (kmerlength != newData.kmerlength){
+ throw new IllegalArgumentException("kmerSize is different, try to use VKmerBytesWritable instead");
+ }
+ if (kmerlength > 0 ){
+ set(newData.bytes, 0, newData.size);
+ }
+ }
+
+ public void set(byte[] newData, int offset, int length) {
+ if (kmerlength > 0){
+ System.arraycopy(newData, offset, bytes, 0, size);
+ }
+ }
+
+ /**
+ * Reads the kmerlength as a leading int from the data stream, then the
+ * packed bytes; the buffer is reallocated only when it is too small.
+ */
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ this.kmerlength = in.readInt();
+ this.size = KmerUtil.getByteNumFromK(kmerlength);
+ if (this.kmerlength > 0) {
+ if (this.bytes.length < this.size) {
+ this.bytes = new byte[this.size];
+ }
+ in.readFully(bytes, 0, size);
+ }
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(kmerlength);
+ if (kmerlength > 0) {
+ out.write(bytes, 0, size);
+ }
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() * 31 + this.kmerlength;
+ }
+
+ @Override
+ public boolean equals(Object right_obj) {
+ if (right_obj instanceof KmerBytesWritable)
+ return this.kmerlength == ((KmerBytesWritable) right_obj).kmerlength && super.equals(right_obj);
+ return false;
+ }
+
+ @Override
+ public String toString() {
+ return KmerUtil.recoverKmerFrom(this.kmerlength, this.getBytes(), 0, this.getLength());
+ }
+
+ public static class Comparator extends WritableComparator {
+ public final int LEAD_BYTES = 4; // width of the kmerlength int that write() emits before the kmer bytes
+
+ public Comparator() {
+ super(KmerBytesWritable.class);
+ }
+
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ int kmerlength1 = readInt(b1, s1); // each serialized record starts with its kmerlength
+ int kmerlength2 = readInt(b2, s2);
+ if (kmerlength1 == kmerlength2) {
+ return compareBytes(b1, s1 + LEAD_BYTES, l1 - LEAD_BYTES, b2, s2 + LEAD_BYTES, l2 - LEAD_BYTES);
+ }
+ return kmerlength1 - kmerlength2; // lengths differ: order by kmerlength only
+ }
+ }
+
+ static { // register this comparator
+ WritableComparator.define(KmerBytesWritable.class, new Comparator());
+ }
+
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerCountValue.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerCountValue.java
new file mode 100644
index 0000000..d0310ac
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerCountValue.java
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.hadoop.oldtype;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Writable;
+
+public class KmerCountValue implements Writable {
+ private byte adjBitMap;
+ private byte count;
+
+ public KmerCountValue(byte bitmap, byte count) {
+ set(bitmap, count);
+ }
+
+ public KmerCountValue() {
+ adjBitMap = 0;
+ count = 0;
+ }
+
+ @Override
+ public void readFields(DataInput arg0) throws IOException {
+ adjBitMap = arg0.readByte();
+ count = arg0.readByte();
+ }
+
+ @Override
+ public void write(DataOutput arg0) throws IOException {
+ arg0.writeByte(adjBitMap);
+ arg0.writeByte(count);
+ }
+
+ @Override
+ public String toString() {
+ return GeneCode.getSymbolFromBitMap(adjBitMap) + '\t' + String.valueOf(count);
+ }
+
+ public void set(byte bitmap, byte count) {
+ this.adjBitMap = bitmap;
+ this.count = count;
+ }
+
+ public byte getAdjBitMap() {
+ return adjBitMap;
+ }
+
+ public void setAdjBitMap(byte adjBitMap) {
+ this.adjBitMap = adjBitMap;
+ }
+
+ public byte getCount() {
+ return count;
+ }
+}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerUtil.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerUtil.java
new file mode 100644
index 0000000..4f62bb7
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerUtil.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hadoop.oldtype;
+
+public class KmerUtil {
+ public static final String empty = "";
+
+ public static int getByteNumFromK(int k) {
+ int x = k / 4;
+ if (k % 4 != 0) {
+ x += 1;
+ }
+ return x;
+ }
+
+ public static byte reverseKmerByte(byte k) {
+ int x = (((k >> 2) & 0x33) | ((k << 2) & 0xcc));
+ return (byte) (((x >> 4) & 0x0f) | ((x << 4) & 0xf0));
+ }
+
+ public static String recoverKmerFrom(int k, byte[] keyData, int keyStart, int keyLength) {
+ StringBuilder strKmer = new StringBuilder();
+ int byteId = keyStart + keyLength - 1;
+ if (byteId < 0) {
+ return empty;
+ }
+ byte currentbyte = keyData[byteId];
+ for (int geneCount = 0; geneCount < k; geneCount++) {
+ if (geneCount % 4 == 0 && geneCount > 0) {
+ currentbyte = keyData[--byteId];
+ }
+ strKmer.append((char) GeneCode.GENE_SYMBOL[(currentbyte >> ((geneCount % 4) * 2)) & 0x03]);
+ }
+ return strKmer.toString();
+ }
+
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/MergePathValueWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/MergePathValueWritable.java
new file mode 100644
index 0000000..aff8e9d
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/MergePathValueWritable.java
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.hadoop.oldtype;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.BinaryComparable;
+import org.apache.hadoop.io.WritableComparable;
+
+import edu.uci.ics.genomix.hadoop.oldtype.*;
+
+public class MergePathValueWritable extends BinaryComparable implements WritableComparable<BinaryComparable> {
+
+ private static final byte[] EMPTY_BYTES = {};
+ private byte adjBitMap;
+ private byte flag;
+ private VKmerBytesWritable kmer;
+
+ public MergePathValueWritable() {
+ this((byte) 0, (byte) 0, 0, EMPTY_BYTES);
+ }
+
+ public MergePathValueWritable(int k) {
+ this.adjBitMap = 0;
+ this.flag = 0;
+ this.kmer = new VKmerBytesWritable(k);
+ }
+
+ public MergePathValueWritable(byte adjBitMap, byte flag, int kmerSize, byte[] bytes) {
+ this.adjBitMap = adjBitMap;
+ this.flag = flag;
+ this.kmer = new VKmerBytesWritable(kmerSize, bytes);
+ kmer.set(bytes, 0, bytes.length);
+ }
+
+ public void set(MergePathValueWritable right) {
+ set(right.getAdjBitMap(), right.getFlag(), right.getKmer());
+ }
+
+ public void set(byte adjBitMap, byte flag, VKmerBytesWritable kmer) {
+ this.kmer.set(kmer);
+ this.adjBitMap = adjBitMap;
+ this.flag = flag;
+ }
+
+ @Override
+ public void readFields(DataInput arg0) throws IOException {
+ // TODO Auto-generated method stub
+ kmer.readFields(arg0);
+ adjBitMap = arg0.readByte();
+ flag = arg0.readByte();
+ }
+
+ @Override
+ public void write(DataOutput arg0) throws IOException {
+ // TODO Auto-generated method stub
+
+ kmer.write(arg0);
+ arg0.writeByte(adjBitMap);
+ arg0.writeByte(flag);
+ }
+
+ public VKmerBytesWritable getKmer() {
+ if (kmer.getLength() != 0) {
+ return kmer;
+ }
+ return null;
+ }
+
+ public byte getAdjBitMap() {
+ return this.adjBitMap;
+ }
+
+ public byte getFlag() {
+ return this.flag;
+ }
+
+ public String toString() {
+ return GeneCode.getSymbolFromBitMap(adjBitMap) + '\t' + String.valueOf(flag);
+ }
+
+ public String pureToString() {
+ return GeneCode.getSymbolFromBitMap(adjBitMap);
+ }
+ @Override
+ public byte[] getBytes() {
+ // TODO Auto-generated method stub
+ if (kmer.getLength() != 0) {
+ return kmer.getBytes();
+ } else
+ return null;
+
+ }
+
+ public int getKmerLength() {
+ return kmer.getKmerLength();
+ }
+
+ @Override
+ public int getLength() {
+ return kmer.getLength();
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/VKmerBytesWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/VKmerBytesWritable.java
new file mode 100644
index 0000000..fb60699
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/VKmerBytesWritable.java
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hadoop.oldtype;
+
+public class VKmerBytesWritable extends KmerBytesWritable {
+ /**
+ *
+ */
+ private static final long serialVersionUID = 1L;
+
+ @Deprecated
+ public VKmerBytesWritable() {
+ super();
+ }
+
+ public VKmerBytesWritable(int k, byte[] storage) {
+ super(k, storage);
+ }
+
+ public VKmerBytesWritable(int k) {
+ super(k);
+ }
+
+ public VKmerBytesWritable(KmerBytesWritable other) {
+ super(other);
+ }
+
+ protected void setSize(int size) {
+ if (size > getCapacity()) {
+ setCapacity((size * 3 / 2));
+ }
+ this.size = size;
+ }
+
+ protected int getCapacity() {
+ return bytes.length;
+ }
+
+ protected void setCapacity(int new_cap) {
+ if (new_cap != getCapacity()) {
+ byte[] new_data = new byte[new_cap];
+ if (new_cap < size) {
+ size = new_cap;
+ }
+ if (size != 0) {
+ System.arraycopy(bytes, 0, new_data, 0, size);
+ }
+ bytes = new_data;
+ }
+ }
+
+ /**
+ * Read Kmer from read text into bytes array e.g. AATAG will compress as
+ * [0x000G, 0xATAA]
+ *
+ * @param k
+ * @param array
+ * @param start
+ */
+ public void setByRead(int k, byte[] array, int start) {
+ reset(k);
+ super.setByRead(array, start);
+ }
+
+ /**
+ * Compress Reversed Kmer into bytes array AATAG will compress as
+ * [0x000A,0xATAG]
+ *
+ * @param input
+ * array
+ * @param start
+ * position
+ */
+ public void setByReadReverse(int k, byte[] array, int start) {
+ reset(k);
+ super.setByReadReverse(array, start);
+ }
+
+ @Override
+ public void set(KmerBytesWritable newData) {
+ if (newData == null){
+ this.set(0,null,0,0);
+ }else{
+ this.set(newData.kmerlength, newData.bytes, 0, newData.size);
+ }
+ }
+
+ public void set(int k, byte[] newData, int offset, int length) {
+ reset(k);
+ if (k > 0 ){
+ System.arraycopy(newData, offset, bytes, 0, size);
+ }
+ }
+
+ /**
+ * Reset array by kmerlength
+ *
+ * @param k
+ */
+ public void reset(int k) {
+ this.kmerlength = k;
+ setSize(0);
+ setSize(KmerUtil.getByteNumFromK(k));
+ clearLeadBit();
+ }
+
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/VKmerBytesWritableFactory.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/VKmerBytesWritableFactory.java
new file mode 100644
index 0000000..0334991
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/VKmerBytesWritableFactory.java
@@ -0,0 +1,311 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hadoop.oldtype;
+
+/**
+ * Builds derived kmers (sub-kmers, merged kmers, shifted kmers, reversed
+ * kmers) from existing ones.
+ * <p>
+ * Every method writes its result into one internal {@link VKmerBytesWritable}
+ * and returns that same object, so a returned kmer is overwritten by the next
+ * call on this factory. Callers that need to keep a result must copy it first.
+ */
+public class VKmerBytesWritableFactory {
+    /** Reusable result object; every factory method overwrites and returns it. */
+    private VKmerBytesWritable kmer;
+
+    /**
+     * @param k
+     *            value passed to the constructor of the internal result kmer
+     */
+    public VKmerBytesWritableFactory(int k) {
+        kmer = new VKmerBytesWritable(k);
+    }
+
+    /**
+     * Read Kmer from read text into bytes array e.g. AATAG will compress as
+     * [0x000G, 0xATAA]
+     *
+     * @param k
+     *            kmer length
+     * @param array
+     *            bytes of the read text
+     * @param start
+     *            position in array handed to the kmer reader
+     * @return the factory's shared kmer holding the result
+     */
+    public VKmerBytesWritable getKmerByRead(int k, byte[] array, int start) {
+        kmer.setByRead(k, array, start);
+        return kmer;
+    }
+
+    /**
+     * Compress Reversed Kmer into bytes array AATAG will compress as
+     * [0x000A,0xATAG]
+     *
+     * @param k
+     *            kmer length
+     * @param array
+     *            bytes of the read text
+     * @param start
+     *            position in array handed to the kmer reader
+     * @return the factory's shared kmer holding the result
+     */
+    public VKmerBytesWritable getKmerByReadReverse(int k, byte[] array, int start) {
+        kmer.setByReadReverse(k, array, start);
+        return kmer;
+    }
+
+    /**
+     * Get last kmer from kmer-chain.
+     * e.g. kmerChain is AAGCTA, if k =5, it will
+     * return AGCTA
+     *
+     * @param lastK
+     *            length of the kmer to extract from the end of the chain
+     * @param kmerChain
+     *            chain to extract from; not modified
+     * @return the factory's shared kmer holding the last lastK bases, or
+     *         null when lastK exceeds the chain length
+     */
+    public VKmerBytesWritable getLastKmerFromChain(int lastK, final KmerBytesWritable kmerChain) {
+        if (lastK > kmerChain.getKmerLength()) {
+            return null;
+        }
+        // The last lastK bases are exactly the sub-kmer that begins after the
+        // (chainLength - lastK) leading bases, so reuse the shared extraction.
+        return getSubKmerFromChain(kmerChain.getKmerLength() - lastK, lastK, kmerChain);
+    }
+
+    /**
+     * Get first kmer from kmer-chain e.g. kmerChain is AAGCTA, if k=5, it will
+     * return AAGCT
+     *
+     * @param firstK
+     *            length of the kmer to extract from the start of the chain
+     * @param kmerChain
+     *            chain to extract from; not modified
+     * @return the factory's shared kmer holding the first firstK bases, or
+     *         null when firstK exceeds the chain length
+     */
+    public VKmerBytesWritable getFirstKmerFromChain(int firstK, final KmerBytesWritable kmerChain) {
+        if (firstK > kmerChain.getKmerLength()) {
+            return null;
+        }
+        if (firstK == kmerChain.getKmerLength()) {
+            kmer.set(kmerChain);
+            return kmer;
+        }
+        kmer.reset(firstK);
+
+        // Whole bytes are copied from the tail of the chain's array; the
+        // leading (index 0) byte of the result is handled separately below.
+        int i = 1;
+        for (; i < kmer.getLength(); i++) {
+            kmer.getBytes()[kmer.getLength() - i] = kmerChain.getBytes()[kmerChain.getLength() - i];
+        }
+        int posInByteOfChain = (firstK % 4) << 1; // *2
+        if (posInByteOfChain == 0) {
+            kmer.getBytes()[0] = kmerChain.getBytes()[kmerChain.getLength() - i];
+        } else {
+            // Keep only the low bits that belong to the first firstK bases.
+            kmer.getBytes()[0] = (byte) (kmerChain.getBytes()[kmerChain.getLength() - i] & ((1 << posInByteOfChain) - 1));
+        }
+        kmer.clearLeadBit();
+        return kmer;
+    }
+
+    /**
+     * Get a kmer of kSize bases from the chain, skipping the first startK
+     * bases.
+     *
+     * @param startK
+     *            number of leading bases of the chain to skip
+     * @param kSize
+     *            length of the kmer to extract
+     * @param kmerChain
+     *            chain to extract from; not modified
+     * @return the factory's shared kmer holding the extracted bases, or null
+     *         when startK + kSize exceeds the chain length
+     */
+    public VKmerBytesWritable getSubKmerFromChain(int startK, int kSize, final KmerBytesWritable kmerChain) {
+        if (startK + kSize > kmerChain.getKmerLength()) {
+            return null;
+        }
+        if (startK == 0 && kSize == kmerChain.getKmerLength()) {
+            kmer.set(kmerChain);
+            return kmer;
+        }
+        kmer.reset(kSize);
+
+        /** from end to start */
+        int byteInChain = kmerChain.getLength() - 1 - startK / 4;
+        int posInByteOfChain = startK % 4 << 1; // *2
+        int byteInKmer = kmer.getLength() - 1;
+        for (; byteInKmer >= 0 && byteInChain > 0; byteInKmer--, byteInChain--) {
+            // Each result byte is stitched from two neighbouring chain bytes.
+            kmer.getBytes()[byteInKmer] = (byte) ((0xff & kmerChain.getBytes()[byteInChain]) >> posInByteOfChain);
+            kmer.getBytes()[byteInKmer] |= ((kmerChain.getBytes()[byteInChain - 1] << (8 - posInByteOfChain)));
+        }
+
+        /** last kmer byte */
+        if (byteInKmer == 0) {
+            kmer.getBytes()[0] = (byte) ((kmerChain.getBytes()[0] & 0xff) >> posInByteOfChain);
+        }
+        kmer.clearLeadBit();
+        return kmer;
+    }
+
+    /**
+     * Merge kmer with next neighbor in gene-code format.
+     * The k of new kmer will increase by 1
+     * e.g. AAGCT merge with A => AAGCTA
+     *
+     * @param kmer
+     *            : input kmer; not modified
+     * @param nextCode
+     *            : next neighbor in gene-code format
+     * @return the merged Kmer, this K of this Kmer is k+1
+     */
+    public VKmerBytesWritable mergeKmerWithNextCode(final KmerBytesWritable kmer, byte nextCode) {
+        this.kmer.reset(kmer.getKmerLength() + 1);
+        for (int i = 1; i <= kmer.getLength(); i++) {
+            this.kmer.getBytes()[this.kmer.getLength() - i] = kmer.getBytes()[kmer.getLength() - i];
+        }
+        if (this.kmer.getLength() > kmer.getLength()) {
+            // The extra base opened a new leading byte.
+            this.kmer.getBytes()[0] = (byte) (nextCode & 0x3);
+        } else {
+            // The extra base fits into free bits of the existing leading byte.
+            this.kmer.getBytes()[0] = (byte) (kmer.getBytes()[0] | ((nextCode & 0x3) << ((kmer.getKmerLength() % 4) << 1)));
+        }
+        this.kmer.clearLeadBit();
+        return this.kmer;
+    }
+
+    /**
+     * Merge kmer with previous neighbor in gene-code format.
+     * The k of new kmer will increase by 1
+     * e.g. AAGCT merge with A => AAAGCT
+     *
+     * @param kmer
+     *            : input kmer; not modified
+     * @param preCode
+     *            : previous neighbor in gene-code format
+     * @return the merged Kmer,this K of this Kmer is k+1
+     */
+    public VKmerBytesWritable mergeKmerWithPreCode(final KmerBytesWritable kmer, byte preCode) {
+        this.kmer.reset(kmer.getKmerLength() + 1);
+        int byteInMergedKmer = 0;
+        if (kmer.getKmerLength() % 4 == 0) {
+            // The input's leading byte is full, so its top base spills into a new byte.
+            this.kmer.getBytes()[0] = (byte) ((kmer.getBytes()[0] >> 6) & 0x3);
+            byteInMergedKmer++;
+        }
+        // Shift the whole sequence up by one base (two bits) across byte boundaries.
+        for (int i = 0; i < kmer.getLength() - 1; i++, byteInMergedKmer++) {
+            this.kmer.getBytes()[byteInMergedKmer] = (byte) ((kmer.getBytes()[i] << 2) | ((kmer.getBytes()[i + 1] >> 6) & 0x3));
+        }
+        this.kmer.getBytes()[byteInMergedKmer] = (byte) ((kmer.getBytes()[kmer.getLength() - 1] << 2) | (preCode & 0x3));
+        this.kmer.clearLeadBit();
+        return this.kmer;
+    }
+
+    /**
+     * Merge two kmer to one kmer
+     * e.g. ACTA + ACCGT => ACTAACCGT
+     *
+     * @param preKmer
+     *            : previous kmer; not modified
+     * @param nextKmer
+     *            : next kmer; not modified
+     * @return merged kmer, the new k is the sum of both kmer lengths
+     */
+    public VKmerBytesWritable mergeTwoKmer(final KmerBytesWritable preKmer, final KmerBytesWritable nextKmer) {
+        kmer.reset(preKmer.getKmerLength() + nextKmer.getKmerLength());
+        int i = 1;
+        for (; i <= preKmer.getLength(); i++) {
+            kmer.getBytes()[kmer.getLength() - i] = preKmer.getBytes()[preKmer.getLength() - i];
+        }
+        if (i > 1) {
+            i--;
+        }
+        if (preKmer.getKmerLength() % 4 == 0) {
+            // preKmer ends on a byte boundary: nextKmer's bytes can be copied as-is.
+            for (int j = 1; j <= nextKmer.getLength(); j++) {
+                kmer.getBytes()[kmer.getLength() - i - j] = nextKmer.getBytes()[nextKmer.getLength() - j];
+            }
+        } else {
+            // nextKmer has to be shifted into the free bits of preKmer's leading byte.
+            int posNeedToMove = ((preKmer.getKmerLength() % 4) << 1);
+            kmer.getBytes()[kmer.getLength() - i] |= nextKmer.getBytes()[nextKmer.getLength() - 1] << posNeedToMove;
+            for (int j = 1; j < nextKmer.getLength(); j++) {
+                kmer.getBytes()[kmer.getLength() - i - j] = (byte) (((nextKmer.getBytes()[nextKmer.getLength() - j] & 0xff) >> (8 - posNeedToMove)) | (nextKmer
+                        .getBytes()[nextKmer.getLength() - j - 1] << posNeedToMove));
+            }
+            if (nextKmer.getKmerLength() % 4 == 0 || (nextKmer.getKmerLength() % 4) * 2 + posNeedToMove > 8) {
+                kmer.getBytes()[0] = (byte) ((0xff & nextKmer.getBytes()[0]) >> (8 - posNeedToMove));
+            }
+        }
+        kmer.clearLeadBit();
+        return kmer;
+    }
+
+    /**
+     * Shifts a copy of the kmer forward, leaving the input kmer unchanged
+     * e.g. AGCGC shift with T => GCGCT
+     *
+     * @param kmer
+     *            : input kmer
+     * @param afterCode
+     *            : input genecode
+     * @return the factory's shared kmer shifted by afterCode, the K will not change
+     */
+    public VKmerBytesWritable shiftKmerWithNextCode(final KmerBytesWritable kmer, byte afterCode) {
+        this.kmer.set(kmer);
+        this.kmer.shiftKmerWithNextCode(afterCode);
+        return this.kmer;
+    }
+
+    /**
+     * Shifts a copy of the kmer backward, leaving the input kmer unchanged
+     * e.g. AGCGC shift with T => TAGCG
+     *
+     * @param kmer
+     *            : input kmer
+     * @param preCode
+     *            : input genecode
+     * @return the factory's shared kmer shifted by preCode, the K will not change
+     */
+    public VKmerBytesWritable shiftKmerWithPreCode(final KmerBytesWritable kmer, byte preCode) {
+        this.kmer.set(kmer);
+        this.kmer.shiftKmerWithPreCode(preCode);
+        return this.kmer;
+    }
+
+    /**
+     * Get the reverse sequence of the given kmer: the bases are written in
+     * the opposite order, without complementing them.
+     *
+     * @param kmer
+     *            : input kmer; not modified
+     * @return the factory's shared kmer holding the reversed sequence
+     */
+    public VKmerBytesWritable reverse(final KmerBytesWritable kmer) {
+        this.kmer.reset(kmer.getKmerLength());
+
+        int curPosAtKmer = ((kmer.getKmerLength() - 1) % 4) << 1;
+        int curByteAtKmer = 0;
+
+        int curPosAtReverse = 0;
+        int curByteAtReverse = this.kmer.getLength() - 1;
+        if (curByteAtReverse < 0) {
+            // Empty kmer: there is no byte to write.
+            return this.kmer;
+        }
+        this.kmer.getBytes()[curByteAtReverse] = 0;
+        for (int i = 0; i < kmer.getKmerLength(); i++) {
+            byte gene = (byte) ((kmer.getBytes()[curByteAtKmer] >> curPosAtKmer) & 0x03);
+            this.kmer.getBytes()[curByteAtReverse] |= gene << curPosAtReverse;
+            curPosAtReverse += 2;
+            // Only step to the next output byte while one remains. When the
+            // length is a multiple of four, the final base fills byte 0
+            // exactly and stepping further would index -1.
+            if (curPosAtReverse >= 8 && curByteAtReverse > 0) {
+                curPosAtReverse = 0;
+                this.kmer.getBytes()[--curByteAtReverse] = 0;
+            }
+            curPosAtKmer -= 2;
+            if (curPosAtKmer < 0) {
+                curPosAtKmer = 6;
+                curByteAtKmer++;
+            }
+        }
+        this.kmer.clearLeadBit();
+        return this.kmer;
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Driver.java
index 4c7f033..56ce79b 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Driver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Driver.java
@@ -32,12 +32,10 @@
import edu.uci.ics.genomix.hadoop.pmcommon.MergePathValueWritable;
import edu.uci.ics.genomix.hadoop.pmcommon.SNodeInitialMapper;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
import edu.uci.ics.genomix.hadoop.pmcommon.MergePathMultiSeqOutputFormat;
import edu.uci.ics.genomix.hadoop.pmcommon.SNodeInitialMapper;
import edu.uci.ics.genomix.hadoop.pmcommon.SNodeInitialReducer;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
@SuppressWarnings("deprecation")
public class MergePathH1Driver {
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Mapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Mapper.java
index f10999a..ff64d12 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Mapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Mapper.java
@@ -22,10 +22,7 @@
import org.apache.hadoop.mapred.Reporter;
import edu.uci.ics.genomix.hadoop.pmcommon.MergePathValueWritable;
-import edu.uci.ics.genomix.type.GeneCode;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
@SuppressWarnings("deprecation")
public class MergePathH1Mapper extends MapReduceBase implements
Mapper<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Reducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Reducer.java
index 1309174..f6c4f42 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Reducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Reducer.java
@@ -24,10 +24,7 @@
import org.apache.hadoop.mapred.lib.MultipleOutputs;
import edu.uci.ics.genomix.hadoop.pmcommon.MergePathValueWritable;
-import edu.uci.ics.genomix.type.GeneCode;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
@SuppressWarnings("deprecation")
public class MergePathH1Reducer extends MapReduceBase implements
Reducer<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Driver.java
index 8d832e5..216da7a 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Driver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Driver.java
@@ -34,8 +34,7 @@
import edu.uci.ics.genomix.hadoop.pmcommon.MergePathValueWritable;
import edu.uci.ics.genomix.hadoop.pmcommon.SNodeInitialMapper;
import edu.uci.ics.genomix.hadoop.pmcommon.SNodeInitialReducer;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.hadoop.oldtype.*;
@SuppressWarnings("deprecation")
public class MergePathH2Driver {
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Mapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Mapper.java
index 64b0bb1..941ffa3 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Mapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Mapper.java
@@ -22,10 +22,7 @@
import org.apache.hadoop.mapred.Reporter;
import edu.uci.ics.genomix.hadoop.pmcommon.MergePathValueWritable;
-import edu.uci.ics.genomix.type.GeneCode;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
@SuppressWarnings("deprecation")
public class MergePathH2Mapper extends MapReduceBase implements
Mapper<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Reducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Reducer.java
index 5f4f938..ad67c4a 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Reducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Reducer.java
@@ -24,9 +24,7 @@
import org.apache.hadoop.mapred.lib.MultipleOutputs;
import edu.uci.ics.genomix.hadoop.pmcommon.MergePathValueWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
@SuppressWarnings("deprecation")
public class MergePathH2Reducer extends MapReduceBase implements
Reducer<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MergePathMultiSeqOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MergePathMultiSeqOutputFormat.java
index 479d664..f098317 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MergePathMultiSeqOutputFormat.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MergePathMultiSeqOutputFormat.java
@@ -16,7 +16,7 @@
import java.io.File;
import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.hadoop.oldtype.*;
public class MergePathMultiSeqOutputFormat extends MultipleSequenceFileOutputFormat<VKmerBytesWritable, MergePathValueWritable>{
@Override
protected String generateLeafFileName(String name) {
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MergePathValueWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MergePathValueWritable.java
index 31dee7c..5610e23 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MergePathValueWritable.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MergePathValueWritable.java
@@ -21,9 +21,7 @@
import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.WritableComparable;
-import edu.uci.ics.genomix.type.GeneCode;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
public class MergePathValueWritable extends BinaryComparable implements WritableComparable<BinaryComparable> {
private static final byte[] EMPTY_BYTES = {};
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/SNodeInitialMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/SNodeInitialMapper.java
index 3e3790a..6aa0cf2 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/SNodeInitialMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/SNodeInitialMapper.java
@@ -22,10 +22,7 @@
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.GeneCode;
-import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
+import edu.uci.ics.genomix.hadoop.oldtype.*;
@SuppressWarnings("deprecation")
public class SNodeInitialMapper extends MapReduceBase implements
Mapper<KmerBytesWritable, ByteWritable, KmerBytesWritable, MergePathValueWritable> {
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/SNodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/SNodeInitialReducer.java
index 69fa985..78b7b8b 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/SNodeInitialReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/SNodeInitialReducer.java
@@ -24,9 +24,7 @@
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.MultipleOutputs;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
@SuppressWarnings("deprecation")
public class SNodeInitialReducer extends MapReduceBase implements
Reducer<KmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatDriver.java
index e7400be..3768d3a 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatDriver.java
@@ -28,8 +28,7 @@
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
-import edu.uci.ics.genomix.type.KmerCountValue;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
@SuppressWarnings("deprecation")
public class GenomixStatDriver {
private static class Options {
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatMapper.java
index 623a923..a6afcf9 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatMapper.java
@@ -23,9 +23,7 @@
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.KmerCountValue;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
@SuppressWarnings({ "unused", "deprecation" })
public class GenomixStatMapper extends MapReduceBase implements
Mapper<BytesWritable, KmerCountValue, BytesWritable, KmerCountValue> {
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatReducer.java
index 090e680..a4ae561 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatReducer.java
@@ -22,8 +22,7 @@
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerCountValue;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
@SuppressWarnings("deprecation")
public class GenomixStatReducer extends MapReduceBase implements
Reducer<BytesWritable, KmerCountValue, BytesWritable, KmerCountValue> {
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphbuilding/GraphBuildingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphbuilding/GraphBuildingTest.java
index 0ef4c51..7d55dc7 100755
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphbuilding/GraphBuildingTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphbuilding/GraphBuildingTest.java
@@ -33,9 +33,7 @@
import edu.uci.ics.genomix.hadoop.graphbuilding.GenomixDriver;
import edu.uci.ics.genomix.hadoop.utils.TestUtils;
-import edu.uci.ics.genomix.type.GeneCode;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.KmerCountValue;
+import edu.uci.ics.genomix.hadoop.oldtype.*;
/**
* This class test the correctness of graphbuilding program
*/
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterTest.java
index 5f8b3db..c3fc9b9 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterTest.java
@@ -34,8 +34,7 @@
import edu.uci.ics.genomix.hadoop.graphcountfilter.CountFilterDriver;
import edu.uci.ics.genomix.hadoop.utils.TestUtils;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
@SuppressWarnings("deprecation")
public class CountFilterTest {
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Test.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Test.java
index 5f5b40a..de98179 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Test.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Test.java
@@ -33,8 +33,7 @@
import edu.uci.ics.genomix.hadoop.pathmergingh1.MergePathH1Driver;
import edu.uci.ics.genomix.hadoop.pmcommon.MergePathValueWritable;
import edu.uci.ics.genomix.hadoop.utils.TestUtils;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
@SuppressWarnings("deprecation")
public class MergePathH1Test {
private static final String ACTUAL_RESULT_DIR = "actual3";
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Test.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Test.java
index 536ed3c..9ba98ef 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Test.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Test.java
@@ -33,8 +33,7 @@
import edu.uci.ics.genomix.hadoop.pathmergingh2.MergePathH2Driver;
import edu.uci.ics.genomix.hadoop.pmcommon.MergePathValueWritable;
import edu.uci.ics.genomix.hadoop.utils.TestUtils;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
+import edu.uci.ics.genomix.hadoop.oldtype.*;
@SuppressWarnings("deprecation")
public class MergePathH2Test {
private static final String ACTUAL_RESULT_DIR = "actual4";