Remove unused legacy code

Deletes the deprecated edu.uci.ics.genomix.oldtype and
edu.uci.ics.genomix.velvet.oldtype classes from genomix-data, and the
gbresultschecking job classes from genomix-hadoop. These classes are no
longer used.
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/oldtype/PositionListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/oldtype/PositionListWritable.java
deleted file mode 100644
index 226ce2c..0000000
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/oldtype/PositionListWritable.java
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.oldtype;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.hadoop.io.Writable;
-
-import edu.uci.ics.genomix.data.Marshal;
-
-public class PositionListWritable implements Writable, Iterable<PositionWritable>, Serializable {
- /**
- *
- */
- private static final long serialVersionUID = 1L;
- protected byte[] storage;
- protected int offset;
- protected int valueCount;
- protected static final byte[] EMPTY = {};
- public static final int INTBYTES = 4;
-
- protected PositionWritable posIter = new PositionWritable();
-
- public PositionListWritable() {
- this.storage = EMPTY;
- this.valueCount = 0;
- this.offset = 0;
- }
-
- public PositionListWritable(int count, byte[] data, int offset) {
- setNewReference(count, data, offset);
- }
-
- public PositionListWritable(List<PositionWritable> posns) {
- this();
- for (PositionWritable p : posns) {
- append(p);
- }
- }
-
- public void setNewReference(int count, byte[] data, int offset) {
- this.valueCount = count;
- this.storage = data;
- this.offset = offset;
- }
-
- protected void setSize(int size) {
- if (size > getCapacity()) {
- setCapacity((size * 3 / 2));
- }
- }
-
- protected int getCapacity() {
- return storage.length - offset;
- }
-
- protected void setCapacity(int new_cap) {
- if (new_cap > getCapacity()) {
- byte[] new_data = new byte[new_cap];
- if (storage.length - offset > 0) {
- System.arraycopy(storage, offset, new_data, 0, storage.length - offset);
- }
- storage = new_data;
- offset = 0;
- }
- }
-
- public PositionWritable getPosition(int i) {
- if (i >= valueCount) {
- throw new ArrayIndexOutOfBoundsException("No such positions");
- }
- posIter.setNewReference(storage, offset + i * PositionWritable.LENGTH);
- return posIter;
- }
-
- public void resetPosition(int i, int readID, byte posInRead) {
- if (i >= valueCount) {
- throw new ArrayIndexOutOfBoundsException("No such positions");
- }
- Marshal.putInt(readID, storage, offset + i * PositionWritable.LENGTH);
- storage[offset + INTBYTES] = posInRead;
- }
-
- @Override
- public Iterator<PositionWritable> iterator() {
- Iterator<PositionWritable> it = new Iterator<PositionWritable>() {
-
- private int currentIndex = 0;
-
- @Override
- public boolean hasNext() {
- return currentIndex < valueCount;
- }
-
- @Override
- public PositionWritable next() {
- return getPosition(currentIndex++);
- }
-
- @Override
- public void remove() {
- if(currentIndex < valueCount)
- System.arraycopy(storage, offset + currentIndex * PositionWritable.LENGTH,
- storage, offset + (currentIndex - 1) * PositionWritable.LENGTH,
- (valueCount - currentIndex) * PositionWritable.LENGTH);
- valueCount--;
- currentIndex--;
- }
- };
- return it;
- }
-
- /*
- * remove the first instance of @toRemove. Uses a linear scan. Throws an exception if not in this list.
- */
- public void remove(PositionWritable toRemove) {
- Iterator<PositionWritable> posIterator = this.iterator();
- while (posIterator.hasNext()) {
- if(toRemove.equals(posIterator.next())) {
- posIterator.remove();
- return;
- }
- }
- //throw new ArrayIndexOutOfBoundsException("the PositionWritable `" + toRemove.toString() + "` was not found in this list.");
- }
-
- public void set(PositionListWritable list2) {
- set(list2.valueCount, list2.storage, list2.offset);
- }
-
- public void set(int valueCount, byte[] newData, int offset) {
- this.valueCount = valueCount;
- setSize(valueCount * PositionWritable.LENGTH);
- if (valueCount > 0) {
- System.arraycopy(newData, offset, storage, this.offset, valueCount * PositionWritable.LENGTH);
- }
- }
-
- public void reset() {
- valueCount = 0;
- }
-
- public void append(PositionWritable pos) {
- if(pos != null){
- setSize((1 + valueCount) * PositionWritable.LENGTH);
- System.arraycopy(pos.getByteArray(), pos.getStartOffset(), storage, offset + valueCount
- * PositionWritable.LENGTH, pos.getLength());
- valueCount += 1;
- }
- }
-
- public void append(int readID, byte posInRead) {
- setSize((1 + valueCount) * PositionWritable.LENGTH);
- Marshal.putInt(readID, storage, offset + valueCount * PositionWritable.LENGTH);
- storage[offset + valueCount * PositionWritable.LENGTH + PositionWritable.INTBYTES] = posInRead;
- valueCount += 1;
- }
-
- /*
- * Append the otherList to the end of myList
- */
- public void appendList(PositionListWritable otherList) {
- if (otherList.valueCount > 0) {
- setSize((valueCount + otherList.valueCount) * PositionWritable.LENGTH);
- // copy contents of otherList into the end of my storage
- System.arraycopy(otherList.storage, otherList.offset,
- storage, offset + valueCount * PositionWritable.LENGTH,
- otherList.valueCount * PositionWritable.LENGTH);
- valueCount += otherList.valueCount;
- }
- }
-
- public static int getCountByDataLength(int length) {
- if (length % PositionWritable.LENGTH != 0) {
- for (StackTraceElement ste : Thread.currentThread().getStackTrace()) {
- System.out.println(ste);
- }
- throw new IllegalArgumentException("Length of positionlist is invalid");
- }
- return length / PositionWritable.LENGTH;
- }
-
- public int getCountOfPosition() {
- return valueCount;
- }
-
- public byte[] getByteArray() {
- return storage;
- }
-
- public int getStartOffset() {
- return offset;
- }
-
- public int getLength() {
- return valueCount * PositionWritable.LENGTH;
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- this.valueCount = in.readInt();
- setSize(valueCount * PositionWritable.LENGTH);
- in.readFully(storage, offset, valueCount * PositionWritable.LENGTH);
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- out.writeInt(valueCount);
- out.write(storage, offset, valueCount * PositionWritable.LENGTH);
- }
-
- @Override
- public String toString() {
- StringBuilder sbuilder = new StringBuilder();
- sbuilder.append('[');
- for (PositionWritable pos : this) {
- sbuilder.append(pos.toString());
- sbuilder.append(',');
- }
- if (valueCount > 0) {
- sbuilder.setCharAt(sbuilder.length() - 1, ']');
- } else {
- sbuilder.append(']');
- }
- return sbuilder.toString();
- }
-
- @Override
- public int hashCode() {
- return Marshal.hashBytes(getByteArray(), getStartOffset(), getLength());
- }
-
- @Override
- public boolean equals(Object o) {
- if (!(o instanceof PositionListWritable))
- return false;
- PositionListWritable other = (PositionListWritable) o;
- if (this.valueCount != other.valueCount)
- return false;
- for (int i=0; i < this.valueCount; i++) {
- if (!this.getPosition(i).equals(other.getPosition(i)))
- return false;
- }
- return true;
- }
-}
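
For reference, since the class above is going away: a minimal standalone sketch of the storage layout it managed, i.e. fixed five-byte records (a four-byte readID followed by a one-byte posInRead) appended into one growable byte array. All names below are illustrative, and the big-endian int layout is an assumption about Marshal.putInt, which is not shown in this diff.

    import java.util.Arrays;

    // Illustrative sketch only; not project code. Assumes Marshal.putInt
    // writes a big-endian int (Marshal is not part of this diff).
    class PositionBufferSketch {
        static final int RECORD = 5; // 4-byte readID + 1-byte posInRead

        private byte[] storage = new byte[0];
        private int count = 0;

        void append(int readID, byte posInRead) {
            int needed = (count + 1) * RECORD;
            if (needed > storage.length) {
                // grow by ~1.5x, as the removed setCapacity() did
                storage = Arrays.copyOf(storage, needed * 3 / 2);
            }
            int base = count * RECORD;
            storage[base]     = (byte) (readID >> 24);
            storage[base + 1] = (byte) (readID >> 16);
            storage[base + 2] = (byte) (readID >> 8);
            storage[base + 3] = (byte) readID;
            storage[base + 4] = posInRead;
            count++;
        }

        int readIDAt(int i) {
            int base = i * RECORD;
            return ((storage[base] & 0xff) << 24) | ((storage[base + 1] & 0xff) << 16)
                    | ((storage[base + 2] & 0xff) << 8) | (storage[base + 3] & 0xff);
        }
    }
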
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/oldtype/PositionWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/oldtype/PositionWritable.java
deleted file mode 100644
index 378e6da..0000000
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/oldtype/PositionWritable.java
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.oldtype;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.Serializable;
-
-import org.apache.hadoop.io.RawComparator;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableComparator;
-
-import edu.uci.ics.genomix.data.Marshal;
-
-public class PositionWritable implements WritableComparable<PositionWritable>, Serializable {
- /**
- *
- */
- private static final long serialVersionUID = 1L;
- protected byte[] storage;
- protected int offset;
- public static final int LENGTH = 5;
- public static final int INTBYTES = 4;
-
- public PositionWritable() {
- storage = new byte[LENGTH];
- offset = 0;
- }
-
- public PositionWritable(int readID, byte posInRead) {
- this();
- set(readID, posInRead);
- }
-
- public PositionWritable(byte[] storage, int offset) {
- setNewReference(storage, offset);
- }
-
- public void setNewReference(byte[] storage, int offset) {
- this.storage = storage;
- this.offset = offset;
- }
-
- public void set(PositionWritable pos) {
- set(pos.getReadID(), pos.getPosInRead());
- }
-
- public void set(int readID, byte posInRead) {
- Marshal.putInt(readID, storage, offset);
- storage[offset + INTBYTES] = posInRead;
- }
-
- public int getReadID() {
- return Marshal.getInt(storage, offset);
- }
-
- public byte getPosInRead() {
- return storage[offset + INTBYTES];
- }
-
- public byte[] getByteArray() {
- return storage;
- }
-
- public int getStartOffset() {
- return offset;
- }
-
- public int getLength() {
- return LENGTH;
- }
-
- public boolean isSameReadID(PositionWritable other) {
- return getReadID() == other.getReadID();
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- in.readFully(storage, offset, LENGTH);
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- out.write(storage, offset, LENGTH);
- }
-
- @Override
- public int hashCode() {
- return Marshal.hashBytes(getByteArray(), getStartOffset(), getLength());
- }
-
- @Override
- public boolean equals(Object o) {
- if (!(o instanceof PositionWritable))
- return false;
- PositionWritable other = (PositionWritable) o;
- return this.getReadID() == other.getReadID() && this.getPosInRead() == other.getPosInRead();
- }
-
- @Override
- public int compareTo(PositionWritable other) {
- int diff1 = this.getReadID() - other.getReadID();
- if (diff1 == 0) {
- int diff2 = Math.abs((int) this.getPosInRead()) - Math.abs((int) other.getPosInRead());
- if (diff2 == 0) {
- return this.getPosInRead() - other.getPosInRead();
- }
- return diff2;
- }
- return diff1;
- }
-
- @Override
- public String toString() {
- return "(" + Integer.toString(getReadID()) + "," + Integer.toString((int) getPosInRead()) + ")";
- }
-
- /** A Comparator optimized for IntWritable. */
- public static class Comparator extends WritableComparator {
- public Comparator() {
- super(PositionWritable.class);
- }
-
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
- int thisValue = Marshal.getInt(b1, s1);
- int thatValue = Marshal.getInt(b2, s2);
- int diff1 = thisValue - thatValue;
- if (diff1 == 0) {
- int diff2 = Math.abs((int) b1[s1 + INTBYTES]) - Math.abs((int) b2[s2 + INTBYTES]);
- if (diff2 == 0) {
- return b1[s1 + INTBYTES] - b2[s2 + INTBYTES];
- }
- return diff2;
- }
- return diff1;
- }
- }
-
- public static class FirstComparator implements RawComparator<PositionWritable> {
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
- return WritableComparator.compareBytes(b1, s1, l1 - 1, b2, s2, l2 - 1);
- }
-
- @Override
- public int compare(PositionWritable o1, PositionWritable o2) {
- int l = o1.getReadID();
- int r = o2.getReadID();
- return l == r ? 0 : (l < r ? -1 : 1);
- }
- }
-
- static { // register this comparator
- WritableComparator.define(PositionWritable.class, new Comparator());
- }
-}
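
The raw Comparator in the class above ordered serialized 5-byte records without deserializing them. A simplified standalone sketch of that idea (illustrative names; it compares by readID and then by the signed posInRead byte, whereas the removed code first compared absolute posInRead values as a tiebreak):

    // Illustrative sketch only; not project code.
    class RawPositionCompareSketch {
        static int readInt(byte[] b, int s) { // big-endian, as assumed above
            return ((b[s] & 0xff) << 24) | ((b[s + 1] & 0xff) << 16)
                    | ((b[s + 2] & 0xff) << 8) | (b[s + 3] & 0xff);
        }

        /** Order two 5-byte records first by readID, then by posInRead. */
        static int compare(byte[] b1, int s1, byte[] b2, int s2) {
            int diff = Integer.compare(readInt(b1, s1), readInt(b2, s2));
            return diff != 0 ? diff : Byte.compare(b1[s1 + 4], b2[s2 + 4]);
        }
    }

Note the sketch uses Integer.compare rather than the subtraction in the removed compareTo, which can overflow for readIDs of opposite sign.
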
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/velvet/oldtype/GeneCode.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/velvet/oldtype/GeneCode.java
deleted file mode 100644
index c3d8a98..0000000
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/velvet/oldtype/GeneCode.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.velvet.oldtype;
-
-public class GeneCode {
- public final static byte[] GENE_SYMBOL = { 'A', 'C', 'G', 'T' };
- /**
- * make sure this 4 ids equal to the sequence id of char in {@GENE_SYMBOL
- * }
- */
- public static final byte A = 0;
- public static final byte C = 1;
- public static final byte G = 2;
- public static final byte T = 3;
-
- public static byte getCodeFromSymbol(byte ch) {
- byte r = 0;
- switch (ch) {
- case 'A':
- case 'a':
- r = A;
- break;
- case 'C':
- case 'c':
- r = C;
- break;
- case 'G':
- case 'g':
- r = G;
- break;
- case 'T':
- case 't':
- r = T;
- break;
- }
- return r;
- }
-
- public static byte getPairedGeneCode(byte genecode){
- if ( genecode < 0 || genecode > 3){
- throw new IllegalArgumentException("Invalid genecode");
- }
- return (byte) (3- genecode);
- }
-
- public static byte getPairedCodeFromSymbol(byte ch){
- return getPairedGeneCode(getCodeFromSymbol(ch));
- }
-
- public static byte getSymbolFromCode(byte code) {
- if (code > 3 || code < 0 ) {
- throw new IllegalArgumentException("Invalid genecode");
- }
- return GENE_SYMBOL[code];
- }
-}
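
The class above encoded bases in two bits (A=0, C=1, G=2, T=3), which makes the Watson-Crick complement plain arithmetic: 3 - code. A self-contained round-trip demo of that encoding (illustrative names only):

    // Illustrative sketch only; not project code.
    public class TwoBitDemo {
        static final char[] SYMBOL = { 'A', 'C', 'G', 'T' };

        static byte code(char ch) {
            switch (Character.toUpperCase(ch)) {
                case 'A': return 0;
                case 'C': return 1;
                case 'G': return 2;
                case 'T': return 3;
                default:  return 0; // the removed code also mapped unknowns to A
            }
        }

        public static void main(String[] args) {
            for (char ch : new char[] { 'A', 'C', 'G', 'T' }) {
                byte c = code(ch);
                // complement: A<->T (0<->3), C<->G (1<->2)
                System.out.println(ch + " -> " + c + ", complement " + SYMBOL[3 - c]);
            }
        }
    }
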
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/velvet/oldtype/KmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/velvet/oldtype/KmerBytesWritable.java
deleted file mode 100644
index 630dbad..0000000
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/velvet/oldtype/KmerBytesWritable.java
+++ /dev/null
@@ -1,502 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.velvet.oldtype;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.Serializable;
-
-import org.apache.hadoop.io.BinaryComparable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableComparator;
-
-import edu.uci.ics.genomix.data.KmerUtil;
-
-/**
- * Variable kmer length byteswritable
- * It was used to generate the graph in which phase the kmer length doesn't change.
- * Thus the size of bytes doesn't change either.
- */
-public class KmerBytesWritable extends BinaryComparable implements Serializable, WritableComparable<BinaryComparable> {
- /**
- *
- */
- private static final long serialVersionUID = 1L;
- private static final byte[] EMPTY_BYTES = {};
-
- protected int size;
- protected byte[] bytes;
- protected int offset;
- protected int kmerlength;
-
- public KmerBytesWritable() {
- this(0, EMPTY_BYTES, 0);
- }
-
- public KmerBytesWritable(int k, byte[] storage, int offset) {
- setNewReference(k, storage, offset);
- }
-
- public KmerBytesWritable(int k, String kmer) {
- setNewReference(kmer.length(), kmer.getBytes(), 0);
- }
-
- /**
- * Initial Kmer space by kmerlength
- *
- * @param k
- * kmerlength
- */
- public KmerBytesWritable(int k) {
- this.kmerlength = k;
- this.size = KmerUtil.getByteNumFromK(kmerlength);
- if (k > 0) {
- this.bytes = new byte[this.size];
- } else {
- this.bytes = EMPTY_BYTES;
- }
- this.offset = 0;
- }
-
- public KmerBytesWritable(KmerBytesWritable right) {
- this(right.kmerlength);
- set(right);
- }
-
- /**
- * Deep copy of the given kmer
- *
- * @param newData
- */
- public void set(KmerBytesWritable newData) {
- if (newData == null) {
- this.set(0, EMPTY_BYTES, 0);
- } else {
- this.set(newData.kmerlength, newData.bytes, newData.getOffset());
- }
- }
-
- /**
- * Deep copy of the given bytes data
- * It will not change the kmerlength
- *
- * @param newData
- * @param offset
- */
- public void set(byte[] newData, int offset) {
- if (kmerlength > 0) {
- System.arraycopy(newData, offset, bytes, this.offset, size);
- }
- }
-
- /**
- * Deep copy of the given data, and also set to new kmerlength
- *
- * @param k
- * : new kmer length
- * @param newData
- * : data storage
- * @param offset
- * : start offset
- */
- public void set(int k, byte[] newData, int offset) {
- reset(k);
- if (k > 0) {
- System.arraycopy(newData, offset, bytes, this.offset, size);
- }
- }
-
- /**
- * Reset array by kmerlength
- *
- * @param k
- */
- public void reset(int k) {
- this.kmerlength = k;
- setSize(KmerUtil.getByteNumFromK(k));
- clearLeadBit();
- }
-
- /**
- * Point this datablock to the given bytes array
- * It works like the pointer to new datablock.
- * kmerlength will not change
- *
- * @param newData
- * @param offset
- */
- public void setNewReference(byte[] newData, int offset) {
- this.bytes = newData;
- this.offset = offset;
- if (newData.length - offset < size) {
- throw new IllegalArgumentException("Not given enough space");
- }
- }
-
- /**
- * Point this datablock to the given bytes array
- * It works like the pointer to new datablock.
- * It also set the new kmerlength
- *
- * @param k
- * @param newData
- * @param offset
- */
- public void setNewReference(int k, byte[] newData, int offset) {
- this.kmerlength = k;
- this.size = KmerUtil.getByteNumFromK(k);
- setNewReference(newData, offset);
- }
-
- protected void setSize(int size) {
- if (size > getCapacity()) {
- setCapacity((size * 3 / 2));
- }
- this.size = size;
- }
-
- protected int getCapacity() {
- return bytes.length;
- }
-
- protected void setCapacity(int new_cap) {
- if (new_cap != getCapacity()) {
- byte[] new_data = new byte[new_cap];
- if (new_cap < size) {
- size = new_cap;
- }
- if (size != 0) {
- System.arraycopy(bytes, offset, new_data, 0, size);
- }
- bytes = new_data;
- offset = 0;
- }
- }
-
- /**
- * Get one genecode (A|G|C|T) from the given kmer index
- * e.g. Get the 4th gene of the kmer ACGTA will return T
- *
- * @param pos
- * @return
- */
- public byte getGeneCodeAtPosition(int pos) {
- if (pos >= kmerlength) {
- throw new IllegalArgumentException("gene position out of bound");
- }
- int posByte = pos / 4;
- int shift = (pos % 4) << 1;
- return (byte) ((bytes[offset + size - 1 - posByte] >> shift) & 0x3);
- }
-
- public int getKmerLength() {
- return this.kmerlength;
- }
-
- @Override
- public byte[] getBytes() {
- return bytes;
- }
-
- public int getOffset() {
- return offset;
- }
-
- @Override
- public int getLength() {
- return size;
- }
-
- /**
- * Read Kmer from read text into bytes array e.g. AATAG will compress as
- * [0x000G, 0xATAA]
- *
- * @param k
- * @param array
- * @param start
- */
- public void setByRead(byte[] array, int start) {
- byte l = 0;
- int bytecount = 0;
- int bcount = this.size - 1;
- for (int i = start; i < start + kmerlength && i < array.length; i++) {
- byte code = GeneCode.getCodeFromSymbol(array[i]);
- l |= (byte) (code << bytecount);
- bytecount += 2;
- if (bytecount == 8) {
- bytes[offset + bcount--] = l;
- l = 0;
- bytecount = 0;
- }
- }
- if (bcount >= 0) {
- bytes[offset] = l;
- }
- }
-
- public void setByRead(int k, byte[] array, int start) {
- reset(k);
- setByRead(array, start);
- }
-
- /**
- * Compress Reversed read into bytes array
- * e.g. AATAG will paired to CTATT, and then compress as
- * [0x000T,0xTATC]
- *
- * @param input
- * array
- * @param start
- * position
- */
- public void setByReadReverse(byte[] array, int start) {
- byte l = 0;
- int bytecount = 0;
- int bcount = size - 1;
- for (int i = start + kmerlength - 1; i >= 0 && i < array.length; i--) {
- byte code = GeneCode.getPairedCodeFromSymbol(array[i]);
- l |= (byte) (code << bytecount);
- bytecount += 2;
- if (bytecount == 8) {
- bytes[offset + bcount--] = l;
- l = 0;
- bytecount = 0;
- }
- }
- if (bcount >= 0) {
- bytes[offset] = l;
- }
- }
-
- public void setByReadReverse(int k, byte[] array, int start) {
- reset(k);
- setByReadReverse(array, start);
- }
-
- /**
- * Shift Kmer to accept new char input
- *
- * @param c
- * Input new gene character
- * @return the shift out gene, in gene code format
- */
- public byte shiftKmerWithNextChar(byte c) {
- return shiftKmerWithNextCode(GeneCode.getCodeFromSymbol(c));
- }
-
- /**
- * Shift Kmer to accept new gene code
- *
- * @param c
- * Input new gene code
- * @return the shift out gene, in gene code format
- */
- public byte shiftKmerWithNextCode(byte c) {
- byte output = (byte) (bytes[offset + size - 1] & 0x03);
- for (int i = size - 1; i > 0; i--) {
- byte in = (byte) (bytes[offset + i - 1] & 0x03);
- bytes[offset + i] = (byte) (((bytes[offset + i] >>> 2) & 0x3f) | (in << 6));
- }
- int pos = ((kmerlength - 1) % 4) << 1;
- byte code = (byte) (c << pos);
- bytes[offset] = (byte) (((bytes[offset] >>> 2) & 0x3f) | code);
- clearLeadBit();
- return output;
- }
-
- /**
- * Shift Kmer to accept new input char
- *
- * @param c
- * Input new gene character
- * @return the shiftout gene, in gene code format
- */
- public byte shiftKmerWithPreChar(byte c) {
- return shiftKmerWithPreCode(GeneCode.getCodeFromSymbol(c));
- }
-
- /**
- * Shift Kmer to accept new gene code
- *
- * @param c
- * Input new gene code
- * @return the shiftout gene, in gene code format
- */
- public byte shiftKmerWithPreCode(byte c) {
- int pos = ((kmerlength - 1) % 4) << 1;
- byte output = (byte) ((bytes[offset] >> pos) & 0x03);
- for (int i = 0; i < size - 1; i++) {
- byte in = (byte) ((bytes[offset + i + 1] >> 6) & 0x03);
- bytes[offset + i] = (byte) ((bytes[offset + i] << 2) | in);
- }
- bytes[offset + size - 1] = (byte) ((bytes[offset + size - 1] << 2) | c);
- clearLeadBit();
- return output;
- }
-
- /**
- * Merge Kmer with the next connected Kmer
- * e.g. AAGCTAA merge with AACAACC, if the initial kmerSize = 3
- * then it will return AAGCTAACAACC
- *
- * @param initialKmerSize
- * : the initial kmerSize
- * @param kmer
- * : the next kmer
- */
- public void mergeNextKmer(int initialKmerSize, KmerBytesWritable kmer) {
- int preKmerLength = kmerlength;
- int preSize = size;
- this.kmerlength += kmer.kmerlength - initialKmerSize + 1;
- setSize(KmerUtil.getByteNumFromK(kmerlength));
- for (int i = 1; i <= preSize; i++) {
- bytes[offset + size - i] = bytes[offset + preSize - i];
- }
- for (int k = initialKmerSize - 1; k < kmer.getKmerLength(); k += 4) {
- byte onebyte = getOneByteFromKmerAtPosition(k, kmer.getBytes(), kmer.getOffset(), kmer.getLength());
- appendOneByteAtPosition(preKmerLength + k - initialKmerSize + 1, onebyte, bytes, offset, size);
- }
- clearLeadBit();
- }
-
- /**
- * Merge Kmer with the previous connected Kmer
- * e.g. AACAACC merge with AAGCTAA, if the initial kmerSize = 3
- * then it will return AAGCTAACAACC
- *
- * @param initialKmerSize
- * : the initial kmerSize
- * @param preKmer
- * : the previous kmer
- */
- public void mergePreKmer(int initialKmerSize, KmerBytesWritable preKmer) {
- int preKmerLength = kmerlength;
- int preSize = size;
- this.kmerlength += preKmer.kmerlength - initialKmerSize + 1;
- setSize(KmerUtil.getByteNumFromK(kmerlength));
- byte cacheByte = getOneByteFromKmerAtPosition(0, bytes, offset, preSize);
-
- // copy prekmer
- for (int k = 0; k < preKmer.kmerlength - initialKmerSize + 1; k += 4) {
- byte onebyte = getOneByteFromKmerAtPosition(k, preKmer.bytes, preKmer.offset, preKmer.size);
- appendOneByteAtPosition(k, onebyte, bytes, offset, size);
- }
-
- // copy current kmer
- int k = 4;
- for (; k < preKmerLength; k += 4) {
- byte onebyte = getOneByteFromKmerAtPosition(k, bytes, offset, preSize);
- appendOneByteAtPosition(preKmer.kmerlength - initialKmerSize + k - 4 + 1, cacheByte, bytes, offset, size);
- cacheByte = onebyte;
- }
- appendOneByteAtPosition(preKmer.kmerlength - initialKmerSize + k - 4 + 1, cacheByte, bytes, offset, size);
- clearLeadBit();
- }
-
- public static void appendOneByteAtPosition(int k, byte onebyte, byte[] buffer, int start, int length) {
- int position = start + length - 1 - k / 4;
- if (position < start) {
- throw new IllegalArgumentException("Buffer for kmer storage is invalid");
- }
- int shift = ((k) % 4) << 1;
- int mask = shift == 0 ? 0 : ((1 << shift) - 1);
-
- buffer[position] = (byte) ((buffer[position] & mask) | ((0xff & onebyte) << shift));
- if (position > start && shift != 0) {
- buffer[position - 1] = (byte) ((buffer[position - 1] & (0xff - mask)) | ((byte) ((0xff & onebyte) >> (8 - shift))));
- }
- }
-
- public static byte getOneByteFromKmerAtPosition(int k, byte[] buffer, int start, int length) {
- int position = start + length - 1 - k / 4;
- if (position < start) {
- throw new IllegalArgumentException("Buffer of kmer storage is invalid");
- }
- int shift = (k % 4) << 1;
- byte data = (byte) (((0xff) & buffer[position]) >> shift);
- if (shift != 0 && position > start) {
- data |= 0xff & (buffer[position - 1] << (8 - shift));
- }
- return data;
- }
-
- protected void clearLeadBit() {
- if (kmerlength % 4 != 0) {
- bytes[offset] &= (1 << ((kmerlength % 4) << 1)) - 1;
- }
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- this.kmerlength = in.readInt();
- this.size = KmerUtil.getByteNumFromK(kmerlength);
- if (this.kmerlength > 0) {
- if (this.bytes.length < this.size) {
- this.bytes = new byte[this.size];
- this.offset = 0;
- }
- in.readFully(bytes, offset, size);
- }
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- out.writeInt(kmerlength);
- if (kmerlength > 0) {
- out.write(bytes, offset, size);
- }
- }
-
- @Override
- public int hashCode() {
- return super.hashCode() * 31 + this.kmerlength;
- }
-
- @Override
- public boolean equals(Object right_obj) {
- if (right_obj instanceof KmerBytesWritable)
- return this.kmerlength == ((KmerBytesWritable) right_obj).kmerlength && super.equals(right_obj);
- return false;
- }
-
- @Override
- public String toString() {
- return KmerUtil.recoverKmerFrom(this.kmerlength, this.getBytes(), offset, this.getLength());
- }
-
- public static class Comparator extends WritableComparator {
- public final int LEAD_BYTES = 4;
-
- public Comparator() {
- super(KmerBytesWritable.class);
- }
-
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
- int kmerlength1 = readInt(b1, s1);
- int kmerlength2 = readInt(b2, s2);
- if (kmerlength1 == kmerlength2) {
- return compareBytes(b1, s1 + LEAD_BYTES, l1 - LEAD_BYTES, b2, s2 + LEAD_BYTES, l2 - LEAD_BYTES);
- }
- return kmerlength1 - kmerlength2;
- }
- }
-
- static { // register this comparator
- WritableComparator.define(KmerBytesWritable.class, new Comparator());
- }
-
-}
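
The class above packed four bases per byte, two bits each, with base i of the k-mer stored in byte (size - 1 - i/4) at bit offset (i % 4) * 2. A minimal standalone sketch that reproduces the "AATAG compresses as [0x000G, 0xATAA]" example from its javadoc (illustrative names; ceil(k/4) is an assumption about KmerUtil.getByteNumFromK, which is not shown in this diff):

    // Illustrative sketch only; not project code.
    public class PackedKmerDemo {
        static byte[] pack(String kmer) {
            int size = (kmer.length() + 3) / 4; // assumed: bytes needed for k bases
            byte[] bytes = new byte[size];
            for (int i = 0; i < kmer.length(); i++) {
                byte code = (byte) "ACGT".indexOf(Character.toUpperCase(kmer.charAt(i)));
                // base i -> byte (size - 1 - i/4), bit offset (i % 4) * 2
                bytes[size - 1 - i / 4] |= code << ((i % 4) << 1);
            }
            return bytes;
        }

        static char baseAt(byte[] bytes, int pos) { // mirrors getGeneCodeAtPosition
            int b = bytes[bytes.length - 1 - pos / 4] >> ((pos % 4) << 1);
            return "ACGT".charAt(b & 0x3);
        }

        public static void main(String[] args) {
            byte[] packed = pack("AATAG"); // 5 bases -> 2 bytes: [0x02, 0x30]
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < 5; i++) sb.append(baseAt(packed, i));
            System.out.println(sb); // AATAG round-trips
        }
    }
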
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/velvet/oldtype/KmerBytesWritableFactory.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/velvet/oldtype/KmerBytesWritableFactory.java
deleted file mode 100644
index b0aaebc..0000000
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/velvet/oldtype/KmerBytesWritableFactory.java
+++ /dev/null
@@ -1,313 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.velvet.oldtype;
-
-public class KmerBytesWritableFactory {
- private KmerBytesWritable kmer;
-
- public KmerBytesWritableFactory(int k) {
- kmer = new KmerBytesWritable(k);
- }
-
- /**
- * Read Kmer from read text into bytes array e.g. AATAG will compress as
- * [0x000G, 0xATAA]
- *
- * @param k
- * @param array
- * @param start
- */
- public KmerBytesWritable getKmerByRead(int k, byte[] array, int start) {
- kmer.reset(k);
- kmer.setByRead(array, start);
- return kmer;
- }
-
- /**
- * Compress Reversed Kmer into bytes array AATAG will compress as
- * [0x000A,0xATAG]
- *
- * @param array
- * @param start
- */
- public KmerBytesWritable getKmerByReadReverse(int k, byte[] array, int start) {
- kmer.reset(k);
- kmer.setByReadReverse(array, start);
- return kmer;
- }
-
- /**
- * Get last kmer from kmer-chain.
- * e.g. kmerChain is AAGCTA, if k =5, it will
- * return AGCTA
- *
- * @param k
- * @param kInChain
- * @param kmerChain
- * @return LastKmer bytes array
- */
- public KmerBytesWritable getLastKmerFromChain(int lastK, final KmerBytesWritable kmerChain) {
- if (lastK > kmerChain.getKmerLength()) {
- return null;
- }
- if (lastK == kmerChain.getKmerLength()) {
- kmer.set(kmerChain);
- return kmer;
- }
- kmer.reset(lastK);
-
- /** from end to start */
- int byteInChain = kmerChain.getLength() - 1 - (kmerChain.getKmerLength() - lastK) / 4;
- int posInByteOfChain = ((kmerChain.getKmerLength() - lastK) % 4) << 1; // *2
- int byteInKmer = kmer.getLength() - 1;
- for (; byteInKmer >= 0 && byteInChain > 0; byteInKmer--, byteInChain--) {
- kmer.getBytes()[byteInKmer] = (byte) ((0xff & kmerChain.getBytes()[byteInChain]) >> posInByteOfChain);
- kmer.getBytes()[byteInKmer] |= ((kmerChain.getBytes()[byteInChain - 1] << (8 - posInByteOfChain)));
- }
-
- /** last kmer byte */
- if (byteInKmer == 0) {
- kmer.getBytes()[0] = (byte) ((kmerChain.getBytes()[0] & 0xff) >> posInByteOfChain);
- }
- kmer.clearLeadBit();
- return kmer;
- }
-
- /**
- * Get first kmer from kmer-chain e.g. kmerChain is AAGCTA, if k=5, it will
- * return AAGCT
- *
- * @param k
- * @param kInChain
- * @param kmerChain
- * @return FirstKmer bytes array
- */
- public KmerBytesWritable getFirstKmerFromChain(int firstK, final KmerBytesWritable kmerChain) {
- if (firstK > kmerChain.getKmerLength()) {
- return null;
- }
- if (firstK == kmerChain.getKmerLength()) {
- kmer.set(kmerChain);
- return kmer;
- }
- kmer.reset(firstK);
-
- int i = 1;
- for (; i < kmer.getLength(); i++) {
- kmer.getBytes()[kmer.getLength() - i] = kmerChain.getBytes()[kmerChain.getLength() - i];
- }
- int posInByteOfChain = (firstK % 4) << 1; // *2
- if (posInByteOfChain == 0) {
- kmer.getBytes()[0] = kmerChain.getBytes()[kmerChain.getLength() - i];
- } else {
- kmer.getBytes()[0] = (byte) (kmerChain.getBytes()[kmerChain.getLength() - i] & ((1 << posInByteOfChain) - 1));
- }
- kmer.clearLeadBit();
- return kmer;
- }
-
- public KmerBytesWritable getSubKmerFromChain(int startK, int kSize, final KmerBytesWritable kmerChain) {
- if (startK + kSize > kmerChain.getKmerLength()) {
- return null;
- }
- if (startK == 0 && kSize == kmerChain.getKmerLength()) {
- kmer.set(kmerChain);
- return kmer;
- }
- kmer.reset(kSize);
-
- /** from end to start */
- int byteInChain = kmerChain.getLength() - 1 - startK / 4;
- int posInByteOfChain = startK % 4 << 1; // *2
- int byteInKmer = kmer.getLength() - 1;
- for (; byteInKmer >= 0 && byteInChain > 0; byteInKmer--, byteInChain--) {
- kmer.getBytes()[byteInKmer] = (byte) ((0xff & kmerChain.getBytes()[byteInChain]) >> posInByteOfChain);
- kmer.getBytes()[byteInKmer] |= ((kmerChain.getBytes()[byteInChain - 1] << (8 - posInByteOfChain)));
- }
-
- /** last kmer byte */
- if (byteInKmer == 0) {
- kmer.getBytes()[0] = (byte) ((kmerChain.getBytes()[0] & 0xff) >> posInByteOfChain);
- }
- kmer.clearLeadBit();
- return kmer;
- }
-
- /**
- * Merge kmer with next neighbor in gene-code format.
- * The k of new kmer will increase by 1
- * e.g. AAGCT merge with A => AAGCTA
- *
- * @param k
- * :input k of kmer
- * @param kmer
- * : input bytes of kmer
- * @param nextCode
- * : next neighbor in gene-code format
- * @return the merged Kmer, this K of this Kmer is k+1
- */
- public KmerBytesWritable mergeKmerWithNextCode(final KmerBytesWritable kmer, byte nextCode) {
- this.kmer.reset(kmer.getKmerLength() + 1);
- for (int i = 1; i <= kmer.getLength(); i++) {
- this.kmer.getBytes()[this.kmer.getLength() - i] = kmer.getBytes()[kmer.getLength() - i];
- }
- if (this.kmer.getLength() > kmer.getLength()) {
- this.kmer.getBytes()[0] = (byte) (nextCode & 0x3);
- } else {
- this.kmer.getBytes()[0] = (byte) (kmer.getBytes()[0] | ((nextCode & 0x3) << ((kmer.getKmerLength() % 4) << 1)));
- }
- this.kmer.clearLeadBit();
- return this.kmer;
- }
-
- /**
- * Merge kmer with previous neighbor in gene-code format.
- * The k of new kmer will increase by 1
- * e.g. AAGCT merge with A => AAAGCT
- *
- * @param k
- * :input k of kmer
- * @param kmer
- * : input bytes of kmer
- * @param preCode
- * : next neighbor in gene-code format
- * @return the merged Kmer,this K of this Kmer is k+1
- */
- public KmerBytesWritable mergeKmerWithPreCode(final KmerBytesWritable kmer, byte preCode) {
- this.kmer.reset(kmer.getKmerLength() + 1);
- int byteInMergedKmer = 0;
- if (kmer.getKmerLength() % 4 == 0) {
- this.kmer.getBytes()[0] = (byte) ((kmer.getBytes()[0] >> 6) & 0x3);
- byteInMergedKmer++;
- }
- for (int i = 0; i < kmer.getLength() - 1; i++, byteInMergedKmer++) {
- this.kmer.getBytes()[byteInMergedKmer] = (byte) ((kmer.getBytes()[i] << 2) | ((kmer.getBytes()[i + 1] >> 6) & 0x3));
- }
- this.kmer.getBytes()[byteInMergedKmer] = (byte) ((kmer.getBytes()[kmer.getLength() - 1] << 2) | (preCode & 0x3));
- this.kmer.clearLeadBit();
- return this.kmer;
- }
-
- /**
- * Merge two kmer to one kmer
- * e.g. ACTA + ACCGT => ACTAACCGT
- *
- * @param preK
- * : previous k of kmer
- * @param kmerPre
- * : bytes array of previous kmer
- * @param nextK
- * : next k of kmer
- * @param kmerNext
- * : bytes array of next kmer
- * @return merged kmer, the new k is @preK + @nextK
- */
- public KmerBytesWritable mergeTwoKmer(final KmerBytesWritable preKmer, final KmerBytesWritable nextKmer) {
- kmer.reset(preKmer.getKmerLength() + nextKmer.getKmerLength());
- int i = 1;
- for (; i <= preKmer.getLength(); i++) {
- kmer.getBytes()[kmer.getLength() - i] = preKmer.getBytes()[preKmer.getLength() - i];
- }
- if (i > 1) {
- i--;
- }
- if (preKmer.getKmerLength() % 4 == 0) {
- for (int j = 1; j <= nextKmer.getLength(); j++) {
- kmer.getBytes()[kmer.getLength() - i - j] = nextKmer.getBytes()[nextKmer.getLength() - j];
- }
- } else {
- int posNeedToMove = ((preKmer.getKmerLength() % 4) << 1);
- kmer.getBytes()[kmer.getLength() - i] |= nextKmer.getBytes()[nextKmer.getLength() - 1] << posNeedToMove;
- for (int j = 1; j < nextKmer.getLength(); j++) {
- kmer.getBytes()[kmer.getLength() - i - j] = (byte) (((nextKmer.getBytes()[nextKmer.getLength() - j] & 0xff) >> (8 - posNeedToMove)) | (nextKmer
- .getBytes()[nextKmer.getLength() - j - 1] << posNeedToMove));
- }
- if (nextKmer.getKmerLength() % 4 == 0 || (nextKmer.getKmerLength() % 4) * 2 + posNeedToMove > 8) {
- kmer.getBytes()[0] = (byte) ((0xff & nextKmer.getBytes()[0]) >> (8 - posNeedToMove));
- }
- }
- kmer.clearLeadBit();
- return kmer;
- }
-
- /**
- * Safely shifted the kmer forward without change the input kmer
- * e.g. AGCGC shift with T => GCGCT
- *
- * @param k
- * : kmer length
- * @param kmer
- * : input kmer
- * @param afterCode
- * : input genecode
- * @return new created kmer that shifted by afterCode, the K will not change
- */
- public KmerBytesWritable shiftKmerWithNextCode(final KmerBytesWritable kmer, byte afterCode) {
- this.kmer.set(kmer);
- this.kmer.shiftKmerWithNextCode(afterCode);
- return this.kmer;
- }
-
- /**
- * Safely shifted the kmer backward without change the input kmer
- * e.g. AGCGC shift with T => TAGCG
- *
- * @param k
- * : kmer length
- * @param kmer
- * : input kmer
- * @param preCode
- * : input genecode
- * @return new created kmer that shifted by preCode, the K will not change
- */
- public KmerBytesWritable shiftKmerWithPreCode(final KmerBytesWritable kmer, byte preCode) {
- this.kmer.set(kmer);
- this.kmer.shiftKmerWithPreCode(preCode);
- return this.kmer;
- }
-
- /**
- * get the reverse sequence of given kmer
- *
- * @param kmer
- */
- public KmerBytesWritable reverse(final KmerBytesWritable kmer) {
- this.kmer.reset(kmer.getKmerLength());
-
- int curPosAtKmer = ((kmer.getKmerLength() - 1) % 4) << 1;
- int curByteAtKmer = 0;
-
- int curPosAtReverse = 0;
- int curByteAtReverse = this.kmer.getLength() - 1;
- this.kmer.getBytes()[curByteAtReverse] = 0;
- for (int i = 0; i < kmer.getKmerLength(); i++) {
- byte gene = (byte) ((kmer.getBytes()[curByteAtKmer] >> curPosAtKmer) & 0x03);
- this.kmer.getBytes()[curByteAtReverse] |= gene << curPosAtReverse;
- curPosAtReverse += 2;
- if (curPosAtReverse >= 8) {
- curPosAtReverse = 0;
- this.kmer.getBytes()[--curByteAtReverse] = 0;
- }
- curPosAtKmer -= 2;
- if (curPosAtKmer < 0) {
- curPosAtKmer = 6;
- curByteAtKmer++;
- }
- }
- this.kmer.clearLeadBit();
- return this.kmer;
- }
-}
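
The factory above performed its operations directly on the packed 2-bit bytes; at the string level the semantics, taken from its own javadoc examples, are simple. A tiny demo of those semantics (illustrative only, not how the removed code worked internally):

    // Illustrative sketch only; not project code.
    public class FactorySemanticsDemo {
        public static void main(String[] args) {
            String kmer = "AGCGC";
            // shiftKmerWithNextCode: drop the first base, append the new one
            System.out.println(kmer.substring(1) + "T");    // GCGCT
            // shiftKmerWithPreCode: prepend the new base, drop the last one
            System.out.println("T" + kmer.substring(0, 4)); // TAGCG
            // mergeTwoKmer: plain concatenation
            System.out.println("ACTA" + "ACCGT");           // ACTAACCGT
        }
    }
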
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/velvet/oldtype/NodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/velvet/oldtype/NodeWritable.java
deleted file mode 100644
index 128bf9f..0000000
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/velvet/oldtype/NodeWritable.java
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.velvet.oldtype;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.Serializable;
-
-import org.apache.hadoop.io.WritableComparable;
-
-public class NodeWritable implements WritableComparable<NodeWritable>, Serializable {
- /**
- *
- */
- private static final long serialVersionUID = 1L;
- private PositionWritable nodeID;
- private PositionListWritable forwardForwardList;
- private PositionListWritable forwardReverseList;
- private PositionListWritable reverseForwardList;
- private PositionListWritable reverseReverseList;
- private KmerBytesWritable kmer;
-
- public NodeWritable() {
- this(21);
- }
-
- public NodeWritable(int kmerSize) {
- nodeID = new PositionWritable(0, (byte) 0);
- forwardForwardList = new PositionListWritable();
- forwardReverseList = new PositionListWritable();
- reverseForwardList = new PositionListWritable();
- reverseReverseList = new PositionListWritable();
- kmer = new KmerBytesWritable(kmerSize);
- }
-
- public NodeWritable(PositionWritable nodeID, PositionListWritable FFList, PositionListWritable FRList,
- PositionListWritable RFList, PositionListWritable RRList, KmerBytesWritable kmer) {
- this(kmer.getKmerLength());
- this.nodeID.set(nodeID);
- forwardForwardList.set(FFList);
- forwardReverseList.set(FRList);
- reverseForwardList.set(RFList);
- reverseReverseList.set(RRList);
- kmer.set(kmer);
- }
-
- public void setNodeID(PositionWritable ref) {
- this.setNodeID(ref.getReadID(), ref.getPosInRead());
- }
-
- public void setNodeID(int readID, byte posInRead) {
- nodeID.set(readID, posInRead);
- }
-
- public void setKmer(KmerBytesWritable right) {
- this.kmer.set(right);
- }
-
- public void reset(int kmerSize) {
- nodeID.set(0, (byte) 0);
- forwardForwardList.reset();
- forwardReverseList.reset();
- reverseForwardList.reset();
- reverseReverseList.reset();
- kmer.reset(kmerSize);
- }
-
- public PositionListWritable getFFList() {
- return forwardForwardList;
- }
-
- public PositionListWritable getFRList() {
- return forwardReverseList;
- }
-
- public PositionListWritable getRFList() {
- return reverseForwardList;
- }
-
- public PositionListWritable getRRList() {
- return reverseReverseList;
- }
-
- public PositionWritable getNodeID() {
- return nodeID;
- }
-
- public KmerBytesWritable getKmer() {
- return kmer;
- }
-
- public int getCount() {
- return kmer.getKmerLength();
- }
-
- public void mergeForwardNext(NodeWritable nextNode, int initialKmerSize) {
- this.forwardForwardList.set(nextNode.forwardForwardList);
- this.forwardReverseList.set(nextNode.forwardReverseList);
- kmer.mergeNextKmer(initialKmerSize, nextNode.getKmer());
- }
-
- public void mergeForwardPre(NodeWritable preNode, int initialKmerSize) {
- this.reverseForwardList.set(preNode.reverseForwardList);
- this.reverseReverseList.set(preNode.reverseReverseList);
- kmer.mergePreKmer(initialKmerSize, preNode.getKmer());
- }
-
- public void set(NodeWritable node) {
- this.nodeID.set(node.getNodeID().getReadID(), node.getNodeID().getPosInRead());
- this.forwardForwardList.set(node.forwardForwardList);
- this.forwardReverseList.set(node.forwardReverseList);
- this.reverseForwardList.set(node.reverseForwardList);
- this.reverseReverseList.set(node.reverseReverseList);
- this.kmer.set(node.kmer);
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- this.nodeID.readFields(in);
- this.forwardForwardList.readFields(in);
- this.forwardReverseList.readFields(in);
- this.reverseForwardList.readFields(in);
- this.reverseReverseList.readFields(in);
- this.kmer.readFields(in);
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- this.nodeID.write(out);
- this.forwardForwardList.write(out);
- this.forwardReverseList.write(out);
- this.reverseForwardList.write(out);
- this.reverseReverseList.write(out);
- this.kmer.write(out);
- }
-
- @Override
- public int compareTo(NodeWritable other) {
- return this.nodeID.compareTo(other.nodeID);
- }
-
- @Override
- public int hashCode() {
- return nodeID.hashCode();
- }
-
- @Override
- public boolean equals(Object o) {
- if (o instanceof NodeWritable) {
- NodeWritable nw = (NodeWritable) o;
- return (this.nodeID.equals(nw.nodeID) && this.forwardForwardList.equals(nw.forwardForwardList)
- && this.forwardReverseList.equals(nw.forwardReverseList)
- && this.reverseForwardList.equals(nw.reverseForwardList)
- && this.reverseReverseList.equals(nw.reverseReverseList) && this.kmer.equals(nw.kmer));
- }
- return false;
- }
-
- @Override
- public String toString() {
- StringBuilder sbuilder = new StringBuilder();
- sbuilder.append('(');
- sbuilder.append(nodeID.toString()).append('\t');
- sbuilder.append(forwardForwardList.toString()).append('\t');
- sbuilder.append(forwardReverseList.toString()).append('\t');
- sbuilder.append(reverseForwardList.toString()).append('\t');
- sbuilder.append(reverseReverseList.toString()).append('\t');
- sbuilder.append(kmer.toString()).append(')');
- return sbuilder.toString();
- }
-
- public int inDegree() {
- return reverseReverseList.getCountOfPosition() + reverseForwardList.getCountOfPosition();
- }
-
- public int outDegree() {
- return forwardForwardList.getCountOfPosition() + forwardReverseList.getCountOfPosition();
- }
-
- /*
- * Return if this node is a "path" compressible node, that is, it has an in-degree and out-degree of 1
- */
- public boolean isPathNode() {
- return inDegree() == 1 && outDegree() == 1;
- }
-
-}
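
The node class above kept four adjacency lists (FF, FR, RF, RR) and defined a node as "path"-compressible when exactly one edge enters and one leaves. A standalone sketch of that test (illustrative names only):

    // Illustrative sketch only; not project code.
    public class PathNodeDemo {
        static boolean isPathNode(int ffCount, int frCount, int rfCount, int rrCount) {
            int inDegree = rrCount + rfCount;  // reverse-reverse + reverse-forward
            int outDegree = ffCount + frCount; // forward-forward + forward-reverse
            return inDegree == 1 && outDegree == 1;
        }

        public static void main(String[] args) {
            System.out.println(isPathNode(1, 0, 0, 1)); // true: one in, one out
            System.out.println(isPathNode(2, 0, 1, 0)); // false: out-degree is 2
        }
    }
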
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/velvet/oldtype/PositionListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/velvet/oldtype/PositionListWritable.java
deleted file mode 100644
index b6c42c2..0000000
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/velvet/oldtype/PositionListWritable.java
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.velvet.oldtype;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.hadoop.io.Writable;
-
-import edu.uci.ics.genomix.data.Marshal;
-
-public class PositionListWritable implements Writable, Iterable<PositionWritable>, Serializable {
- /**
- *
- */
- private static final long serialVersionUID = 1L;
- protected byte[] storage;
- protected int offset;
- protected int valueCount;
- protected static final byte[] EMPTY = {};
- public static final int INTBYTES = 4;
-
- protected PositionWritable posIter = new PositionWritable();
-
- public PositionListWritable() {
- this.storage = EMPTY;
- this.valueCount = 0;
- this.offset = 0;
- }
-
- public PositionListWritable(int count, byte[] data, int offset) {
- setNewReference(count, data, offset);
- }
-
- public PositionListWritable(List<PositionWritable> posns) {
- this();
- for (PositionWritable p : posns) {
- append(p);
- }
- }
-
- public void setNewReference(int count, byte[] data, int offset) {
- this.valueCount = count;
- this.storage = data;
- this.offset = offset;
- }
-
- protected void setSize(int size) {
- if (size > getCapacity()) {
- setCapacity((size * 3 / 2));
- }
- }
-
- protected int getCapacity() {
- return storage.length - offset;
- }
-
- protected void setCapacity(int new_cap) {
- if (new_cap > getCapacity()) {
- byte[] new_data = new byte[new_cap];
- if (storage.length - offset > 0) {
- System.arraycopy(storage, offset, new_data, 0, storage.length - offset);
- }
- storage = new_data;
- offset = 0;
- }
- }
-
- public PositionWritable getPosition(int i) {
- if (i >= valueCount) {
- throw new ArrayIndexOutOfBoundsException("No such positions");
- }
- posIter.setNewReference(storage, offset + i * PositionWritable.LENGTH);
- return posIter;
- }
-
- public void resetPosition(int i, int readID, byte posInRead) {
- if (i >= valueCount) {
- throw new ArrayIndexOutOfBoundsException("No such positions");
- }
- Marshal.putInt(readID, storage, offset + i * PositionWritable.LENGTH);
- storage[offset + INTBYTES] = posInRead;
- }
-
- @Override
- public Iterator<PositionWritable> iterator() {
- Iterator<PositionWritable> it = new Iterator<PositionWritable>() {
-
- private int currentIndex = 0;
-
- @Override
- public boolean hasNext() {
- return currentIndex < valueCount;
- }
-
- @Override
- public PositionWritable next() {
- return getPosition(currentIndex++);
- }
-
- @Override
- public void remove() {
- }
- };
- return it;
- }
-
- public void set(PositionListWritable list2) {
- set(list2.valueCount, list2.storage, list2.offset);
- }
-
- public void set(int valueCount, byte[] newData, int offset) {
- this.valueCount = valueCount;
- setSize(valueCount * PositionWritable.LENGTH);
- if (valueCount > 0) {
- System.arraycopy(newData, offset, storage, this.offset, valueCount * PositionWritable.LENGTH);
- }
- }
-
- public void reset() {
- valueCount = 0;
- }
-
- public void append(PositionWritable pos) {
- setSize((1 + valueCount) * PositionWritable.LENGTH);
- System.arraycopy(pos.getByteArray(), pos.getStartOffset(), storage, offset + valueCount
- * PositionWritable.LENGTH, pos.getLength());
- valueCount += 1;
- }
-
- public void append(int readID, byte posInRead) {
- setSize((1 + valueCount) * PositionWritable.LENGTH);
- Marshal.putInt(readID, storage, offset + valueCount * PositionWritable.LENGTH);
- storage[offset + valueCount * PositionWritable.LENGTH + PositionWritable.INTBYTES] = posInRead;
- valueCount += 1;
- }
-
- public static int getCountByDataLength(int length) {
- if (length % PositionWritable.LENGTH != 0) {
- for (StackTraceElement ste : Thread.currentThread().getStackTrace()) {
- System.out.println(ste);
- }
- throw new IllegalArgumentException("Length of positionlist is invalid");
- }
- return length / PositionWritable.LENGTH;
- }
-
- public int getCountOfPosition() {
- return valueCount;
- }
-
- public byte[] getByteArray() {
- return storage;
- }
-
- public int getStartOffset() {
- return offset;
- }
-
- public int getLength() {
- return valueCount * PositionWritable.LENGTH;
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- this.valueCount = in.readInt();
- setSize(valueCount * PositionWritable.LENGTH);
- in.readFully(storage, offset, valueCount * PositionWritable.LENGTH);
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- out.writeInt(valueCount);
- out.write(storage, offset, valueCount * PositionWritable.LENGTH);
- }
-
- @Override
- public String toString() {
- StringBuilder sbuilder = new StringBuilder();
- sbuilder.append('[');
- for (PositionWritable pos : this) {
- sbuilder.append(pos.toString());
- sbuilder.append(',');
- }
- if (valueCount > 0) {
- sbuilder.setCharAt(sbuilder.length() - 1, ']');
- } else {
- sbuilder.append(']');
- }
- return sbuilder.toString();
- }
-
- @Override
- public int hashCode() {
- return Marshal.hashBytes(getByteArray(), getStartOffset(), getLength());
- }
-
- @Override
- public boolean equals(Object o) {
- if (!(o instanceof PositionListWritable))
- return false;
- PositionListWritable other = (PositionListWritable) o;
- if (this.valueCount != other.valueCount)
- return false;
- for (int i=0; i < this.valueCount; i++) {
- if (!this.getPosition(i).equals(other.getPosition(i)))
- return false;
- }
- return true;
- }
-}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/velvet/oldtype/PositionWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/velvet/oldtype/PositionWritable.java
deleted file mode 100644
index 1d509bb..0000000
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/velvet/oldtype/PositionWritable.java
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.velvet.oldtype;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.Serializable;
-
-import org.apache.hadoop.io.RawComparator;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableComparator;
-
-import edu.uci.ics.genomix.data.Marshal;
-
-public class PositionWritable implements WritableComparable<PositionWritable>, Serializable {
- /**
- *
- */
- private static final long serialVersionUID = 1L;
- protected byte[] storage;
- protected int offset;
- public static final int LENGTH = 5;
- public static final int INTBYTES = 4;
-
- public PositionWritable() {
- storage = new byte[LENGTH];
- offset = 0;
- }
-
- public PositionWritable(int readID, byte posInRead) {
- this();
- set(readID, posInRead);
- }
-
- public PositionWritable(byte[] storage, int offset) {
- setNewReference(storage, offset);
- }
-
- public void setNewReference(byte[] storage, int offset) {
- this.storage = storage;
- this.offset = offset;
- }
-
- public void set(PositionWritable pos) {
- set(pos.getReadID(), pos.getPosInRead());
- }
-
- public void set(int readID, byte posInRead) {
- Marshal.putInt(readID, storage, offset);
- storage[offset + INTBYTES] = posInRead;
- }
-
- public int getReadID() {
- return Marshal.getInt(storage, offset);
- }
-
- public byte getPosInRead() {
- return storage[offset + INTBYTES];
- }
-
- public byte[] getByteArray() {
- return storage;
- }
-
- public int getStartOffset() {
- return offset;
- }
-
- public int getLength() {
- return LENGTH;
- }
-
- public boolean isSameReadID(PositionWritable other) {
- return getReadID() == other.getReadID();
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- in.readFully(storage, offset, LENGTH);
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- out.write(storage, offset, LENGTH);
- }
-
- @Override
- public int hashCode() {
- return Marshal.hashBytes(getByteArray(), getStartOffset(), getLength());
- }
-
- @Override
- public boolean equals(Object o) {
- if (!(o instanceof PositionWritable))
- return false;
- PositionWritable other = (PositionWritable) o;
- return this.getReadID() == other.getReadID() && this.getPosInRead() == other.getPosInRead();
- }
-
- @Override
- public int compareTo(PositionWritable other) {
- int diff1 = this.getReadID() - other.getReadID();
- if (diff1 == 0) {
- int diff2 = Math.abs((int) this.getPosInRead()) - Math.abs((int) other.getPosInRead());
- if (diff2 == 0) {
- return this.getPosInRead() - other.getPosInRead();
- }
- return diff2;
- }
- return diff1;
- }
-
- @Override
- public String toString() {
- return "(" + Integer.toString(getReadID()) + "," + Integer.toString((int) getPosInRead()) + ")";
- }
-
- /** A Comparator optimized for IntWritable. */
- public static class Comparator extends WritableComparator {
- public Comparator() {
- super(PositionWritable.class);
- }
-
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
- int thisValue = Marshal.getInt(b1, s1);
- int thatValue = Marshal.getInt(b2, s2);
- int diff1 = thisValue - thatValue;
- if (diff1 == 0) {
- int diff2 = Math.abs((int) b1[s1 + INTBYTES]) - Math.abs((int) b2[s2 + INTBYTES]);
- if (diff2 == 0) {
- return b1[s1 + INTBYTES] - b2[s2 + INTBYTES];
- }
- return diff2;
- }
- return diff1;
- }
- }
-
- public static class FirstComparator implements RawComparator<PositionWritable> {
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
- return WritableComparator.compareBytes(b1, s1, l1 - 1, b2, s2, l2 - 1);
- }
-
- @Override
- public int compare(PositionWritable o1, PositionWritable o2) {
- int l = o1.getReadID();
- int r = o2.getReadID();
- return l == r ? 0 : (l < r ? -1 : 1);
- }
- }
-
- static { // register this comparator
- WritableComparator.define(PositionWritable.class, new Comparator());
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingDriver.java
deleted file mode 100644
index c5a7c23..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingDriver.java
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.gbresultschecking;
-
-import java.io.IOException;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.TextOutputFormat;
-import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.Option;
-
-@SuppressWarnings("deprecation")
-public class ResultsCheckingDriver {
- private static class Options {
- @Option(name = "-inputpath1", usage = "the input path", required = true)
- public String inputPath1;
-
- @Option(name = "-inputpath2", usage = "the input path", required = true)
- public String inputPath2;
-
- @Option(name = "-outputpath", usage = "the output path", required = true)
- public String outputPath;
-
- @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
- public int numReducers;
-
- @Option(name = "-kmer-kmerByteSize", usage = "the kmerByteSize of kmer", required = true)
- public int sizeKmer;
-
- }
-
- public void run(String inputPath1, String inputPath2, String outputPath, int numReducers, int sizeKmer,
- String defaultConfPath) throws IOException {
-
- JobConf conf = new JobConf(ResultsCheckingDriver.class);
-
- conf.setInt("sizeKmer", sizeKmer);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
-
- conf.setJobName("Results Checking");
- conf.setMapperClass(ResultsCheckingMapper.class);
- conf.setReducerClass(ResultsCheckingReducer.class);
-
- conf.setMapOutputKeyClass(Text.class);
- conf.setMapOutputValueClass(Text.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
- conf.setOutputFormat(TextOutputFormat.class);
-
- conf.setOutputKeyClass(Text.class);
- conf.setOutputValueClass(Text.class);
-
- Path[] inputList = new Path[2];
- inputList[0] = new Path(inputPath1);
- inputList[1] = new Path(inputPath2);
-
- FileInputFormat.setInputPaths(conf, inputList);
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- conf.setNumReduceTasks(numReducers);
-
- FileSystem dfs = FileSystem.get(conf);
- dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- }
-
- public static void main(String[] args) throws Exception {
- Options options = new Options();
- CmdLineParser parser = new CmdLineParser(options);
- parser.parseArgument(args);
- ResultsCheckingDriver driver = new ResultsCheckingDriver();
- driver.run(options.inputPath1, options.inputPath2, options.outputPath, options.numReducers, options.sizeKmer,
- null);
- }
-
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingMapper.java
deleted file mode 100644
index 1b158ed..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingMapper.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.gbresultschecking;
-
-import java.io.IOException;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-@SuppressWarnings({ "unused", "deprecation" })
-public class ResultsCheckingMapper extends MapReduceBase implements Mapper<KmerBytesWritable, KmerCountValue, Text, Text> {
- KmerBytesWritable valWriter;
- private final static IntWritable one = new IntWritable(1);
- public static Text textkey = new Text();
- public static Text textvalue = new Text();
- public static String INPUT_PATH;
- public static int KMER_SIZE;
-
- public void configure(JobConf job) {
- KMER_SIZE = job.getInt("sizeKmer", 0);
- valWriter= new KmerBytesWritable(KMER_SIZE);
- }
-
- @Override
- public void map(KmerBytesWritable key, KmerCountValue value, OutputCollector<Text, Text> output, Reporter reporter)
- throws IOException {
-
- FileSplit fileSplit = (FileSplit) reporter.getInputSplit();
- String filename = fileSplit.getPath().getName();
- textkey.set(key.toString() + "\t" + value.toString());
- textvalue.set(filename);
- output.collect(textkey, textvalue);
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingReducer.java
deleted file mode 100644
index e93548f..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingReducer.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.gbresultschecking;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-
-@SuppressWarnings("deprecation")
-public class ResultsCheckingReducer extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
-
- public static Text textkey = new Text();
- public static Text textvalue = new Text();
-
- @Override
- public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
- throws IOException {
- textkey.set(key);
- textvalue.set(values.next());
- if (!values.hasNext()) {
- output.collect(textkey, textvalue);
- }
- }
-}
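Note the trick in the reducer above: a key is emitted only when it arrives with exactly one value, i.e. when a kmer/count pair appears in just one of the two inputs, so the job's output is effectively the symmetric difference of the two result sets (assuming each input contributes at most one record per key). The same check in isolation, as a minimal sketch:

    import java.util.Arrays;
    import java.util.Iterator;

    public class DiffCheckSketch {
        // returns the sole value for a key, or null when the key matched in both inputs
        static String reduceOne(Iterator<String> values) {
            String first = values.next();
            return values.hasNext() ? null : first;
        }

        public static void main(String[] args) {
            System.out.println(reduceOne(Arrays.asList("fileA").iterator()));          // fileA
            System.out.println(reduceOne(Arrays.asList("fileA", "fileB").iterator())); // null
        }
    }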
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixCombiner.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixCombiner.java
deleted file mode 100755
index 76515f3..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixCombiner.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hadoop.graphbuilding;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-/**
- * This class implements the combiner operator of the MapReduce model.
- */
-@SuppressWarnings("deprecation")
-public class GenomixCombiner extends MapReduceBase implements
- Reducer<KmerBytesWritable, KmerCountValue, KmerBytesWritable, KmerCountValue> {
- private KmerCountValue vaWriter = new KmerCountValue();
-
- @Override
- public void reduce(KmerBytesWritable key, Iterator<KmerCountValue> values,
- OutputCollector<KmerBytesWritable, KmerCountValue> output, Reporter reporter) throws IOException {
- byte groupByAdjList = 0;
- int count = 0;
- byte bytCount = 0;
- while (values.hasNext()) {
- // merge the adjacency bitmaps of all values for this kmer
- KmerCountValue geneValue = values.next();
- groupByAdjList = (byte) (groupByAdjList | geneValue.getAdjBitMap());
- count = count + (int) geneValue.getCount();
- }
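- // saturate the count at 127 so it still fits in a signed byte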
- if (count >= 127)
- bytCount = (byte) 127;
- else
- bytCount = (byte) count;
- vaWriter.set(groupByAdjList, bytCount);
- output.collect(key, vaWriter);
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixDriver.java
deleted file mode 100755
index b4885b5..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixDriver.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hadoop.graphbuilding;
-
-import java.io.IOException;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.mapred.TextInputFormat;
-import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.Option;
-
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-/**
- * This class implements the driver that starts the MapReduce job for graph building.
- */
-@SuppressWarnings("deprecation")
-public class GenomixDriver {
- private static class Options {
- @Option(name = "-inputpath", usage = "the input path", required = true)
- public String inputPath;
-
- @Option(name = "-outputpath", usage = "the output path", required = true)
- public String outputPath;
-
- @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
- public int numReducers;
-
- @Option(name = "-kmer-kmerByteSize", usage = "the kmerByteSize of kmer", required = true)
- public int sizeKmer;
- }
-
- public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, String defaultConfPath)
- throws IOException {
-
- JobConf conf = new JobConf(GenomixDriver.class);
- conf.setInt("sizeKmer", sizeKmer);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
-
- conf.setJobName("Genomix Graph Building");
- conf.setMapperClass(GenomixMapper.class);
- conf.setReducerClass(GenomixReducer.class);
- conf.setCombinerClass(GenomixCombiner.class);
-
- conf.setMapOutputKeyClass(KmerBytesWritable.class);
- conf.setMapOutputValueClass(KmerCountValue.class);
-
- conf.setInputFormat(TextInputFormat.class);
- conf.setOutputFormat(SequenceFileOutputFormat.class);
- conf.setOutputKeyClass(KmerBytesWritable.class);
- conf.setOutputValueClass(KmerCountValue.class);
- FileInputFormat.setInputPaths(conf, new Path(inputPath));
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- conf.setNumReduceTasks(numReducers);
-
- FileSystem dfs = FileSystem.get(conf);
- dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- }
-
- public static void main(String[] args) throws Exception {
- Options options = new Options();
- CmdLineParser parser = new CmdLineParser(options);
- parser.parseArgument(args);
- GenomixDriver driver = new GenomixDriver();
- driver.run(options.inputPath, options.outputPath, options.numReducers, options.sizeKmer, null);
- }
-
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixMapper.java
deleted file mode 100755
index 868c40f..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixMapper.java
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hadoop.graphbuilding;
-
-import java.io.IOException;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-/**
- * This class implements the mapper operator of the MapReduce model.
- */
-@SuppressWarnings("deprecation")
-public class GenomixMapper extends MapReduceBase implements
- Mapper<LongWritable, Text, KmerBytesWritable, KmerCountValue> {
-
- public static int KMER_SIZE;
- public KmerCountValue outputAdjList;
- public KmerBytesWritable outputKmer;
-
- @Override
- public void configure(JobConf job) {
- KMER_SIZE = Integer.parseInt(job.get("sizeKmer"));
- outputAdjList = new KmerCountValue();
- outputKmer = new KmerBytesWritable(KMER_SIZE);
- }
-
- /* successor node
- A 00000001 1
- C 00000010 2
- G 00000100 4
- T 00001000 8
- predecessor node
- A 00010000 16
- C 00100000 32
- G 01000000 64
- T 10000000 128 */
- @Override
- public void map(LongWritable key, Text value, OutputCollector<KmerBytesWritable, KmerCountValue> output,
- Reporter reporter) throws IOException {
- /* A 00
- C 01
- G 10
- T 11*/
- String geneLine = value.toString(); // read the raw sequence line
- Pattern genePattern = Pattern.compile("[AGCT]+");
- Matcher geneMatcher = genePattern.matcher(geneLine);
- boolean isValid = geneMatcher.matches();
- if (isValid) {
- /** first kmer */
- byte count = 1;
- byte[] array = geneLine.getBytes();
- outputKmer.setByRead(array, 0);
- byte pre = 0;
- byte next = GeneCode.getAdjBit(array[KMER_SIZE]);
- byte adj = GeneCode.mergePreNextAdj(pre, next);
- outputAdjList.set(adj, count);
- output.collect(outputKmer, outputAdjList);
- /** middle kmer */
- for (int i = KMER_SIZE; i < array.length - 1; i++) {
- pre = GeneCode.getBitMapFromGeneCode(outputKmer.shiftKmerWithNextChar(array[i]));
- next = GeneCode.getAdjBit(array[i + 1]);
- adj = GeneCode.mergePreNextAdj(pre, next);
- outputAdjList.set(adj, count);
- output.collect(outputKmer, outputAdjList);
- }
- /** last kmer */
- pre = GeneCode.getBitMapFromGeneCode(outputKmer.shiftKmerWithNextChar(array[array.length - 1]));
- next = 0;
- adj = GeneCode.mergePreNextAdj(pre, next);
- outputAdjList.set(adj, count);
- output.collect(outputKmer, outputAdjList);
- }
- }
-}
\ No newline at end of file
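For orientation, the bit layout described in the mapper's comments packs successor edges into the low nibble and predecessor edges into the high nibble of a single adjacency byte. A self-contained sketch of that packing (the real GeneCode helpers belonged to the deleted API, so the names here are illustrative):

    public class AdjBitSketch {
        // 2-bit base codes: A=0, C=1, G=2, T=3
        static int code(char c) { return "ACGT".indexOf(c); }

        // successor edge for base b: one-hot in bits 0..3
        static byte nextBit(char b) { return (byte) (1 << code(b)); }

        // predecessor edge for base b: one-hot in bits 4..7
        static byte preBit(char b) { return (byte) (1 << (code(b) + 4)); }

        public static void main(String[] args) {
            // a kmer preceded by G and followed by T in its read
            byte adj = (byte) (preBit('G') | nextBit('T'));
            System.out.printf("adj = 0x%02X%n", adj); // 0x48: G-in (0x40) | T-out (0x08)
        }
    }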
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixReducer.java
deleted file mode 100755
index 7f9b2bf..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixReducer.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.graphbuilding;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-/**
- * This class implements the reducer operator of the MapReduce model.
- */
-@SuppressWarnings("deprecation")
-public class GenomixReducer extends MapReduceBase implements
- Reducer<KmerBytesWritable, KmerCountValue, KmerBytesWritable, KmerCountValue> {
- KmerCountValue valWriter = new KmerCountValue();
- static enum MyCounters { NUM_RECORDS };
- @Override
- public void reduce(KmerBytesWritable key, Iterator<KmerCountValue> values,
- OutputCollector<KmerBytesWritable, KmerCountValue> output, Reporter reporter) throws IOException {
- byte groupByAdjList = 0;
- int count = 0;
- byte bytCount = 0;
- while (values.hasNext()) {
- // merge the adjacency bitmaps of all values for this kmer
- KmerCountValue geneValue = values.next();
- groupByAdjList = (byte) (groupByAdjList | geneValue.getAdjBitMap());
- count = count + (int) geneValue.getCount();
- }
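- // saturate the count at 127 so it still fits in a signed byte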
- if (count >= 127)
- bytCount = (byte) 127;
- else
- bytCount = (byte) count;
- valWriter.set(groupByAdjList, bytCount);
- output.collect(key, valWriter);
- reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
- }
-}
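The combiner and reducer share one aggregation: OR the adjacency bitmaps of all duplicate kmers together and sum their counts, saturating at 127. Pulled out of the Hadoop plumbing, the logic reduces to:

    public class AdjMergeSketch {
        // each value is a pair { adjBitMap, count }
        static byte[] merge(byte[][] values) {
            byte adj = 0;
            int count = 0;
            for (byte[] v : values) {
                adj |= v[0];   // union of adjacency edges
                count += v[1]; // total occurrences
            }
            return new byte[] { adj, (byte) Math.min(count, 127) }; // saturate at 127
        }

        public static void main(String[] args) {
            byte[] merged = merge(new byte[][] { { 0x41, 100 }, { 0x08, 50 } });
            System.out.printf("adj=0x%02X count=%d%n", merged[0], merged[1]); // adj=0x49 count=127
        }
    }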
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MergePathsH3.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MergePathsH3.java
deleted file mode 100644
index b28328f..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MergePathsH3.java
+++ /dev/null
@@ -1,231 +0,0 @@
-package edu.uci.ics.genomix.hadoop.graphclean.mergepaths.h3;
-
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.RunningJob;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import edu.uci.ics.genomix.hadoop.pmcommon.NodeWithFlagWritable;
-import edu.uci.ics.genomix.hadoop.pmcommon.PathNodeInitial.PathNodeFlag;
-import edu.uci.ics.genomix.oldtype.NodeWritable;
-import edu.uci.ics.genomix.oldtype.PositionWritable;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-
-@SuppressWarnings("deprecation")
-public class MergePathsH3 extends Configured implements Tool {
- /*
- * Flags used when sending messages
- */
- public static class MergeMessageFlag extends PathNodeFlag {
- public static final byte FROM_SUCCESSOR = 1 << 5;
- public static final byte FROM_PREDECESSOR = 1 << 6;
- public static final byte IS_PSEUDOHEAD = ((byte) 1 << 6); //TODO FIXME: this value collides with FROM_PREDECESSOR (1 << 6)
- }
-
- /*
- * Mapper class: Partition the graph using random pseudoheads.
- * Heads send themselves to their successors, and all others map themselves.
- */
- private static class MergePathsH3Mapper extends MapReduceBase implements
- Mapper<PositionWritable, NodeWithFlagWritable, PositionWritable, NodeWithFlagWritable> {
- private static long randSeed;
- private Random randGenerator;
- private float probBeingRandomHead;
-
- private int KMER_SIZE;
- private PositionWritable outputKey;
- private NodeWithFlagWritable outputValue;
- private NodeWritable curNode;
- private byte headFlag;
- private byte outFlag;
- private boolean finalMerge;
-
- public void configure(JobConf conf) {
- KMER_SIZE = conf.getInt("sizeKmer", 0);
- KmerBytesWritable.setGlobalKmerLength(KMER_SIZE);
-
- randSeed = conf.getLong("randomSeed", 0);
- randGenerator = new Random(randSeed);
- probBeingRandomHead = conf.getFloat("probBeingRandomHead", 0.5f);
- finalMerge = conf.getBoolean("finalMerge", false);
-
- outputValue = new NodeWithFlagWritable(KMER_SIZE);
- outputKey = new PositionWritable();
- curNode = new NodeWritable(KMER_SIZE);
- }
-
- protected boolean isNodeRandomHead(PositionWritable nodeID) {
- // "deterministically random", based on node id
- randGenerator.setSeed(randSeed ^ nodeID.hashCode());
- return randGenerator.nextFloat() < probBeingRandomHead;
- }
-
- @Override
- public void map(PositionWritable key, NodeWithFlagWritable value,
- OutputCollector<PositionWritable, NodeWithFlagWritable> output, Reporter reporter)
- throws IOException {
- curNode = value.getNode();
- // Map all path vertices; Heads and pseudoheads are sent to their successors
- // NOTE: all mapping nodes are already simple paths
-
- // A node may be marked as head because it's a real head, a previously merged head, or it was chosen as a random head
- headFlag = (byte) (MergeMessageFlag.IS_HEAD & value.getFlag());
- // remove all pseudoheads on the last iteration
- if (!finalMerge) {
- headFlag |= (MergeMessageFlag.IS_PSEUDOHEAD & value.getFlag());
- }
-
- outFlag = (byte) (headFlag | (MergeMessageFlag.IS_TAIL & value.getFlag()));
- if (headFlag != 0 || isNodeRandomHead(curNode.getNodeID())) {
- // head nodes send themselves to their successor
- //outputKey.set(curNode.getOutgoingList().getPosition(0));
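- // NOTE: with the set(...) above commented out, outputKey is never assigned here, so this branch collects under a default/stale key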
- if (!finalMerge) {
- headFlag |= (MergeMessageFlag.IS_PSEUDOHEAD & value.getFlag());
- }
- outFlag |= MergeMessageFlag.FROM_PREDECESSOR;
-
- outputValue.set(outFlag, curNode);
- output.collect(outputKey, outputValue);
- } else {
- // tail nodes map themselves
- outFlag |= MergeMessageFlag.MSG_SELF;
- outputValue.set(outFlag, curNode);
- output.collect(key, outputValue);
- }
- }
- }
-
- /*
- * Reducer class: merge nodes that co-occur; for singletons, remap the original nodes
- */
- private static class MergePathsH3Reducer extends MapReduceBase implements
- Reducer<PositionWritable, NodeWithFlagWritable, PositionWritable, NodeWithFlagWritable> {
-
- private int KMER_SIZE;
- private NodeWithFlagWritable inputValue;
- private NodeWithFlagWritable outputValue;
- private NodeWritable headNode;
- private NodeWritable tailNode;
- private int count;
- private byte outFlag;
-
- public void configure(JobConf conf) {
- KMER_SIZE = conf.getInt("sizeKmer", 0);
- outputValue = new NodeWithFlagWritable(KMER_SIZE);
- headNode = new NodeWritable(KMER_SIZE);
- tailNode = new NodeWritable(KMER_SIZE);
- }
-
- @Override
- public void reduce(PositionWritable key, Iterator<NodeWithFlagWritable> values,
- OutputCollector<PositionWritable, NodeWithFlagWritable> output, Reporter reporter)
- throws IOException {
-
- inputValue = values.next();
- if (!values.hasNext()) {
- // all single nodes must be remapped
- if ((inputValue.getFlag() & MergeMessageFlag.MSG_SELF) == MergeMessageFlag.MSG_SELF) {
- // FROM_SELF => remap self
- output.collect(key, inputValue);
- } else {
- // FROM_PREDECESSOR => remap predecessor
- output.collect(inputValue.getNode().getNodeID(), inputValue);
- }
- } else {
- // multiple inputs => a merge will take place. Aggregate both, then collect the merged path
- count = 0;
- outFlag = MergeMessageFlag.EMPTY_MESSAGE;
- while (true) { // process values; break when no more
- count++;
- outFlag |= (inputValue.getFlag() & (MergeMessageFlag.IS_HEAD | MergeMessageFlag.IS_PSEUDOHEAD | MergeMessageFlag.IS_TAIL));
- if ((inputValue.getFlag() & MergeMessageFlag.FROM_PREDECESSOR) == MergeMessageFlag.FROM_PREDECESSOR) {
- headNode.set(inputValue.getNode());
- } else {
- tailNode.set(inputValue.getNode());
- }
- if (!values.hasNext()) {
- break;
- } else {
- inputValue = values.next();
- }
- }
- if (count != 2) {
- throw new IOException("Expected two nodes in MergePathsH3 reduce; saw " + String.valueOf(count));
- }
- // merge the head and tail as saved output, this merged node is now a head
- //headNode.mergeNext(tailNode, KMER_SIZE);
- outputValue.set(outFlag, headNode);
-
- if ((outFlag & MergeMessageFlag.IS_TAIL) == MergeMessageFlag.IS_TAIL) {
- // Pseudoheads merging with tails don't become heads.
- // Reset the IS_PSEUDOHEAD flag
- outFlag &= ~MergeMessageFlag.IS_PSEUDOHEAD;
-
- if ((outFlag & MergeMessageFlag.IS_HEAD) == MergeMessageFlag.IS_HEAD) {
- // True heads meeting tails => merge is complete for this node
- // TODO: send to the "complete" collector
- }
- }
- reporter.incrCounter("genomix", "num_merged", 1);
- output.collect(key, outputValue);
- }
- }
- }
-
- /*
- * Run one iteration of the mergePaths algorithm
- */
- public RunningJob run(String inputPath, String outputPath, JobConf baseConf) throws IOException {
- JobConf conf = new JobConf(baseConf);
- conf.setJarByClass(MergePathsH3.class);
- conf.setJobName("MergePathsH3 " + inputPath);
-
- FileInputFormat.addInputPath(conf, new Path(inputPath));
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
-
- conf.setInputFormat(SequenceFileInputFormat.class);
- conf.setOutputFormat(SequenceFileOutputFormat.class);
-
- conf.setMapOutputKeyClass(PositionWritable.class);
- conf.setMapOutputValueClass(NodeWithFlagWritable.class);
- conf.setOutputKeyClass(PositionWritable.class);
- conf.setOutputValueClass(NodeWithFlagWritable.class);
-
- conf.setMapperClass(MergePathsH3Mapper.class);
- conf.setReducerClass(MergePathsH3Reducer.class);
-
- FileSystem.get(conf).delete(new Path(outputPath), true);
-
- return JobClient.runJob(conf);
- }
-
- @Override
- public int run(String[] arg0) throws Exception {
- // TODO Auto-generated method stub
- return 0;
- }
-
- public static void main(String[] args) throws Exception {
- int res = ToolRunner.run(new Configuration(), new MergePathsH3(), args);
- System.out.println("Ran the job fine!");
- System.exit(res);
- }
-}
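The "deterministically random" head test is the crux of H3: every task must agree on whether a given node is a head without any communication, which the mapper achieves by reseeding a Random from the shared seed XORed with the node's hash. A minimal sketch of the idea (constant names hypothetical):

    import java.util.Random;

    public class RandomHeadSketch {
        static final long RAND_SEED = 42L;   // shared across all tasks via the JobConf
        static final float PROB_HEAD = 0.5f;

        // every task computes the same answer for the same node id
        static boolean isRandomHead(int nodeHash) {
            Random r = new Random(RAND_SEED ^ nodeHash);
            return r.nextFloat() < PROB_HEAD;
        }

        public static void main(String[] args) {
            System.out.println(isRandomHead(1234) == isRandomHead(1234)); // true: stable per node
        }
    }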
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MergePathsH3Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MergePathsH3Driver.java
deleted file mode 100644
index cd54705..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MergePathsH3Driver.java
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.graphclean.mergepaths.h3;
-
-import java.io.IOException;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RunningJob;
-import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.Option;
-
-@SuppressWarnings("deprecation")
-public class MergePathsH3Driver {
-
- private static class Options {
- @Option(name = "-inputpath", usage = "the input path", required = true)
- public String inputPath;
-
- @Option(name = "-outputpath", usage = "the output path", required = true)
- public String outputPath;
-
- @Option(name = "-mergeresultpath", usage = "the merging results path", required = true)
- public String mergeResultPath;
-
- @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
- public int numReducers;
-
- @Option(name = "-kmer-kmerByteSize", usage = "the kmerByteSize of kmer", required = true)
- public int sizeKmer;
-
- @Option(name = "-merge-rounds", usage = "the maximum number of rounds to merge", required = false)
- public int mergeRound;
-
- @Option(name = "-hadoop-conf", usage = "an (optional) hadoop configuration xml", required = false)
- public String hadoopConf;
-
- }
-
- public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, int mergeRound,
- String defaultConfPath, JobConf defaultConf) throws IOException {
- JobConf baseConf = defaultConf == null ? new JobConf() : defaultConf;
- if (defaultConfPath != null) {
- baseConf.addResource(new Path(defaultConfPath));
- }
- baseConf.setNumReduceTasks(numReducers);
- baseConf.setInt("sizeKmer", sizeKmer);
-
- FileSystem dfs = FileSystem.get(baseConf);
- String prevOutput = inputPath;
- dfs.delete(new Path(outputPath), true); // clear any previous output
-
- String tmpOutputPath = "NO_JOBS_DONE";
- boolean finalMerge = false;
- for (int iMerge = 1; iMerge <= mergeRound; iMerge++) {
- baseConf.setInt("iMerge", iMerge);
- baseConf.setBoolean("finalMerge", finalMerge);
- MergePathsH3 merger = new MergePathsH3();
- tmpOutputPath = inputPath + ".mergepathsH3." + String.valueOf(iMerge);
- RunningJob job = merger.run(prevOutput, tmpOutputPath, baseConf);
- if (job.getCounters().findCounter("genomix", "num_merged").getValue() == 0) {
- if (!finalMerge) {
- // all of the pseudoheads have found each other. H3 now behaves like H1
- finalMerge = true;
- } else {
- // already in final merge stage and all paths were merged before. We're done!
- break;
- }
- }
- }
- dfs.rename(new Path(tmpOutputPath), new Path(outputPath)); // save final results
- }
-
- public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, int mergeRound,
- String defaultConfPath) throws IOException {
- run(inputPath, outputPath, numReducers, sizeKmer, mergeRound, defaultConfPath, null);
- }
-
- public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, int mergeRound,
- JobConf defaultConf) throws IOException {
- run(inputPath, outputPath, numReducers, sizeKmer, mergeRound, null, defaultConf);
- }
-
- public static void main(String[] args) throws Exception {
- Options options = new Options();
- CmdLineParser parser = new CmdLineParser(options);
- parser.parseArgument(args);
- MergePathsH3Driver driver = new MergePathsH3Driver();
- driver.run(options.inputPath, options.outputPath, options.numReducers, options.sizeKmer, options.mergeRound,
- null, null);
- }
-}
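The driver's loop escalates exactly once: when a round merges nothing, H3 first switches pseudoheads off (finalMerge), and only a zero-merge round in that final stage ends the loop. The control flow in isolation, with a hypothetical counter sequence:

    public class ConvergenceLoopSketch {
        public static void main(String[] args) {
            boolean finalMerge = false;
            int[] mergedPerRound = { 5, 3, 0, 2, 0 }; // hypothetical num_merged counter values
            for (int round = 0; round < mergedPerRound.length; round++) {
                System.out.println("round " + round + " finalMerge=" + finalMerge);
                if (mergedPerRound[round] == 0) {
                    if (!finalMerge) {
                        finalMerge = true; // pseudoheads have all met; behave like H1
                    } else {
                        break;             // nothing merged in the final stage: done
                    }
                }
            }
        }
    }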
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h4/MergePathsH4.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h4/MergePathsH4.java
deleted file mode 100644
index 595acf1..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h4/MergePathsH4.java
+++ /dev/null
@@ -1,547 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hadoop.graphclean.mergepaths.h4;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.RunningJob;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.mapred.lib.IdentityMapper;
-import org.apache.hadoop.mapred.lib.MultipleOutputs;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import edu.uci.ics.genomix.hadoop.pmcommon.MergePathMultiSeqOutputFormat;
-import edu.uci.ics.genomix.hadoop.pmcommon.NodeWithFlagWritable;
-import edu.uci.ics.genomix.hadoop.pmcommon.NodeWithFlagWritable.MessageFlag;
-import edu.uci.ics.genomix.oldtype.NodeWritable;
-import edu.uci.ics.genomix.oldtype.PositionWritable;
-
-/*
- * a probabilistic merge algorithm for merging long simple paths (chains of nodes with exactly one incoming and one outgoing edge).
- * The merge is guaranteed to succeed, but not all nodes that could be merged in an iteration will be.
- *
- * There are two steps to the merge:
- * 1. (H4UpdatesMapper & H4UpdatesReducer): the direction of the merge is chosen and all
- * neighbors' edges are updated with the merge intent
- * 2. (H4MergeMapper & H4MergeReducer): the nodes initiating the merge are "sent" to their neighbors, kmers are combined, and edges
- * are again updated (since the merge-initiator may be a neighbor of another merging node).
- */
-@SuppressWarnings("deprecation")
-public class MergePathsH4 extends Configured implements Tool {
-
- private enum MergeDir {
- NO_MERGE,
- FORWARD,
- BACKWARD
- }
-
- /*
- * Mapper class: randomly chooses a direction to merge s.t. if a merge takes place, it will be successful.
- * Sends update messages telling each of this node's neighbors who their new neighbor will be
- */
- public static class H4UpdatesMapper extends MapReduceBase implements
- Mapper<PositionWritable, NodeWithFlagWritable, PositionWritable, NodeWithFlagWritable> {
- private static long randSeed;
- private Random randGenerator;
- private float probBeingRandomHead;
-
- private int KMER_SIZE;
- private NodeWithFlagWritable outputValue;
- private NodeWithFlagWritable mergeMsgValue;
- private NodeWithFlagWritable updateMsgValue;
-
- private NodeWritable curNode;
- private PositionWritable curID;
- private PositionWritable nextID;
- private PositionWritable prevID;
- private boolean mergeableNext;
- private boolean mergeablePrev;
- private boolean curHead;
- private boolean nextHead;
- private boolean prevHead;
- private MergeDir mergeDir;
- private byte inFlag;
- private byte headFlag;
- private byte tailFlag;
- private byte mergeMsgFlag;
- private byte nextDir;
- private byte prevDir;
-
- public void configure(JobConf conf) {
-
- randSeed = conf.getLong("randomSeed", 0);
- randGenerator = new Random(randSeed);
- probBeingRandomHead = conf.getFloat("probBeingRandomHead", 0.5f);
-
- KMER_SIZE = conf.getInt("sizeKmer", 0);
- outputValue = new NodeWithFlagWritable(KMER_SIZE);
-
- mergeMsgValue = new NodeWithFlagWritable(KMER_SIZE);
- updateMsgValue = new NodeWithFlagWritable(KMER_SIZE);
-
- curNode = new NodeWritable(KMER_SIZE);
- curID = new PositionWritable();
- nextID = new PositionWritable();
- prevID = new PositionWritable();
- }
-
- protected boolean isNodeRandomHead(PositionWritable nodeID) {
- // "deterministically random", based on node id
- randGenerator.setSeed(randSeed ^ nodeID.hashCode());
-
- // similar hashcodes will produce similar initial random values. Burn through a few to increase spread
- for (int i = 0; i < 100; i++) {
- randGenerator.nextFloat();
- }
- return randGenerator.nextFloat() < probBeingRandomHead;
- }
-
- /*
- * set nextID to the element that's next (in the node's FF or FR list), returning true when there is a next neighbor
- */
- protected boolean setNextInfo(NodeWritable node) {
- if (node.getFFList().getCountOfPosition() > 0) {
- nextDir = MessageFlag.DIR_FF;
- nextID.set(node.getFFList().getPosition(0));
- nextHead = isNodeRandomHead(nextID);
- return true;
- }
- if (node.getFRList().getCountOfPosition() > 0) {
- nextDir = MessageFlag.DIR_FR;
- nextID.set(node.getFRList().getPosition(0));
- nextHead = isNodeRandomHead(nextID);
- return true;
- }
- return false;
- }
-
- /*
- * set prevID to the element that's previous (in the node's RR or RF list), returning true when there is a previous neighbor
- */
- protected boolean setPrevInfo(NodeWritable node) {
- if (node.getRRList().getCountOfPosition() > 0) {
- prevDir = MessageFlag.DIR_RR;
- prevID.set(node.getRRList().getPosition(0));
- prevHead = isNodeRandomHead(prevID);
- return true;
- }
- if (node.getRFList().getCountOfPosition() > 0) {
- prevDir = MessageFlag.DIR_RF;
- prevID.set(node.getRFList().getPosition(0));
- prevHead = isNodeRandomHead(prevID);
- return true;
- }
- return false;
- }
-
- @Override
- public void map(PositionWritable key, NodeWithFlagWritable value,
- OutputCollector<PositionWritable, NodeWithFlagWritable> output, Reporter reporter) throws IOException {
- inFlag = value.getFlag();
- curNode.set(value.getNode());
- curID.set(curNode.getNodeID());
- mergeDir = MergeDir.NO_MERGE; // no merge to happen
- headFlag = (byte) (MessageFlag.IS_HEAD & inFlag);
- tailFlag = (byte) (MessageFlag.IS_TAIL & inFlag);
- mergeMsgFlag = (byte) (headFlag | tailFlag);
-
- curHead = isNodeRandomHead(curID);
- // headFlag and tailFlag indicate whether the node is at the beginning or end of a simple path.
- // We prevent merging towards non-path nodes
- boolean isPath = curNode.isSimpleOrTerminalPath();
- mergeableNext = setNextInfo(curNode) && tailFlag == 0;
- mergeablePrev = setPrevInfo(curNode) && headFlag == 0;
-
- // decide where we're going to merge to
- if (isPath && (mergeableNext || mergeablePrev)) {
- if (curHead) {
- if (mergeableNext && !nextHead) {
- // merge forward
- mergeMsgFlag |= NodeWithFlagWritable.mirrorDirection(nextDir);
- mergeDir = MergeDir.FORWARD;
- } else if (mergeablePrev && !prevHead) {
- // merge backwards
- mergeMsgFlag |= NodeWithFlagWritable.mirrorDirection(prevDir);
- mergeDir = MergeDir.BACKWARD;
- }
- } else {
- // I'm a tail
- if (mergeableNext && mergeablePrev) {
- if ((!nextHead && !prevHead) && (curID.compareTo(nextID) > 0 && curID.compareTo(prevID) > 0)) {
- // tails on both sides, and I'm the "local minimum"
- // compress me towards the tail in forward dir
- mergeMsgFlag |= NodeWithFlagWritable.mirrorDirection(nextDir);
- mergeDir = MergeDir.FORWARD;
- }
- } else if (!mergeablePrev) {
- // no previous node
- if (!nextHead && curID.compareTo(nextID) > 0) {
- // merge towards tail in forward dir
- mergeMsgFlag |= NodeWithFlagWritable.mirrorDirection(nextDir);
- mergeDir = MergeDir.FORWARD;
- }
- } else if (!mergeableNext) {
- // no next node
- if (!prevHead && curID.compareTo(prevID) > 0) {
- // merge towards tail in reverse dir
- mergeMsgFlag |= NodeWithFlagWritable.mirrorDirection(prevDir);
- mergeDir = MergeDir.BACKWARD;
- }
- }
- }
- }
-
- if (mergeDir == MergeDir.NO_MERGE) {
- mergeMsgFlag |= MessageFlag.MSG_SELF;
- mergeMsgValue.set(mergeMsgFlag, curNode);
- output.collect(curID, mergeMsgValue);
- } else {
- // this node will do a merge next round
- mergeMsgFlag |= MessageFlag.MSG_UPDATE_MERGE;
- mergeMsgValue.set(mergeMsgFlag, curNode);
- output.collect(curID, mergeMsgValue);
-
- sendUpdateToNeighbors(curNode, (byte) (mergeMsgFlag & MessageFlag.DIR_MASK), output);
- }
- }
-
- /*
- * when performing a merge, an update message needs to be sent to my neighbors
- */
- private void sendUpdateToNeighbors(NodeWritable node, byte mergeDir,
- OutputCollector<PositionWritable, NodeWithFlagWritable> collector) throws IOException {
- PositionWritable mergeSource = node.getNodeID();
- PositionWritable mergeTarget = node.getListFromDir(mergeDir).getPosition(0);
-
- // I need to notify in the opposite direction as I'm merging
- Iterator<PositionWritable> posIterator1;
- byte dir1;
- Iterator<PositionWritable> posIterator2;
- byte dir2;
- switch (mergeDir) {
- case MessageFlag.DIR_FF:
- case MessageFlag.DIR_FR:
- // merging forward; tell my previous neighbors
- posIterator1 = node.getRRList().iterator();
- dir1 = MessageFlag.DIR_RR;
- posIterator2 = node.getRFList().iterator();
- dir2 = MessageFlag.DIR_RF;
- break;
- case MessageFlag.DIR_RF:
- case MessageFlag.DIR_RR:
- posIterator1 = node.getFFList().iterator();
- dir1 = MessageFlag.DIR_FF;
- posIterator2 = node.getFRList().iterator();
- dir2 = MessageFlag.DIR_FR;
- break;
- default:
- throw new IOException("Unrecognized direction in sendUpdateToNeighbors: " + mergeDir);
- }
- while (posIterator1.hasNext()) {
- updateMsgValue.setAsUpdateMessage(mergeDir, dir1, mergeSource, mergeTarget);
- collector.collect(posIterator1.next(), updateMsgValue);
- }
- while (posIterator2.hasNext()) {
- updateMsgValue.setAsUpdateMessage(mergeDir, dir2, mergeSource, mergeTarget);
- collector.collect(posIterator2.next(), updateMsgValue);
- }
- }
- }
-
- /*
- * Reducer class: processes the update messages from updateMapper
- */
- private static class H4UpdatesReducer extends MapReduceBase implements
- Reducer<PositionWritable, NodeWithFlagWritable, PositionWritable, NodeWithFlagWritable> {
- private int KMER_SIZE;
- private NodeWithFlagWritable inputValue;
- private NodeWithFlagWritable outputValue;
- private PositionWritable outPosn;
- private boolean sawCurNode;
- private byte inFlag;
-
- // to prevent GC on update messages, we keep them all in one list and use the Node set method rather than creating new Nodes
- private ArrayList<NodeWithFlagWritable> updateMsgs;
- private int updateMsgsSize;
- private int updateMsgsCount;
-
- public void configure(JobConf conf) {
- KMER_SIZE = conf.getInt("sizeKmer", 0);
- inputValue = new NodeWithFlagWritable(KMER_SIZE);
- outputValue = new NodeWithFlagWritable(KMER_SIZE);
- outPosn = new PositionWritable();
- updateMsgs = new ArrayList<NodeWithFlagWritable>();
- updateMsgsSize = updateMsgs.size();
- }
-
- private void addUpdateMessage(NodeWithFlagWritable myInputValue) {
- updateMsgsCount++;
- if (updateMsgsCount >= updateMsgsSize) {
- updateMsgs.add(new NodeWithFlagWritable(myInputValue)); // make a copy of inputValue-- not a reference!
- updateMsgsSize = updateMsgs.size(); // grow the reuse pool so later calls reuse this slot
- } else {
- updateMsgs.get(updateMsgsCount - 1).set(myInputValue); // update existing reference
- }
- }
-
- /*
- * Process updates from mapper
- *
- * (non-Javadoc)
- * @see org.apache.hadoop.mapred.Reducer#reduce(java.lang.Object, java.util.Iterator, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
- */
- @Override
- public void reduce(PositionWritable key, Iterator<NodeWithFlagWritable> values,
- OutputCollector<PositionWritable, NodeWithFlagWritable> output, Reporter reporter) throws IOException {
- sawCurNode = false;
- updateMsgsCount = 0;
-
- byte inMsg;
- while (values.hasNext()) {
- inputValue.set(values.next());
- inFlag = inputValue.getFlag();
- inMsg = (byte) (inFlag & MessageFlag.MSG_MASK);
-
- switch (inMsg) {
- case MessageFlag.MSG_UPDATE_MERGE:
- case MessageFlag.MSG_SELF:
- if (sawCurNode)
- throw new IOException("Saw more than one MSG_SELF! previously seen self: "
- + outputValue.getNode() + " current self: " + inputValue.getNode());
- if (inMsg == MessageFlag.MSG_SELF) {
- outPosn.set(inputValue.getNode().getNodeID());
- } else if (inMsg == MessageFlag.MSG_UPDATE_MERGE) {
- // merge messages are sent to their merge recipient
- outPosn.set(inputValue.getNode().getListFromDir((byte) (inFlag & MessageFlag.DIR_MASK)).getPosition(0));
- } else {
- throw new IOException("Unrecongized MessageFlag MSG: " + inMsg);
- }
- outputValue.set(inFlag, inputValue.getNode());
- sawCurNode = true;
- break;
- case MessageFlag.MSG_UPDATE_EDGE:
- addUpdateMessage(inputValue);
- break;
- default:
- throw new IOException("Unrecognized message type: " + (inFlag & MessageFlag.MSG_MASK));
- }
- }
- if (!sawCurNode) {
- throw new IOException("Never saw self in recieve update messages!");
- }
-
- // process all the update messages for this node
- for (int i = 0; i < updateMsgsCount; i++) {
- outputValue.processUpdates(updateMsgs.get(i), KMER_SIZE);
- }
- output.collect(outPosn, outputValue);
- }
- }
-
- /*
- * Reducer class: processes merge messages
- */
- private static class H4MergeReducer extends MapReduceBase implements
- Reducer<PositionWritable, NodeWithFlagWritable, PositionWritable, NodeWithFlagWritable> {
- private MultipleOutputs mos;
- public static final String TO_UPDATE_OUTPUT = "toUpdate";
- public static final String COMPLETE_OUTPUT = "complete";
- private OutputCollector<PositionWritable, NodeWithFlagWritable> toUpdateCollector;
- private OutputCollector<NodeWritable, NullWritable> completeCollector;
-
- private int KMER_SIZE;
- private NodeWithFlagWritable inputValue;
- private NodeWithFlagWritable outputValue;
- private PositionWritable outputKey;
- private boolean sawCurNode;
- private byte inFlag;
-
- // to prevent GC on merge messages, we keep them all in one list and use the Node set method rather than creating new Nodes
- private ArrayList<NodeWithFlagWritable> mergeMsgs;
- private int updateMsgsSize;
- private int mergeMsgsCount;
-
- public void configure(JobConf conf) {
- mos = new MultipleOutputs(conf);
- KMER_SIZE = conf.getInt("sizeKmer", 0);
- inputValue = new NodeWithFlagWritable(KMER_SIZE);
- outputValue = new NodeWithFlagWritable(KMER_SIZE);
- outputKey = new PositionWritable();
- mergeMsgs = new ArrayList<NodeWithFlagWritable>();
- updateMsgsSize = mergeMsgs.size();
- }
-
- private void addMergeMessage(NodeWithFlagWritable myInputValue) {
- mergeMsgsCount++;
- if (mergeMsgsCount >= updateMsgsSize) {
- mergeMsgs.add(new NodeWithFlagWritable(myInputValue)); // make a copy of inputValue-- not a reference!
- updateMsgsSize = mergeMsgs.size(); // grow the reuse pool so later calls reuse this slot
- } else {
- mergeMsgs.get(mergeMsgsCount - 1).set(myInputValue); // update existing reference
- }
- }
-
- /*
- * Process merges
- *
- * (non-Javadoc)
- * @see org.apache.hadoop.mapred.Reducer#reduce(java.lang.Object, java.util.Iterator, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
- */
- @SuppressWarnings("unchecked")
- @Override
- public void reduce(PositionWritable key, Iterator<NodeWithFlagWritable> values,
- OutputCollector<PositionWritable, NodeWithFlagWritable> toMergeCollector, Reporter reporter)
- throws IOException {
- toUpdateCollector = mos.getCollector(TO_UPDATE_OUTPUT, reporter);
- completeCollector = mos.getCollector(COMPLETE_OUTPUT, reporter);
- sawCurNode = false;
- mergeMsgsCount = 0;
-
- while (values.hasNext()) {
- inputValue.set(values.next());
- inFlag = inputValue.getFlag();
- switch (inFlag & MessageFlag.MSG_MASK) {
- case MessageFlag.MSG_SELF:
- if (sawCurNode)
- throw new IOException("Saw more than one MSG_SELF! previously seen self: "
- + outputValue.getNode() + " current self: " + inputValue.getNode());
- outputKey.set(inputValue.getNode().getNodeID());
- outputValue.set(inFlag, inputValue.getNode());
- sawCurNode = true;
- break;
- case MessageFlag.MSG_UPDATE_MERGE:
- addMergeMessage(inputValue);
- break;
- case MessageFlag.MSG_UPDATE_EDGE:
- throw new IOException("Error: update message recieved during merge phase!" + inputValue);
- default:
- throw new IOException("Unrecognized message type: " + (inFlag & MessageFlag.MSG_MASK));
- }
- }
- if (!sawCurNode) {
- throw new IOException("Never saw self in recieve update messages!");
- }
-
- // process all the merge messages for this node
- for (int i = 0; i < mergeMsgsCount; i++) {
- outputValue.processUpdates(mergeMsgs.get(i), KMER_SIZE);
- }
-
- if (!outputValue.getNode().isSimpleOrTerminalPath()) {
- // not a mergeable path, can't tell if it still needs updates!
- toUpdateCollector.collect(outputKey, outputValue);
- } else if ((outputValue.getFlag() & MessageFlag.IS_HEAD) > 0
- && ((outputValue.getFlag() & MessageFlag.IS_TAIL) > 0)) {
- // H + T indicates a complete path
- completeCollector.collect(outputValue.getNode(), NullWritable.get());
- } else {
- // not finished merging yet
- toMergeCollector.collect(outputKey, outputValue);
- }
- }
-
- public void close() throws IOException {
- mos.close();
- }
- }
-
- /*
- * Run one iteration of the mergePaths algorithm
- */
- public RunningJob run(String inputPath, String toMergeOutput, String toUpdateOutput, String completeOutput, JobConf baseConf)
- throws IOException {
- JobConf conf = new JobConf(baseConf);
- FileSystem dfs = FileSystem.get(conf);
- conf.setJarByClass(MergePathsH4.class);
- conf.setJobName("MergePathsH4 " + inputPath);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
- conf.setOutputFormat(SequenceFileOutputFormat.class);
- conf.setMapOutputKeyClass(PositionWritable.class);
- conf.setMapOutputValueClass(NodeWithFlagWritable.class);
- conf.setOutputKeyClass(PositionWritable.class);
- conf.setOutputValueClass(NodeWithFlagWritable.class);
-
- // step 1: decide merge dir and send updates
- FileInputFormat.setInputPaths(conf, inputPath);
- String outputUpdatesTmp = "h4.updatesProcessed." + new Random().nextDouble() + ".tmp"; // random filename
- FileOutputFormat.setOutputPath(conf, new Path(outputUpdatesTmp));
- dfs.delete(new Path(outputUpdatesTmp), true);
- conf.setMapperClass(H4UpdatesMapper.class);
- conf.setReducerClass(H4UpdatesReducer.class);
- RunningJob job = JobClient.runJob(conf);
-
- // step 2: process merges
- FileInputFormat.setInputPaths(conf, outputUpdatesTmp);
-// for (Path out : FileInputFormat.getInputPaths(conf)) {
-// System.out.println(out);
-// }
- Path outputMergeTmp = new Path("h4.mergeProcessed." + new Random().nextDouble() + ".tmp"); // random filename
- FileOutputFormat.setOutputPath(conf, outputMergeTmp);
- MultipleOutputs.addNamedOutput(conf, H4MergeReducer.TO_UPDATE_OUTPUT, MergePathMultiSeqOutputFormat.class,
- PositionWritable.class, NodeWithFlagWritable.class);
- MultipleOutputs.addNamedOutput(conf, H4MergeReducer.COMPLETE_OUTPUT, MergePathMultiSeqOutputFormat.class,
- NodeWritable.class, NullWritable.class);
- dfs.delete(outputMergeTmp, true);
- conf.setMapperClass(IdentityMapper.class);
- conf.setReducerClass(H4MergeReducer.class);
- job = JobClient.runJob(conf);
-
- // move the tmp outputs to the arg-spec'ed dirs. If there is no such dir, create an empty one to simplify downstream processing
- if (!dfs.rename(new Path(outputMergeTmp + File.separator + H4MergeReducer.TO_UPDATE_OUTPUT), new Path(
- toUpdateOutput))) {
- dfs.mkdirs(new Path(toUpdateOutput));
- }
- if (!dfs.rename(new Path(outputMergeTmp + File.separator + H4MergeReducer.COMPLETE_OUTPUT), new Path(
- completeOutput))) {
- dfs.mkdirs(new Path(completeOutput));
- }
- if (!dfs.rename(outputMergeTmp, new Path(toMergeOutput))) {
- dfs.mkdirs(new Path(toMergeOutput));
- }
-
- return job;
- }
-
- @Override
- public int run(String[] args) throws Exception {
- int res = ToolRunner.run(new Configuration(), new MergePathsH4(), args);
- return res;
- }
-
- public static void main(String[] args) throws Exception {
- int res = ToolRunner.run(new Configuration(), new MergePathsH4(), args);
- System.exit(res);
- }
-}
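Both H4 reducers dispatch on a single flag byte that packs a message type and a direction; the bug-prone part is remembering to mask before comparing. A sketch of that dispatch with assumed mask values (the real constants lived in the deleted MessageFlag class):

    public class FlagMaskSketch {
        static final byte DIR_MASK = 0x0F;        // assumed: low 4 bits = direction
        static final byte MSG_MASK = (byte) 0x70; // assumed: next 3 bits = message type
        static final byte MSG_SELF = 0x10;
        static final byte DIR_FF = 0x01;

        public static void main(String[] args) {
            byte flag = (byte) (MSG_SELF | DIR_FF);
            byte msg = (byte) (flag & MSG_MASK); // compare this against MSG_* constants
            byte dir = (byte) (flag & DIR_MASK); // pass this to direction-based lookups
            System.out.printf("msg=0x%02X dir=0x%02X%n", msg, dir); // msg=0x10 dir=0x01
        }
    }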
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h4/MergePathsH4Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h4/MergePathsH4Driver.java
deleted file mode 100644
index 1f6a157..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h4/MergePathsH4Driver.java
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.graphclean.mergepaths.h4;
-
-import java.io.IOException;
-import java.util.ArrayList;
-
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.tools.ant.util.IdentityMapper;
-import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.Option;
-
-import edu.uci.ics.genomix.hadoop.pmcommon.ConvertGraphFromNodeWithFlagToNodeWritable;
-import edu.uci.ics.genomix.hadoop.pmcommon.PathNodeInitial;
-
-@SuppressWarnings("deprecation")
-public class MergePathsH4Driver {
-
- private static final String TO_MERGE = "toMerge";
- private static final String TO_UPDATE = "toUpdate";
- private static final String COMPLETE = "complete";
- private String mergeOutput;
- private String toUpdateOutput;
- private String completeOutput;
-
- private void setOutputPaths(String basePath, int mergeIteration) {
- basePath = basePath.replaceAll("/$", ""); // strip trailing slash
- mergeOutput = basePath + "_" + TO_MERGE + "_i" + mergeIteration;
- toUpdateOutput = basePath + "_" + TO_UPDATE + "_i" + mergeIteration;
- completeOutput = basePath + "_" + COMPLETE + "_i" + mergeIteration;
- }
-
- private static class Options {
- @Option(name = "-inputpath", usage = "the input path", required = true)
- public String inputPath;
-
- @Option(name = "-mergeresultpath", usage = "the merging results path", required = true)
- public String mergeResultPath;
-
- @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
- public int numReducers;
-
- @Option(name = "-kmer-kmerByteSize", usage = "the kmerByteSize of kmer", required = true)
- public int sizeKmer;
-
- @Option(name = "-merge-rounds", usage = "the maximum number of rounds to merge", required = false)
- public int mergeRound;
-
- @Option(name = "-hadoop-conf", usage = "an (optional) hadoop configuration xml", required = false)
- public String hadoopConf;
-
- }
-
- /*
- * Main driver for path merging. Given a graph, this driver runs
- * PathNodeInitial to ID heads and tails, then does up to @mergeRound
- * iterations of path merging. Updates during the merge are batch-processed
- * at the end in a final update job.
- */
- public String run(String inputGraphPath, int numReducers, int sizeKmer, int mergeRound, String defaultConfPath,
- JobConf defaultConf) throws IOException {
- JobConf baseConf = defaultConf == null ? new JobConf() : defaultConf;
- if (defaultConfPath != null) {
- baseConf.addResource(new Path(defaultConfPath));
- }
- baseConf.setNumReduceTasks(numReducers);
- baseConf.setInt("sizeKmer", sizeKmer);
- FileSystem dfs = FileSystem.get(baseConf);
-
- int iMerge = 0;
- boolean mergeComplete = false;
- String prevToMergeOutput = inputGraphPath;
- ArrayList<String> completeOutputs = new ArrayList<String>();
-
- // identify head and tail nodes with pathnode initial
- PathNodeInitial inith4 = new PathNodeInitial();
- setOutputPaths(inputGraphPath, iMerge);
- inith4.run(prevToMergeOutput, mergeOutput, toUpdateOutput, completeOutput, baseConf);
- completeOutputs.add(completeOutput);
- // dfs.copyToLocalFile(new Path(mergeOutput), new Path("initial-toMerge"));
- // dfs.copyToLocalFile(new Path(completeOutput), new Path("initial-complete"));
-
- // several iterations of merging
- MergePathsH4 merger = new MergePathsH4();
- for (iMerge = 1; iMerge <= mergeRound; iMerge++) {
- prevToMergeOutput = mergeOutput;
- setOutputPaths(inputGraphPath, iMerge);
- merger.run(prevToMergeOutput, mergeOutput, toUpdateOutput, completeOutput, baseConf);
- completeOutputs.add(completeOutput);
- // dfs.copyToLocalFile(new Path(mergeOutput), new Path("i" + iMerge +"-toMerge"));
- // dfs.copyToLocalFile(new Path(completeOutput), new Path("i" + iMerge +"-complete"));
-
- if (dfs.listStatus(new Path(mergeOutput)) == null || dfs.listStatus(new Path(mergeOutput)).length == 0) {
-                // this round produced nothing left to merge -- we are done!
- mergeComplete = true;
- break;
- }
- }
- if (!mergeComplete) {
- // if the merge didn't finish, we have to do one final iteration to convert back into (NodeWritable, NullWritable) pairs
- prevToMergeOutput = mergeOutput;
- setOutputPaths(inputGraphPath, iMerge);
- ConvertGraphFromNodeWithFlagToNodeWritable converter = new ConvertGraphFromNodeWithFlagToNodeWritable();
- converter.run(prevToMergeOutput, completeOutput, baseConf);
- completeOutputs.add(completeOutput);
- }
-
- // final output string is a comma-separated list of completeOutputs
- StringBuilder sb = new StringBuilder();
- String delim = "";
- for (String output : completeOutputs) {
- sb.append(delim).append(output);
- delim = ",";
- }
- String finalInputs = sb.toString();
- return finalInputs;
- }
-
- public String run(String inputPath, int numReducers, int sizeKmer, int mergeRound, String defaultConfPath)
- throws IOException {
- return run(inputPath, numReducers, sizeKmer, mergeRound, defaultConfPath, null);
- }
-
- public String run(String inputPath, int numReducers, int sizeKmer, int mergeRound, JobConf defaultConf)
- throws IOException {
- return run(inputPath, numReducers, sizeKmer, mergeRound, null, defaultConf);
- }
-
- public static void main(String[] args) throws Exception {
- Options options = new Options();
- CmdLineParser parser = new CmdLineParser(options);
- parser.parseArgument(args);
- MergePathsH4Driver driver = new MergePathsH4Driver();
- String outputs = driver.run(options.inputPath, options.numReducers, options.sizeKmer, options.mergeRound, null, null);
- System.out.println("Job ran. Find outputs in " + outputs);
- }
-}
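
Two details of the deleted driver are worth noting. The convergence test treats a missing or empty toMerge directory as done; a sketch of that check under the same old FileSystem API (note that some Hadoop releases throw FileNotFoundException for a missing path instead of returning null):

    import java.io.IOException;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    // A merge round has converged when its output directory is absent or
    // empty; mirrors the listStatus() check in the loop above.
    static boolean isDoneMerging(FileSystem dfs, Path mergeOutput) throws IOException {
        FileStatus[] contents = dfs.listStatus(mergeOutput);
        return contents == null || contents.length == 0;
    }

Also, on Java 8+ the closing StringBuilder loop reduces to String.join(",", completeOutputs).
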
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/removetips/RemoveTips.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/removetips/RemoveTips.java
deleted file mode 100644
index f1bfec6..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/removetips/RemoveTips.java
+++ /dev/null
@@ -1,152 +0,0 @@
-package edu.uci.ics.genomix.hadoop.graphclean.removetips;
-
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.Random;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.RunningJob;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import edu.uci.ics.genomix.hadoop.graphclean.mergepaths.h3.MergePathsH3.MergeMessageFlag;
-import edu.uci.ics.genomix.hadoop.graphclean.mergepaths.h4.MergePathsH4;
-import edu.uci.ics.genomix.hadoop.graphclean.mergepaths.h4.MergePathsH4.H4UpdatesMapper;
-import edu.uci.ics.genomix.hadoop.pmcommon.NodeWithFlagWritable;
-import edu.uci.ics.genomix.oldtype.NodeWritable;
-import edu.uci.ics.genomix.oldtype.PositionWritable;
-
-@SuppressWarnings("deprecation")
-public class RemoveTips extends Configured implements Tool {
-
- /*
- * Mapper class: removes any tips by not mapping them at all
- */
- private static class RemoveTipsMapper extends MapReduceBase implements
- Mapper<PositionWritable, NodeWithFlagWritable, PositionWritable, NodeWithFlagWritable> {
- private int KMER_SIZE;
- private int removeTipsMinLength;
-
- private NodeWithFlagWritable outputValue;
- private NodeWritable curNode;
-
- public void configure(JobConf conf) {
- removeTipsMinLength = conf.getInt("removeTipsMinLength", 0);
- outputValue = new NodeWithFlagWritable(KMER_SIZE);
- curNode = new NodeWritable(KMER_SIZE);
- }
-
- @Override
- public void map(PositionWritable key, NodeWithFlagWritable value,
- OutputCollector<PositionWritable, NodeWithFlagWritable> output, Reporter reporter)
- throws IOException {
- curNode.set(value.getNode());
- if ((curNode.inDegree() == 0 || curNode.outDegree() == 0)
- && curNode.getKmer().getKmerLength() < removeTipsMinLength) {
- // kill this node by NOT mapping it. Update my neighbors with a suicide note
- //TODO: update neighbors by removing me from its list
- } else {
- outputValue.setAsCopy(MergeMessageFlag.MSG_SELF, curNode);
- output.collect(key, value);
- }
- }
- }
-
- /*
- * Reducer class: keeps mapped nodes
- */
- private static class MergePathsH4Reducer extends MapReduceBase implements
- Reducer<PositionWritable, NodeWithFlagWritable, PositionWritable, NodeWithFlagWritable> {
-
- private int KMER_SIZE;
- private NodeWithFlagWritable inputValue;
- private NodeWithFlagWritable outputValue;
- private NodeWritable curNode;
- private NodeWritable prevNode;
- private NodeWritable nextNode;
- private boolean sawCurNode;
- private boolean sawPrevNode;
- private boolean sawNextNode;
- private int count;
- private byte outFlag;
-
- public void configure(JobConf conf) {
- KMER_SIZE = conf.getInt("sizeKmer", 0);
- outputValue = new NodeWithFlagWritable(KMER_SIZE);
- curNode = new NodeWritable(KMER_SIZE);
- prevNode = new NodeWritable(KMER_SIZE);
- nextNode = new NodeWritable(KMER_SIZE);
- }
-
- @Override
- public void reduce(PositionWritable key, Iterator<NodeWithFlagWritable> values,
- OutputCollector<PositionWritable, NodeWithFlagWritable> output, Reporter reporter)
- throws IOException {
-
- inputValue.set(values.next());
- if (!values.hasNext()) {
- if ((inputValue.getFlag() & MergeMessageFlag.MSG_SELF) > 0) {
- // FROM_SELF => keep self
- output.collect(key, inputValue);
- } else {
- throw new IOException("Only one value recieved in merge, but it wasn't from self!");
- }
- } else {
- throw new IOException("Expected only one node during reduce... saw more");
- }
- }
- }
-
- /*
- * Run one iteration of the mergePaths algorithm
- */
- public RunningJob run(String inputPath, String outputPath, JobConf baseConf) throws IOException {
- JobConf conf = new JobConf(baseConf);
- conf.setJarByClass(MergePathsH4.class);
- conf.setJobName("MergePathsH4 " + inputPath);
-
- FileInputFormat.addInputPath(conf, new Path(inputPath));
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
-
- conf.setInputFormat(SequenceFileInputFormat.class);
- conf.setOutputFormat(SequenceFileOutputFormat.class);
-
- conf.setMapOutputKeyClass(PositionWritable.class);
- conf.setMapOutputValueClass(NodeWithFlagWritable.class);
- conf.setOutputKeyClass(PositionWritable.class);
- conf.setOutputValueClass(NodeWithFlagWritable.class);
-
- conf.setMapperClass(H4UpdatesMapper.class);
- conf.setReducerClass(MergePathsH4Reducer.class);
-
- FileSystem.get(conf).delete(new Path(outputPath), true);
-
- return JobClient.runJob(conf);
- }
-
- @Override
- public int run(String[] arg0) throws Exception {
- // TODO Auto-generated method stub
- return 0;
- }
-
- public static void main(String[] args) throws Exception {
- int res = ToolRunner.run(new Configuration(), new MergePathsH4(), args);
- System.out.println("Ran the job fine!");
- System.exit(res);
- }
-}
\ No newline at end of file
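
Three latent bugs in the file just removed are worth recording: RemoveTipsMapper.configure never assigned KMER_SIZE before sizing its Writables, MergePathsH4Reducer.inputValue was never allocated before inputValue.set(...) in reduce(), and run() wired H4UpdatesMapper rather than the RemoveTipsMapper defined in this class. A corrected reducer-side configure, assuming the same "sizeKmer" conf key used elsewhere in this diff:

    // Reducer-side initialization as it presumably should have read:
    public void configure(JobConf conf) {
        KMER_SIZE = conf.getInt("sizeKmer", 0);
        inputValue = new NodeWithFlagWritable(KMER_SIZE);  // was never allocated
        outputValue = new NodeWithFlagWritable(KMER_SIZE);
        curNode = new NodeWritable(KMER_SIZE);
        prevNode = new NodeWritable(KMER_SIZE);
        nextNode = new NodeWritable(KMER_SIZE);
    }
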
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterDriver.java
deleted file mode 100644
index 1db513f..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterDriver.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.graphcountfilter;
-
-import java.io.IOException;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.Option;
-
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-@SuppressWarnings("deprecation")
-public class CountFilterDriver {
- private static class Options {
- @Option(name = "-inputpath", usage = "the input path", required = true)
- public String inputPath;
-
- @Option(name = "-outputpath", usage = "the output path", required = true)
- public String outputPath;
-
- @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
- public int numReducers;
-
- @Option(name = "-count-threshold", usage = "the threshold of count", required = true)
- public int countThreshold;
- }
-
- public void run(String inputPath, String outputPath, int numReducers, int countThreshold, String defaultConfPath)
- throws IOException {
-
- JobConf conf = new JobConf(CountFilterDriver.class);
- conf.setInt("countThreshold", countThreshold);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
-
- conf.setJobName("Count Filter");
- conf.setMapperClass(CountFilterMapper.class);
- conf.setReducerClass(CountFilterReducer.class);
- conf.setCombinerClass(CountFilterReducer.class);
-
- conf.setMapOutputKeyClass(KmerBytesWritable.class);
- conf.setMapOutputValueClass(ByteWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
- conf.setOutputFormat(SequenceFileOutputFormat.class);
-
- conf.setOutputKeyClass(KmerBytesWritable.class);
- conf.setOutputValueClass(ByteWritable.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath));
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- conf.setNumReduceTasks(numReducers);
-
- FileSystem dfs = FileSystem.get(conf);
- dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- }
-
- public static void main(String[] args) throws Exception {
- Options options = new Options();
- CmdLineParser parser = new CmdLineParser(options);
- parser.parseArgument(args);
- CountFilterDriver driver = new CountFilterDriver();
- driver.run(options.inputPath, options.outputPath, options.numReducers, options.countThreshold, null);
- }
-
-}
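
For reference, the deleted driver could also be invoked programmatically; a sketch with hypothetical paths and illustrative values:

    // Keep kmers seen at least twice, using 8 reducers (values illustrative).
    CountFilterDriver driver = new CountFilterDriver();
    driver.run("/graph/kmer-counts", "/graph/filtered", 8, 2, null);
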
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterMapper.java
deleted file mode 100644
index da0c42e..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterMapper.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.graphcountfilter;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-
-@SuppressWarnings({ "deprecation" })
-public class CountFilterMapper extends MapReduceBase implements
- Mapper<KmerBytesWritable, KmerCountValue, KmerBytesWritable, ByteWritable> {
- private int THRESHOLD;
- private ByteWritable adjByte = new ByteWritable();
- @Override
- public void configure(JobConf job) {
- THRESHOLD = Integer.parseInt(job.get("countThreshold"));
- }
- public void map(KmerBytesWritable key, KmerCountValue value, OutputCollector<KmerBytesWritable, ByteWritable> output,
- Reporter reporter) throws IOException {
- if(value.getCount() >= THRESHOLD){
- adjByte.set(value.getAdjBitMap());
- output.collect(key, adjByte );
- }
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterReducer.java
deleted file mode 100644
index c241b52..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterReducer.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.graphcountfilter;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-@SuppressWarnings("deprecation")
-public class CountFilterReducer extends MapReduceBase implements
- Reducer<KmerBytesWritable, ByteWritable, KmerBytesWritable, ByteWritable> {
- @Override
- public void reduce(KmerBytesWritable key, Iterator<ByteWritable> values,
- OutputCollector<KmerBytesWritable, ByteWritable> output, Reporter reporter) throws IOException {
- output.collect(key, values.next()); //Output the Pair
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/GeneCode.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/GeneCode.java
deleted file mode 100644
index 4b95e04..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/GeneCode.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hadoop.oldtype;
-
-public class GeneCode {
- public final static byte[] GENE_SYMBOL = { 'A', 'C', 'G', 'T' };
- /**
-     * Make sure these 4 ids match the index of the corresponding char in
-     * {@link #GENE_SYMBOL}.
- */
- public static final byte A = 0;
- public static final byte C = 1;
- public static final byte G = 2;
- public static final byte T = 3;
-
- public static byte getCodeFromSymbol(byte ch) {
- byte r = 0;
- switch (ch) {
- case 'A':
- case 'a':
- r = A;
- break;
- case 'C':
- case 'c':
- r = C;
- break;
- case 'G':
- case 'g':
- r = G;
- break;
- case 'T':
- case 't':
- r = T;
- break;
- }
- return r;
- }
-
- public static byte getSymbolFromCode(byte code) {
- if (code > 3) {
- return '!';
- }
- return GENE_SYMBOL[code];
- }
-
- public static byte getAdjBit(byte t) {
- byte r = 0;
- switch (t) {
- case 'A':
- case 'a':
- r = 1 << A;
- break;
- case 'C':
- case 'c':
- r = 1 << C;
- break;
- case 'G':
- case 'g':
- r = 1 << G;
- break;
- case 'T':
- case 't':
- r = 1 << T;
- break;
- }
- return r;
- }
-
- /**
-     * Used during path merge. When merging a kmer with its next neighbor,
-     * the caller must ensure {@code t} encodes exactly one neighbor.
- *
- * @param t
- * the neighbor code in BitMap
- * @return the genecode
- */
- public static byte getGeneCodeFromBitMap(byte t) {
- switch (t) {
- case 1 << A:
- return A;
- case 1 << C:
- return C;
- case 1 << G:
- return G;
- case 1 << T:
- return T;
- }
- return -1;
- }
-
- public static byte getBitMapFromGeneCode(byte t) {
- return (byte) (1 << t);
- }
-
- public static int countNumberOfBitSet(int i) {
- int c = 0;
- for (; i != 0; c++) {
- i &= i - 1;
- }
- return c;
- }
-
- public static int inDegree(byte bitmap) {
- return countNumberOfBitSet((bitmap >> 4) & 0x0f);
- }
-
- public static int outDegree(byte bitmap) {
- return countNumberOfBitSet(bitmap & 0x0f);
- }
-
- public static byte mergePreNextAdj(byte pre, byte next) {
- return (byte) (pre << 4 | (next & 0x0f));
- }
-
- public static String getSymbolFromBitMap(byte code) {
- int left = (code >> 4) & 0x0F;
- int right = code & 0x0F;
- StringBuilder str = new StringBuilder();
- for (int i = A; i <= T; i++) {
- if ((left & (1 << i)) != 0) {
- str.append((char) GENE_SYMBOL[i]);
- }
- }
- str.append('|');
- for (int i = A; i <= T; i++) {
- if ((right & (1 << i)) != 0) {
- str.append((char) GENE_SYMBOL[i]);
- }
- }
- return str.toString();
- }
-}
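
A worked example of the adjacency encoding in the class above, as it stood before this deletion: the high nibble of the byte holds predecessor bits and the low nibble successor bits, one bit per base (A = bit 0 through T = bit 3).

    byte pre = (byte) ((1 << GeneCode.A) | (1 << GeneCode.C)); // predecessors {A, C}
    byte next = (byte) (1 << GeneCode.G);                      // successors  {G}
    byte adj = GeneCode.mergePreNextAdj(pre, next);            // 0b0011_0100
    // GeneCode.inDegree(adj)  == 2
    // GeneCode.outDegree(adj) == 1
    // GeneCode.getSymbolFromBitMap(adj).equals("AC|G")
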
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerBytesWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerBytesWritable.java
deleted file mode 100644
index 884b2a9..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerBytesWritable.java
+++ /dev/null
@@ -1,313 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hadoop.oldtype;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.Serializable;
-
-import org.apache.hadoop.io.BinaryComparable;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableComparator;
-
-/**
- * Fixed-kmer-length Writable.
- * Used in the graph-building phase, where the kmer length never changes,
- * so the number of bytes backing the kmer never changes either.
- */
-public class KmerBytesWritable extends BinaryComparable implements Serializable, WritableComparable<BinaryComparable> {
- /**
- *
- */
- private static final long serialVersionUID = 1L;
- private static final byte[] EMPTY_BYTES = {};
-
- protected int size;
- protected byte[] bytes;
- protected int kmerlength;
-
- @Deprecated
- public KmerBytesWritable() {
- this(0, EMPTY_BYTES);
- }
-
- public KmerBytesWritable(int k, byte[] storage) {
- this.kmerlength = k;
- if (k > 0) {
- this.size = KmerUtil.getByteNumFromK(kmerlength);
- this.bytes = storage;
- if (this.bytes.length < size) {
-                throw new ArrayIndexOutOfBoundsException("Storage is smaller than required space for kmerlength: " + k);
- }
- } else {
- this.bytes = storage;
- this.size = 0;
- }
- }
-
- /**
-     * Allocate kmer storage sized for the given kmer length.
- *
- * @param k
- * kmerlength
- */
- public KmerBytesWritable(int k) {
- this.kmerlength = k;
- this.size = KmerUtil.getByteNumFromK(kmerlength);
- if (k > 0) {
- this.bytes = new byte[this.size];
- } else {
- this.bytes = EMPTY_BYTES;
- }
- }
-
- public KmerBytesWritable(KmerBytesWritable right) {
- if (right != null) {
- this.kmerlength = right.kmerlength;
- this.size = right.size;
- this.bytes = new byte[right.size];
- set(right);
- }else{
- this.kmerlength = 0;
- this.size = 0;
- this.bytes = EMPTY_BYTES;
- }
- }
-
- public byte getGeneCodeAtPosition(int pos) {
- if (pos >= kmerlength) {
- return -1;
- }
- int posByte = pos / 4;
- int shift = (pos % 4) << 1;
- return (byte) ((bytes[size - 1 - posByte] >> shift) & 0x3);
- }
-
- public int getKmerLength() {
- return this.kmerlength;
- }
-
- @Override
- public byte[] getBytes() {
- return bytes;
- }
-
- @Override
- public int getLength() {
- return size;
- }
-
- /**
- * Read Kmer from read text into bytes array e.g. AATAG will compress as
- * [0x000G, 0xATAA]
- *
- * @param k
- * @param array
- * @param start
- */
- public void setByRead(byte[] array, int start) {
- byte l = 0;
- int bytecount = 0;
- int bcount = this.size - 1;
- for (int i = start; i < start + kmerlength && i < array.length; i++) {
- byte code = GeneCode.getCodeFromSymbol(array[i]);
- l |= (byte) (code << bytecount);
- bytecount += 2;
- if (bytecount == 8) {
- bytes[bcount--] = l;
- l = 0;
- bytecount = 0;
- }
- }
- if (bcount >= 0) {
- bytes[0] = l;
- }
- }
-
- /**
- * Compress Reversed Kmer into bytes array AATAG will compress as
- * [0x000A,0xATAG]
- *
- * @param input
- * array
- * @param start
- * position
- */
- public void setByReadReverse(byte[] array, int start) {
- byte l = 0;
- int bytecount = 0;
- int bcount = size - 1;
- for (int i = start + kmerlength - 1; i >= 0 && i < array.length; i--) {
- byte code = GeneCode.getCodeFromSymbol(array[i]);
- l |= (byte) (code << bytecount);
- bytecount += 2;
- if (bytecount == 8) {
- bytes[bcount--] = l;
- l = 0;
- bytecount = 0;
- }
- }
- if (bcount >= 0) {
- bytes[0] = l;
- }
- }
-
- /**
- * Shift Kmer to accept new char input
- *
- * @param c
- * Input new gene character
-     * @return the shifted-out gene, in gene-code format
- */
- public byte shiftKmerWithNextChar(byte c) {
- return shiftKmerWithNextCode(GeneCode.getCodeFromSymbol(c));
- }
-
- /**
- * Shift Kmer to accept new gene code
- *
- * @param c
- * Input new gene code
-     * @return the shifted-out gene, in gene-code format
- */
- public byte shiftKmerWithNextCode(byte c) {
- byte output = (byte) (bytes[size - 1] & 0x03);
- for (int i = size - 1; i > 0; i--) {
- byte in = (byte) (bytes[i - 1] & 0x03);
- bytes[i] = (byte) (((bytes[i] >>> 2) & 0x3f) | (in << 6));
- }
- int pos = ((kmerlength - 1) % 4) << 1;
- byte code = (byte) (c << pos);
- bytes[0] = (byte) (((bytes[0] >>> 2) & 0x3f) | code);
- clearLeadBit();
- return output;
- }
-
- /**
- * Shift Kmer to accept new input char
- *
- * @param c
- * Input new gene character
-     * @return the shifted-out gene, in gene-code format
- */
- public byte shiftKmerWithPreChar(byte c) {
- return shiftKmerWithPreCode(GeneCode.getCodeFromSymbol(c));
- }
-
- /**
- * Shift Kmer to accept new gene code
- *
- * @param c
- * Input new gene code
-     * @return the shifted-out gene, in gene-code format
- */
- public byte shiftKmerWithPreCode(byte c) {
- int pos = ((kmerlength - 1) % 4) << 1;
- byte output = (byte) ((bytes[0] >> pos) & 0x03);
- for (int i = 0; i < size - 1; i++) {
- byte in = (byte) ((bytes[i + 1] >> 6) & 0x03);
- bytes[i] = (byte) ((bytes[i] << 2) | in);
- }
- bytes[size - 1] = (byte) ((bytes[size - 1] << 2) | c);
- clearLeadBit();
- return output;
- }
-
- protected void clearLeadBit() {
- if (kmerlength % 4 != 0) {
- bytes[0] &= (1 << ((kmerlength % 4) << 1)) - 1;
- }
- }
-
- public void set(KmerBytesWritable newData) {
- if (kmerlength != newData.kmerlength){
- throw new IllegalArgumentException("kmerSize is different, try to use VKmerBytesWritable instead");
- }
- if (kmerlength > 0 ){
- set(newData.bytes, 0, newData.size);
- }
- }
-
- public void set(byte[] newData, int offset, int length) {
- if (kmerlength > 0){
- System.arraycopy(newData, offset, bytes, 0, size);
- }
- }
-
- /**
-     * Reads the kmer length from the stream, then the packed kmer bytes,
-     * reallocating the byte buffer when it is too small.
- */
- @Override
- public void readFields(DataInput in) throws IOException {
- this.kmerlength = in.readInt();
- this.size = KmerUtil.getByteNumFromK(kmerlength);
- if (this.kmerlength > 0) {
- if (this.bytes.length < this.size) {
- this.bytes = new byte[this.size];
- }
- in.readFully(bytes, 0, size);
- }
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- out.writeInt(kmerlength);
- if (kmerlength > 0) {
- out.write(bytes, 0, size);
- }
- }
-
- @Override
- public int hashCode() {
- return super.hashCode() * 31 + this.kmerlength;
- }
-
- @Override
- public boolean equals(Object right_obj) {
- if (right_obj instanceof KmerBytesWritable)
- return this.kmerlength == ((KmerBytesWritable) right_obj).kmerlength && super.equals(right_obj);
- return false;
- }
-
- @Override
- public String toString() {
- return KmerUtil.recoverKmerFrom(this.kmerlength, this.getBytes(), 0, this.getLength());
- }
-
- public static class Comparator extends WritableComparator {
- public final int LEAD_BYTES = 4;
-
- public Comparator() {
- super(KmerBytesWritable.class);
- }
-
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
- int kmerlength1 = readInt(b1, s1);
- int kmerlength2 = readInt(b2, s2);
- if (kmerlength1 == kmerlength2) {
- return compareBytes(b1, s1 + LEAD_BYTES, l1 - LEAD_BYTES, b2, s2 + LEAD_BYTES, l2 - LEAD_BYTES);
- }
- return kmerlength1 - kmerlength2;
- }
- }
-
- static { // register this comparator
- WritableComparator.define(KmerBytesWritable.class, new Comparator());
- }
-
-}
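
To make the packing in the deleted class concrete: for k = 5 the storage is getByteNumFromK(5) = 2 bytes, bases are written two bits apiece starting from the end of the array, and the javadoc's "[0x000G, 0xATAA]" shorthand corresponds to the following (values hand-checked against setByRead/toString as they stood above):

    KmerBytesWritable kmer = new KmerBytesWritable(5);
    kmer.setByRead("AATAG".getBytes(), 0);
    // kmer.getBytes() == { 0x02, 0x30 }
    //   bytes[1] = 0b00110000 -> A,A,T,A reading the low bit-pair first ("0xATAA")
    //   bytes[0] = 0b00000010 -> the trailing G ("0x000G")
    // kmer.toString().equals("AATAG")
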
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerCountValue.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerCountValue.java
deleted file mode 100644
index d0310ac..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerCountValue.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.oldtype;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.io.Writable;
-
-public class KmerCountValue implements Writable {
- private byte adjBitMap;
- private byte count;
-
- public KmerCountValue(byte bitmap, byte count) {
- set(bitmap, count);
- }
-
- public KmerCountValue() {
- adjBitMap = 0;
- count = 0;
- }
-
- @Override
- public void readFields(DataInput arg0) throws IOException {
- adjBitMap = arg0.readByte();
- count = arg0.readByte();
- }
-
- @Override
- public void write(DataOutput arg0) throws IOException {
- arg0.writeByte(adjBitMap);
- arg0.writeByte(count);
- }
-
- @Override
- public String toString() {
- return GeneCode.getSymbolFromBitMap(adjBitMap) + '\t' + String.valueOf(count);
- }
-
- public void set(byte bitmap, byte count) {
- this.adjBitMap = bitmap;
- this.count = count;
- }
-
- public byte getAdjBitMap() {
- return adjBitMap;
- }
-
- public void setAdjBitMap(byte adjBitMap) {
- this.adjBitMap = adjBitMap;
- }
-
- public byte getCount() {
- return count;
- }
-}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerUtil.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerUtil.java
deleted file mode 100644
index 4f62bb7..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerUtil.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hadoop.oldtype;
-
-public class KmerUtil {
- public static final String empty = "";
-
- public static int getByteNumFromK(int k) {
- int x = k / 4;
- if (k % 4 != 0) {
- x += 1;
- }
- return x;
- }
-
- public static byte reverseKmerByte(byte k) {
- int x = (((k >> 2) & 0x33) | ((k << 2) & 0xcc));
- return (byte) (((x >> 4) & 0x0f) | ((x << 4) & 0xf0));
- }
-
- public static String recoverKmerFrom(int k, byte[] keyData, int keyStart, int keyLength) {
- StringBuilder strKmer = new StringBuilder();
- int byteId = keyStart + keyLength - 1;
- if (byteId < 0) {
- return empty;
- }
- byte currentbyte = keyData[byteId];
- for (int geneCount = 0; geneCount < k; geneCount++) {
- if (geneCount % 4 == 0 && geneCount > 0) {
- currentbyte = keyData[--byteId];
- }
- strKmer.append((char) GeneCode.GENE_SYMBOL[(currentbyte >> ((geneCount % 4) * 2)) & 0x03]);
- }
- return strKmer.toString();
- }
-
-}
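
Two quick, hand-checked values for the helpers above: getByteNumFromK is a ceiling division by four (five 2-bit codes need two bytes), and reverseKmerByte reverses the four 2-bit fields of a byte in place:

    KmerUtil.getByteNumFromK(5);           // == 2
    KmerUtil.reverseKmerByte((byte) 0x1B); // == (byte) 0xE4
    // 0x1B packs the codes 3,2,1,0 (low pair first); 0xE4 packs 0,1,2,3.
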
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/MergePathValueWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/MergePathValueWritable.java
deleted file mode 100644
index aff8e9d..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/MergePathValueWritable.java
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.oldtype;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.io.BinaryComparable;
-import org.apache.hadoop.io.WritableComparable;
-
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-
-public class MergePathValueWritable extends BinaryComparable implements WritableComparable<BinaryComparable> {
-
- private static final byte[] EMPTY_BYTES = {};
- private byte adjBitMap;
- private byte flag;
- private VKmerBytesWritable kmer;
-
- public MergePathValueWritable() {
- this((byte) 0, (byte) 0, 0, EMPTY_BYTES);
- }
-
- public MergePathValueWritable(int k) {
- this.adjBitMap = 0;
- this.flag = 0;
- this.kmer = new VKmerBytesWritable(k);
- }
-
- public MergePathValueWritable(byte adjBitMap, byte flag, int kmerSize, byte[] bytes) {
- this.adjBitMap = adjBitMap;
- this.flag = flag;
- this.kmer = new VKmerBytesWritable(kmerSize, bytes);
- kmer.set(bytes, 0, bytes.length);
- }
-
- public void set(MergePathValueWritable right) {
- set(right.getAdjBitMap(), right.getFlag(), right.getKmer());
- }
-
- public void set(byte adjBitMap, byte flag, VKmerBytesWritable kmer) {
- this.kmer.set(kmer);
- this.adjBitMap = adjBitMap;
- this.flag = flag;
- }
-
- @Override
- public void readFields(DataInput arg0) throws IOException {
- // TODO Auto-generated method stub
- kmer.readFields(arg0);
- adjBitMap = arg0.readByte();
- flag = arg0.readByte();
- }
-
- @Override
- public void write(DataOutput arg0) throws IOException {
- // TODO Auto-generated method stub
-
- kmer.write(arg0);
- arg0.writeByte(adjBitMap);
- arg0.writeByte(flag);
- }
-
- public VKmerBytesWritable getKmer() {
- if (kmer.getLength() != 0) {
- return kmer;
- }
- return null;
- }
-
- public byte getAdjBitMap() {
- return this.adjBitMap;
- }
-
- public byte getFlag() {
- return this.flag;
- }
-
- public String toString() {
- return GeneCode.getSymbolFromBitMap(adjBitMap) + '\t' + String.valueOf(flag);
- }
-
- public String pureToString() {
- return GeneCode.getSymbolFromBitMap(adjBitMap);
- }
- @Override
- public byte[] getBytes() {
- // TODO Auto-generated method stub
- if (kmer.getLength() != 0) {
- return kmer.getBytes();
- } else
- return null;
-
- }
-
- public int getKmerLength() {
- return kmer.getKmerLength();
- }
-
- @Override
- public int getLength() {
- return kmer.getLength();
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/VKmerBytesWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/VKmerBytesWritable.java
deleted file mode 100644
index fb60699..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/VKmerBytesWritable.java
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hadoop.oldtype;
-
-public class VKmerBytesWritable extends KmerBytesWritable {
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- @Deprecated
- public VKmerBytesWritable() {
- super();
- }
-
- public VKmerBytesWritable(int k, byte[] storage) {
- super(k, storage);
- }
-
- public VKmerBytesWritable(int k) {
- super(k);
- }
-
- public VKmerBytesWritable(KmerBytesWritable other) {
- super(other);
- }
-
- protected void setSize(int size) {
- if (size > getCapacity()) {
- setCapacity((size * 3 / 2));
- }
- this.size = size;
- }
-
- protected int getCapacity() {
- return bytes.length;
- }
-
- protected void setCapacity(int new_cap) {
- if (new_cap != getCapacity()) {
- byte[] new_data = new byte[new_cap];
- if (new_cap < size) {
- size = new_cap;
- }
- if (size != 0) {
- System.arraycopy(bytes, 0, new_data, 0, size);
- }
- bytes = new_data;
- }
- }
-
- /**
- * Read Kmer from read text into bytes array e.g. AATAG will compress as
- * [0x000G, 0xATAA]
- *
- * @param k
- * @param array
- * @param start
- */
- public void setByRead(int k, byte[] array, int start) {
- reset(k);
- super.setByRead(array, start);
- }
-
- /**
- * Compress Reversed Kmer into bytes array AATAG will compress as
- * [0x000A,0xATAG]
- *
- * @param input
- * array
- * @param start
- * position
- */
- public void setByReadReverse(int k, byte[] array, int start) {
- reset(k);
- super.setByReadReverse(array, start);
- }
-
- @Override
- public void set(KmerBytesWritable newData) {
- if (newData == null){
- this.set(0,null,0,0);
- }else{
- this.set(newData.kmerlength, newData.bytes, 0, newData.size);
- }
- }
-
- public void set(int k, byte[] newData, int offset, int length) {
- reset(k);
- if (k > 0 ){
- System.arraycopy(newData, offset, bytes, 0, size);
- }
- }
-
- /**
- * Reset array by kmerlength
- *
- * @param k
- */
- public void reset(int k) {
- this.kmerlength = k;
- setSize(0);
- setSize(KmerUtil.getByteNumFromK(k));
- clearLeadBit();
- }
-
-}
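
The reset(int k) at the end of the class calls setSize(0) before setSize(getByteNumFromK(k)) for a reason: setCapacity copies 'size' bytes into any newly grown buffer, so zeroing the logical size first turns a grow into a pure allocation with no copy of stale kmer data. A restatement with that rationale inline:

    public void reset(int k) {
        this.kmerlength = k;
        setSize(0); // grow without copying stale bytes: setCapacity copies 'size' bytes
        setSize(KmerUtil.getByteNumFromK(k));
        clearLeadBit();
    }
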
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/VKmerBytesWritableFactory.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/VKmerBytesWritableFactory.java
deleted file mode 100644
index 0334991..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/VKmerBytesWritableFactory.java
+++ /dev/null
@@ -1,311 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hadoop.oldtype;
-
-public class VKmerBytesWritableFactory {
- private VKmerBytesWritable kmer;
-
- public VKmerBytesWritableFactory(int k) {
- kmer = new VKmerBytesWritable(k);
- }
-
- /**
- * Read Kmer from read text into bytes array e.g. AATAG will compress as
- * [0x000G, 0xATAA]
- *
- * @param k
- * @param array
- * @param start
- */
- public VKmerBytesWritable getKmerByRead(int k, byte[] array, int start) {
- kmer.setByRead(k, array, start);
- return kmer;
- }
-
- /**
- * Compress Reversed Kmer into bytes array AATAG will compress as
- * [0x000A,0xATAG]
- *
- * @param array
- * @param start
- */
- public VKmerBytesWritable getKmerByReadReverse(int k, byte[] array, int start) {
- kmer.setByReadReverse(k, array, start);
- return kmer;
- }
-
- /**
- * Get last kmer from kmer-chain.
- * e.g. kmerChain is AAGCTA, if k =5, it will
- * return AGCTA
- *
-     * @param lastK
-     * @param kmerChain
-     * @return the last kmer of the chain
- */
- public VKmerBytesWritable getLastKmerFromChain(int lastK, final KmerBytesWritable kmerChain) {
- if (lastK > kmerChain.getKmerLength()) {
- return null;
- }
- if (lastK == kmerChain.getKmerLength()) {
- kmer.set(kmerChain);
- return kmer;
- }
- kmer.reset(lastK);
-
- /** from end to start */
- int byteInChain = kmerChain.getLength() - 1 - (kmerChain.getKmerLength() - lastK) / 4;
- int posInByteOfChain = ((kmerChain.getKmerLength() - lastK) % 4) << 1; // *2
- int byteInKmer = kmer.getLength() - 1;
- for (; byteInKmer >= 0 && byteInChain > 0; byteInKmer--, byteInChain--) {
- kmer.getBytes()[byteInKmer] = (byte) ((0xff & kmerChain.getBytes()[byteInChain]) >> posInByteOfChain);
- kmer.getBytes()[byteInKmer] |= ((kmerChain.getBytes()[byteInChain - 1] << (8 - posInByteOfChain)));
- }
-
- /** last kmer byte */
- if (byteInKmer == 0) {
- kmer.getBytes()[0] = (byte) ((kmerChain.getBytes()[0] & 0xff) >> posInByteOfChain);
- }
- kmer.clearLeadBit();
- return kmer;
- }
-
- /**
- * Get first kmer from kmer-chain e.g. kmerChain is AAGCTA, if k=5, it will
- * return AAGCT
- *
-     * @param firstK
-     * @param kmerChain
-     * @return the first kmer of the chain
- */
- public VKmerBytesWritable getFirstKmerFromChain(int firstK, final KmerBytesWritable kmerChain) {
- if (firstK > kmerChain.getKmerLength()) {
- return null;
- }
- if (firstK == kmerChain.getKmerLength()) {
- kmer.set(kmerChain);
- return kmer;
- }
- kmer.reset(firstK);
-
- int i = 1;
- for (; i < kmer.getLength(); i++) {
- kmer.getBytes()[kmer.getLength() - i] = kmerChain.getBytes()[kmerChain.getLength() - i];
- }
- int posInByteOfChain = (firstK % 4) << 1; // *2
- if (posInByteOfChain == 0) {
- kmer.getBytes()[0] = kmerChain.getBytes()[kmerChain.getLength() - i];
- } else {
- kmer.getBytes()[0] = (byte) (kmerChain.getBytes()[kmerChain.getLength() - i] & ((1 << posInByteOfChain) - 1));
- }
- kmer.clearLeadBit();
- return kmer;
- }
-
- public VKmerBytesWritable getSubKmerFromChain(int startK, int kSize, final KmerBytesWritable kmerChain) {
- if (startK + kSize > kmerChain.getKmerLength()) {
- return null;
- }
- if (startK == 0 && kSize == kmerChain.getKmerLength()) {
- kmer.set(kmerChain);
- return kmer;
- }
- kmer.reset(kSize);
-
- /** from end to start */
- int byteInChain = kmerChain.getLength() - 1 - startK / 4;
- int posInByteOfChain = startK % 4 << 1; // *2
- int byteInKmer = kmer.getLength() - 1;
- for (; byteInKmer >= 0 && byteInChain > 0; byteInKmer--, byteInChain--) {
- kmer.getBytes()[byteInKmer] = (byte) ((0xff & kmerChain.getBytes()[byteInChain]) >> posInByteOfChain);
- kmer.getBytes()[byteInKmer] |= ((kmerChain.getBytes()[byteInChain - 1] << (8 - posInByteOfChain)));
- }
-
- /** last kmer byte */
- if (byteInKmer == 0) {
- kmer.getBytes()[0] = (byte) ((kmerChain.getBytes()[0] & 0xff) >> posInByteOfChain);
- }
- kmer.clearLeadBit();
- return kmer;
- }
-
- /**
- * Merge kmer with next neighbor in gene-code format.
- * The k of new kmer will increase by 1
- * e.g. AAGCT merge with A => AAGCTA
- *
- * @param k
- * :input k of kmer
- * @param kmer
- * : input bytes of kmer
- * @param nextCode
- * : next neighbor in gene-code format
-     * @return the merged Kmer, whose K is k + 1
- */
- public VKmerBytesWritable mergeKmerWithNextCode(final KmerBytesWritable kmer, byte nextCode) {
- this.kmer.reset(kmer.getKmerLength() + 1);
- for (int i = 1; i <= kmer.getLength(); i++) {
- this.kmer.getBytes()[this.kmer.getLength() - i] = kmer.getBytes()[kmer.getLength() - i];
- }
- if (this.kmer.getLength() > kmer.getLength()) {
- this.kmer.getBytes()[0] = (byte) (nextCode & 0x3);
- } else {
- this.kmer.getBytes()[0] = (byte) (kmer.getBytes()[0] | ((nextCode & 0x3) << ((kmer.getKmerLength() % 4) << 1)));
- }
- this.kmer.clearLeadBit();
- return this.kmer;
- }
-
- /**
- * Merge kmer with previous neighbor in gene-code format.
- * The k of new kmer will increase by 1
- * e.g. AAGCT merge with A => AAAGCT
- *
- * @param k
- * :input k of kmer
- * @param kmer
- * : input bytes of kmer
- * @param preCode
- * : next neighbor in gene-code format
-     * @return the merged Kmer, whose K is k + 1
- */
- public VKmerBytesWritable mergeKmerWithPreCode(final KmerBytesWritable kmer, byte preCode) {
- this.kmer.reset(kmer.getKmerLength() + 1);
- int byteInMergedKmer = 0;
- if (kmer.getKmerLength() % 4 == 0) {
- this.kmer.getBytes()[0] = (byte) ((kmer.getBytes()[0] >> 6) & 0x3);
- byteInMergedKmer++;
- }
- for (int i = 0; i < kmer.getLength() - 1; i++, byteInMergedKmer++) {
- this.kmer.getBytes()[byteInMergedKmer] = (byte) ((kmer.getBytes()[i] << 2) | ((kmer.getBytes()[i + 1] >> 6) & 0x3));
- }
- this.kmer.getBytes()[byteInMergedKmer] = (byte) ((kmer.getBytes()[kmer.getLength() - 1] << 2) | (preCode & 0x3));
- this.kmer.clearLeadBit();
- return this.kmer;
- }
-
- /**
- * Merge two kmer to one kmer
- * e.g. ACTA + ACCGT => ACTAACCGT
- *
-     * @param preKmer
-     *            : the leading kmer
-     * @param nextKmer
-     *            : the trailing kmer
-     * @return merged kmer, whose new k is preKmer.k + nextKmer.k
- */
- public VKmerBytesWritable mergeTwoKmer(final KmerBytesWritable preKmer, final KmerBytesWritable nextKmer) {
- kmer.reset(preKmer.getKmerLength() + nextKmer.getKmerLength());
- int i = 1;
- for (; i <= preKmer.getLength(); i++) {
- kmer.getBytes()[kmer.getLength() - i] = preKmer.getBytes()[preKmer.getLength() - i];
- }
- if (i > 1) {
- i--;
- }
- if (preKmer.getKmerLength() % 4 == 0) {
- for (int j = 1; j <= nextKmer.getLength(); j++) {
- kmer.getBytes()[kmer.getLength() - i - j] = nextKmer.getBytes()[nextKmer.getLength() - j];
- }
- } else {
- int posNeedToMove = ((preKmer.getKmerLength() % 4) << 1);
- kmer.getBytes()[kmer.getLength() - i] |= nextKmer.getBytes()[nextKmer.getLength() - 1] << posNeedToMove;
- for (int j = 1; j < nextKmer.getLength(); j++) {
- kmer.getBytes()[kmer.getLength() - i - j] = (byte) (((nextKmer.getBytes()[nextKmer.getLength() - j] & 0xff) >> (8 - posNeedToMove)) | (nextKmer
- .getBytes()[nextKmer.getLength() - j - 1] << posNeedToMove));
- }
- if (nextKmer.getKmerLength() % 4 == 0 || (nextKmer.getKmerLength() % 4) * 2 + posNeedToMove > 8) {
- kmer.getBytes()[0] = (byte) ((0xff & nextKmer.getBytes()[0]) >> (8 - posNeedToMove));
- }
- }
- kmer.clearLeadBit();
- return kmer;
- }
-
- /**
-     * Safely shift the kmer forward without changing the input kmer.
- * e.g. AGCGC shift with T => GCGCT
- *
- * @param k
- * : kmer length
- * @param kmer
- * : input kmer
- * @param afterCode
- * : input genecode
-     * @return the shifted kmer (the factory's internal buffer); K does not change
- */
- public VKmerBytesWritable shiftKmerWithNextCode(final KmerBytesWritable kmer, byte afterCode) {
- this.kmer.set(kmer);
- this.kmer.shiftKmerWithNextCode(afterCode);
- return this.kmer;
- }
-
- /**
-     * Safely shift the kmer backward without changing the input kmer.
- * e.g. AGCGC shift with T => TAGCG
- *
- * @param k
- * : kmer length
- * @param kmer
- * : input kmer
- * @param preCode
- * : input genecode
-     * @return the shifted kmer (the factory's internal buffer); K does not change
- */
- public VKmerBytesWritable shiftKmerWithPreCode(final KmerBytesWritable kmer, byte preCode) {
- this.kmer.set(kmer);
- this.kmer.shiftKmerWithPreCode(preCode);
- return this.kmer;
- }
-
- /**
-     * Get the reversed sequence of the given kmer.
- *
- * @param kmer
- */
- public VKmerBytesWritable reverse(final KmerBytesWritable kmer) {
- this.kmer.reset(kmer.getKmerLength());
-
- int curPosAtKmer = ((kmer.getKmerLength() - 1) % 4) << 1;
- int curByteAtKmer = 0;
-
- int curPosAtReverse = 0;
- int curByteAtReverse = this.kmer.getLength() - 1;
- this.kmer.getBytes()[curByteAtReverse] = 0;
- for (int i = 0; i < kmer.getKmerLength(); i++) {
- byte gene = (byte) ((kmer.getBytes()[curByteAtKmer] >> curPosAtKmer) & 0x03);
- this.kmer.getBytes()[curByteAtReverse] |= gene << curPosAtReverse;
- curPosAtReverse += 2;
- if (curPosAtReverse >= 8) {
- curPosAtReverse = 0;
- this.kmer.getBytes()[--curByteAtReverse] = 0;
- }
- curPosAtKmer -= 2;
- if (curPosAtKmer < 0) {
- curPosAtKmer = 6;
- curByteAtKmer++;
- }
- }
- this.kmer.clearLeadBit();
- return this.kmer;
- }
-}
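
One design property of the deleted factory deserves a note: every method writes into, and returns, the factory's single internal VKmerBytesWritable, so each call invalidates the result of the previous one. A sketch of the pitfall and its fix:

    VKmerBytesWritableFactory factory = new VKmerBytesWritableFactory(5);
    KmerBytesWritable chain = new KmerBytesWritable(5);
    chain.setByRead("AAGCT".getBytes(), 0);

    VKmerBytesWritable first = factory.getFirstKmerFromChain(3, chain);
    VKmerBytesWritable firstCopy = new VKmerBytesWritable(first); // copy before the next call
    VKmerBytesWritable last = factory.getLastKmerFromChain(3, chain);
    // 'first' and 'last' now alias the same buffer holding "GCT";
    // only 'firstCopy' still holds "AAG".
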
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Driver.java
deleted file mode 100644
index 12307fe..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Driver.java
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.pathmergingh1;
-
-import java.io.IOException;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.mapred.TextOutputFormat;
-import org.apache.hadoop.mapred.lib.MultipleOutputs;
-import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
-import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
-import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.Option;
-
-import edu.uci.ics.genomix.hadoop.pmcommon.MergePathValueWritable;
-import edu.uci.ics.genomix.hadoop.pmcommon.SNodeInitialMapper;
-import edu.uci.ics.genomix.hadoop.pmcommon.MergePathMultiSeqOutputFormat;
-import edu.uci.ics.genomix.hadoop.pmcommon.SNodeInitialMapper;
-import edu.uci.ics.genomix.hadoop.pmcommon.SNodeInitialReducer;
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-@SuppressWarnings("deprecation")
-public class MergePathH1Driver {
-
- private static class Options {
- @Option(name = "-inputpath", usage = "the input path", required = true)
- public String inputPath;
-
- @Option(name = "-outputpath", usage = "the output path", required = true)
- public String outputPath;
-
- @Option(name = "-mergeresultpath", usage = "the merging results path", required = true)
- public String mergeResultPath;
-
- @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
- public int numReducers;
-
- @Option(name = "-kmer-kmerByteSize", usage = "the kmerByteSize of kmer", required = true)
- public int sizeKmer;
-
- @Option(name = "-merge-rounds", usage = "the while rounds of merging", required = true)
- public int mergeRound;
-
- }
-
- public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer,
- int mergeRound, String defaultConfPath) throws IOException {
-
- JobConf conf = new JobConf(MergePathH1Driver.class);
- conf.setInt("sizeKmer", sizeKmer);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
- conf.setJobName("Initial Path-Starting-Points Table");
- conf.setMapperClass(SNodeInitialMapper.class);
- conf.setReducerClass(SNodeInitialReducer.class);
-
- conf.setMapOutputKeyClass(KmerBytesWritable.class);
- conf.setMapOutputValueClass(MergePathValueWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
- conf.setOutputFormat(SequenceFileOutputFormat.class);
-
- String singlePointPath = "comSinglePath0";
-
- MultipleOutputs.addNamedOutput(conf, singlePointPath, MergePathMultiSeqOutputFormat.class,
- VKmerBytesWritable.class, MergePathValueWritable.class);
-
- conf.setOutputKeyClass(VKmerBytesWritable.class);
- conf.setOutputValueClass(MergePathValueWritable.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath));
- FileOutputFormat.setOutputPath(conf, new Path(inputPath + "stepNext"));
- conf.setNumReduceTasks(numReducers);
- FileSystem dfs = FileSystem.get(conf);
- dfs.delete(new Path(inputPath + "stepNext"), true);
- JobClient.runJob(conf);
- dfs.rename(new Path(inputPath + "stepNext" + "/" + singlePointPath), new Path(mergeResultPath + "/"
- + singlePointPath));
- int iMerge = 0;
- /*----------------------------------------------------------------------*/
- for (iMerge = 1; iMerge <= mergeRound; iMerge++) {
-// if (!dfs.exists(new Path(inputPath + "-step1")))
-// break;
- conf = new JobConf(MergePathH1Driver.class);
- conf.setInt("sizeKmer", sizeKmer);
- conf.setInt("iMerge", iMerge);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
- conf.setJobName("Path Merge");
-
- conf.setMapperClass(MergePathH1Mapper.class);
- conf.setReducerClass(MergePathH1Reducer.class);
-
- conf.setMapOutputKeyClass(VKmerBytesWritable.class);
- conf.setMapOutputValueClass(MergePathValueWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
-
- String uncompSinglePath = "uncompSinglePath" + iMerge;
- String comSinglePath = "comSinglePath" + iMerge;
- String comCircle = "comCircle" + iMerge;
-
- MultipleOutputs.addNamedOutput(conf, uncompSinglePath, MergePathMultiSeqOutputFormat.class,
- VKmerBytesWritable.class, MergePathValueWritable.class);
-
- MultipleOutputs.addNamedOutput(conf, comSinglePath, MergePathMultiSeqOutputFormat.class,
- VKmerBytesWritable.class, MergePathValueWritable.class);
-
- MultipleOutputs.addNamedOutput(conf, comCircle, MergePathMultiSeqOutputFormat.class,
- VKmerBytesWritable.class, MergePathValueWritable.class);
-
- conf.setOutputKeyClass(VKmerBytesWritable.class);
- conf.setOutputValueClass(MergePathValueWritable.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath + "stepNext"));
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- conf.setNumReduceTasks(numReducers);
- dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- dfs.delete(new Path(inputPath + "stepNext"), true);
- dfs.rename(new Path(outputPath + "/" + uncompSinglePath), new Path(inputPath + "stepNext"));
- dfs.rename(new Path(outputPath + "/" + comSinglePath), new Path(mergeResultPath + "/" + comSinglePath));
- dfs.rename(new Path(outputPath + "/" + comCircle), new Path(mergeResultPath + "/" + comCircle));
- }
- }
-
- public static void main(String[] args) throws Exception {
- Options options = new Options();
- CmdLineParser parser = new CmdLineParser(options);
- parser.parseArgument(args);
- MergePathH1Driver driver = new MergePathH1Driver();
- driver.run(options.inputPath, options.outputPath, options.mergeResultPath, options.numReducers,
- options.sizeKmer, options.mergeRound, null);
- }
-}
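
The loop in the deleted H1 driver keeps its rounds going by renaming each round's uncompSinglePath<i> output back onto the stepNext input path, so every iteration consumes the previous round's unmerged remainder. Programmatic use, with hypothetical paths and illustrative values:

    // inputPath, outputPath, mergeResultPath, reducers, k, merge rounds, conf path
    MergePathH1Driver driver = new MergePathH1Driver();
    driver.run("/graph/in", "/graph/tmp-out", "/graph/merged", 8, 5, 3, null);
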
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Mapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Mapper.java
deleted file mode 100644
index ff64d12..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Mapper.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.pathmergingh1;
-
-import java.io.IOException;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.hadoop.pmcommon.MergePathValueWritable;
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-@SuppressWarnings("deprecation")
-public class MergePathH1Mapper extends MapReduceBase implements
- Mapper<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
- private int KMER_SIZE;
- private VKmerBytesWritableFactory outputKmerFactory;
- private MergePathValueWritable outputValue;
- private VKmerBytesWritable tmpKmer;
- private VKmerBytesWritable outputKmer;
-
- public void configure(JobConf job) {
- KMER_SIZE = job.getInt("sizeKmer", 0);
- outputKmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
- outputValue = new MergePathValueWritable();
- tmpKmer = new VKmerBytesWritable(KMER_SIZE);
- outputKmer = new VKmerBytesWritable(KMER_SIZE);
- }
-
- @Override
- public void map(VKmerBytesWritable key, MergePathValueWritable value,
- OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- byte precursor = (byte) 0xF0;
- byte succeed = (byte) 0x0F;
- byte adjBitMap = value.getAdjBitMap();
- byte bitFlag = value.getFlag();
- precursor = (byte) (precursor & adjBitMap);
- precursor = (byte) ((precursor & 0xff) >> 4);
- succeed = (byte) (succeed & adjBitMap);
- byte bitStartEnd = (byte) (0x01 & bitFlag);
- if (bitStartEnd == 1) {
-            /**
-             * e.g. for the kmer AGCGT (already merged from 3 kmers of size 3) with adjMap C|G:
-             * succeedCode -> G, so tmpKmer holds the succeeding neighbor GTG -> outputKmer;
-             * then we store the remaining prefix AGC in tmpKmer -> outputValue
-             */
- byte succeedCode = GeneCode.getGeneCodeFromBitMap(succeed);
- tmpKmer.set(outputKmerFactory.getLastKmerFromChain(KMER_SIZE, key));
- outputKmer.set(outputKmerFactory.shiftKmerWithNextCode(tmpKmer, succeedCode));
- tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(key.getKmerLength() - (KMER_SIZE - 1), key));
- outputValue.set(adjBitMap, bitFlag, tmpKmer);
- output.collect(outputKmer, outputValue);
- } else {
- output.collect(key, value);
- }
- }
-}
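The mapper above packs both adjacency directions into a single byte: the high nibble of adjBitMap encodes predecessor edges and the low nibble successor edges. A standalone sketch of that nibble arithmetic (the example bit pattern is hypothetical):

    public class AdjBitMapDemo {
        public static void main(String[] args) {
            byte adjBitMap = (byte) 0b0010_0100;               // hypothetical map: one predecessor, one successor bit
            byte precursor = (byte) ((adjBitMap & 0xF0) >> 4); // high nibble -> predecessor edges
            byte succeed = (byte) (adjBitMap & 0x0F);          // low nibble  -> successor edges
            System.out.printf("precursor=0x%X succeed=0x%X%n", precursor, succeed);
        }
    }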
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Reducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Reducer.java
deleted file mode 100644
index f6c4f42..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Reducer.java
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.pathmergingh1;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.lib.MultipleOutputs;
-
-import edu.uci.ics.genomix.hadoop.pmcommon.MergePathValueWritable;
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-@SuppressWarnings("deprecation")
-public class MergePathH1Reducer extends MapReduceBase implements
- Reducer<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
- private VKmerBytesWritableFactory kmerFactory;
- private VKmerBytesWritable outputKmer;
- private int KMER_SIZE;
- private MergePathValueWritable outputValue;
- MultipleOutputs mos = null;
- private int I_MERGE;
-
- public void configure(JobConf job) {
- mos = new MultipleOutputs(job);
- I_MERGE = Integer.parseInt(job.get("iMerge"));
- KMER_SIZE = job.getInt("sizeKmer", 0);
- outputValue = new MergePathValueWritable();
- kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
- outputKmer = new VKmerBytesWritable(KMER_SIZE);
- }
-
- @SuppressWarnings("unchecked")
- @Override
- public void reduce(VKmerBytesWritable key, Iterator<MergePathValueWritable> values,
- OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- outputValue = values.next();
-        if (values.hasNext()) {
- byte bitFlag = outputValue.getFlag();
- byte bitStartEnd = (byte) (0x01 & outputValue.getFlag());
- if (bitStartEnd == 0) {
-                /**
-                 * e.g. if 2 records land in the same group, the first is the non-start record (GTG, null, A|T, 0)
-                 * and the second is the start-point (GTG, AGC, C|G, 1);
-                 * the result of combining them: AGCGTG, null, C|T, 1
-                 */
-                //first record is the non-start point
-
- byte nextAdj = outputValue.getAdjBitMap();
- byte succeed = (byte) 0x0F;
- succeed = (byte) (succeed & nextAdj);
- //second record must be start point
- outputValue = values.next();
- byte adjBitMap = outputValue.getAdjBitMap();
- if (outputValue.getKmerLength() != 0)
- outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), key));
- else
- outputKmer.set(key);
- byte outputFlag = (byte) (0x81 & bitFlag);
- outputFlag = (byte) (outputFlag | ((byte) 0x81 & outputValue.getFlag()));
- adjBitMap = (byte) (adjBitMap & 0xF0);
- adjBitMap = (byte) (adjBitMap | succeed);
- outputValue.set(adjBitMap, outputFlag, null);
-                //check whether the merged node contains both the start-point and the end-point
- bitFlag = outputValue.getFlag();
- bitStartEnd = (byte) (0x81 & bitFlag);
- if (bitStartEnd == (byte) 0x81) {
- mos.getCollector("comSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
- } else
- mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
- } else {
-                /**
-                 * e.g. if 2 records land in the same group, the first is the start-point (GTG, AGC, C|G, 1)
-                 * and the second is the non-start record (GTG, null, A|T, 0);
-                 * the result of combining them: AGCGTG, null, C|T, 1
-                 */
-                //first record is the start point
- byte adjBitMap = outputValue.getAdjBitMap();
- if (outputValue.getKmerLength() != 0)
- outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), key));
- else
- outputKmer.set(key);
- //second record is non start point
- outputValue = values.next();
- byte nextAdj = outputValue.getAdjBitMap();
- byte succeed = (byte) 0x0F;
- succeed = (byte) (succeed & nextAdj);
- //set outputFlag for first record
- byte outputFlag = (byte) (0x81 & bitFlag);
- outputFlag = (byte) (outputFlag | ((byte) 0x81 & outputValue.getFlag()));
- adjBitMap = (byte) (adjBitMap & 0xF0);
- adjBitMap = (byte) (adjBitMap | succeed);
- outputValue.set(adjBitMap, outputFlag, null);
-                //check whether the merged node contains both the start-point and the end-point
- bitFlag = outputValue.getFlag();
- bitStartEnd = (byte) (0x81 & bitFlag);
- if (bitStartEnd == (byte) 0x81) {
- mos.getCollector("comSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
- } else
- mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
- }
- } else {
- mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(key, outputValue);
- }
- }
-
- public void close() throws IOException {
- mos.close();
- }
-}
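The reducer's routing decision rests on the 0x81 flag algebra: bit 0x01 marks a record carrying the start-point, bit 0x80 one carrying the end-point, and a merged flag of 0x81 means the path is complete. A self-contained sketch of that combination (the starting flag values are chosen for illustration):

    public class FlagMergeDemo {
        public static void main(String[] args) {
            byte startFlag = (byte) 0x01;  // record that carries the start-point
            byte endFlag = (byte) 0x80;    // record that carries the end-point
            byte merged = (byte) ((startFlag & 0x81) | (endFlag & 0x81));
            byte bitStartEnd = (byte) (merged & 0x81);
            // 0x81 -> both endpoints present -> route to "comSinglePath"
            System.out.println(bitStartEnd == (byte) 0x81 ? "complete" : "uncompressed");
        }
    }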
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Driver.java
deleted file mode 100644
index 8f8996b..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Driver.java
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.pathmergingh2;
-
-import java.io.IOException;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.mapred.lib.MultipleOutputs;
-import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.Option;
-
-import edu.uci.ics.genomix.hadoop.pmcommon.MergePathMultiSeqOutputFormat;
-import edu.uci.ics.genomix.hadoop.pmcommon.MergePathValueWritable;
-import edu.uci.ics.genomix.hadoop.pmcommon.SNodeInitialMapper;
-import edu.uci.ics.genomix.hadoop.pmcommon.SNodeInitialReducer;
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-@SuppressWarnings("deprecation")
-public class MergePathH2Driver {
-
- private static class Options {
- @Option(name = "-inputpath", usage = "the input path", required = true)
- public String inputPath;
-
- @Option(name = "-outputpath", usage = "the output path", required = true)
- public String outputPath;
-
- @Option(name = "-mergeresultpath", usage = "the merging results path", required = true)
- public String mergeResultPath;
-
- @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
- public int numReducers;
-
- @Option(name = "-kmer-kmerByteSize", usage = "the kmerByteSize of kmer", required = true)
- public int sizeKmer;
-
- @Option(name = "-merge-rounds", usage = "the while rounds of merging", required = true)
- public int mergeRound;
-
- }
-
- public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer,
- int mergeRound, String defaultConfPath) throws IOException {
-
- JobConf conf = new JobConf(MergePathH2Driver.class);
- conf.setInt("sizeKmer", sizeKmer);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
- conf.setJobName("Initial Path-Starting-Points Table");
- conf.setMapperClass(SNodeInitialMapper.class);
- conf.setReducerClass(SNodeInitialReducer.class);
-
- conf.setMapOutputKeyClass(KmerBytesWritable.class);
- conf.setMapOutputValueClass(MergePathValueWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
- conf.setOutputFormat(SequenceFileOutputFormat.class);
-
- String singlePointPath = "comSinglePath0";
-
- MultipleOutputs.addNamedOutput(conf, singlePointPath, MergePathMultiSeqOutputFormat.class,
- VKmerBytesWritable.class, MergePathValueWritable.class);
-
- conf.setOutputKeyClass(VKmerBytesWritable.class);
- conf.setOutputValueClass(MergePathValueWritable.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath));
- FileOutputFormat.setOutputPath(conf, new Path(inputPath + "stepNext"));
- conf.setNumReduceTasks(numReducers);
- FileSystem dfs = FileSystem.get(conf);
- dfs.delete(new Path(inputPath + "stepNext"), true);
- JobClient.runJob(conf);
- dfs.rename(new Path(inputPath + "stepNext" + "/" + singlePointPath), new Path(mergeResultPath + "/" + singlePointPath));
-
- int iMerge = 0;
- for (iMerge = 1; iMerge <= mergeRound; iMerge++) {
-// if (!dfs.exists(new Path(inputPath + "-step1")))
-// break;
- conf = new JobConf(MergePathH2Driver.class);
- conf.setInt("sizeKmer", sizeKmer);
- conf.setInt("iMerge", iMerge);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
- conf.setJobName("Path Merge");
-
- conf.setMapperClass(MergePathH2Mapper.class);
- conf.setReducerClass(MergePathH2Reducer.class);
-
- conf.setMapOutputKeyClass(VKmerBytesWritable.class);
- conf.setMapOutputValueClass(MergePathValueWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
-
- String uncompSinglePath = "uncompSinglePath" + iMerge;
- String comSinglePath = "comSinglePath" + iMerge;
- String comCircle = "comCircle" + iMerge;
-
- MultipleOutputs.addNamedOutput(conf, uncompSinglePath, MergePathMultiSeqOutputFormat.class,
- VKmerBytesWritable.class, MergePathValueWritable.class);
-
- MultipleOutputs.addNamedOutput(conf, comSinglePath, MergePathMultiSeqOutputFormat.class,
- VKmerBytesWritable.class, MergePathValueWritable.class);
-
- MultipleOutputs.addNamedOutput(conf, comCircle, MergePathMultiSeqOutputFormat.class,
- VKmerBytesWritable.class, MergePathValueWritable.class);
-
- conf.setOutputKeyClass(VKmerBytesWritable.class);
- conf.setOutputValueClass(MergePathValueWritable.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath + "stepNext"));
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- conf.setNumReduceTasks(numReducers);
- dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- dfs.delete(new Path(inputPath + "stepNext"), true);
- dfs.rename(new Path(outputPath + "/" + uncompSinglePath), new Path(inputPath + "stepNext"));
- dfs.rename(new Path(outputPath + "/" + comSinglePath), new Path(mergeResultPath + "/" + comSinglePath));
- dfs.rename(new Path(outputPath + "/" + comCircle), new Path(mergeResultPath + "/" + comCircle));
- }
-/* conf = new JobConf(MergePathH2Driver.class);
- conf.setInt("sizeKmer", sizeKmer);
- conf.setInt("iMerge", iMerge);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
- conf.setJobName("Path Merge");
-
- conf.setMapperClass(MergePathH2Mapper.class);
- conf.setReducerClass(MergePathH2Reducer.class);
-
- conf.setMapOutputKeyClass(VKmerBytesWritable.class);
- conf.setMapOutputValueClass(MergePathValueWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
-
- String uncompSinglePath = "uncompSinglePath" + iMerge;
- String comSinglePath = "comSinglePath" + iMerge;
- String comCircle = "comCircle" + iMerge;
-
- MultipleOutputs.addNamedOutput(conf, uncompSinglePath, MergePathMultiTextOutputFormat.class,
- VKmerBytesWritable.class, MergePathValueWritable.class);
-
- MultipleOutputs.addNamedOutput(conf, comSinglePath, MergePathMultiTextOutputFormat.class,
- VKmerBytesWritable.class, MergePathValueWritable.class);
-
- MultipleOutputs.addNamedOutput(conf, comCircle, MergePathMultiTextOutputFormat.class,
- VKmerBytesWritable.class, MergePathValueWritable.class);
-
- conf.setOutputKeyClass(VKmerBytesWritable.class);
- conf.setOutputValueClass(MergePathValueWritable.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath + "stepNext"));
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- conf.setNumReduceTasks(numReducers);
- dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- dfs.delete(new Path(inputPath + "stepNext"), true);
- dfs.rename(new Path(outputPath + "/" + uncompSinglePath), new Path(inputPath + "stepNext"));
- dfs.rename(new Path(outputPath + "/" + comSinglePath), new Path(mergeResultPath + "/" + comSinglePath));
- dfs.rename(new Path(outputPath + "/" + comCircle), new Path(mergeResultPath + "/" + comCircle));*/
- }
-
- public static void main(String[] args) throws Exception {
- Options options = new Options();
- CmdLineParser parser = new CmdLineParser(options);
- parser.parseArgument(args);
- MergePathH2Driver driver = new MergePathH2Driver();
- driver.run(options.inputPath, options.outputPath, options.mergeResultPath, options.numReducers,
- options.sizeKmer, options.mergeRound, null);
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Mapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Mapper.java
deleted file mode 100644
index 941ffa3..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Mapper.java
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.pathmergingh2;
-
-import java.io.IOException;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.hadoop.pmcommon.MergePathValueWritable;
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-@SuppressWarnings("deprecation")
-public class MergePathH2Mapper extends MapReduceBase implements
- Mapper<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
-
- private int KMER_SIZE;
- private VKmerBytesWritableFactory outputKmerFactory;
- private MergePathValueWritable outputValue;
- private VKmerBytesWritable tmpKmer;
- private VKmerBytesWritable outputKmer;
-
- public void configure(JobConf job) {
- KMER_SIZE = job.getInt("sizeKmer", 0);
- outputKmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
- outputValue = new MergePathValueWritable();
- tmpKmer = new VKmerBytesWritable(KMER_SIZE);
- outputKmer = new VKmerBytesWritable(KMER_SIZE);
- }
-
- @Override
- public void map(VKmerBytesWritable key, MergePathValueWritable value,
- OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- byte precursor = (byte) 0xF0;
- byte succeed = (byte) 0x0F;
- byte adjBitMap = value.getAdjBitMap();
- byte bitFlag = value.getFlag();
- precursor = (byte) (precursor & adjBitMap);
- precursor = (byte) ((precursor & 0xff) >> 4);
- succeed = (byte) (succeed & adjBitMap);
- byte bitStartEnd = (byte) (0x81 & bitFlag);
-
- switch (bitStartEnd) {
- case (byte) 0x01:
-                //if this record is a start-point, it only maps its succeeding node
-                /**
-                 * e.g. for the kmer AGCGT (already merged from 3 kmers of size 3) with adjMap C|G:
-                 * succeedCode -> G, so tmpKmer holds the succeeding neighbor GTG -> outputKmer;
-                 * then we store the remaining prefix AGC in tmpKmer -> outputValue
-                 */
- byte succeedCode = GeneCode.getGeneCodeFromBitMap(succeed);
- tmpKmer.set(outputKmerFactory.getLastKmerFromChain(KMER_SIZE, key));
- outputKmer.set(outputKmerFactory.shiftKmerWithNextCode(tmpKmer, succeedCode));
- tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(key.getKmerLength() - (KMER_SIZE - 1), key));
- //mark the flag of key --> reverse record
- bitFlag = (byte) (bitFlag | 0x08);
- outputValue.set(adjBitMap, bitFlag, tmpKmer);
- output.collect(outputKmer, outputValue);
- break;
-
- case (byte) 0x80:
-                //if the record is an end-point, it only maps itself
-                /**
-                 * e.g. for the kmer AGCGT (already merged from 3 kmers of size 3) with adjMap C|G:
-                 * tmpKmer holds the first kmer AGC -> outputKmer;
-                 * then we store the remaining suffix GT in tmpKmer -> outputValue
-                 */
- tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(KMER_SIZE, key));
- outputKmer.set(tmpKmer);
- tmpKmer.set(outputKmerFactory.getLastKmerFromChain(key.getKmerLength() - KMER_SIZE, key));
- //mark the flag of key --> itself record
- bitFlag = (byte) (bitFlag | 0x10);
- outputValue.set(adjBitMap, bitFlag, tmpKmer);
- output.collect(outputKmer, outputValue);
- break;
-            //if the record is neither a start- nor an end-point, it maps both its succeeding node and itself
- case (byte) 0x00:
- succeedCode = GeneCode.getGeneCodeFromBitMap(succeed);
- //it maps the succeed nodes
- tmpKmer.set(outputKmerFactory.getLastKmerFromChain(KMER_SIZE, key));
- outputKmer.set(outputKmerFactory.shiftKmerWithNextCode(tmpKmer, succeedCode));
- tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(key.getKmerLength() - (KMER_SIZE - 1), key));
- bitFlag = (byte) (bitFlag | 0x08);
- outputValue.set(adjBitMap, bitFlag, tmpKmer);
- output.collect(outputKmer, outputValue);
- //it maps itself
- bitFlag = (byte) (bitFlag & 0xF7);
- tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(KMER_SIZE, key));
- outputKmer.set(tmpKmer);
- tmpKmer.set(outputKmerFactory.getLastKmerFromChain(key.getKmerLength() - KMER_SIZE, key));
- bitFlag = (byte) (bitFlag | 0x10);
- outputValue.set(adjBitMap, bitFlag, tmpKmer);
- output.collect(outputKmer, outputValue);
- break;
- }
- }
-}
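Beyond the start/end bits, the H2 mapper tags each emitted record with its role: 0x08 marks a "reverse" record emitted under the successor's kmer, 0x10 an "itself" record emitted under the node's own first kmer. A small sketch of that bit bookkeeping (the interior-node starting flag is illustrative):

    public class H2FlagDemo {
        public static void main(String[] args) {
            byte bitFlag = 0x00;                                        // interior node: neither start nor end
            byte reverseRecord = (byte) (bitFlag | 0x08);               // emitted under the successor's key
            byte itselfRecord = (byte) ((reverseRecord & 0xF7) | 0x10); // clear 0x08, then mark "itself"
            System.out.printf("reverse=0x%02X itself=0x%02X%n", reverseRecord, itselfRecord);
        }
    }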
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Reducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Reducer.java
deleted file mode 100644
index ad67c4a..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Reducer.java
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.pathmergingh2;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.lib.MultipleOutputs;
-
-import edu.uci.ics.genomix.hadoop.pmcommon.MergePathValueWritable;
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-@SuppressWarnings("deprecation")
-public class MergePathH2Reducer extends MapReduceBase implements
- Reducer<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
-
- private VKmerBytesWritableFactory kmerFactory;
- private VKmerBytesWritable outputKmer;
- private VKmerBytesWritable tmpKmer1;
- private VKmerBytesWritable tmpKmer2;
- private int KMER_SIZE;
- private MergePathValueWritable outputValue;
- private MergePathValueWritable tmpOutputValue;
- MultipleOutputs mos = null;
- private int I_MERGE;
-
- public void configure(JobConf job) {
- mos = new MultipleOutputs(job);
- I_MERGE = Integer.parseInt(job.get("iMerge"));
- KMER_SIZE = job.getInt("sizeKmer", 0);
- outputValue = new MergePathValueWritable();
- tmpOutputValue = new MergePathValueWritable();
- kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
- outputKmer = new VKmerBytesWritable(KMER_SIZE);
- tmpKmer1 = new VKmerBytesWritable(KMER_SIZE);
- tmpKmer2 = new VKmerBytesWritable(KMER_SIZE);
- }
-
- @SuppressWarnings("unchecked")
- public void reduce(VKmerBytesWritable key, Iterator<MergePathValueWritable> values,
- OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- outputValue = values.next();
- outputKmer.set(key);
-        if (values.hasNext()) {
- byte bitFlag = outputValue.getFlag();
- //decide whether this record is start or end
- byte bitStartEnd = (byte) (0x81 & bitFlag);
- //decide whether this record is reverse
- byte bitPosiNegative = (byte) (0x18 & bitFlag);
- byte succeed = (byte) 0x0F;
- switch (bitPosiNegative) {
- case (byte) 0x08:
-                    //the first record is a reverse record
-                    /**
-                     * e.g. if 2 records land in the same group, the first is the reverse record (GTG, AGC, C|G, 0x08)
-                     * and the second is the "itself" record (GTG, null, A|T, 0x10);
-                     * the result of combining them: AGCGTG, null, C|T, 0x01
-                     */
- if (outputValue.getKmerLength() != 0)
- tmpKmer1.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), key));
- else
- tmpKmer1.set(key);
- byte adjBitMap = outputValue.getAdjBitMap();
- //get the next value record
- outputValue = values.next();
- bitStartEnd = (byte) (0x81 & outputValue.getFlag());
-                    //if this record contains the end-point
- if (bitStartEnd == (byte) 0x80) {
- if (outputValue.getKmerLength() != 0)
- tmpKmer2.set(kmerFactory.mergeTwoKmer(key, outputValue.getKmer()));
- else
- tmpKmer2.set(key);
- byte tmpFlag = (byte) 0x80;
- tmpOutputValue.set(outputValue.getAdjBitMap(), tmpFlag, null);
- mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(tmpKmer2, tmpOutputValue);
- }
- if (outputValue.getKmerLength() != 0)
- outputKmer.set(kmerFactory.mergeTwoKmer(tmpKmer1, outputValue.getKmer()));
- else
- outputKmer.set(tmpKmer1);
- succeed = (byte) (succeed & outputValue.getAdjBitMap());
- adjBitMap = (byte) (adjBitMap & 0xF0);
- adjBitMap = (byte) (adjBitMap | succeed);
- byte outputFlag = (byte) (0x81 & bitFlag);
- outputFlag = (byte) (outputFlag | ((byte) 0x81 & outputValue.getFlag()));
- outputValue.set(adjBitMap, outputFlag, null);
-                    // if the merged record is now complete, output it to the complete-path file
- bitFlag = outputValue.getFlag();
- bitStartEnd = (byte) (0x81 & bitFlag);
- if (bitStartEnd == (byte) 0x81) {
- mos.getCollector("comSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
- } else
- mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
- break;
- case (byte) 0x10:
-                    //the first record is an "itself" record
-                    /**
-                     * e.g. if 2 records land in the same group, the first is the "itself" record (GTG, null, A|T, 0x10)
-                     * and the second is the reverse record (GTG, AGC, C|G, 0x08);
-                     * the result of combining them: AGCGTG, null, C|T, 0x01
-                     */
- if (outputValue.getKmerLength() != 0)
- tmpKmer1.set(kmerFactory.mergeTwoKmer(key, outputValue.getKmer()));
- else
- tmpKmer1.set(key);
-                    //if this record contains the end-point
- if (bitStartEnd == (byte) 0x80) {
- byte tmpFlag = (byte) 0x80;
- tmpOutputValue.set(outputValue.getAdjBitMap(), tmpFlag, null);
- mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(tmpKmer1, tmpOutputValue);
- }
- succeed = (byte) (succeed & outputValue.getAdjBitMap());
- outputValue = values.next();
- if (outputValue.getKmerLength() != 0)
- outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), tmpKmer1));
- else
- outputKmer.set(tmpKmer1);
- adjBitMap = outputValue.getAdjBitMap();
- adjBitMap = (byte) (adjBitMap & 0xF0);
- adjBitMap = (byte) (adjBitMap | succeed);
- outputFlag = (byte) (0x81 & bitFlag);
- outputFlag = (byte) (outputFlag | ((byte) 0x81 & outputValue.getFlag()));
- outputValue.set(adjBitMap, outputFlag, null);
-                    // if the merged record is now complete, output it to the complete-path file
- bitFlag = outputValue.getFlag();
- bitStartEnd = (byte) (0x81 & bitFlag);
- if (bitStartEnd == (byte) 0x81) {
- mos.getCollector("comSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
- } else
- mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
- break;
- }
- }
- }
-
- public void close() throws IOException {
- mos.close();
- }
-}
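The reducer above keys its two-record protocol off bits 0x18 of the first value's flag, which reveal whether the reverse record or the "itself" record arrived first. A minimal sketch of that dispatch (the starting flag is illustrative):

    public class H2DispatchDemo {
        public static void main(String[] args) {
            byte bitFlag = (byte) 0x08;                      // suppose the reverse record arrived first
            byte bitPosiNegative = (byte) (0x18 & bitFlag);
            switch (bitPosiNegative) {
                case (byte) 0x08:
                    System.out.println("reverse record first; 'itself' record follows");
                    break;
                case (byte) 0x10:
                    System.out.println("'itself' record first; reverse record follows");
                    break;
            }
        }
    }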
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/ConvertGraphFromNodeWithFlagToNodeWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/ConvertGraphFromNodeWithFlagToNodeWritable.java
deleted file mode 100644
index bc44fd6..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/ConvertGraphFromNodeWithFlagToNodeWritable.java
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.pmcommon;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.RunningJob;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.mapred.lib.MultipleOutputs;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import edu.uci.ics.genomix.hadoop.pmcommon.NodeWithFlagWritable.MessageFlag;
-import edu.uci.ics.genomix.oldtype.NodeWritable;
-import edu.uci.ics.genomix.oldtype.PositionWritable;
-
-/*
- * Convert the graph from (PositionWritable, NodeWritableWithFlag) to (NodeWritable, NullWritable)
- */
-@SuppressWarnings("deprecation")
-public class ConvertGraphFromNodeWithFlagToNodeWritable extends Configured implements Tool {
-
- public static class ConvertGraphMapper extends MapReduceBase implements
- Mapper<PositionWritable, NodeWithFlagWritable, NodeWritable, NullWritable> {
-
- /*
- * Convert the graph
- */
- @Override
- public void map(PositionWritable key, NodeWithFlagWritable value,
- OutputCollector<NodeWritable, NullWritable> output, Reporter reporter) throws IOException {
- output.collect(value.getNode(), NullWritable.get());
- }
- }
-
- /*
- * Convert the graph
- */
- public RunningJob run(String inputPath, String outputPath, JobConf baseConf) throws IOException {
- JobConf conf = new JobConf(baseConf);
- conf.setJarByClass(ConvertGraphFromNodeWithFlagToNodeWritable.class);
- conf.setJobName("Convert graph to NodeWritable " + inputPath);
- conf.setMapOutputKeyClass(NodeWritable.class);
- conf.setMapOutputValueClass(NullWritable.class);
- conf.setOutputKeyClass(NodeWritable.class);
- conf.setOutputValueClass(NullWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
- conf.setOutputFormat(SequenceFileOutputFormat.class);
- FileInputFormat.addInputPaths(conf, inputPath);
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- FileSystem dfs = FileSystem.get(conf);
- dfs.delete(new Path(outputPath), true); // clean output dir
-
- conf.setMapperClass(ConvertGraphMapper.class);
-// conf.setReducerClass(PathNodeInitialReducer.class);
- conf.setNumReduceTasks(0);
- RunningJob job = JobClient.runJob(conf);
-
- return job;
- }
-
-    @Override
-    public int run(String[] args) throws Exception {
-        // do the work here: delegating back to ToolRunner from inside run(String[])
-        // would recurse forever (assumes args = { inputPath, outputPath })
-        run(args[0], args[1], new JobConf(getConf()));
-        return 0;
-    }
-
-    public static void main(String[] args) throws Exception {
-        int res = ToolRunner.run(new Configuration(), new ConvertGraphFromNodeWithFlagToNodeWritable(), args);
-        System.exit(res);
-    }
-}
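For completeness, launching the converter above follows the standard Tool pattern; a hypothetical invocation with placeholder paths (it assumes run(String[]) consumes two positional arguments, input then output):

    public class ConvertGraphUsage {
        public static void main(String[] args) throws Exception {
            String[] toolArgs = { "/tmp/graph-with-flag", "/tmp/graph-as-nodes" }; // hypothetical paths
            int res = org.apache.hadoop.util.ToolRunner.run(
                    new org.apache.hadoop.conf.Configuration(),
                    new edu.uci.ics.genomix.hadoop.pmcommon.ConvertGraphFromNodeWithFlagToNodeWritable(),
                    toolArgs);
            System.exit(res);
        }
    }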
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MergePathMultiSeqOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MergePathMultiSeqOutputFormat.java
deleted file mode 100644
index f098317..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MergePathMultiSeqOutputFormat.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.pmcommon;
-
-import java.io.File;
-import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-public class MergePathMultiSeqOutputFormat extends MultipleSequenceFileOutputFormat<VKmerBytesWritable, MergePathValueWritable>{
- @Override
- protected String generateLeafFileName(String name) {
- String[] names = name.split("-");
- return names[0] + File.separator + name;
- }
-}
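generateLeafFileName above nests each MultipleOutputs leaf file inside a directory named after its named output, which is what lets the drivers rename whole named-output directories. A standalone sketch of the name mapping (the part-file name is hypothetical):

    public class LeafNameDemo {
        public static void main(String[] args) {
            String name = "comSinglePath1-r-00000";   // hypothetical reducer part file
            String[] names = name.split("-");
            // prints comSinglePath1/comSinglePath1-r-00000 on POSIX file systems
            System.out.println(names[0] + java.io.File.separator + name);
        }
    }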
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MergePathMultiTextOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MergePathMultiTextOutputFormat.java
deleted file mode 100644
index 885d512..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MergePathMultiTextOutputFormat.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.pmcommon;
-
-import java.io.File;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
-
-public class MergePathMultiTextOutputFormat extends MultipleTextOutputFormat<Text, Text>{
- @Override
- protected String generateLeafFileName(String name) {
- String[] names = name.split("-");
- return names[0] + File.separator + name;
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MergePathValueWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MergePathValueWritable.java
deleted file mode 100644
index 5610e23..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/MergePathValueWritable.java
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.pmcommon;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.io.BinaryComparable;
-import org.apache.hadoop.io.WritableComparable;
-
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-public class MergePathValueWritable extends BinaryComparable implements WritableComparable<BinaryComparable> {
-
- private static final byte[] EMPTY_BYTES = {};
- private byte adjBitMap;
- private byte flag;
- private VKmerBytesWritable kmer;
-
- public MergePathValueWritable() {
- this((byte) 0, (byte) 0, 0, EMPTY_BYTES);
- }
-
- public MergePathValueWritable(int k) {
- this.adjBitMap = 0;
- this.flag = 0;
- this.kmer = new VKmerBytesWritable(k);
- }
-
- public MergePathValueWritable(byte adjBitMap, byte flag, int kmerSize, byte[] bytes) {
- this.adjBitMap = adjBitMap;
- this.flag = flag;
- this.kmer = new VKmerBytesWritable(kmerSize, bytes);
- kmer.set(bytes, 0, bytes.length);
- }
-
- public void set(MergePathValueWritable right) {
- set(right.getAdjBitMap(), right.getFlag(), right.getKmer());
- }
-
- public void set(byte adjBitMap, byte flag, VKmerBytesWritable kmer) {
- this.kmer.set(kmer);
- this.adjBitMap = adjBitMap;
- this.flag = flag;
- }
-
- @Override
- public void readFields(DataInput arg0) throws IOException {
- kmer.readFields(arg0);
- adjBitMap = arg0.readByte();
- flag = arg0.readByte();
- }
-
- @Override
- public void write(DataOutput arg0) throws IOException {
- kmer.write(arg0);
- arg0.writeByte(adjBitMap);
- arg0.writeByte(flag);
- }
-
- public VKmerBytesWritable getKmer() {
- if (kmer.getLength() != 0) {
- return kmer;
- }
- return null;
- }
-
- public byte getAdjBitMap() {
- return this.adjBitMap;
- }
-
- public byte getFlag() {
- return this.flag;
- }
-
- public String toString() {
- return GeneCode.getSymbolFromBitMap(adjBitMap) + '\t' + String.valueOf(flag);
- }
-
- public String pureToString() {
- return GeneCode.getSymbolFromBitMap(adjBitMap);
- }
- @Override
- public byte[] getBytes() {
- if (kmer.getLength() != 0) {
- return kmer.getBytes();
- } else
- return null;
-
- }
-
- public int getKmerLength() {
- return kmer.getKmerLength();
- }
-
- @Override
- public int getLength() {
- return kmer.getLength();
- }
-}
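The readFields/write pair above fixes the wire layout: kmer bytes first, then the adjBitMap byte, then the flag byte. A generic round-trip helper that exercises exactly that contract for any Writable (the class and method names are this sketch's own):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;
    import org.apache.hadoop.io.Writable;

    public class WritableRoundTrip {
        // Serialize `source` and read it back into `target`; for
        // MergePathValueWritable this replays kmer, adjBitMap, flag in order.
        public static <W extends Writable> void roundTrip(W source, W target) throws IOException {
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            source.write(new DataOutputStream(buffer));
            target.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
        }
    }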
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/NodeWithFlagWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/NodeWithFlagWritable.java
deleted file mode 100644
index 1336bf1..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/NodeWithFlagWritable.java
+++ /dev/null
@@ -1,285 +0,0 @@
-package edu.uci.ics.genomix.hadoop.pmcommon;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.io.BinaryComparable;
-import org.apache.hadoop.io.WritableComparable;
-
-import edu.uci.ics.genomix.oldtype.NodeWritable;
-import edu.uci.ics.genomix.oldtype.PositionListWritable;
-import edu.uci.ics.genomix.oldtype.PositionWritable;
-import edu.uci.ics.genomix.oldtype.NodeWritable.DirectionFlag;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-
-/*
- * Simple "Message" class, allowing a NodeWritable to be sent, along with a message flag.
- * This class is used as the value in several MapReduce algorithms.
- */
-public class NodeWithFlagWritable extends BinaryComparable implements WritableComparable<BinaryComparable> {
- private byte flag;
- private NodeWritable node;
-
- public static class MessageFlag extends DirectionFlag {
- public static final byte EMPTY_MESSAGE = 0;
- // message types
- public static final byte MSG_SELF = 0b01 << 2;
- public static final byte MSG_UPDATE_MERGE = 0b10 << 2;
- public static final byte MSG_UPDATE_EDGE = 0b11 << 2;
- public static final byte MSG_MASK = 0b11 << 2;
- // additional info
- public static final byte IS_HEAD = 0b1 << 4;
- public static final byte IS_TAIL = 0b1 << 5;
- // extra bit used differently in each operation
- public static final byte EXTRA_FLAG = 1 << 6;
- }
-
- public void setAsUpdateMessage(byte mergeDir, byte neighborDir, PositionWritable nodeToDelete,
- PositionWritable nodeToAdd) {
- byte neighborToMeDir = mirrorDirection(neighborDir);
- byte neighborToMergeDir = flipDirection(neighborToMeDir, mergeDir);
-
- // clear previous kmer and edge data
- node.reset(0);
-
- // indicate the node to delete
- setFlag((byte) (MessageFlag.MSG_UPDATE_EDGE | neighborToMeDir));
- node.getNodeID().set(nodeToDelete);
-
- // add the new node to the appropriate list
- node.getListFromDir(neighborToMergeDir).append(nodeToAdd);
- }
-
- /*
- * Returns the edge dir for B->A when the A->B edge is type @dir
- */
- public static byte mirrorDirection(byte dir) {
- switch (dir) {
- case MessageFlag.DIR_FF:
- return MessageFlag.DIR_RR;
- case MessageFlag.DIR_FR:
- return MessageFlag.DIR_FR;
- case MessageFlag.DIR_RF:
- return MessageFlag.DIR_RF;
- case MessageFlag.DIR_RR:
- return MessageFlag.DIR_FF;
- default:
- throw new RuntimeException("Unrecognized direction in flipDirection: " + dir);
- }
- }
-
- /*
- * When A->B edge type is @neighborDir and B will merge towards C along a
- *
- * @mergeDir edge, returns the new edge type for A->C
- */
- public static byte flipDirection(byte neighborDir, byte mergeDir) {
- switch (mergeDir) {
-
- case MessageFlag.DIR_FF:
- case MessageFlag.DIR_RR:
- // no change since the merging node didn't flip
- return neighborDir;
-
- case MessageFlag.DIR_FR:
- case MessageFlag.DIR_RF:
- // merging node is flipping; my edge type must also flip
- switch (neighborDir) {
- case MessageFlag.DIR_FF:
- return MessageFlag.DIR_FR;
- case MessageFlag.DIR_FR:
- return MessageFlag.DIR_FF;
- case MessageFlag.DIR_RF:
- return MessageFlag.DIR_RR;
- case MessageFlag.DIR_RR:
- return MessageFlag.DIR_RF;
- default:
- throw new RuntimeException("Unrecognized direction for neighborDir: " + neighborDir);
- }
-
- default:
- throw new RuntimeException("Unrecognized direction for mergeDir: " + mergeDir);
- }
- }
-
- /*
- * Process any changes to @self contained in @updateMsg. This includes
- * merges and edge updates.
- */
- public void processUpdates(NodeWithFlagWritable updateMsg, int kmerSize) throws IOException {
- byte updateFlag = updateMsg.getFlag();
- NodeWritable updateNode = updateMsg.getNode();
-
- if ((updateFlag & MessageFlag.MSG_UPDATE_EDGE) == MessageFlag.MSG_UPDATE_EDGE) {
- // this message wants to update the edges of node.
- // remove position and merge its position lists with node
- if (!updateNode.equals(NodeWritable.EMPTY_NODE)) {
- // need to remove updateNode from the specified PositionList
- node.getListFromDir(updateFlag).remove(updateNode.getNodeID());
- }
- // now merge positionlists from update and node
- node.getFFList().appendList(updateNode.getFFList());
- node.getFRList().appendList(updateNode.getFRList());
- node.getRFList().appendList(updateNode.getRFList());
- node.getRRList().appendList(updateNode.getRRList());
- } else if ((updateFlag & MessageFlag.MSG_UPDATE_MERGE) == MessageFlag.MSG_UPDATE_MERGE) {
- // this message wants to merge node with updateNode.
- // the direction flag indicates node's relationship with updateNode
-// node.getListFromDir(updateFlag).remove(updateNode.getNodeID()); // remove the node from my edges
- node.getKmer().mergeWithKmerInDir(updateFlag, kmerSize, updateNode.getKmer()); // merge with its kmer
-
- // pass along H/T information from the merging node. flipping H ->T, T -> H
- switch (updateFlag & MessageFlag.DIR_MASK) {
- case MessageFlag.DIR_FF:
- case MessageFlag.DIR_RR:
- flag |= (byte) (updateFlag & MessageFlag.IS_HEAD);
- flag |= (byte) (updateFlag & MessageFlag.IS_TAIL);
- break;
- case MessageFlag.DIR_FR:
- case MessageFlag.DIR_RF:
- if ((updateFlag & MessageFlag.IS_HEAD) > 0)
- flag |= (byte) (updateFlag & MessageFlag.IS_TAIL);
- if ((updateFlag & MessageFlag.IS_TAIL) > 0)
- flag |= (byte) (updateFlag & MessageFlag.IS_HEAD);
- break;
- default:
- throw new IOException("Unrecognized direction in updateFlag: " + updateFlag);
- }
-
- // merge my edges with the incoming node's edges, accounting for if the node flipped in
- // the merge and if it's my predecessor or successor
- switch (updateFlag & MessageFlag.DIR_MASK) {
- case MessageFlag.DIR_FF:
- // node merging with me is FF to me
- node.getFFList().set(updateNode.getFFList());
- node.getFRList().set(updateNode.getFRList());
- // update isn't allowed to have any other successors (mirror & flip)
- if (updateNode.getRFList().getCountOfPosition() > 0)
- throw new IOException("Invalid merge detected! Node: " + node + " merging towards "
- + updateNode + "along" + (updateFlag & MessageFlag.DIR_MASK));
- break;
- case MessageFlag.DIR_FR:
- // flip edges
- node.getFFList().set(updateNode.getRFList());
- node.getFRList().set(updateNode.getRRList());
- if (updateNode.getFFList().getCountOfPosition() > 0)
- throw new IOException("Invalid merge detected! Node: " + node + " merging towards "
- + updateNode + "along" + (updateFlag & MessageFlag.DIR_MASK));
- break;
- case MessageFlag.DIR_RF:
- // flip edges
- node.getRFList().set(updateNode.getFFList());
- node.getRRList().set(updateNode.getFRList());
- if (updateNode.getRRList().getCountOfPosition() > 0)
- throw new IOException("Invalid merge detected! Node: " + node + " merging towards "
- + updateNode + "along" + (updateFlag & MessageFlag.DIR_MASK));
- break;
- case MessageFlag.DIR_RR:
- node.getRFList().set(updateNode.getRFList());
- node.getRRList().set(updateNode.getRRList());
- if (updateNode.getFRList().getCountOfPosition() > 0)
- throw new IOException("Invalid merge detected! Node: " + node + " merging towards "
- + updateNode + "along" + (updateFlag & MessageFlag.DIR_MASK));
- break;
- default:
- throw new IOException("Unrecognized direction in updateFlag: " + updateFlag);
- }
- }
- }
-
- public NodeWithFlagWritable() {
- this(0);
- }
-
- public NodeWithFlagWritable(int k) {
- this.flag = 0;
- this.node = new NodeWritable(k);
- }
-
- public NodeWithFlagWritable(byte flag, int kmerSize) {
- this.flag = flag;
- this.node = new NodeWritable(kmerSize);
- }
-
- public NodeWithFlagWritable(byte flag, NodeWritable node) {
- this(node.getKmer().getKmerLength());
- set(flag, node);
- }
-
- public NodeWithFlagWritable(NodeWithFlagWritable other) {
- this(other.flag, other.node);
- }
-
- public void set(NodeWithFlagWritable right) {
- set(right.getFlag(), right.getNode());
- }
-
- public void set(byte flag, NodeWritable node) {
- this.node.set(node);
- this.flag = flag;
- }
-
- @Override
- public void readFields(DataInput arg0) throws IOException {
- node.readFields(arg0);
- flag = arg0.readByte();
- }
-
- @Override
- public void write(DataOutput arg0) throws IOException {
- node.write(arg0);
- arg0.writeByte(flag);
- }
-
- public NodeWritable getNode() {
- return node;
- }
-
- public byte getFlag() {
- return this.flag;
- }
-
- public void setFlag(byte flag) {
- this.flag = flag;
- }
-
- public String toString() {
- return node.toString() + '\t' + String.valueOf(flag);
- }
-
- @Override
- public byte[] getBytes() {
- if (node.getCount() != 0) {
- return node.getKmer().getBytes();
- } else
- return null;
- }
-
- @Override
- public int getLength() {
- return node.getCount();
- }
-
- @Override
- public int hashCode() {
- // return super.hashCode() + flag + node.hashCode();
- return flag + node.hashCode();
- }
-
- @Override
- public boolean equals(Object rightObj) {
- if (rightObj instanceof NodeWithFlagWritable) {
- NodeWithFlagWritable rightMessage = (NodeWithFlagWritable) rightObj;
- return (this.flag == rightMessage.flag && this.node.equals(rightMessage.node));
- }
- return false;
- }
-
- public void setNode(NodeWritable otherNode) {
- node.set(otherNode);
- }
-}
\ No newline at end of file
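mirrorDirection above is an involution: FF and RR swap, while FR and RF are their own mirrors. A self-contained check of that property (the DIR_* encodings are assumed here; the real constants live in NodeWritable.DirectionFlag):

    public class DirectionDemo {
        // assumed two-bit encodings for the four edge types
        static final byte DIR_FF = 0b00, DIR_FR = 0b01, DIR_RF = 0b10, DIR_RR = 0b11;

        static byte mirror(byte dir) {
            switch (dir) {
                case DIR_FF: return DIR_RR;
                case DIR_FR: return DIR_FR; // FR is its own mirror
                case DIR_RF: return DIR_RF; // RF is its own mirror
                case DIR_RR: return DIR_FF;
                default: throw new IllegalArgumentException("dir " + dir);
            }
        }

        public static void main(String[] args) {
            for (byte d : new byte[] { DIR_FF, DIR_FR, DIR_RF, DIR_RR }) {
                if (mirror(mirror(d)) != d) throw new IllegalStateException("not an involution: " + d);
            }
            System.out.println("mirror is an involution on all four edge types");
        }
    }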
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/PathNodeInitial.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/PathNodeInitial.java
deleted file mode 100644
index 77a43bf..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/PathNodeInitial.java
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.pmcommon;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.RunningJob;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.mapred.lib.MultipleOutputs;
-import org.apache.hadoop.util.Tool;
-import org.apache.hadoop.util.ToolRunner;
-
-import edu.uci.ics.genomix.hadoop.pmcommon.NodeWithFlagWritable.MessageFlag;
-import edu.uci.ics.genomix.oldtype.NodeWritable;
-import edu.uci.ics.genomix.oldtype.PositionWritable;
-
-/*
- * A map-reduce job to find all nodes that are part of a simple path, mark the nodes that
- * form their heads and tails, and identify the parts of the graph that will participate in a path merge.
- *
- * This MR job uses MultipleOutputs rather than remapping the entire graph each iteration:
- * 1. simple path nodes (indegree = outdegree = 1) (TO_MERGE_OUTPUT collector)
- * 2. non-path, "complete" nodes, which will not be affected by the path merging (COMPLETE_OUTPUT collector)
- * 3. non-path, "possibly updated" nodes, whose edges need to be updated after the merge (TO_UPDATE_OUTPUT collector)
- */
-@SuppressWarnings("deprecation")
-public class PathNodeInitial extends Configured implements Tool {
-
- public static final String COMPLETE_OUTPUT = "complete";
- public static final String TO_UPDATE_OUTPUT = "toUpdate";
-
- private static byte NEAR_PATH = MessageFlag.EXTRA_FLAG; // special-case extra flag for us
-
- public static void sendOutputToNextNeighbors(NodeWritable node, NodeWithFlagWritable outputValue,
- OutputCollector<PositionWritable, NodeWithFlagWritable> collector) throws IOException {
- Iterator<PositionWritable> posIterator = node.getFFList().iterator(); // FFList
- while (posIterator.hasNext()) {
- collector.collect(posIterator.next(), outputValue);
- }
- posIterator = node.getFRList().iterator(); // FRList
- while (posIterator.hasNext()) {
- collector.collect(posIterator.next(), outputValue);
- }
- }
-
- public static void sendOutputToPreviousNeighbors(NodeWritable node, NodeWithFlagWritable outputValue,
- OutputCollector<PositionWritable, NodeWithFlagWritable> collector) throws IOException {
- Iterator<PositionWritable> posIterator = node.getRRList().iterator(); // RRList
- while (posIterator.hasNext()) {
- collector.collect(posIterator.next(), outputValue);
- }
- posIterator = node.getRFList().iterator(); // RFList
- while (posIterator.hasNext()) {
- collector.collect(posIterator.next(), outputValue);
- }
- }
-
- public static class PathNodeInitialMapper extends MapReduceBase implements
- Mapper<NodeWritable, NullWritable, PositionWritable, NodeWithFlagWritable> {
-
- private int KMER_SIZE;
- private NodeWithFlagWritable outputValue;
- private int inDegree;
- private int outDegree;
- private boolean pathNode;
-
- public void configure(JobConf conf) {
- KMER_SIZE = conf.getInt("sizeKmer", 0);
- outputValue = new NodeWithFlagWritable(KMER_SIZE);
- }
-
- /*
- * Identify the heads and tails of simple path nodes and their neighbors
- *
- * (non-Javadoc)
- * @see org.apache.hadoop.mapred.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
- */
- @Override
- public void map(NodeWritable key, NullWritable value,
- OutputCollector<PositionWritable, NodeWithFlagWritable> output, Reporter reporter) throws IOException {
- inDegree = key.inDegree();
- outDegree = key.outDegree();
- if (inDegree == 1 && outDegree == 1) {
- pathNode = true;
- } else if (inDegree == 0 && outDegree == 1) {
- pathNode = true;
- // start of a tip. needs to merge & be marked as head
- outputValue.set(MessageFlag.IS_HEAD, NodeWritable.EMPTY_NODE);
- output.collect(key.getNodeID(), outputValue);
- } else if (inDegree == 1 && outDegree == 0) {
- pathNode = true;
- // end of a tip. needs to merge & be marked as tail
- outputValue.set(MessageFlag.IS_TAIL, NodeWritable.EMPTY_NODE);
- output.collect(key.getNodeID(), outputValue);
- } else {
- pathNode = false;
- if (outDegree > 0) {
- // Not a path myself, but my successor might be one. Map forward successor to find heads
- outputValue.set(MessageFlag.IS_HEAD, NodeWritable.EMPTY_NODE);
- sendOutputToNextNeighbors(key, outputValue, output);
- }
- if (inDegree > 0) {
- // Not a path myself, but my predecessor might be one. map predecessor to find tails
- outputValue.set(MessageFlag.IS_TAIL, NodeWritable.EMPTY_NODE);
- sendOutputToPreviousNeighbors(key, outputValue, output);
- }
- // this non-path node won't participate in the merge. Mark as "complete" (H + T)
- outputValue.set((byte) (MessageFlag.MSG_SELF | MessageFlag.IS_HEAD | MessageFlag.IS_TAIL), key);
- output.collect(key.getNodeID(), outputValue);
- }
-
- if (pathNode) {
- // simple path nodes map themselves
- outputValue.set(MessageFlag.MSG_SELF, key);
- output.collect(key.getNodeID(), outputValue);
- reporter.incrCounter("genomix", "path_nodes", 1);
-
- // also mark neighbors of paths (they are candidates for updates)
- outputValue.set(NEAR_PATH, NodeWritable.EMPTY_NODE);
- sendOutputToNextNeighbors(key, outputValue, output);
- sendOutputToPreviousNeighbors(key, outputValue, output);
- }
- }
- }
-
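The mapper above classifies each node purely by its degree pair; as a quick reference, the decision table it implements (a standalone sketch, not part of the deleted file):

    public class PathClassifyDemo {
        static String classify(int inDegree, int outDegree) {
            if (inDegree == 1 && outDegree == 1) return "simple path node";
            if (inDegree == 0 && outDegree == 1) return "path head (start of a tip)";
            if (inDegree == 1 && outDegree == 0) return "path tail (end of a tip)";
            return "complete node (not mergeable)";
        }

        public static void main(String[] args) {
            System.out.println(classify(1, 1)); // simple path node
            System.out.println(classify(2, 1)); // complete node (not mergeable)
        }
    }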
- public static class PathNodeInitialReducer extends MapReduceBase implements
- Reducer<PositionWritable, NodeWithFlagWritable, PositionWritable, NodeWithFlagWritable> {
- private MultipleOutputs mos;
- private OutputCollector<PositionWritable, NodeWithFlagWritable> toUpdateCollector;
- private OutputCollector<NodeWritable, NullWritable> completeCollector;
- private int KMER_SIZE;
-
- private NodeWithFlagWritable inputValue;
- private NodeWithFlagWritable outputValue;
- private NodeWritable nodeToKeep;
- private byte outputFlag;
- private byte inputFlag;
- private boolean sawSelf;
-
- @Override
- public void configure(JobConf conf) {
- mos = new MultipleOutputs(conf);
- KMER_SIZE = conf.getInt("sizeKmer", 0);
- inputValue = new NodeWithFlagWritable(KMER_SIZE);
- outputValue = new NodeWithFlagWritable(KMER_SIZE);
- nodeToKeep = new NodeWritable(KMER_SIZE);
- }
-
- @Override
- public void close() throws IOException {
- mos.close();
- }
-
- /*
- * Segregate nodes into three bins:
- * 1. mergeable nodes (maybe marked H or T)
- * 2. non-mergeable nodes that are candidates for updates
- * 3. non-mergeable nodes that are not path neighbors and won't be updated
- *
- * (non-Javadoc)
- * @see org.apache.hadoop.mapred.Reducer#reduce(java.lang.Object, java.util.Iterator, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
- */
- @SuppressWarnings("unchecked")
- @Override
- public void reduce(PositionWritable key, Iterator<NodeWithFlagWritable> values,
- OutputCollector<PositionWritable, NodeWithFlagWritable> toMergeCollector, Reporter reporter)
- throws IOException {
- toUpdateCollector = mos.getCollector(TO_UPDATE_OUTPUT, reporter);
- completeCollector = mos.getCollector(COMPLETE_OUTPUT, reporter);
-
- outputFlag = MessageFlag.EMPTY_MESSAGE;
- sawSelf = false;
- while (values.hasNext()) {
- NodeWithFlagWritable next = values.next();
- inputValue.set(next);
- inputFlag = inputValue.getFlag();
- outputFlag |= inputFlag;
-
- if ((inputFlag & MessageFlag.MSG_SELF) > 0) {
- // SELF -> keep this node
- if (sawSelf) {
- throw new IOException("Already saw SELF node in PathNodeInitialReducer! previous self: "
- + nodeToKeep.toString() + ". current self: " + inputValue.getNode().toString());
- }
- sawSelf = true;
- nodeToKeep.set(inputValue.getNode());
- }
- }
- if (!sawSelf) {
- throw new IOException("Didn't see a self node in PathNodeInitial! flag: " + outputFlag);
- }
-
- if (!nodeToKeep.isSimpleOrTerminalPath()) {
- // non-path nodes need updates if adjacent to path nodes
- if ((outputFlag & NEAR_PATH) > 0) {
- outputValue.set(MessageFlag.EMPTY_MESSAGE, nodeToKeep);
- toUpdateCollector.collect(key, outputValue);
- } else {
- // not adjacent... store in "complete" output
- completeCollector.collect(nodeToKeep, NullWritable.get());
- }
- } else {
- if ((outputFlag & MessageFlag.IS_HEAD) > 0 && (outputFlag & MessageFlag.IS_TAIL) > 0) {
- // path node, but cannot merge in either direction => complete
- completeCollector.collect(nodeToKeep, NullWritable.get());
- } else {
- // path nodes that are mergeable
- outputFlag &= (MessageFlag.IS_HEAD | MessageFlag.IS_TAIL); // clear flags except H/T
- outputValue.set(outputFlag, nodeToKeep);
- toMergeCollector.collect(key, outputValue);
-
- reporter.incrCounter("genomix", "path_nodes", 1);
- if ((outputFlag & MessageFlag.IS_HEAD) > 0) {
- reporter.incrCounter("genomix", "path_nodes_heads", 1);
- }
- if ((outputFlag & MessageFlag.IS_TAIL) > 0) {
- reporter.incrCounter("genomix", "path_nodes_tails", 1);
- }
- }
- }
- }
- }
-
- /*
- * Mark the head, tail, and simple path nodes in one map-reduce job.
- */
- public RunningJob run(String inputPath, String toMergeOutput, String toUpdateOutput, String completeOutput,
- JobConf baseConf) throws IOException {
- JobConf conf = new JobConf(baseConf);
- conf.setJarByClass(PathNodeInitial.class);
- conf.setJobName("PathNodeInitial " + inputPath);
- conf.setMapOutputKeyClass(PositionWritable.class);
- conf.setMapOutputValueClass(NodeWithFlagWritable.class);
- conf.setOutputKeyClass(PositionWritable.class);
- conf.setOutputValueClass(NodeWithFlagWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
- conf.setOutputFormat(SequenceFileOutputFormat.class);
- FileInputFormat.setInputPaths(conf, inputPath);
- FileOutputFormat.setOutputPath(conf, new Path(toMergeOutput));
- MultipleOutputs.addNamedOutput(conf, TO_UPDATE_OUTPUT, MergePathMultiSeqOutputFormat.class,
- PositionWritable.class, NodeWithFlagWritable.class);
- MultipleOutputs.addNamedOutput(conf, COMPLETE_OUTPUT, MergePathMultiSeqOutputFormat.class, NodeWritable.class,
- NullWritable.class);
- FileSystem dfs = FileSystem.get(conf);
- dfs.delete(new Path(toMergeOutput), true); // clean output dir
-
- conf.setMapperClass(PathNodeInitialMapper.class);
- conf.setReducerClass(PathNodeInitialReducer.class);
- RunningJob job = JobClient.runJob(conf);
-
- // move the tmp outputs to the arg-spec'ed dirs
- if (!dfs.rename(new Path(toMergeOutput + File.separator + TO_UPDATE_OUTPUT), new Path(toUpdateOutput))) {
- dfs.mkdirs(new Path(toUpdateOutput));
- }
- if (!dfs.rename(new Path(toMergeOutput + File.separator + COMPLETE_OUTPUT), new Path(completeOutput))) {
- dfs.mkdirs(new Path(completeOutput));
- }
-
- return job;
- }
-
- @Override
- public int run(String[] args) throws Exception {
- int res = ToolRunner.run(new Configuration(), new PathNodeInitial(), args);
- return res;
- }
-
- public static void main(String[] args) throws Exception {
- int res = new PathNodeInitial().run(args);
- System.exit(res);
- }
-}
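Note: the head/tail bookkeeping removed above boils down to OR-ing message flags per key and then masking down to the H/T bits before forwarding. A stand-alone sketch of that idiom follows; the MessageFlag constants here are assumed stand-ins, not the project's real bit values.

    // Illustrative only; not part of the deleted sources.
    public class FlagMergeSketch {
        static final byte IS_HEAD = 0x01;   // assumed values; the real
        static final byte IS_TAIL = 0x02;   // MessageFlag constants may differ
        static final byte MSG_SELF = 0x04;

        public static void main(String[] args) {
            // messages seen for one key: a SELF record plus a head marker
            byte[] incoming = { MSG_SELF, IS_HEAD };
            byte merged = 0;
            for (byte flag : incoming) {
                merged |= flag; // accumulate every marker sent to this node
            }
            // keep only the head/tail bits before forwarding, as the reducer does
            byte forwarded = (byte) (merged & (IS_HEAD | IS_TAIL));
            System.out.printf("merged=0x%02x forwarded=0x%02x%n", merged, forwarded);
        }
    }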
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/SNodeInitialMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/SNodeInitialMapper.java
deleted file mode 100644
index 6aa0cf2..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/SNodeInitialMapper.java
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.pmcommon;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-@SuppressWarnings("deprecation")
-public class SNodeInitialMapper extends MapReduceBase implements
- Mapper<KmerBytesWritable, ByteWritable, KmerBytesWritable, MergePathValueWritable> {
-
- public int KMER_SIZE;
- public KmerBytesWritable outputKmer;
- public MergePathValueWritable outputAdjList;
-
- public void configure(JobConf job) {
- KMER_SIZE = Integer.parseInt(job.get("sizeKmer"));
- outputKmer = new KmerBytesWritable(KMER_SIZE);
- outputAdjList = new MergePathValueWritable();
- }
-
- /**
- * @param adjacent only the lower 4 bits are used; the high 4 bits are ignored
- * @return false if the degree == 1, true otherwise
- */
- boolean measureDegree(byte adjacent) {
- // the degree is 1 exactly when a single adjacency bit is set (1, 2, 4, or 8)
- return Integer.bitCount(adjacent & 0x0F) != 1;
- }
-
- @Override
- public void map(KmerBytesWritable key, ByteWritable value,
- OutputCollector<KmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- //TODO clean up this block; use the genomix-data function instead
- byte precursor = (byte) 0xF0;
- byte succeed = (byte) 0x0F;
- byte adjBitMap = value.get();
- byte bitFlag = (byte) 0;
- precursor = (byte) (precursor & adjBitMap);
- precursor = (byte) ((precursor & 0xff) >> 4);
- succeed = (byte) (succeed & adjBitMap);
- boolean inDegree = measureDegree(precursor);
- boolean outDegree = measureDegree(succeed);
- //if indegree == 1 and outdegree == 1, mark the record with flag 0x02
- if (!inDegree && !outDegree) {
- outputKmer.set(key);
- bitFlag = (byte) 0x02;
- outputAdjList.set(adjBitMap, bitFlag, null);
- output.collect(outputKmer, outputAdjList);
- } else {
- // other records map their predecessor neighbors
- /**
- * e.g. ACT CTA|CA: it maps CAC, TAC, and AAC, all three pairs marked 0x80
- */
- for (int i = 0; i < 4; i++) {
- byte temp = (byte) 0x01;
- byte shiftedCode = 0;
- temp = (byte) (temp << i);
- temp = (byte) (precursor & temp);
- if (temp != 0) {
- //TODO use the genomix-data factory function
- byte precurCode = GeneCode.getGeneCodeFromBitMap(temp);
- shiftedCode = key.shiftKmerWithPreCode(precurCode);
- outputKmer.set(key);
- bitFlag = (byte) 0x80;
- outputAdjList.set((byte) 0, bitFlag, null);
- output.collect(outputKmer, outputAdjList);
- key.shiftKmerWithNextCode(shiftedCode);
- }
- }
- //and also map their successor neighbors
- /**
- * e.g. ACT CTA|CA: it maps CTC and CTA, both pairs marked 0x01
- */
- for (int i = 0; i < 4; i++) {
- byte temp = (byte) 0x01;
- byte shiftedCode = 0;
- temp = (byte) (temp << i);
- temp = (byte) (succeed & temp);
- if (temp != 0) {
- byte succeedCode = GeneCode.getGeneCodeFromBitMap(temp);
- shiftedCode = key.shiftKmerWithNextCode(succeedCode);
- outputKmer.set(key);
- bitFlag = (byte) 0x01;
- outputAdjList.set((byte) 0, bitFlag, null);
- output.collect(outputKmer, outputAdjList);
- key.shiftKmerWithPreCode(shiftedCode);
- }
- }
- }
- }
-}
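The removed mapper's core trick is splitting the adjacency byte into a predecessor nibble (high 4 bits) and a successor nibble (low 4 bits), then testing each nibble's degree. A minimal, self-contained model of that bit manipulation, with a made-up example bitmap:

    public class AdjBitMapSketch {
        // degree == 1 exactly when one adjacency bit is set in the nibble
        static boolean degreeIsOne(byte nibble) {
            return Integer.bitCount(nibble & 0x0F) == 1;
        }

        public static void main(String[] args) {
            byte adjBitMap = (byte) 0x41; // predecessor bit 2, successor bit 0 (example value)
            byte precursor = (byte) ((adjBitMap & 0xF0) >> 4);
            byte succeed = (byte) (adjBitMap & 0x0F);
            // flag 0x02 is assigned only to 1-in/1-out nodes
            boolean simplePathNode = degreeIsOne(precursor) && degreeIsOne(succeed);
            System.out.println("1-in/1-out: " + simplePathNode); // true
        }
    }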
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/SNodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/SNodeInitialReducer.java
deleted file mode 100644
index 78b7b8b..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pmcommon/SNodeInitialReducer.java
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.pmcommon;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.lib.MultipleOutputs;
-
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-@SuppressWarnings("deprecation")
-public class SNodeInitialReducer extends MapReduceBase implements
- Reducer<KmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
- private VKmerBytesWritable outputKmer = new VKmerBytesWritable();
- private MergePathValueWritable outputValue = new MergePathValueWritable();
- MultipleOutputs mos = null;
-
- public void configure(JobConf job) {
- mos = new MultipleOutputs(job);
- }
-
- @SuppressWarnings("unchecked")
- @Override
- public void reduce(KmerBytesWritable key, Iterator<MergePathValueWritable> values,
- OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- outputKmer.set(key);
- outputValue = values.next();
- byte startPointFlag = 0x00;
- byte endPointFlag = 0x00;
- /**
- * the targetPoint is the record with indegree == 1 and outdegree == 1 inside a group that has multiple records
- */
- byte targetPointFlag = 0x00;
- byte targetAdjList = 0x00;
- //when the start or end point is found, outputFlag marks it
- byte outputFlag = 0x00;
-
- if (values.hasNext()) {
- //find startPointFlag, endPointFlag, targetPointFlag
-
- switch (outputValue.getFlag()) {
- case (byte) 0x01:
- startPointFlag = (byte) 0x01;
- break;
- case (byte) 0x80:
- endPointFlag = (byte) 0x80;
- break;
- case (byte) 0x02:
- targetPointFlag = (byte) 0x02;
- targetAdjList = outputValue.getAdjBitMap();
- break;
- }
- while (values.hasNext()) {
- outputValue = values.next();
- switch (outputValue.getFlag()) {
- case (byte) 0x01:
- startPointFlag = (byte) 0x01;
- break;
- case (byte) 0x80:
- endPointFlag = (byte) 0x80;
- break;
- case (byte) 0x02:
- targetPointFlag = (byte) 0x02;
- targetAdjList = outputValue.getAdjBitMap();
- break;
- }
- if (startPointFlag != (byte) 0x00 && endPointFlag != (byte) 0x00 && targetPointFlag != (byte) 0x00)
- break;
- }
- //only handle groups where the target (1-in/1-out) record was found
- if (targetPointFlag == (byte) 0x02) {
- //single-point path: both start and end were seen, so emit it to the comSinglePath0 output
- if (startPointFlag == (byte) 0x01 && endPointFlag == (byte) 0x80) {
- outputFlag = (byte) (outputFlag | startPointFlag);
- outputFlag = (byte) (outputFlag | endPointFlag);
- outputValue.set(targetAdjList, outputFlag, null);
- mos.getCollector("comSinglePath0", reporter).collect(outputKmer, outputValue);
- } else {
- if (startPointFlag == (byte) 0x01) {
- outputFlag = (byte) (outputFlag | startPointFlag);
- }
- if (endPointFlag == (byte) 0x80) {
- outputFlag = (byte) (outputFlag | endPointFlag);
- }
- outputValue.set(targetAdjList, outputFlag, null);
- output.collect(outputKmer, outputValue);
- }
- }
- } else {
- //keep the non-start/end single point in the input files
- if (outputValue.getFlag() == (byte) 0x02) {
- byte bitFlag = 0;
- outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);
- output.collect(outputKmer, outputValue);
- }
- }
- }
-
- public void close() throws IOException {
- mos.close();
- }
-}
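The removed reducer scans each group for the 0x01 (start), 0x80 (end), and 0x02 (target) markers, exiting early once all three are seen. The same logic without any Hadoop machinery, as a hypothetical stand-alone program:

    public class FlagScanSketch {
        public static void main(String[] args) {
            byte[] flags = { 0x02, 0x01, (byte) 0x80 }; // example group
            byte start = 0, end = 0, target = 0;
            for (byte f : flags) {
                switch (f) {
                    case 0x01: start = 0x01; break;
                    case (byte) 0x80: end = (byte) 0x80; break;
                    case 0x02: target = 0x02; break;
                }
                if (start != 0 && end != 0 && target != 0) {
                    break; // all three markers found; stop scanning
                }
            }
            byte outputFlag = (byte) (start | end);
            System.out.printf("target=0x%02x outputFlag=0x%02x%n", target, outputFlag);
        }
    }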
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatDriver.java
deleted file mode 100644
index 3768d3a..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatDriver.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hadoop.statistics;
-
-import java.io.IOException;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.Option;
-
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-@SuppressWarnings("deprecation")
-public class GenomixStatDriver {
- private static class Options {
- @Option(name = "-inputpath", usage = "the input path", required = true)
- public String inputPath;
-
- @Option(name = "-outputpath", usage = "the output path", required = true)
- public String outputPath;
-
- @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
- public int numReducers;
-
- }
-
- public void run(String inputPath, String outputPath, int numReducers, String defaultConfPath)
- throws IOException {
-
- JobConf conf = new JobConf(GenomixStatDriver.class);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
-
- conf.setJobName("Genomix Statistics");
- conf.setMapperClass(GenomixStatMapper.class);
- conf.setReducerClass(GenomixStatReducer.class);
- conf.setCombinerClass(GenomixStatReducer.class);
-
- conf.setMapOutputKeyClass(BytesWritable.class);
- conf.setMapOutputValueClass(KmerCountValue.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
- conf.setOutputFormat(SequenceFileOutputFormat.class);
-
- conf.setOutputKeyClass(BytesWritable.class);
- conf.setOutputValueClass(KmerCountValue.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath));
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- conf.setNumReduceTasks(numReducers);
-
- FileSystem dfs = FileSystem.get(conf);
- dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- }
-
- public static void main(String[] args) throws Exception {
- Options options = new Options();
- CmdLineParser parser = new CmdLineParser(options);
- parser.parseArgument(args);
- GenomixStatDriver driver = new GenomixStatDriver();
- driver.run(options.inputPath, options.outputPath, options.numReducers, null);
- }
-}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatMapper.java
deleted file mode 100644
index a6afcf9..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatMapper.java
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hadoop.statistics;
-
-import java.io.IOException;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-@SuppressWarnings({ "unused", "deprecation" })
-public class GenomixStatMapper extends MapReduceBase implements
- Mapper<BytesWritable, KmerCountValue, BytesWritable, KmerCountValue> {
-
- boolean measureDegree(byte adjacent) {
- // the degree is 1 exactly when a single adjacency bit is set (1, 2, 4, or 8)
- return Integer.bitCount(adjacent & 0x0F) != 1;
- }
- @Override
- public void map(BytesWritable key, KmerCountValue value, OutputCollector<BytesWritable, KmerCountValue> output,
- Reporter reporter) throws IOException {
- byte precursor = (byte) 0xF0;
- byte succeed = (byte) 0x0F;
- byte adj = value.getAdjBitMap();
- precursor = (byte) (precursor & adj);
- precursor = (byte) ((precursor & 0xff) >> 4);
- succeed = (byte) (succeed & adj);
- boolean inDegree = measureDegree(precursor);
- boolean outDegree = measureDegree(succeed);
- // keep records whose in-degree != 1 but whose out-degree == 1
- if (inDegree && !outDegree) {
- output.collect(key, value);
- }
- }
-}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatReducer.java
deleted file mode 100644
index a4ae561..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatReducer.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.statistics;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-@SuppressWarnings("deprecation")
-public class GenomixStatReducer extends MapReduceBase implements
- Reducer<BytesWritable, KmerCountValue, BytesWritable, KmerCountValue> {
- static enum MyCounters { NUM_RECORDS };
- KmerCountValue valWriter = new KmerCountValue();
- @Override
- public void reduce(BytesWritable key, Iterator<KmerCountValue> values,
- OutputCollector<BytesWritable, KmerCountValue> output, Reporter reporter) throws IOException {
- reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
- valWriter = values.next();
- output.collect(key, valWriter);
- }
-}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/DeepGraphBuildingMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/DeepGraphBuildingMapper.java
deleted file mode 100644
index 373a47c..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/DeepGraphBuildingMapper.java
+++ /dev/null
@@ -1,83 +0,0 @@
-package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
-
-import java.io.IOException;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.oldtype.PositionListWritable;
-import edu.uci.ics.genomix.oldtype.PositionWritable;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-
-@SuppressWarnings("deprecation")
-public class DeepGraphBuildingMapper extends MapReduceBase implements
- Mapper<KmerBytesWritable, PositionListWritable, PositionWritable, PositionListAndKmerWritable> {
-
- private PositionWritable positionEntry;
- private PositionWritable tempVertex;
- private PositionListWritable listPosZeroInRead;
- private PositionListWritable listPosNonZeroInRead;
- private PositionListWritable tempPosList;
- private PositionListAndKmerWritable outputListAndKmer;
- private static int LAST_POSID;
- private static int KMER_SIZE;
- private static int READ_LENGTH;
- @Override
- public void configure(JobConf job) {
- KMER_SIZE = Integer.parseInt(job.get("sizeKmer"));
- READ_LENGTH = Integer.parseInt(job.get("readLength"));
- positionEntry = new PositionWritable();
- tempVertex = new PositionWritable();
- listPosZeroInRead = new PositionListWritable();
- listPosNonZeroInRead = new PositionListWritable();
- tempPosList = new PositionListWritable();
- outputListAndKmer = new PositionListAndKmerWritable();
- LAST_POSID = READ_LENGTH - KMER_SIZE + 1;
- }
-
- private boolean isStart(byte posInRead) {
- return posInRead == 1 || posInRead == -LAST_POSID;
- }
-
- @Override
- public void map(KmerBytesWritable key, PositionListWritable value,
- OutputCollector<PositionWritable, PositionListAndKmerWritable> output, Reporter reporter)
- throws IOException {
- outputListAndKmer.reset();
- int tupleCount = value.getCountOfPosition();
- scanPosition(tupleCount, value);
- scanAgainAndOutput(listPosZeroInRead, listPosNonZeroInRead, key, output);
- scanAgainAndOutput(listPosNonZeroInRead, listPosZeroInRead, key, output);
- }
-
- public void scanPosition(int tupleCount, PositionListWritable value) {
- listPosZeroInRead.reset();
- listPosNonZeroInRead.reset();
- for (int i = 0; i < tupleCount; i++) {
- positionEntry.set(value.getPosition(i));
- if (isStart(positionEntry.getPosInRead())) {
- listPosZeroInRead.append(positionEntry);
- } else {
- listPosNonZeroInRead.append(positionEntry);
- }
- }
- }
-
- public void scanAgainAndOutput(PositionListWritable outputListInRead, PositionListWritable attriListInRead,
- KmerBytesWritable kmer, OutputCollector<PositionWritable, PositionListAndKmerWritable> output) throws IOException {
- for (int i = 0; i < outputListInRead.getCountOfPosition(); i++) {
- positionEntry.set(outputListInRead.getPosition(i));
- tempPosList.reset();
- for (int j = 0; j < attriListInRead.getCountOfPosition(); j++) {
- tempVertex.set(attriListInRead.getPosition(j));
- if (tempVertex.getReadID() != positionEntry.getReadID()) {
- tempPosList.append(tempVertex);
- }
- }
- outputListAndKmer.set(tempPosList, kmer);
- output.collect(positionEntry, outputListAndKmer);
- }
- }
-}
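The removed mapper buckets each kmer's positions by the isStart() predicate: the first forward position, or the mirrored last reverse position. A plain-Java model of that split, with assumed example values for readLength and kmerSize and plain ints standing in for PositionWritable:

    import java.util.ArrayList;
    import java.util.List;

    public class StartPositionSketch {
        public static void main(String[] args) {
            int readLength = 7, kmerSize = 3;       // example values
            int lastPosId = readLength - kmerSize + 1; // == 5
            int[] posInRead = { 1, 2, -5, 4 };
            List<Integer> starts = new ArrayList<>();
            List<Integer> others = new ArrayList<>();
            for (int p : posInRead) {
                // same predicate as isStart(): first forward kmer or last reverse kmer
                if (p == 1 || p == -lastPosId) {
                    starts.add(p);
                } else {
                    others.add(p);
                }
            }
            System.out.println("starts=" + starts + " others=" + others);
        }
    }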
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/DeepGraphBuildingReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/DeepGraphBuildingReducer.java
deleted file mode 100644
index 5c15d46..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/DeepGraphBuildingReducer.java
+++ /dev/null
@@ -1,200 +0,0 @@
-package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.oldtype.NodeWritable;
-import edu.uci.ics.genomix.oldtype.PositionListWritable;
-import edu.uci.ics.genomix.oldtype.PositionWritable;
-
-@SuppressWarnings("deprecation")
-public class DeepGraphBuildingReducer extends MapReduceBase implements
- Reducer<PositionWritable, PositionListAndKmerWritable, NodeWritable, NullWritable> {
- private PositionListAndKmerWritable curNodePosiListAndKmer = new PositionListAndKmerWritable();
- private PositionListAndKmerWritable curNodeNegaListAndKmer = new PositionListAndKmerWritable();
- private PositionListAndKmerWritable nextNodePosiListAndKmer = new PositionListAndKmerWritable();
- private PositionListAndKmerWritable nextNodeNegaListAndKmer = new PositionListAndKmerWritable();
-
- private NodeWritable curNode;
- private NodeWritable nextNode;
- private NodeWritable nextNextNode;
- private PositionListWritable incomingList = new PositionListWritable();
- private PositionListWritable outgoingList = new PositionListWritable();
- private NullWritable nullWritable = NullWritable.get();
- private int KMER_SIZE;
- private int LAST_POSID;
- private int READ_LENGTH;
-
- public void configure(JobConf job) {
- KMER_SIZE = Integer.parseInt(job.get("sizeKmer"));
- READ_LENGTH = Integer.parseInt(job.get("readLength"));
- LAST_POSID = READ_LENGTH - KMER_SIZE + 1;
- curNode = new NodeWritable(KMER_SIZE);
- nextNode = new NodeWritable(KMER_SIZE);
- nextNextNode = new NodeWritable(KMER_SIZE);
- }
-
- @Override
- public void reduce(PositionWritable key, Iterator<PositionListAndKmerWritable> values,
- OutputCollector<NodeWritable, NullWritable> output, Reporter reporter) throws IOException {
- int readID = key.getReadID();
- byte posInRead = (byte) 1;
- resetNode(curNode, readID, posInRead);
- assembleFirstTwoNodesInRead(curNodePosiListAndKmer, nextNodePosiListAndKmer, nextNodeNegaListAndKmer,
- incomingList, outgoingList, curNode, nextNode, values);
- posInRead++;
- assembleNodeFromValues(readID, posInRead, curNodePosiListAndKmer, curNodeNegaListAndKmer,
- nextNodePosiListAndKmer, nextNodeNegaListAndKmer, incomingList, outgoingList, curNode, nextNode, values);
- posInRead++;
- boolean flag = true;
- while (flag) {
- flag = assembleNodeFromValues(readID, posInRead, curNodePosiListAndKmer, curNodeNegaListAndKmer,
- nextNodePosiListAndKmer, nextNodeNegaListAndKmer, incomingList, outgoingList, nextNode,
- nextNextNode, values);
- posInRead++;
- if (curNode.inDegree() > 1 || curNode.outDegree() > 0 || nextNode.inDegree() > 0
- || nextNode.outDegree() > 0 || nextNextNode.inDegree() > 0 || nextNextNode.outDegree() > 0) {
- connect(curNode, nextNode);
- output.collect(curNode, nullWritable);
- curNode.set(nextNode);
- nextNode.set(nextNextNode);
- continue;
- }
- curNode.mergeForwardNext(nextNode, KMER_SIZE);
- nextNode.set(nextNextNode);
- }
- output.collect(curNode, nullWritable);
- }
-
- public boolean assembleNodeFromValues(int readID, byte posInRead, PositionListAndKmerWritable curNodePosiListAndKmer,
- PositionListAndKmerWritable curNodeNegaListAndKmer, PositionListAndKmerWritable nextNodePosiListAndKmer,
- PositionListAndKmerWritable nextNodeNegaListAndKmer, PositionListWritable outgoingList,
- PositionListWritable incomingList, NodeWritable curNode, NodeWritable nextNode,
- Iterator<PositionListAndKmerWritable> values) throws IOException {
- boolean flag = true;
- curNodePosiListAndKmer.set(nextNodePosiListAndKmer);
- curNodeNegaListAndKmer.set(nextNodeNegaListAndKmer);
- if (values.hasNext()) {
- nextNodeNegaListAndKmer.set(values.next());
- if (values.hasNext()) {
- nextNodePosiListAndKmer.set(values.next());
- } else {
- throw new IOException("lose the paired kmer from values");
- }
- outgoingList.reset();
- outgoingList.set(nextNodePosiListAndKmer.getVertexIDList());
- setForwardOutgoingList(curNode, outgoingList);
-
- resetNode(nextNode, readID, posInRead);
- nextNode.setKmer(nextNodePosiListAndKmer.getKmer());
-
- outgoingList.reset();
- outgoingList.set(curNodeNegaListAndKmer.getVertexIDList());
- setReverseOutgoingList(nextNode, outgoingList);
-
- if (nextNode.getNodeID().getPosInRead() == LAST_POSID) {
- incomingList.reset();
- incomingList.set(nextNodeNegaListAndKmer.getVertexIDList());
- setReverseIncomingList(nextNode, incomingList);
- }
- } else {
- flag = false;
- resetNode(nextNode, readID, (byte) 0);
- }
- return flag;
- }
-
- public void assembleFirstTwoNodesInRead(PositionListAndKmerWritable curNodePosiListAndKmer,
- PositionListAndKmerWritable nextNodePosiListAndKmer, PositionListAndKmerWritable nextNodeNegaListAndKmer,
- PositionListWritable outgoingList, PositionListWritable incomingList, NodeWritable curNode,
- NodeWritable nextNode, Iterator<PositionListAndKmerWritable> values) throws IOException {
- nextNodeNegaListAndKmer.set(values.next());
- if (values.hasNext()) {
- nextNodePosiListAndKmer.set(values.next());
- } else {
- throw new IOException("lost the paired kmer for the first two nodes of read "
- + curNode.getNodeID().getReadID());
- }
-
- if (curNode.getNodeID().getPosInRead() == LAST_POSID) {
- incomingList.reset();
- incomingList.set(nextNodeNegaListAndKmer.getVertexIDList());
- setReverseIncomingList(curNode, incomingList);
- }
- incomingList.reset();
- incomingList.set(nextNodePosiListAndKmer.getVertexIDList());
-
- curNode.setKmer(nextNodePosiListAndKmer.getKmer());
- setForwardIncomingList(curNode, incomingList);
- }
-
- private void resetNode(NodeWritable node, int readID, byte posInRead) {
- node.reset(KMER_SIZE);
- node.setNodeID(readID, posInRead);
- }
-
- private void setReverseOutgoingList(NodeWritable node, PositionListWritable posList) {
- for (PositionWritable pos : posList) {
- if (pos.getPosInRead() > 0) {
- node.getRFList().append(pos);
- } else {
- node.getRRList().append(pos.getReadID(), (byte) -pos.getPosInRead());
- }
- }
- }
-
- private void setReverseIncomingList(NodeWritable node, PositionListWritable posList) {
- for (PositionWritable pos : posList) {
- if (pos.getPosInRead() > 0) {
- if (pos.getPosInRead() > 1) {
- node.getFRList().append(pos.getReadID(), (byte) (pos.getPosInRead() - 1));
- } else {
- throw new IllegalArgumentException("Invalid position");
- }
- } else {
- if (pos.getPosInRead() > -LAST_POSID) {
- node.getFFList().append(pos.getReadID(), (byte) -(pos.getPosInRead() - 1));
- }
- }
- }
- }
-
- private void setForwardOutgoingList(NodeWritable node, PositionListWritable posList) {
- for (PositionWritable pos : posList) {
- if (pos.getPosInRead() > 0) {
- node.getFFList().append(pos);
- } else {
- node.getFRList().append(pos.getReadID(), (byte) -pos.getPosInRead());
- }
- }
- }
-
- private void setForwardIncomingList(NodeWritable node, PositionListWritable posList) {
- for (PositionWritable pos : posList) {
- if (pos.getPosInRead() > 0) {
- if (pos.getPosInRead() > 1) {
- node.getRRList().append(pos.getReadID(), (byte) (pos.getPosInRead() - 1));
- } else {
- throw new IllegalArgumentException("position id is invalid");
- }
- } else {
- if (pos.getPosInRead() > -LAST_POSID) {
- node.getRFList().append(pos.getReadID(), (byte) -(pos.getPosInRead() - 1));
- }
- }
- }
- }
-
- private void connect(NodeWritable curNode, NodeWritable nextNode) {
- curNode.getFFList().append(nextNode.getNodeID());
- nextNode.getRRList().append(curNode.getNodeID());
- }
-}
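The list-setting helpers removed above all branch on the sign of posInRead: positive values are forward-strand positions, negative values their reverse-strand mirrors. An illustrative-only sketch of that convention (the FF/FR list names are just labels here):

    public class StrandSketch {
        public static void main(String[] args) {
            byte[] positions = { 3, -2 }; // example positions
            for (byte pos : positions) {
                if (pos > 0) {
                    System.out.println("forward edge at pos " + pos);         // e.g. FF list
                } else {
                    System.out.println("reverse edge at pos " + (byte) -pos); // e.g. FR list, sign flipped
                }
            }
        }
    }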
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphBuildingDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphBuildingDriver.java
deleted file mode 100644
index 4d6b221..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphBuildingDriver.java
+++ /dev/null
@@ -1,161 +0,0 @@
-package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
-
-import java.io.IOException;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Partitioner;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.mapred.TextInputFormat;
-import org.apache.hadoop.mapred.TextOutputFormat;
-import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.Option;
-
-import edu.uci.ics.genomix.oldtype.NodeWritable;
-import edu.uci.ics.genomix.oldtype.PositionListWritable;
-import edu.uci.ics.genomix.oldtype.PositionWritable;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-
-@SuppressWarnings("deprecation")
-public class GraphBuildingDriver {
-
- private static class Options {
- @Option(name = "-inputpath", usage = "the input path", required = true)
- public String inputPath;
-
- @Option(name = "-outputpath", usage = "the output path", required = true)
- public String outputPath;
-
- @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
- public int numReducers;
-
- @Option(name = "-kmer-kmerByteSize", usage = "the kmerByteSize of kmer", required = true)
- public int sizeKmer;
-
- @Option(name = "-read-length", usage = "the length of read", required = true)
- public int readLength;
-
- @Option(name = "-onlytest1stjob", usage = "test", required = true)
- public String onlyTest1stJob;
-
- @Option(name = "-seq-output", usage = "sequence ouput format", required = true)
- public String seqOutput;
- }
-
- public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, int readLength,
- boolean onlyTest1stJob, boolean seqOutput, String defaultConfPath) throws IOException {
- if (onlyTest1stJob) {
- runfirstjob(inputPath, outputPath, numReducers, sizeKmer, readLength, seqOutput, defaultConfPath);
- } else {
- runfirstjob(inputPath, inputPath + "-tmp", numReducers, sizeKmer, readLength, true, defaultConfPath);
- runsecondjob(inputPath + "-tmp", outputPath, numReducers, sizeKmer, readLength, seqOutput, defaultConfPath);
- }
- }
-
- public void runfirstjob(String inputPath, String outputPath, int numReducers, int sizeKmer, int readLength, boolean seqOutput,
- String defaultConfPath) throws IOException {
- JobConf conf = new JobConf(GraphBuildingDriver.class);
- conf.setInt("sizeKmer", sizeKmer);
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
-
- conf.setJobName("graph building");
- conf.setMapperClass(GraphInvertedIndexBuildingMapper.class);
- conf.setReducerClass(GraphInvertedIndexBuildingReducer.class);
-
- conf.setMapOutputKeyClass(KmerBytesWritable.class);
- conf.setMapOutputValueClass(PositionWritable.class);
-
- conf.setInputFormat(TextInputFormat.class);
- if (seqOutput)
- conf.setOutputFormat(SequenceFileOutputFormat.class);
- else
- conf.setOutputFormat(TextOutputFormat.class);
-
- conf.setOutputKeyClass(KmerBytesWritable.class);
- conf.setOutputValueClass(PositionListWritable.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath));
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- if (numReducers == 0)
- conf.setNumReduceTasks(numReducers + 2);
- else
- conf.setNumReduceTasks(numReducers);
-
- FileSystem dfs = FileSystem.get(conf);
- dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- }
-
- public void runsecondjob(String inputPath, String outputPath, int numReducers, int sizeKmer, int readLength,
- boolean seqOutput, String defaultConfPath) throws IOException {
- JobConf conf = new JobConf(GraphBuildingDriver.class);
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
- conf.setJobName("deep build");
- conf.setInt("sizeKmer", sizeKmer);
- conf.setInt("readLength", readLength);
-
- conf.setMapperClass(DeepGraphBuildingMapper.class);
- conf.setReducerClass(DeepGraphBuildingReducer.class);
-
- conf.setMapOutputKeyClass(PositionWritable.class);
- conf.setMapOutputValueClass(PositionListAndKmerWritable.class);
-
- conf.setPartitionerClass(ReadIDPartitioner.class);
-
- // grouping is done on the readID only; sorting is based on the (readID, abs(posn))
- conf.setOutputKeyComparatorClass(PositionWritable.Comparator.class);
- conf.setOutputValueGroupingComparator(PositionWritable.FirstComparator.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
- if (seqOutput)
- conf.setOutputFormat(SequenceFileOutputFormat.class);
- else
- conf.setOutputFormat(TextOutputFormat.class);
-
- if (numReducers != 0) {
- conf.setOutputKeyClass(NodeWritable.class);
- conf.setOutputValueClass(NullWritable.class);
- } else {
- conf.setOutputKeyClass(PositionWritable.class);
- conf.setOutputValueClass(PositionListAndKmerWritable.class);
- }
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath));
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- conf.setNumReduceTasks(numReducers);
- FileSystem dfs = FileSystem.get(conf);
- dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- }
-
- public static void main(String[] args) throws Exception {
- Options options = new Options();
- CmdLineParser parser = new CmdLineParser(options);
- parser.parseArgument(args);
- GraphBuildingDriver driver = new GraphBuildingDriver();
- boolean onlyTest1stJob = options.onlyTest1stJob.equals("true");
- boolean seqOutput = options.seqOutput.equals("true");
- driver.run(options.inputPath, options.outputPath, options.numReducers, options.sizeKmer, options.readLength,
- onlyTest1stJob, seqOutput, null);
- }
-}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphInvertedIndexBuildingMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphInvertedIndexBuildingMapper.java
deleted file mode 100644
index a634318..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphInvertedIndexBuildingMapper.java
+++ /dev/null
@@ -1,74 +0,0 @@
-package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
-
-import java.io.IOException;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.oldtype.PositionWritable;
-import edu.uci.ics.genomix.type.GeneCode;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-
-@SuppressWarnings("deprecation")
-public class GraphInvertedIndexBuildingMapper extends MapReduceBase implements
- Mapper<LongWritable, Text, KmerBytesWritable, PositionWritable> {
-
- public static int KMER_SIZE;
- public PositionWritable outputVertexID;
- public KmerBytesWritable outputKmer;
-
- @Override
- public void configure(JobConf job) {
- KMER_SIZE = Integer.parseInt(job.get("sizeKmer"));
- outputVertexID = new PositionWritable();
- outputKmer = new KmerBytesWritable(KMER_SIZE);
- }
-
- @Override
- public void map(LongWritable key, Text value, OutputCollector<KmerBytesWritable, PositionWritable> output,
- Reporter reporter) throws IOException {
- /** first kmer */
- String[] rawLine = value.toString().split("\\t"); // each line is "readID<TAB>sequence"
- if (rawLine.length != 2) {
- throw new IOException("invalid data");
- }
- int readID = Integer.parseInt(rawLine[0]);
- String geneLine = rawLine[1];
- Pattern genePattern = Pattern.compile("[AGCT]+");
- Matcher geneMatcher = genePattern.matcher(geneLine);
- boolean isValid = geneMatcher.matches();
- if (isValid) {
- byte[] array = geneLine.getBytes();
- if (KMER_SIZE >= array.length) {
- throw new IOException("short read");
- }
- outputKmer.setByRead(array, 0);
- outputVertexID.set(readID, (byte) 1);
- output.collect(outputKmer, outputVertexID);
- /** middle kmer */
- for (int i = KMER_SIZE; i < array.length; i++) {
- outputKmer.shiftKmerWithNextChar(array[i]);
- outputVertexID.set(readID, (byte) (i - KMER_SIZE + 2));
- output.collect(outputKmer, outputVertexID);
- }
- /** reverse first kmer */
- outputKmer.setByReadReverse(array, 0);
- outputVertexID.set(readID, (byte) -1);
- output.collect(outputKmer, outputVertexID);
- /** reverse middle kmer */
- for (int i = KMER_SIZE; i < array.length; i++) {
- outputKmer.shiftKmerWithPreCode(GeneCode.getPairedCodeFromSymbol(array[i]));
- outputVertexID.set(readID, (byte) (KMER_SIZE - i - 2));
- output.collect(outputKmer, outputVertexID);
- }
- }
- }
-}
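The removed mapper enumerates kmers by sliding a window of size k along the read, assigning 1-based positions. A stand-alone sketch of the forward pass (reverse-complement handling omitted for brevity; the read and k are example values):

    public class KmerWindowSketch {
        public static void main(String[] args) {
            String read = "AGCTAC";
            int k = 3;
            for (int i = 0; i + k <= read.length(); i++) {
                String kmer = read.substring(i, i + k);
                int posInRead = i + 1; // the first kmer gets position 1
                System.out.println(kmer + " -> pos " + posInRead);
            }
        }
    }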
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphInvertedIndexBuildingReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphInvertedIndexBuildingReducer.java
deleted file mode 100644
index fee5833..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphInvertedIndexBuildingReducer.java
+++ /dev/null
@@ -1,28 +0,0 @@
-package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.oldtype.PositionListWritable;
-import edu.uci.ics.genomix.oldtype.PositionWritable;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-
-@SuppressWarnings({ "deprecation", "unused" })
-public class GraphInvertedIndexBuildingReducer extends MapReduceBase implements
- Reducer<KmerBytesWritable, PositionWritable, KmerBytesWritable, PositionListWritable> {
- PositionListWritable outputlist = new PositionListWritable();
- @Override
- public void reduce(KmerBytesWritable key, Iterator<PositionWritable> values,
- OutputCollector<KmerBytesWritable, PositionListWritable> output, Reporter reporter) throws IOException {
- outputlist.reset();
- while (values.hasNext()) {
- outputlist.append(values.next());
- }
- output.collect(key, outputlist);
- }
-}
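The removed reducer just concatenates every position that shares a kmer, i.e. it materializes an inverted index. The same grouping modeled with JDK collections, using Strings and ints as stand-ins for the Writable types:

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;
    import java.util.TreeMap;

    public class InvertedIndexSketch {
        public static void main(String[] args) {
            String[][] pairs = { { "AGC", "1" }, { "GCT", "2" }, { "AGC", "7" } };
            Map<String, List<Integer>> index = new TreeMap<>();
            for (String[] pair : pairs) {
                // append each position to its kmer's list, as the reducer does
                index.computeIfAbsent(pair[0], kmer -> new ArrayList<>())
                     .add(Integer.parseInt(pair[1]));
            }
            System.out.println(index); // {AGC=[1, 7], GCT=[2]}
        }
    }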
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/PositionListAndKmerWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/PositionListAndKmerWritable.java
deleted file mode 100644
index 550cc7c..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/PositionListAndKmerWritable.java
+++ /dev/null
@@ -1,87 +0,0 @@
-package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import org.apache.hadoop.io.WritableComparable;
-
-import edu.uci.ics.genomix.oldtype.PositionListWritable;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-
-public class PositionListAndKmerWritable implements WritableComparable<PositionListAndKmerWritable> {
-
- private PositionListWritable vertexIDList;
- private KmerBytesWritable kmer;
- private int countOfKmer;
-
- public PositionListAndKmerWritable(){
- countOfKmer = 0;
- vertexIDList = new PositionListWritable();
- kmer = new KmerBytesWritable();
- }
-
- public PositionListAndKmerWritable(int kmerSize) {
- countOfKmer = 0;
- vertexIDList = new PositionListWritable();
- kmer = new KmerBytesWritable(kmerSize);
- }
-
- public int getCount() {
- return countOfKmer;
- }
-
- public void setCount(int count) {
- this.countOfKmer = count;
- }
-
- public void setvertexIDList(PositionListWritable posList) {
- vertexIDList.set(posList);
- }
-
- public void reset() {
- vertexIDList.reset();
- countOfKmer = 0;
- }
-
- public PositionListWritable getVertexIDList() {
- return vertexIDList;
- }
-
- public KmerBytesWritable getKmer() {
- return kmer;
- }
-
- public void set(PositionListAndKmerWritable right) {
- this.countOfKmer = right.countOfKmer;
- this.vertexIDList.set(right.vertexIDList);
- this.kmer.setAsCopy(right.kmer);
- }
-
- public void set(PositionListWritable list, KmerBytesWritable kmer) {
- this.vertexIDList.set(list);
- this.kmer.setAsCopy(kmer);
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- this.countOfKmer = in.readInt();
- this.vertexIDList.readFields(in);
- this.kmer.readFields(in);
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- out.writeInt(this.countOfKmer);
- this.vertexIDList.write(out);
- this.kmer.write(out);
- }
-
- @Override
- public int compareTo(PositionListAndKmerWritable o) {
- return 0; // all instances compare as equal
- }
-
- public String toString() {
- return vertexIDList.toString() + "\t" + kmer.toString();
- }
-}
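readFields() and write() in the removed class must mirror each other field by field (count, then list, then kmer). A minimal roundtrip of the same idea using only java.io, with toy stand-ins for the two complex fields:

    import java.io.*;

    public class WritableOrderSketch {
        public static void main(String[] args) throws IOException {
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            DataOutputStream out = new DataOutputStream(buffer);
            out.writeInt(2);           // countOfKmer
            out.writeUTF("1,3;8,2");   // stand-in for vertexIDList
            out.writeUTF("AGC");       // stand-in for kmer
            out.flush();

            DataInputStream in = new DataInputStream(new ByteArrayInputStream(buffer.toByteArray()));
            // fields must be read back in exactly the order they were written
            System.out.println(in.readInt() + " " + in.readUTF() + " " + in.readUTF());
        }
    }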
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/ReadIDPartitioner.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/ReadIDPartitioner.java
deleted file mode 100644
index 38c608a..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/ReadIDPartitioner.java
+++ /dev/null
@@ -1,20 +0,0 @@
-package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
-
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.Partitioner;
-
-import edu.uci.ics.genomix.oldtype.PositionListWritable;
-import edu.uci.ics.genomix.oldtype.PositionWritable;
-
-public class ReadIDPartitioner implements Partitioner<PositionWritable, PositionListAndKmerWritable>{
-
- @Override
- public int getPartition(PositionWritable key, PositionListAndKmerWritable value, int numPartitions){
- return (key.getReadID() & Integer.MAX_VALUE) % numPartitions;
- }
-
- @Override
- public void configure(JobConf arg0) {
- }
-}
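The removed partitioner masks the readID with Integer.MAX_VALUE before taking the modulus, because Java's % can yield a negative result for a negative operand and partition indices must be non-negative. A small demonstration with example readIDs:

    public class PartitionSketch {
        public static void main(String[] args) {
            int numPartitions = 4;
            int[] readIDs = { 10, -7 };
            for (int readID : readIDs) {
                int naive = readID % numPartitions;                       // may be negative
                int masked = (readID & Integer.MAX_VALUE) % numPartitions; // always >= 0
                System.out.println(readID + ": naive=" + naive + " masked=" + masked);
            }
        }
    }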
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingTest.java
deleted file mode 100644
index ea05e53..0000000
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingTest.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.gbresultschecking;
-
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MiniMRCluster;
-import org.junit.Test;
-
-import edu.uci.ics.genomix.hadoop.gbresultschecking.ResultsCheckingDriver;
-
-@SuppressWarnings("deprecation")
-public class ResultsCheckingTest {
- private static final String ACTUAL_RESULT_DIR = "actual4";
- private JobConf conf = new JobConf();
- private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
- private static final String DATA_PATH1 = "ResultsCheckingData" + "/part-00000";
- private static final String DATA_PATH2 = "ResultsCheckingData" + "/part-00001";
- private static final String HDFS_PATH1 = "/webmap1";
- private static final String HDFS_PATH2 = "/webmap2";
- private static final String RESULT_PATH = "/result4";
- private static final int COUNT_REDUCER = 4;
- private static final int SIZE_KMER = 3;
- private MiniDFSCluster dfsCluster;
- private MiniMRCluster mrCluster;
- private FileSystem dfs;
-
- @Test
- public void test() throws Exception {
- FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
- FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
- startHadoop();
- ResultsCheckingDriver tldriver = new ResultsCheckingDriver();
- tldriver.run(HDFS_PATH1, HDFS_PATH2, RESULT_PATH, COUNT_REDUCER, SIZE_KMER, HADOOP_CONF_PATH);
- dumpResult();
- cleanupHadoop();
-
- }
- private void startHadoop() throws IOException {
- FileSystem lfs = FileSystem.getLocal(new Configuration());
- lfs.delete(new Path("build"), true);
- System.setProperty("hadoop.log.dir", "logs");
- dfsCluster = new MiniDFSCluster(conf, 2, true, null);
- dfs = dfsCluster.getFileSystem();
- mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
-
- Path src = new Path(DATA_PATH1);
- Path dest = new Path(HDFS_PATH1 + "/");
- dfs.mkdirs(dest);
- dfs.copyFromLocalFile(src, dest);
- src = new Path(DATA_PATH2);
- dest = new Path(HDFS_PATH2 + "/");
- dfs.copyFromLocalFile(src, dest);
-
- DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
- conf.writeXml(confOutput);
- confOutput.flush();
- confOutput.close();
- }
-
- private void cleanupHadoop() throws IOException {
- mrCluster.shutdown();
- dfsCluster.shutdown();
- }
-
- private void dumpResult() throws IOException {
- Path src = new Path(RESULT_PATH);
- Path dest = new Path(ACTUAL_RESULT_DIR + "/");
- dfs.copyToLocalFile(src, dest);
- }
-}
-
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphbuilding/GraphBuildingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphbuilding/GraphBuildingTest.java
deleted file mode 100755
index 7d55dc7..0000000
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphbuilding/GraphBuildingTest.java
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.graphbuilding;
-
-import java.io.BufferedWriter;
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileWriter;
-import java.io.IOException;
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MiniMRCluster;
-import org.apache.hadoop.util.ReflectionUtils;
-import org.junit.Test;
-
-import edu.uci.ics.genomix.hadoop.graphbuilding.GenomixDriver;
-import edu.uci.ics.genomix.hadoop.utils.TestUtils;
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-/**
- * This class tests the correctness of the graph-building program
- */
-@SuppressWarnings("deprecation")
-public class GraphBuildingTest {
-
- private static final String ACTUAL_RESULT_DIR = "actual1";
- private static final String COMPARE_DIR = "compare";
- private JobConf conf = new JobConf();
- private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
- private static final String DATA_PATH = "data/webmap/Test.txt";
- private static final String HDFS_PATH = "/webmap";
- private static final String RESULT_PATH = "/result1";
- private static final String EXPECTED_PATH = "expected/result1";
- private static final String TEST_SOURCE_DIR = COMPARE_DIR + RESULT_PATH;
- private static final int COUNT_REDUCER = 4;
- private static final int SIZE_KMER = 3;
- private static final String GRAPHVIZ = "Graphviz";
-
- private MiniDFSCluster dfsCluster;
- private MiniMRCluster mrCluster;
- private FileSystem dfs;
-
- @SuppressWarnings("resource")
- @Test
- public void test() throws Exception {
- FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
- FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
- startHadoop();
-
- // run graph transformation tests
- GenomixDriver tldriver = new GenomixDriver();
- tldriver.run(HDFS_PATH, RESULT_PATH, COUNT_REDUCER, SIZE_KMER, HADOOP_CONF_PATH);
-
- SequenceFile.Reader reader = null;
- Path path = new Path(RESULT_PATH + "/part-00000");
- reader = new SequenceFile.Reader(dfs, path, conf);
- KmerBytesWritable key = new KmerBytesWritable(SIZE_KMER);
- KmerCountValue value = (KmerCountValue) ReflectionUtils.newInstance(reader.getValueClass(), conf);
- File filePathTo = new File(TEST_SOURCE_DIR);
- FileUtils.forceMkdir(filePathTo);
- FileUtils.cleanDirectory(filePathTo);
- BufferedWriter bw = new BufferedWriter(new FileWriter(new File(TEST_SOURCE_DIR + "/comparesource.txt")));
- File GraphViz = new File(GRAPHVIZ);
- FileUtils.forceMkdir(GraphViz);
- FileUtils.cleanDirectory(GraphViz);
- BufferedWriter bw2 = new BufferedWriter(new FileWriter(new File(GRAPHVIZ + "/GenomixSource.txt")));
-
- while (reader.next(key, value)) {
- byte succeed = (byte) 0x0F;
- byte adjBitMap = value.getAdjBitMap();
- succeed = (byte) (succeed & adjBitMap);
- byte shiftedCode = 0;
- for(int i = 0 ; i < 4; i ++){
- byte temp = 0x01;
- temp = (byte)(temp << i);
- temp = (byte) (succeed & temp);
- if(temp != 0 ){
- bw2.write(key.toString());
- bw2.newLine();
- byte succeedCode = GeneCode.getGeneCodeFromBitMap(temp);
- shiftedCode = key.shiftKmerWithNextCode(succeedCode);
- bw2.write(key.toString());
- bw2.newLine();
- key.shiftKmerWithPreCode(shiftedCode);
- }
- }
- bw.write(key.toString() + "\t" + value.toString());
- bw.newLine();
- }
- bw2.close();
- bw.close();
-
- dumpResult();
-// TestUtils.compareWithResult(new File(TEST_SOURCE_DIR + "/comparesource.txt"), new File(EXPECTED_PATH));
-
- cleanupHadoop();
-
- }
-
- private void startHadoop() throws IOException {
- FileSystem lfs = FileSystem.getLocal(new Configuration());
- lfs.delete(new Path("build"), true);
- System.setProperty("hadoop.log.dir", "logs");
- dfsCluster = new MiniDFSCluster(conf, 2, true, null);
- dfs = dfsCluster.getFileSystem();
- mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
-
- Path src = new Path(DATA_PATH);
- Path dest = new Path(HDFS_PATH + "/");
- dfs.mkdirs(dest);
- dfs.copyFromLocalFile(src, dest);
-
- DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
- conf.writeXml(confOutput);
- confOutput.flush();
- confOutput.close();
- }
-
- private void cleanupHadoop() throws IOException {
- mrCluster.shutdown();
- dfsCluster.shutdown();
- }
-
- private void dumpResult() throws IOException {
- Path src = new Path(RESULT_PATH);
- Path dest = new Path(ACTUAL_RESULT_DIR);
- dfs.copyToLocalFile(src, dest);
- }
-}
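The while-loop above treats the low nibble of adjBitMap as a four-bit successor mask and emits one Graphviz edge per set bit. Below is a minimal, self-contained sketch of that decoding step; the bit-to-base order is an assumption here, since the deleted test delegated the mapping to GeneCode.getGeneCodeFromBitMap.

public class AdjBitMapDemo {
    // Assumed bit-to-base order; the real mapping lived in GeneCode.
    static final char[] CODES = { 'A', 'C', 'G', 'T' };

    public static void main(String[] args) {
        byte adjBitMap = (byte) 0b0101_1010;      // example adjacency byte
        byte succeed = (byte) (adjBitMap & 0x0F); // keep only the successor nibble
        for (int i = 0; i < 4; i++) {
            if ((succeed & (1 << i)) != 0) {
                System.out.println("edge to next base: " + CODES[i]);
            }
        }
    }
}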
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/TestPathMergeH3.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/TestPathMergeH3.java
deleted file mode 100644
index 239a2f7..0000000
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/TestPathMergeH3.java
+++ /dev/null
@@ -1,181 +0,0 @@
-package edu.uci.ics.genomix.hadoop.graphclean.mergepaths.h3;
-
-import java.io.File;
-import java.io.FileFilter;
-import java.io.IOException;
-import java.nio.file.FileSystems;
-import java.nio.file.FileVisitResult;
-import java.nio.file.Files;
-import java.nio.file.PathMatcher;
-import java.nio.file.Paths;
-import java.nio.file.attribute.BasicFileAttributes;
-import java.nio.file.SimpleFileVisitor;
-import java.util.ArrayList;
-import java.util.Arrays;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.io.filefilter.WildcardFileFilter;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobConf;
-import org.junit.Test;
-
-import edu.uci.ics.genomix.hadoop.pmcommon.GenomixMiniClusterTest;
-import edu.uci.ics.genomix.hadoop.pmcommon.PathNodeInitial;
-import edu.uci.ics.genomix.hyracks.driver.Driver.Plan;
-import edu.uci.ics.genomix.hyracks.job.GenomixJobConf;
-
-@SuppressWarnings("deprecation")
-public class TestPathMergeH3 extends GenomixMiniClusterTest {
- protected String LOCAL_SEQUENCE_FILE = "src/test/resources/data/sequence/bubble_test1.txt";
- protected String HDFS_SEQUENCE = "/00-sequence/";
- protected String HDFS_GRAPHBUILD = "/01-graphbuild/";
- protected String HDFS_MARKPATHS = "/02-pathmark/";
- protected String HDFS_MERGED = "/03-pathmerge/";
-
- protected String GRAPHBUILD_FILE = "graphbuild.result";
- protected String PATHMARKS_FILE = "markpaths.result";
- protected String PATHMERGE_FILE = "mergepath.result";
- protected boolean regenerateGraph = true;
-
- {
- KMER_LENGTH = 5;
- READ_LENGTH = 8;
- HDFS_PATHS = new ArrayList<String>(Arrays.asList(HDFS_SEQUENCE, HDFS_GRAPHBUILD, HDFS_MARKPATHS, HDFS_MERGED));
- conf.setInt(GenomixJobConf.KMER_LENGTH, KMER_LENGTH);
- conf.setInt(GenomixJobConf.READ_LENGTH, READ_LENGTH);
- }
-
- /*
- * Build all graphs in any "input/reads" directory
- */
- @Test
- public void BuildAllGraphs() throws Exception {
- final PathMatcher matcher = FileSystems.getDefault().getPathMatcher("glob:**/src/test/resources/input/reads/**/*.txt");
- Files.walkFileTree(Paths.get("."), new SimpleFileVisitor<java.nio.file.Path>() {
- @Override
- public FileVisitResult visitFile(java.nio.file.Path file, BasicFileAttributes attrs) throws IOException {
- if (matcher.matches(file)) {
- TestPathMergeH3 tester = new TestPathMergeH3();
- tester.LOCAL_SEQUENCE_FILE = file.toString();
- tester.GRAPHBUILD_FILE = file.getFileName().toString();
- tester.cleanUpOutput();
- TestPathMergeH3.copyLocalToDFS(tester.LOCAL_SEQUENCE_FILE, tester.HDFS_SEQUENCE);
- try {
- tester.buildGraph();
- } catch (Exception e) {
- throw new IOException(e);
- }
- }
- return FileVisitResult.CONTINUE;
- }
-
- @Override
- public FileVisitResult visitFileFailed(java.nio.file.Path file, IOException exc) throws IOException {
- return FileVisitResult.CONTINUE;
- }
- });
-
- }
-
-// @Test
- public void TestBuildGraph() throws Exception {
- LOCAL_SEQUENCE_FILE = "src/test/resources/data/sequence/bubble_test1.txt";
- GRAPHBUILD_FILE = "bubble_test1.txt";
- cleanUpOutput();
- copyLocalToDFS(LOCAL_SEQUENCE_FILE, HDFS_SEQUENCE);
- buildGraph();
-
- LOCAL_SEQUENCE_FILE = "src/test/resources/data/sequence/fr_test.txt";
- GRAPHBUILD_FILE = "fr_test.txt";
- cleanUpOutput();
- copyLocalToDFS(LOCAL_SEQUENCE_FILE, HDFS_SEQUENCE);
- buildGraph();
-
- LOCAL_SEQUENCE_FILE = "src/test/resources/data/sequence/fr_test2.txt";
- GRAPHBUILD_FILE = "fr_test2.txt";
- cleanUpOutput();
- copyLocalToDFS(LOCAL_SEQUENCE_FILE, HDFS_SEQUENCE);
- buildGraph();
-
- LOCAL_SEQUENCE_FILE = "src/test/resources/data/sequence/rf_test.txt";
- GRAPHBUILD_FILE = "rf_test.txt";
- cleanUpOutput();
- copyLocalToDFS(LOCAL_SEQUENCE_FILE, HDFS_SEQUENCE);
- buildGraph();
-
- LOCAL_SEQUENCE_FILE = "src/test/resources/data/sequence/singleread.txt";
- GRAPHBUILD_FILE = "single_read.txt";
- cleanUpOutput();
- copyLocalToDFS(LOCAL_SEQUENCE_FILE, HDFS_SEQUENCE);
- buildGraph();
-
- LOCAL_SEQUENCE_FILE = "src/test/resources/data/sequence/text.txt";
- GRAPHBUILD_FILE = "text.txt";
- cleanUpOutput();
- copyLocalToDFS(LOCAL_SEQUENCE_FILE, HDFS_SEQUENCE);
- buildGraph();
-
- LOCAL_SEQUENCE_FILE = "src/test/resources/data/sequence/tworeads.txt";
- GRAPHBUILD_FILE = "tworeads.txt";
- cleanUpOutput();
- copyLocalToDFS(LOCAL_SEQUENCE_FILE, HDFS_SEQUENCE);
- buildGraph();
-
- LOCAL_SEQUENCE_FILE = "src/test/resources/data/sequence/tip_test1.txt";
- GRAPHBUILD_FILE = "tip_test1.txt";
- cleanUpOutput();
- copyLocalToDFS(LOCAL_SEQUENCE_FILE, HDFS_SEQUENCE);
- buildGraph();
-
- LOCAL_SEQUENCE_FILE = "src/test/resources/data/sequence/fr_with_tip.txt";
- GRAPHBUILD_FILE = "fr_with_tip.txt";
- cleanUpOutput();
- copyLocalToDFS(LOCAL_SEQUENCE_FILE, HDFS_SEQUENCE);
- buildGraph();
-
- LOCAL_SEQUENCE_FILE = "src/test/resources/data/sequence/walk_random_seq1.txt";
- GRAPHBUILD_FILE = "walk_random_seq1.txt";
- cleanUpOutput();
- copyLocalToDFS(LOCAL_SEQUENCE_FILE, HDFS_SEQUENCE);
- buildGraph();
- }
-
-// @Test
- public void TestMergeOneIteration() throws Exception {
- cleanUpOutput();
- if (regenerateGraph) {
- copyLocalToDFS(LOCAL_SEQUENCE_FILE, HDFS_SEQUENCE);
- buildGraph();
- copyLocalToDFS(ACTUAL_ROOT + GRAPHBUILD_FILE + ".binmerge", HDFS_GRAPHBUILD);
- } else {
- copyLocalToDFS(EXPECTED_ROOT + GRAPHBUILD_FILE + ".binmerge", HDFS_GRAPHBUILD);
- }
-
-// PathNodeInitial inith3 = new PathNodeInitial();
-// inith3.run(HDFS_GRAPHBUILD, HDFS_MARKPATHS + "toMerge", HDFS_MARKPATHS + "complete", conf);
-// copyResultsToLocal(HDFS_MARKPATHS + "toMerge", ACTUAL_ROOT + PATHMARKS_FILE, false, conf);
-// copyResultsToLocal(HDFS_MARKPATHS + "complete", ACTUAL_ROOT + PATHMARKS_FILE, false, conf);
-//
-// MergePathsH3Driver h3 = new MergePathsH3Driver();
-// h3.run(HDFS_MARKPATHS + "toMerge", HDFS_MERGED, 2, kmerByteSize, 1, conf);
-// copyResultsToLocal(HDFS_MERGED, ACTUAL_ROOT + PATHMERGE_FILE, false, conf);
- }
-
- public void buildGraph() throws Exception {
- JobConf buildConf = new JobConf(conf); // use a separate conf so we don't interfere with other jobs
- FileInputFormat.setInputPaths(buildConf, HDFS_SEQUENCE);
- FileOutputFormat.setOutputPath(buildConf, new Path(HDFS_GRAPHBUILD));
- buildConf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_BINARY);
- buildConf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
- driver.runJob(new GenomixJobConf(buildConf), Plan.BUILD_UNMERGED_GRAPH, true);
- String fileFormat = buildConf.get(GenomixJobConf.OUTPUT_FORMAT);
- boolean resultsAreText = GenomixJobConf.OUTPUT_FORMAT_TEXT.equalsIgnoreCase(fileFormat);
- File rootDir = new File(new File(ACTUAL_ROOT + LOCAL_SEQUENCE_FILE).getParent());
- FileUtils.forceMkdir(rootDir);
- copyResultsToLocal(HDFS_GRAPHBUILD, ACTUAL_ROOT + LOCAL_SEQUENCE_FILE, resultsAreText, buildConf);
- }
-}
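BuildAllGraphs above pairs a java.nio glob PathMatcher with Files.walkFileTree to find every read file under the resources tree, building a graph for each hit. The traversal pattern, as a standalone sketch (the glob and the println body are illustrative):

import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.PathMatcher;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;

public class GlobWalkDemo {
    public static void main(String[] args) throws IOException {
        final PathMatcher matcher =
                FileSystems.getDefault().getPathMatcher("glob:**/reads/**/*.txt");
        Files.walkFileTree(Paths.get("."), new SimpleFileVisitor<Path>() {
            @Override
            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
                if (matcher.matches(file)) {
                    System.out.println("would build a graph for: " + file);
                }
                return FileVisitResult.CONTINUE;
            }

            @Override
            public FileVisitResult visitFileFailed(Path file, IOException exc) {
                return FileVisitResult.CONTINUE; // skip unreadable entries, as the test did
            }
        });
    }
}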
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h4/TestPathMergeH4.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h4/TestPathMergeH4.java
deleted file mode 100644
index a686fed..0000000
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h4/TestPathMergeH4.java
+++ /dev/null
@@ -1,74 +0,0 @@
-package edu.uci.ics.genomix.hadoop.graphclean.mergepaths.h4;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobConf;
-import org.junit.Test;
-
-import edu.uci.ics.genomix.hadoop.pmcommon.HadoopMiniClusterTest;
-import edu.uci.ics.genomix.hadoop.pmcommon.PathNodeInitial;
-import edu.uci.ics.genomix.hadoop.velvetgraphbuilding.GraphBuildingDriver;
-
-@SuppressWarnings("deprecation")
-public class TestPathMergeH4 extends HadoopMiniClusterTest {
- protected String INPUT_GRAPH;
- protected String OUTPUT_GRAPH;
- protected String localPath;
-
- public void setupTestConf(int kmerLength, int readLength, String inputDir) throws IOException {
- KMER_LENGTH = kmerLength;
- READ_LENGTH = readLength;
- INPUT_GRAPH = "/input" + inputDir;
- OUTPUT_GRAPH = "/output" + inputDir;
- HDFS_PATHS = new ArrayList<String>(Arrays.asList(OUTPUT_GRAPH));
- copyLocalToDFS(INPUT_ROOT + inputDir, INPUT_GRAPH);
- }
-
- @Test
- public void testTwoReads() throws Exception {
- setupTestConf(5, 8, "/graphs/pathmerge/singleread");
- testPathNode();
- testMergeOneIteration();
- testMergeToCompletion();
- }
-
- // @Test
- public void testSimpleText() throws Exception {
- setupTestConf(5, 8, "text.txt");
- testPathNode();
- testMergeOneIteration();
-// testMergeToCompletion();
- }
-
- public void testPathNode() throws IOException {
- cleanUpOutput();
- // identify head and tail nodes with PathNodeInitial
- PathNodeInitial inith4 = new PathNodeInitial();
- inith4.run(INPUT_GRAPH, OUTPUT_GRAPH + "/toMerge", OUTPUT_GRAPH + "/toUpdate", OUTPUT_GRAPH + "/completed", conf);
- }
-
- public void testMergeOneIteration() throws Exception {
- cleanUpOutput();
-
- MergePathsH4Driver h4 = new MergePathsH4Driver();
- String outputs = h4.run(INPUT_GRAPH, 2, KMER_LENGTH, 1, conf);
- for (String out : outputs.split(",")) {
- copyResultsToLocal(out, out.replaceFirst("/input/", ACTUAL_ROOT), false, conf);
- }
- }
-
- public void testMergeToCompletion() throws Exception {
- cleanUpOutput();
-
- MergePathsH4Driver h4 = new MergePathsH4Driver();
- String outputs = h4.run(INPUT_GRAPH, 2, KMER_LENGTH, 50, conf);
- for (String out : outputs.split(",")) {
- copyResultsToLocal(out, out.replaceFirst("/input/", ACTUAL_ROOT), false, conf);
- }
- }
-}
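The two merge tests above get a comma-separated list of HDFS output directories back from the driver and mirror each one to a local path by rewriting its prefix. A minimal sketch of that bookkeeping with the driver call stubbed out; the paths are invented:

public class OutputCopyDemo {
    public static void main(String[] args) {
        String actualRoot = "src/test/resources/actual/"; // assumed local root
        String outputs = "/input/merge-1,/input/merge-2"; // stand-in for h4.run(...)
        for (String out : outputs.split(",")) {
            String local = out.replaceFirst("/input/", actualRoot);
            System.out.println("copy " + out + " -> " + local);
        }
    }
}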
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterTest.java
deleted file mode 100644
index c3fc9b9..0000000
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterTest.java
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.graphcountfilter;
-
-import java.io.BufferedWriter;
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileWriter;
-import java.io.IOException;
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MiniMRCluster;
-import org.apache.hadoop.util.ReflectionUtils;
-import org.junit.Test;
-
-import edu.uci.ics.genomix.hadoop.graphcountfilter.CountFilterDriver;
-import edu.uci.ics.genomix.hadoop.utils.TestUtils;
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-
-@SuppressWarnings("deprecation")
-public class CountFilterTest {
- private static final String ACTUAL_RESULT_DIR = "actual2";
- private static final String COMPARE_DIR = "compare";
- private JobConf conf = new JobConf();
- private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
- private static final String DATA_PATH = "actual1" + "/result1" + "/part-00000";
- private static final String HDFS_PATH = "/webmap";
- private static final String RESULT_PATH = "/result2";
- private static final String EXPECTED_PATH = "expected/result2";
- private static final String TEST_SOURCE_DIR = COMPARE_DIR + RESULT_PATH;
- private static final int COUNT_REDUCER = 4;
- private MiniDFSCluster dfsCluster;
- private MiniMRCluster mrCluster;
- private FileSystem dfs;
-
- @SuppressWarnings("resource")
- @Test
- public void test() throws Exception {
- FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
- FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
- startHadoop();
-
- // run graph transformation tests
- CountFilterDriver tldriver = new CountFilterDriver();
- tldriver.run(HDFS_PATH, RESULT_PATH, COUNT_REDUCER, 1, HADOOP_CONF_PATH);
-
- SequenceFile.Reader reader = null;
- Path path = new Path(RESULT_PATH + "/part-00000");
- reader = new SequenceFile.Reader(dfs, path, conf);
- KmerBytesWritable key = (KmerBytesWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
- ByteWritable value = (ByteWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
- File filePathTo = new File(TEST_SOURCE_DIR);
- FileUtils.forceMkdir(filePathTo);
- FileUtils.cleanDirectory(filePathTo);
- BufferedWriter bw = new BufferedWriter(new FileWriter(new File(TEST_SOURCE_DIR + "/comparesource.txt")));
- while (reader.next(key, value)) {
- bw.write(key.toString() + "\t" + value.toString());
- bw.newLine();
- }
- bw.close();
-
- dumpResult();
- TestUtils.compareWithResult(new File(TEST_SOURCE_DIR + "/comparesource.txt"), new File(EXPECTED_PATH));
-
- cleanupHadoop();
-
- }
- private void startHadoop() throws IOException {
- FileSystem lfs = FileSystem.getLocal(new Configuration());
- lfs.delete(new Path("build"), true);
- System.setProperty("hadoop.log.dir", "logs");
- dfsCluster = new MiniDFSCluster(conf, 2, true, null);
- dfs = dfsCluster.getFileSystem();
- mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
-
- Path src = new Path(DATA_PATH);
- Path dest = new Path(HDFS_PATH + "/");
- dfs.mkdirs(dest);
- dfs.copyFromLocalFile(src, dest);
-
- DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
- conf.writeXml(confOutput);
- confOutput.flush();
- confOutput.close();
- }
-
- private void cleanupHadoop() throws IOException {
- mrCluster.shutdown();
- dfsCluster.shutdown();
- }
-
- private void dumpResult() throws IOException {
- Path src = new Path(RESULT_PATH);
- Path dest = new Path(ACTUAL_RESULT_DIR + "/");
- dfs.copyToLocalFile(src, dest);
- }
-}
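The read loop in this test is an instance of a generic SequenceFile dump: instantiate the key and value types recorded in the file header via ReflectionUtils, then iterate. Factored out against the same deprecated Hadoop API the tests already use:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

@SuppressWarnings("deprecation")
public class SequenceFileDumpDemo {
    // Dump any SequenceFile as "key<TAB>value" lines.
    public static void dump(FileSystem fs, Path path, Configuration conf) throws IOException {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
            while (reader.next(key, value)) {
                System.out.println(key + "\t" + value);
            }
        } finally {
            reader.close();
        }
    }
}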
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Test.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Test.java
deleted file mode 100644
index de98179..0000000
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Test.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.pathmergingh1;
-
-import java.io.BufferedWriter;
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileWriter;
-import java.io.IOException;
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MiniMRCluster;
-import org.apache.hadoop.util.ReflectionUtils;
-import org.junit.Test;
-import edu.uci.ics.genomix.hadoop.pathmergingh1.MergePathH1Driver;
-import edu.uci.ics.genomix.hadoop.pmcommon.MergePathValueWritable;
-import edu.uci.ics.genomix.hadoop.utils.TestUtils;
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-@SuppressWarnings("deprecation")
-public class MergePathH1Test {
- private static final String ACTUAL_RESULT_DIR = "actual3";
- private static final String COMPARE_DIR = "compare";
- private JobConf conf = new JobConf();
- private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
- private static final String DATA_PATH = "actual2" + "/result2" + "/part-00000";
- private static final String HDFS_PATH = "/hdfsdata";
- private static final String HDFS_PATH_MERGED = "/pathmerged";
-
- private static final String RESULT_PATH = "/result3";
-// private static final String EXPECTED_PATH = "expected/result3";
- private static final String TEST_SOURCE_DIR = COMPARE_DIR + RESULT_PATH;
-
- private static final int COUNT_REDUCER = 1;
- private static final int SIZE_KMER = 3;
- private static final int MERGE_ROUND = 2;
-
- private MiniDFSCluster dfsCluster;
- private MiniMRCluster mrCluster;
- private FileSystem dfs;
-
- @SuppressWarnings("resource")
- @Test
- public void test() throws Exception {
- FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
- FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
- startHadoop();
-
- MergePathH1Driver tldriver = new MergePathH1Driver();
- tldriver.run(HDFS_PATH, RESULT_PATH, HDFS_PATH_MERGED, COUNT_REDUCER, SIZE_KMER, MERGE_ROUND, HADOOP_CONF_PATH);
-
- SequenceFile.Reader reader = null;
- Path path = new Path(HDFS_PATH_MERGED + "/comSinglePath2" + "/comSinglePath2-r-00000");
- reader = new SequenceFile.Reader(dfs, path, conf);
- VKmerBytesWritable key = (VKmerBytesWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
- MergePathValueWritable value = (MergePathValueWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
- File filePathTo = new File(TEST_SOURCE_DIR);
- FileUtils.forceMkdir(filePathTo);
- FileUtils.cleanDirectory(filePathTo);
- BufferedWriter bw = new BufferedWriter(new FileWriter(new File(TEST_SOURCE_DIR + "/comparesource.txt")));
- while (reader.next(key, value)) {
- bw.write(key.toString() + "\t" + value.getAdjBitMap() + "\t" + value.getFlag());
- bw.newLine();
- }
- bw.close();
-
- cleanupHadoop();
-
- }
- private void startHadoop() throws IOException {
- FileSystem lfs = FileSystem.getLocal(new Configuration());
- lfs.delete(new Path("build"), true);
- System.setProperty("hadoop.log.dir", "logs");
- dfsCluster = new MiniDFSCluster(conf, 2, true, null);
- dfs = dfsCluster.getFileSystem();
- mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
-
- Path src = new Path(DATA_PATH);
- Path dest = new Path(HDFS_PATH + "/");
- dfs.mkdirs(dest);
- dfs.copyFromLocalFile(src, dest);
- Path data = new Path(HDFS_PATH_MERGED + "/");
- dfs.mkdirs(data);
-
- DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
- conf.writeXml(confOutput);
- confOutput.flush();
- confOutput.close();
- }
-
- private void cleanupHadoop() throws IOException {
- mrCluster.shutdown();
- dfsCluster.shutdown();
- }
-
- private void dumpResult() throws IOException {
- Path src = new Path(RESULT_PATH);
- Path dest = new Path(ACTUAL_RESULT_DIR + "/");
- dfs.copyToLocalFile(src, dest);
- }
-}
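Each of these test classes repeats the same mini-cluster lifecycle: a two-datanode MiniDFSCluster plus a MiniMRCluster pointed at its filesystem URI. Distilled into one sketch, using the same deprecated constructors the tests call:

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;

@SuppressWarnings("deprecation")
public class MiniClusterLifecycleDemo {
    private final JobConf conf = new JobConf();
    private MiniDFSCluster dfsCluster;
    private MiniMRCluster mrCluster;
    private FileSystem dfs;

    void start() throws IOException {
        System.setProperty("hadoop.log.dir", "logs");
        dfsCluster = new MiniDFSCluster(conf, 2, true, null); // 2 datanodes, format on start
        dfs = dfsCluster.getFileSystem();
        mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2); // 4 tasktrackers
    }

    void stop() throws IOException {
        mrCluster.shutdown();
        dfsCluster.shutdown();
    }
}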
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Test.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Test.java
deleted file mode 100644
index 9ba98ef..0000000
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Test.java
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.pathmergingh2;
-
-import java.io.BufferedWriter;
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileWriter;
-import java.io.IOException;
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MiniMRCluster;
-import org.apache.hadoop.util.ReflectionUtils;
-import org.junit.Test;
-import edu.uci.ics.genomix.hadoop.pathmergingh2.MergePathH2Driver;
-import edu.uci.ics.genomix.hadoop.pmcommon.MergePathValueWritable;
-import edu.uci.ics.genomix.hadoop.utils.TestUtils;
-import edu.uci.ics.genomix.hadoop.oldtype.*;
-@SuppressWarnings("deprecation")
-public class MergePathH2Test {
- private static final String ACTUAL_RESULT_DIR = "actual4";
- private static final String COMPARE_DIR = "compare";
- private JobConf conf = new JobConf();
- private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
- private static final String DATA_PATH = "actual2" + "/result2" + "/part-00000";
- private static final String HDFS_PATH = "/hdfsdata";
- private static final String HDFA_PATH_DATA = "/pathmerged";
-
- private static final String RESULT_PATH = "/result4";
- private static final String EXPECTED_PATH = "expected/result4";
- private static final String TEST_SOURCE_DIR = COMPARE_DIR + RESULT_PATH;
- private static final int COUNT_REDUCER = 1;
- private static final int SIZE_KMER = 3;
-
- private MiniDFSCluster dfsCluster;
- private MiniMRCluster mrCluster;
- private FileSystem dfs;
-
- @SuppressWarnings("resource")
- @Test
- public void test() throws Exception {
- FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
- FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
- startHadoop();
-
- MergePathH2Driver tldriver = new MergePathH2Driver();
- tldriver.run(HDFS_PATH, RESULT_PATH, HDFA_PATH_DATA, COUNT_REDUCER, SIZE_KMER, 3, HADOOP_CONF_PATH);
-
- SequenceFile.Reader reader = null;
- Path path = new Path(HDFA_PATH_DATA + "/comSinglePath2" + "/comSinglePath2-r-00000");
- reader = new SequenceFile.Reader(dfs, path, conf);
- VKmerBytesWritable key = (VKmerBytesWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
- MergePathValueWritable value = (MergePathValueWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
- File filePathTo = new File(TEST_SOURCE_DIR);
- FileUtils.forceMkdir(filePathTo);
- FileUtils.cleanDirectory(filePathTo);
- BufferedWriter bw = new BufferedWriter(new FileWriter(new File(TEST_SOURCE_DIR + "/comparesource.txt")));
- while (reader.next(key, value)) {
- bw.write(key.toString() + "\t" + value.getAdjBitMap() + "\t" + value.getFlag());
- bw.newLine();
- }
- bw.close();
-
-// TestUtils.compareWithResult(new File(TEST_SOURCE_DIR + "/comparesource.txt"), new File(EXPECTED_PATH));
-
- cleanupHadoop();
-
- }
- private void startHadoop() throws IOException {
- FileSystem lfs = FileSystem.getLocal(new Configuration());
- lfs.delete(new Path("build"), true);
- System.setProperty("hadoop.log.dir", "logs");
- dfsCluster = new MiniDFSCluster(conf, 2, true, null);
- dfs = dfsCluster.getFileSystem();
- mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
-
- Path src = new Path(DATA_PATH);
- Path dest = new Path(HDFS_PATH + "/");
- dfs.mkdirs(dest);
- dfs.copyFromLocalFile(src, dest);
- Path data = new Path(HDFA_PATH_DATA + "/");
- dfs.mkdirs(data);
-
- DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
- conf.writeXml(confOutput);
- confOutput.flush();
- confOutput.close();
- }
-
- private void cleanupHadoop() throws IOException {
- mrCluster.shutdown();
- dfsCluster.shutdown();
- }
-
- private void dumpResult() throws IOException {
-// Path src = new Path(HDFA_PATH_DATA + "/" + "complete2");
- Path src = new Path(RESULT_PATH);
- Path dest = new Path(ACTUAL_RESULT_DIR + "/");
- dfs.copyToLocalFile(src, dest);
- }
-}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/GenomixMiniClusterTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/GenomixMiniClusterTest.java
deleted file mode 100644
index 714d77e..0000000
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/GenomixMiniClusterTest.java
+++ /dev/null
@@ -1,192 +0,0 @@
-package edu.uci.ics.genomix.hadoop.pmcommon;
-
-import java.io.BufferedWriter;
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.ArrayList;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MiniMRCluster;
-import org.apache.hadoop.util.ReflectionUtils;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-
-import edu.uci.ics.genomix.hyracks.driver.Driver;
-import edu.uci.ics.genomix.hyracks.job.GenomixJobConf;
-import edu.uci.ics.genomix.hyracks.test.TestUtils;
-import edu.uci.ics.hyracks.hdfs.utils.HyracksUtils;
-
-/*
- * A base class providing most of the boilerplate for Genomix-based tests
- */
-@SuppressWarnings("deprecation")
-public class GenomixMiniClusterTest {
- protected int KMER_LENGTH = 5;
- protected int READ_LENGTH = 8;
-
- // subclass should modify this to include the HDFS directories that should be cleaned up
- protected ArrayList<String> HDFS_PATHS = new ArrayList<String>();
-
- protected static String EXPECTED_ROOT = "src/test/resources/expected/";
- protected static String ACTUAL_ROOT = "src/test/resources/actual/";
-
- protected static String HADOOP_CONF_ROOT = "src/test/resources/hadoop/conf/";
-
- protected static MiniDFSCluster dfsCluster;
- protected static MiniMRCluster mrCluster;
- private static FileSystem dfs;
- protected static JobConf conf = new JobConf();
- protected static int numberOfNC = 1;
- protected static int numPartitionPerMachine = 1;
- protected static Driver driver;
-
- @BeforeClass
- public static void setUpMiniCluster() throws Exception {
- cleanupStores();
- startHDFS();
- HyracksUtils.init();
- FileUtils.forceMkdir(new File(ACTUAL_ROOT));
- FileUtils.cleanDirectory(new File(ACTUAL_ROOT));
- driver = new Driver(HyracksUtils.CC_HOST, HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT, numPartitionPerMachine);
- }
-
- /*
- * Merge and copy a DFS directory to a local destination, converting to text if necessary.
- * Also locally store the binary-formatted result if available.
- */
- protected static void copyResultsToLocal(String hdfsSrcDir, String localDestFile, boolean resultsAreText,
- Configuration conf) throws IOException {
- if (resultsAreText) {
- // for text files, just concatenate them together
- FileUtil.copyMerge(FileSystem.get(conf), new Path(hdfsSrcDir), FileSystem.getLocal(new Configuration()),
- new Path(localDestFile), false, conf, null);
- } else {
- // file is binary
-// // save the entire binary output dir
-// FileUtil.copy(FileSystem.get(conf), new Path(hdfsSrcDir), FileSystem.getLocal(new Configuration()),
-// new Path(localDestFile + ".bindir"), false, conf);
-
- // also load the Nodes and write them out as text locally.
- FileSystem lfs = FileSystem.getLocal(new Configuration());
- lfs.mkdirs(new Path(localDestFile).getParent());
- File filePathTo = new File(localDestFile);
- BufferedWriter bw = new BufferedWriter(new FileWriter(filePathTo));
-
- FileStatus[] files = dfs.globStatus(new Path(hdfsSrcDir + "*"));
- SequenceFile.Reader reader = new SequenceFile.Reader(dfs, files[0].getPath(), conf);
- String destBinDir = localDestFile.substring(0, localDestFile.lastIndexOf("."));
-// FileUtil.copy(FileSystem.get(conf), new Path(hdfsSrcDir), FileSystem.getLocal(new Configuration()),
-// new Path(destBinDir), false, conf);
- SequenceFile.Writer writer = new SequenceFile.Writer(lfs, new JobConf(), new Path(localDestFile
- + ".binmerge"), reader.getKeyClass(), reader.getValueClass());
-
- Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
- Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
-
- for (FileStatus f : files) {
- if (f.getLen() == 0) {
- continue;
- }
- reader = new SequenceFile.Reader(dfs, f.getPath(), conf);
- while (reader.next(key, value)) {
- if (key == null || value == null) {
- break;
- }
- bw.write(key.toString() + "\t" + value.toString());
- System.out.println(key.toString() + "\t" + value.toString());
- bw.newLine();
- writer.append(key, value);
-
- }
- reader.close();
- }
- writer.close();
- bw.close();
- }
-
- }
-
- protected static boolean checkResults(String expectedPath, String actualPath, int[] poslistField) throws Exception {
- File dumped = new File(actualPath);
- if (poslistField != null) {
- TestUtils.compareWithUnSortedPosition(new File(expectedPath), dumped, poslistField);
- } else {
- TestUtils.compareWithSortedResult(new File(expectedPath), dumped);
- }
- return true;
- }
-
- protected static void cleanupStores() throws IOException {
- FileUtils.forceMkdir(new File("teststore"));
- FileUtils.forceMkdir(new File("build"));
- FileUtils.cleanDirectory(new File("teststore"));
- FileUtils.cleanDirectory(new File("build"));
- }
-
- protected static void startHDFS() throws IOException {
- conf.addResource(new Path(HADOOP_CONF_ROOT + "core-site.xml"));
- // conf.addResource(new Path(HADOOP_CONF_ROOT + "mapred-site.xml"));
- conf.addResource(new Path(HADOOP_CONF_ROOT + "hdfs-site.xml"));
-
- FileSystem lfs = FileSystem.getLocal(new Configuration());
- lfs.delete(new Path("build"), true);
- System.setProperty("hadoop.log.dir", "logs");
- dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
- dfs = dfsCluster.getFileSystem();
- mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
- System.out.println(dfs.getUri().toString());
-
- DataOutputStream confOutput = new DataOutputStream(
- new FileOutputStream(new File(HADOOP_CONF_ROOT + "conf.xml")));
- conf.writeXml(confOutput);
- confOutput.close();
- }
-
- protected static void copyLocalToDFS(String localSrc, String hdfsDest) throws IOException {
- Path dest = new Path(hdfsDest);
- dfs.mkdirs(dest);
- dfs.copyFromLocalFile(new Path(localSrc), dest);
- }
-
- /*
- * Remove the local "actual" folder and any hdfs folders in use by this test
- */
- public void cleanUpOutput() throws IOException {
-// // local cleanup
-// FileSystem lfs = FileSystem.getLocal(new Configuration());
-// if (lfs.exists(new Path(ACTUAL_ROOT))) {
-// lfs.delete(new Path(ACTUAL_ROOT), true);
-// }
- // dfs cleanup
- for (String path : HDFS_PATHS) {
- if (dfs.exists(new Path(path))) {
- dfs.delete(new Path(path), true);
- }
- }
- }
-
- @AfterClass
- public static void tearDown() throws Exception {
- HyracksUtils.deinit();
- cleanupHDFS();
- }
-
- protected static void cleanupHDFS() throws Exception {
- dfsCluster.shutdown();
- mrCluster.shutdown();
- }
-}
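The text branch of copyResultsToLocal above reduces to one FileUtil.copyMerge call, which concatenates every part file of an HDFS output directory into a single local file. Isolated for reference:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class CopyMergeDemo {
    public static void mergeToLocal(String hdfsSrcDir, String localDestFile, Configuration conf)
            throws IOException {
        FileUtil.copyMerge(FileSystem.get(conf), new Path(hdfsSrcDir),
                FileSystem.getLocal(new Configuration()), new Path(localDestFile),
                false /* keep source */, conf, null /* no separator between files */);
    }
}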
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java
deleted file mode 100644
index 0f2d714..0000000
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java
+++ /dev/null
@@ -1,248 +0,0 @@
-package edu.uci.ics.genomix.hadoop.pmcommon;
-
-import java.io.BufferedWriter;
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.lang3.ArrayUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MiniMRCluster;
-import org.apache.hadoop.util.ReflectionUtils;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.Before;
-import org.junit.BeforeClass;
-
-import edu.uci.ics.genomix.hadoop.velvetgraphbuilding.GraphBuildingDriver;
-import edu.uci.ics.genomix.hyracks.test.TestUtils;
-
-/*
- * A base class providing most of the boilerplate for Hadoop-based tests
- */
-@SuppressWarnings("deprecation")
-public class HadoopMiniClusterTest {
- protected int KMER_LENGTH = 5;
- protected int READ_LENGTH = 8;
-
- // subclass should modify this to include the HDFS directories that should be cleaned up
- protected ArrayList<String> HDFS_PATHS = new ArrayList<String>();
-
- protected static final String EXPECTED_ROOT = "src/test/resources/expected/";
- protected static final String ACTUAL_ROOT = "src/test/resources/actual/";
- protected static final String INPUT_ROOT = "src/test/resources/input/";
-
- protected static String HADOOP_CONF_ROOT = "src/test/resources/hadoop/conf/";
- protected static String HADOOP_CONF = HADOOP_CONF_ROOT + "conf.xml";
-
- protected static MiniDFSCluster dfsCluster;
- protected static MiniMRCluster mrCluster;
- protected static FileSystem dfs;
- protected static JobConf conf = new JobConf();
- protected static int numberOfNC = 1;
- protected static int numPartitionPerMachine = 1;
-
- @BeforeClass
- public static void setUpMiniCluster() throws Exception {
- cleanupStores();
- startHDFS();
- FileUtils.forceMkdir(new File(ACTUAL_ROOT));
- FileUtils.cleanDirectory(new File(ACTUAL_ROOT));
- }
-
- protected static void copyResultsToLocal(String hdfsSrcDir, String localDestFile, boolean resultsAreText,
- Configuration conf) throws IOException {
- copyResultsToLocal(hdfsSrcDir, localDestFile, resultsAreText, conf, true);
- }
-
- public static void copyResultsToLocal(String hdfsSrcDir, String localDestFile, boolean resultsAreText,
- Configuration conf, boolean ignoreZeroOutputs) throws IOException {
- copyResultsToLocal(hdfsSrcDir, localDestFile, resultsAreText,
- conf, ignoreZeroOutputs, dfs);
- }
-
- /*
- * Merge and copy a DFS directory to a local destination, converting to text if necessary.
- * Also locally store the binary-formatted result if available.
- */
- public static void copyResultsToLocal(String hdfsSrcDir, String localDestFile, boolean resultsAreText,
- Configuration conf, boolean ignoreZeroOutputs, FileSystem dfs) throws IOException {
- if (resultsAreText) {
- // for text files, just concatenate them together
- FileUtil.copyMerge(FileSystem.get(conf), new Path(hdfsSrcDir), FileSystem.getLocal(new Configuration()),
- new Path(localDestFile), false, conf, null);
- } else {
- // file is binary
- // save the entire binary output dir
- FileUtil.copy(FileSystem.get(conf), new Path(hdfsSrcDir), FileSystem.getLocal(new Configuration()),
- new Path(localDestFile + ".bindir"), false, conf);
-
- // chomp through output files
- FileStatus[] files = ArrayUtils.addAll(dfs.globStatus(new Path(hdfsSrcDir + "*")), dfs.globStatus(new Path(hdfsSrcDir + "*/*")));
- FileStatus validFile = null;
- for (FileStatus f : files) {
- if (f.getLen() != 0) {
- validFile = f;
- break;
- }
- }
- if (validFile == null) {
- if (ignoreZeroOutputs) {
- // just make a dummy output dir
- FileSystem lfs = FileSystem.getLocal(new Configuration());
- lfs.mkdirs(new Path(localDestFile).getParent());
- return;
- }
- else {
- throw new IOException("No non-zero outputs in source directory " + hdfsSrcDir);
- }
- }
-
- // also load the Nodes and write them out as text locally.
- FileSystem lfs = FileSystem.getLocal(new Configuration());
- lfs.mkdirs(new Path(localDestFile).getParent());
- File filePathTo = new File(localDestFile);
- if (filePathTo.exists() && filePathTo.isDirectory()) {
- filePathTo = new File(localDestFile + "/data");
- }
- BufferedWriter bw = new BufferedWriter(new FileWriter(filePathTo));
- SequenceFile.Reader reader = new SequenceFile.Reader(dfs, validFile.getPath(), conf);
- SequenceFile.Writer writer = new SequenceFile.Writer(lfs, new JobConf(), new Path(localDestFile
- + ".binmerge"), reader.getKeyClass(), reader.getValueClass());
-
- Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
- Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
-
- for (FileStatus f : files) {
- if (f.getLen() == 0) {
- continue;
- }
- reader = new SequenceFile.Reader(dfs, f.getPath(), conf);
- while (reader.next(key, value)) {
- if (key == null || value == null) {
- break;
- }
- bw.write(key.toString() + "\t" + value.toString());
- System.out.println(key.toString() + "\t" + value.toString());
- bw.newLine();
- writer.append(key, value);
-
- }
- reader.close();
- }
- writer.close();
- bw.close();
- }
-
- }
-
- protected static boolean checkResults(String expectedPath, String actualPath, int[] poslistField) throws Exception {
- File dumped = new File(actualPath);
- if (poslistField != null) {
- TestUtils.compareWithUnSortedPosition(new File(expectedPath), dumped, poslistField);
- } else {
- TestUtils.compareWithSortedResult(new File(expectedPath), dumped);
- }
- return true;
- }
-
- protected static void cleanupStores() throws IOException {
- FileUtils.forceMkdir(new File("teststore"));
- FileUtils.forceMkdir(new File("build"));
- FileUtils.cleanDirectory(new File("teststore"));
- FileUtils.cleanDirectory(new File("build"));
- }
-
- protected static void startHDFS() throws IOException {
-// conf.addResource(new Path(HADOOP_CONF_ROOT + "core-site.xml"));
- // conf.addResource(new Path(HADOOP_CONF_ROOT + "mapred-site.xml"));
-// conf.addResource(new Path(HADOOP_CONF_ROOT + "hdfs-site.xml"));
-
- FileSystem lfs = FileSystem.getLocal(new Configuration());
- lfs.delete(new Path("build"), true);
- System.setProperty("hadoop.log.dir", "logs");
- dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
- dfs = dfsCluster.getFileSystem();
- mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
- System.out.println(dfs.getUri().toString());
-
- DataOutputStream confOutput = new DataOutputStream(
- new FileOutputStream(new File(HADOOP_CONF)));
- conf.writeXml(confOutput);
- confOutput.close();
- }
-
- protected static void copyLocalToDFS(String localSrc, String hdfsDest) throws IOException {
- Path dest = new Path(hdfsDest);
- dfs.mkdirs(dest);
- System.out.println("copying from " + localSrc + " to " + dest);
- for (File f : new File(localSrc).listFiles()) {
- dfs.copyFromLocalFile(new Path(f.getAbsolutePath()), dest);
- }
- }
-
- /*
- * Remove the local "actual" folder and any hdfs folders in use by this test
- */
- public void cleanUpOutput() throws IOException {
- // local cleanup
- FileSystem lfs = FileSystem.getLocal(new Configuration());
- if (lfs.exists(new Path(ACTUAL_ROOT))) {
- lfs.delete(new Path(ACTUAL_ROOT), true);
- }
- // dfs cleanup
- for (String path : HDFS_PATHS) {
- if (dfs.exists(new Path(path))) {
- dfs.delete(new Path(path), true);
- }
- }
- }
-
- @AfterClass
- public static void tearDown() throws Exception {
- cleanupHDFS();
- }
-
- protected static void cleanupHDFS() throws Exception {
- dfsCluster.shutdown();
- mrCluster.shutdown();
- }
-
-// public void buildGraph() throws IOException {
-// JobConf buildConf = new JobConf(conf); // use a separate conf so we don't interfere with other jobs
-// FileInputFormat.setInputPaths(buildConf, SEQUENCE);
-// FileOutputFormat.setOutputPath(buildConf, new Path(INPUT_GRAPH));
-//
-// GraphBuildingDriver tldriver = new GraphBuildingDriver();
-// tldriver.run(SEQUENCE, INPUT_GRAPH, 2, kmerByteSize, READ_LENGTH, false, true, HADOOP_CONF_ROOT + "conf.xml");
-//
-// boolean resultsAreText = true;
-// copyResultsToLocal(INPUT_GRAPH, ACTUAL_ROOT + INPUT_GRAPH, resultsAreText, buildConf);
-// }
-//
-// private void prepareGraph() throws IOException {
-// if (regenerateGraph) {
-// copyLocalToDFS(LOCAL_SEQUENCE_FILE, SEQUENCE);
-// buildGraph();
-// copyLocalToDFS(ACTUAL_ROOT + INPUT_GRAPH + readsFile + ".binmerge", INPUT_GRAPH);
-// } else {
-// copyLocalToDFS(EXPECTED_ROOT + INPUT_GRAPH + readsFile + ".binmerge", INPUT_GRAPH);
-// }
-// }
-
-}
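The binary branch above globs both one and two levels below the output prefix and keys everything off the first non-empty part file. That selection step, extracted as a sketch (with a null guard for the no-match case that the original omitted):

import java.io.IOException;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FirstNonEmptyOutputDemo {
    public static FileStatus firstNonEmpty(FileSystem dfs, String hdfsSrcDir) throws IOException {
        FileStatus[] files = ArrayUtils.addAll(
                dfs.globStatus(new Path(hdfsSrcDir + "*")),
                dfs.globStatus(new Path(hdfsSrcDir + "*/*")));
        if (files == null) {
            return null; // nothing matched at either level
        }
        for (FileStatus f : files) {
            if (f.getLen() != 0) {
                return f;
            }
        }
        return null; // caller decides whether an all-empty output is an error
    }
}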
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/utils/TestUtils.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/utils/TestUtils.java
deleted file mode 100755
index deb3b97..0000000
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/utils/TestUtils.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hadoop.utils;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-
-/**
- * This class offers comparison services for the graph-building tests.
- */
-public class TestUtils {
- public static void compareWithResult(File expectedFile, File actualFile) throws Exception {
- BufferedReader readerExpected = new BufferedReader(new FileReader(expectedFile));
- BufferedReader readerActual = new BufferedReader(new FileReader(actualFile));
- String lineExpected, lineActual;
- int num = 1;
- try {
- while ((lineExpected = readerExpected.readLine()) != null) {
- lineActual = readerActual.readLine();
- // Assert.assertEquals(lineExpected, lineActual);
- if (lineActual == null) {
- throw new Exception("Actual result changed at line " + num + ":\n< " + lineExpected + "\n> ");
- }
- if (!equalStrings(lineExpected, lineActual)) {
- throw new Exception("Result for changed at line " + num + ":\n< " + lineExpected + "\n> "
- + lineActual);
- }
- ++num;
- }
- lineActual = readerActual.readLine();
- if (lineActual != null) {
- throw new Exception("Actual result changed at line " + num + ":\n< \n> " + lineActual);
- }
- } finally {
- readerExpected.close();
- readerActual.close();
- }
- }
-
- private static boolean equalStrings(String s1, String s2) {
- String[] rowsOne = s1.split("\t");
- String[] rowsTwo = s2.split("\t");
-
- if (rowsOne.length != rowsTwo.length)
- return false;
-
- for (int i = 0; i < rowsOne.length; i++) {
- String row1 = rowsOne[i];
- String row2 = rowsTwo[i];
-
- if (row1.equals(row2))
- continue;
- else
- return false;
- }
- return true;
- }
-
- public static void main(String[] args) throws Exception {
- TestUtils TUtils = new TestUtils();
- }
-}
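compareWithResult above is, in effect, a first-mismatch line diff; its tab-field comparison is equivalent to whole-line equality except around trailing tabs. The same check as a compact java.nio sketch:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;

public class CompareFilesDemo {
    public static void compare(String expectedFile, String actualFile) throws IOException {
        List<String> expected = Files.readAllLines(Paths.get(expectedFile));
        List<String> actual = Files.readAllLines(Paths.get(actualFile));
        int lines = Math.max(expected.size(), actual.size());
        for (int i = 0; i < lines; i++) {
            String e = i < expected.size() ? expected.get(i) : "<missing>";
            String a = i < actual.size() ? actual.get(i) : "<missing>";
            if (!e.equals(a)) {
                throw new AssertionError("line " + (i + 1) + ":\n< " + e + "\n> " + a);
            }
        }
    }
}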
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/NewGraphBuildingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/NewGraphBuildingTest.java
deleted file mode 100644
index 2517810..0000000
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/NewGraphBuildingTest.java
+++ /dev/null
@@ -1,99 +0,0 @@
-package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
-
-import java.io.BufferedWriter;
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileWriter;
-import java.io.IOException;
-import junit.framework.Assert;
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MiniMRCluster;
-import org.junit.Test;
-
-@SuppressWarnings("deprecation")
-public class NewGraphBuildingTest {
-
- private JobConf conf = new JobConf();
- private static final String ACTUAL_RESULT_DIR = "actual1";
- private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
- private static final String DATA_PATH = "data/webmap/text.txt";
- private static final String HDFS_PATH = "/webmap";
- private static final String RESULT_PATH = "/result1";
- private static final String EXPECTED_PATH = "expected/";
- private static final int COUNT_REDUCER = 2;
- private static final int SIZE_KMER = 5;
- private static final int READ_LENGTH = 8;
-
- private MiniDFSCluster dfsCluster;
- private MiniMRCluster mrCluster;
- private FileSystem dfs;
-
- @SuppressWarnings("resource")
- @Test
- public void test() throws Exception {
- FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
- FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
- startHadoop();
-// TestGroupbyKmer();
-// TestMapKmerToRead();
- TestGroupByReadID();
- cleanupHadoop();
- }
-
- public void TestGroupbyKmer() throws Exception {
- GraphBuildingDriver tldriver = new GraphBuildingDriver();
- tldriver.run(HDFS_PATH, RESULT_PATH, COUNT_REDUCER, SIZE_KMER, READ_LENGTH, true, false, HADOOP_CONF_PATH);
- dumpResult();
- }
-
- public void TestMapKmerToRead() throws Exception {
- GraphBuildingDriver tldriver = new GraphBuildingDriver();
- tldriver.run(HDFS_PATH, RESULT_PATH, 0, SIZE_KMER, READ_LENGTH, false, false, HADOOP_CONF_PATH);
- dumpResult();
- }
-
- public void TestGroupByReadID() throws Exception {
- GraphBuildingDriver tldriver = new GraphBuildingDriver();
- tldriver.run(HDFS_PATH, RESULT_PATH, 2, SIZE_KMER, READ_LENGTH, false, false, HADOOP_CONF_PATH);
- dumpResult();
- }
-
- private void startHadoop() throws IOException {
- FileSystem lfs = FileSystem.getLocal(new Configuration());
- lfs.delete(new Path("build"), true);
- System.setProperty("hadoop.log.dir", "logs");
- dfsCluster = new MiniDFSCluster(conf, 2, true, null);
- dfs = dfsCluster.getFileSystem();
- mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
-
- Path src = new Path(DATA_PATH);
- Path dest = new Path(HDFS_PATH + "/");
- dfs.mkdirs(dest);
- dfs.copyFromLocalFile(src, dest);
-
- DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
- conf.writeXml(confOutput);
- confOutput.flush();
- confOutput.close();
- }
-
- private void cleanupHadoop() throws IOException {
- mrCluster.shutdown();
- dfsCluster.shutdown();
- }
-
-
- private void dumpResult() throws IOException {
- Path src = new Path(RESULT_PATH);
- Path dest = new Path(ACTUAL_RESULT_DIR);
- dfs.copyToLocalFile(src, dest);
- }
-}
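The dumpResult helper these tests share is just a directory-level copyToLocalFile; as a free-standing utility (names illustrative):

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DumpResultDemo {
    // Copy a whole HDFS result directory next to the test working directory
    // so its part files can be inspected offline.
    static void dump(FileSystem dfs, String resultPath, String localDir) throws IOException {
        dfs.copyToLocalFile(new Path(resultPath), new Path(localDir));
    }
}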
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/KmerPointable.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/KmerPointable.java
deleted file mode 100644
index 0457de9..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/KmerPointable.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.data.primitive;
-
-import edu.uci.ics.genomix.hyracks.data.accessors.KmerHashPartitioncomputerFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
-import edu.uci.ics.hyracks.data.std.api.AbstractPointable;
-import edu.uci.ics.hyracks.data.std.api.IComparable;
-import edu.uci.ics.hyracks.data.std.api.IHashable;
-import edu.uci.ics.hyracks.data.std.api.INumeric;
-import edu.uci.ics.hyracks.data.std.api.IPointable;
-import edu.uci.ics.hyracks.data.std.api.IPointableFactory;
-
-public final class KmerPointable extends AbstractPointable implements IHashable, IComparable, INumeric {
- public static final ITypeTraits TYPE_TRAITS = new ITypeTraits() {
- private static final long serialVersionUID = 1L;
-
- @Override
- public boolean isFixedLength() {
- return false;
- }
-
- @Override
- public int getFixedLength() {
- return -1;
- }
- };
-
- public static final IPointableFactory FACTORY = new IPointableFactory() {
- private static final long serialVersionUID = 1L;
-
- @Override
- public IPointable createPointable() {
- return new KmerPointable();
- }
-
- @Override
- public ITypeTraits getTypeTraits() {
- return TYPE_TRAITS;
- }
- };
-
- public static short getShortReverse(byte[] bytes, int offset, int length) {
- if (length < 2) {
- return (short) (bytes[offset] & 0xff);
- }
- return (short) (((bytes[offset + length - 1] & 0xff) << 8) + (bytes[offset + length - 2] & 0xff));
- }
-
- public static int getIntReverse(byte[] bytes, int offset, int length) {
- int shortValue = getShortReverse(bytes, offset, length) & 0xffff;
-
- if (length < 3) {
- return shortValue;
- }
- if (length == 3) {
- return (((bytes[offset + 2] & 0xff) << 16) + ((bytes[offset + 1] & 0xff) << 8) + ((bytes[offset] & 0xff)));
- }
- return ((bytes[offset + length - 1] & 0xff) << 24) + ((bytes[offset + length - 2] & 0xff) << 16)
- + ((bytes[offset + length - 3] & 0xff) << 8) + ((bytes[offset + length - 4] & 0xff) << 0);
- }
-
- public static long getLongReverse(byte[] bytes, int offset, int length) {
- if (length < 8) {
- return ((long) getIntReverse(bytes, offset, length)) & 0x0ffffffffL;
- }
- return (((long) (bytes[offset + length - 1] & 0xff)) << 56)
- + (((long) (bytes[offset + length - 2] & 0xff)) << 48)
- + (((long) (bytes[offset + length - 3] & 0xff)) << 40)
- + (((long) (bytes[offset + length - 4] & 0xff)) << 32)
- + (((long) (bytes[offset + length - 5] & 0xff)) << 24)
- + (((long) (bytes[offset + length - 6] & 0xff)) << 16)
- + (((long) (bytes[offset + length - 7] & 0xff)) << 8) + (((long) (bytes[offset + length - 8] & 0xff)));
- }
-
- @Override
- public int compareTo(IPointable pointer) {
- return compareTo(pointer.getByteArray(), pointer.getStartOffset(), pointer.getLength());
- }
-
- @Override
- public int compareTo(byte[] bytes, int offset, int length) {
-
- if (this.length != length) {
- return this.length - length;
- }
- for (int i = length - 1; i >= 0; i--) {
- int cmp = (this.bytes[this.start + i] & 0xff) - (bytes[offset + i] & 0xff);
- if (cmp != 0) {
- return cmp;
- }
- }
-
- return 0;
- }
-
- @Override
- public int hash() {
- int hash = KmerHashPartitioncomputerFactory.hashBytes(bytes, start, length);
- return hash;
- }
-
- @Override
- public byte byteValue() {
- return bytes[start + length - 1];
- }
-
- @Override
- public short shortValue() {
- return getShortReverse(bytes, start, length);
- }
-
- @Override
- public int intValue() {
- return getIntReverse(bytes, start, length);
- }
-
- @Override
- public long longValue() {
- return getLongReverse(bytes, start, length);
- }
-
- @Override
- public float floatValue() {
- return Float.intBitsToFloat(intValue());
- }
-
- @Override
- public double doubleValue() {
- return Double.longBitsToDouble(longValue());
- }
-}
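getShortReverse, getIntReverse, and getLongReverse above read the k-mer's backing bytes least-significant byte first. A self-contained check of that decoding for the four-byte case (the class and sample bytes are illustrative):

public class ReverseBytesDemo {
    // Equivalent to KmerPointable.getIntReverse for length >= 4.
    static int getIntReverse(byte[] b, int offset, int length) {
        return ((b[offset + length - 1] & 0xff) << 24)
                | ((b[offset + length - 2] & 0xff) << 16)
                | ((b[offset + length - 3] & 0xff) << 8)
                | (b[offset + length - 4] & 0xff);
    }

    public static void main(String[] args) {
        byte[] bytes = { 0x78, 0x56, 0x34, 0x12 }; // little-endian encoding of 0x12345678
        System.out.println(Integer.toHexString(getIntReverse(bytes, 0, 4))); // prints 12345678
    }
}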
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/NodeReference.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/NodeReference.java
deleted file mode 100644
index 60c0682..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/NodeReference.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.data.primitive;
-
-import edu.uci.ics.genomix.velvet.oldtype.NodeWritable;
-
-public class NodeReference extends NodeWritable {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- public NodeReference(int kmerSize) {
- super(kmerSize);
- }
-
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionListReference.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionListReference.java
deleted file mode 100644
index 47a3047..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionListReference.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.data.primitive;
-
-import edu.uci.ics.genomix.oldtype.PositionListWritable;
-import edu.uci.ics.hyracks.data.std.api.IValueReference;
-
-public class PositionListReference extends PositionListWritable implements IValueReference {
-
- public PositionListReference(int countByDataLength, byte[] byteArray, int startOffset) {
- super(countByDataLength, byteArray, startOffset);
- }
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionReference.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionReference.java
deleted file mode 100644
index f066dc7..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionReference.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.data.primitive;
-
-import edu.uci.ics.genomix.oldtype.PositionWritable;
-import edu.uci.ics.hyracks.data.std.api.IValueReference;
-
-public class PositionReference extends PositionWritable implements IValueReference {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/ConnectorPolicyAssignmentPolicy.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/ConnectorPolicyAssignmentPolicy.java
deleted file mode 100644
index 61f16b2..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/ConnectorPolicyAssignmentPolicy.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.dataflow;
-
-import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;
-import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicy;
-import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicyAssignmentPolicy;
-import edu.uci.ics.hyracks.api.dataflow.connectors.PipeliningConnectorPolicy;
-import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedPipeliningConnectorPolicy;
-import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor;
-
-/**
- * used by precluster groupby
- */
-public class ConnectorPolicyAssignmentPolicy implements IConnectorPolicyAssignmentPolicy {
- private static final long serialVersionUID = 1L;
- private IConnectorPolicy senderSideMaterializePolicy = new SendSideMaterializedPipeliningConnectorPolicy();
- private IConnectorPolicy pipeliningPolicy = new PipeliningConnectorPolicy();
-
- @Override
- public IConnectorPolicy getConnectorPolicyAssignment(IConnectorDescriptor c, int nProducers, int nConsumers,
- int[] fanouts) {
- if (c instanceof MToNPartitioningMergingConnectorDescriptor) {
- return senderSideMaterializePolicy;
- } else {
- return pipeliningPolicy;
- }
- }
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapKmerPositionToReadOperator.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapKmerPositionToReadOperator.java
deleted file mode 100644
index 3736934..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapKmerPositionToReadOperator.java
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.dataflow;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-import edu.uci.ics.genomix.hyracks.data.primitive.PositionReference;
-import edu.uci.ics.genomix.oldtype.PositionListWritable;
-import edu.uci.ics.genomix.oldtype.PositionWritable;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
-import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.job.IOperatorDescriptorRegistry;
-import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorNodePushable;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputUnaryOutputOperatorNodePushable;
-
-public class MapKmerPositionToReadOperator extends AbstractSingleActivityOperatorDescriptor {
-
- private static final Log LOG = LogFactory.getLog(MapKmerPositionToReadOperator.class);
- public static int WARNSIZE = 100 * 1000 * 5;
-
- public MapKmerPositionToReadOperator(IOperatorDescriptorRegistry spec, RecordDescriptor recDesc, int readlength,
- int kmerSize) {
- super(spec, 1, 1);
- recordDescriptors[0] = recDesc;
- LAST_POSID = readlength - kmerSize + 1;
- }
-
- private final int LAST_POSID;
-
- private static final long serialVersionUID = 1L;
- public static final int InputKmerField = 0;
- public static final int InputPosListField = 1;
-
- public static final int OutputReadIDField = 0;
- public static final int OutputPosInReadField = 1;
- public static final int OutputOtherReadIDListField = 2;
- public static final int OutputKmerField = 3; // may not be needed
-
- public static final RecordDescriptor readIDOutputRec = new RecordDescriptor(new ISerializerDeserializer[] { null,
- null, null, null });
-
- /**
- * Maps (Kmer, {(ReadID,PosInRead),...}) into
- * (ReadID, PosInRead, {OtherReadID,...}, *Kmer*); an OtherReadID appears
- * only when its otherPos == 0
- */
- public class MapKmerPositionToReadNodePushable extends AbstractUnaryInputUnaryOutputOperatorNodePushable {
- private final IHyracksTaskContext ctx;
- private final RecordDescriptor inputRecDesc;
- private final RecordDescriptor outputRecDesc;
-
- private FrameTupleAccessor accessor;
- private ByteBuffer writeBuffer;
- private ArrayTupleBuilder builder;
- private FrameTupleAppender appender;
-
- private PositionReference positionEntry;
- private ArrayBackedValueStorage posListEntry;
- private ArrayBackedValueStorage zeroPositionCollection;
- private ArrayBackedValueStorage noneZeroPositionCollection;
- private PositionListWritable plistEntry;
-
- public MapKmerPositionToReadNodePushable(IHyracksTaskContext ctx, RecordDescriptor inputRecDesc,
- RecordDescriptor outputRecDesc) {
- this.ctx = ctx;
- this.inputRecDesc = inputRecDesc;
- this.outputRecDesc = outputRecDesc;
- this.positionEntry = new PositionReference();
- this.posListEntry = new ArrayBackedValueStorage();
- this.zeroPositionCollection = new ArrayBackedValueStorage();
- this.noneZeroPositionCollection = new ArrayBackedValueStorage();
- this.plistEntry = new PositionListWritable();
- }
-
- @Override
- public void open() throws HyracksDataException {
- accessor = new FrameTupleAccessor(ctx.getFrameSize(), inputRecDesc);
- writeBuffer = ctx.allocateFrame();
- builder = new ArrayTupleBuilder(outputRecDesc.getFieldCount());
- appender = new FrameTupleAppender(ctx.getFrameSize());
- appender.reset(writeBuffer, true);
- writer.open();
- posListEntry.reset();
- }
-
- @Override
- public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
- accessor.reset(buffer);
- int tupleCount = accessor.getTupleCount();
- for (int i = 0; i < tupleCount; i++) {
- scanPosition(i, zeroPositionCollection, noneZeroPositionCollection);
- scanAgainToOutputTuple(i, zeroPositionCollection, noneZeroPositionCollection, builder);
- }
- }
-
- private boolean isStart(byte posInRead) {
- return posInRead == 1 || posInRead == -LAST_POSID;
- }
-
- private void scanPosition(int tIndex, ArrayBackedValueStorage zeroPositionCollection2,
- ArrayBackedValueStorage noneZeroPositionCollection2) {
- zeroPositionCollection2.reset();
- noneZeroPositionCollection2.reset();
- byte[] data = accessor.getBuffer().array();
- int offsetPoslist = accessor.getTupleStartOffset(tIndex) + accessor.getFieldSlotsLength()
- + accessor.getFieldStartOffset(tIndex, InputPosListField);
- for (int i = 0; i < accessor.getFieldLength(tIndex, InputPosListField); i += PositionReference.LENGTH) {
- positionEntry.setNewReference(data, offsetPoslist + i);
- if (isStart(positionEntry.getPosInRead())) {
- zeroPositionCollection2.append(positionEntry);
- } else {
- noneZeroPositionCollection2.append(positionEntry);
- }
- }
-
- }
-
- private void scanAgainToOutputTuple(int tIndex, ArrayBackedValueStorage zeroPositionCollection,
- ArrayBackedValueStorage noneZeroPositionCollection, ArrayTupleBuilder builder2) {
- byte[] data = accessor.getBuffer().array();
- int offsetPoslist = accessor.getTupleStartOffset(tIndex) + accessor.getFieldSlotsLength()
- + accessor.getFieldStartOffset(tIndex, InputPosListField);
- for (int i = 0; i < accessor.getFieldLength(tIndex, InputPosListField); i += PositionReference.LENGTH) {
- positionEntry.setNewReference(data, offsetPoslist + i);
- if (!isStart(positionEntry.getPosInRead())) {
- appendNodeToBuilder(tIndex, positionEntry, zeroPositionCollection, builder2);
- } else {
- appendNodeToBuilder(tIndex, positionEntry, noneZeroPositionCollection, builder2);
- }
- }
- }
-
- private void appendNodeToBuilder(int tIndex, PositionReference pos, ArrayBackedValueStorage posList2,
- ArrayTupleBuilder builder2) {
- try {
- builder2.reset();
- builder2.addField(pos.getByteArray(), pos.getStartOffset(), PositionReference.INTBYTES);
- builder2.addField(pos.getByteArray(), pos.getStartOffset() + PositionReference.INTBYTES, 1);
-
- if (posList2 == null) {
- builder2.addFieldEndOffset();
- } else {
- if (posList2.getLength() > WARNSIZE){
- LOG.warn("Hot overlap @" + pos.toString() + " :" + posList2.getLength());
- }
- writePosToFieldAndSkipSameReadID(pos, builder2.getDataOutput(), posList2);
- builder2.addFieldEndOffset();
- }
- // set the kmer; this may not be useful:
- // the reversed ID doesn't need to output the kmer
- if (pos.getPosInRead() > 0) {
- byte[] data = accessor.getBuffer().array();
- int offsetKmer = accessor.getTupleStartOffset(tIndex) + accessor.getFieldSlotsLength()
- + accessor.getFieldStartOffset(tIndex, InputKmerField);
- builder2.addField(data, offsetKmer, accessor.getFieldLength(tIndex, InputKmerField));
- } else {
- builder2.addFieldEndOffset();
- }
-
- if (!appender.append(builder2.getFieldEndOffsets(), builder2.getByteArray(), 0, builder2.getSize())) {
- FrameUtils.flushFrame(writeBuffer, writer);
- appender.reset(writeBuffer, true);
- if (!appender.append(builder2.getFieldEndOffsets(), builder2.getByteArray(), 0, builder2.getSize())) {
- throw new IllegalStateException("length:" + builder2.getSize() );
- }
- }
- } catch (HyracksDataException e) {
- throw new IllegalStateException(
- "Failed to Add a field to the tuple by copying the data bytes from a byte array."
- + e.getMessage());
- }
- }
-
- private void writePosToFieldAndSkipSameReadID(PositionReference pos, DataOutput ds,
- ArrayBackedValueStorage posList2) throws HyracksDataException {
-
- plistEntry.setNewReference(PositionListWritable.getCountByDataLength(posList2.getLength()),
- posList2.getByteArray(), posList2.getStartOffset());
- for (int i = 0; i < plistEntry.getCountOfPosition(); i++) {
- PositionWritable p = plistEntry.getPosition(i);
- if (!pos.isSameReadID(p)) {
- try {
- ds.write(p.getByteArray(), p.getStartOffset(), p.getLength());
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- }
- }
- }
-
- @Override
- public void fail() throws HyracksDataException {
- writer.fail();
- }
-
- @Override
- public void close() throws HyracksDataException {
- if (appender.getTupleCount() > 0) {
- FrameUtils.flushFrame(writeBuffer, writer);
- }
- writer.close();
- }
-
- }
-
- @Override
- public AbstractOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
- IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) {
- return new MapKmerPositionToReadNodePushable(ctx, recordDescProvider.getInputRecordDescriptor(getActivityId(),
- 0), recordDescriptors[0]);
- }
-
-}
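
The operator above leans on the 5-byte position encoding from PositionWritable: a 4-byte readID followed by a 1-byte strand-signed posInRead. A hedged sketch of that layout and of the isStart() test; the big-endian readID layout is an assumption here, and the class is illustrative, not repo code:

    public class PositionCodecDemo {
        static final int INTBYTES = 4;
        static final int LENGTH = INTBYTES + 1; // 4-byte readID + 1-byte posInRead

        // Assumed big-endian layout for the readID (not confirmed by the diff).
        static void encode(byte[] dest, int off, int readID, byte posInRead) {
            dest[off] = (byte) (readID >> 24);
            dest[off + 1] = (byte) (readID >> 16);
            dest[off + 2] = (byte) (readID >> 8);
            dest[off + 3] = (byte) readID;
            dest[off + INTBYTES] = posInRead; // negative = reverse-complement strand
        }

        // Mirrors the deleted isStart(): the first k-mer on either strand.
        static boolean isStart(byte posInRead, int lastPosID) {
            return posInRead == 1 || posInRead == -lastPosID;
        }

        public static void main(String[] args) {
            byte[] buf = new byte[LENGTH];
            encode(buf, 0, 42, (byte) 1);
            System.out.println(isStart(buf[INTBYTES], 7)); // true: head of a forward read
        }
    }
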
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapReadToNodeOperator.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapReadToNodeOperator.java
deleted file mode 100644
index 1827651..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapReadToNodeOperator.java
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.dataflow;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import edu.uci.ics.genomix.hyracks.data.primitive.NodeReference;
-
-import edu.uci.ics.genomix.velvet.oldtype.PositionListWritable;
-import edu.uci.ics.genomix.velvet.oldtype.PositionWritable;
-import edu.uci.ics.genomix.velvet.oldtype.KmerBytesWritable;
-
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
-import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
-import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.job.IOperatorDescriptorRegistry;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputUnaryOutputOperatorNodePushable;
-
-public class MapReadToNodeOperator extends AbstractSingleActivityOperatorDescriptor {
-
- public MapReadToNodeOperator(IOperatorDescriptorRegistry spec, RecordDescriptor outRecDesc, int kmerSize,
- boolean bMergeNode) {
- super(spec, 1, 1);
- recordDescriptors[0] = outRecDesc;
- this.kmerSize = kmerSize;
- this.DoMergeNodeInRead = bMergeNode;
- }
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
- private final int kmerSize;
-
- public static final int InputReadIDField = 0;
- public static final int InputInfoFieldStart = 1;
-
- public static final int OutputNodeIDField = 0;
- public static final int OutputCountOfKmerField = 1;
- public static final int OutputForwardForwardField = 2;
- public static final int OutputForwardReverseField = 3;
- public static final int OutputReverseForwardField = 4;
- public static final int OutputReverseReverseField = 5;
- public static final int OutputKmerBytesField = 6;
-
- public final boolean DoMergeNodeInRead;
-
- public static final RecordDescriptor nodeOutputRec = new RecordDescriptor(new ISerializerDeserializer[7]);
-
- /**
- * (ReadID, Storage[posInRead]={len, PositionList, len, Kmer})
- * to (Position, LengthCount, IncomingPosList, OutgoingPosList, Kmer)
- */
- public class MapReadToNodePushable extends AbstractUnaryInputUnaryOutputOperatorNodePushable {
- public static final int INT_LENGTH = 4;
- private final IHyracksTaskContext ctx;
- private final RecordDescriptor inputRecDesc;
- private final RecordDescriptor outputRecDesc;
-
- private final int LAST_POSITION_ID;
-
- private FrameTupleAccessor accessor;
- private ByteBuffer writeBuffer;
- private ArrayTupleBuilder builder;
- private FrameTupleAppender appender;
-
- private NodeReference curNodeEntry;
- private NodeReference nextNodeEntry;
- private NodeReference nextNextNodeEntry;
-
- private PositionListWritable cachePositionList;
-
- public MapReadToNodePushable(IHyracksTaskContext ctx, RecordDescriptor inputRecDesc,
- RecordDescriptor outputRecDesc) {
- this.ctx = ctx;
- this.inputRecDesc = inputRecDesc;
- this.outputRecDesc = outputRecDesc;
- curNodeEntry = new NodeReference(kmerSize);
- nextNodeEntry = new NodeReference(kmerSize);
- nextNextNodeEntry = new NodeReference(0);
- cachePositionList = new PositionListWritable();
- LAST_POSITION_ID = (inputRecDesc.getFieldCount() - InputInfoFieldStart) / 2; // positive/negative?
- }
-
- @Override
- public void open() throws HyracksDataException {
- accessor = new FrameTupleAccessor(ctx.getFrameSize(), inputRecDesc);
- writeBuffer = ctx.allocateFrame();
- builder = new ArrayTupleBuilder(outputRecDesc.getFieldCount());
- appender = new FrameTupleAppender(ctx.getFrameSize());
- appender.reset(writeBuffer, true);
- writer.open();
- curNodeEntry.reset(kmerSize);
- }
-
- @Override
- public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
- accessor.reset(buffer);
- int tupleCount = accessor.getTupleCount();
- for (int i = 0; i < tupleCount; i++) {
- generateNodeFromRead(i);
- }
- }
-
- private void generateNodeFromRead(int tIndex) throws HyracksDataException {
- int offsetPoslist = accessor.getTupleStartOffset(tIndex) + accessor.getFieldSlotsLength();
- int readID = accessor.getBuffer().getInt(
- offsetPoslist + accessor.getFieldStartOffset(tIndex, InputReadIDField));
- if ((accessor.getFieldCount() - InputInfoFieldStart) % 2 != 0) {
- throw new IllegalArgumentException("field length is odd");
- }
-
- resetNode(curNodeEntry, readID, (byte) (1));
- setForwardIncomingList(curNodeEntry,
- offsetPoslist + accessor.getFieldStartOffset(tIndex, InputInfoFieldStart));
- setKmer(curNodeEntry.getKmer(), offsetPoslist + accessor.getFieldStartOffset(tIndex, InputInfoFieldStart));
- if (curNodeEntry.getNodeID().getPosInRead() == LAST_POSITION_ID) {
- setReverseIncomingList(curNodeEntry,
- offsetPoslist + accessor.getFieldStartOffset(tIndex, InputInfoFieldStart + 1));
- }
-
- // next Node
- readNodesInfo(tIndex, readID, curNodeEntry, nextNodeEntry, InputInfoFieldStart);
-
- for (int i = InputInfoFieldStart + 2; i < accessor.getFieldCount(); i += 2) {
- readNodesInfo(tIndex, readID, nextNodeEntry, nextNextNodeEntry, i);
-
- if (!DoMergeNodeInRead || curNodeEntry.inDegree() > 1 || curNodeEntry.outDegree() > 0
- || nextNodeEntry.inDegree() > 0 || nextNodeEntry.outDegree() > 0
- || nextNextNodeEntry.inDegree() > 0 || nextNextNodeEntry.outDegree() > 0) {
- connect(curNodeEntry, nextNodeEntry);
- outputNode(curNodeEntry);
- curNodeEntry.set(nextNodeEntry);
- nextNodeEntry.set(nextNextNodeEntry);
- continue;
- }
- curNodeEntry.mergeForwardNext(nextNodeEntry, kmerSize);
- nextNodeEntry.set(nextNextNodeEntry);
- }
- outputNode(curNodeEntry);
- }
-
- private void readNodesInfo(int tIndex, int readID, NodeReference curNode, NodeReference nextNode, int curFieldID) {
- // nextNext node
- int offsetPoslist = accessor.getTupleStartOffset(tIndex) + accessor.getFieldSlotsLength();
- if (curFieldID + 2 < accessor.getFieldCount()) {
- setForwardOutgoingList(curNode, offsetPoslist + accessor.getFieldStartOffset(tIndex, curFieldID + 2));
- resetNode(nextNode, readID, (byte) (1 + (curFieldID + 2 - InputInfoFieldStart) / 2));
- setKmer(nextNode.getKmer(), offsetPoslist + accessor.getFieldStartOffset(tIndex, curFieldID + 2));
- setReverseOutgoingList(nextNode, offsetPoslist + accessor.getFieldStartOffset(tIndex, curFieldID + 1));
- if (nextNode.getNodeID().getPosInRead() == LAST_POSITION_ID) {
- setReverseIncomingList(nextNode,
- offsetPoslist + accessor.getFieldStartOffset(tIndex, curFieldID + 3));
- }
- } else {
- resetNode(nextNode, readID, (byte) 0);
- }
- }
-
- private void setKmer(KmerBytesWritable kmer, int offset) {
- ByteBuffer buffer = accessor.getBuffer();
- int length = buffer.getInt(offset);
- offset += INT_LENGTH + length;
- length = buffer.getInt(offset);
- if (kmer.getLength() != length) {
- throw new IllegalArgumentException("kmer kmerByteSize is invalid");
- }
- offset += INT_LENGTH;
- kmer.set(buffer.array(), offset);
- }
-
- private void connect(NodeReference curNode, NodeReference nextNode) {
- curNode.getFFList().append(nextNode.getNodeID());
- nextNode.getRRList().append(curNode.getNodeID());
- }
-
- private void setCachList(int offset) {
- ByteBuffer buffer = accessor.getBuffer();
- int count = PositionListWritable.getCountByDataLength(buffer.getInt(offset));
- cachePositionList.set(count, buffer.array(), offset + INT_LENGTH);
- }
-
- private void resetNode(NodeReference node, int readID, byte posInRead) {
- node.reset(kmerSize);
- node.setNodeID(readID, posInRead);
- }
-
- private void setReverseOutgoingList(NodeReference node, int offset) {
- setCachList(offset);
- for (int i = 0; i < cachePositionList.getCountOfPosition(); i++) {
- PositionWritable pos = cachePositionList.getPosition(i);
- if (pos.getPosInRead() > 0) {
- node.getRFList().append(pos);
- } else {
- node.getRRList().append(pos.getReadID(), (byte) -pos.getPosInRead());
- }
- }
- }
-
- private void setReverseIncomingList(NodeReference node, int offset) {
- setCachList(offset);
- for (int i = 0; i < cachePositionList.getCountOfPosition(); i++) {
- PositionWritable pos = cachePositionList.getPosition(i);
- if (pos.getPosInRead() > 0) {
- if (pos.getPosInRead() > 1) {
- node.getFRList().append(pos.getReadID(), (byte) (pos.getPosInRead() - 1));
- } else {
- throw new IllegalArgumentException("Invalid position");
- }
- } else {
- if (pos.getPosInRead() > -LAST_POSITION_ID) {
- node.getFFList().append(pos.getReadID(), (byte) -(pos.getPosInRead() - 1));
- }
- }
- }
- }
-
- private void setForwardOutgoingList(NodeReference node, int offset) {
- setCachList(offset);
- for (int i = 0; i < cachePositionList.getCountOfPosition(); i++) {
- PositionWritable pos = cachePositionList.getPosition(i);
- if (pos.getPosInRead() > 0) {
- node.getFFList().append(pos);
- } else {
- node.getFRList().append(pos.getReadID(), (byte) -pos.getPosInRead());
- }
- }
- }
-
- private void setForwardIncomingList(NodeReference node, int offset) {
- setCachList(offset);
- for (int i = 0; i < cachePositionList.getCountOfPosition(); i++) {
- PositionWritable pos = cachePositionList.getPosition(i);
- if (pos.getPosInRead() > 0) {
- if (pos.getPosInRead() > 1) {
- node.getRRList().append(pos.getReadID(), (byte) (pos.getPosInRead() - 1));
- } else {
- throw new IllegalArgumentException("position id is invalid");
- }
- } else {
- if (pos.getPosInRead() > -LAST_POSITION_ID) {
- node.getRFList().append(pos.getReadID(), (byte) -(pos.getPosInRead() - 1));
- }
- }
- }
- }
-
- private void outputNode(NodeReference node) throws HyracksDataException {
- if (node.getNodeID().getPosInRead() == 0) {
- return;
- }
- try {
- builder.reset();
- builder.addField(node.getNodeID().getByteArray(), node.getNodeID().getStartOffset(), node.getNodeID()
- .getLength());
- builder.getDataOutput().writeInt(node.getCount());
- builder.addFieldEndOffset();
- builder.addField(node.getFFList().getByteArray(), node.getFFList().getStartOffset(), node.getFFList()
- .getLength());
- builder.addField(node.getFRList().getByteArray(), node.getFRList().getStartOffset(), node.getFRList()
- .getLength());
- builder.addField(node.getRFList().getByteArray(), node.getRFList().getStartOffset(), node.getRFList()
- .getLength());
- builder.addField(node.getRRList().getByteArray(), node.getRRList().getStartOffset(), node.getRRList()
- .getLength());
- builder.addField(node.getKmer().getBytes(), node.getKmer().getOffset(), node.getKmer().getLength());
-
- if (!appender.append(builder.getFieldEndOffsets(), builder.getByteArray(), 0, builder.getSize())) {
- FrameUtils.flushFrame(writeBuffer, writer);
- appender.reset(writeBuffer, true);
- if (!appender.append(builder.getFieldEndOffsets(), builder.getByteArray(), 0, builder.getSize())) {
- throw new IllegalStateException("Failed to append tuplebuilder to frame");
- }
- }
- } catch (IOException e) {
- throw new IllegalStateException("Failed to Add a field to the tupleBuilder.");
- }
- }
-
- @Override
- public void fail() throws HyracksDataException {
- writer.fail();
- }
-
- @Override
- public void close() throws HyracksDataException {
- if (appender.getTupleCount() > 0) {
- FrameUtils.flushFrame(writeBuffer, writer);
- }
- writer.close();
- }
-
- }
-
- @Override
- public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
- IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
- return new MapReadToNodePushable(ctx, recordDescProvider.getInputRecordDescriptor(getActivityId(), 0),
- recordDescriptors[0]);
- }
-
-}
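
The four set*List methods above all apply the same sign convention: a positive posInRead means the neighbouring position was observed on the forward strand, a negative one on the reverse strand, and the sign is stripped before the position is appended. An illustrative sketch of that routing (enum and method names are mine):

    public class EdgeRoutingDemo {
        enum EdgeList { FF, FR, RF, RR } // {forward, reverse} x {forward, reverse}

        // Same rule as the deleted setForwardOutgoingList.
        static EdgeList routeForwardOutgoing(byte posInRead) {
            return posInRead > 0 ? EdgeList.FF : EdgeList.FR;
        }

        // Same rule as the deleted setReverseOutgoingList.
        static EdgeList routeReverseOutgoing(byte posInRead) {
            return posInRead > 0 ? EdgeList.RF : EdgeList.RR;
        }

        public static void main(String[] args) {
            System.out.println(routeForwardOutgoing((byte) 3));  // FF
            System.out.println(routeForwardOutgoing((byte) -3)); // FR
            System.out.println(routeReverseOutgoing((byte) -3)); // RR
        }
    }
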
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/ReadsKeyValueParserFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/ReadsKeyValueParserFactory.java
deleted file mode 100644
index 2134177..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/ReadsKeyValueParserFactory.java
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.dataflow;
-
-import java.nio.ByteBuffer;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-
-import edu.uci.ics.genomix.hyracks.data.primitive.PositionReference;
-import edu.uci.ics.genomix.velvet.oldtype.GeneCode;
-import edu.uci.ics.genomix.velvet.oldtype.KmerBytesWritable;
-import edu.uci.ics.hyracks.api.comm.IFrameWriter;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
-import edu.uci.ics.hyracks.hdfs.api.IKeyValueParser;
-import edu.uci.ics.hyracks.hdfs.api.IKeyValueParserFactory;
-
-public class ReadsKeyValueParserFactory implements IKeyValueParserFactory<LongWritable, Text> {
- private static final long serialVersionUID = 1L;
- private static final Log LOG = LogFactory.getLog(ReadsKeyValueParserFactory.class);
-
- public static final int OutputKmerField = 0;
- public static final int OutputPosition = 1;
-
- private final boolean bReversed;
- private final int readLength;
- private final int kmerSize;
-
- public static final RecordDescriptor readKmerOutputRec = new RecordDescriptor(new ISerializerDeserializer[] { null,
- null });
-
- public ReadsKeyValueParserFactory(int readlength, int k, boolean bGenerateReversed) {
- bReversed = bGenerateReversed;
- this.readLength = readlength;
- this.kmerSize = k;
- }
-
- @Override
- public IKeyValueParser<LongWritable, Text> createKeyValueParser(final IHyracksTaskContext ctx) {
- final ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(2);
- final ByteBuffer outputBuffer = ctx.allocateFrame();
- final FrameTupleAppender outputAppender = new FrameTupleAppender(ctx.getFrameSize());
- outputAppender.reset(outputBuffer, true);
-
- return new IKeyValueParser<LongWritable, Text>() {
-
- private KmerBytesWritable kmer = new KmerBytesWritable(kmerSize);
- private PositionReference pos = new PositionReference();
-
- @Override
- public void parse(LongWritable key, Text value, IFrameWriter writer) throws HyracksDataException {
- String[] geneLine = value.toString().split("\\t"); // line format: readID<TAB>sequence
- if (geneLine.length != 2) {
- return;
- }
- int readID = 0;
- try {
- readID = Integer.parseInt(geneLine[0]);
- } catch (NumberFormatException e) {
- LOG.warn("Invalid data ");
- return;
- }
-
- Pattern genePattern = Pattern.compile("[AGCT]+");
- Matcher geneMatcher = genePattern.matcher(geneLine[1]);
- boolean isValid = geneMatcher.matches();
- if (isValid) {
- if (geneLine[1].length() != readLength) {
- LOG.warn("Invalid readlength at: " + readID);
- return;
- }
- SplitReads(readID, geneLine[1].getBytes(), writer);
- }
- }
-
- private void SplitReads(int readID, byte[] array, IFrameWriter writer) {
- /** first kmer */
- if (kmerSize >= array.length) {
- return;
- }
- kmer.setByRead(array, 0);
- InsertToFrame(kmer, readID, 1, writer);
-
- /** middle kmer */
- for (int i = kmerSize; i < array.length; i++) {
- kmer.shiftKmerWithNextChar(array[i]);
- InsertToFrame(kmer, readID, i - kmerSize + 2, writer);
- }
-
- if (bReversed) {
- /** first kmer */
- kmer.setByReadReverse(array, 0);
- InsertToFrame(kmer, readID, -1, writer);
- /** middle kmer */
- for (int i = kmerSize; i < array.length; i++) {
- kmer.shiftKmerWithPreCode(GeneCode.getPairedCodeFromSymbol(array[i]));
- InsertToFrame(kmer, readID, -(i - kmerSize + 2), writer);
- }
- }
- }
-
- private void InsertToFrame(KmerBytesWritable kmer, int readID, int posInRead, IFrameWriter writer) {
- try {
- if (Math.abs(posInRead) > 127) {
- throw new IllegalArgumentException("Position id is beyond 127 at " + readID);
- }
- tupleBuilder.reset();
- tupleBuilder.addField(kmer.getBytes(), kmer.getOffset(), kmer.getLength());
- pos.set(readID, (byte) posInRead);
- tupleBuilder.addField(pos.getByteArray(), pos.getStartOffset(), pos.getLength());
-
- if (!outputAppender.append(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray(), 0,
- tupleBuilder.getSize())) {
- FrameUtils.flushFrame(outputBuffer, writer);
- outputAppender.reset(outputBuffer, true);
- if (!outputAppender.append(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray(), 0,
- tupleBuilder.getSize())) {
- throw new IllegalStateException(
- "Failed to copy an record into a frame: the record kmerByteSize is too large.");
- }
- }
- } catch (Exception e) {
- throw new IllegalStateException(e);
- }
- }
-
- @Override
- public void open(IFrameWriter writer) throws HyracksDataException {
- }
-
- @Override
- public void close(IFrameWriter writer) throws HyracksDataException {
- FrameUtils.flushFrame(outputBuffer, writer);
- }
- };
- }
-
-}
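
SplitReads above enumerates every k-mer of a read with a sliding window: the first k-mer is built directly, each later one by shifting in the next character, and posInRead runs from 1 to readLength - k + 1 (negated for the reverse strand). A self-contained sketch of the forward enumeration, using plain Strings in place of KmerBytesWritable:

    public class KmerSplitDemo {
        public static void main(String[] args) {
            String read = "AGCTTAGC";
            int k = 5;
            // posInRead is 1-based, matching how InsertToFrame is called above.
            for (int pos = 1; pos + k - 1 <= read.length(); pos++) {
                System.out.println(pos + "\t" + read.substring(pos - 1, pos - 1 + k));
            }
            // prints: 1 AGCTT / 2 GCTTA / 3 CTTAG / 4 TTAGC
        }
    }
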
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/AggregateKmerAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/AggregateKmerAggregateFactory.java
deleted file mode 100644
index bd70f19..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/AggregateKmerAggregateFactory.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.dataflow.aggregators;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import edu.uci.ics.genomix.hyracks.data.primitive.PositionReference;
-import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
-
-public class AggregateKmerAggregateFactory implements IAggregatorDescriptorFactory {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- @Override
- public IAggregatorDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor,
- RecordDescriptor outRecordDescriptor, int[] keyFields, int[] keyFieldsInPartialResults)
- throws HyracksDataException {
- return new IAggregatorDescriptor() {
- private PositionReference position = new PositionReference();
-
- protected int getOffSet(IFrameTupleAccessor accessor, int tIndex, int fieldId) {
- int tupleOffset = accessor.getTupleStartOffset(tIndex);
- int fieldStart = accessor.getFieldStartOffset(tIndex, fieldId);
- int offset = tupleOffset + fieldStart + accessor.getFieldSlotsLength();
- return offset;
- }
-
- @Override
- public void reset() {
- }
-
- @Override
- public void close() {
-
- }
-
- @Override
- public AggregateState createAggregateStates() {
- return new AggregateState(new ArrayBackedValueStorage());
- }
-
- @Override
- public void init(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
- AggregateState state) throws HyracksDataException {
- ArrayBackedValueStorage inputVal = (ArrayBackedValueStorage) state.state;
- inputVal.reset();
- position.setNewReference(accessor.getBuffer().array(), getOffSet(accessor, tIndex, 1));
- inputVal.append(position);
-
- // make an empty field
- tupleBuilder.addFieldEndOffset();
- }
-
- @Override
- public void aggregate(IFrameTupleAccessor accessor, int tIndex, IFrameTupleAccessor stateAccessor,
- int stateTupleIndex, AggregateState state) throws HyracksDataException {
- ArrayBackedValueStorage inputVal = (ArrayBackedValueStorage) state.state;
- position.setNewReference(accessor.getBuffer().array(), getOffSet(accessor, tIndex, 1));
- inputVal.append(position);
- }
-
- @Override
- public void outputPartialResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
- AggregateState state) throws HyracksDataException {
- throw new IllegalStateException("partial result method should not be called");
- }
-
- @Override
- public void outputFinalResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
- AggregateState state) throws HyracksDataException {
- DataOutput fieldOutput = tupleBuilder.getDataOutput();
- ArrayBackedValueStorage inputVal = (ArrayBackedValueStorage) state.state;
- try {
- fieldOutput.write(inputVal.getByteArray(), inputVal.getStartOffset(), inputVal.getLength());
- tupleBuilder.addFieldEndOffset();
- } catch (IOException e) {
- throw new HyracksDataException("I/O exception when writing aggregation to the output buffer.");
- }
- }
-
- };
- }
-
-}
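
The aggregator above follows the pre-clustered group-by shape: init() resets the per-group state and appends the first 5-byte position, aggregate() appends one position per further tuple of the same k-mer, and outputFinalResult() writes the concatenated list as a single field. A minimal sketch of that accumulation, using a plain ByteArrayOutputStream in place of ArrayBackedValueStorage (the data values are made up):

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    public class KmerGroupByDemo {
        public static void main(String[] args) throws IOException {
            ByteArrayOutputStream state = new ByteArrayOutputStream(); // per-k-mer state
            byte[][] positions = { { 0, 0, 0, 1, 2 }, { 0, 0, 0, 7, -3 } }; // (readID, posInRead)

            state.reset();              // init(): first tuple of the group
            for (byte[] p : positions) {
                state.write(p);         // aggregate(): one 5-byte position per tuple
            }
            // outputFinalResult(): flush the concatenated position list as one field.
            System.out.println(state.size() + " bytes = 2 positions x 5 bytes each");
        }
    }
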
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/AggregateReadIDAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/AggregateReadIDAggregateFactory.java
deleted file mode 100644
index 92b85b3..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/AggregateReadIDAggregateFactory.java
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.dataflow.aggregators;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import edu.uci.ics.genomix.hyracks.data.accessors.ByteSerializerDeserializer;
-import edu.uci.ics.genomix.hyracks.dataflow.MapKmerPositionToReadOperator;
-import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
-
-public class AggregateReadIDAggregateFactory implements IAggregatorDescriptorFactory {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- public static final int InputReadIDField = MapKmerPositionToReadOperator.OutputReadIDField;
- public static final int InputPosInReadField = MapKmerPositionToReadOperator.OutputPosInReadField;
- public static final int InputPositionListField = MapKmerPositionToReadOperator.OutputOtherReadIDListField;
- public static final int InputKmerField = MapKmerPositionToReadOperator.OutputKmerField;
-
- public static final int OutputReadIDField = 0;
- public static final int OutputPositionListField = 1;
-
- public static final RecordDescriptor readIDAggregateRec = new RecordDescriptor(new ISerializerDeserializer[] {
- null, null });
-
- public AggregateReadIDAggregateFactory() {
- }
-
- /**
- * (ReadID,PosInRead,{OtherPosition,...},Kmer) to
- * (ReadID, {(PosInRead, {OtherPosition,...}, Kmer), ...})
- */
- @Override
- public IAggregatorDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor,
- RecordDescriptor outRecordDescriptor, int[] keyFields, int[] keyFieldsInPartialResults)
- throws HyracksDataException {
- return new IAggregatorDescriptor() {
-
- protected int getOffSet(IFrameTupleAccessor accessor, int tIndex, int fieldId) {
- int tupleOffset = accessor.getTupleStartOffset(tIndex);
- int fieldStart = accessor.getFieldStartOffset(tIndex, fieldId);
- int offset = tupleOffset + fieldStart + accessor.getFieldSlotsLength();
- return offset;
- }
-
- protected byte readByteField(IFrameTupleAccessor accessor, int tIndex, int fieldId) {
- return ByteSerializerDeserializer.getByte(accessor.getBuffer().array(),
- getOffSet(accessor, tIndex, fieldId));
- }
-
- @Override
- public AggregateState createAggregateStates() {
- return new AggregateState(new ArrayBackedValueStorage());
- }
-
- @Override
- public void init(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
- AggregateState state) throws HyracksDataException {
- ArrayBackedValueStorage storage = (ArrayBackedValueStorage) state.state;
- storage.reset();
- DataOutput out = storage.getDataOutput();
- byte posInRead = readByteField(accessor, tIndex, InputPosInReadField);
-
- try {
- out.writeByte(posInRead);
- writeBytesToStorage(out, accessor, tIndex, InputPositionListField);
- if (posInRead > 0) {
- writeBytesToStorage(out, accessor, tIndex, InputKmerField);
- }
- } catch (IOException e) {
- throw new HyracksDataException("Failed to write into temporary storage");
- }
- // make a fake field
- tupleBuilder.addFieldEndOffset();
- }
-
- private void writeBytesToStorage(DataOutput out, IFrameTupleAccessor accessor, int tIndex, int idField)
- throws IOException {
- int len = accessor.getFieldLength(tIndex, idField);
- out.writeInt(len);
- if (len > 0) {
- out.write(accessor.getBuffer().array(), getOffSet(accessor, tIndex, idField), len);
- }
- }
-
- @Override
- public void reset() {
-
- }
-
- @Override
- public void aggregate(IFrameTupleAccessor accessor, int tIndex, IFrameTupleAccessor stateAccessor,
- int stateTupleIndex, AggregateState state) throws HyracksDataException {
- ArrayBackedValueStorage storage = (ArrayBackedValueStorage) state.state;
- DataOutput out = storage.getDataOutput();
- byte posInRead = readByteField(accessor, tIndex, InputPosInReadField);
-
- try {
- out.writeByte(posInRead);
- writeBytesToStorage(out, accessor, tIndex, InputPositionListField);
- if (posInRead > 0) {
- writeBytesToStorage(out, accessor, tIndex, InputKmerField);
- }
- } catch (IOException e) {
- throw new HyracksDataException("Failed to write into temporary storage");
- }
- }
-
- @Override
- public void outputPartialResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
- AggregateState state) throws HyracksDataException {
- throw new IllegalStateException("partial result method should not be called");
- }
-
- @Override
- public void outputFinalResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
- AggregateState state) throws HyracksDataException {
- DataOutput fieldOutput = tupleBuilder.getDataOutput();
- ArrayBackedValueStorage inputVal = (ArrayBackedValueStorage) state.state;
- try {
- fieldOutput.write(inputVal.getByteArray(), inputVal.getStartOffset(), inputVal.getLength());
- tupleBuilder.addFieldEndOffset();
-
- } catch (IOException e) {
- throw new HyracksDataException("I/O exception when writing aggregation to the output buffer.");
- }
- }
-
- @Override
- public void close() {
-
- }
-
- };
- }
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeKmerAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeKmerAggregateFactory.java
deleted file mode 100644
index 8620d39..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeKmerAggregateFactory.java
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.dataflow.aggregators;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-import edu.uci.ics.genomix.hyracks.data.primitive.PositionReference;
-import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
-
-public class MergeKmerAggregateFactory implements IAggregatorDescriptorFactory {
- private static final long serialVersionUID = 1L;
- private static final Log LOG = LogFactory.getLog(MergeKmerAggregateFactory.class);
-
- @Override
- public IAggregatorDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor,
- RecordDescriptor outRecordDescriptor, int[] keyFields, int[] keyFieldsInPartialResults)
- throws HyracksDataException {
- final int frameSize = ctx.getFrameSize();
- return new IAggregatorDescriptor() {
-
- private PositionReference position = new PositionReference();
-
- @Override
- public AggregateState createAggregateStates() {
- return new AggregateState(new ArrayBackedValueStorage());
- }
-
- @Override
- public void init(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
- AggregateState state) throws HyracksDataException {
- ArrayBackedValueStorage inputVal = (ArrayBackedValueStorage) state.state;
- inputVal.reset();
- int leadOffset = accessor.getTupleStartOffset(tIndex) + accessor.getFieldSlotsLength();
- for (int offset = accessor.getFieldStartOffset(tIndex, 1); offset < accessor.getFieldEndOffset(tIndex,
- 1); offset += PositionReference.LENGTH) {
- position.setNewReference(accessor.getBuffer().array(), leadOffset + offset);
- inputVal.append(position);
- }
- // make a fake field to satisfy the caller
- tupleBuilder.addFieldEndOffset();
- }
-
- @Override
- public void reset() {
-
- }
-
- @Override
- public void aggregate(IFrameTupleAccessor accessor, int tIndex, IFrameTupleAccessor stateAccessor,
- int stateTupleIndex, AggregateState state) throws HyracksDataException {
- ArrayBackedValueStorage inputVal = (ArrayBackedValueStorage) state.state;
- int leadOffset = accessor.getTupleStartOffset(tIndex) + accessor.getFieldSlotsLength();
- for (int offset = accessor.getFieldStartOffset(tIndex, 1); offset < accessor.getFieldEndOffset(tIndex,
- 1); offset += PositionReference.LENGTH) {
- position.setNewReference(accessor.getBuffer().array(), leadOffset + offset);
- inputVal.append(position);
- }
- }
-
- @Override
- public void outputPartialResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
- AggregateState state) throws HyracksDataException {
- throw new IllegalStateException("partial result method should not be called");
- }
-
- @Override
- public void outputFinalResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
- AggregateState state) throws HyracksDataException {
- DataOutput fieldOutput = tupleBuilder.getDataOutput();
- ArrayBackedValueStorage inputVal = (ArrayBackedValueStorage) state.state;
- try {
- if (inputVal.getLength() > frameSize / 2) {
- LOG.warn("MergeKmer: output data kmerByteSize is too big: " + inputVal.getLength());
- }
- fieldOutput.write(inputVal.getByteArray(), inputVal.getStartOffset(), inputVal.getLength());
- tupleBuilder.addFieldEndOffset();
-
- } catch (IOException e) {
- throw new HyracksDataException("I/O exception when writing aggregation to the output buffer.");
- }
- }
-
- @Override
- public void close() {
-
- }
-
- };
-
- }
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeReadIDAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeReadIDAggregateFactory.java
deleted file mode 100644
index f2eedde..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeReadIDAggregateFactory.java
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.dataflow.aggregators;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
-
-public class MergeReadIDAggregateFactory implements IAggregatorDescriptorFactory {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- private final int ValidPosCount;
- private static final Log LOG = LogFactory.getLog(MergeReadIDAggregateFactory.class);
-
- public MergeReadIDAggregateFactory(int readLength, int kmerLength) {
- ValidPosCount = getPositionCount(readLength, kmerLength);
- }
-
- public static int getPositionCount(int readLength, int kmerLength) {
- return readLength - kmerLength + 1;
- }
-
- public static final int InputReadIDField = AggregateReadIDAggregateFactory.OutputReadIDField;
- public static final int InputPositionListField = AggregateReadIDAggregateFactory.OutputPositionListField;
-
- public static final int BYTE_SIZE = 1;
- public static final int INTEGER_SIZE = 4;
-
- /**
- * Aggregates (ReadID, {(PosInRead, {OtherPosition,...}, Kmer), ...}) as
- * (ReadID, Storage[posInRead]={PositionList,Kmer})
- */
- @Override
- public IAggregatorDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor,
- RecordDescriptor outRecordDescriptor, int[] keyFields, int[] keyFieldsInPartialResults)
- throws HyracksDataException {
- final int frameSize = ctx.getFrameSize();
- return new IAggregatorDescriptor() {
-
- class PositionArray {
- public ArrayBackedValueStorage[] forwardStorages;
- public ArrayBackedValueStorage[] reverseStorages;
- public int count;
-
- public PositionArray() {
- forwardStorages = new ArrayBackedValueStorage[ValidPosCount];
- reverseStorages = new ArrayBackedValueStorage[ValidPosCount];
- for (int i = 0; i < ValidPosCount; i++) {
- forwardStorages[i] = new ArrayBackedValueStorage();
- reverseStorages[i] = new ArrayBackedValueStorage();
- }
- count = 0;
- }
-
- public void reset() {
- for (int i = 0; i < ValidPosCount; i++) {
- forwardStorages[i].reset();
- reverseStorages[i].reset();
- }
- count = 0;
- }
- }
-
- @Override
- public AggregateState createAggregateStates() {
-
- return new AggregateState(new PositionArray());
- }
-
- @Override
- public void init(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
- AggregateState state) throws HyracksDataException {
- PositionArray positionArray = (PositionArray) state.state;
- positionArray.reset();
-
- pushIntoStorage(accessor, tIndex, positionArray);
-
- // make fake fields
- for (int i = 0; i < ValidPosCount * 2; i++) {
- tupleBuilder.addFieldEndOffset();
- }
- }
-
- private void pushIntoStorage(IFrameTupleAccessor accessor, int tIndex, PositionArray positionArray)
- throws HyracksDataException {
- int leadbyte = accessor.getTupleStartOffset(tIndex) + accessor.getFieldSlotsLength();
- int fieldOffset = leadbyte + accessor.getFieldStartOffset(tIndex, InputPositionListField);
- ByteBuffer fieldBuffer = accessor.getBuffer();
-
- while (fieldOffset < leadbyte + accessor.getFieldEndOffset(tIndex, InputPositionListField)) {
- byte posInRead = fieldBuffer.get(fieldOffset);
-
- ArrayBackedValueStorage[] storage = positionArray.forwardStorages;
- boolean hasKmer = true;
- if (posInRead < 0) {
- storage = positionArray.reverseStorages;
- posInRead = (byte) -posInRead;
- hasKmer = false;
- }
- if (storage[posInRead - 1].getLength() > 0) {
- throw new IllegalArgumentException("Reentering into an exist storage");
- }
- fieldOffset += BYTE_SIZE;
-
- // read poslist
- fieldOffset += writeBytesToStorage(storage[posInRead - 1], fieldBuffer, fieldOffset);
- // read Kmer
- if (hasKmer) {
- fieldOffset += writeBytesToStorage(storage[posInRead - 1], fieldBuffer, fieldOffset);
- }
-
- positionArray.count += 1;
- }
-
- }
-
- private int writeBytesToStorage(ArrayBackedValueStorage storage, ByteBuffer fieldBuffer, int fieldOffset)
- throws HyracksDataException {
- int lengthPosList = fieldBuffer.getInt(fieldOffset);
- try {
- storage.getDataOutput().writeInt(lengthPosList);
- fieldOffset += INTEGER_SIZE;
- if (lengthPosList > 0) {
- storage.getDataOutput().write(fieldBuffer.array(), fieldOffset, lengthPosList);
- }
- } catch (IOException e) {
- throw new HyracksDataException("Failed to write into temporary storage");
- }
- return lengthPosList + INTEGER_SIZE;
- }
-
- @Override
- public void reset() {
-
- }
-
- @Override
- public void aggregate(IFrameTupleAccessor accessor, int tIndex, IFrameTupleAccessor stateAccessor,
- int stateTupleIndex, AggregateState state) throws HyracksDataException {
- PositionArray positionArray = (PositionArray) state.state;
- pushIntoStorage(accessor, tIndex, positionArray);
- }
-
- @Override
- public void outputPartialResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
- AggregateState state) throws HyracksDataException {
- throw new IllegalStateException("partial result method should not be called");
- }
-
- @Override
- public void outputFinalResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
- AggregateState state) throws HyracksDataException {
- PositionArray positionArray = (PositionArray) state.state;
-
- if (positionArray.count != ValidPosCount * 2) {
- throw new IllegalStateException("Final aggregate position number is invalid");
- }
- DataOutput fieldOutput = tupleBuilder.getDataOutput();
- try {
- int totalSize = 0;
- for (int i = 0; i < ValidPosCount; i++) {
- fieldOutput.write(positionArray.forwardStorages[i].getByteArray(),
- positionArray.forwardStorages[i].getStartOffset(),
- positionArray.forwardStorages[i].getLength());
- tupleBuilder.addFieldEndOffset();
-
- fieldOutput.write(positionArray.reverseStorages[i].getByteArray(),
- positionArray.reverseStorages[i].getStartOffset(),
- positionArray.reverseStorages[i].getLength());
- tupleBuilder.addFieldEndOffset();
-
- totalSize += positionArray.forwardStorages[i].getLength()
- + positionArray.reverseStorages[i].getLength();
- }
- if (totalSize > frameSize / 2) {
- int leadbyte = accessor.getTupleStartOffset(tIndex) + accessor.getFieldSlotsLength();
- int readID = accessor.getBuffer().getInt(
- leadbyte + accessor.getFieldStartOffset(tIndex, InputReadIDField));
- LOG.warn("MergeReadID on read:" + readID + " is of kmerByteSize: " + totalSize + ", current frameSize:"
- + frameSize + "\n Recommendate to enlarge the FrameSize");
- }
- if (totalSize > frameSize) {
- for (StackTraceElement ste : Thread.currentThread().getStackTrace()) {
- System.out.println(ste);
- }
- throw new HyracksDataException("Data is too long");
- }
- } catch (IOException e) {
- throw new HyracksDataException("I/O exception when writing aggregation to the output buffer.");
- }
- }
-
- @Override
- public void close() {
-
- }
-
- };
- }
-}
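
The aggregator above parses each input field as a sequence of length-prefixed blocks: a 4-byte length followed by that many payload bytes, consumed once for the position list and once more for the kmer when one is present. Below is a minimal sketch of that copy step, assuming heap-backed ByteBuffers as used by the frame accessor; the class and helper names are illustrative only.

    import java.io.ByteArrayOutputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;
    import java.nio.ByteBuffer;

    public class LengthPrefixedCopy {
        static final int INTEGER_SIZE = 4;

        // Copies one length-prefixed block from buffer[offset] into out;
        // returns the number of bytes consumed, as writeBytesToStorage does.
        static int copyBlock(ByteBuffer buffer, int offset, DataOutputStream out) throws IOException {
            int length = buffer.getInt(offset);
            out.writeInt(length);
            if (length > 0) {
                out.write(buffer.array(), offset + INTEGER_SIZE, length);
            }
            return INTEGER_SIZE + length;
        }

        public static void main(String[] args) throws IOException {
            // Field layout: [len=3][a b c][len=2][d e]
            ByteBuffer field = ByteBuffer.allocate(13);
            field.putInt(3).put(new byte[] { 'a', 'b', 'c' }).putInt(2).put(new byte[] { 'd', 'e' });

            ByteArrayOutputStream storage = new ByteArrayOutputStream();
            DataOutputStream out = new DataOutputStream(storage);
            int offset = 0;
            offset += copyBlock(field, offset, out); // position list
            offset += copyBlock(field, offset, out); // kmer
            System.out.println("consumed " + offset + " of " + field.capacity() + " bytes");
        }
    }
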
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/KMerSequenceWriterFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/KMerSequenceWriterFactory.java
deleted file mode 100644
index def046b..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/KMerSequenceWriterFactory.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.dataflow.io;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.SequenceFile.CompressionType;
-import org.apache.hadoop.io.SequenceFile.Writer;
-import org.apache.hadoop.mapred.JobConf;
-
-import edu.uci.ics.genomix.hyracks.job.GenomixJobConf;
-import edu.uci.ics.genomix.velvet.oldtype.PositionListWritable;
-import edu.uci.ics.genomix.velvet.oldtype.PositionWritable;
-import edu.uci.ics.genomix.velvet.oldtype.KmerBytesWritable;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
-import edu.uci.ics.hyracks.hdfs.dataflow.ConfFactory;
-
-@SuppressWarnings("deprecation")
-public class KMerSequenceWriterFactory implements ITupleWriterFactory {
-
- private static final long serialVersionUID = 1L;
- private ConfFactory confFactory;
- private final int kmerlength;
-
- public static final int InputKmerField = 0;
- public static final int InputPositionListField = 1;
-
- public KMerSequenceWriterFactory(JobConf conf) throws HyracksDataException {
- this.confFactory = new ConfFactory(conf);
- this.kmerlength = conf.getInt(GenomixJobConf.KMER_LENGTH, GenomixJobConf.DEFAULT_KMERLEN);
- }
-
- public class TupleWriter implements ITupleWriter {
- public TupleWriter(ConfFactory cf) {
- this.cf = cf;
- }
-
- ConfFactory cf;
- Writer writer = null;
-
- KmerBytesWritable reEnterKey = new KmerBytesWritable(kmerlength);
- PositionListWritable plist = new PositionListWritable();
-
- /**
- * The assumption is that the output never changes the source!
- */
- @Override
- public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
- try {
- if (reEnterKey.getLength() > tuple.getFieldLength(InputKmerField)) {
- throw new IllegalArgumentException("Not enough kmer bytes");
- }
- reEnterKey.setNewReference(tuple.getFieldData(InputKmerField), tuple.getFieldStart(InputKmerField));
- int countOfPos = tuple.getFieldLength(InputPositionListField) / PositionWritable.LENGTH;
- if (tuple.getFieldLength(InputPositionListField) % PositionWritable.LENGTH != 0) {
- throw new IllegalArgumentException("Invalid count of position byte");
- }
- plist.setNewReference(countOfPos, tuple.getFieldData(InputPositionListField),
- tuple.getFieldStart(InputPositionListField));
-
- writer.append(reEnterKey, plist);
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- }
-
- @Override
- public void open(DataOutput output) throws HyracksDataException {
- try {
- writer = SequenceFile.createWriter(cf.getConf(), (FSDataOutputStream) output, KmerBytesWritable.class,
- PositionListWritable.class, CompressionType.NONE, null);
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- }
-
- @Override
- public void close(DataOutput output) throws HyracksDataException {
- }
- }
-
- @Override
- public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException {
- return new TupleWriter(confFactory);
- }
-
-}
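
KMerSequenceWriterFactory above streams tuples into a Hadoop SequenceFile of (KmerBytesWritable, PositionListWritable) pairs. For reference, a minimal standalone sketch of the same SequenceFile.Writer usage, with standard Writables standing in for the genomix types and an illustrative output path:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.Text;

    public class SequenceFileSketch {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            Path path = new Path("/tmp/kmers.seq"); // illustrative path
            SequenceFile.Writer writer = SequenceFile.createWriter(
                    FileSystem.get(conf), conf, path, Text.class, IntWritable.class);
            try {
                writer.append(new Text("ACGTA"), new IntWritable(3)); // key, value
            } finally {
                writer.close();
            }
        }
    }
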
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/KMerTextWriterFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/KMerTextWriterFactory.java
deleted file mode 100644
index 652a6f2..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/KMerTextWriterFactory.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hyracks.dataflow.io;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import edu.uci.ics.genomix.velvet.oldtype.PositionListWritable;
-import edu.uci.ics.genomix.velvet.oldtype.PositionWritable;
-import edu.uci.ics.genomix.velvet.oldtype.KmerBytesWritable;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
-
-public class KMerTextWriterFactory implements ITupleWriterFactory {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- private final int kmerSize;
-
- public KMerTextWriterFactory(int k) {
- kmerSize = k;
- }
-
- public class TupleWriter implements ITupleWriter {
- private KmerBytesWritable kmer = new KmerBytesWritable(kmerSize);
- private PositionListWritable plist = new PositionListWritable();
-
- @Override
- public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
- try {
- if (kmer.getLength() > tuple.getFieldLength(KMerSequenceWriterFactory.InputKmerField)) {
- throw new IllegalArgumentException("Not enough kmer bytes");
- }
- kmer.setNewReference(tuple.getFieldData(KMerSequenceWriterFactory.InputKmerField),
- tuple.getFieldStart(KMerSequenceWriterFactory.InputKmerField));
- int countOfPos = tuple.getFieldLength(KMerSequenceWriterFactory.InputPositionListField)
- / PositionWritable.LENGTH;
- if (tuple.getFieldLength(KMerSequenceWriterFactory.InputPositionListField) % PositionWritable.LENGTH != 0) {
- throw new IllegalArgumentException("Invalid count of position byte");
- }
- plist.setNewReference(countOfPos, tuple.getFieldData(KMerSequenceWriterFactory.InputPositionListField),
- tuple.getFieldStart(KMerSequenceWriterFactory.InputPositionListField));
-
- output.write(kmer.toString().getBytes());
- output.writeByte('\t');
- output.write(plist.toString().getBytes());
- output.writeByte('\n');
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- }
-
- @Override
- public void open(DataOutput output) throws HyracksDataException {
-
- }
-
- @Override
- public void close(DataOutput output) throws HyracksDataException {
- }
- }
-
- @Override
- public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException {
- return new TupleWriter();
- }
-
-}
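
The text writer above emits one tab-separated line per tuple: the kmer string, a tab, the position list, and a newline. A trivial sketch of that record format (the strings below are made up for illustration):

    import java.io.ByteArrayOutputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    public class TextRecordSketch {
        // Mirrors the write() above: kmer<TAB>positions<NEWLINE>
        static void writeRecord(DataOutputStream output, String kmer, String positions) throws IOException {
            output.write(kmer.getBytes());
            output.writeByte('\t');
            output.write(positions.getBytes());
            output.writeByte('\n');
        }

        public static void main(String[] args) throws IOException {
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            writeRecord(new DataOutputStream(bytes), "ACGTA", "[(12,3),(40,7)]");
            System.out.print(bytes.toString());
        }
    }
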
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/NodeSequenceWriterFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/NodeSequenceWriterFactory.java
deleted file mode 100644
index e116ab9..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/NodeSequenceWriterFactory.java
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hyracks.dataflow.io;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.SequenceFile.CompressionType;
-import org.apache.hadoop.io.SequenceFile.Writer;
-import org.apache.hadoop.mapred.JobConf;
-
-import edu.uci.ics.genomix.data.Marshal;
-import edu.uci.ics.genomix.hyracks.dataflow.MapReadToNodeOperator;
-import edu.uci.ics.genomix.hyracks.job.GenomixJobConf;
-import edu.uci.ics.genomix.velvet.oldtype.NodeWritable;
-import edu.uci.ics.genomix.velvet.oldtype.PositionWritable;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
-import edu.uci.ics.hyracks.hdfs.dataflow.ConfFactory;
-
-@SuppressWarnings("deprecation")
-public class NodeSequenceWriterFactory implements ITupleWriterFactory {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- public static final int InputNodeIDField = MapReadToNodeOperator.OutputNodeIDField;
- public static final int InputCountOfKmerField = MapReadToNodeOperator.OutputCountOfKmerField;
- public static final int InputFFField = MapReadToNodeOperator.OutputForwardForwardField;
- public static final int InputFRField = MapReadToNodeOperator.OutputForwardReverseField;
- public static final int InputRFField = MapReadToNodeOperator.OutputReverseForwardField;
- public static final int InputRRField = MapReadToNodeOperator.OutputReverseReverseField;
-
- public static final int InputKmerBytesField = MapReadToNodeOperator.OutputKmerBytesField;
-
- private ConfFactory confFactory;
- private final int kmerlength;
-
- public NodeSequenceWriterFactory(JobConf conf) throws HyracksDataException {
- this.confFactory = new ConfFactory(conf);
- this.kmerlength = conf.getInt(GenomixJobConf.KMER_LENGTH, GenomixJobConf.DEFAULT_KMERLEN);
- }
-
- public class TupleWriter implements ITupleWriter {
-
- public TupleWriter(ConfFactory confFactory) {
- this.cf = confFactory;
- }
-
- ConfFactory cf;
- Writer writer = null;
- NodeWritable node = new NodeWritable(kmerlength);
-
- @Override
- public void open(DataOutput output) throws HyracksDataException {
- try {
- writer = SequenceFile.createWriter(cf.getConf(), (FSDataOutputStream) output, NodeWritable.class,
- NullWritable.class, CompressionType.NONE, null);
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- }
-
- @Override
- public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
- node.getNodeID().setNewReference(tuple.getFieldData(InputNodeIDField),
- tuple.getFieldStart(InputNodeIDField));
- node.getFFList().setNewReference(tuple.getFieldLength(InputFFField) / PositionWritable.LENGTH,
- tuple.getFieldData(InputFFField), tuple.getFieldStart(InputFFField));
- node.getFRList().setNewReference(tuple.getFieldLength(InputFRField) / PositionWritable.LENGTH,
- tuple.getFieldData(InputFRField), tuple.getFieldStart(InputFRField));
- node.getRFList().setNewReference(tuple.getFieldLength(InputRFField) / PositionWritable.LENGTH,
- tuple.getFieldData(InputRFField), tuple.getFieldStart(InputRFField));
- node.getRRList().setNewReference(tuple.getFieldLength(InputRRField) / PositionWritable.LENGTH,
- tuple.getFieldData(InputRRField), tuple.getFieldStart(InputRRField));
-
- node.getKmer().setNewReference(
- Marshal.getInt(tuple.getFieldData(NodeSequenceWriterFactory.InputCountOfKmerField),
- tuple.getFieldStart(NodeSequenceWriterFactory.InputCountOfKmerField)),
- tuple.getFieldData(InputKmerBytesField), tuple.getFieldStart(InputKmerBytesField));
-
- try {
- writer.append(node, NullWritable.get());
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- }
-
- @Override
- public void close(DataOutput output) throws HyracksDataException {
- }
-
- }
-
- /**
- * Input schema:
- * (Position, LengthCount, IncomingPosList, OutgoingPosList, Kmer)
- */
- @Override
- public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException {
- return new TupleWriter(confFactory);
- }
-
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/NodeTextWriterFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/NodeTextWriterFactory.java
deleted file mode 100644
index bc00aa5..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/NodeTextWriterFactory.java
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hyracks.dataflow.io;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import edu.uci.ics.genomix.data.Marshal;
-import edu.uci.ics.genomix.velvet.oldtype.NodeWritable;
-import edu.uci.ics.genomix.velvet.oldtype.PositionWritable;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
-
-public class NodeTextWriterFactory implements ITupleWriterFactory {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
- private final int initialKmerSize;
-
- public NodeTextWriterFactory(int initialKmerSize) {
- this.initialKmerSize = initialKmerSize;
- }
-
- @Override
- public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException {
- return new ITupleWriter() {
- NodeWritable node = new NodeWritable(initialKmerSize);
-
- @Override
- public void open(DataOutput output) throws HyracksDataException {
-
- }
-
- @Override
- public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
- node.getNodeID().setNewReference(tuple.getFieldData(NodeSequenceWriterFactory.InputNodeIDField),
- tuple.getFieldStart(NodeSequenceWriterFactory.InputNodeIDField));
- node.getFFList().setNewReference(
- tuple.getFieldLength(NodeSequenceWriterFactory.InputFFField) / PositionWritable.LENGTH,
- tuple.getFieldData(NodeSequenceWriterFactory.InputFFField),
- tuple.getFieldStart(NodeSequenceWriterFactory.InputFFField));
- node.getFRList().setNewReference(
- tuple.getFieldLength(NodeSequenceWriterFactory.InputFRField) / PositionWritable.LENGTH,
- tuple.getFieldData(NodeSequenceWriterFactory.InputFRField),
- tuple.getFieldStart(NodeSequenceWriterFactory.InputFRField));
- node.getRFList().setNewReference(
- tuple.getFieldLength(NodeSequenceWriterFactory.InputRFField) / PositionWritable.LENGTH,
- tuple.getFieldData(NodeSequenceWriterFactory.InputRFField),
- tuple.getFieldStart(NodeSequenceWriterFactory.InputRFField));
- node.getRRList().setNewReference(
- tuple.getFieldLength(NodeSequenceWriterFactory.InputRRField) / PositionWritable.LENGTH,
- tuple.getFieldData(NodeSequenceWriterFactory.InputRRField),
- tuple.getFieldStart(NodeSequenceWriterFactory.InputRRField));
-
- node.getKmer().setNewReference(
- Marshal.getInt(tuple.getFieldData(NodeSequenceWriterFactory.InputCountOfKmerField),
- tuple.getFieldStart(NodeSequenceWriterFactory.InputCountOfKmerField)),
- tuple.getFieldData(NodeSequenceWriterFactory.InputKmerBytesField),
- tuple.getFieldStart(NodeSequenceWriterFactory.InputKmerBytesField));
- try {
- output.write(node.toString().getBytes());
- output.writeByte('\n');
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- }
-
- @Override
- public void close(DataOutput output) throws HyracksDataException {
-
- }
-
- };
- }
-
-}
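
Both node writers above recover the number of positions in a field by dividing its byte length by PositionWritable.LENGTH, rejecting lengths that are not an exact multiple. A sketch of that check, assuming the old 5-byte position encoding (a 4-byte read id plus a 1-byte position in read):

    public class PositionCountSketch {
        static final int LENGTH = 5; // assumed: 4-byte read id + 1-byte position

        static int countOf(int fieldLengthInBytes) {
            if (fieldLengthInBytes % LENGTH != 0) {
                throw new IllegalArgumentException("Invalid count of position bytes");
            }
            return fieldLengthInBytes / LENGTH;
        }

        public static void main(String[] args) {
            System.out.println(countOf(15)); // 3 positions
        }
    }
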
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/GenomixJobConf.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/GenomixJobConf.java
deleted file mode 100644
index de56b83..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/GenomixJobConf.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.job;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapred.JobConf;
-
-@SuppressWarnings("deprecation")
-public class GenomixJobConf extends JobConf {
-
- public static final String JOB_NAME = "genomix";
-
- /** Kmer length */
- public static final String KMER_LENGTH = "genomix.kmerlen";
- /** Read length */
- public static final String READ_LENGTH = "genomix.readlen";
- /** Frame size */
- public static final String FRAME_SIZE = "genomix.framesize";
- /** Frame limit, required by Hyracks */
- public static final String FRAME_LIMIT = "genomix.framelimit";
- /** Table size, required by Hyracks */
- public static final String TABLE_SIZE = "genomix.tablesize";
- /** Group-by type */
- public static final String GROUPBY_TYPE = "genomix.graph.groupby.type";
- /** Graph output format */
- public static final String OUTPUT_FORMAT = "genomix.graph.output";
- /** Whether to generate the reversed kmer sequence */
- public static final String REVERSED_KMER = "genomix.kmer.reversed";
-
- /** Configurations used by the hybrid group-by function in the graph build phase */
- public static final String GROUPBY_HYBRID_INPUTSIZE = "genomix.graph.groupby.hybrid.inputsize";
- public static final String GROUPBY_HYBRID_INPUTKEYS = "genomix.graph.groupby.hybrid.inputkeys";
- public static final String GROUPBY_HYBRID_RECORDSIZE_SINGLE = "genomix.graph.groupby.hybrid.recordsize.single";
- public static final String GROUPBY_HYBRID_RECORDSIZE_CROSS = "genomix.graph.groupby.hybrid.recordsize.cross";
- public static final String GROUPBY_HYBRID_HASHLEVEL = "genomix.graph.groupby.hybrid.hashlevel";
-
- public static final int DEFAULT_KMERLEN = 21;
- public static final int DEFAULT_READLEN = 124;
- public static final int DEFAULT_FRAME_SIZE = 128 * 1024;
- public static final int DEFAULT_FRAME_LIMIT = 4096;
- public static final int DEFAULT_TABLE_SIZE = 10485767;
- public static final long DEFAULT_GROUPBY_HYBRID_INPUTSIZE = 154000000L;
- public static final long DEFAULT_GROUPBY_HYBRID_INPUTKEYS = 38500000L;
- public static final int DEFAULT_GROUPBY_HYBRID_RECORDSIZE_SINGLE = 9;
- public static final int DEFAULT_GROUPBY_HYBRID_HASHLEVEL = 1;
- public static final int DEFAULT_GROUPBY_HYBRID_RECORDSIZE_CROSS = 13;
-
- public static final boolean DEFAULT_REVERSED = true;
-
- public static final String JOB_PLAN_GRAPHBUILD = "graphbuild";
- public static final String JOB_PLAN_GRAPHSTAT = "graphstat";
-
- public static final String GROUPBY_TYPE_HYBRID = "hybrid";
- public static final String GROUPBY_TYPE_EXTERNAL = "external";
- public static final String GROUPBY_TYPE_PRECLUSTER = "precluster";
- public static final String OUTPUT_FORMAT_BINARY = "binary";
- public static final String OUTPUT_FORMAT_TEXT = "text";
-
- public GenomixJobConf() throws IOException {
- super(new Configuration());
- }
-
- public GenomixJobConf(Configuration conf) throws IOException {
- super(conf);
- }
-
- /**
- * Set the kmer length
- *
- * @param kmerlength
- * the desired kmer length
- */
- final public void setKmerLength(int kmerlength) {
- setInt(KMER_LENGTH, kmerlength);
- }
-
- final public void setFrameSize(int frameSize) {
- setInt(FRAME_SIZE, frameSize);
- }
-
- final public void setFrameLimit(int frameLimit) {
- setInt(FRAME_LIMIT, frameLimit);
- }
-
- final public void setTableSize(int tableSize) {
- setInt(TABLE_SIZE, tableSize);
- }
-
-}
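
For reference, the deleted GenomixJobConf was driven through its typed setters and the string keys above; a minimal usage sketch as the class stood before this commit:

    import edu.uci.ics.genomix.hyracks.job.GenomixJobConf;

    public class ConfUsageSketch {
        public static void main(String[] args) throws Exception {
            GenomixJobConf conf = new GenomixJobConf();
            conf.setKmerLength(21); // even values are decremented to odd by JobGenBrujinGraph
            conf.setFrameSize(GenomixJobConf.DEFAULT_FRAME_SIZE);
            conf.setFrameLimit(GenomixJobConf.DEFAULT_FRAME_LIMIT);
            conf.setTableSize(GenomixJobConf.DEFAULT_TABLE_SIZE);
            conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
            conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_TEXT);
        }
    }
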
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGen.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGen.java
deleted file mode 100644
index c8cb701..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGen.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.job;
-
-import java.io.Serializable;
-import java.util.UUID;
-
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.exceptions.HyracksException;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-import edu.uci.ics.hyracks.hdfs.dataflow.ConfFactory;
-
-public abstract class JobGen implements Serializable {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
- protected final ConfFactory confFactory;
- protected String jobId = new UUID(System.currentTimeMillis(), System.nanoTime()).toString();
-
- public JobGen(GenomixJobConf job) throws HyracksDataException {
- this.confFactory = new ConfFactory(job);
- }
-
- public abstract JobSpecification generateJob() throws HyracksException;
-}
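
JobGen above is the abstract base for every job generator deleted below: it captures the job configuration in a serializable ConfFactory, assigns a unique jobId, and leaves generateJob() to subclasses. A skeletal subclass, for illustration only:

    import edu.uci.ics.genomix.hyracks.job.GenomixJobConf;
    import edu.uci.ics.genomix.hyracks.job.JobGen;
    import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
    import edu.uci.ics.hyracks.api.exceptions.HyracksException;
    import edu.uci.ics.hyracks.api.job.JobSpecification;

    public class NoOpJobGen extends JobGen {
        private static final long serialVersionUID = 1L;

        public NoOpJobGen(GenomixJobConf job) throws HyracksDataException {
            super(job);
        }

        @Override
        public JobSpecification generateJob() throws HyracksException {
            // A real subclass creates operators, connects them, and adds roots here.
            return new JobSpecification();
        }
    }
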
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenBrujinGraph.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenBrujinGraph.java
deleted file mode 100644
index 7571653..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenBrujinGraph.java
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.job;
-
-import java.io.IOException;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-
-import edu.uci.ics.genomix.hyracks.data.accessors.KmerHashPartitioncomputerFactory;
-import edu.uci.ics.genomix.hyracks.data.accessors.KmerNormarlizedComputerFactory;
-import edu.uci.ics.genomix.hyracks.data.accessors.ReadIDPartitionComputerFactory;
-import edu.uci.ics.genomix.hyracks.data.primitive.KmerPointable;
-import edu.uci.ics.genomix.hyracks.dataflow.ConnectorPolicyAssignmentPolicy;
-import edu.uci.ics.genomix.hyracks.dataflow.MapKmerPositionToReadOperator;
-import edu.uci.ics.genomix.hyracks.dataflow.MapReadToNodeOperator;
-import edu.uci.ics.genomix.hyracks.dataflow.ReadsKeyValueParserFactory;
-import edu.uci.ics.genomix.hyracks.dataflow.aggregators.AggregateKmerAggregateFactory;
-import edu.uci.ics.genomix.hyracks.dataflow.aggregators.AggregateReadIDAggregateFactory;
-import edu.uci.ics.genomix.hyracks.dataflow.aggregators.MergeKmerAggregateFactory;
-import edu.uci.ics.genomix.hyracks.dataflow.aggregators.MergeReadIDAggregateFactory;
-import edu.uci.ics.genomix.hyracks.dataflow.io.KMerSequenceWriterFactory;
-import edu.uci.ics.genomix.hyracks.dataflow.io.KMerTextWriterFactory;
-import edu.uci.ics.genomix.hyracks.dataflow.io.NodeSequenceWriterFactory;
-import edu.uci.ics.genomix.hyracks.dataflow.io.NodeTextWriterFactory;
-import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
-import edu.uci.ics.hyracks.api.constraints.PartitionConstraintHelper;
-import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;
-import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
-import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputerFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.exceptions.HyracksException;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
-import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory;
-import edu.uci.ics.hyracks.data.std.api.IPointableFactory;
-import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
-import edu.uci.ics.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.group.HashSpillableTableFactory;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
-import edu.uci.ics.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
-import edu.uci.ics.hyracks.hdfs.dataflow.ConfFactory;
-import edu.uci.ics.hyracks.hdfs.dataflow.HDFSReadOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.dataflow.HDFSWriteOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
-
-@SuppressWarnings("deprecation")
-public class JobGenBrujinGraph extends JobGen {
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- public enum GroupbyType {
- EXTERNAL,
- PRECLUSTER,
- HYBRIDHASH,
- }
-
- public enum OutputFormat {
- TEXT,
- BINARY,
- }
-
- protected ConfFactory hadoopJobConfFactory;
- protected static final Log LOG = LogFactory.getLog(JobGenBrujinGraph.class);
- protected String[] ncNodeNames;
- protected String[] readSchedule;
-
- protected int readLength;
- protected int kmerSize;
- protected int frameLimits;
- protected int frameSize;
- protected int tableSize;
- protected GroupbyType groupbyType;
- protected OutputFormat outputFormat;
- protected boolean bGenerateReversedKmer;
-
- protected void logDebug(String status) {
- LOG.debug(status + " nc nodes:" + ncNodeNames.length);
- }
-
- public JobGenBrujinGraph(GenomixJobConf job, Scheduler scheduler, final Map<String, NodeControllerInfo> ncMap,
- int numPartitionPerMachine) throws HyracksDataException {
- super(job);
- String[] nodes = new String[ncMap.size()];
- ncMap.keySet().toArray(nodes);
- ncNodeNames = new String[nodes.length * numPartitionPerMachine];
- for (int i = 0; i < numPartitionPerMachine; i++) {
- System.arraycopy(nodes, 0, ncNodeNames, i * nodes.length, nodes.length);
- }
- initJobConfiguration(scheduler);
- }
-
- private ExternalGroupOperatorDescriptor newExternalGroupby(JobSpecification jobSpec, int[] keyFields,
- IAggregatorDescriptorFactory aggregator, IAggregatorDescriptorFactory merger,
- ITuplePartitionComputerFactory partition, INormalizedKeyComputerFactory normalizer,
- IPointableFactory pointable, RecordDescriptor outRed) {
- return new ExternalGroupOperatorDescriptor(jobSpec, keyFields, frameLimits,
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(pointable) }, normalizer,
- aggregator, merger, outRed, new HashSpillableTableFactory(new FieldHashPartitionComputerFactory(
- keyFields,
- new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(pointable) }),
- tableSize), true);
- }
-
- private Object[] generateAggregateDescriptorByType(JobSpecification jobSpec, int[] keyFields,
- IAggregatorDescriptorFactory aggregator, IAggregatorDescriptorFactory merger,
- ITuplePartitionComputerFactory partition, INormalizedKeyComputerFactory normalizer,
- IPointableFactory pointable, RecordDescriptor combineRed, RecordDescriptor finalRec)
- throws HyracksDataException {
-
- Object[] obj = new Object[3];
-
- switch (groupbyType) {
- case EXTERNAL:
- obj[0] = newExternalGroupby(jobSpec, keyFields, aggregator, merger, partition, normalizer, pointable,
- combineRed);
- obj[1] = new MToNPartitioningConnectorDescriptor(jobSpec, partition);
- obj[2] = newExternalGroupby(jobSpec, keyFields, merger, merger, partition, normalizer, pointable,
- finalRec);
- break;
- case PRECLUSTER:
- default:
-
- obj[0] = new PreclusteredGroupOperatorDescriptor(jobSpec, keyFields,
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(pointable) }, aggregator,
- combineRed);
- obj[1] = new MToNPartitioningMergingConnectorDescriptor(jobSpec, partition, keyFields,
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(pointable) });
- obj[2] = new PreclusteredGroupOperatorDescriptor(jobSpec, keyFields,
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(pointable) }, merger,
- finalRec);
- jobSpec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
- break;
- }
- return obj;
- }
-
- public HDFSReadOperatorDescriptor createHDFSReader(JobSpecification jobSpec) throws HyracksDataException {
- try {
- InputSplit[] splits = hadoopJobConfFactory.getConf().getInputFormat()
- .getSplits(hadoopJobConfFactory.getConf(), ncNodeNames.length);
-
- return new HDFSReadOperatorDescriptor(jobSpec, ReadsKeyValueParserFactory.readKmerOutputRec,
- hadoopJobConfFactory.getConf(), splits, readSchedule, new ReadsKeyValueParserFactory(readLength,
- kmerSize, bGenerateReversedKmer));
- } catch (Exception e) {
- throw new HyracksDataException(e);
- }
- }
-
- public static void connectOperators(JobSpecification jobSpec, IOperatorDescriptor preOp, String[] preNodes,
- IOperatorDescriptor nextOp, String[] nextNodes, IConnectorDescriptor conn) {
- PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, preOp, preNodes);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, nextOp, nextNodes);
- jobSpec.connect(conn, preOp, 0, nextOp, 0);
- }
-
- public AbstractOperatorDescriptor generateGroupbyKmerJob(JobSpecification jobSpec,
- AbstractOperatorDescriptor readOperator) throws HyracksDataException {
- int[] keyFields = new int[] { 0 }; // the field index of the grouping key
-
- ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(jobSpec, frameLimits, keyFields,
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(KmerPointable.FACTORY) },
- ReadsKeyValueParserFactory.readKmerOutputRec);
- connectOperators(jobSpec, readOperator, ncNodeNames, sorter, ncNodeNames, new OneToOneConnectorDescriptor(
- jobSpec));
-
- RecordDescriptor combineKmerOutputRec = new RecordDescriptor(new ISerializerDeserializer[] { null, null });
- jobSpec.setFrameSize(frameSize);
-
- Object[] objs = generateAggregateDescriptorByType(jobSpec, keyFields, new AggregateKmerAggregateFactory(),
- new MergeKmerAggregateFactory(), new KmerHashPartitioncomputerFactory(),
- new KmerNormarlizedComputerFactory(), KmerPointable.FACTORY, combineKmerOutputRec, combineKmerOutputRec);
- AbstractOperatorDescriptor kmerLocalAggregator = (AbstractOperatorDescriptor) objs[0];
- logDebug("LocalKmerGroupby Operator");
- connectOperators(jobSpec, sorter, ncNodeNames, kmerLocalAggregator, ncNodeNames,
- new OneToOneConnectorDescriptor(jobSpec));
-
- logDebug("CrossKmerGroupby Operator");
- IConnectorDescriptor kmerConnPartition = (IConnectorDescriptor) objs[1];
- AbstractOperatorDescriptor kmerCrossAggregator = (AbstractOperatorDescriptor) objs[2];
- connectOperators(jobSpec, kmerLocalAggregator, ncNodeNames, kmerCrossAggregator, ncNodeNames, kmerConnPartition);
- return kmerCrossAggregator;
- }
-
- public AbstractOperatorDescriptor generateMapperFromKmerToRead(JobSpecification jobSpec,
- AbstractOperatorDescriptor kmerCrossAggregator) {
- // Map (Kmer, {(ReadID,PosInRead),...}) into
- // (ReadID,PosInRead,{OtherPosition,...},Kmer)
-
- AbstractOperatorDescriptor mapKmerToRead = new MapKmerPositionToReadOperator(jobSpec,
- MapKmerPositionToReadOperator.readIDOutputRec, readLength, kmerSize);
- connectOperators(jobSpec, kmerCrossAggregator, ncNodeNames, mapKmerToRead, ncNodeNames,
- new OneToOneConnectorDescriptor(jobSpec));
- return mapKmerToRead;
- }
-
- public AbstractOperatorDescriptor generateGroupbyReadJob(JobSpecification jobSpec,
- AbstractOperatorDescriptor mapKmerToRead) throws HyracksDataException {
- int[] keyFields = new int[] { 0 }; // the field index of the grouping key
- // (ReadID, {(PosInRead, {OtherPosition...}, Kmer), ...})
- ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(jobSpec, frameLimits, keyFields,
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
- MapKmerPositionToReadOperator.readIDOutputRec);
- connectOperators(jobSpec, mapKmerToRead, ncNodeNames, sorter, ncNodeNames, new OneToOneConnectorDescriptor(
- jobSpec));
-
- RecordDescriptor readIDFinalRec = new RecordDescriptor(
- new ISerializerDeserializer[1 + 2 * MergeReadIDAggregateFactory.getPositionCount(readLength, kmerSize)]);
- Object[] objs = generateAggregateDescriptorByType(jobSpec, keyFields, new AggregateReadIDAggregateFactory(),
- new MergeReadIDAggregateFactory(readLength, kmerSize), new ReadIDPartitionComputerFactory(), null,
- IntegerPointable.FACTORY, AggregateReadIDAggregateFactory.readIDAggregateRec, readIDFinalRec);
- AbstractOperatorDescriptor readLocalAggregator = (AbstractOperatorDescriptor) objs[0];
- connectOperators(jobSpec, sorter, ncNodeNames, readLocalAggregator, ncNodeNames,
- new OneToOneConnectorDescriptor(jobSpec));
-
- logDebug("Group by ReadID merger");
- IConnectorDescriptor readconn = (IConnectorDescriptor) objs[1];
- AbstractOperatorDescriptor readCrossAggregator = (AbstractOperatorDescriptor) objs[2];
- connectOperators(jobSpec, readLocalAggregator, ncNodeNames, readCrossAggregator, ncNodeNames, readconn);
- return readCrossAggregator;
- }
-
- public AbstractOperatorDescriptor generateMapperFromReadToNode(JobSpecification jobSpec,
- AbstractOperatorDescriptor readCrossAggregator) {
- // Map (ReadID, [(Poslist,Kmer) ... ]) to (Node, IncomingList,
- // OutgoingList, Kmer)
-
- AbstractOperatorDescriptor mapEachReadToNode = new MapReadToNodeOperator(jobSpec,
- MapReadToNodeOperator.nodeOutputRec, kmerSize, true);
- connectOperators(jobSpec, readCrossAggregator, ncNodeNames, mapEachReadToNode, ncNodeNames,
- new OneToOneConnectorDescriptor(jobSpec));
- return mapEachReadToNode;
- }
-
- public AbstractOperatorDescriptor generateKmerWriterOperator(JobSpecification jobSpec,
- AbstractOperatorDescriptor kmerCrossAggregator) throws HyracksException {
- // Output Kmer
- ITupleWriterFactory kmerWriter = null;
- switch (outputFormat) {
- case TEXT:
- kmerWriter = new KMerTextWriterFactory(kmerSize);
- break;
- case BINARY:
- default:
- kmerWriter = new KMerSequenceWriterFactory(hadoopJobConfFactory.getConf());
- break;
- }
- logDebug("WriteOperator");
- HDFSWriteOperatorDescriptor writeKmerOperator = new HDFSWriteOperatorDescriptor(jobSpec,
- hadoopJobConfFactory.getConf(), kmerWriter);
- connectOperators(jobSpec, kmerCrossAggregator, ncNodeNames, writeKmerOperator, ncNodeNames,
- new OneToOneConnectorDescriptor(jobSpec));
- return writeKmerOperator;
- }
-
- public AbstractOperatorDescriptor generateNodeWriterOperator(JobSpecification jobSpec,
- AbstractOperatorDescriptor mapEachReadToNode) throws HyracksException {
- ITupleWriterFactory nodeWriter = null;
- switch (outputFormat) {
- case TEXT:
- nodeWriter = new NodeTextWriterFactory(kmerSize);
- break;
- case BINARY:
- default:
- nodeWriter = new NodeSequenceWriterFactory(hadoopJobConfFactory.getConf());
- break;
- }
- logDebug("WriteOperator");
- // Output Node
- HDFSWriteOperatorDescriptor writeNodeOperator = new HDFSWriteOperatorDescriptor(jobSpec,
- hadoopJobConfFactory.getConf(), nodeWriter);
- connectOperators(jobSpec, mapEachReadToNode, ncNodeNames, writeNodeOperator, ncNodeNames,
- new OneToOneConnectorDescriptor(jobSpec));
- return writeNodeOperator;
- }
-
- @Override
- public JobSpecification generateJob() throws HyracksException {
-
- JobSpecification jobSpec = new JobSpecification();
- logDebug("ReadKmer Operator");
-
- HDFSReadOperatorDescriptor readOperator = createHDFSReader(jobSpec);
-
- logDebug("Group by Kmer");
- AbstractOperatorDescriptor lastOperator = generateGroupbyKmerJob(jobSpec, readOperator);
-
- // logDebug("Write kmer to result");
- // generateRootByWriteKmerGroupbyResult(jobSpec, lastOperator);
-
- logDebug("Map Kmer to Read Operator");
- lastOperator = generateMapperFromKmerToRead(jobSpec, lastOperator);
-
- logDebug("Group by Read Operator");
- lastOperator = generateGroupbyReadJob(jobSpec, lastOperator);
-
- logDebug("Generate final node");
- lastOperator = generateMapperFromReadToNode(jobSpec, lastOperator);
- logDebug("Write node to result");
- lastOperator = generateNodeWriterOperator(jobSpec, lastOperator);
-
- jobSpec.addRoot(lastOperator);
- return jobSpec;
- }
-
- protected void initJobConfiguration(Scheduler scheduler) throws HyracksDataException {
- Configuration conf = confFactory.getConf();
- readLength = conf.getInt(GenomixJobConf.READ_LENGTH, GenomixJobConf.DEFAULT_READLEN);
- kmerSize = conf.getInt(GenomixJobConf.KMER_LENGTH, GenomixJobConf.DEFAULT_KMERLEN);
- if (kmerSize % 2 == 0) {
- kmerSize--;
- conf.setInt(GenomixJobConf.KMER_LENGTH, kmerSize);
- }
- frameLimits = conf.getInt(GenomixJobConf.FRAME_LIMIT, GenomixJobConf.DEFAULT_FRAME_LIMIT);
- tableSize = conf.getInt(GenomixJobConf.TABLE_SIZE, GenomixJobConf.DEFAULT_TABLE_SIZE);
- frameSize = conf.getInt(GenomixJobConf.FRAME_SIZE, GenomixJobConf.DEFAULT_FRAME_SIZE);
-
- bGenerateReversedKmer = conf.getBoolean(GenomixJobConf.REVERSED_KMER, GenomixJobConf.DEFAULT_REVERSED);
-
- String type = conf.get(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
- if (type.equalsIgnoreCase(GenomixJobConf.GROUPBY_TYPE_EXTERNAL)) {
- groupbyType = GroupbyType.EXTERNAL;
- } else if (type.equalsIgnoreCase(GenomixJobConf.GROUPBY_TYPE_PRECLUSTER)) {
- groupbyType = GroupbyType.PRECLUSTER;
- } else {
- groupbyType = GroupbyType.HYBRIDHASH;
- }
-
- String output = conf.get(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_BINARY);
- if (output.equalsIgnoreCase("text")) {
- outputFormat = OutputFormat.TEXT;
- } else {
- outputFormat = OutputFormat.BINARY;
- }
- try {
- hadoopJobConfFactory = new ConfFactory(new JobConf(conf));
- InputSplit[] splits = hadoopJobConfFactory.getConf().getInputFormat()
- .getSplits(hadoopJobConfFactory.getConf(), ncNodeNames.length);
- readSchedule = scheduler.getLocationConstraints(splits);
- } catch (IOException ex) {
- throw new HyracksDataException(ex);
- }
-
- LOG.info("Genomix Graph Build Configuration");
- LOG.info("Kmer:" + kmerSize);
- LOG.info("Groupby type:" + type);
- LOG.info("Output format:" + output);
- LOG.info("Frame limit" + frameLimits);
- LOG.info("Frame kmerByteSize" + frameSize);
- }
-
-}
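
One detail of initJobConfiguration worth calling out: an even kmer length is silently decremented to the next odd value, presumably so that no kmer can equal its own reverse complement (impossible for odd k, since the middle base would have to be its own complement). A one-function sketch of that normalization:

    public class KmerSizeSketch {
        // Mirrors the adjustment in initJobConfiguration: even k becomes k - 1.
        static int normalize(int kmerSize) {
            return (kmerSize % 2 == 0) ? kmerSize - 1 : kmerSize;
        }

        public static void main(String[] args) {
            System.out.println(normalize(22)); // 21
            System.out.println(normalize(21)); // 21
        }
    }
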
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCheckReader.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCheckReader.java
deleted file mode 100644
index b4b1e73..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCheckReader.java
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hyracks.job;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Map;
-
-import edu.uci.ics.genomix.hyracks.dataflow.ReadsKeyValueParserFactory;
-import edu.uci.ics.genomix.velvet.oldtype.PositionWritable;
-import edu.uci.ics.genomix.velvet.oldtype.KmerBytesWritable;
-import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.exceptions.HyracksException;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
-import edu.uci.ics.hyracks.hdfs.dataflow.HDFSReadOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.dataflow.HDFSWriteOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
-
-public class JobGenCheckReader extends JobGenBrujinGraph {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- public JobGenCheckReader(GenomixJobConf job, Scheduler scheduler, Map<String, NodeControllerInfo> ncMap,
- int numPartitionPerMachine) throws HyracksDataException {
- super(job, scheduler, ncMap, numPartitionPerMachine);
- }
-
- @Override
- public JobSpecification generateJob() throws HyracksException {
-
- JobSpecification jobSpec = new JobSpecification();
- logDebug("ReadKmer Operator");
- HDFSReadOperatorDescriptor readOperator = createHDFSReader(jobSpec);
-
- logDebug("Write kmer to result");
- generateRootByWriteKmerReader(jobSpec, readOperator);
-
- return jobSpec;
- }
-
- public AbstractSingleActivityOperatorDescriptor generateRootByWriteKmerReader(JobSpecification jobSpec,
- HDFSReadOperatorDescriptor readOperator) throws HyracksException {
- // Output Kmer
- HDFSWriteOperatorDescriptor writeKmerOperator = new HDFSWriteOperatorDescriptor(jobSpec,
- hadoopJobConfFactory.getConf(), new ITupleWriterFactory() {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- @Override
- public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException {
- return new ITupleWriter() {
-
- private KmerBytesWritable kmer = new KmerBytesWritable(kmerSize);
- private PositionWritable pos = new PositionWritable();
-
- @Override
- public void open(DataOutput output) throws HyracksDataException {
- }
-
- @Override
- public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
- try {
- if (kmer.getLength() > tuple
- .getFieldLength(ReadsKeyValueParserFactory.OutputKmerField)) {
- throw new IllegalArgumentException("Not enough kmer bytes");
- }
- kmer.setNewReference(
- tuple.getFieldData(ReadsKeyValueParserFactory.OutputKmerField),
- tuple.getFieldStart(ReadsKeyValueParserFactory.OutputKmerField));
- pos.setNewReference(tuple.getFieldData(ReadsKeyValueParserFactory.OutputPosition),
- tuple.getFieldStart(ReadsKeyValueParserFactory.OutputPosition));
-
- output.write(kmer.toString().getBytes());
- output.writeByte('\t');
- output.write(pos.toString().getBytes());
- output.writeByte('\n');
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- }
-
- @Override
- public void close(DataOutput output) throws HyracksDataException {
-
- }
-
- };
- }
-
- });
- connectOperators(jobSpec, readOperator, ncNodeNames, writeKmerOperator, ncNodeNames,
- new OneToOneConnectorDescriptor(jobSpec));
- jobSpec.addRoot(writeKmerOperator);
- return writeKmerOperator;
- }
-
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCreateKmerInfo.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCreateKmerInfo.java
deleted file mode 100644
index 5202ba2..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCreateKmerInfo.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hyracks.job;
-
-import java.util.Map;
-
-import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.exceptions.HyracksException;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.dataflow.HDFSReadOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
-
-public class JobGenCreateKmerInfo extends JobGenBrujinGraph {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- public JobGenCreateKmerInfo(GenomixJobConf job, Scheduler scheduler, Map<String, NodeControllerInfo> ncMap,
- int numPartitionPerMachine) throws HyracksDataException {
- super(job, scheduler, ncMap, numPartitionPerMachine);
- }
-
- @Override
- public JobSpecification generateJob() throws HyracksException {
-
- JobSpecification jobSpec = new JobSpecification();
- logDebug("ReadKmer Operator");
- HDFSReadOperatorDescriptor readOperator = createHDFSReader(jobSpec);
-
- logDebug("Group by Kmer");
- AbstractOperatorDescriptor lastOperator = generateGroupbyKmerJob(jobSpec, readOperator);
-
- logDebug("Write kmer to result");
- lastOperator = generateKmerWriterOperator(jobSpec, lastOperator);
- jobSpec.addRoot(lastOperator);
-
- return jobSpec;
- }
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenGroupbyReadID.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenGroupbyReadID.java
deleted file mode 100644
index 1e78b79..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenGroupbyReadID.java
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hyracks.job;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Map;
-
-import edu.uci.ics.genomix.data.Marshal;
-import edu.uci.ics.genomix.velvet.oldtype.PositionListWritable;
-import edu.uci.ics.genomix.velvet.oldtype.KmerBytesWritable;
-import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.exceptions.HyracksException;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
-import edu.uci.ics.hyracks.hdfs.dataflow.HDFSReadOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.dataflow.HDFSWriteOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
-
-public class JobGenGroupbyReadID extends JobGenBrujinGraph {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- public JobGenGroupbyReadID(GenomixJobConf job, Scheduler scheduler, Map<String, NodeControllerInfo> ncMap,
- int numPartitionPerMachine) throws HyracksDataException {
- super(job, scheduler, ncMap, numPartitionPerMachine);
- }
-
- @Override
- public JobSpecification generateJob() throws HyracksException {
-
- JobSpecification jobSpec = new JobSpecification();
- logDebug("ReadKmer Operator");
- HDFSReadOperatorDescriptor readOperator = createHDFSReader(jobSpec);
-
- logDebug("Group by Kmer");
- AbstractOperatorDescriptor lastOperator = generateGroupbyKmerJob(jobSpec, readOperator);
-
- //logDebug("Write kmer to result");
- //generateRootByWriteKmerGroupbyResult(jobSpec, lastOperator);
-
- logDebug("Map Kmer to Read Operator");
- lastOperator = generateMapperFromKmerToRead(jobSpec, lastOperator);
-
- logDebug("Group by Read Operator");
- lastOperator = generateGroupbyReadJob(jobSpec, lastOperator);
-
- logDebug("Write node to result");
- lastOperator = generateRootByWriteReadIDAggregationResult(jobSpec, lastOperator);
- jobSpec.addRoot(lastOperator);
- return jobSpec;
- }
-
- public AbstractOperatorDescriptor generateRootByWriteReadIDAggregationResult(JobSpecification jobSpec,
- AbstractOperatorDescriptor readCrossAggregator) throws HyracksException {
- HDFSWriteOperatorDescriptor writeKmerOperator = new HDFSWriteOperatorDescriptor(jobSpec,
- hadoopJobConfFactory.getConf(), new ITupleWriterFactory() {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- @Override
- public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException {
- return new ITupleWriter() {
-
- private KmerBytesWritable kmer = new KmerBytesWritable(kmerSize);
- private PositionListWritable plist = new PositionListWritable();
-
- @Override
- public void open(DataOutput output) throws HyracksDataException {
-
- }
-
- @Override
- public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
- int readId = Marshal.getInt(tuple.getFieldData(0), tuple.getFieldStart(0));
- try {
- output.write((Integer.toString(readId) + "\t").getBytes());
- for (int i = 1; i < tuple.getFieldCount(); i++) {
- int fieldOffset = tuple.getFieldStart(i);
- while (fieldOffset < tuple.getFieldStart(i) + tuple.getFieldLength(i)) {
- byte[] buffer = tuple.getFieldData(i);
- // read poslist
- int posCount = PositionListWritable.getCountByDataLength(Marshal.getInt(
- buffer, fieldOffset));
- fieldOffset += 4;
- plist.setNewReference(posCount, buffer, fieldOffset);
- fieldOffset += plist.getLength();
-
- int posInRead = (i + 1) / 2;
- if (i % 2 == 0) {
- posInRead = -posInRead;
- }
- String kmerString = "";
- if (posInRead > 0) {
- int kmerbytes = Marshal.getInt(buffer, fieldOffset);
- if (kmer.getLength() != kmerbytes) {
- throw new IllegalArgumentException("kmerlength is invalid");
- }
- fieldOffset += 4;
- kmer.setNewReference(buffer, fieldOffset);
- fieldOffset += kmer.getLength();
- kmerString = kmer.toString();
- }
-
- output.write(Integer.toString(posInRead).getBytes());
- output.writeByte('\t');
- output.write(plist.toString().getBytes());
- output.writeByte('\t');
- output.write(kmerString.getBytes());
- output.writeByte('\t');
- }
- }
- output.writeByte('\n');
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- }
-
- @Override
- public void close(DataOutput output) throws HyracksDataException {
-
- }
-
- };
- }
-
- });
- connectOperators(jobSpec, readCrossAggregator, ncNodeNames, writeKmerOperator, ncNodeNames,
- new OneToOneConnectorDescriptor(jobSpec));
-
- return writeKmerOperator;
- }
-
-}
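
Note on the writer removed above: the tuple layout it decoded alternates forward and reverse read positions across fields, so field i maps to position (i+1)/2, negated when i is even. The arithmetic below is copied from the deleted write() method; the class and method names are invented purely for illustration and are not part of the original codebase.

// Hypothetical standalone demo of the field-index encoding used by the
// removed JobGenGroupbyReadID writer: tuple fields 1,2,3,... alternate
// between positive (odd index) and negative (even index) positions in a read.
public class PosInReadEncodingDemo {
    // Mirrors the arithmetic in the deleted write() method: maps a tuple
    // field index i (>= 1) to the signed position-in-read.
    static int posInRead(int fieldIndex) {
        int pos = (fieldIndex + 1) / 2;
        return (fieldIndex % 2 == 0) ? -pos : pos;
    }

    public static void main(String[] args) {
        for (int i = 1; i <= 6; i++) {
            System.out.println("field " + i + " -> posInRead " + posInRead(i));
        }
        // Prints: 1, -1, 2, -2, 3, -3 for fields 1..6.
    }
}
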
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenMapKmerToRead.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenMapKmerToRead.java
deleted file mode 100644
index 8e727959..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenMapKmerToRead.java
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hyracks.job;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Map;
-
-import edu.uci.ics.genomix.data.Marshal;
-import edu.uci.ics.genomix.hyracks.dataflow.MapKmerPositionToReadOperator;
-import edu.uci.ics.genomix.velvet.oldtype.PositionListWritable;
-import edu.uci.ics.genomix.velvet.oldtype.KmerBytesWritable;
-import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.exceptions.HyracksException;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
-import edu.uci.ics.hyracks.hdfs.dataflow.HDFSReadOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.dataflow.HDFSWriteOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
-
-public class JobGenMapKmerToRead extends JobGenBrujinGraph {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- public JobGenMapKmerToRead(GenomixJobConf job, Scheduler scheduler, Map<String, NodeControllerInfo> ncMap,
- int numPartitionPerMachine) throws HyracksDataException {
- super(job, scheduler, ncMap, numPartitionPerMachine);
- }
-
- public AbstractOperatorDescriptor generateRootByWriteMapperFromKmerToReadID(JobSpecification jobSpec,
- AbstractOperatorDescriptor mapper) throws HyracksException {
- // Output Kmer
- HDFSWriteOperatorDescriptor writeKmerOperator = new HDFSWriteOperatorDescriptor(jobSpec,
- hadoopJobConfFactory.getConf(), new ITupleWriterFactory() {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- @Override
- public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException {
- return new ITupleWriter() {
-
- private KmerBytesWritable kmer = new KmerBytesWritable(kmerSize);
- private PositionListWritable plist = new PositionListWritable();
-
- @Override
- public void open(DataOutput output) throws HyracksDataException {
-
- }
-
- @Override
- public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
- try {
- int readID = Marshal.getInt(
- tuple.getFieldData(MapKmerPositionToReadOperator.OutputReadIDField),
- tuple.getFieldStart(MapKmerPositionToReadOperator.OutputReadIDField));
- byte posInRead = tuple
- .getFieldData(MapKmerPositionToReadOperator.OutputPosInReadField)[tuple
- .getFieldStart(MapKmerPositionToReadOperator.OutputPosInReadField)];
- int posCount = PositionListWritable.getCountByDataLength(tuple
- .getFieldLength(MapKmerPositionToReadOperator.OutputOtherReadIDListField));
- plist.setNewReference(
- posCount,
- tuple.getFieldData(MapKmerPositionToReadOperator.OutputOtherReadIDListField),
- tuple.getFieldStart(MapKmerPositionToReadOperator.OutputOtherReadIDListField));
-
- String kmerString = "";
- if (posInRead > 0) {
- if (kmer.getLength() > tuple
- .getFieldLength(MapKmerPositionToReadOperator.OutputKmerField)) {
- throw new IllegalArgumentException("Not enough kmer bytes");
- }
- kmer.setNewReference(
- tuple.getFieldData(MapKmerPositionToReadOperator.OutputKmerField),
- tuple.getFieldStart(MapKmerPositionToReadOperator.OutputKmerField));
- kmerString = kmer.toString();
- }
-
- output.write(Integer.toString(readID).getBytes());
- output.writeByte('\t');
- output.write(Integer.toString(posInRead).getBytes());
- output.writeByte('\t');
- output.write(plist.toString().getBytes());
- output.writeByte('\t');
- output.write(kmerString.getBytes());
- output.writeByte('\n');
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
-
- }
-
- @Override
- public void close(DataOutput output) throws HyracksDataException {
-
- }
-
- };
- }
-
- });
- connectOperators(jobSpec, mapper, ncNodeNames, writeKmerOperator, ncNodeNames, new OneToOneConnectorDescriptor(
- jobSpec));
- jobSpec.addRoot(writeKmerOperator);
- return writeKmerOperator;
- }
-
- @Override
- public JobSpecification generateJob() throws HyracksException {
-
- JobSpecification jobSpec = new JobSpecification();
- logDebug("ReadKmer Operator");
- HDFSReadOperatorDescriptor readOperator = createHDFSReader(jobSpec);
-
- logDebug("Group by Kmer");
- AbstractOperatorDescriptor lastOperator = generateGroupbyKmerJob(jobSpec, readOperator);
-
- logDebug("Map Kmer to Read Operator");
- lastOperator = generateMapperFromKmerToRead(jobSpec, lastOperator);
-
- generateRootByWriteMapperFromKmerToReadID(jobSpec, lastOperator);
-
- return jobSpec;
- }
-}
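
For reference, the writer deleted above emitted one tab-separated text line per tuple: readID, signed posInRead, the other-read position list, then the kmer string, which it left empty for reverse (non-positive) positions. A minimal sketch of just that record format, detached from the Hyracks ITupleWriter machinery; writeRecord and the sample values in main are invented for the sketch, not taken from the original code.

// Hypothetical sketch of the tab-separated record the removed
// JobGenMapKmerToRead writer produced.
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;

public class KmerToReadLineDemo {
    static void writeRecord(DataOutput output, int readID, byte posInRead,
            String positionList, String kmerString) throws IOException {
        output.write(Integer.toString(readID).getBytes());
        output.writeByte('\t');
        output.write(Integer.toString(posInRead).getBytes());
        output.writeByte('\t');
        output.write(positionList.getBytes());
        output.writeByte('\t');
        // The deleted writer only resolved the kmer for forward positions,
        // so the last column is empty when posInRead <= 0.
        output.write((posInRead > 0 ? kmerString : "").getBytes());
        output.writeByte('\n');
    }

    public static void main(String[] args) throws IOException {
        writeRecord(new DataOutputStream(System.out), 42, (byte) 3, "(7,1) (9,2)", "ACGTA");
    }
}
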
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenUnMerged.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenUnMerged.java
deleted file mode 100644
index 21b6385..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenUnMerged.java
+++ /dev/null
@@ -1,34 +0,0 @@
-package edu.uci.ics.genomix.hyracks.job;
-
-import java.util.Map;
-
-import edu.uci.ics.genomix.hyracks.dataflow.MapReadToNodeOperator;
-import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
-import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
-
-public class JobGenUnMerged extends JobGenBrujinGraph {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- public JobGenUnMerged(GenomixJobConf job, Scheduler scheduler, Map<String, NodeControllerInfo> ncMap,
- int numPartitionPerMachine) throws HyracksDataException {
- super(job, scheduler, ncMap, numPartitionPerMachine);
- }
-
- @Override
- public AbstractOperatorDescriptor generateMapperFromReadToNode(JobSpecification jobSpec,
- AbstractOperatorDescriptor readCrossAggregator) {
- AbstractOperatorDescriptor mapEachReadToNode = new MapReadToNodeOperator(jobSpec,
- MapReadToNodeOperator.nodeOutputRec, kmerSize, false);
- connectOperators(jobSpec, readCrossAggregator, ncNodeNames, mapEachReadToNode, ncNodeNames,
- new OneToOneConnectorDescriptor(jobSpec));
- return mapEachReadToNode;
- }
-}
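
The class deleted above existed only to override one factory method and flip the merge flag on MapReadToNodeOperator from true to false. A hypothetical distillation of that override pattern follows; JobGenBase, Stage, and buildNodeMapper are invented stand-ins, not the real Hyracks/genomix types.

// Hypothetical sketch of the single-override pattern JobGenUnMerged used:
// a subclass re-wires one pipeline stage by overriding the factory method
// and forcing the non-merging variant of the operator.
abstract class JobGenBase {
    protected abstract Stage buildNodeMapper(boolean mergeNodes);
}

class Stage {
    final boolean mergeNodes;

    Stage(boolean mergeNodes) {
        this.mergeNodes = mergeNodes;
    }
}

class JobGenUnMergedSketch extends JobGenBase {
    @Override
    protected Stage buildNodeMapper(boolean ignored) {
        // Always construct the non-merging stage, mirroring the hardcoded
        // "false" the deleted class passed to MapReadToNodeOperator.
        return new Stage(false);
    }
}
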