Refactor 'size' to 'kmerByteSize' in KmerBytesWritable

Rename the public field 'size' to 'kmerByteSize' in the variable-length KmerBytesWritable and update every use of it. KmerListWritable now derives its per-kmer byte size from the kmer length via KmerUtil.getByteNumFromK() instead of a hard-coded KMER_LENGTH. Also rename OutputNodeId to OutputNodeIdField, point JobGenCheckReader at the new-graph ReadsKeyValueParserFactory and dump the full IntermediateNodeWritable, and add MiniDFS-based setup and result checking to the JobRun test.
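
The rename is meant to make the distinction explicit: kmerlength counts bases, while kmerByteSize counts the bytes of 2-bit-packed storage. The implementation of KmerUtil.getByteNumFromK() is not part of this diff, so the ceil(k/4) stand-in below is an assumption (it matches the KmerListWritable comment in this patch, where kmerSize = 5 gives kmerByteSize = 2); a minimal sketch:

    public class KmerSizeSketch {
        // Hypothetical stand-in for KmerUtil.getByteNumFromK(): each base is
        // packed into 2 bits, so 4 bases fit per byte, rounded up.
        static int getByteNumFromK(int k) {
            return (k + 3) / 4;
        }

        public static void main(String[] args) {
            int kmerlength = 5;                             // bases in the k-mer
            int kmerByteSize = getByteNumFromK(kmerlength); // bytes of packed storage
            System.out.println(kmerlength + "-mer -> " + kmerByteSize + " bytes"); // prints "5-mer -> 2 bytes"
        }
    }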
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
index a9c1183..96e1cc4 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
@@ -30,7 +30,7 @@
/**
* Variable kmer length byteswritable
* It was used to generate the graph in which phase the kmer length doesn't change.
- * Thus the size of bytes doesn't change either.
+ * Thus kmerByteSize, the number of bytes, doesn't change either.
*/
public class KmerBytesWritable extends BinaryComparable implements Serializable, WritableComparable<BinaryComparable> {
/**
@@ -39,7 +39,7 @@
private static final long serialVersionUID = 1L;
private static final byte[] EMPTY_BYTES = {};
- public int size;
+ public int kmerByteSize;
protected byte[] bytes;
protected int offset;
protected int kmerlength;
@@ -64,9 +64,9 @@
*/
public KmerBytesWritable(int k) {
this.kmerlength = k;
- this.size = KmerUtil.getByteNumFromK(kmerlength);
+ this.kmerByteSize = KmerUtil.getByteNumFromK(kmerlength);
if (k > 0) {
- this.bytes = new byte[this.size];
+ this.bytes = new byte[this.kmerByteSize];
} else {
this.bytes = EMPTY_BYTES;
}
@@ -100,7 +100,7 @@
*/
public void set(byte[] newData, int offset) {
if (kmerlength > 0) {
- System.arraycopy(newData, offset, bytes, this.offset, size);
+ System.arraycopy(newData, offset, bytes, this.offset, kmerByteSize);
}
}
@@ -117,7 +117,7 @@
public void set(int k, byte[] newData, int offset) {
reset(k);
if (k > 0) {
- System.arraycopy(newData, offset, bytes, this.offset, size);
+ System.arraycopy(newData, offset, bytes, this.offset, kmerByteSize);
}
}
@@ -143,7 +143,7 @@
public void setNewReference(byte[] newData, int offset) {
this.bytes = newData;
this.offset = offset;
- if (newData.length - offset < size) {
+ if (newData.length - offset < kmerByteSize) {
throw new IllegalArgumentException("Not given enough space");
}
}
@@ -159,7 +159,7 @@
*/
public void setNewReference(int k, byte[] newData, int offset) {
this.kmerlength = k;
- this.size = KmerUtil.getByteNumFromK(k);
+ this.kmerByteSize = KmerUtil.getByteNumFromK(k);
setNewReference(newData, offset);
}
@@ -167,7 +167,7 @@
if (size > getCapacity()) {
setCapacity((size * 3 / 2));
}
- this.size = size;
+ this.kmerByteSize = size;
}
protected int getCapacity() {
@@ -177,11 +177,11 @@
protected void setCapacity(int new_cap) {
if (new_cap != getCapacity()) {
byte[] new_data = new byte[new_cap];
- if (new_cap < size) {
- size = new_cap;
+ if (new_cap < kmerByteSize) {
+ kmerByteSize = new_cap;
}
- if (size != 0) {
- System.arraycopy(bytes, offset, new_data, 0, size);
+ if (kmerByteSize != 0) {
+ System.arraycopy(bytes, offset, new_data, 0, kmerByteSize);
}
bytes = new_data;
offset = 0;
@@ -206,7 +206,7 @@
private byte geneCodeAtPosition(int pos) {
int posByte = pos / 4;
int shift = (pos % 4) << 1;
- return (byte) ((bytes[offset + size - 1 - posByte] >> shift) & 0x3);
+ return (byte) ((bytes[offset + kmerByteSize - 1 - posByte] >> shift) & 0x3);
}
public int getKmerLength() {
@@ -224,7 +224,7 @@
@Override
public int getLength() {
- return size;
+ return kmerByteSize;
}
/**
@@ -238,7 +238,7 @@
public void setByRead(byte[] array, int start) {
byte l = 0;
int bytecount = 0;
- int bcount = this.size - 1;
+ int bcount = this.kmerByteSize - 1;
for (int i = start; i < start + kmerlength && i < array.length; i++) {
byte code = GeneCode.getCodeFromSymbol(array[i]);
l |= (byte) (code << bytecount);
@@ -272,7 +272,7 @@
public void setByReadReverse(byte[] array, int start) {
byte l = 0;
int bytecount = 0;
- int bcount = size - 1;
+ int bcount = kmerByteSize - 1;
// for (int i = start + kmerlength - 1; i >= 0 && i < array.length; i--) {
for (int i = start + kmerlength - 1; i >= start && i < array.length; i--) {
byte code = GeneCode.getPairedCodeFromSymbol(array[i]);
@@ -313,8 +313,8 @@
* @return the shift out gene, in gene code format
*/
public byte shiftKmerWithNextCode(byte c) {
- byte output = (byte) (bytes[offset + size - 1] & 0x03);
- for (int i = size - 1; i > 0; i--) {
+ byte output = (byte) (bytes[offset + kmerByteSize - 1] & 0x03);
+ for (int i = kmerByteSize - 1; i > 0; i--) {
byte in = (byte) (bytes[offset + i - 1] & 0x03);
bytes[offset + i] = (byte) (((bytes[offset + i] >>> 2) & 0x3f) | (in << 6));
}
@@ -346,11 +346,11 @@
public byte shiftKmerWithPreCode(byte c) {
int pos = ((kmerlength - 1) % 4) << 1;
byte output = (byte) ((bytes[offset] >> pos) & 0x03);
- for (int i = 0; i < size - 1; i++) {
+ for (int i = 0; i < kmerByteSize - 1; i++) {
byte in = (byte) ((bytes[offset + i + 1] >> 6) & 0x03);
bytes[offset + i] = (byte) ((bytes[offset + i] << 2) | in);
}
- bytes[offset + size - 1] = (byte) ((bytes[offset + size - 1] << 2) | c);
+ bytes[offset + kmerByteSize - 1] = (byte) ((bytes[offset + kmerByteSize - 1] << 2) | c);
clearLeadBit();
return output;
}
@@ -367,15 +367,15 @@
*/
public void mergeWithFFKmer(int initialKmerSize, KmerBytesWritable kmer) {
int preKmerLength = kmerlength;
- int preSize = size;
+ int preSize = kmerByteSize;
this.kmerlength += kmer.kmerlength - initialKmerSize + 1;
setSize(KmerUtil.getByteNumFromK(kmerlength));
for (int i = 1; i <= preSize; i++) {
- bytes[offset + size - i] = bytes[offset + preSize - i];
+ bytes[offset + kmerByteSize - i] = bytes[offset + preSize - i];
}
for (int k = initialKmerSize - 1; k < kmer.getKmerLength(); k += 4) {
byte onebyte = getOneByteFromKmerAtPosition(k, kmer.getBytes(), kmer.getOffset(), kmer.getLength());
- appendOneByteAtPosition(preKmerLength + k - initialKmerSize + 1, onebyte, bytes, offset, size);
+ appendOneByteAtPosition(preKmerLength + k - initialKmerSize + 1, onebyte, bytes, offset, kmerByteSize);
}
clearLeadBit();
}
@@ -393,18 +393,18 @@
* : the next kmer
*/
public void mergeWithFRKmer(int initialKmerSize, KmerBytesWritable kmer) {
- int preSize = size;
+ int preSize = kmerByteSize;
int preKmerLength = kmerlength;
this.kmerlength += kmer.kmerlength - initialKmerSize + 1;
setSize(KmerUtil.getByteNumFromK(kmerlength));
// copy prefix into right-side of buffer
for (int i = 1; i <= preSize; i++) {
- bytes[offset + size - i] = bytes[offset + preSize - i];
+ bytes[offset + kmerByteSize - i] = bytes[offset + preSize - i];
}
int bytecount = (preKmerLength % 4) * 2;
- int bcount = size - preSize - bytecount / 8; // may overlap previous kmer
- byte l = bcount == size - preSize ? bytes[offset + bcount] : 0x00;
+ int bcount = kmerByteSize - preSize - bytecount / 8; // may overlap previous kmer
+ byte l = bcount == kmerByteSize - preSize ? bytes[offset + bcount] : 0x00;
bytecount %= 8;
for (int i = kmer.kmerlength - initialKmerSize; i >= 0; i--) {
byte code = GeneCode.getPairedGeneCode(kmer.getGeneCodeAtPosition(i));
@@ -433,12 +433,12 @@
*/
public void mergeWithRFKmer(int initialKmerSize, KmerBytesWritable preKmer) {
int preKmerLength = kmerlength;
- int preSize = size;
+ int preSize = kmerByteSize;
this.kmerlength += preKmer.kmerlength - initialKmerSize + 1;
setSize(KmerUtil.getByteNumFromK(kmerlength));
// byte cacheByte = getOneByteFromKmerAtPosition(0, bytes, offset, preSize);
- int byteIndex = size - 1;
+ int byteIndex = kmerByteSize - 1;
byte cacheByte = 0x00;
int posnInByte = 0;
@@ -457,7 +457,7 @@
// copy my kmer into low positions of bytes
for (int i = 0; i < preKmerLength; i++) {
// expanding the capacity makes this offset incorrect. It's off by the # of additional bytes added.
- int newposn = i + (size - preSize) * 4;
+ int newposn = i + (kmerByteSize - preSize) * 4;
byte code = geneCodeAtPosition(newposn);
cacheByte |= (byte) (code << posnInByte);
posnInByte += 2;
@@ -483,25 +483,25 @@
*/
public void mergeWithRRKmer(int initialKmerSize, KmerBytesWritable preKmer) {
int preKmerLength = kmerlength;
- int preSize = size;
+ int preSize = kmerByteSize;
this.kmerlength += preKmer.kmerlength - initialKmerSize + 1;
setSize(KmerUtil.getByteNumFromK(kmerlength));
byte cacheByte = getOneByteFromKmerAtPosition(0, bytes, offset, preSize);
// copy prekmer
for (int k = 0; k < preKmer.kmerlength - initialKmerSize + 1; k += 4) {
- byte onebyte = getOneByteFromKmerAtPosition(k, preKmer.bytes, preKmer.offset, preKmer.size);
- appendOneByteAtPosition(k, onebyte, bytes, offset, size);
+ byte onebyte = getOneByteFromKmerAtPosition(k, preKmer.bytes, preKmer.offset, preKmer.kmerByteSize);
+ appendOneByteAtPosition(k, onebyte, bytes, offset, kmerByteSize);
}
// copy current kmer
int k = 4;
for (; k < preKmerLength; k += 4) {
byte onebyte = getOneByteFromKmerAtPosition(k, bytes, offset, preSize);
- appendOneByteAtPosition(preKmer.kmerlength - initialKmerSize + k - 4 + 1, cacheByte, bytes, offset, size);
+ appendOneByteAtPosition(preKmer.kmerlength - initialKmerSize + k - 4 + 1, cacheByte, bytes, offset, kmerByteSize);
cacheByte = onebyte;
}
- appendOneByteAtPosition(preKmer.kmerlength - initialKmerSize + k - 4 + 1, cacheByte, bytes, offset, size);
+ appendOneByteAtPosition(preKmer.kmerlength - initialKmerSize + k - 4 + 1, cacheByte, bytes, offset, kmerByteSize);
clearLeadBit();
}
@@ -560,13 +560,13 @@
@Override
public void readFields(DataInput in) throws IOException {
this.kmerlength = in.readInt();
- this.size = KmerUtil.getByteNumFromK(kmerlength);
+ this.kmerByteSize = KmerUtil.getByteNumFromK(kmerlength);
if (this.kmerlength > 0) {
- if (this.bytes.length < this.size) {
- this.bytes = new byte[this.size];
+ if (this.bytes.length < this.kmerByteSize) {
+ this.bytes = new byte[this.kmerByteSize];
this.offset = 0;
}
- in.readFully(bytes, offset, size);
+ in.readFully(bytes, offset, kmerByteSize);
}
}
@@ -574,7 +574,7 @@
public void write(DataOutput out) throws IOException {
out.writeInt(kmerlength);
if (kmerlength > 0) {
- out.write(bytes, offset, size);
+ out.write(bytes, offset, kmerByteSize);
}
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
index 6c9dfe4..a32c306 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerListWritable.java
@@ -9,12 +9,14 @@
import org.apache.hadoop.io.Writable;
+import edu.uci.ics.genomix.data.KmerUtil;
+
public class KmerListWritable implements Writable, Iterable<KmerBytesWritable>, Serializable{
private static final long serialVersionUID = 1L;
protected byte[] storage;
protected int offset;
protected int valueCount;
- public int KMER_LENGTH = 3;
+ public int kmerByteSize = 2; // default kmerSize = 5 -> kmerByteSize = 2; the length is fixed once set
protected static final byte[] EMPTY = {};
protected KmerBytesWritable posIter = new KmerBytesWritable();
@@ -25,9 +27,9 @@
this.offset = 0;
}
- public KmerListWritable(int kmerLength) {
+ public KmerListWritable(int kmerSize) {
this();
- this.KMER_LENGTH = kmerLength;
+ this.kmerByteSize = KmerUtil.getByteNumFromK(kmerSize);
}
public KmerListWritable(int count, byte[] data, int offset) {
@@ -49,8 +51,8 @@
}
public void append(KmerBytesWritable kmer){
- setSize((1 + valueCount) * kmer.getLength());
- System.arraycopy(kmer.getBytes(), 0, storage, offset, KMER_LENGTH);
+ setSize((1 + valueCount) * kmerByteSize);
+ System.arraycopy(kmer.getBytes(), 0, storage, offset, kmerByteSize);
valueCount += 1;
}
@@ -83,7 +85,7 @@
if (i >= valueCount) {
throw new ArrayIndexOutOfBoundsException("No such positions");
}
- posIter.setNewReference(storage, offset + i * KMER_LENGTH);
+ posIter.setNewReference(storage, offset + i * kmerByteSize);
return posIter;
}
@@ -93,9 +95,9 @@
public void set(int valueCount, byte[] newData, int offset) {
this.valueCount = valueCount;
- setSize(valueCount * KMER_LENGTH);
+ setSize(valueCount * kmerByteSize);
if (valueCount > 0) {
- System.arraycopy(newData, offset, storage, this.offset, valueCount * KMER_LENGTH);
+ System.arraycopy(newData, offset, storage, this.offset, valueCount * kmerByteSize);
}
}
@@ -118,9 +120,9 @@
@Override
public void remove() {
if(currentIndex < valueCount)
- System.arraycopy(storage, offset + currentIndex * KMER_LENGTH,
- storage, offset + (currentIndex - 1) * KMER_LENGTH,
- (valueCount - currentIndex) * KMER_LENGTH);
+ System.arraycopy(storage, offset + currentIndex * kmerByteSize,
+ storage, offset + (currentIndex - 1) * kmerByteSize,
+ (valueCount - currentIndex) * kmerByteSize);
valueCount--;
currentIndex--;
}
@@ -131,14 +133,14 @@
@Override
public void readFields(DataInput in) throws IOException {
this.valueCount = in.readInt();
- setSize(valueCount * KMER_LENGTH);
- in.readFully(storage, offset, valueCount * KMER_LENGTH);
+ setSize(valueCount * kmerByteSize);
+ in.readFully(storage, offset, valueCount * kmerByteSize);
}
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(valueCount);
- out.write(storage, offset, valueCount * KMER_LENGTH);
+ out.write(storage, offset, valueCount * kmerByteSize);
}
public int getCountOfPosition() {
@@ -154,6 +156,6 @@
}
public int getLength() {
- return valueCount * KMER_LENGTH;
+ return valueCount * kmerByteSize;
}
}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingDriver.java
index 2d0c36e..c5a7c23 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingDriver.java
@@ -42,7 +42,7 @@
@Option(name = "-num-reducers", usage = "the number of reducers", required = true)
public int numReducers;
- @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+ @Option(name = "-kmer-kmerByteSize", usage = "the kmerByteSize of kmer", required = true)
public int sizeKmer;
}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixDriver.java
index e3f3fb2..b4885b5 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixDriver.java
@@ -44,7 +44,7 @@
@Option(name = "-num-reducers", usage = "the number of reducers", required = true)
public int numReducers;
- @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+ @Option(name = "-kmer-kmerByteSize", usage = "the kmerByteSize of kmer", required = true)
public int sizeKmer;
}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MergePathsH3Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MergePathsH3Driver.java
index 28c4108..cd54705 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MergePathsH3Driver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/MergePathsH3Driver.java
@@ -39,7 +39,7 @@
@Option(name = "-num-reducers", usage = "the number of reducers", required = true)
public int numReducers;
- @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+ @Option(name = "-kmer-kmerByteSize", usage = "the kmerByteSize of kmer", required = true)
public int sizeKmer;
@Option(name = "-merge-rounds", usage = "the maximum number of rounds to merge", required = false)
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h4/MergePathsH4Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h4/MergePathsH4Driver.java
index 8f5a3ac..1f6a157 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h4/MergePathsH4Driver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h4/MergePathsH4Driver.java
@@ -56,7 +56,7 @@
@Option(name = "-num-reducers", usage = "the number of reducers", required = true)
public int numReducers;
- @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+ @Option(name = "-kmer-kmerByteSize", usage = "the kmerByteSize of kmer", required = true)
public int sizeKmer;
@Option(name = "-merge-rounds", usage = "the maximum number of rounds to merge", required = false)
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerBytesWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerBytesWritable.java
index 6d53649..884b2a9 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerBytesWritable.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/oldtype/KmerBytesWritable.java
@@ -27,7 +27,7 @@
/**
* Fix kmer length byteswritable
* It was used to generate the graph in which phase the kmer length doesn't change.
- * Thus the size of bytes doesn't change either.
+ * Thus the number of bytes doesn't change either.
*/
public class KmerBytesWritable extends BinaryComparable implements Serializable, WritableComparable<BinaryComparable> {
/**
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Driver.java
index 56ce79b..12307fe 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Driver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Driver.java
@@ -52,7 +52,7 @@
@Option(name = "-num-reducers", usage = "the number of reducers", required = true)
public int numReducers;
- @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+ @Option(name = "-kmer-kmerByteSize", usage = "the kmerByteSize of kmer", required = true)
public int sizeKmer;
@Option(name = "-merge-rounds", usage = "the while rounds of merging", required = true)
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Driver.java
index 216da7a..8f8996b 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Driver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Driver.java
@@ -51,7 +51,7 @@
@Option(name = "-num-reducers", usage = "the number of reducers", required = true)
public int numReducers;
- @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+ @Option(name = "-kmer-kmerByteSize", usage = "the kmerByteSize of kmer", required = true)
public int sizeKmer;
@Option(name = "-merge-rounds", usage = "the while rounds of merging", required = true)
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphBuildingDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphBuildingDriver.java
index a9efb34..4d6b221 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphBuildingDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphBuildingDriver.java
@@ -36,7 +36,7 @@
@Option(name = "-num-reducers", usage = "the number of reducers", required = true)
public int numReducers;
- @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+ @Option(name = "-kmer-kmerByteSize", usage = "the kmerByteSize of kmer", required = true)
public int sizeKmer;
@Option(name = "-read-length", usage = "the length of read", required = true)
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/TestPathMergeH3.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/TestPathMergeH3.java
index f89a656..239a2f7 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/TestPathMergeH3.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphclean/mergepaths/h3/TestPathMergeH3.java
@@ -159,7 +159,7 @@
// copyResultsToLocal(HDFS_MARKPATHS + "complete", ACTUAL_ROOT + PATHMARKS_FILE, false, conf);
//
// MergePathsH3Driver h3 = new MergePathsH3Driver();
-// h3.run(HDFS_MARKPATHS + "toMerge", HDFS_MERGED, 2, KMER_LENGTH, 1, conf);
+// h3.run(HDFS_MARKPATHS + "toMerge", HDFS_MERGED, 2, kmerByteSize, 1, conf);
// copyResultsToLocal(HDFS_MERGED, ACTUAL_ROOT + PATHMERGE_FILE, false, conf);
}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java
index 8a8501d..d4ae5dd 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pmcommon/HadoopMiniClusterTest.java
@@ -222,7 +222,7 @@
// FileOutputFormat.setOutputPath(buildConf, new Path(INPUT_GRAPH));
//
// GraphBuildingDriver tldriver = new GraphBuildingDriver();
-// tldriver.run(SEQUENCE, INPUT_GRAPH, 2, KMER_LENGTH, READ_LENGTH, false, true, HADOOP_CONF_ROOT + "conf.xml");
+// tldriver.run(SEQUENCE, INPUT_GRAPH, 2, kmerByteSize, READ_LENGTH, false, true, HADOOP_CONF_ROOT + "conf.xml");
//
// boolean resultsAreText = true;
// copyResultsToLocal(INPUT_GRAPH, ACTUAL_ROOT + INPUT_GRAPH, resultsAreText, buildConf);
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapReadToNodeOperator.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapReadToNodeOperator.java
index 88c53e6..0f0aa29 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapReadToNodeOperator.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapReadToNodeOperator.java
@@ -183,7 +183,7 @@
offset += INT_LENGTH + length;
length = buffer.getInt(offset);
if (kmer.getLength() != length) {
- throw new IllegalArgumentException("kmer size is invalid");
+ throw new IllegalArgumentException("kmer kmerByteSize is invalid");
}
offset += INT_LENGTH;
kmer.set(buffer.array(), offset);
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/ReadsKeyValueParserFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/ReadsKeyValueParserFactory.java
index 8c69201..9aea9ad 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/ReadsKeyValueParserFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/ReadsKeyValueParserFactory.java
@@ -139,7 +139,7 @@
if (!outputAppender.append(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray(), 0,
tupleBuilder.getSize())) {
throw new IllegalStateException(
- "Failed to copy an record into a frame: the record size is too large.");
+ "Failed to copy an record into a frame: the record kmerByteSize is too large.");
}
}
} catch (Exception e) {
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeKmerAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeKmerAggregateFactory.java
index ce178c6..8620d39 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeKmerAggregateFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeKmerAggregateFactory.java
@@ -95,7 +95,7 @@
ArrayBackedValueStorage inputVal = (ArrayBackedValueStorage) state.state;
try {
if (inputVal.getLength() > frameSize / 2) {
- LOG.warn("MergeKmer: output data size is too big: " + inputVal.getLength());
+ LOG.warn("MergeKmer: output data kmerByteSize is too big: " + inputVal.getLength());
}
fieldOutput.write(inputVal.getByteArray(), inputVal.getStartOffset(), inputVal.getLength());
tupleBuilder.addFieldEndOffset();
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeReadIDAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeReadIDAggregateFactory.java
index 73e3093..f2eedde 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeReadIDAggregateFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeReadIDAggregateFactory.java
@@ -206,7 +206,7 @@
int leadbyte = accessor.getTupleStartOffset(tIndex) + accessor.getFieldSlotsLength();
int readID = accessor.getBuffer().getInt(
leadbyte + accessor.getFieldStartOffset(tIndex, InputReadIDField));
- LOG.warn("MergeReadID on read:" + readID + " is of size: " + totalSize + ", current frameSize:"
+ LOG.warn("MergeReadID on read:" + readID + " is of kmerByteSize: " + totalSize + ", current frameSize:"
+ frameSize + "\n Recommendate to enlarge the FrameSize");
}
if (totalSize > frameSize) {
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/GenomixJobConf.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/GenomixJobConf.java
index d1f47c2..de56b83 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/GenomixJobConf.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/GenomixJobConf.java
@@ -83,7 +83,7 @@
* Set the kmer length
*
* @param the
- * desired frame size
+ * desired kmer length
*/
final public void setKmerLength(int kmerlength) {
setInt(KMER_LENGTH, kmerlength);
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenBrujinGraph.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenBrujinGraph.java
index 09794d0..7571653 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenBrujinGraph.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenBrujinGraph.java
@@ -378,7 +378,7 @@
LOG.info("Groupby type:" + type);
LOG.info("Output format:" + output);
LOG.info("Frame limit" + frameLimits);
- LOG.info("Frame size" + frameSize);
+ LOG.info("Frame kmerByteSize" + frameSize);
}
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/ReadsKeyValueParserFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/ReadsKeyValueParserFactory.java
index 915540a..2ef5920 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/ReadsKeyValueParserFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/dataflow/ReadsKeyValueParserFactory.java
@@ -44,7 +44,7 @@
private static final Log LOG = LogFactory.getLog(ReadsKeyValueParserFactory.class);
public static final int OutputKmerField = 0;
- public static final int OutputNodeId = 1;
+ public static final int OutputNodeIdField = 1;
public static final int OutputForwardForwardField = 2;
public static final int OutputForwardReverseField = 3;
public static final int OutputReverseForwardField = 4;
@@ -72,8 +72,8 @@
private KmerBytesWritable kmer = new KmerBytesWritable(kmerSize);
private KmerBytesWritable nextKmer = new KmerBytesWritable(kmerSize);
- private PositionWritable uniqueKey = new PositionWritable();
- private KmerListWritable kmerList = new KmerListWritable();
+ private PositionWritable nodeId = new PositionWritable();
+ private KmerListWritable kmerList = new KmerListWritable(kmerSize);
private IntermediateNodeWritable interMediateNode = new IntermediateNodeWritable();
private byte mateId = 0;
@@ -112,8 +112,8 @@
nextKmer.set(kmer);
nextKmer.shiftKmerWithNextChar(array[kmerSize]);
kmerList.append(nextKmer);
- uniqueKey.set(mateId, readID, 1);
- interMediateNode.setNodeId(uniqueKey);
+ nodeId.set(mateId, readID, 1);
+ interMediateNode.setNodeId(nodeId);
interMediateNode.setFFList(kmerList);
InsertToFrame(kmer, interMediateNode, writer);
@@ -123,8 +123,8 @@
nextKmer.set(kmer);
nextKmer.shiftKmerWithNextChar(array[i+1]);
kmerList.append(nextKmer);
- uniqueKey.set(mateId, readID, i - kmerSize + 2);
- interMediateNode.setNodeId(uniqueKey);
+ nodeId.set(mateId, readID, i - kmerSize + 2);
+ interMediateNode.setNodeId(nodeId);
interMediateNode.setFFList(kmerList);
InsertToFrame(kmer, interMediateNode, writer);
}
@@ -150,7 +150,7 @@
if (!outputAppender.append(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray(), 0,
tupleBuilder.getSize())) {
throw new IllegalStateException(
- "Failed to copy an record into a frame: the record size is too large.");
+ "Failed to copy an record into a frame: the record kmerByteSize is too large.");
}
}
} catch (Exception e) {
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/driver/Driver.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/driver/Driver.java
index 64d359d..6d6e1e6 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/driver/Driver.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/driver/Driver.java
@@ -10,12 +10,8 @@
import edu.uci.ics.genomix.hyracks.driver.Driver.Plan;
import edu.uci.ics.genomix.hyracks.job.GenomixJobConf;
import edu.uci.ics.genomix.hyracks.job.JobGen;
-import edu.uci.ics.genomix.hyracks.job.JobGenBrujinGraph;
-import edu.uci.ics.genomix.hyracks.job.JobGenCheckReader;
-import edu.uci.ics.genomix.hyracks.job.JobGenCreateKmerInfo;
-import edu.uci.ics.genomix.hyracks.job.JobGenGroupbyReadID;
-import edu.uci.ics.genomix.hyracks.job.JobGenMapKmerToRead;
-import edu.uci.ics.genomix.hyracks.job.JobGenUnMerged;
+import edu.uci.ics.genomix.hyracks.newgraph.job.JobGenCheckReader;
+
import edu.uci.ics.hyracks.api.client.HyracksConnection;
import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/JobGenBrujinGraph.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/JobGenBrujinGraph.java
index eadf046..abfff00 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/JobGenBrujinGraph.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/JobGenBrujinGraph.java
@@ -129,7 +129,7 @@
LOG.info("Groupby type:" + type);
LOG.info("Output format:" + output);
LOG.info("Frame limit" + frameLimits);
- LOG.info("Frame size" + frameSize);
+ LOG.info("Frame kmerByteSize" + frameSize);
}
public HDFSReadOperatorDescriptor createHDFSReader(JobSpecification jobSpec) throws HyracksDataException {
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/JobGenCheckReader.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/JobGenCheckReader.java
index 2f89cb1..c371f6d 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/JobGenCheckReader.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/job/JobGenCheckReader.java
@@ -18,7 +18,7 @@
import java.io.IOException;
import java.util.Map;
-import edu.uci.ics.genomix.hyracks.dataflow.ReadsKeyValueParserFactory;
+import edu.uci.ics.genomix.hyracks.newgraph.dataflow.ReadsKeyValueParserFactory;
import edu.uci.ics.genomix.hyracks.job.GenomixJobConf;
import edu.uci.ics.genomix.oldtype.PositionWritable;
import edu.uci.ics.genomix.type.IntermediateNodeWritable;
@@ -65,9 +65,6 @@
HDFSWriteOperatorDescriptor writeKmerOperator = new HDFSWriteOperatorDescriptor(jobSpec,
hadoopJobConfFactory.getConf(), new ITupleWriterFactory() {
- /**
- *
- */
private static final long serialVersionUID = 1L;
@Override
@@ -75,7 +72,6 @@
return new ITupleWriter() {
private KmerBytesWritable kmer = new KmerBytesWritable(kmerSize);
- private PositionWritable pos = new PositionWritable();
private IntermediateNodeWritable intermediateNode = new IntermediateNodeWritable();
@Override
@@ -89,15 +85,29 @@
.getFieldLength(ReadsKeyValueParserFactory.OutputKmerField)) {
throw new IllegalArgumentException("Not enough kmer bytes");
}
+ //kmer
kmer.setNewReference(
tuple.getFieldData(ReadsKeyValueParserFactory.OutputKmerField),
tuple.getFieldStart(ReadsKeyValueParserFactory.OutputKmerField));
- pos.setNewReference(tuple.getFieldData(ReadsKeyValueParserFactory.OutputPosition),
- tuple.getFieldStart(ReadsKeyValueParserFactory.OutputPosition));
-
+ //nodeId
+ intermediateNode.getNodeId().setNewReference(tuple.getFieldData(ReadsKeyValueParserFactory.OutputNodeIdField),
+ tuple.getFieldStart(ReadsKeyValueParserFactory.OutputNodeIdField));
+ //FF list
+ intermediateNode.getFFList().setNewReference(tuple.getFieldLength(ReadsKeyValueParserFactory.OutputForwardForwardField) / kmer.getLength(),
+ tuple.getFieldData(ReadsKeyValueParserFactory.OutputForwardForwardField), tuple.getFieldStart(ReadsKeyValueParserFactory.OutputForwardForwardField));
+ //FR list
+ intermediateNode.getFRList().setNewReference(tuple.getFieldLength(ReadsKeyValueParserFactory.OutputForwardReverseField) / kmer.getLength(),
+ tuple.getFieldData(ReadsKeyValueParserFactory.OutputForwardReverseField), tuple.getFieldStart(ReadsKeyValueParserFactory.OutputForwardReverseField));
+ //RF list
+ intermediateNode.getRFList().setNewReference(tuple.getFieldLength(ReadsKeyValueParserFactory.OutputReverseForwardField) / kmer.getLength(),
+ tuple.getFieldData(ReadsKeyValueParserFactory.OutputReverseForwardField), tuple.getFieldStart(ReadsKeyValueParserFactory.OutputReverseForwardField));
+ //RR list
+ intermediateNode.getRRList().setNewReference(tuple.getFieldLength(ReadsKeyValueParserFactory.OutputReverseReverseField) / kmer.getLength(),
+ tuple.getFieldData(ReadsKeyValueParserFactory.OutputReverseReverseField), tuple.getFieldStart(ReadsKeyValueParserFactory.OutputReverseReverseField));
+
output.write(kmer.toString().getBytes());
output.writeByte('\t');
- output.write(pos.toString().getBytes());
+ output.write(intermediateNode.toString().getBytes());
output.writeByte('\n');
} catch (IOException e) {
throw new HyracksDataException(e);
diff --git a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/JobRun.java b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/JobRun.java
index d4798e6..239071c 100644
--- a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/JobRun.java
+++ b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/JobRun.java
@@ -1,12 +1,15 @@
package edu.uci.ics.genomix.hyracks.newgraph.test;
import java.io.BufferedWriter;
+import java.io.DataOutputStream;
import java.io.File;
+import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import junit.framework.Assert;
+import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
@@ -14,8 +17,11 @@
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.junit.After;
+import org.junit.Before;
import org.junit.Test;
import edu.uci.ics.genomix.hyracks.job.GenomixJobConf;
@@ -24,6 +30,7 @@
import edu.uci.ics.genomix.hyracks.test.TestUtils;
import edu.uci.ics.genomix.oldtype.NodeWritable;
+@SuppressWarnings("deprecation")
public class JobRun {
private static final int KmerSize = 5;
private static final int ReadLength = 8;
@@ -36,11 +43,11 @@
private static final String EXPECTED_DIR = "src/test/resources/expected/";
private static final String EXPECTED_READER_RESULT = EXPECTED_DIR + "result_after_initial_read";
- private static final String EXPECTED_OUPUT_KMER = EXPECTED_DIR + "result_after_kmerAggregate";
- private static final String EXPECTED_KMER_TO_READID = EXPECTED_DIR + "result_after_kmer2readId";
- private static final String EXPECTED_GROUPBYREADID = EXPECTED_DIR + "result_after_readIDAggreage";
- private static final String EXPECTED_OUPUT_NODE = EXPECTED_DIR + "result_after_generateNode";
- private static final String EXPECTED_UNMERGED = EXPECTED_DIR + "result_unmerged";
+// private static final String EXPECTED_OUPUT_KMER = EXPECTED_DIR + "result_after_kmerAggregate";
+// private static final String EXPECTED_KMER_TO_READID = EXPECTED_DIR + "result_after_kmer2readId";
+// private static final String EXPECTED_GROUPBYREADID = EXPECTED_DIR + "result_after_readIDAggreage";
+// private static final String EXPECTED_OUPUT_NODE = EXPECTED_DIR + "result_after_generateNode";
+// private static final String EXPECTED_UNMERGED = EXPECTED_DIR + "result_unmerged";
private static final String DUMPED_RESULT = ACTUAL_RESULT_DIR + HDFS_OUTPUT_PATH + "/merged.txt";
private static final String CONVERT_RESULT = DUMPED_RESULT + ".txt";
@@ -62,6 +69,53 @@
cleanUpReEntry();
conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_TEXT);
driver.runJob(new GenomixJobConf(conf), Plan.CHECK_KMERREADER, true);
+ Assert.assertEquals(true, checkResults(EXPECTED_READER_RESULT, null));
+ }
+
+ @Before
+ public void setUp() throws Exception {
+ cleanupStores();
+ edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.init();
+ FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+ FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+ startHDFS();
+
+ FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
+ FileOutputFormat.setOutputPath(conf, new Path(HDFS_OUTPUT_PATH));
+
+ conf.setInt(GenomixJobConf.KMER_LENGTH, KmerSize);
+ conf.setInt(GenomixJobConf.READ_LENGTH, ReadLength);
+ driver = new Driver(edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.CC_HOST,
+ edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT, numPartitionPerMachine);
+ }
+
+ private void cleanupStores() throws IOException {
+ FileUtils.forceMkdir(new File("teststore"));
+ FileUtils.forceMkdir(new File("build"));
+ FileUtils.cleanDirectory(new File("teststore"));
+ FileUtils.cleanDirectory(new File("build"));
+ }
+
+ private void startHDFS() throws IOException {
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+
+ FileSystem lfs = FileSystem.getLocal(new Configuration());
+ lfs.delete(new Path("build"), true);
+ System.setProperty("hadoop.log.dir", "logs");
+ dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
+ FileSystem dfs = FileSystem.get(conf);
+ Path src = new Path(DATA_INPUT_PATH);
+ Path dest = new Path(HDFS_INPUT_PATH);
+ dfs.mkdirs(dest);
+ // dfs.mkdirs(result);
+ dfs.copyFromLocalFile(src, dest);
+
+ DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+ conf.writeXml(confOutput);
+ confOutput.flush();
+ confOutput.close();
}
private void cleanUpReEntry() throws IOException {
@@ -75,6 +129,56 @@
}
}
+ private boolean checkResults(String expectedPath, int[] poslistField) throws Exception {
+ File dumped = null;
+ String format = conf.get(GenomixJobConf.OUTPUT_FORMAT);
+ if (GenomixJobConf.OUTPUT_FORMAT_TEXT.equalsIgnoreCase(format)) {
+ FileUtil.copyMerge(FileSystem.get(conf), new Path(HDFS_OUTPUT_PATH),
+ FileSystem.getLocal(new Configuration()), new Path(DUMPED_RESULT), false, conf, null);
+ dumped = new File(DUMPED_RESULT);
+ } else {
+
+ FileSystem.getLocal(new Configuration()).mkdirs(new Path(ACTUAL_RESULT_DIR + HDFS_OUTPUT_PATH));
+ File filePathTo = new File(CONVERT_RESULT);
+ BufferedWriter bw = new BufferedWriter(new FileWriter(filePathTo));
+ for (int i = 0; i < numPartitionPerMachine * numberOfNC; i++) {
+ String partname = "/part-" + i;
+ // FileUtil.copy(FileSystem.get(conf), new Path(HDFS_OUTPUT_PATH
+ // + partname), FileSystem.getLocal(new Configuration()),
+ // new Path(ACTUAL_RESULT_DIR + HDFS_OUTPUT_PATH + partname),
+ // false, conf);
+
+ Path path = new Path(HDFS_OUTPUT_PATH + partname);
+ FileSystem dfs = FileSystem.get(conf);
+ if (dfs.getFileStatus(path).getLen() == 0) {
+ continue;
+ }
+ SequenceFile.Reader reader = new SequenceFile.Reader(dfs, path, conf);
+
+ NodeWritable node = new NodeWritable(conf.getInt(GenomixJobConf.KMER_LENGTH, KmerSize));
+ NullWritable value = NullWritable.get();
+ while (reader.next(node, value)) {
+ if (node == null) {
+ break;
+ }
+ bw.write(node.toString());
+ System.out.println(node.toString());
+ bw.newLine();
+ }
+ reader.close();
+ }
+ bw.close();
+ dumped = new File(CONVERT_RESULT);
+ }
+
+ if (poslistField != null) {
+ TestUtils.compareWithUnSortedPosition(new File(expectedPath), dumped, poslistField);
+ } else {
+ TestUtils.compareWithSortedResult(new File(expectedPath), dumped);
+ }
+ return true;
+ }
+
@After
public void tearDown() throws Exception {
edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.deinit();
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/client/Client.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/client/Client.java
index e135085..918fa1e 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/client/Client.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/client/Client.java
@@ -34,7 +34,7 @@
@Option(name = "-plan", usage = "query plan choice", required = false)
public Plan planChoice = Plan.OUTER_JOIN;
- @Option(name = "-kmer-size", usage = "the size of kmer", required = false)
+ @Option(name = "-kmer-kmerByteSize", usage = "the kmerByteSize of kmer", required = false)
public int sizeKmer;
@Option(name = "-num-iteration", usage = "max number of iterations, for pagerank job only", required = false)