change fix bytes array to flexible bytes with offset and length in KmerUtil
git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_genomix@3382 123451ca-8445-de46-9d55-352943316053
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerUtil.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerUtil.java
index d796d19..c4d6401 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerUtil.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerUtil.java
@@ -1,5 +1,7 @@
package edu.uci.ics.genomix.type;
+import java.util.Arrays;
+
public class KmerUtil {
public static int countNumberOfBitSet(int i) {
@@ -28,7 +30,7 @@
* @return LastKmer bytes array
*/
public static byte[] getLastKmerFromChain(int k, int kInChain,
- byte[] kmerChain) {
+ byte[] kmerChain, int offset, int length) {
if (k > kInChain) {
return null;
}
@@ -39,17 +41,17 @@
byte[] kmer = new byte[byteNum];
/** from end to start */
- int byteInChain = kmerChain.length - 1 - (kInChain - k) / 4;
+ int byteInChain = length - 1 - (kInChain - k) / 4;
int posInByteOfChain = ((kInChain - k) % 4) << 1; // *2
int byteInKmer = byteNum - 1;
for (; byteInKmer >= 0 && byteInChain > 0; byteInKmer--, byteInChain--) {
- kmer[byteInKmer] = (byte) ((0xff & kmerChain[byteInChain]) >> posInByteOfChain);
- kmer[byteInKmer] |= ((kmerChain[byteInChain - 1] << (8 - posInByteOfChain)));
+ kmer[byteInKmer] = (byte) ((0xff & kmerChain[offset + byteInChain]) >> posInByteOfChain);
+ kmer[byteInKmer] |= ((kmerChain[offset + byteInChain - 1] << (8 - posInByteOfChain)));
}
/** last kmer byte */
if (byteInKmer == 0) {
- kmer[0] = (byte) ((kmerChain[0] & 0xff) >> posInByteOfChain);
+ kmer[0] = (byte) ((kmerChain[offset] & 0xff) >> posInByteOfChain);
}
return kmer;
}
@@ -64,7 +66,7 @@
* @return FirstKmer bytes array
*/
public static byte[] getFirstKmerFromChain(int k, int kInChain,
- byte[] kmerChain) {
+ byte[] kmerChain, int offset, int length) {
if (k > kInChain) {
return null;
}
@@ -76,13 +78,13 @@
int i = 1;
for (; i < kmer.length; i++) {
- kmer[kmer.length - i] = kmerChain[kmerChain.length - i];
+ kmer[kmer.length - i] = kmerChain[offset + length - i];
}
int posInByteOfChain = (k % 4) << 1; // *2
if (posInByteOfChain == 0) {
- kmer[0] = kmerChain[kmerChain.length - i];
+ kmer[0] = kmerChain[offset + length - i];
} else {
- kmer[0] = (byte) (kmerChain[kmerChain.length - i] & ((1 << posInByteOfChain) - 1));
+ kmer[0] = (byte) (kmerChain[offset + length - i] & ((1 << posInByteOfChain) - 1));
}
return kmer;
}
@@ -96,19 +98,19 @@
* @param nextCode: next neighbor in gene-code format
* @return the merged Kmer, this K of this Kmer is k+1
*/
- public static byte[] mergeKmerWithNextCode(int k, byte[] kmer, byte nextCode) {
- int byteNum = kmer.length;
+ public static byte[] mergeKmerWithNextCode(int k, byte[] kmer, int offset, int length, byte nextCode) {
+ int byteNum = length;
if (k % 4 == 0) {
byteNum++;
}
byte[] mergedKmer = new byte[byteNum];
- for (int i = 1; i <= kmer.length; i++) {
- mergedKmer[mergedKmer.length - i] = kmer[kmer.length - i];
+ for (int i = 1; i <= length; i++) {
+ mergedKmer[mergedKmer.length - i] = kmer[offset + length - i];
}
- if (mergedKmer.length > kmer.length) {
+ if (mergedKmer.length > length) {
mergedKmer[0] = (byte) (nextCode & 0x3);
} else {
- mergedKmer[0] = (byte) (kmer[0] | ((nextCode & 0x3) << ((k % 4) << 1)));
+ mergedKmer[0] = (byte) (kmer[offset] | ((nextCode & 0x3) << ((k % 4) << 1)));
}
return mergedKmer;
}
@@ -122,22 +124,22 @@
* @param preCode: next neighbor in gene-code format
* @return the merged Kmer,this K of this Kmer is k+1
*/
- public static byte[] mergeKmerWithPreCode(int k, byte[] kmer, byte preCode) {
- int byteNum = kmer.length;
+ public static byte[] mergeKmerWithPreCode(int k, byte[] kmer, int offset, int length, byte preCode) {
+ int byteNum = length;
byte[] mergedKmer = null;
int byteInMergedKmer = 0;
if (k % 4 == 0) {
byteNum++;
mergedKmer = new byte[byteNum];
- mergedKmer[0] = (byte) ((kmer[0] >> 6) & 0x3);
+ mergedKmer[0] = (byte) ((kmer[offset] >> 6) & 0x3);
byteInMergedKmer++;
} else {
mergedKmer = new byte[byteNum];
}
- for (int i = 0; i < kmer.length - 1; i++, byteInMergedKmer++) {
- mergedKmer[byteInMergedKmer] = (byte) ((kmer[i] << 2) | ((kmer[i + 1] >> 6) & 0x3));
+ for (int i = 0; i < length - 1; i++, byteInMergedKmer++) {
+ mergedKmer[byteInMergedKmer] = (byte) ((kmer[offset + i] << 2) | ((kmer[offset + i + 1] >> 6) & 0x3));
}
- mergedKmer[byteInMergedKmer] = (byte) ((kmer[kmer.length - 1] << 2) | (preCode & 0x3));
+ mergedKmer[byteInMergedKmer] = (byte) ((kmer[offset + length - 1] << 2) | (preCode & 0x3));
return mergedKmer;
}
@@ -150,31 +152,31 @@
* @param kmerNext : bytes array of next kmer
* @return merged kmer, the new k is @preK + @nextK
*/
- public static byte[] mergeTwoKmer(int preK, byte[] kmerPre, int nextK,
- byte[] kmerNext) {
+ public static byte[] mergeTwoKmer(int preK, byte[] kmerPre, int offsetPre, int lengthPre, int nextK,
+ byte[] kmerNext, int offsetNext, int lengthNext) {
int byteNum = Kmer.getByteNumFromK(preK + nextK);
byte[] mergedKmer = new byte[byteNum];
int i = 1;
- for (; i <= kmerPre.length; i++) {
- mergedKmer[byteNum - i] = kmerPre[kmerPre.length - i];
+ for (; i <= lengthPre; i++) {
+ mergedKmer[byteNum - i] = kmerPre[offsetPre + lengthPre - i];
}
if ( i > 1){
i--;
}
if (preK % 4 == 0) {
- for (int j = 1; j <= kmerNext.length; j++) {
- mergedKmer[byteNum - i - j] = kmerNext[kmerNext.length - j];
+ for (int j = 1; j <= lengthNext; j++) {
+ mergedKmer[byteNum - i - j] = kmerNext[offsetNext + lengthNext - j];
}
} else {
int posNeedToMove = ((preK % 4) << 1);
- mergedKmer[byteNum - i] |= kmerNext[kmerNext.length - 1] << posNeedToMove;
- for (int j = 1; j < kmerNext.length; j++) {
- mergedKmer[byteNum - i - j] = (byte) (((kmerNext[kmerNext.length
- - j] & 0xff) >> (8 - posNeedToMove)) | (kmerNext[kmerNext.length
+ mergedKmer[byteNum - i] |= kmerNext[offsetNext + lengthNext - 1] << posNeedToMove;
+ for (int j = 1; j < lengthNext; j++) {
+ mergedKmer[byteNum - i - j] = (byte) (((kmerNext[offsetNext + lengthNext
+ - j] & 0xff) >> (8 - posNeedToMove)) | (kmerNext[offsetNext + lengthNext
- j - 1] << posNeedToMove));
}
if ( (nextK % 4) * 2 + posNeedToMove > 8) {
- mergedKmer[0] = (byte) (kmerNext[0] >> (8 - posNeedToMove));
+ mergedKmer[0] = (byte) (kmerNext[offsetNext] >> (8 - posNeedToMove));
}
}
return mergedKmer;
@@ -188,8 +190,8 @@
* @param afterCode: input genecode
* @return new created kmer that shifted by afterCode, the K will not change
*/
- public static byte[] shiftKmerWithNextCode(int k, final byte[] kmer, byte afterCode){
- byte[] shifted = kmer.clone();
+ public static byte[] shiftKmerWithNextCode(int k, final byte[] kmer, int offset, int length, byte afterCode){
+ byte[] shifted = Arrays.copyOfRange(kmer, offset, offset+length);
Kmer.moveKmer(k, shifted, Kmer.GENE_CODE.getSymbolFromCode(afterCode));
return shifted;
}
@@ -202,10 +204,19 @@
* @param preCode: input genecode
* @return new created kmer that shifted by preCode, the K will not change
*/
- public static byte[] shiftKmerWithPreCode(int k, final byte[] kmer, byte preCode){
- byte[] shifted = kmer.clone();
+ public static byte[] shiftKmerWithPreCode(int k, final byte[] kmer, int offset, int length, byte preCode){
+ byte[] shifted = Arrays.copyOfRange(kmer, offset, offset+length);
Kmer.moveKmerReverse(k, shifted, Kmer.GENE_CODE.getSymbolFromCode(preCode));
return shifted;
}
+ public static byte getGeneCodeAtPosition(int pos, int k, final byte[] kmer,
+ int offset, int length) {
+ if (pos >= k) {
+ return -1;
+ }
+ int posByte = pos / 4;
+ int shift = (pos % 4) << 1;
+ return (byte) ((kmer[offset + length - 1 - posByte] >> shift) & 0x3);
+ }
}
diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerUtilTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerUtilTest.java
index 206e279..4b6e180 100644
--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerUtilTest.java
+++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/example/kmer/KmerUtilTest.java
@@ -24,7 +24,7 @@
Assert.assertEquals("AGCTGACCG", Kmer.recoverKmerFrom(9, kmerChain, 0, kmerChain.length));
byte[] lastKmer ;
for(int i = 8; i>0 ; i--){
- lastKmer = KmerUtil.getLastKmerFromChain(i, 9, kmerChain);
+ lastKmer = KmerUtil.getLastKmerFromChain(i, 9, kmerChain, 0, kmerChain.length);
// System.out.println(Kmer.recoverKmerFrom(i, lastKmer, 0, lastKmer.length));
Assert.assertEquals("AGCTGACCG".substring(9-i), Kmer.recoverKmerFrom(i, lastKmer, 0, lastKmer.length));
}
@@ -36,13 +36,13 @@
String text = "AGCTGACCG";
Assert.assertEquals(text, Kmer.recoverKmerFrom(9, kmer, 0, kmer.length));
for(byte x = Kmer.GENE_CODE.A; x<= Kmer.GENE_CODE.T ; x++){
- kmer = KmerUtil.mergeKmerWithNextCode(9+x, kmer, x);
+ kmer = KmerUtil.mergeKmerWithNextCode(9+x, kmer, 0, kmer.length, x);
// System.out.println(Kmer.recoverKmerFrom(9+x+1, kmer, 0, kmer.length));
text = text + (char)Kmer.GENE_SYMBOL[x];
Assert.assertEquals(text, Kmer.recoverKmerFrom(9+x+1, kmer, 0, kmer.length));
}
for(byte x = Kmer.GENE_CODE.A; x<= Kmer.GENE_CODE.T ; x++){
- kmer = KmerUtil.mergeKmerWithNextCode(13+x, kmer, x);
+ kmer = KmerUtil.mergeKmerWithNextCode(13+x, kmer,0, kmer.length, x);
// System.out.println(Kmer.recoverKmerFrom(13+x+1, kmer, 0, kmer.length));
text = text + (char)Kmer.GENE_SYMBOL[x];
Assert.assertEquals(text, Kmer.recoverKmerFrom(13+x+1, kmer, 0, kmer.length));
@@ -55,13 +55,13 @@
String text = "AGCTGACCG";
Assert.assertEquals(text, Kmer.recoverKmerFrom(9, kmer, 0, kmer.length));
for(byte x = Kmer.GENE_CODE.A; x<= Kmer.GENE_CODE.T ; x++){
- kmer = KmerUtil.mergeKmerWithPreCode(9+x, kmer, x);
+ kmer = KmerUtil.mergeKmerWithPreCode(9+x, kmer, 0, kmer.length,x);
// System.out.println(Kmer.recoverKmerFrom(9+x+1, kmer, 0, kmer.length));
text = (char)Kmer.GENE_SYMBOL[x] + text;
Assert.assertEquals(text , Kmer.recoverKmerFrom(9+x+1, kmer, 0, kmer.length));
}
for(byte x = Kmer.GENE_CODE.A; x<= Kmer.GENE_CODE.T ; x++){
- kmer = KmerUtil.mergeKmerWithPreCode(13+x, kmer, x);
+ kmer = KmerUtil.mergeKmerWithPreCode(13+x, kmer,0, kmer.length, x);
// System.out.println(Kmer.recoverKmerFrom(13+x+1, kmer, 0, kmer.length));
text = (char)Kmer.GENE_SYMBOL[x] + text;
Assert.assertEquals(text , Kmer.recoverKmerFrom(13+x+1, kmer, 0, kmer.length));
@@ -77,33 +77,33 @@
Assert.assertEquals(text1, Kmer.recoverKmerFrom(9, kmer1, 0, kmer1.length));
Assert.assertEquals(text2, Kmer.recoverKmerFrom(9, kmer2, 0, kmer2.length));
- byte[] merged = KmerUtil.mergeTwoKmer(9, kmer1, 9, kmer2);
+ byte[] merged = KmerUtil.mergeTwoKmer(9, kmer1,0,kmer1.length, 9, kmer2,0,kmer2.length);
Assert.assertEquals(text1+text2, Kmer.recoverKmerFrom(9+9, merged, 0, merged.length));
byte[] kmer3 = Kmer.compressKmer(3, array, 1);
String text3 = "GCT";
Assert.assertEquals(text3, Kmer.recoverKmerFrom(3, kmer3, 0, kmer3.length));
- merged = KmerUtil.mergeTwoKmer(9, kmer1, 3, kmer3);
+ merged = KmerUtil.mergeTwoKmer(9, kmer1, 0 , kmer1.length, 3, kmer3, 0, kmer3.length);
Assert.assertEquals(text1+text3, Kmer.recoverKmerFrom(9+3, merged, 0, merged.length));
- merged = KmerUtil.mergeTwoKmer(3, kmer3, 9, kmer1);
+ merged = KmerUtil.mergeTwoKmer(3, kmer3, 0 , kmer3.length, 9, kmer1, 0, kmer1.length);
Assert.assertEquals(text3+text1, Kmer.recoverKmerFrom(9+3, merged, 0, merged.length));
byte[] kmer4 = Kmer.compressKmer(8, array, 0);
String text4 = "AGCTGACC";
Assert.assertEquals(text4, Kmer.recoverKmerFrom(8, kmer4, 0, kmer4.length));
- merged = KmerUtil.mergeTwoKmer(8, kmer4, 3, kmer3);
+ merged = KmerUtil.mergeTwoKmer(8, kmer4, 0, kmer4.length, 3, kmer3, 0, kmer3.length);
Assert.assertEquals(text4+text3, Kmer.recoverKmerFrom(8+3, merged, 0, merged.length));
byte[] kmer5 = Kmer.compressKmer(7, array, 0);
String text5 = "AGCTGAC";
byte[] kmer6 = Kmer.compressKmer(9, array, 1);
String text6 = "GCTGACCGT";
- merged = KmerUtil.mergeTwoKmer(7, kmer5, 9, kmer6);
+ merged = KmerUtil.mergeTwoKmer(7, kmer5, 0, kmer5.length,9, kmer6, 0, kmer6.length);
Assert.assertEquals(text5+text6, Kmer.recoverKmerFrom(7+9, merged, 0, merged.length));
byte[] kmer7 = Kmer.compressKmer(6, array, 1);
String text7 = "GCTGAC";
- merged = KmerUtil.mergeTwoKmer(7, kmer5, 6, kmer7);
+ merged = KmerUtil.mergeTwoKmer(7, kmer5, 0, kmer5.length, 6, kmer7, 0, kmer7.length);
Assert.assertEquals(text5+text7, Kmer.recoverKmerFrom(7+6, merged, 0, merged.length));
}
@@ -113,12 +113,21 @@
String text = "AGCTGACCG";
Assert.assertEquals(text, Kmer.recoverKmerFrom(9, kmer, 0, kmer.length));
- byte [] kmerForward = KmerUtil.shiftKmerWithNextCode(9, kmer, Kmer.GENE_CODE.A);
+ byte [] kmerForward = KmerUtil.shiftKmerWithNextCode(9, kmer,0, kmer.length, Kmer.GENE_CODE.A);
Assert.assertEquals(text, Kmer.recoverKmerFrom(9, kmer, 0, kmer.length));
Assert.assertEquals("GCTGACCGA", Kmer.recoverKmerFrom(9, kmerForward, 0, kmerForward.length));
- byte [] kmerBackward = KmerUtil.shiftKmerWithPreCode(9, kmer, Kmer.GENE_CODE.C);
+ byte [] kmerBackward = KmerUtil.shiftKmerWithPreCode(9, kmer,0, kmer.length,Kmer.GENE_CODE.C);
Assert.assertEquals(text, Kmer.recoverKmerFrom(9, kmer, 0, kmer.length));
Assert.assertEquals("CAGCTGACC", Kmer.recoverKmerFrom(9, kmerBackward, 0, kmerBackward.length));
}
+ @Test
+ public void TestGetGene(){
+ byte[] kmer = Kmer.compressKmer(9, array, 0);
+ String text = "AGCTGACCG";
+ for(int i =0; i < 9; i++){
+ Assert.assertEquals(text.charAt(i),
+ (char)(Kmer.GENE_CODE.getSymbolFromCode(KmerUtil.getGeneCodeAtPosition(i, 9, kmer, 0, kmer.length))));
+ }
+ }
}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMapper.java
index 50146ad..1d772b2 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMapper.java
@@ -49,20 +49,18 @@
precursor = (byte) ((precursor & 0xff) >> 4);
succeed = (byte) (succeed & adjBitMap);
- byte[] kmerValue = new byte[key.getLength()];
- for (int i = 0; i < kmerValue.length; i++) {
- kmerValue[i] = key.getBytes()[i];
- }
+ byte[] kmerValue = key.getBytes();
+ int kmerLength = key.getLength();
if (bitFlag == 1) {
byte succeedCode = GENE_CODE.getGeneCodeFromBitMap(succeed);
int originalByteNum = Kmer.getByteNumFromK(KMER_SIZE);
- byte[] tmpKmer = KmerUtil.getLastKmerFromChain(KMER_SIZE, value.getKmerSize(), kmerValue);
- byte[] newKmer = KmerUtil.shiftKmerWithNextCode(KMER_SIZE, tmpKmer, succeedCode);
+ byte[] tmpKmer = KmerUtil.getLastKmerFromChain(KMER_SIZE, value.getKmerSize(), kmerValue, 0, kmerLength);
+ byte[] newKmer = KmerUtil.shiftKmerWithNextCode(KMER_SIZE, tmpKmer,0, tmpKmer.length, succeedCode);
outputKmer.set(newKmer, 0, originalByteNum);
int mergeByteNum = Kmer.getByteNumFromK(value.getKmerSize() - (KMER_SIZE - 1));
byte[] mergeKmer = KmerUtil.getFirstKmerFromChain(value.getKmerSize() - (KMER_SIZE - 1),
- value.getKmerSize(), kmerValue);
+ value.getKmerSize(), kmerValue, 0, kmerLength);
outputAdjList.set(mergeKmer, 0, mergeByteNum, adjBitMap, bitFlag, value.getKmerSize() - (KMER_SIZE - 1));
output.collect(outputKmer, outputAdjList);
} else {
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathReducer.java
index 0ffc6e0..2397a98 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathReducer.java
@@ -50,20 +50,18 @@
if (values.hasNext() == true) {
- byte[] keyBytes = new byte[key.getLength()];
- for (int i = 0; i < keyBytes.length; i++) {
- keyBytes[i] = key.getBytes()[i];
- }
+ byte[] keyBytes = key.getBytes();
+ int keyLength = key.getLength();
if (outputAdjList.getFlag() == 1) {
byte adjBitMap = outputAdjList.getAdjBitMap();
byte bitFlag = outputAdjList.getFlag();
int kmerSize = outputAdjList.getKmerSize();
int mergeByteNum = Kmer.getByteNumFromK(KMER_SIZE + kmerSize);
- byte[] valueBytes = new byte[outputAdjList.getLength()];
- for (int i = 0; i < valueBytes.length; i++) {
- valueBytes[i] = outputAdjList.getBytes()[i];
- }
- byte[] mergeKmer = KmerUtil.mergeTwoKmer(outputAdjList.getKmerSize(), valueBytes, KMER_SIZE, keyBytes);
+ byte[] valueBytes = outputAdjList.getBytes();
+ int valueLength = outputAdjList.getLength();
+
+ byte[] mergeKmer = KmerUtil.mergeTwoKmer(outputAdjList.getKmerSize(), valueBytes,0, valueLength,
+ KMER_SIZE, keyBytes, 0, keyLength);
outputKmer.set(mergeKmer, 0, mergeByteNum);
outputAdjList = values.next();
@@ -84,11 +82,10 @@
byte flag = outputAdjList.getFlag();
int kmerSize = outputAdjList.getKmerSize();
int mergeByteNum = Kmer.getByteNumFromK(KMER_SIZE + kmerSize);
- byte[] valueBytes = new byte[outputAdjList.getLength()];
- for (int i = 0; i < valueBytes.length; i++) {
- valueBytes[i] = outputAdjList.getBytes()[i];
- }
- byte[] mergeKmer = KmerUtil.mergeTwoKmer(outputAdjList.getKmerSize(), valueBytes, KMER_SIZE, keyBytes);
+ byte[] valueBytes = outputAdjList.getBytes();
+ int valueLength =outputAdjList.getLength();
+ byte[] mergeKmer = KmerUtil.mergeTwoKmer(outputAdjList.getKmerSize(), valueBytes, 0, valueLength,
+ KMER_SIZE, keyBytes, 0, keyLength);
outputKmer.set(mergeKmer, 0, mergeByteNum);
adjBitMap = (byte) (adjBitMap & 0xF0);
@@ -98,22 +95,18 @@
mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputAdjList);
}
} else {
- byte[] keyBytes = new byte[key.getLength()];
- for (int i = 0; i < keyBytes.length; i++) {
- keyBytes[i] = key.getBytes()[i];
- }
+ byte[] keyBytes = key.getBytes();
+ int keyLength = key.getLength();
if (outputAdjList.getFlag() != 0) {
byte adjBitMap = outputAdjList.getAdjBitMap();
byte flag = outputAdjList.getFlag();
int kmerSize = outputAdjList.getKmerSize();
int mergeByteNum = Kmer.getByteNumFromK(KMER_SIZE - 1 + kmerSize);
- byte[] tmpKmer = KmerUtil.getFirstKmerFromChain(KMER_SIZE - 1, KMER_SIZE, keyBytes);
- byte[] valueBytes = new byte[outputAdjList.getLength()];
- for (int i = 0; i < valueBytes.length; i++) {
- valueBytes[i] = outputAdjList.getBytes()[i];
- }
- byte[] mergeKmer = KmerUtil.mergeTwoKmer(outputAdjList.getKmerSize(), valueBytes, KMER_SIZE - 1,
- tmpKmer);
+ byte[] tmpKmer = KmerUtil.getFirstKmerFromChain(KMER_SIZE - 1, KMER_SIZE, keyBytes,0,keyLength);
+ byte[] valueBytes = outputAdjList.getBytes();
+ int valueLength = outputAdjList.getLength();
+ byte[] mergeKmer = KmerUtil.mergeTwoKmer(outputAdjList.getKmerSize(), valueBytes,0, valueLength,
+ KMER_SIZE - 1, tmpKmer,0,tmpKmer.length);
outputKmer.set(mergeKmer, 0, mergeByteNum);
outputAdjList.set(null, 0, 0, adjBitMap, flag, KMER_SIZE + kmerSize);
mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputAdjList);
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialMapper.java
index 3552681..2e25c0d 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialMapper.java
@@ -105,10 +105,8 @@
boolean inDegree = measureDegree(precursor);
boolean outDegree = measureDegree(succeed);
byte initial = 0;
- byte[] kmerValue = new byte[key.getLength()];
- for (int i = 0; i < kmerValue.length; i++) {
- kmerValue[i] = key.getBytes()[i];
- }
+ byte[] kmerValue = key.getBytes();
+ int kmerLength = key.getLength();
if (inDegree == true && outDegree == false) {
flag = (byte) 2;
switch (succeed) {
@@ -125,7 +123,7 @@
initial = (byte) 0x03;
break;
}
- byte[] newKmer = KmerUtil.shiftKmerWithNextCode(KMER_SIZE, kmerValue, initial);
+ byte[] newKmer = KmerUtil.shiftKmerWithNextCode(KMER_SIZE, kmerValue,0, kmerLength, initial);
outputKmer.set(newKmer, 0, kmerValue.length);
adjBitMap = (byte) (adjBitMap & 0xF0);
outputAdjList.set(null, 0, 0, adjBitMap, flag, KMER_SIZE);
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/LogAlgorithmForMergeGraphVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/LogAlgorithmForMergeGraphVertex.java
index 19a8fcb..2264301 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/LogAlgorithmForMergeGraphVertex.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/LogAlgorithmForMergeGraphVertex.java
@@ -70,7 +70,7 @@
tmpMsg.setMessage(Message.START);
for(byte x = Kmer.GENE_CODE.A; x<= Kmer.GENE_CODE.T ; x++){
if((tmpVal.getValue() & (1 << x)) != 0){
- tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(GraphVertexOperation.k, tmpVertexId, x);
+ tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(GraphVertexOperation.k, tmpVertexId, 0, tmpVertexId.length,x);
destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
sendMsg(destVertexId,tmpMsg);
}
@@ -82,7 +82,7 @@
for(byte x = Kmer.GENE_CODE.A; x<= Kmer.GENE_CODE.T ; x++){
if(((tmpVal.getValue()>> 4) & (1 << x)) != 0){
- tmpDestVertexId = KmerUtil.shiftKmerWithPreCode(GraphVertexOperation.k, tmpVertexId, x);
+ tmpDestVertexId = KmerUtil.shiftKmerWithPreCode(GraphVertexOperation.k, tmpVertexId, 0, tmpVertexId.length, x);
destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
sendMsg(destVertexId,tmpMsg);
}
@@ -120,7 +120,7 @@
else if(getSuperstep()%3 == 0){
if(getSuperstep() == 3){
tmpMsg = new LogAlgorithmMessageWritable();
- tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(GraphVertexOperation.k, tmpVertexId,
+ tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(GraphVertexOperation.k, tmpVertexId, 0, tmpVertexId.length,
Kmer.GENE_CODE.getGeneCodeFromBitMap((byte)(tmpVal.getValue() & 0x0F)));
destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
if(tmpVal.getState() == State.START_VERTEX){
@@ -141,8 +141,8 @@
tmpMsg = msgIterator.next();
byte[] lastKmer = KmerUtil.getLastKmerFromChain(GraphVertexOperation.k,
tmpVal.getLengthOfMergeChain(),
- tmpVal.getMergeChain());
- tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(GraphVertexOperation.k, lastKmer,
+ tmpVal.getMergeChain(),0,tmpVal.getMergeChain().length);
+ tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(GraphVertexOperation.k, lastKmer, 0, lastKmer.length,
Kmer.GENE_CODE.getGeneCodeFromBitMap((byte)(tmpVal.getValue() & 0x0F))); //tmpMsg.getNeighberInfo()
destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
if(tmpVal.getState() == State.START_VERTEX){
@@ -222,11 +222,13 @@
lengthOfMergeChainVertex = tmpVal.getLengthOfMergeChain();
mergeChainVertexId = tmpVal.getMergeChain();
}
+ byte[] tmplastKmer = KmerUtil.getLastKmerFromChain(tmpMsg.getLengthOfChain() - GraphVertexOperation.k + 1,
+ tmpMsg.getLengthOfChain(), tmpMsg.getChainVertexId(),0, tmpMsg.getChainVertexId().length);
mergeChainVertexId = KmerUtil.mergeTwoKmer(lengthOfMergeChainVertex,
- mergeChainVertexId,
+ mergeChainVertexId, 0, mergeChainVertexId.length,
tmpMsg.getLengthOfChain() - GraphVertexOperation.k + 1,
- KmerUtil.getLastKmerFromChain(tmpMsg.getLengthOfChain() - GraphVertexOperation.k + 1,
- tmpMsg.getLengthOfChain(), tmpMsg.getChainVertexId()));
+ tmplastKmer, 0 , tmplastKmer.length
+ );
lengthOfMergeChainVertex = lengthOfMergeChainVertex + tmpMsg.getLengthOfChain()
- GraphVertexOperation.k + 1;
tmpVal.setLengthOfMergeChain(lengthOfMergeChainVertex);
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/MergeGraphVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/MergeGraphVertex.java
index 36dc8a1..7f2cc43 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/MergeGraphVertex.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/MergeGraphVertex.java
@@ -71,7 +71,7 @@
tmpMsg.setChainVertexId(tmpChainVertexId.getBytes());
for(byte x = Kmer.GENE_CODE.A; x<= Kmer.GENE_CODE.T ; x++){
if((getVertexValue().getValue() & (1 << x)) != 0){
- tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(GraphVertexOperation.k, tmpVertexId, x);
+ tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(GraphVertexOperation.k, tmpVertexId, 0, tmpVertexId.length, x);
destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
sendMsg(destVertexId,tmpMsg);
}
@@ -99,7 +99,7 @@
String source = Kmer.recoverKmerFrom(GraphVertexOperation.k, tmpVertexId, 0, tmpVertexId.length);
tmpMsg.setChainVertexId(KmerUtil.mergeKmerWithNextCode(
tmpMsg.getLengthOfChain(),
- tmpMsg.getChainVertexId(),
+ tmpMsg.getChainVertexId(),0, tmpMsg.getChainVertexId().length,
Kmer.GENE_CODE.getCodeFromSymbol((byte)source.charAt(source.length() - 1))));
tmpMsg.incrementLength();
deleteVertex(getVertexId());
@@ -141,8 +141,8 @@
if(!tmpMsg.isRear()){
byte[] lastKmer = KmerUtil.getLastKmerFromChain(GraphVertexOperation.k,
tmpMsg.getLengthOfChain(),
- tmpMsg.getChainVertexId());
- tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(GraphVertexOperation.k, lastKmer,
+ tmpMsg.getChainVertexId(), 0 , tmpMsg.getChainVertexId().length);
+ tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(GraphVertexOperation.k, lastKmer, 0, lastKmer.length,
Kmer.GENE_CODE.getGeneCodeFromBitMap((byte)(tmpMsg.getNeighberInfo() & 0x0F)));
tmpMsg.setSourceVertexId(tmpVertexId);