Merge commit '94e075b5c3db9aa613ef61c2581430a143b17bc8' into nanzhang/hyracks_genomix

Conflicts:
	genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
	genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
	genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerListWritableTest.java
	genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/MessageWritable.java
	genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/VertexValueWritable.java
	genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/BasicGraphCleanVertex.java
	genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/P1ForPathMergeVertex.java
	genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/P3ForPathMergeVertex.java
diff --git a/patch.diff b/patch.diff
new file mode 100644
index 0000000..a333970
--- /dev/null
+++ b/patch.diff
@@ -0,0 +1,245 @@
+From 9e006501f9e33467a8428199bd94b71dbff063ef Mon Sep 17 00:00:00 2001
+From: Anbang Xu <anbangx@gmail.com>
+Date: Fri, 26 Jul 2013 14:10:33 -0700
+Subject: [PATCH] p2 pass all the tests except 9
+
+---
+ .../genomix/data/test/KmerBytesWritableTest.java   | 76 +++++++++++++++++++++-
+ .../genomix/pregelix/io/VertexValueWritable.java   |  2 +-
+ .../operator/pathmerge/BasicPathMergeVertex.java   | 35 +++++-----
+ .../pathmerge/LogAlgorithmForPathMergeVertex.java  |  8 +--
+ .../pregelix/JobRun/PathMergeSmallTestSuite.java   |  2 +-
+ 5 files changed, 98 insertions(+), 25 deletions(-)
+
+diff --git a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
+index bda73e5..fbfbeeb 100644
+--- a/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
++++ b/genomix/genomix-data/src/test/java/edu/uci/ics/genomix/data/test/KmerBytesWritableTest.java
+@@ -229,14 +229,34 @@ public class KmerBytesWritableTest {
+         merge.mergeWithRFKmer(i, kmer2);
+         Assert.assertEquals("GGCACAACAACCC", merge.toString());
+         
+-        String test1 = "CTA";
+-        String test2 = "AGA";
++        String test1;
++        String test2;
++        test1 = "CTA";
++        test2 = "AGA";
+         KmerBytesWritable k1 = new KmerBytesWritable(3);
+         KmerBytesWritable k2 = new KmerBytesWritable(3);
+         k1.setByRead(test1.getBytes(), 0);
+         k2.setByRead(test2.getBytes(), 0);
+         k1.mergeWithRFKmer(3, k2);
+         Assert.assertEquals("TCTA", k1.toString());
++        
++        test1 = "CTA";
++        test2 = "ATA"; //TAT
++        k1 = new KmerBytesWritable(3);
++        k2 = new KmerBytesWritable(3);
++        k1.setByRead(test1.getBytes(), 0);
++        k2.setByRead(test2.getBytes(), 0);
++        k1.mergeWithFRKmer(3, k2);
++        Assert.assertEquals("CTAT", k1.toString());
++        
++        test1 = "ATA";
++        test2 = "CTA"; //TAG
++        k1 = new KmerBytesWritable(3);
++        k2 = new KmerBytesWritable(3);
++        k1.setByRead(test1.getBytes(), 0);
++        k2.setByRead(test2.getBytes(), 0);
++        k1.mergeWithFRKmer(3, k2);
++        Assert.assertEquals("ATAG", k1.toString());
+     }
+     
+     
+@@ -281,5 +301,55 @@ public class KmerBytesWritableTest {
+             }
+         }
+     }
+-
++    
++    @Test
++    public void TestFinalMerge() {
++        String selfString;
++        String match;
++        String msgString;
++        int index;
++        KmerBytesWritable kmer = new KmerBytesWritable();
++        int kmerSize = 3;
++        
++        String F1 = "AATAG";
++        String F2 = "TAGAA";
++        String R1 = "CTATT";
++        String R2 = "TTCTA";
++        
++        //FF test
++        selfString = F1;
++        match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length()); 
++        msgString = F2;
++        index = msgString.indexOf(match);
++        kmer.reset(msgString.length() - index);
++        kmer.setByRead(msgString.substring(index).getBytes(), 0);
++        System.out.println(kmer.toString());
++        
++        //FR test
++        selfString = F1;
++        match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length()); 
++        msgString = GeneCode.reverseComplement(R2);
++        index = msgString.indexOf(match);
++        kmer.reset(msgString.length() - index);
++        kmer.setByRead(msgString.substring(index).getBytes(), 0);
++        System.out.println(kmer.toString());
++        
++        //RF test
++        selfString = R1;
++        match = selfString.substring(0,kmerSize - 1); 
++        msgString = GeneCode.reverseComplement(F2);
++        index = msgString.lastIndexOf(match) + kmerSize - 2;
++        kmer.reset(index + 1);
++        kmer.setByReadReverse(msgString.substring(0, index + 1).getBytes(), 0);
++        System.out.println(kmer.toString());
++        
++        //RR test
++        selfString = R1;
++        match = selfString.substring(0,kmerSize - 1); 
++        msgString = R2;
++        index = msgString.lastIndexOf(match) + kmerSize - 2;
++        kmer.reset(index + 1);
++        kmer.setByRead(msgString.substring(0, index + 1).getBytes(), 0);
++        System.out.println(kmer.toString());
++    }
+ }
+diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/VertexValueWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/VertexValueWritable.java
+index 6d4f683..065bfd5 100644
+--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/VertexValueWritable.java
++++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/VertexValueWritable.java
+@@ -32,7 +32,7 @@ public class VertexValueWritable implements WritableComparable<VertexValueWritab
+         public static final byte SHOULD_MERGEWITHNEXT = 0b01 << 3;
+         public static final byte SHOULD_MERGEWITHPREV = 0b10 << 3;
+         public static final byte SHOULD_MERGE_MASK = 0b11 << 3;
+-        public static final byte SHOULD_MERGE_CLEAR = 0b1110011;
++        public static final byte SHOULD_MERGE_CLEAR = 0b1100111;
+     }
+     
+     private PositionListWritable nodeIdList;
+diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/BasicPathMergeVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/BasicPathMergeVertex.java
+index b7b0814..ec608c5 100644
+--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/BasicPathMergeVertex.java
++++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/BasicPathMergeVertex.java
+@@ -495,6 +495,7 @@ public class BasicPathMergeVertex extends
+     
+     public void setStateAsMergeWithNext(){
+     	byte state = getVertexValue().getState();
++    	state &= State.SHOULD_MERGE_CLEAR;
+         state |= State.SHOULD_MERGEWITHNEXT;
+         getVertexValue().setState(state);
+     }
+@@ -512,6 +513,7 @@ public class BasicPathMergeVertex extends
+     
+     public void setStateAsMergeWithPrev(){
+         byte state = getVertexValue().getState();
++        state &= State.SHOULD_MERGE_CLEAR;
+         state |= State.SHOULD_MERGEWITHPREV;
+         getVertexValue().setState(state);
+     }
+@@ -638,7 +640,7 @@ public class BasicPathMergeVertex extends
+         String match;
+         String msgString;
+         int index;
+-        switch(neighborToMergeDir){
++        switch(neighborToMeDir){
+             case MessageFlag.DIR_FF:
+                 selfString = getVertexValue().getKmer().toString();
+                 match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length()); 
+@@ -648,28 +650,29 @@ public class BasicPathMergeVertex extends
+                 kmer.setByRead(msgString.substring(index).getBytes(), 0);
+                 break;
+             case MessageFlag.DIR_FR:
+-                selfString = getVertexId().toString();
+-                match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length());
++                selfString = getVertexValue().getKmer().toString();
++                match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length()); 
+                 msgString = GeneCode.reverseComplement(msg.getKmer().toString());
+                 index = msgString.indexOf(match);
+                 kmer.reset(msgString.length() - index);
+-                kmer.setByRead(msgString.substring(index).getBytes(), 0);
++                kmer.setByReadReverse(msgString.substring(index).getBytes(), 0);
+                 break;
+             case MessageFlag.DIR_RF:
+-                selfString = GeneCode.reverseComplement(getVertexValue().getKmer().toString());
+-                match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length());
+-                msgString = msg.getKmer().toString();
+-                index = msgString.indexOf(match);
+-                kmer.reset(msgString.length() - index);
+-                kmer.setByRead(msgString.substring(index).getBytes(), 0);
++                selfString = getVertexValue().getKmer().toString();
++                match = selfString.substring(0,kmerSize - 1); 
++                msgString = GeneCode.reverseComplement(msg.getKmer().toString());
++                index = msgString.lastIndexOf(match) + kmerSize - 2;
++                kmer.reset(index + 1);
++                kmer.setByReadReverse(msgString.substring(0, index + 1).getBytes(), 0);
+                 break;
+             case MessageFlag.DIR_RR:
+-                selfString = GeneCode.reverseComplement(getVertexValue().getKmer().toString());
+-                match = selfString.substring(selfString.length() - kmerSize + 1,selfString.length());
+-                msgString = GeneCode.reverseComplement(msg.getKmer().toString());
+-                index = msgString.indexOf(match);
+-                kmer.reset(msgString.length() - index);
+-                kmer.setByRead(msgString.substring(index).getBytes(), 0);
++                selfString = getVertexValue().getKmer().toString();
++                match = selfString.substring(0,kmerSize - 1); 
++                msgString = msg.getKmer().toString();
++                index = msgString.lastIndexOf(match) + kmerSize - 2;
++                kmer.reset(index + 1);
++                kmer.setByRead(msgString.substring(0, index + 1).getBytes(), 0);
++                System.out.println(kmer.toString());
+                 break;
+         }
+        
+diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/LogAlgorithmForPathMergeVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/LogAlgorithmForPathMergeVertex.java
+index a68b646..3b5a782 100644
+--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/LogAlgorithmForPathMergeVertex.java
++++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/pathmerge/LogAlgorithmForPathMergeVertex.java
+@@ -170,22 +170,22 @@ public class LogAlgorithmForPathMergeVertex extends
+                 case MessageFromHead.BothMsgsFromHead:
+                 case MessageFromHead.OneMsgFromOldHeadAndOneFromHead:
+                     for(int i = 0; i < 2; i++)
+-                        processMerge(receivedMsgList.get(i));
++                        processFinalMerge(receivedMsgList.get(i)); //processMerge()
+                     getVertexValue().setState(State.IS_FINAL);
+                     voteToHalt();
+                     break;
+                 case MessageFromHead.OneMsgFromHeadAndOneFromNonHead:
+                     for(int i = 0; i < 2; i++)
+-                        processMerge(receivedMsgList.get(i));
++                        processFinalMerge(receivedMsgList.get(i));
+                     getVertexValue().setState(State .IS_HEAD);
+                     break;
+                 case MessageFromHead.BothMsgsFromNonHead:
+                     for(int i = 0; i < 2; i++)
+-                        processMerge(receivedMsgList.get(i));
++                        processFinalMerge(receivedMsgList.get(i));
+                     break;
+                 case MessageFromHead.NO_MSG:
+                     //halt
+-                    deleteVertex(getVertexId());
++                    voteToHalt(); //deleteVertex(getVertexId());
+                     break;
+             }
+         }
+diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestSuite.java
+index 9f96b5a..1578dfc 100644
+--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestSuite.java
++++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/PathMergeSmallTestSuite.java
+@@ -52,7 +52,7 @@ public class PathMergeSmallTestSuite extends TestSuite {
+ //    + "6", PreFix + File.separator
+ //    + "7", PreFix + File.separator
+ //    + "8", PreFix + File.separator
+-    + "5"};
++    + "9"};
+     private static final String ACTUAL_RESULT_DIR = "data/actual/pathmerge";
+     private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
+     private static final String PATH_TO_CLUSTER_STORE = "src/test/resources/cluster/stores.properties";
+-- 
+1.7.11.1
+