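Hadoop pathmerge H1 algorithm

Changes visible in this patch:
- GenomixCombiner adds each value's count to the running total instead of
  incrementing by one per record.
- GenomixMapper starts the count of the first k-mer at 1 instead of 0, and
  its comments now list the base codes in the order A, C, G, T.
- GraphBuildingTest uses a k-mer size of 3 and re-enables the comparison of
  the dumped output against the expected result file.
- TestUtils.main no longer references hard-coded local paths.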

git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_genomix@3377 123451ca-8445-de46-9d55-352943316053
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/Kmer.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/Kmer.java
index 133e7f8..21fee2b 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/Kmer.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/Kmer.java
@@ -15,7 +15,7 @@
 package edu.uci.ics.genomix.type;
 
 public class Kmer {
-
+ 
 	public final static byte[] GENE_SYMBOL = { 'A', 'C', 'G', 'T' };
 
 	public final static class GENE_CODE {
diff --git a/genomix/genomix-hadoop/expected/result2 b/genomix/genomix-hadoop/expected/result2
index 3665e18..db55a38 100755
--- a/genomix/genomix-hadoop/expected/result2
+++ b/genomix/genomix-hadoop/expected/result2
@@ -1,3 +1,8 @@
-39 41 0c	1	1
-e4 04 31	24	1
-93 13 c4	16	1
+GCA	-72
+AGC	1
+CGC	-119
+TGC	1
+ATC	36
+TCG	18
+CAT	66
+GCT	32
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java
index 1a33c7a..5d39928 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java
@@ -42,8 +42,9 @@
         byte bytCount = 0;
         while (values.hasNext()) {
             //Merge By the all adjacent Nodes;
-            groupByAdjList = (byte) (groupByAdjList | values.next().getAdjBitMap());
-            count ++;
+            KmerCountValue geneValue = values.next();
+            groupByAdjList = (byte) (groupByAdjList | geneValue.getAdjBitMap());
+            count = count + (int) geneValue.getCount();
         }
         if (count >= 127)
             bytCount = (byte) 127;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java
index 6e55f09..b90ab23 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java
@@ -28,8 +28,6 @@
 import org.apache.hadoop.mapred.TextInputFormat;
 import org.kohsuke.args4j.CmdLineParser;
 import org.kohsuke.args4j.Option;
-
-import edu.uci.ics.genomix.type.KmerBytesWritable;
 import edu.uci.ics.genomix.type.KmerCountValue;
 
 /**
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java
index 443cceb..cd8b7e3 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java
@@ -55,20 +55,20 @@
 
     /*succeed node
       A 00000001 1
-      G 00000010 2
-      C 00000100 4
+      C 00000010 2
+      G 00000100 4
       T 00001000 8
       precursor node
       A 00010000 16
-      G 00100000 32
-      C 01000000 64
+      C 00100000 32
+      G 01000000 64
       T 10000000 128*/
     @Override
     public void map(LongWritable key, Text value, OutputCollector<BytesWritable, KmerCountValue> output,
             Reporter reporter) throws IOException {
         /* A 00
-           G 01
-           C 10
+           C 01
+           G 10
            T 11*/
         String geneLine = value.toString(); // Read the Real Gene Line
         Pattern genePattern = Pattern.compile("[AGCT]+");
@@ -76,7 +76,7 @@
         boolean isValid = geneMatcher.matches();
         if (isValid == true) {
             /** first kmer */
-            byte count = 0;
+            byte count = 1;
             byte[] array = geneLine.getBytes();
             byte[] kmer = Kmer.compressKmer(KMER_SIZE, array, 0);
             byte pre = 0;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java
index 03bad56..676d6f1 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java
@@ -41,8 +41,7 @@
         byte bytCount = 0;
         while (values.hasNext()) {
             //Merge By the all adjacent Nodes;
-            KmerCountValue geneValue = values.next();
-            
+            KmerCountValue geneValue = values.next();            
             groupByAdjList = (byte) (groupByAdjList | geneValue.getAdjBitMap());
             count = count + (int) geneValue.getCount();
         }
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java
index 28cbbbc..6bd3bd5 100755
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java
@@ -35,23 +35,24 @@
 
 import edu.uci.ics.genomix.type.Kmer;
 import edu.uci.ics.genomix.type.KmerCountValue;
+import edu.uci.ics.utils.TestUtils;
 /**
  * This class test the correctness of graphbuilding program
  */
 @SuppressWarnings("deprecation")
 public class GraphBuildingTest {
 
-    private static final String ACTUAL_RESULT_DIR = "actual";
-    @SuppressWarnings("deprecation")
+    private static final String ACTUAL_RESULT_DIR = "actual1";
+    private static final String COMPARE_DIR = "compare";
     private JobConf conf = new JobConf();
     private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
-    private static final String DATA_PATH = "data/webmap/text.txt";
+    private static final String DATA_PATH = "data/webmap/Test.txt";
     private static final String HDFS_PATH = "/webmap";
-    private static final String RESULT_PATH = "/result2";
-    private static final String EXPECTED_PATH = "expected/result2";
-    private static final String TEST_SOURCE_DIR = "testactual/source.txt";
+    private static final String RESULT_PATH = "/result1";
+    private static final String EXPECTED_PATH = "expected/result1";
+    private static final String TEST_SOURCE_DIR = COMPARE_DIR + RESULT_PATH + "/comparesource.txt";
     private static final int COUNT_REDUCER = 4;
-    private static final int SIZE_KMER = 12;
+    private static final int SIZE_KMER = 3;
     
     private MiniDFSCluster dfsCluster;
     private MiniMRCluster mrCluster;
@@ -70,11 +71,12 @@
 
         SequenceFile.Reader reader = null;
         Path path = new Path(RESULT_PATH + "/part-00000");
-        reader = new SequenceFile.Reader(dfs, path, conf);
+        reader = new SequenceFile.Reader(dfs, path, conf); 
         BytesWritable key = (BytesWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
         KmerCountValue value = (KmerCountValue) ReflectionUtils.newInstance(reader.getValueClass(), conf);
         File filePathTo = new File(TEST_SOURCE_DIR);
         BufferedWriter bw = new BufferedWriter(new FileWriter(filePathTo));
+        
         while (reader.next(key, value)) {
             bw.write(Kmer.recoverKmerFrom(SIZE_KMER, key.getBytes(), 0, key.getLength()) + "\t" + value.toString());
             bw.newLine();
@@ -82,7 +84,7 @@
         bw.close();
 
         dumpResult();
-//        TestUtils.compareWithResult(new File(TEST_SOURCE_DIR), new File(EXPECTED_PATH));
+        TestUtils.compareWithResult(new File(TEST_SOURCE_DIR), new File(EXPECTED_PATH));
 
         cleanupHadoop();
 
@@ -114,7 +116,7 @@
 
     private void dumpResult() throws IOException {
         Path src = new Path(RESULT_PATH);
-        Path dest = new Path(ACTUAL_RESULT_DIR + "/");
+        Path dest = new Path(ACTUAL_RESULT_DIR);
         dfs.copyToLocalFile(src, dest);
     }
 }
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java
index 237a764..015017a 100755
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java
@@ -71,7 +71,5 @@
 
     public static void main(String[] args) throws Exception {
         TestUtils TUtils = new TestUtils();
-        TUtils.compareWithResult(new File("/Users/hadoop/Documents/workspace/Test/part-00000"), new File(
-                "/Users/hadoop/Documents/workspace/Test/test.txt"));
     }
 }