after code view h1 h2 update

commit: 0047d3c8de4a8dad75edc7682bd8b145a5500aad [log] [tgz]
author: Nan Zhang <zhangnan2920214@gmail.com> Tue May 14 10:52:44 2013 -0700
committer: Nan Zhang <zhangnan2920214@gmail.com> Tue May 14 10:52:44 2013 -0700
tree: 1a89e8ce1e09870c76dd71c86ee4ecaaf06877cf
parent: bef5ad00fd32b3ae73d8a100b55c48cd550ce584 [diff]
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathValueWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/MergePathValueWritable.java
similarity index 95%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathValueWritable.java
rename to genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/MergePathValueWritable.java
index 67b168d..4e8199a 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathValueWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/MergePathValueWritable.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.pathmergingh2;
+package edu.uci.ics.genomix.type;
 
 import java.io.DataInput;
 import java.io.DataOutput;
@@ -94,6 +94,9 @@
         return GeneCode.getSymbolFromBitMap(adjBitMap) + '\t' + String.valueOf(flag);
     }
 
+    public String pureToString() { // same symbol string as toString(), without the appended tab and flag
+        return GeneCode.getSymbolFromBitMap(adjBitMap);
+    }
     @Override
     public byte[] getBytes() {
         // TODO Auto-generated method stub

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingDriver.java
similarity index 98%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingDriver.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingDriver.java
index 067249a..2d0c36e 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingDriver.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.gbresultschecking;
+package edu.uci.ics.genomix.hadoop.gbresultschecking;
 
 import java.io.IOException;
 import org.apache.hadoop.fs.FileSystem;

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingMapper.java
similarity index 97%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingMapper.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingMapper.java
index 466d44c..b1e5e59 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingMapper.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.gbresultschecking;
+package edu.uci.ics.genomix.hadoop.gbresultschecking;
 
 import java.io.IOException;
 import org.apache.hadoop.io.ByteWritable;

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingReducer.java
similarity index 96%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingReducer.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingReducer.java
index 6d7fbc0..e93548f 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingReducer.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.gbresultschecking;
+package edu.uci.ics.genomix.hadoop.gbresultschecking;
 
 import java.io.IOException;
 import java.util.Iterator;

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixCombiner.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixCombiner.java
similarity index 97%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixCombiner.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixCombiner.java
index 7029c86..8a4cdc9 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixCombiner.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixCombiner.java

@@ -13,7 +13,7 @@
  * limitations under the License.
  */
 
-package edu.uci.ics.genomix.graphbuilding;
+package edu.uci.ics.genomix.hadoop.graphbuilding;
 
 import java.io.IOException;
 import java.util.Iterator;

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixDriver.java
similarity index 98%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixDriver.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixDriver.java
index 60802eb..cd0ee2d 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixDriver.java

@@ -13,7 +13,7 @@
  * limitations under the License.
  */
 
-package edu.uci.ics.genomix.graphbuilding;
+package edu.uci.ics.genomix.hadoop.graphbuilding;
 
 import java.io.IOException;
 

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixMapper.java
similarity index 98%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixMapper.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixMapper.java
index 3fb3425..e9fa3f0 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixMapper.java

@@ -13,7 +13,7 @@
  * limitations under the License.
  */
 
-package edu.uci.ics.genomix.graphbuilding;
+package edu.uci.ics.genomix.hadoop.graphbuilding;
 
 import java.io.IOException;
 import java.util.regex.Matcher;

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixReducer.java
similarity index 97%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixReducer.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixReducer.java
index 75add24..9b284df 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixReducer.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.graphbuilding;
+package edu.uci.ics.genomix.hadoop.graphbuilding;
 
 import java.io.IOException;
 import java.util.Iterator;

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterDriver.java
similarity index 98%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterDriver.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterDriver.java
index 2f6dddd..a2eafeb 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterDriver.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.graphcountfilter;
+package edu.uci.ics.genomix.hadoop.graphcountfilter;
 
 import java.io.IOException;
 import org.apache.hadoop.fs.FileSystem;

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterMapper.java
similarity index 96%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterMapper.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterMapper.java
index 4a9a8a9..4c25597 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterMapper.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.graphcountfilter;
+package edu.uci.ics.genomix.hadoop.graphcountfilter;
 
 import java.io.IOException;
 

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterReducer.java
similarity index 96%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterReducer.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterReducer.java
index 6e61973..58be646 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterReducer.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.graphcountfilter;
+package edu.uci.ics.genomix.hadoop.graphcountfilter;
 
 import java.io.IOException;
 import java.util.Iterator;

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Driver.java
new file mode 100644
index 0000000..28f38a8
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Driver.java

@@ -0,0 +1,197 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.hadoop.pathmergingh1;
+
+import java.io.IOException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
+import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+import edu.uci.ics.genomix.hadoop.pathmergingh2.MergePathH2Driver;
+import edu.uci.ics.genomix.hadoop.pathmergingh2.SNodeInitialMapper;
+import edu.uci.ics.genomix.hadoop.pathmergingh2.SNodeInitialReducer;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
+
+@SuppressWarnings("deprecation")
+public class MergePathH1Driver {
+    // MergePathH1Driver: runs one "Initial Path-Starting-Points Table" job, then mergeRound "Path Merge" jobs.
+    private static class Options {
+        @Option(name = "-inputpath", usage = "the input path", required = true)
+        public String inputPath;
+
+        @Option(name = "-outputpath", usage = "the output path", required = true)
+        public String outputPath;
+
+        @Option(name = "-mergeresultpath", usage = "the merging results path", required = true)
+        public String mergeResultPath;
+
+        @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+        public int numReducers;
+
+        @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+        public int sizeKmer;
+
+        @Option(name = "-merge-rounds", usage = "the while rounds of merging", required = true)
+        public int mergeRound;
+
+    }
+    // run(): the first job writes to <inputPath>stepNext; every merge round reads that directory and then replaces it.
+    public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer,
+            int mergeRound, String defaultConfPath) throws IOException {
+
+        JobConf conf = new JobConf(MergePathH2Driver.class); // NOTE(review): H2 driver class here, while the merge rounds below use MergePathH1Driver.class - confirm intended
+        conf.setInt("sizeKmer", sizeKmer);
+
+        if (defaultConfPath != null) {
+            conf.addResource(new Path(defaultConfPath));
+        }
+        conf.setJobName("Initial Path-Starting-Points Table");
+        conf.setMapperClass(SNodeInitialMapper.class);
+        conf.setReducerClass(SNodeInitialReducer.class);
+
+        conf.setMapOutputKeyClass(KmerBytesWritable.class);
+        conf.setMapOutputValueClass(MergePathValueWritable.class);
+
+        conf.setInputFormat(SequenceFileInputFormat.class);
+        conf.setOutputFormat(SequenceFileOutputFormat.class);
+
+        String singlePointPath = "comSinglePath0"; // equals the loop's "comSinglePath" + iMerge name for iMerge == 0
+
+        MultipleOutputs.addNamedOutput(conf, singlePointPath, MergePathMultiSeqOutputFormat.class,
+                VKmerBytesWritable.class, MergePathValueWritable.class);
+
+        conf.setOutputKeyClass(VKmerBytesWritable.class);
+        conf.setOutputValueClass(MergePathValueWritable.class);
+
+        FileInputFormat.setInputPaths(conf, new Path(inputPath));
+        FileOutputFormat.setOutputPath(conf, new Path(inputPath + "stepNext"));
+        conf.setNumReduceTasks(numReducers);
+        FileSystem dfs = FileSystem.get(conf);
+        dfs.delete(new Path(inputPath + "stepNext"), true); // remove the output directory before the job runs
+        JobClient.runJob(conf);
+        dfs.rename(new Path(inputPath + "stepNext" + "/" + singlePointPath), new Path(mergeResultPath + "/"
+                + singlePointPath)); // the result of rename is not checked
+        int iMerge = 0;
+        /*----------------------------------------------------------------------*/
+        for (iMerge = 0; iMerge < mergeRound; iMerge++) {
+//            if (!dfs.exists(new Path(inputPath + "-step1")))
+//                break;
+            conf = new JobConf(MergePathH1Driver.class); // a fresh JobConf is built for every round
+            conf.setInt("sizeKmer", sizeKmer);
+            conf.setInt("iMerge", iMerge); // round index; MergePathH1Reducer.configure() reads it to build its named-output names
+
+            if (defaultConfPath != null) {
+                conf.addResource(new Path(defaultConfPath));
+            }
+            conf.setJobName("Path Merge");
+
+            conf.setMapperClass(MergePathH1Mapper.class);
+            conf.setReducerClass(MergePathH1Reducer.class);
+
+            conf.setMapOutputKeyClass(VKmerBytesWritable.class);
+            conf.setMapOutputValueClass(MergePathValueWritable.class);
+
+            conf.setInputFormat(SequenceFileInputFormat.class);
+
+            String uncompSinglePath = "uncompSinglePath" + iMerge;
+            String comSinglePath = "comSinglePath" + iMerge;
+            String comCircle = "comCircle" + iMerge;
+
+            MultipleOutputs.addNamedOutput(conf, uncompSinglePath, MergePathMultiSeqOutputFormat.class,
+                    VKmerBytesWritable.class, MergePathValueWritable.class);
+
+            MultipleOutputs.addNamedOutput(conf, comSinglePath, MergePathMultiSeqOutputFormat.class,
+                    VKmerBytesWritable.class, MergePathValueWritable.class);
+
+            MultipleOutputs.addNamedOutput(conf, comCircle, MergePathMultiSeqOutputFormat.class,
+                    VKmerBytesWritable.class, MergePathValueWritable.class);
+
+            conf.setOutputKeyClass(VKmerBytesWritable.class);
+            conf.setOutputValueClass(MergePathValueWritable.class);
+
+            FileInputFormat.setInputPaths(conf, new Path(inputPath + "stepNext"));
+            FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+            conf.setNumReduceTasks(numReducers);
+            dfs.delete(new Path(outputPath), true); // outputPath is wiped at the start of every round
+            JobClient.runJob(conf);
+            dfs.delete(new Path(inputPath + "stepNext"), true);
+            dfs.rename(new Path(outputPath + "/" + uncompSinglePath), new Path(inputPath + "stepNext")); // this round's "uncompSinglePath" output becomes the next round's input
+            dfs.rename(new Path(outputPath + "/" + comSinglePath), new Path(mergeResultPath + "/" + comSinglePath)); // NOTE(review): for iMerge == 0 this destination equals the one used after the initial job - confirm rename behaviour
+            dfs.rename(new Path(outputPath + "/" + comCircle), new Path(mergeResultPath + "/" + comCircle));
+        }
+        /*----------------------------------------*/
+        /*        conf = new JobConf(MergePathH1Driver.class);
+                conf.setInt("sizeKmer", sizeKmer);
+                conf.setInt("iMerge", iMerge);
+                
+                if (defaultConfPath != null) {
+                    conf.addResource(new Path(defaultConfPath));
+                }
+                conf.setJobName("Path Merge");
+                
+                conf.setMapperClass(MergePathH1Mapper.class);
+                conf.setReducerClass(MergePathH1Reducer.class);
+                
+                conf.setMapOutputKeyClass(VKmerBytesWritable.class);
+                conf.setMapOutputValueClass(MergePathValueWritable.class);
+                
+                conf.setInputFormat(SequenceFileInputFormat.class);
+                
+                String uncomplete = "uncomplete" + iMerge;
+                String complete = "complete" + iMerge;
+               
+                MultipleOutputs.addNamedOutput(conf, uncomplete,
+                        MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
+                        MergePathValueWritable.class);
+
+                MultipleOutputs.addNamedOutput(conf, complete,
+                        MergePathMultiTextOutputFormat.class, VKmerBytesWritable.class,
+                        MergePathValueWritable.class);
+                
+                conf.setOutputKeyClass(VKmerBytesWritable.class);
+                conf.setOutputValueClass(MergePathValueWritable.class);
+                
+                FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
+                FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+                conf.setNumReduceTasks(numReducers);
+                dfs.delete(new Path(outputPath), true);
+                JobClient.runJob(conf);
+                dfs.delete(new Path(inputPath + "-step1"), true);
+                dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
+                dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));*/
+    }
+
+    public static void main(String[] args) throws Exception {
+        Options options = new Options();
+        CmdLineParser parser = new CmdLineParser(options);
+        parser.parseArgument(args); // fills the @Option fields of Options from the command line
+        MergePathH1Driver driver = new MergePathH1Driver();
+        driver.run(options.inputPath, options.outputPath, options.mergeResultPath, options.numReducers,
+                options.sizeKmer, options.mergeRound, null); // null: run() then adds no extra configuration resource
+    }
+}

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Mapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Mapper.java
similarity index 81%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Mapper.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Mapper.java
index 6357483..95cc01e 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Mapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Mapper.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.pathmergingh1;
+package edu.uci.ics.genomix.hadoop.pathmergingh1;
 
 import java.io.IOException;
 import org.apache.hadoop.mapred.JobConf;
@@ -23,17 +23,17 @@
 import edu.uci.ics.genomix.type.GeneCode;
 import edu.uci.ics.genomix.type.VKmerBytesWritable;
 import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
 
 @SuppressWarnings("deprecation")
 public class MergePathH1Mapper extends MapReduceBase implements
         Mapper<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
     private int KMER_SIZE;
     private VKmerBytesWritableFactory outputKmerFactory;
-    private MergePathValueWritable outputValue; 
+    private MergePathValueWritable outputValue;
     private VKmerBytesWritable tmpKmer;
     private VKmerBytesWritable outputKmer;
 
-
     public void configure(JobConf job) {
         KMER_SIZE = job.getInt("sizeKmer", 0);
         outputKmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
@@ -45,7 +45,6 @@
     @Override
     public void map(VKmerBytesWritable key, MergePathValueWritable value,
             OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
-
         byte precursor = (byte) 0xF0;
         byte succeed = (byte) 0x0F;
         byte adjBitMap = value.getAdjBitMap();
@@ -53,18 +52,22 @@
         precursor = (byte) (precursor & adjBitMap);
         precursor = (byte) ((precursor & 0xff) >> 4);
         succeed = (byte) (succeed & adjBitMap);
-       if (bitFlag == 1) {
+        byte bitStartEnd = (byte) (0x01 & bitFlag);
+        if (bitStartEnd == 1) {
+            /**
+             * eg. the kmer: AGCGT (3 kmers of size 3 already merged), adjMap C|G
+             *     succeedCode -> G, so tmpKmer is used to build the succeeding neighbor: GTG -> outputKmer
+             *     then AGC is stored in tmpKmer -> outputValue
+             */
             byte succeedCode = GeneCode.getGeneCodeFromBitMap(succeed);
             tmpKmer.set(outputKmerFactory.getLastKmerFromChain(KMER_SIZE, key));
+            //TODO remove tmpKmer!!!!
             outputKmer.set(outputKmerFactory.shiftKmerWithNextCode(tmpKmer, succeedCode));
-            
             tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(key.getKmerLength() - (KMER_SIZE - 1), key));
             outputValue.set(adjBitMap, bitFlag, tmpKmer);
             output.collect(outputKmer, outputValue);
-        } else {
-            outputKmer.set(key);
-            outputValue.set(value);
-            output.collect(key, outputValue);
+        } else {//!!!!Make comments
+            output.collect(key, value);
         }
     }
 }

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Reducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Reducer.java
new file mode 100644
index 0000000..8e0ba00
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Reducer.java

@@ -0,0 +1,126 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.hadoop.pathmergingh1;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+
+import edu.uci.ics.genomix.type.GeneCode;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
+
+@SuppressWarnings("deprecation")
+public class MergePathH1Reducer extends MapReduceBase implements
+        Reducer<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> { // merges the first two records of a key group; see reduce()
+    private VKmerBytesWritableFactory kmerFactory;
+    private VKmerBytesWritable outputKmer;
+    private int KMER_SIZE;
+    private MergePathValueWritable outputValue;
+    MultipleOutputs mos = null; // created in configure(), closed in close()
+    private int I_MERGE; // merge round index; suffix of the named outputs used in reduce()
+
+    public void configure(JobConf job) {
+        mos = new MultipleOutputs(job);
+        I_MERGE = Integer.parseInt(job.get("iMerge")); // NOTE(review): no default here, unlike getInt("sizeKmer", 0) below - presumably fails if "iMerge" is unset; confirm
+        KMER_SIZE = job.getInt("sizeKmer", 0);
+        outputValue = new MergePathValueWritable();
+        kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
+        outputKmer = new VKmerBytesWritable(KMER_SIZE);
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public void reduce(VKmerBytesWritable key, Iterator<MergePathValueWritable> values,
+            OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+        outputValue = values.next();
+        if (values.hasNext() == true) { // two or more records share this key; only the first two are consumed
+            byte bitFlag = outputValue.getFlag();
+            byte bitStartEnd = (byte) (0x01 & outputValue.getFlag());
+            if (bitStartEnd == 0) {
+                /**
+                 * eg. if 2 records go into same group, the first is non-start-point: (GTG, null, A|T, 0) the second is start-point: (GTG, AGC, C|G, 1)
+                 *     the results of combining: AGCGTG, null, C|T, 1
+                 */
+                //first record is non-start point
+               
+                byte nextAdj = outputValue.getAdjBitMap();
+                byte succeed = (byte) 0x0F;
+                succeed = (byte) (succeed & nextAdj);
+                //second record must be start point
+                outputValue = values.next();
+                byte adjBitMap = outputValue.getAdjBitMap();
+                if (outputValue.getKmerLength() != 0)
+                    outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), key));
+                else
+                    outputKmer.set(key);
+                byte outputFlag = (byte) (0x81 & bitFlag); // keep only the 0x80 and 0x01 bits of the first record's flag
+                outputFlag = (byte) (outputFlag | ((byte) 0x81 & outputValue.getFlag())); // OR in the same two bits of the second record's flag
+                adjBitMap = (byte) (adjBitMap & 0xF0); // high nibble comes from the second (start-point) record
+                adjBitMap = (byte) (adjBitMap | succeed); // low nibble comes from the first (non-start) record
+                outputValue.set(adjBitMap, outputFlag, null);               
+                //check whether the node after merging contains both the start-point and the end-point (both 0x81 bits set)
+                bitFlag = outputValue.getFlag();
+                bitStartEnd = (byte) (0x81 & bitFlag);
+                if (bitStartEnd == (byte) 0x81) {
+                    mos.getCollector("comSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
+                } else
+                    mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
+            } else {
+                /**
+                 * eg. if 2 records go into same group, the first is start-point:(GTG, AGC, C|G, 1)  the second is: (GTG, null, A|T, 0)
+                 *     the results of combining: AGCGTG, null, C|T, 1
+                 */
+                //first record is start point
+                byte adjBitMap = outputValue.getAdjBitMap();
+                if (outputValue.getKmerLength() != 0)
+                    outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), key));
+                else
+                    outputKmer.set(key);
+                //second record is non start point
+                outputValue = values.next();
+                byte nextAdj = outputValue.getAdjBitMap();
+                byte succeed = (byte) 0x0F;
+                succeed = (byte) (succeed & nextAdj);
+                //combine the 0x81 flag bits of the first record (bitFlag) with those of the second record
+                byte outputFlag = (byte) (0x81 & bitFlag);
+                outputFlag = (byte) (outputFlag | ((byte) 0x81 & outputValue.getFlag()));
+                adjBitMap = (byte) (adjBitMap & 0xF0); // high nibble comes from the first (start-point) record
+                adjBitMap = (byte) (adjBitMap | succeed); // low nibble comes from the second (non-start) record
+                outputValue.set(adjBitMap, outputFlag, null);
+                //check whether the node after merging contains both the start-point and the end-point (both 0x81 bits set)
+                bitFlag = outputValue.getFlag();
+                bitStartEnd = (byte) (0x81 & bitFlag);
+                if (bitStartEnd == (byte) 0x81) {
+                    mos.getCollector("comSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
+                } else
+                    mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
+            }
+        } else {
+            mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(key, outputValue); // single record for this key: emitted unchanged
+        }
+    }
+
+    public void close() throws IOException {
+        // close the MultipleOutputs instance created in configure()
+        mos.close();
+    }
+}

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiSeqOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathMultiSeqOutputFormat.java
similarity index 91%
copy from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiSeqOutputFormat.java
copy to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathMultiSeqOutputFormat.java
index 5e8f1d8..0868f8c 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiSeqOutputFormat.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathMultiSeqOutputFormat.java

@@ -12,12 +12,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.pathmergingh1;
+package edu.uci.ics.genomix.hadoop.pathmergingh1;
 
 import java.io.File;
 import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
 import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
+import edu.uci.ics.genomix.type.MergePathValueWritable;
 
 public class MergePathMultiSeqOutputFormat extends MultipleSequenceFileOutputFormat<VKmerBytesWritable, MergePathValueWritable>{
     @Override

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiTextOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathMultiTextOutputFormat.java
similarity index 95%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiTextOutputFormat.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathMultiTextOutputFormat.java
index ac88ce0..50c7a3d 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiTextOutputFormat.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathMultiTextOutputFormat.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.pathmergingh1;
+package edu.uci.ics.genomix.hadoop.pathmergingh1;
 
 import java.io.File;
 import org.apache.hadoop.io.Text;

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/SNodeInitialMapper.java
similarity index 80%
copy from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialMapper.java
copy to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/SNodeInitialMapper.java
index 6270852..8c65473 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/SNodeInitialMapper.java

@@ -12,18 +12,20 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.pathmergingh2;
+package edu.uci.ics.genomix.hadoop.pathmergingh1;
 
 import java.io.IOException;
+
 import org.apache.hadoop.io.ByteWritable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.GeneCode;
 import edu.uci.ics.genomix.type.KmerBytesWritable;
-
+import edu.uci.ics.genomix.type.GeneCode;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
 @SuppressWarnings("deprecation")
 public class SNodeInitialMapper extends MapReduceBase implements
         Mapper<KmerBytesWritable, ByteWritable, KmerBytesWritable, MergePathValueWritable> {
@@ -37,7 +39,11 @@
         outputKmer = new KmerBytesWritable(KMER_SIZE);
         outputAdjList = new MergePathValueWritable();
     }
-
+    
+    /**
+     * @param adjacent the high 4 bits are useless, we just use the lower 4 bits
+     * @return if the degree == 1 then return false, else return true
+     */
     boolean measureDegree(byte adjacent) {
         boolean result = true;
         switch (adjacent) {
@@ -96,6 +102,7 @@
     @Override
     public void map(KmerBytesWritable key, ByteWritable value,
             OutputCollector<KmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+        //TODO clean this code piece, use the genomix-data function
         byte precursor = (byte) 0xF0;
         byte succeed = (byte) 0x0F;
         byte adjBitMap = value.get();
@@ -105,19 +112,24 @@
         succeed = (byte) (succeed & adjBitMap);
         boolean inDegree = measureDegree(precursor);
         boolean outDegree = measureDegree(succeed);
+        //if indegree == 1 and outdegree == 1, then it assigns these records' flag to 2
         if (inDegree == false && outDegree == false) {
             outputKmer.set(key);
-            System.out.println(outputKmer.hashCode());
-            bitFlag = (byte) 2;
+            bitFlag = (byte) 0x02;
             outputAdjList.set(adjBitMap, bitFlag, null);
             output.collect(outputKmer, outputAdjList);
         } else {
+            // every other record is mapped to each of its precursor neighbors
+            /**
+             * eg. ACT  CTA|CA, it maps CAC, TAC, AAC, all the 3 pairs marked  0x80
+             */
             for (int i = 0; i < 4; i++) {
                 byte temp = (byte) 0x01;
                 byte shiftedCode = 0;
                 temp = (byte) (temp << i);
-                temp = (byte) (precursor & temp);
+                temp = (byte) (precursor & temp);   
                 if (temp != 0) {
+                    //TODO use the genomix-data factory function
                     byte precurCode = GeneCode.getGeneCodeFromBitMap(temp);
                     shiftedCode = key.shiftKmerWithPreCode(precurCode);
                     outputKmer.set(key);
@@ -127,6 +139,14 @@
                     key.shiftKmerWithNextCode(shiftedCode);
                 }
             }
+            //and also maps its succeeding neighbors
+            /**
+             * eg. ACT  CTA|CA, it maps CTC, CTA, all the 2 pairs marked 0x01
+             */
+//            VKmerBytesWritableFactory factor ; //new
+//            for( int i = GeneCode.A ; i <= GeneCode.T; i++){
+//                factor.getFirstKmerFromChain(firstK, kmerChain)
+//            }
             for (int i = 0; i < 4; i++) {
                 byte temp = (byte) 0x01;
                 byte shiftedCode = 0;

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/SNodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/SNodeInitialReducer.java
new file mode 100644
index 0000000..cd3db8e
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/SNodeInitialReducer.java

@@ -0,0 +1,121 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.hadoop.pathmergingh1;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
+
+@SuppressWarnings("deprecation")
+public class SNodeInitialReducer extends MapReduceBase implements
+        Reducer<KmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
+    private VKmerBytesWritable outputKmer = new VKmerBytesWritable();
+    private MergePathValueWritable outputValue = new MergePathValueWritable();
+    MultipleOutputs mos = null;
+
+    public void configure(JobConf job) {
+        mos = new MultipleOutputs(job);
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    public void reduce(KmerBytesWritable key, Iterator<MergePathValueWritable> values,
+            OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+        outputKmer.set(key);
+        outputValue = values.next();
+        byte startPointFlag = 0x00;
+        byte endPointFlag = 0x00;
+        /**
+         * the targetPoint is the record flagged 0x02 (1 indegree and 1 outdegree) that we look for in a group holding several records; its adjacency bitmap is kept in targetAdjList
+         */
+        byte targetPointFlag = 0x00;
+        byte targetAdjList = 0x00;
+        //outputFlag accumulates the start-point (0x01) and end-point (0x80) marks found in the group
+        byte outputFlag = 0x00;
+        if (values.hasNext() == true) {
+            //scan the group for startPointFlag, endPointFlag and targetPointFlag, beginning with the first record already read
+            switch (outputValue.getFlag()) {
+                case (byte) 0x01:
+                    startPointFlag = (byte) 0x01;
+                    break;
+                case (byte) 0x80:
+                    endPointFlag = (byte) 0x80;
+                    break;
+                case (byte) 0x02:
+                    targetPointFlag = (byte) 0x02;
+                    targetAdjList = outputValue.getAdjBitMap();
+                    break;
+            }
+            while (values.hasNext()) {
+                outputValue = values.next();
+                switch (outputValue.getFlag()) {
+                    case (byte) 0x01:
+                        startPointFlag = (byte) 0x01;
+                        break;
+                    case (byte) 0x80:
+                        endPointFlag = (byte) 0x80;
+                        break;
+                    case (byte) 0x02:
+                        targetPointFlag = (byte) 0x02;
+                        targetAdjList = outputValue.getAdjBitMap();
+                        break;
+                }
+                if (startPointFlag != (byte) 0x00 && endPointFlag != (byte) 0x00 && targetPointFlag != (byte) 0x00)
+                    break;
+            }
+            //emit only when the group contains the target (0x02) record; groups without it produce no output
+            if (targetPointFlag == (byte) 0x02) {
+                //the target is both start-point and end-point: remove this single point path from the regular output and write it to the named output "comSinglePath0"
+                if (startPointFlag == (byte) 0x01 && endPointFlag == (byte) 0x80) {
+                    outputFlag = (byte) (outputFlag | startPointFlag);
+                    outputFlag = (byte) (outputFlag | endPointFlag);
+                    outputValue.set(targetAdjList, outputFlag, null);
+                    mos.getCollector("comSinglePath0", reporter).collect(outputKmer, outputValue);
+                } else {
+                    if (startPointFlag == (byte) 0x01) {
+                        outputFlag = (byte) (outputFlag | startPointFlag);
+                    }
+                    if (endPointFlag == (byte) 0x80) {
+                        outputFlag = (byte) (outputFlag | endPointFlag);
+                    }
+                    outputValue.set(targetAdjList, outputFlag, null);
+                    output.collect(outputKmer, outputValue);
+                }
+            }
+        } else {
+            //the group holds a single record: keep it in the regular output only if it is a 0x02 record, with its flag reset to 0 (neither start nor end)
+            if (outputValue.getFlag() == (byte) 0x02) {
+                byte bitFlag = 0;
+                outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);
+                output.collect(outputKmer, outputValue);
+            }
+        }
+    }
+
+    public void close() throws IOException {
+        // close the MultipleOutputs instance created in configure()
+        mos.close();
+    }
+}

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Driver.java
similarity index 61%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Driver.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Driver.java
index c196daa..6c977a8 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Driver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Driver.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.pathmergingh2;
+package edu.uci.ics.genomix.hadoop.pathmergingh2;
 
 import java.io.IOException;
 import org.apache.hadoop.fs.FileSystem;
@@ -29,13 +29,18 @@
 import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
 import org.kohsuke.args4j.CmdLineParser;
 import org.kohsuke.args4j.Option;
+
+import edu.uci.ics.genomix.hadoop.pathmergingh1.MergePathH1Driver;
+import edu.uci.ics.genomix.hadoop.pathmergingh1.MergePathH1Mapper;
+import edu.uci.ics.genomix.hadoop.pathmergingh1.MergePathH1Reducer;
+import edu.uci.ics.genomix.hadoop.pathmergingh1.MergePathMultiSeqOutputFormat;
+//import edu.uci.ics.genomix.pathmergingh1.MergePathValueWritable;
 import edu.uci.ics.genomix.type.KmerBytesWritable;
 import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-
+import edu.uci.ics.genomix.type.MergePathValueWritable;
 @SuppressWarnings("deprecation")
 public class MergePathH2Driver {
-    
+
     private static class Options {
         @Option(name = "-inputpath", usage = "the input path", required = true)
         public String inputPath;
@@ -45,131 +50,142 @@
 
         @Option(name = "-mergeresultpath", usage = "the merging results path", required = true)
         public String mergeResultPath;
-        
+
         @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
         public int numReducers;
 
         @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
         public int sizeKmer;
-        
+
         @Option(name = "-merge-rounds", usage = "the while rounds of merging", required = true)
         public int mergeRound;
 
     }
 
-
-    public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer, int mergeRound, String defaultConfPath)
-            throws IOException{
+    public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer,
+            int mergeRound, String defaultConfPath) throws IOException {
 
         JobConf conf = new JobConf(MergePathH2Driver.class);
         conf.setInt("sizeKmer", sizeKmer);
-        
+
         if (defaultConfPath != null) {
             conf.addResource(new Path(defaultConfPath));
         }
         conf.setJobName("Initial Path-Starting-Points Table");
-        conf.setMapperClass(SNodeInitialMapper.class); 
+        conf.setMapperClass(SNodeInitialMapper.class);
         conf.setReducerClass(SNodeInitialReducer.class);
 
         conf.setMapOutputKeyClass(KmerBytesWritable.class);
         conf.setMapOutputValueClass(MergePathValueWritable.class);
-        
+
         conf.setInputFormat(SequenceFileInputFormat.class);
         conf.setOutputFormat(SequenceFileOutputFormat.class);
         
+        String singlePointPath = "comSinglePath0";
+        
+        MultipleOutputs.addNamedOutput(conf, singlePointPath, MergePathMultiSeqOutputFormat.class,
+                VKmerBytesWritable.class, MergePathValueWritable.class);
+        
         conf.setOutputKeyClass(VKmerBytesWritable.class);
         conf.setOutputValueClass(MergePathValueWritable.class);
-        
+
         FileInputFormat.setInputPaths(conf, new Path(inputPath));
-        FileOutputFormat.setOutputPath(conf, new Path(inputPath + "-step1"));
+        FileOutputFormat.setOutputPath(conf, new Path(inputPath + "stepNext"));
         conf.setNumReduceTasks(numReducers);
         FileSystem dfs = FileSystem.get(conf);
-        dfs.delete(new Path(inputPath + "-step1"), true);
+        dfs.delete(new Path(inputPath + "stepNext"), true);
         JobClient.runJob(conf);
+        dfs.rename(new Path(inputPath + "stepNext" + "/" + singlePointPath), new Path(mergeResultPath + "/" + singlePointPath));
+        
         int iMerge = 0;
-/*----------------------------------------------------------------------*/
-        for(iMerge = 0; iMerge < mergeRound; iMerge ++){
-            if(!dfs.exists(new Path(inputPath + "-step1")))
-                break;
+        for (iMerge = 1; iMerge <= mergeRound; iMerge++) {
+//            if (!dfs.exists(new Path(inputPath + "-step1")))
+//                break;
             conf = new JobConf(MergePathH2Driver.class);
             conf.setInt("sizeKmer", sizeKmer);
             conf.setInt("iMerge", iMerge);
-            
+
             if (defaultConfPath != null) {
                 conf.addResource(new Path(defaultConfPath));
             }
             conf.setJobName("Path Merge");
-            
+
             conf.setMapperClass(MergePathH2Mapper.class);
             conf.setReducerClass(MergePathH2Reducer.class);
-            
+
             conf.setMapOutputKeyClass(VKmerBytesWritable.class);
             conf.setMapOutputValueClass(MergePathValueWritable.class);
-            
-            conf.setInputFormat(SequenceFileInputFormat.class);
-            
-            String uncomplete = "uncomplete" + iMerge;
-            String complete = "complete" + iMerge;
-           
-            MultipleOutputs.addNamedOutput(conf, uncomplete,
-                    MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
-                    MergePathValueWritable.class);
 
-            MultipleOutputs.addNamedOutput(conf, complete,
-                    MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
-                    MergePathValueWritable.class);
-            
+            conf.setInputFormat(SequenceFileInputFormat.class);
+
+            String uncompSinglePath = "uncompSinglePath" + iMerge;
+            String comSinglePath = "comSinglePath" + iMerge;
+            String comCircle = "comCircle" + iMerge;
+
+            MultipleOutputs.addNamedOutput(conf, uncompSinglePath, MergePathMultiSeqOutputFormat.class,
+                    VKmerBytesWritable.class, MergePathValueWritable.class);
+
+            MultipleOutputs.addNamedOutput(conf, comSinglePath, MergePathMultiSeqOutputFormat.class,
+                    VKmerBytesWritable.class, MergePathValueWritable.class);
+
+            MultipleOutputs.addNamedOutput(conf, comCircle, MergePathMultiSeqOutputFormat.class,
+                    VKmerBytesWritable.class, MergePathValueWritable.class);
+
             conf.setOutputKeyClass(VKmerBytesWritable.class);
             conf.setOutputValueClass(MergePathValueWritable.class);
-            
-            FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
+
+            FileInputFormat.setInputPaths(conf, new Path(inputPath + "stepNext"));
             FileOutputFormat.setOutputPath(conf, new Path(outputPath));
             conf.setNumReduceTasks(numReducers);
             dfs.delete(new Path(outputPath), true);
             JobClient.runJob(conf);
-            dfs.delete(new Path(inputPath + "-step1"), true);
-            dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
-            dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));
+            dfs.delete(new Path(inputPath + "stepNext"), true);
+            dfs.rename(new Path(outputPath + "/" + uncompSinglePath), new Path(inputPath + "stepNext"));
+            dfs.rename(new Path(outputPath + "/" + comSinglePath), new Path(mergeResultPath + "/" + comSinglePath));
+            dfs.rename(new Path(outputPath + "/" + comCircle), new Path(mergeResultPath + "/" + comCircle));
         }
 /*        conf = new JobConf(MergePathH2Driver.class);
         conf.setInt("sizeKmer", sizeKmer);
         conf.setInt("iMerge", iMerge);
-        
+
         if (defaultConfPath != null) {
             conf.addResource(new Path(defaultConfPath));
         }
         conf.setJobName("Path Merge");
-        
+
         conf.setMapperClass(MergePathH2Mapper.class);
         conf.setReducerClass(MergePathH2Reducer.class);
-        
+
         conf.setMapOutputKeyClass(VKmerBytesWritable.class);
         conf.setMapOutputValueClass(MergePathValueWritable.class);
-        
-        conf.setInputFormat(SequenceFileInputFormat.class);
-        
-        String uncomplete = "uncomplete" + iMerge;
-        String complete = "complete" + iMerge;
-       
-        MultipleOutputs.addNamedOutput(conf, uncomplete,
-                MergePathMultiTextOutputFormat.class, VKmerBytesWritable.class,
-                MergePathValueWritable.class);
 
-        MultipleOutputs.addNamedOutput(conf, complete,
-                MergePathMultiTextOutputFormat.class, VKmerBytesWritable.class,
-                MergePathValueWritable.class);
-        
+        conf.setInputFormat(SequenceFileInputFormat.class);
+
+        String uncompSinglePath = "uncompSinglePath" + iMerge;
+        String comSinglePath = "comSinglePath" + iMerge;
+        String comCircle = "comCircle" + iMerge;
+
+        MultipleOutputs.addNamedOutput(conf, uncompSinglePath, MergePathMultiTextOutputFormat.class,
+                VKmerBytesWritable.class, MergePathValueWritable.class);
+
+        MultipleOutputs.addNamedOutput(conf, comSinglePath, MergePathMultiTextOutputFormat.class,
+                VKmerBytesWritable.class, MergePathValueWritable.class);
+
+        MultipleOutputs.addNamedOutput(conf, comCircle, MergePathMultiTextOutputFormat.class,
+                VKmerBytesWritable.class, MergePathValueWritable.class);
+
         conf.setOutputKeyClass(VKmerBytesWritable.class);
         conf.setOutputValueClass(MergePathValueWritable.class);
-        
-        FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
+
+        FileInputFormat.setInputPaths(conf, new Path(inputPath + "stepNext"));
         FileOutputFormat.setOutputPath(conf, new Path(outputPath));
         conf.setNumReduceTasks(numReducers);
         dfs.delete(new Path(outputPath), true);
         JobClient.runJob(conf);
-        dfs.delete(new Path(inputPath + "-step1"), true);
-        dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
-        dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));*/
+        dfs.delete(new Path(inputPath + "stepNext"), true);
+        dfs.rename(new Path(outputPath + "/" + uncompSinglePath), new Path(inputPath + "stepNext"));
+        dfs.rename(new Path(outputPath + "/" + comSinglePath), new Path(mergeResultPath + "/" + comSinglePath));
+        dfs.rename(new Path(outputPath + "/" + comCircle), new Path(mergeResultPath + "/" + comCircle));*/
     }
 
     public static void main(String[] args) throws Exception {
@@ -177,6 +193,7 @@
         CmdLineParser parser = new CmdLineParser(options);
         parser.parseArgument(args);
         MergePathH2Driver driver = new MergePathH2Driver();
-        driver.run(options.inputPath, options.outputPath, options.mergeResultPath, options.numReducers, options.sizeKmer, options.mergeRound, null);
+        driver.run(options.inputPath, options.outputPath, options.mergeResultPath, options.numReducers,
+                options.sizeKmer, options.mergeRound, null);
     }
 }

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Mapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Mapper.java
similarity index 79%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Mapper.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Mapper.java
index 726dd4c..00dcb55 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Mapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Mapper.java

@@ -12,9 +12,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.pathmergingh2;
+package edu.uci.ics.genomix.hadoop.pathmergingh2;
 
 import java.io.IOException;
+import java.util.Arrays;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.Mapper;
@@ -23,6 +24,7 @@
 import edu.uci.ics.genomix.type.GeneCode;
 import edu.uci.ics.genomix.type.VKmerBytesWritable;
 import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
 
 @SuppressWarnings("deprecation")
 public class MergePathH2Mapper extends MapReduceBase implements
@@ -56,33 +58,48 @@
 
         switch (bitStartEnd) {
             case (byte) 0x01:
+                //if this record is a start-point, it only maps its succeeding nodes
+                /**
+                 * eg. the kmer: AGCGT (already merged from 3 kmers of size 3), adjMap C|G
+                 *     succeedCode -> G, then tmpKmer stores the succeeding neighbor: GTG -> outputKmer
+                 *     then we store AGC in tmpKmer -> outputValue
+                 */
                 byte succeedCode = GeneCode.getGeneCodeFromBitMap(succeed);
                 tmpKmer.set(outputKmerFactory.getLastKmerFromChain(KMER_SIZE, key));
                 outputKmer.set(outputKmerFactory.shiftKmerWithNextCode(tmpKmer, succeedCode));
-
                 tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(key.getKmerLength() - (KMER_SIZE - 1), key));
+                //mark the flag of key --> reverse record
                 bitFlag = (byte) (bitFlag | 0x08);
                 outputValue.set(adjBitMap, bitFlag, tmpKmer);
                 output.collect(outputKmer, outputValue);
                 break;
+            
             case (byte) 0x80:
+                //if the record is an end-point, it only maps itself
+                /**
+                 * eg. the kmer: AGCGT (already merged from 3 kmers of size 3), adjMap C|G
+                 *     tmpKmer stores the first kmer: AGC -> outputKmer
+                 *     then we store GT in tmpKmer -> outputValue
+                 */
                 tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(KMER_SIZE, key));
                 outputKmer.set(tmpKmer);
                 tmpKmer.set(outputKmerFactory.getLastKmerFromChain(key.getKmerLength() - KMER_SIZE, key));
+                //mark the flag of key --> itself record
                 bitFlag = (byte) (bitFlag | 0x10);
                 outputValue.set(adjBitMap, bitFlag, tmpKmer);
                 output.collect(outputKmer, outputValue);
                 break;
+            //if the record is neither a start-point nor an end-point, it maps both its succeeding nodes and itself
             case (byte) 0x00:
                 succeedCode = GeneCode.getGeneCodeFromBitMap(succeed);
+                //it maps the succeed nodes 
                 tmpKmer.set(outputKmerFactory.getLastKmerFromChain(KMER_SIZE, key));
                 outputKmer.set(outputKmerFactory.shiftKmerWithNextCode(tmpKmer, succeedCode));
-
                 tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(key.getKmerLength() - (KMER_SIZE - 1), key));
                 bitFlag = (byte) (bitFlag | 0x08);
                 outputValue.set(adjBitMap, bitFlag, tmpKmer);
                 output.collect(outputKmer, outputValue);
-
+                //it maps itself
                 bitFlag = (byte) (bitFlag & 0xF7);
                 tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(KMER_SIZE, key));
                 outputKmer.set(tmpKmer);
@@ -91,11 +108,6 @@
                 outputValue.set(adjBitMap, bitFlag, tmpKmer);
                 output.collect(outputKmer, outputValue);
                 break;
-            case (byte) 0x81:
-                outputKmer.set(key);
-                outputValue.set(adjBitMap, bitFlag, null);
-                output.collect(outputKmer, outputValue);
-                break;
         }
     }
 }

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Reducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Reducer.java
similarity index 70%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Reducer.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Reducer.java
index 63391b4..652404e 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Reducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Reducer.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.pathmergingh2;
+package edu.uci.ics.genomix.hadoop.pathmergingh2;
 
 import java.io.IOException;
 import java.util.Iterator;
@@ -24,10 +24,12 @@
 import org.apache.hadoop.mapred.lib.MultipleOutputs;
 import edu.uci.ics.genomix.type.VKmerBytesWritable;
 import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
 
 @SuppressWarnings("deprecation")
 public class MergePathH2Reducer extends MapReduceBase implements
         Reducer<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
+
     private VKmerBytesWritableFactory kmerFactory;
     private VKmerBytesWritable outputKmer;
     private VKmerBytesWritable tmpKmer1;
@@ -35,7 +37,6 @@
     private int KMER_SIZE;
     private MergePathValueWritable outputValue;
     private MergePathValueWritable tmpOutputValue;
-
     MultipleOutputs mos = null;
     private int I_MERGE;
 
@@ -58,18 +59,27 @@
         outputKmer.set(key);
         if (values.hasNext() == true) {
             byte bitFlag = outputValue.getFlag();
+            //decide whether this record is start or end
             byte bitStartEnd = (byte) (0x81 & bitFlag);
+            //decide whether this record is reverse
             byte bitPosiNegative = (byte) (0x18 & bitFlag);
             byte succeed = (byte) 0x0F;
             switch (bitPosiNegative) {
                 case (byte) 0x08:
+                    //the first record is a reverse record
+                    /**
+                     * eg. if 2 records go into the same group, the first is reverse: (GTG, AGC, C|G, 0x08), the second is itself: (GTG, null, A|T, 0x10)
+                     *     the result of combining: AGCGTG, null, C|T, 0x01
+                     */
                     if (outputValue.getKmerLength() != 0)
                         tmpKmer1.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), key));
                     else
                         tmpKmer1.set(key);
                     byte adjBitMap = outputValue.getAdjBitMap();
+                    //get the next value record
                     outputValue = values.next();
                     bitStartEnd = (byte) (0x81 & outputValue.getFlag());
+                    //if this record contains the end-point
                     if (bitStartEnd == (byte) 0x80) {
                         if (outputValue.getKmerLength() != 0)
                             tmpKmer2.set(kmerFactory.mergeTwoKmer(key, outputValue.getKmer()));
@@ -77,7 +87,7 @@
                             tmpKmer2.set(key);
                         byte tmpFlag = (byte) 0x80;
                         tmpOutputValue.set(outputValue.getAdjBitMap(), tmpFlag, null);
-                        mos.getCollector("uncomplete" + I_MERGE, reporter).collect(tmpKmer2, tmpOutputValue);
+                        mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(tmpKmer2, tmpOutputValue);
                     }
                     if (outputValue.getKmerLength() != 0)
                         outputKmer.set(kmerFactory.mergeTwoKmer(tmpKmer1, outputValue.getKmer()));
@@ -89,17 +99,29 @@
                     byte outputFlag = (byte) (0x81 & bitFlag);
                     outputFlag = (byte) (outputFlag | ((byte) 0x81 & outputValue.getFlag()));
                     outputValue.set(adjBitMap, outputFlag, null);
-                    mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
+                    // decide whether the merged record is complete; if so, output it to the complete file
+                    bitFlag = outputValue.getFlag();
+                    bitStartEnd = (byte) (0x81 & bitFlag);
+                    if (bitStartEnd == (byte) 0x81) {
+                        mos.getCollector("comSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
+                    } else
+                        mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
                     break;
                 case (byte) 0x10:
+                    //the first record value is 'itself' format
+                    /**
+                     * eg. if 2 records go into same group, the first is itself: (GTG, null, A|T, 0x10) the second is reverse: (GTG, AGC, C|G, 0x08)
+                     *     the result of combining: AGCGTG, null, C|T, 0x01
+                     */
                     if (outputValue.getKmerLength() != 0)
                         tmpKmer1.set(kmerFactory.mergeTwoKmer(key, outputValue.getKmer()));
                     else
                         tmpKmer1.set(key);
+                    //if this record contains an end-point
                     if (bitStartEnd == (byte) 0x80) {
                         byte tmpFlag = (byte) 0x80;
                         tmpOutputValue.set(outputValue.getAdjBitMap(), tmpFlag, null);
-                        mos.getCollector("uncomplete" + I_MERGE, reporter).collect(tmpKmer1, tmpOutputValue);
+                        mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(tmpKmer1, tmpOutputValue);
                     }
                     succeed = (byte) (succeed & outputValue.getAdjBitMap());
                     outputValue = values.next();
@@ -109,22 +131,22 @@
                         outputKmer.set(tmpKmer1);
                     adjBitMap = outputValue.getAdjBitMap();
                     adjBitMap = (byte) (adjBitMap & 0xF0);
-                    adjBitMap = (byte) (adjBitMap | succeed);                    
+                    adjBitMap = (byte) (adjBitMap | succeed);
                     outputFlag = (byte) (0x81 & bitFlag);
                     outputFlag = (byte) (outputFlag | ((byte) 0x81 & outputValue.getFlag()));
                     outputValue.set(adjBitMap, outputFlag, null);
-                    mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
+                    // decide whether the merged record is complete; if so, output it to the complete file
+                    bitFlag = outputValue.getFlag();
+                    bitStartEnd = (byte) (0x81 & bitFlag);
+                    if (bitStartEnd == (byte) 0x81) {
+                        mos.getCollector("comSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
+                    } else
+                        mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
                     break;
             }
-        } else {
-            byte bitFlag = outputValue.getFlag();
-            byte bitStartEnd = (byte) (0x81 & bitFlag);
-            if (bitStartEnd == (byte) 0x81) {
-                outputKmer.set(key);
-                mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
-            }
         }
     }
+
     public void close() throws IOException {
         // TODO Auto-generated method stub
         mos.close();

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiSeqOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathMultiSeqOutputFormat.java
similarity index 91%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiSeqOutputFormat.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathMultiSeqOutputFormat.java
index 5e8f1d8..7731503 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiSeqOutputFormat.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathMultiSeqOutputFormat.java

@@ -12,13 +12,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.pathmergingh1;
+package edu.uci.ics.genomix.hadoop.pathmergingh2;
 
 import java.io.File;
 import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
 import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-
+import edu.uci.ics.genomix.type.MergePathValueWritable;
 public class MergePathMultiSeqOutputFormat extends MultipleSequenceFileOutputFormat<VKmerBytesWritable, MergePathValueWritable>{
     @Override
     protected String generateLeafFileName(String name) {

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiTextOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathMultiTextOutputFormat.java
similarity index 95%
copy from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiTextOutputFormat.java
copy to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathMultiTextOutputFormat.java
index ac88ce0..587008c 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiTextOutputFormat.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathMultiTextOutputFormat.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.pathmergingh1;
+package edu.uci.ics.genomix.hadoop.pathmergingh2;
 
 import java.io.File;
 import org.apache.hadoop.io.Text;

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/SNodeInitialMapper.java
similarity index 84%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialMapper.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/SNodeInitialMapper.java
index 6270852..58bfd71 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/SNodeInitialMapper.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.pathmergingh2;
+package edu.uci.ics.genomix.hadoop.pathmergingh2;
 
 import java.io.IOException;
 import org.apache.hadoop.io.ByteWritable;
@@ -21,16 +21,19 @@
 import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+
 import edu.uci.ics.genomix.type.GeneCode;
 import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
 
 @SuppressWarnings("deprecation")
 public class SNodeInitialMapper extends MapReduceBase implements
         Mapper<KmerBytesWritable, ByteWritable, KmerBytesWritable, MergePathValueWritable> {
 
-    public int KMER_SIZE;
-    public KmerBytesWritable outputKmer;
-    public MergePathValueWritable outputAdjList;
+    private int KMER_SIZE;
+    private KmerBytesWritable outputKmer;
+    private MergePathValueWritable outputAdjList;
 
     public void configure(JobConf job) {
         KMER_SIZE = Integer.parseInt(job.get("sizeKmer"));
@@ -38,6 +41,10 @@
         outputAdjList = new MergePathValueWritable();
     }
 
+    /**
+     * @param adjacent the high 4 bits are useless, we just use the lower 4 bits
+     * @return false if the degree == 1, otherwise true
+     */
     boolean measureDegree(byte adjacent) {
         boolean result = true;
         switch (adjacent) {
@@ -105,13 +112,17 @@
         succeed = (byte) (succeed & adjBitMap);
         boolean inDegree = measureDegree(precursor);
         boolean outDegree = measureDegree(succeed);
+        //if indegree == 1 and outdegree == 1, the record's flag is set to 0x02
         if (inDegree == false && outDegree == false) {
             outputKmer.set(key);
-            System.out.println(outputKmer.hashCode());
-            bitFlag = (byte) 2;
+            bitFlag = (byte) 0x02;
             outputAdjList.set(adjBitMap, bitFlag, null);
             output.collect(outputKmer, outputAdjList);
         } else {
+            // every other record is mapped to each of its precursor neighbors
+            /**
+             * eg. ACT  CTA|CA, it maps CAC, TAC, ACA, all the 3 pairs marked  0x80
+             */
             for (int i = 0; i < 4; i++) {
                 byte temp = (byte) 0x01;
                 byte shiftedCode = 0;
@@ -127,6 +138,10 @@
                     key.shiftKmerWithNextCode(shiftedCode);
                 }
             }
+            //and also maps its succeeding neighbors
+            /**
+             * eg. kmer:ACT  bitMap: CTA|CA, it maps CTC, CTA, all the 2 pairs marked 0x01
+             */
             for (int i = 0; i < 4; i++) {
                 byte temp = (byte) 0x01;
                 byte shiftedCode = 0;

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/SNodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/SNodeInitialReducer.java
new file mode 100644
index 0000000..fc9e80d
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/SNodeInitialReducer.java

@@ -0,0 +1,117 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.hadoop.pathmergingh2;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
+
+@SuppressWarnings("deprecation") // Reducer that, per kmer key, combines the 0x01/0x80/0x02 flags of the grouped records and emits the 0x02 (target) record's adjacency bitmap with the combined start/end flag
+public class SNodeInitialReducer extends MapReduceBase implements
+        Reducer<KmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
+    private VKmerBytesWritable outputKmer = new VKmerBytesWritable();
+    private MergePathValueWritable outputValue = new MergePathValueWritable();
+    MultipleOutputs mos = null;
+    public void configure(JobConf job) {
+        mos = new MultipleOutputs(job);
+    }
+    @SuppressWarnings("unchecked")
+    @Override
+    public void reduce(KmerBytesWritable key, Iterator<MergePathValueWritable> values,
+            OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+        outputKmer.set(key);
+        outputValue = values.next();
+        byte startPointFlag = 0x00;
+        byte endPointFlag = 0x00;
+        /**
+         * targetPointFlag becomes 0x02 once a record flagged 0x02 (the mapper's mark for a kmer with in-degree 1 and out-degree 1) is seen in a multi-record group; targetAdjList keeps that record's adjacency bitmap
+         */
+        byte targetPointFlag = 0x00;
+        byte targetAdjList = 0x00;
+        //outputFlag accumulates the start (0x01) and end (0x80) marks found in the group and becomes the flag of the emitted record
+        byte outputFlag = 0x00;
+        if (values.hasNext() == true) {
+            //classify the first record: 0x01 -> start point, 0x80 -> end point, 0x02 -> target record
+            switch (outputValue.getFlag()) {
+                case (byte) 0x01:
+                    startPointFlag = (byte) 0x01;
+                    break;
+                case (byte) 0x80:
+                    endPointFlag = (byte) 0x80;
+                    break;
+                case (byte) 0x02:
+                    targetPointFlag = (byte) 0x02;
+                    targetAdjList = outputValue.getAdjBitMap();
+                    break;
+            }
+            while (values.hasNext()) { //classify the remaining records the same way
+                outputValue = values.next();
+                switch (outputValue.getFlag()) {
+                    case (byte) 0x01:
+                        startPointFlag = (byte) 0x01;
+                        break;
+                    case (byte) 0x80:
+                        endPointFlag = (byte) 0x80;
+                        break;
+                    case (byte) 0x02:
+                        targetPointFlag = (byte) 0x02;
+                        targetAdjList = outputValue.getAdjBitMap();
+                        break;
+                }
+                if(startPointFlag != (byte) 0x00 && endPointFlag!= (byte) 0x00 && targetPointFlag != (byte) 0x00)
+                    break; //all three kinds have been seen, stop scanning
+            }
+            //emit only when the group contains the target (0x02) record; groups without it produce no output
+            if(targetPointFlag == (byte) 0x02) {
+                //marked as both start and end: write it to the "comSinglePath0" named output rather than the regular output (original note: remove the single point path)
+                if(startPointFlag == (byte) 0x01 && endPointFlag == (byte) 0x80) {
+                    outputFlag = (byte) (outputFlag | startPointFlag);
+                    outputFlag = (byte) (outputFlag | endPointFlag);
+                    outputValue.set(targetAdjList, outputFlag, null);
+                    mos.getCollector("comSinglePath0", reporter).collect(outputKmer, outputValue);
+                }
+                else {
+                    if(startPointFlag == (byte) 0x01) {
+                        outputFlag = (byte) (outputFlag | startPointFlag);
+                    }
+                    if(endPointFlag == (byte) 0x80) {
+                        outputFlag = (byte) (outputFlag | endPointFlag);
+                    }
+                    outputValue.set(targetAdjList, outputFlag, null);
+                    output.collect(outputKmer, outputValue);
+                }
+            }
+        } else {
+            //a lone 0x02 record carries no start/end mark: emit it with flag 0 on the regular output (presumably the input of the later merge rounds - TODO confirm)
+            if (outputValue.getFlag() == (byte)0x02) {
+                byte bitFlag = 0;
+                outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);
+                output.collect(outputKmer, outputValue);
+            }
+        }
+    }
+    public void close() throws IOException {
+        // close every named output opened through MultipleOutputs
+        mos.close();
+    }
+}

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatDriver.java
similarity index 98%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatDriver.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatDriver.java
index 7390d06..e7400be 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatDriver.java

@@ -13,7 +13,7 @@
  * limitations under the License.
  */
 
-package edu.uci.ics.genomix.statistics;
+package edu.uci.ics.genomix.hadoop.statistics;
 
 import java.io.IOException;
 import org.apache.hadoop.fs.FileSystem;

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatMapper.java
similarity index 98%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatMapper.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatMapper.java
index bb94c5d..623a923 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatMapper.java

@@ -13,7 +13,7 @@
  * limitations under the License.
  */
 
-package edu.uci.ics.genomix.statistics;
+package edu.uci.ics.genomix.hadoop.statistics;
 
 import java.io.IOException;
 import org.apache.hadoop.io.ByteWritable;

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatReducer.java
similarity index 96%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatReducer.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatReducer.java
index d5ce11c..090e680 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatReducer.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.statistics;
+package edu.uci.ics.genomix.hadoop.statistics;
 
 import java.io.IOException;
 import java.util.Iterator;

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Driver.java
deleted file mode 100644
index a8e5f7c..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Driver.java
+++ /dev/null

@@ -1,182 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.pathmergingh1;
-
-import java.io.IOException;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.mapred.TextOutputFormat;
-import org.apache.hadoop.mapred.lib.MultipleOutputs;
-import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
-import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
-import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.Option;
-
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-@SuppressWarnings("deprecation")
-public class MergePathH1Driver {
-    
-    private static class Options {
-        @Option(name = "-inputpath", usage = "the input path", required = true)
-        public String inputPath;
-
-        @Option(name = "-outputpath", usage = "the output path", required = true)
-        public String outputPath;
-
-        @Option(name = "-mergeresultpath", usage = "the merging results path", required = true)
-        public String mergeResultPath;
-        
-        @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
-        public int numReducers;
-
-        @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
-        public int sizeKmer;
-        
-        @Option(name = "-merge-rounds", usage = "the while rounds of merging", required = true)
-        public int mergeRound;
-
-    }
-
-
-    public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer, int mergeRound, String defaultConfPath)
-            throws IOException{
-
-        JobConf conf = new JobConf(MergePathH1Driver.class);
-        conf.setInt("sizeKmer", sizeKmer);
-        
-        if (defaultConfPath != null) {
-            conf.addResource(new Path(defaultConfPath));
-        }
-        conf.setJobName("Initial Path-Starting-Points Table");
-        conf.setMapperClass(SNodeInitialMapper.class); 
-        conf.setReducerClass(SNodeInitialReducer.class);
-        
-        conf.setMapOutputKeyClass(KmerBytesWritable.class);
-        conf.setMapOutputValueClass(MergePathValueWritable.class);
-        
-        conf.setInputFormat(SequenceFileInputFormat.class);
-        conf.setOutputFormat(SequenceFileOutputFormat.class);
-        
-        conf.setOutputKeyClass(VKmerBytesWritable.class);
-        conf.setOutputValueClass(MergePathValueWritable.class);
-        
-        FileInputFormat.setInputPaths(conf, new Path(inputPath));
-        FileOutputFormat.setOutputPath(conf, new Path(inputPath + "-step1"));
-        conf.setNumReduceTasks(numReducers);
-        FileSystem dfs = FileSystem.get(conf);
-        dfs.delete(new Path(inputPath + "-step1"), true);
-        JobClient.runJob(conf);
-        int iMerge = 0;
-/*----------------------------------------------------------------------*/
-        for(iMerge = 0; iMerge < mergeRound; iMerge ++){
-        
-            conf = new JobConf(MergePathH1Driver.class);
-            conf.setInt("sizeKmer", sizeKmer);
-            conf.setInt("iMerge", iMerge);
-            
-            if (defaultConfPath != null) {
-                conf.addResource(new Path(defaultConfPath));
-            }
-            conf.setJobName("Path Merge");
-            
-            conf.setMapperClass(MergePathH1Mapper.class);
-            conf.setReducerClass(MergePathH1Reducer.class);
-            
-            conf.setMapOutputKeyClass(VKmerBytesWritable.class);
-            conf.setMapOutputValueClass(MergePathValueWritable.class);
-            
-            conf.setInputFormat(SequenceFileInputFormat.class);
-            
-            String uncomplete = "uncomplete" + iMerge;
-            String complete = "complete" + iMerge;
-           
-            MultipleOutputs.addNamedOutput(conf, uncomplete,
-                    MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
-                    MergePathValueWritable.class);
-
-            MultipleOutputs.addNamedOutput(conf, complete,
-                    MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
-                    MergePathValueWritable.class);
-            
-            conf.setOutputKeyClass(VKmerBytesWritable.class);
-            conf.setOutputValueClass(MergePathValueWritable.class);
-            
-            FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
-            FileOutputFormat.setOutputPath(conf, new Path(outputPath));
-            conf.setNumReduceTasks(numReducers);
-            dfs.delete(new Path(outputPath), true);
-            JobClient.runJob(conf);
-            dfs.delete(new Path(inputPath + "-step1"), true);
-            dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
-            dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));
-        }
-        /*----------------------------------------*/
-/*        conf = new JobConf(MergePathH1Driver.class);
-        conf.setInt("sizeKmer", sizeKmer);
-        conf.setInt("iMerge", iMerge);
-        
-        if (defaultConfPath != null) {
-            conf.addResource(new Path(defaultConfPath));
-        }
-        conf.setJobName("Path Merge");
-        
-        conf.setMapperClass(MergePathH1Mapper.class);
-        conf.setReducerClass(MergePathH1Reducer.class);
-        
-        conf.setMapOutputKeyClass(VKmerBytesWritable.class);
-        conf.setMapOutputValueClass(MergePathValueWritable.class);
-        
-        conf.setInputFormat(SequenceFileInputFormat.class);
-        
-        String uncomplete = "uncomplete" + iMerge;
-        String complete = "complete" + iMerge;
-       
-        MultipleOutputs.addNamedOutput(conf, uncomplete,
-                MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
-                MergePathValueWritable.class);
-
-        MultipleOutputs.addNamedOutput(conf, complete,
-                MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
-                MergePathValueWritable.class);
-        
-        conf.setOutputKeyClass(VKmerBytesWritable.class);
-        conf.setOutputValueClass(MergePathValueWritable.class);
-        
-        FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
-        FileOutputFormat.setOutputPath(conf, new Path(outputPath));
-        conf.setNumReduceTasks(numReducers);
-        dfs.delete(new Path(outputPath), true);
-        JobClient.runJob(conf);
-        dfs.delete(new Path(inputPath + "-step1"), true);
-        dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
-        dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));*/
-    }
-
-    public static void main(String[] args) throws Exception {
-        Options options = new Options();
-        CmdLineParser parser = new CmdLineParser(options);
-        parser.parseArgument(args);
-        MergePathH1Driver driver = new MergePathH1Driver();
-        driver.run(options.inputPath, options.outputPath, options.mergeResultPath, options.numReducers, options.sizeKmer, options.mergeRound, null);
-    }
-}

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Reducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Reducer.java
deleted file mode 100644
index f5c9c8d..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Reducer.java
+++ /dev/null

@@ -1,141 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.pathmergingh1;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.lib.MultipleOutputs;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
-
-@SuppressWarnings("deprecation")
-public class MergePathH1Reducer extends MapReduceBase implements
-        Reducer<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
-    private VKmerBytesWritableFactory kmerFactory;
-    private VKmerBytesWritable outputKmer;
-    private VKmerBytesWritable tmpKmer;
-    private int KMER_SIZE;
-    private MergePathValueWritable outputValue;
-    private MergePathValueWritable tmpOutputValue;
-    MultipleOutputs mos = null;
-    private int I_MERGE;
-
-    public void configure(JobConf job) {
-        mos = new MultipleOutputs(job);
-        I_MERGE = Integer.parseInt(job.get("iMerge"));
-        KMER_SIZE = job.getInt("sizeKmer", 0);
-        outputValue = new MergePathValueWritable();
-        tmpOutputValue = new MergePathValueWritable();
-        kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
-        outputKmer = new VKmerBytesWritable(KMER_SIZE);
-        tmpKmer = new VKmerBytesWritable(KMER_SIZE);
-    }
-
-    @SuppressWarnings("unchecked")
-    @Override
-    public void reduce(VKmerBytesWritable key, Iterator<MergePathValueWritable> values,
-            OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
-        outputValue = values.next();
-        if (values.hasNext() == true) {
-            if (outputValue.getFlag() != 1) {
-                byte nextAdj = outputValue.getAdjBitMap();
-                byte succeed = (byte) 0x0F;
-                succeed = (byte) (succeed & nextAdj);
-
-                outputValue = values.next();
-                byte adjBitMap = outputValue.getAdjBitMap();
-                byte flag = outputValue.getFlag();
-                if (outputValue.getKmerLength() != 0)
-                    outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), key));
-                else
-                    outputKmer.set(key);
-
-                adjBitMap = (byte) (adjBitMap & 0xF0);
-                adjBitMap = (byte) (adjBitMap | succeed);
-                outputValue.set(adjBitMap, flag, null);
-                mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
-            } else {
-                tmpOutputValue.set(outputValue);
-                byte tmpAdjMap = tmpOutputValue.getAdjBitMap();
-
-                outputValue = values.next();
-                if (outputValue.getFlag() != 1) {
-                    if (tmpOutputValue.getKmerLength() != 0)
-                        outputKmer.set(kmerFactory.mergeTwoKmer(tmpOutputValue.getKmer(), key));
-                    else
-                        outputKmer.set(key);
-
-                    byte nextAdj = outputValue.getAdjBitMap();
-                    byte succeed = (byte) 0x0F;
-                    succeed = (byte) (succeed & nextAdj);
-                    tmpAdjMap = (byte) (tmpAdjMap & 0xF0);
-                    tmpAdjMap = (byte) (tmpAdjMap | succeed);
-                    outputValue.set(tmpAdjMap, tmpOutputValue.getFlag(), null);
-                    mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
-                } else {
-
-                    tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
-                    if (tmpOutputValue.getKmerLength() != 0)
-                        outputKmer.set(kmerFactory.mergeTwoKmer(tmpOutputValue.getKmer(), tmpKmer));
-                    else
-                        outputKmer.set(tmpKmer);
-                    tmpOutputValue.set(tmpAdjMap, tmpOutputValue.getFlag(), null);
-                    mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, tmpOutputValue);
-
-                    tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
-                    if (outputValue.getKmerLength() != 0)
-                        outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), tmpKmer));
-                    else
-                        outputKmer.set(tmpKmer);
-                    outputValue.set(outputValue.getAdjBitMap(), outputValue.getFlag(), null);
-                    mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
-
-                    while (values.hasNext()) {
-                        outputValue = values.next();
-                        tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
-                        if (outputValue.getKmerLength() != 0)
-                            outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), tmpKmer));
-                        else
-                            outputKmer.set(tmpKmer);
-                        outputValue.set(outputValue.getAdjBitMap(), outputValue.getFlag(), null);
-                        mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
-                    }
-                }
-            }
-        } else {
-            if (outputValue.getFlag() != 0) {
-                tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
-                if (outputValue.getKmerLength() != 0)
-                    outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), tmpKmer));
-                else
-                    outputKmer.set(tmpKmer);
-                outputValue.set(outputValue.getAdjBitMap(), outputValue.getFlag(), null);
-                mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
-
-            } else
-                mos.getCollector("uncomplete" + I_MERGE, reporter).collect(key, outputValue);
-        }
-    }
-
-    public void close() throws IOException {
-        // TODO Auto-generated method stub
-        mos.close();
-    }
-}

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathValueWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathValueWritable.java
deleted file mode 100644
index f14e5f2..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathValueWritable.java
+++ /dev/null

@@ -1,107 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.pathmergingh1;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import org.apache.hadoop.io.BinaryComparable;
-import org.apache.hadoop.io.WritableComparable;
-import edu.uci.ics.genomix.type.GeneCode;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-public class MergePathValueWritable extends BinaryComparable implements WritableComparable<BinaryComparable> {
-
-    private static final byte[] EMPTY_BYTES = {};
-    private byte adjBitMap;
-    private byte flag;
-    private VKmerBytesWritable kmer;
-
-    public MergePathValueWritable() {
-        this((byte) 0, (byte) 0, 0, EMPTY_BYTES);
-    }
-
-    public MergePathValueWritable(byte adjBitMap, byte flag, int kmerSize, byte[] bytes) {
-        this.adjBitMap = adjBitMap;
-        this.flag = flag;
-        this.kmer = new VKmerBytesWritable(kmerSize, bytes);
-        kmer.set(bytes, 0, bytes.length);
-    }
-
-    public void set(MergePathValueWritable right) {
-        set(right.getAdjBitMap(), right.getFlag(), right.getKmer());
-    }
-
-    public void set(byte adjBitMap, byte flag, VKmerBytesWritable kmer) {
-        this.kmer.set(kmer);
-        this.adjBitMap = adjBitMap;
-        this.flag = flag;
-    }
-
-    @Override
-    public void readFields(DataInput arg0) throws IOException {
-        // TODO Auto-generated method stub
-        kmer.readFields(arg0);
-        adjBitMap = arg0.readByte();
-        flag = arg0.readByte();
-    }
-
-    @Override
-    public void write(DataOutput arg0) throws IOException {
-        // TODO Auto-generated method stub
-
-        kmer.write(arg0);
-        arg0.writeByte(adjBitMap);
-        arg0.writeByte(flag);
-    }
-
-    public VKmerBytesWritable getKmer() {
-        if (kmer.getLength() != 0) {
-            return kmer;
-        }
-        return null;
-    }
-
-    public byte getAdjBitMap() {
-        return this.adjBitMap;
-    }
-
-    public byte getFlag() {
-        return this.flag;
-    }
-
-    public String toString() {
-        return GeneCode.getSymbolFromBitMap(adjBitMap) + '\t' + String.valueOf(flag);
-    }
-
-    @Override
-    public byte[] getBytes() {
-        // TODO Auto-generated method stub
-        if (kmer.getLength() != 0) {
-            return kmer.getBytes();
-        } else
-            return null;
-
-    }
-
-    public int getKmerLength() {
-        return kmer.getKmerLength();
-    }
-
-    @Override
-    public int getLength() {
-        return kmer.getLength();
-    }
-}

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/SNodeInitialMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/SNodeInitialMapper.java
deleted file mode 100644
index 1c12f63..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/SNodeInitialMapper.java
+++ /dev/null

@@ -1,132 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.pathmergingh1;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.GeneCode;
-
-@SuppressWarnings("deprecation")
-public class SNodeInitialMapper extends MapReduceBase implements
-        Mapper<KmerBytesWritable, ByteWritable, KmerBytesWritable, MergePathValueWritable> {
-
-    public int KMER_SIZE;
-    public KmerBytesWritable outputKmer;
-    public MergePathValueWritable outputAdjList;
-
-    public void configure(JobConf job) {
-        KMER_SIZE = Integer.parseInt(job.get("sizeKmer"));
-        outputKmer = new KmerBytesWritable(KMER_SIZE);
-        outputAdjList = new MergePathValueWritable();
-    }
-
-    boolean measureDegree(byte adjacent) {
-        boolean result = true;
-        switch (adjacent) {
-            case 0:
-                result = true;
-                break;
-            case 1:
-                result = false;
-                break;
-            case 2:
-                result = false;
-                break;
-            case 3:
-                result = true;
-                break;
-            case 4:
-                result = false;
-                break;
-            case 5:
-                result = true;
-                break;
-            case 6:
-                result = true;
-                break;
-            case 7:
-                result = true;
-                break;
-            case 8:
-                result = false;
-                break;
-            case 9:
-                result = true;
-                break;
-            case 10:
-                result = true;
-                break;
-            case 11:
-                result = true;
-                break;
-            case 12:
-                result = true;
-                break;
-            case 13:
-                result = true;
-                break;
-            case 14:
-                result = true;
-                break;
-            case 15:
-                result = true;
-                break;
-        }
-        return result;
-    }
-
-    @Override
-    public void map(KmerBytesWritable key, ByteWritable value,
-            OutputCollector<KmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
-        byte precursor = (byte) 0xF0;
-        byte succeed = (byte) 0x0F;
-        byte adjBitMap = value.get();
-        byte bitFlag = (byte) 0;
-        precursor = (byte) (precursor & adjBitMap);
-        precursor = (byte) ((precursor & 0xff) >> 4);
-        succeed = (byte) (succeed & adjBitMap);
-        boolean inDegree = measureDegree(precursor);
-        boolean outDegree = measureDegree(succeed);
-        if (inDegree == false && outDegree == false) {
-            outputKmer.set(key);
-            bitFlag = (byte) 2;
-            outputAdjList.set(adjBitMap, bitFlag, null);///~~~~~kmersize----->0
-            output.collect(outputKmer, outputAdjList);
-        }
-        else{
-            for(int i = 0 ; i < 4; i ++){
-                byte temp = 0x01;
-                byte shiftedCode = 0;
-                temp  = (byte)(temp << i);
-                temp = (byte) (succeed & temp);
-                if(temp != 0 ){
-                    byte succeedCode = GeneCode.getGeneCodeFromBitMap(temp);
-                    shiftedCode = key.shiftKmerWithNextCode(succeedCode);
-                    outputKmer.set(key);
-                    outputAdjList.set((byte)0, bitFlag, null);
-                    output.collect(outputKmer, outputAdjList);
-                    key.shiftKmerWithPreCode(shiftedCode);
-                }
-            }
-        }
-    }
-}

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/SNodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/SNodeInitialReducer.java
deleted file mode 100644
index 1426fba..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/SNodeInitialReducer.java
+++ /dev/null

@@ -1,66 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.pathmergingh1;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-@SuppressWarnings("deprecation")
-public class SNodeInitialReducer extends MapReduceBase implements
-        Reducer<KmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
-    private VKmerBytesWritable outputKmer = new VKmerBytesWritable();
-    private MergePathValueWritable outputValue = new MergePathValueWritable();
-
-
-    @Override
-    public void reduce(KmerBytesWritable key, Iterator<MergePathValueWritable> values,
-            OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
-        outputKmer.set(key);
-        outputValue = values.next();
-        if (values.hasNext() == true) {
-            if (outputValue.getFlag() == 2) {
-                byte bitFlag = 1;
-                outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);///outputValue.getKmerLength()
-                output.collect(outputKmer, outputValue);
-            } else {
-                boolean flag = false;
-                while (values.hasNext()) {
-                    outputValue = values.next();
-                    if (outputValue.getFlag() == 2) {
-                        flag = true;
-                        break;
-                    }
-                }
-                if (flag == true) {
-                    byte bitFlag = 1;
-                    outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);
-                    output.collect(outputKmer, outputValue);
-                }
-            }
-        } else {
-            if (outputValue.getFlag() == 2) {
-                byte bitFlag = 0;
-                outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);
-                output.collect(outputKmer, outputValue);
-            }
-        }
-    }
-}

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathMultiSeqOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathMultiSeqOutputFormat.java
deleted file mode 100644
index 66d3b6b..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathMultiSeqOutputFormat.java
+++ /dev/null

@@ -1,28 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.pathmergingh2;
-
-import java.io.File;
-import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-public class MergePathMultiSeqOutputFormat extends MultipleSequenceFileOutputFormat<VKmerBytesWritable, MergePathValueWritable>{
-    @Override
-    protected String generateLeafFileName(String name) {
-        // TODO Auto-generated method stub System.out.println(name); 
-        String[] names = name.split("-");
-        return names[0] + File.separator + name;
-    }
-}

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathMultiTextOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathMultiTextOutputFormat.java
deleted file mode 100644
index bca9695..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathMultiTextOutputFormat.java
+++ /dev/null

@@ -1,28 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.pathmergingh2;
-
-import java.io.File;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
-
-public class MergePathMultiTextOutputFormat extends MultipleTextOutputFormat<Text, Text>{
-    @Override
-    protected String generateLeafFileName(String name) {
-        // TODO Auto-generated method stub System.out.println(name); 
-        String[] names = name.split("-");
-        return names[0] + File.separator + name;
-    }
-}

diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialReducer.java
deleted file mode 100644
index 8ba5aa8..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialReducer.java
+++ /dev/null

@@ -1,90 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.pathmergingh2;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-@SuppressWarnings("deprecation")
-public class SNodeInitialReducer extends MapReduceBase implements
-        Reducer<KmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
-    private VKmerBytesWritable outputKmer = new VKmerBytesWritable();
-    private MergePathValueWritable outputValue = new MergePathValueWritable();
-
-    @Override
-    public void reduce(KmerBytesWritable key, Iterator<MergePathValueWritable> values,
-            OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
-        outputKmer.set(key);
-        outputValue = values.next();
-        byte startFlag = 0x00;
-        byte endFlag = 0x00;
-        byte targetPointFlag = 0x00;
-        byte targetAdjList = 0x00;
-        byte outputFlag = 0x00;
-        if (values.hasNext() == true) {
-            switch (outputValue.getFlag()) {
-                case (byte) 0x01:
-                    startFlag = (byte) 0x01;
-                    break;
-                case (byte) 0x80:
-                    endFlag = (byte) 0x80;
-                    break;
-                case (byte) 0x02:
-                    targetPointFlag = (byte) 0x02;
-                    targetAdjList = outputValue.getAdjBitMap();
-                    break;
-            }
-            while (values.hasNext()) {
-                outputValue = values.next();
-                switch (outputValue.getFlag()) {
-                    case (byte) 0x01:
-                        startFlag = (byte) 0x01;
-                        break;
-                    case (byte) 0x80:
-                        endFlag = (byte) 0x80;
-                        break;
-                    case (byte) 0x02:
-                        targetPointFlag = (byte) 0x02;
-                        targetAdjList = outputValue.getAdjBitMap();
-                        break;
-                }
-                if(startFlag != (byte) 0x00 && endFlag!= (byte) 0x00 && targetPointFlag != (byte) 0x00)
-                    break;
-            }
-            if(targetPointFlag == (byte) 0x02) {
-                if(startFlag == (byte) 0x01) {
-                    outputFlag = (byte) (outputFlag | startFlag);
-                }
-                if(endFlag == (byte) 0x80) {
-                    outputFlag = (byte) (outputFlag | endFlag);
-                }
-                outputValue.set(targetAdjList, outputFlag, null);
-                output.collect(outputKmer, outputValue);
-            }
-        } else {
-            if (outputValue.getFlag() == 2) {
-                byte bitFlag = 0;
-                outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);
-                output.collect(outputKmer, outputValue);
-            }
-        }
-    }
-}

diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingTest.java
similarity index 96%
rename from genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingTest.java
rename to genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingTest.java
index acca4e7..ea05e53 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingTest.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.gbresultschecking;
+package edu.uci.ics.genomix.hadoop.gbresultschecking;
 
 import java.io.DataOutputStream;
 import java.io.File;
@@ -27,7 +27,7 @@
 import org.apache.hadoop.mapred.MiniMRCluster;
 import org.junit.Test;
 
-import edu.uci.ics.genomix.gbresultschecking.ResultsCheckingDriver;
+import edu.uci.ics.genomix.hadoop.gbresultschecking.ResultsCheckingDriver;
 
 @SuppressWarnings("deprecation")
 public class ResultsCheckingTest {

diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/graphbuilding/GraphBuildingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphbuilding/GraphBuildingTest.java
similarity index 96%
rename from genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/graphbuilding/GraphBuildingTest.java
rename to genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphbuilding/GraphBuildingTest.java
index efd3619..ad14d33 100755
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/graphbuilding/GraphBuildingTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphbuilding/GraphBuildingTest.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.graphbuilding;
+package edu.uci.ics.genomix.hadoop.graphbuilding;
 
 import java.io.BufferedWriter;
 import java.io.DataOutputStream;
@@ -30,11 +30,12 @@
 import org.apache.hadoop.mapred.MiniMRCluster;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.junit.Test;
-import edu.uci.ics.genomix.graphbuilding.GenomixDriver;
+
+import edu.uci.ics.genomix.hadoop.graphbuilding.GenomixDriver;
+import edu.uci.ics.genomix.hadoop.utils.TestUtils;
 import edu.uci.ics.genomix.type.GeneCode;
 import edu.uci.ics.genomix.type.KmerBytesWritable;
 import edu.uci.ics.genomix.type.KmerCountValue;
-import edu.uci.ics.genomix.utils.TestUtils;
 /**
  * This class test the correctness of graphbuilding program
  */

diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/graphcountfilter/CountFilterTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterTest.java
similarity index 96%
rename from genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/graphcountfilter/CountFilterTest.java
rename to genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterTest.java
index bff0179..5f8b3db 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/graphcountfilter/CountFilterTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterTest.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.graphcountfilter;
+package edu.uci.ics.genomix.hadoop.graphcountfilter;
 
 import java.io.BufferedWriter;
 import java.io.DataOutputStream;
@@ -31,9 +31,10 @@
 import org.apache.hadoop.mapred.MiniMRCluster;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.junit.Test;
-import edu.uci.ics.genomix.graphcountfilter.CountFilterDriver;
+
+import edu.uci.ics.genomix.hadoop.graphcountfilter.CountFilterDriver;
+import edu.uci.ics.genomix.hadoop.utils.TestUtils;
 import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.utils.TestUtils;
 
 
 @SuppressWarnings("deprecation")

diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/pathmergingh1/MergePathTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathTest.java
similarity index 95%
rename from genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/pathmergingh1/MergePathTest.java
rename to genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathTest.java
index 109db50..a1fd3a4 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/pathmergingh1/MergePathTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathTest.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.pathmergingh1;
+package edu.uci.ics.genomix.hadoop.pathmergingh1;
 
 import java.io.BufferedWriter;
 import java.io.DataOutputStream;
@@ -31,10 +31,11 @@
 import org.apache.hadoop.util.ReflectionUtils;
 import org.junit.Test;
 
-import edu.uci.ics.genomix.pathmergingh1.MergePathH1Driver;
+import edu.uci.ics.genomix.hadoop.pathmergingh1.MergePathH1Driver;
+import edu.uci.ics.genomix.hadoop.utils.TestUtils;
 import edu.uci.ics.genomix.type.KmerBytesWritable;
 import edu.uci.ics.genomix.type.VKmerBytesWritable;
-import edu.uci.ics.genomix.utils.TestUtils;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
 
 @SuppressWarnings("deprecation")
 public class MergePathTest {

diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Test.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Test.java
similarity index 95%
rename from genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Test.java
rename to genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Test.java
index 7f871bd..def2592 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Test.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Test.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.pathmergingh2;
+package edu.uci.ics.genomix.hadoop.pathmergingh2;
 
 import java.io.BufferedWriter;
 import java.io.DataOutputStream;
@@ -31,11 +31,11 @@
 import org.apache.hadoop.util.ReflectionUtils;
 import org.junit.Test;
 
-import edu.uci.ics.genomix.pathmergingh2.MergePathH2Driver;
+import edu.uci.ics.genomix.hadoop.pathmergingh2.MergePathH2Driver;
+import edu.uci.ics.genomix.hadoop.utils.TestUtils;
 import edu.uci.ics.genomix.type.KmerBytesWritable;
 import edu.uci.ics.genomix.type.VKmerBytesWritable;
-import edu.uci.ics.genomix.utils.TestUtils;
-
+import edu.uci.ics.genomix.type.MergePathValueWritable;
 @SuppressWarnings("deprecation")
 public class MergePathH2Test {
     private static final String ACTUAL_RESULT_DIR = "actual4";

diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/utils/TestUtils.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/utils/TestUtils.java
similarity index 98%
rename from genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/utils/TestUtils.java
rename to genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/utils/TestUtils.java
index 1488907..deb3b97 100755
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/utils/TestUtils.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/utils/TestUtils.java

@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.genomix.utils;
+package edu.uci.ics.genomix.hadoop.utils;
 
 import java.io.BufferedReader;
 import java.io.File;
commit	0047d3c8de4a8dad75edc7682bd8b145a5500aad	[log] [tgz]
author	Nan Zhang <zhangnan2920214@gmail.com>	Tue May 14 10:52:44 2013 -0700
committer	Nan Zhang <zhangnan2920214@gmail.com>	Tue May 14 10:52:44 2013 -0700
tree	1a89e8ce1e09870c76dd71c86ee4ecaaf06877cf
parent	bef5ad00fd32b3ae73d8a100b55c48cd550ce584 [diff]