after code view h1 h2 update
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathValueWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/MergePathValueWritable.java
similarity index 95%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathValueWritable.java
rename to genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/MergePathValueWritable.java
index 67b168d..4e8199a 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathValueWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/MergePathValueWritable.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.pathmergingh2;
+package edu.uci.ics.genomix.type;
import java.io.DataInput;
import java.io.DataOutput;
@@ -94,6 +94,9 @@
return GeneCode.getSymbolFromBitMap(adjBitMap) + '\t' + String.valueOf(flag);
}
+ public String pureToString() { // returns only the symbols decoded from adjBitMap; no tab or flag is appended
+ return GeneCode.getSymbolFromBitMap(adjBitMap);
+ }
@Override
public byte[] getBytes() {
// TODO Auto-generated method stub
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingDriver.java
similarity index 98%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingDriver.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingDriver.java
index 067249a..2d0c36e 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingDriver.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.gbresultschecking;
+package edu.uci.ics.genomix.hadoop.gbresultschecking;
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingMapper.java
similarity index 97%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingMapper.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingMapper.java
index 466d44c..b1e5e59 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingMapper.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.gbresultschecking;
+package edu.uci.ics.genomix.hadoop.gbresultschecking;
import java.io.IOException;
import org.apache.hadoop.io.ByteWritable;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingReducer.java
similarity index 96%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingReducer.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingReducer.java
index 6d7fbc0..e93548f 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingReducer.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.gbresultschecking;
+package edu.uci.ics.genomix.hadoop.gbresultschecking;
import java.io.IOException;
import java.util.Iterator;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixCombiner.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixCombiner.java
similarity index 97%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixCombiner.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixCombiner.java
index 7029c86..8a4cdc9 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixCombiner.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixCombiner.java
@@ -13,7 +13,7 @@
* limitations under the License.
*/
-package edu.uci.ics.genomix.graphbuilding;
+package edu.uci.ics.genomix.hadoop.graphbuilding;
import java.io.IOException;
import java.util.Iterator;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixDriver.java
similarity index 98%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixDriver.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixDriver.java
index 60802eb..cd0ee2d 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixDriver.java
@@ -13,7 +13,7 @@
* limitations under the License.
*/
-package edu.uci.ics.genomix.graphbuilding;
+package edu.uci.ics.genomix.hadoop.graphbuilding;
import java.io.IOException;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixMapper.java
similarity index 98%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixMapper.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixMapper.java
index 3fb3425..e9fa3f0 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixMapper.java
@@ -13,7 +13,7 @@
* limitations under the License.
*/
-package edu.uci.ics.genomix.graphbuilding;
+package edu.uci.ics.genomix.hadoop.graphbuilding;
import java.io.IOException;
import java.util.regex.Matcher;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixReducer.java
similarity index 97%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixReducer.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixReducer.java
index 75add24..9b284df 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphbuilding/GenomixReducer.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.graphbuilding;
+package edu.uci.ics.genomix.hadoop.graphbuilding;
import java.io.IOException;
import java.util.Iterator;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterDriver.java
similarity index 98%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterDriver.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterDriver.java
index 2f6dddd..a2eafeb 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterDriver.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.graphcountfilter;
+package edu.uci.ics.genomix.hadoop.graphcountfilter;
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterMapper.java
similarity index 96%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterMapper.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterMapper.java
index 4a9a8a9..4c25597 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterMapper.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.graphcountfilter;
+package edu.uci.ics.genomix.hadoop.graphcountfilter;
import java.io.IOException;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterReducer.java
similarity index 96%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterReducer.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterReducer.java
index 6e61973..58be646 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterReducer.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.graphcountfilter;
+package edu.uci.ics.genomix.hadoop.graphcountfilter;
import java.io.IOException;
import java.util.Iterator;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Driver.java
new file mode 100644
index 0000000..28f38a8
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Driver.java
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.hadoop.pathmergingh1;
+
+import java.io.IOException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
+import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+import edu.uci.ics.genomix.hadoop.pathmergingh2.MergePathH2Driver;
+import edu.uci.ics.genomix.hadoop.pathmergingh2.SNodeInitialMapper;
+import edu.uci.ics.genomix.hadoop.pathmergingh2.SNodeInitialReducer;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
+
+@SuppressWarnings("deprecation")
+public class MergePathH1Driver { // command-line driver that chains the H1 path-merging Hadoop jobs
+
+ private static class Options { // args4j option holder populated by main()
+ @Option(name = "-inputpath", usage = "the input path", required = true)
+ public String inputPath;
+
+ @Option(name = "-outputpath", usage = "the output path", required = true)
+ public String outputPath;
+
+ @Option(name = "-mergeresultpath", usage = "the merging results path", required = true)
+ public String mergeResultPath;
+
+ @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+ public int numReducers;
+
+ @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+ public int sizeKmer;
+
+ @Option(name = "-merge-rounds", usage = "the while rounds of merging", required = true)
+ public int mergeRound;
+
+ }
+ // Runs the "Initial Path-Starting-Points Table" job once, then mergeRound rounds of the "Path Merge" job.
+ public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer,
+ int mergeRound, String defaultConfPath) throws IOException {
+ // Job 1: read sequence files from inputPath and write to inputPath + "stepNext".
+ JobConf conf = new JobConf(MergePathH2Driver.class); // NOTE(review): H2 driver class here, while the loop below uses MergePathH1Driver.class - confirm intended
+ conf.setInt("sizeKmer", sizeKmer);
+
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath)); // optional extra configuration resource
+ }
+ conf.setJobName("Initial Path-Starting-Points Table");
+ conf.setMapperClass(SNodeInitialMapper.class);
+ conf.setReducerClass(SNodeInitialReducer.class);
+
+ conf.setMapOutputKeyClass(KmerBytesWritable.class);
+ conf.setMapOutputValueClass(MergePathValueWritable.class);
+
+ conf.setInputFormat(SequenceFileInputFormat.class);
+ conf.setOutputFormat(SequenceFileOutputFormat.class);
+
+ String singlePointPath = "comSinglePath0";
+ // Register the single named output used by the initial job.
+ MultipleOutputs.addNamedOutput(conf, singlePointPath, MergePathMultiSeqOutputFormat.class,
+ VKmerBytesWritable.class, MergePathValueWritable.class);
+
+ conf.setOutputKeyClass(VKmerBytesWritable.class);
+ conf.setOutputValueClass(MergePathValueWritable.class);
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath));
+ FileOutputFormat.setOutputPath(conf, new Path(inputPath + "stepNext"));
+ conf.setNumReduceTasks(numReducers);
+ FileSystem dfs = FileSystem.get(conf);
+ dfs.delete(new Path(inputPath + "stepNext"), true); // recursively remove any previous output before running
+ JobClient.runJob(conf);
+ dfs.rename(new Path(inputPath + "stepNext" + "/" + singlePointPath), new Path(mergeResultPath + "/"
+ + singlePointPath)); // moves <inputPath>stepNext/comSinglePath0 to <mergeResultPath>/comSinglePath0; the boolean result is not checked
+ int iMerge = 0;
+ /* Merge rounds: each round reads inputPath + "stepNext" and replaces it with that round's uncompSinglePath output. */
+ for (iMerge = 0; iMerge < mergeRound; iMerge++) {
+// if (!dfs.exists(new Path(inputPath + "-step1")))
+// break;
+ conf = new JobConf(MergePathH1Driver.class);
+ conf.setInt("sizeKmer", sizeKmer);
+ conf.setInt("iMerge", iMerge); // read back by MergePathH1Reducer.configure to name its outputs
+
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath));
+ }
+ conf.setJobName("Path Merge");
+
+ conf.setMapperClass(MergePathH1Mapper.class);
+ conf.setReducerClass(MergePathH1Reducer.class);
+
+ conf.setMapOutputKeyClass(VKmerBytesWritable.class);
+ conf.setMapOutputValueClass(MergePathValueWritable.class);
+
+ conf.setInputFormat(SequenceFileInputFormat.class);
+ // Named outputs are suffixed with the round number.
+ String uncompSinglePath = "uncompSinglePath" + iMerge;
+ String comSinglePath = "comSinglePath" + iMerge;
+ String comCircle = "comCircle" + iMerge;
+
+ MultipleOutputs.addNamedOutput(conf, uncompSinglePath, MergePathMultiSeqOutputFormat.class,
+ VKmerBytesWritable.class, MergePathValueWritable.class);
+
+ MultipleOutputs.addNamedOutput(conf, comSinglePath, MergePathMultiSeqOutputFormat.class,
+ VKmerBytesWritable.class, MergePathValueWritable.class);
+
+ MultipleOutputs.addNamedOutput(conf, comCircle, MergePathMultiSeqOutputFormat.class,
+ VKmerBytesWritable.class, MergePathValueWritable.class);
+
+ conf.setOutputKeyClass(VKmerBytesWritable.class);
+ conf.setOutputValueClass(MergePathValueWritable.class);
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath + "stepNext"));
+ FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+ conf.setNumReduceTasks(numReducers);
+ dfs.delete(new Path(outputPath), true); // outputPath is wiped at the start of every round
+ JobClient.runJob(conf);
+ dfs.delete(new Path(inputPath + "stepNext"), true); // drop the input that was just consumed
+ dfs.rename(new Path(outputPath + "/" + uncompSinglePath), new Path(inputPath + "stepNext")); // unfinished paths become the next round's input
+ dfs.rename(new Path(outputPath + "/" + comSinglePath), new Path(mergeResultPath + "/" + comSinglePath)); // NOTE(review): for iMerge == 0 this destination was already created by the rename after the initial job - confirm the outcome
+ dfs.rename(new Path(outputPath + "/" + comCircle), new Path(mergeResultPath + "/" + comCircle));
+ }
+ /* The rest of this method is commented-out code: a variant of the loop body using "-step1", "uncomplete" and "complete". */
+ /* conf = new JobConf(MergePathH1Driver.class);
+ conf.setInt("sizeKmer", sizeKmer);
+ conf.setInt("iMerge", iMerge);
+
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath));
+ }
+ conf.setJobName("Path Merge");
+
+ conf.setMapperClass(MergePathH1Mapper.class);
+ conf.setReducerClass(MergePathH1Reducer.class);
+
+ conf.setMapOutputKeyClass(VKmerBytesWritable.class);
+ conf.setMapOutputValueClass(MergePathValueWritable.class);
+
+ conf.setInputFormat(SequenceFileInputFormat.class);
+
+ String uncomplete = "uncomplete" + iMerge;
+ String complete = "complete" + iMerge;
+
+ MultipleOutputs.addNamedOutput(conf, uncomplete,
+ MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
+ MergePathValueWritable.class);
+
+ MultipleOutputs.addNamedOutput(conf, complete,
+ MergePathMultiTextOutputFormat.class, VKmerBytesWritable.class,
+ MergePathValueWritable.class);
+
+ conf.setOutputKeyClass(VKmerBytesWritable.class);
+ conf.setOutputValueClass(MergePathValueWritable.class);
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
+ FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+ conf.setNumReduceTasks(numReducers);
+ dfs.delete(new Path(outputPath), true);
+ JobClient.runJob(conf);
+ dfs.delete(new Path(inputPath + "-step1"), true);
+ dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
+ dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));*/
+ }
+ // Parses the command-line options and runs the driver with defaultConfPath == null.
+ public static void main(String[] args) throws Exception {
+ Options options = new Options();
+ CmdLineParser parser = new CmdLineParser(options);
+ parser.parseArgument(args);
+ MergePathH1Driver driver = new MergePathH1Driver();
+ driver.run(options.inputPath, options.outputPath, options.mergeResultPath, options.numReducers,
+ options.sizeKmer, options.mergeRound, null);
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Mapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Mapper.java
similarity index 81%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Mapper.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Mapper.java
index 6357483..95cc01e 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Mapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Mapper.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.pathmergingh1;
+package edu.uci.ics.genomix.hadoop.pathmergingh1;
import java.io.IOException;
import org.apache.hadoop.mapred.JobConf;
@@ -23,17 +23,17 @@
import edu.uci.ics.genomix.type.GeneCode;
import edu.uci.ics.genomix.type.VKmerBytesWritable;
import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
@SuppressWarnings("deprecation")
public class MergePathH1Mapper extends MapReduceBase implements
Mapper<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
private int KMER_SIZE;
private VKmerBytesWritableFactory outputKmerFactory;
- private MergePathValueWritable outputValue;
+ private MergePathValueWritable outputValue;
private VKmerBytesWritable tmpKmer;
private VKmerBytesWritable outputKmer;
-
public void configure(JobConf job) {
KMER_SIZE = job.getInt("sizeKmer", 0);
outputKmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
@@ -45,7 +45,6 @@
@Override
public void map(VKmerBytesWritable key, MergePathValueWritable value,
OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
-
byte precursor = (byte) 0xF0;
byte succeed = (byte) 0x0F;
byte adjBitMap = value.getAdjBitMap();
@@ -53,18 +52,22 @@
precursor = (byte) (precursor & adjBitMap);
precursor = (byte) ((precursor & 0xff) >> 4);
succeed = (byte) (succeed & adjBitMap);
- if (bitFlag == 1) {
+ byte bitStartEnd = (byte) (0x01 & bitFlag);
+ if (bitStartEnd == 1) {
+ /**
+ * e.g. the kmer AGCGT (already merged from 3 kmers of size 3), adjMap C|G:
+ * succeedCode -> G; tmpKmer holds the last kmer of the chain and the succeeding neighbor GTG becomes outputKmer,
+ * then tmpKmer is reused to hold AGC, which is stored in outputValue
+ */
byte succeedCode = GeneCode.getGeneCodeFromBitMap(succeed);
tmpKmer.set(outputKmerFactory.getLastKmerFromChain(KMER_SIZE, key));
+ //TODO remove tmpKmer!!!!
outputKmer.set(outputKmerFactory.shiftKmerWithNextCode(tmpKmer, succeedCode));
-
tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(key.getKmerLength() - (KMER_SIZE - 1), key));
outputValue.set(adjBitMap, bitFlag, tmpKmer);
output.collect(outputKmer, outputValue);
- } else {
- outputKmer.set(key);
- outputValue.set(value);
- output.collect(key, outputValue);
+ } else {//!!!!Make comments
+ output.collect(key, value);
}
}
}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Reducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Reducer.java
new file mode 100644
index 0000000..8e0ba00
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathH1Reducer.java
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.hadoop.pathmergingh1;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+
+import edu.uci.ics.genomix.type.GeneCode;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
+
+@SuppressWarnings("deprecation")
+public class MergePathH1Reducer extends MapReduceBase implements
+ Reducer<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> { // combines the first two records of a key group into one merged node
+ private VKmerBytesWritableFactory kmerFactory;
+ private VKmerBytesWritable outputKmer;
+ private int KMER_SIZE;
+ private MergePathValueWritable outputValue;
+ MultipleOutputs mos = null;
+ private int I_MERGE; // merge round number; used as the suffix of the named outputs
+ // Reads "iMerge" and "sizeKmer" from the job and allocates the objects reused by reduce().
+ public void configure(JobConf job) {
+ mos = new MultipleOutputs(job);
+ I_MERGE = Integer.parseInt(job.get("iMerge")); // NOTE(review): presumably throws NumberFormatException when "iMerge" is unset - confirm the driver always sets it
+ KMER_SIZE = job.getInt("sizeKmer", 0);
+ outputValue = new MergePathValueWritable();
+ kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
+ outputKmer = new VKmerBytesWritable(KMER_SIZE);
+ }
+ // Emits to "comSinglePath<round>" or "uncompSinglePath<round>" through MultipleOutputs; the output parameter is not used.
+ @SuppressWarnings("unchecked")
+ @Override
+ public void reduce(VKmerBytesWritable key, Iterator<MergePathValueWritable> values,
+ OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+ outputValue = values.next(); // first record of the group
+ if (values.hasNext() == true) { // a second record exists, so the two are merged
+ byte bitFlag = outputValue.getFlag();
+ byte bitStartEnd = (byte) (0x01 & outputValue.getFlag()); // lowest flag bit of the first record
+ if (bitStartEnd == 0) {
+ /**
+ * e.g. two records fall into the same group: the first is the non-start point (GTG, null, A|T, 0), the second is the start point (GTG, AGC, C|G, 1)
+ * the result of combining them: (AGCGTG, null, C|T, 1)
+ */
+ //first record is non-start point
+ // keep only the low 4 bits of the first record's adjacency map
+ byte nextAdj = outputValue.getAdjBitMap();
+ byte succeed = (byte) 0x0F;
+ succeed = (byte) (succeed & nextAdj);
+ //second record must be start point
+ outputValue = values.next();
+ byte adjBitMap = outputValue.getAdjBitMap();
+ if (outputValue.getKmerLength() != 0)
+ outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), key)); // the kmer carried in the value is merged with the key
+ else
+ outputKmer.set(key);
+ byte outputFlag = (byte) (0x81 & bitFlag);
+ outputFlag = (byte) (outputFlag | ((byte) 0x81 & outputValue.getFlag())); // OR of the 0x81 bits of both records
+ adjBitMap = (byte) (adjBitMap & 0xF0); // high 4 bits come from the second record
+ adjBitMap = (byte) (adjBitMap | succeed); // low 4 bits come from the first record
+ outputValue.set(adjBitMap, outputFlag, null);
+ //check whether the merged node now has both the 0x01 and 0x80 flag bits set (start-point and end-point)
+ bitFlag = outputValue.getFlag();
+ bitStartEnd = (byte) (0x81 & bitFlag);
+ if (bitStartEnd == (byte) 0x81) {
+ mos.getCollector("comSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ } else
+ mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ } else {
+ /**
+ * e.g. two records fall into the same group: the first is the start point (GTG, AGC, C|G, 1), the second is the non-start point (GTG, null, A|T, 0)
+ * the result of combining them: (AGCGTG, null, C|T, 1)
+ */
+ //first record is start point
+ byte adjBitMap = outputValue.getAdjBitMap();
+ if (outputValue.getKmerLength() != 0)
+ outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), key)); // done before values.next() replaces outputValue
+ else
+ outputKmer.set(key);
+ //second record is non-start point
+ outputValue = values.next();
+ byte nextAdj = outputValue.getAdjBitMap();
+ byte succeed = (byte) 0x0F;
+ succeed = (byte) (succeed & nextAdj);
+ //combine the 0x81 flag bits of both records
+ byte outputFlag = (byte) (0x81 & bitFlag);
+ outputFlag = (byte) (outputFlag | ((byte) 0x81 & outputValue.getFlag()));
+ adjBitMap = (byte) (adjBitMap & 0xF0); // high 4 bits come from the first record
+ adjBitMap = (byte) (adjBitMap | succeed); // low 4 bits come from the second record
+ outputValue.set(adjBitMap, outputFlag, null);
+ //check whether the merged node now has both the 0x01 and 0x80 flag bits set (start-point and end-point)
+ bitFlag = outputValue.getFlag();
+ bitStartEnd = (byte) (0x81 & bitFlag);
+ if (bitStartEnd == (byte) 0x81) {
+ mos.getCollector("comSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ } else
+ mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ }
+ } else { // only one record in the group: pass it through unchanged as uncompleted
+ mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(key, outputValue);
+ }
+ }
+
+ public void close() throws IOException {
+ // Close the MultipleOutputs instance created in configure().
+ mos.close();
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiSeqOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathMultiSeqOutputFormat.java
similarity index 91%
copy from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiSeqOutputFormat.java
copy to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathMultiSeqOutputFormat.java
index 5e8f1d8..0868f8c 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiSeqOutputFormat.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathMultiSeqOutputFormat.java
@@ -12,12 +12,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.pathmergingh1;
+package edu.uci.ics.genomix.hadoop.pathmergingh1;
import java.io.File;
import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
+import edu.uci.ics.genomix.type.MergePathValueWritable;
public class MergePathMultiSeqOutputFormat extends MultipleSequenceFileOutputFormat<VKmerBytesWritable, MergePathValueWritable>{
@Override
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiTextOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathMultiTextOutputFormat.java
similarity index 95%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiTextOutputFormat.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathMultiTextOutputFormat.java
index ac88ce0..50c7a3d 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiTextOutputFormat.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathMultiTextOutputFormat.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.pathmergingh1;
+package edu.uci.ics.genomix.hadoop.pathmergingh1;
import java.io.File;
import org.apache.hadoop.io.Text;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/SNodeInitialMapper.java
similarity index 80%
copy from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialMapper.java
copy to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/SNodeInitialMapper.java
index 6270852..8c65473 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/SNodeInitialMapper.java
@@ -12,18 +12,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.pathmergingh2;
+package edu.uci.ics.genomix.hadoop.pathmergingh1;
import java.io.IOException;
+
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.GeneCode;
import edu.uci.ics.genomix.type.KmerBytesWritable;
-
+import edu.uci.ics.genomix.type.GeneCode;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
@SuppressWarnings("deprecation")
public class SNodeInitialMapper extends MapReduceBase implements
Mapper<KmerBytesWritable, ByteWritable, KmerBytesWritable, MergePathValueWritable> {
@@ -37,7 +39,11 @@
outputKmer = new KmerBytesWritable(KMER_SIZE);
outputAdjList = new MergePathValueWritable();
}
-
+
+ /**
+ * @param adjacent the high 4 bits are useless, we just use the lower 4 bits
+ * @return if the degree == 1 then return false, else return true
+ */
boolean measureDegree(byte adjacent) {
boolean result = true;
switch (adjacent) {
@@ -96,6 +102,7 @@
@Override
public void map(KmerBytesWritable key, ByteWritable value,
OutputCollector<KmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+ //TODO clean this code piece, use the genomix-data function
byte precursor = (byte) 0xF0;
byte succeed = (byte) 0x0F;
byte adjBitMap = value.get();
@@ -105,19 +112,24 @@
succeed = (byte) (succeed & adjBitMap);
boolean inDegree = measureDegree(precursor);
boolean outDegree = measureDegree(succeed);
+ //if indegree == 1 and outdegree == 1, then it assigns these records' flag to 2
if (inDegree == false && outDegree == false) {
outputKmer.set(key);
- System.out.println(outputKmer.hashCode());
- bitFlag = (byte) 2;
+ bitFlag = (byte) 0x02;
outputAdjList.set(adjBitMap, bitFlag, null);
output.collect(outputKmer, outputAdjList);
} else {
+ // other records maps its precursor neighbors
+ /**
+ * eg. ACT CTA|CA, it maps CAC, TAC, AAC, all the 3 pairs marked 0x80
+ */
for (int i = 0; i < 4; i++) {
byte temp = (byte) 0x01;
byte shiftedCode = 0;
temp = (byte) (temp << i);
- temp = (byte) (precursor & temp);
+ temp = (byte) (precursor & temp);
if (temp != 0) {
+ //TODO use the genomix-data factory function
byte precurCode = GeneCode.getGeneCodeFromBitMap(temp);
shiftedCode = key.shiftKmerWithPreCode(precurCode);
outputKmer.set(key);
@@ -127,6 +139,14 @@
key.shiftKmerWithNextCode(shiftedCode);
}
}
+ //and also maps its succeeding neighbors
+ /**
+ * eg. ACT CTA|CA, it maps CTC, CTA, all the 2 pairs marked 0x01
+ */
+// VKmerBytesWritableFactory factor ; //new
+// for( int i = GeneCode.A ; i <= GeneCode.T; i++){
+// factor.getFirstKmerFromChain(firstK, kmerChain)
+// }
for (int i = 0; i < 4; i++) {
byte temp = (byte) 0x01;
byte shiftedCode = 0;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/SNodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/SNodeInitialReducer.java
new file mode 100644
index 0000000..cd3db8e
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh1/SNodeInitialReducer.java
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.hadoop.pathmergingh1;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
+
+@SuppressWarnings("deprecation")
+public class SNodeInitialReducer extends MapReduceBase implements
+ Reducer<KmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
+ private VKmerBytesWritable outputKmer = new VKmerBytesWritable();
+ private MergePathValueWritable outputValue = new MergePathValueWritable();
+ MultipleOutputs mos = null;
+
+ public void configure(JobConf job) {
+ mos = new MultipleOutputs(job);
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public void reduce(KmerBytesWritable key, Iterator<MergePathValueWritable> values,
+ OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+ outputKmer.set(key);
+ outputValue = values.next();
+ byte startPointFlag = 0x00;
+ byte endPointFlag = 0x00;
+ /**
+ * the targetPoint is the record with 1 indegree and 1 outdegree (flag 0x02) that we look for in a group holding multiple records
+ */
+ byte targetPointFlag = 0x00;
+ byte targetAdjList = 0x00;
+ //if we find the start or end point, we will use outputFlag to mark them
+ byte outputFlag = 0x00;
+ if (values.hasNext() == true) {
+ //find startPointFlag, endPointFlag, targetPointFlag
+ switch (outputValue.getFlag()) {
+ case (byte) 0x01:
+ startPointFlag = (byte) 0x01;
+ break;
+ case (byte) 0x80:
+ endPointFlag = (byte) 0x80;
+ break;
+ case (byte) 0x02:
+ targetPointFlag = (byte) 0x02;
+ targetAdjList = outputValue.getAdjBitMap();
+ break;
+ }
+ while (values.hasNext()) {
+ outputValue = values.next();
+ switch (outputValue.getFlag()) {
+ case (byte) 0x01:
+ startPointFlag = (byte) 0x01;
+ break;
+ case (byte) 0x80:
+ endPointFlag = (byte) 0x80;
+ break;
+ case (byte) 0x02:
+ targetPointFlag = (byte) 0x02;
+ targetAdjList = outputValue.getAdjBitMap();
+ break;
+ }
+ if (startPointFlag != (byte) 0x00 && endPointFlag != (byte) 0x00 && targetPointFlag != (byte) 0x00)
+ break;
+ }
+ //only emit when the group contains the target record (flag 0x02)
+ if (targetPointFlag == (byte) 0x02) {
+ //a record that is both start-point and end-point goes to the separate comSinglePath0 output
+ if (startPointFlag == (byte) 0x01 && endPointFlag == (byte) 0x80) {
+ outputFlag = (byte) (outputFlag | startPointFlag);
+ outputFlag = (byte) (outputFlag | endPointFlag);
+ outputValue.set(targetAdjList, outputFlag, null);
+ mos.getCollector("comSinglePath0", reporter).collect(outputKmer, outputValue);
+ } else {
+ if (startPointFlag == (byte) 0x01) {
+ outputFlag = (byte) (outputFlag | startPointFlag);
+ }
+ if (endPointFlag == (byte) 0x80) {
+ outputFlag = (byte) (outputFlag | endPointFlag);
+ }
+ outputValue.set(targetAdjList, outputFlag, null);
+ output.collect(outputKmer, outputValue);
+ }
+ }
+ } else {
+ //a lone 0x02 record is neither start nor end point: emit it to the regular output with its flag reset to 0
+ if (outputValue.getFlag() == (byte) 0x02) {
+ byte bitFlag = 0;
+ outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);
+ output.collect(outputKmer, outputValue);
+ }
+ }
+ }
+
+ public void close() throws IOException {
+ // close the named outputs opened through MultipleOutputs
+ mos.close();
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Driver.java
similarity index 61%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Driver.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Driver.java
index c196daa..6c977a8 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Driver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Driver.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.pathmergingh2;
+package edu.uci.ics.genomix.hadoop.pathmergingh2;
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
@@ -29,13 +29,18 @@
import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
+
+import edu.uci.ics.genomix.hadoop.pathmergingh1.MergePathH1Driver;
+import edu.uci.ics.genomix.hadoop.pathmergingh1.MergePathH1Mapper;
+import edu.uci.ics.genomix.hadoop.pathmergingh1.MergePathH1Reducer;
+import edu.uci.ics.genomix.hadoop.pathmergingh1.MergePathMultiSeqOutputFormat;
+//import edu.uci.ics.genomix.pathmergingh1.MergePathValueWritable;
import edu.uci.ics.genomix.type.KmerBytesWritable;
import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-
+import edu.uci.ics.genomix.type.MergePathValueWritable;
@SuppressWarnings("deprecation")
public class MergePathH2Driver {
-
+
private static class Options {
@Option(name = "-inputpath", usage = "the input path", required = true)
public String inputPath;
@@ -45,131 +50,142 @@
@Option(name = "-mergeresultpath", usage = "the merging results path", required = true)
public String mergeResultPath;
-
+
@Option(name = "-num-reducers", usage = "the number of reducers", required = true)
public int numReducers;
@Option(name = "-kmer-size", usage = "the size of kmer", required = true)
public int sizeKmer;
-
+
@Option(name = "-merge-rounds", usage = "the while rounds of merging", required = true)
public int mergeRound;
}
-
- public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer, int mergeRound, String defaultConfPath)
- throws IOException{
+ public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer,
+ int mergeRound, String defaultConfPath) throws IOException {
JobConf conf = new JobConf(MergePathH2Driver.class);
conf.setInt("sizeKmer", sizeKmer);
-
+
if (defaultConfPath != null) {
conf.addResource(new Path(defaultConfPath));
}
conf.setJobName("Initial Path-Starting-Points Table");
- conf.setMapperClass(SNodeInitialMapper.class);
+ conf.setMapperClass(SNodeInitialMapper.class);
conf.setReducerClass(SNodeInitialReducer.class);
conf.setMapOutputKeyClass(KmerBytesWritable.class);
conf.setMapOutputValueClass(MergePathValueWritable.class);
-
+
conf.setInputFormat(SequenceFileInputFormat.class);
conf.setOutputFormat(SequenceFileOutputFormat.class);
+ String singlePointPath = "comSinglePath0";
+
+ MultipleOutputs.addNamedOutput(conf, singlePointPath, MergePathMultiSeqOutputFormat.class,
+ VKmerBytesWritable.class, MergePathValueWritable.class);
+
conf.setOutputKeyClass(VKmerBytesWritable.class);
conf.setOutputValueClass(MergePathValueWritable.class);
-
+
FileInputFormat.setInputPaths(conf, new Path(inputPath));
- FileOutputFormat.setOutputPath(conf, new Path(inputPath + "-step1"));
+ FileOutputFormat.setOutputPath(conf, new Path(inputPath + "stepNext"));
conf.setNumReduceTasks(numReducers);
FileSystem dfs = FileSystem.get(conf);
- dfs.delete(new Path(inputPath + "-step1"), true);
+ dfs.delete(new Path(inputPath + "stepNext"), true);
JobClient.runJob(conf);
+ dfs.rename(new Path(inputPath + "stepNext" + "/" + singlePointPath), new Path(mergeResultPath + "/" + singlePointPath));
+
int iMerge = 0;
-/*----------------------------------------------------------------------*/
- for(iMerge = 0; iMerge < mergeRound; iMerge ++){
- if(!dfs.exists(new Path(inputPath + "-step1")))
- break;
+ for (iMerge = 1; iMerge <= mergeRound; iMerge++) {
+// if (!dfs.exists(new Path(inputPath + "-step1")))
+// break;
conf = new JobConf(MergePathH2Driver.class);
conf.setInt("sizeKmer", sizeKmer);
conf.setInt("iMerge", iMerge);
-
+
if (defaultConfPath != null) {
conf.addResource(new Path(defaultConfPath));
}
conf.setJobName("Path Merge");
-
+
conf.setMapperClass(MergePathH2Mapper.class);
conf.setReducerClass(MergePathH2Reducer.class);
-
+
conf.setMapOutputKeyClass(VKmerBytesWritable.class);
conf.setMapOutputValueClass(MergePathValueWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
-
- String uncomplete = "uncomplete" + iMerge;
- String complete = "complete" + iMerge;
-
- MultipleOutputs.addNamedOutput(conf, uncomplete,
- MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
- MergePathValueWritable.class);
- MultipleOutputs.addNamedOutput(conf, complete,
- MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
- MergePathValueWritable.class);
-
+ conf.setInputFormat(SequenceFileInputFormat.class);
+
+ String uncompSinglePath = "uncompSinglePath" + iMerge;
+ String comSinglePath = "comSinglePath" + iMerge;
+ String comCircle = "comCircle" + iMerge;
+
+ MultipleOutputs.addNamedOutput(conf, uncompSinglePath, MergePathMultiSeqOutputFormat.class,
+ VKmerBytesWritable.class, MergePathValueWritable.class);
+
+ MultipleOutputs.addNamedOutput(conf, comSinglePath, MergePathMultiSeqOutputFormat.class,
+ VKmerBytesWritable.class, MergePathValueWritable.class);
+
+ MultipleOutputs.addNamedOutput(conf, comCircle, MergePathMultiSeqOutputFormat.class,
+ VKmerBytesWritable.class, MergePathValueWritable.class);
+
conf.setOutputKeyClass(VKmerBytesWritable.class);
conf.setOutputValueClass(MergePathValueWritable.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath + "stepNext"));
FileOutputFormat.setOutputPath(conf, new Path(outputPath));
conf.setNumReduceTasks(numReducers);
dfs.delete(new Path(outputPath), true);
JobClient.runJob(conf);
- dfs.delete(new Path(inputPath + "-step1"), true);
- dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
- dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));
+ dfs.delete(new Path(inputPath + "stepNext"), true);
+ dfs.rename(new Path(outputPath + "/" + uncompSinglePath), new Path(inputPath + "stepNext"));
+ dfs.rename(new Path(outputPath + "/" + comSinglePath), new Path(mergeResultPath + "/" + comSinglePath));
+ dfs.rename(new Path(outputPath + "/" + comCircle), new Path(mergeResultPath + "/" + comCircle));
}
/* conf = new JobConf(MergePathH2Driver.class);
conf.setInt("sizeKmer", sizeKmer);
conf.setInt("iMerge", iMerge);
-
+
if (defaultConfPath != null) {
conf.addResource(new Path(defaultConfPath));
}
conf.setJobName("Path Merge");
-
+
conf.setMapperClass(MergePathH2Mapper.class);
conf.setReducerClass(MergePathH2Reducer.class);
-
+
conf.setMapOutputKeyClass(VKmerBytesWritable.class);
conf.setMapOutputValueClass(MergePathValueWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
-
- String uncomplete = "uncomplete" + iMerge;
- String complete = "complete" + iMerge;
-
- MultipleOutputs.addNamedOutput(conf, uncomplete,
- MergePathMultiTextOutputFormat.class, VKmerBytesWritable.class,
- MergePathValueWritable.class);
- MultipleOutputs.addNamedOutput(conf, complete,
- MergePathMultiTextOutputFormat.class, VKmerBytesWritable.class,
- MergePathValueWritable.class);
-
+ conf.setInputFormat(SequenceFileInputFormat.class);
+
+ String uncompSinglePath = "uncompSinglePath" + iMerge;
+ String comSinglePath = "comSinglePath" + iMerge;
+ String comCircle = "comCircle" + iMerge;
+
+ MultipleOutputs.addNamedOutput(conf, uncompSinglePath, MergePathMultiTextOutputFormat.class,
+ VKmerBytesWritable.class, MergePathValueWritable.class);
+
+ MultipleOutputs.addNamedOutput(conf, comSinglePath, MergePathMultiTextOutputFormat.class,
+ VKmerBytesWritable.class, MergePathValueWritable.class);
+
+ MultipleOutputs.addNamedOutput(conf, comCircle, MergePathMultiTextOutputFormat.class,
+ VKmerBytesWritable.class, MergePathValueWritable.class);
+
conf.setOutputKeyClass(VKmerBytesWritable.class);
conf.setOutputValueClass(MergePathValueWritable.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath + "stepNext"));
FileOutputFormat.setOutputPath(conf, new Path(outputPath));
conf.setNumReduceTasks(numReducers);
dfs.delete(new Path(outputPath), true);
JobClient.runJob(conf);
- dfs.delete(new Path(inputPath + "-step1"), true);
- dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
- dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));*/
+ dfs.delete(new Path(inputPath + "stepNext"), true);
+ dfs.rename(new Path(outputPath + "/" + uncompSinglePath), new Path(inputPath + "stepNext"));
+ dfs.rename(new Path(outputPath + "/" + comSinglePath), new Path(mergeResultPath + "/" + comSinglePath));
+ dfs.rename(new Path(outputPath + "/" + comCircle), new Path(mergeResultPath + "/" + comCircle));*/
}
public static void main(String[] args) throws Exception {
@@ -177,6 +193,7 @@
CmdLineParser parser = new CmdLineParser(options);
parser.parseArgument(args);
MergePathH2Driver driver = new MergePathH2Driver();
- driver.run(options.inputPath, options.outputPath, options.mergeResultPath, options.numReducers, options.sizeKmer, options.mergeRound, null);
+ driver.run(options.inputPath, options.outputPath, options.mergeResultPath, options.numReducers,
+ options.sizeKmer, options.mergeRound, null);
}
}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Mapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Mapper.java
similarity index 79%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Mapper.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Mapper.java
index 726dd4c..00dcb55 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Mapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Mapper.java
@@ -12,9 +12,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.pathmergingh2;
+package edu.uci.ics.genomix.hadoop.pathmergingh2;
import java.io.IOException;
+import java.util.Arrays;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
@@ -23,6 +24,7 @@
import edu.uci.ics.genomix.type.GeneCode;
import edu.uci.ics.genomix.type.VKmerBytesWritable;
import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
@SuppressWarnings("deprecation")
public class MergePathH2Mapper extends MapReduceBase implements
@@ -56,33 +58,48 @@
switch (bitStartEnd) {
case (byte) 0x01:
+ //if this record is a start-point, it only maps its succeeding node
+ /**
+ * eg. the kmer: AGCGT(already merge 3 kmers sizeof 3), adjMap C|G
+ * succeedCode -> G, then tmpKmer stores the succeeding neighbor: GTG -> outputKmer
+ * then we store the AGC in the tmpKmer -> outputValue
+ */
byte succeedCode = GeneCode.getGeneCodeFromBitMap(succeed);
tmpKmer.set(outputKmerFactory.getLastKmerFromChain(KMER_SIZE, key));
outputKmer.set(outputKmerFactory.shiftKmerWithNextCode(tmpKmer, succeedCode));
-
tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(key.getKmerLength() - (KMER_SIZE - 1), key));
+ //mark the flag of key --> reverse record
bitFlag = (byte) (bitFlag | 0x08);
outputValue.set(adjBitMap, bitFlag, tmpKmer);
output.collect(outputKmer, outputValue);
break;
+
case (byte) 0x80:
+ //if the record is an end-point, it only maps itself
+ /**
+ * eg. the kmer: AGCGT(already merge 3 kmers sizeof 3), adjMap C|G
+ * tmpKmer store the first kmer: AGC ->outputKmer
+ * then we store the GT in the tmpKmer -> outputValue
+ */
tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(KMER_SIZE, key));
outputKmer.set(tmpKmer);
tmpKmer.set(outputKmerFactory.getLastKmerFromChain(key.getKmerLength() - KMER_SIZE, key));
+ //mark the flag of key --> itself record
bitFlag = (byte) (bitFlag | 0x10);
outputValue.set(adjBitMap, bitFlag, tmpKmer);
output.collect(outputKmer, outputValue);
break;
+ //if the record is a non-start/end point, it maps both its succeeding node and itself
case (byte) 0x00:
succeedCode = GeneCode.getGeneCodeFromBitMap(succeed);
+ //it maps the succeed nodes
tmpKmer.set(outputKmerFactory.getLastKmerFromChain(KMER_SIZE, key));
outputKmer.set(outputKmerFactory.shiftKmerWithNextCode(tmpKmer, succeedCode));
-
tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(key.getKmerLength() - (KMER_SIZE - 1), key));
bitFlag = (byte) (bitFlag | 0x08);
outputValue.set(adjBitMap, bitFlag, tmpKmer);
output.collect(outputKmer, outputValue);
-
+ //it maps itself
bitFlag = (byte) (bitFlag & 0xF7);
tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(KMER_SIZE, key));
outputKmer.set(tmpKmer);
@@ -91,11 +108,6 @@
outputValue.set(adjBitMap, bitFlag, tmpKmer);
output.collect(outputKmer, outputValue);
break;
- case (byte) 0x81:
- outputKmer.set(key);
- outputValue.set(adjBitMap, bitFlag, null);
- output.collect(outputKmer, outputValue);
- break;
}
}
}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Reducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Reducer.java
similarity index 70%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Reducer.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Reducer.java
index 63391b4..652404e 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Reducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Reducer.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.pathmergingh2;
+package edu.uci.ics.genomix.hadoop.pathmergingh2;
import java.io.IOException;
import java.util.Iterator;
@@ -24,10 +24,12 @@
import org.apache.hadoop.mapred.lib.MultipleOutputs;
import edu.uci.ics.genomix.type.VKmerBytesWritable;
import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
@SuppressWarnings("deprecation")
public class MergePathH2Reducer extends MapReduceBase implements
Reducer<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
+
private VKmerBytesWritableFactory kmerFactory;
private VKmerBytesWritable outputKmer;
private VKmerBytesWritable tmpKmer1;
@@ -35,7 +37,6 @@
private int KMER_SIZE;
private MergePathValueWritable outputValue;
private MergePathValueWritable tmpOutputValue;
-
MultipleOutputs mos = null;
private int I_MERGE;
@@ -58,18 +59,27 @@
outputKmer.set(key);
if (values.hasNext() == true) {
byte bitFlag = outputValue.getFlag();
+ //decide whether this record is start or end
byte bitStartEnd = (byte) (0x81 & bitFlag);
+ //decide whether this record is reverse
byte bitPosiNegative = (byte) (0x18 & bitFlag);
byte succeed = (byte) 0x0F;
switch (bitPosiNegative) {
case (byte) 0x08:
+ //the first record is reverse record
+ /**
+ * eg. if 2 records go into same group, the first is reverse: (GTG, AGC, C|G, 0x08) the second is itself: (GTG, null, A|T, 0x10)
+ * the result of combining: AGCGTG, null, C|T, 0x01
+ */
if (outputValue.getKmerLength() != 0)
tmpKmer1.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), key));
else
tmpKmer1.set(key);
byte adjBitMap = outputValue.getAdjBitMap();
+ //get the next value record
outputValue = values.next();
bitStartEnd = (byte) (0x81 & outputValue.getFlag());
+ //if this record contain end-point
if (bitStartEnd == (byte) 0x80) {
if (outputValue.getKmerLength() != 0)
tmpKmer2.set(kmerFactory.mergeTwoKmer(key, outputValue.getKmer()));
@@ -77,7 +87,7 @@
tmpKmer2.set(key);
byte tmpFlag = (byte) 0x80;
tmpOutputValue.set(outputValue.getAdjBitMap(), tmpFlag, null);
- mos.getCollector("uncomplete" + I_MERGE, reporter).collect(tmpKmer2, tmpOutputValue);
+ mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(tmpKmer2, tmpOutputValue);
}
if (outputValue.getKmerLength() != 0)
outputKmer.set(kmerFactory.mergeTwoKmer(tmpKmer1, outputValue.getKmer()));
@@ -89,17 +99,29 @@
byte outputFlag = (byte) (0x81 & bitFlag);
outputFlag = (byte) (outputFlag | ((byte) 0x81 & outputValue.getFlag()));
outputValue.set(adjBitMap, outputFlag, null);
- mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ // if the merged record carries both start and end flags (0x81) it is complete and goes to comSinglePath, otherwise to uncompSinglePath
+ bitFlag = outputValue.getFlag();
+ bitStartEnd = (byte) (0x81 & bitFlag);
+ if (bitStartEnd == (byte) 0x81) {
+ mos.getCollector("comSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ } else
+ mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
break;
case (byte) 0x10:
+ //the first record value is 'itself' format
+ /**
+ * eg. if 2 records go into same group, the first is itself: (GTG, null, A|T, 0x10) the second is reverse: (GTG, AGC, C|G, 0x08)
+ * the result of combining: AGCGTG, null, C|T, 0x01
+ */
if (outputValue.getKmerLength() != 0)
tmpKmer1.set(kmerFactory.mergeTwoKmer(key, outputValue.getKmer()));
else
tmpKmer1.set(key);
+ //if this record contain end-point
if (bitStartEnd == (byte) 0x80) {
byte tmpFlag = (byte) 0x80;
tmpOutputValue.set(outputValue.getAdjBitMap(), tmpFlag, null);
- mos.getCollector("uncomplete" + I_MERGE, reporter).collect(tmpKmer1, tmpOutputValue);
+ mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(tmpKmer1, tmpOutputValue);
}
succeed = (byte) (succeed & outputValue.getAdjBitMap());
outputValue = values.next();
@@ -109,22 +131,22 @@
outputKmer.set(tmpKmer1);
adjBitMap = outputValue.getAdjBitMap();
adjBitMap = (byte) (adjBitMap & 0xF0);
- adjBitMap = (byte) (adjBitMap | succeed);
+ adjBitMap = (byte) (adjBitMap | succeed);
outputFlag = (byte) (0x81 & bitFlag);
outputFlag = (byte) (outputFlag | ((byte) 0x81 & outputValue.getFlag()));
outputValue.set(adjBitMap, outputFlag, null);
- mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ // if the merged record carries both start and end flags (0x81) it is complete and goes to comSinglePath, otherwise to uncompSinglePath
+ bitFlag = outputValue.getFlag();
+ bitStartEnd = (byte) (0x81 & bitFlag);
+ if (bitStartEnd == (byte) 0x81) {
+ mos.getCollector("comSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ } else
+ mos.getCollector("uncompSinglePath" + I_MERGE, reporter).collect(outputKmer, outputValue);
break;
}
- } else {
- byte bitFlag = outputValue.getFlag();
- byte bitStartEnd = (byte) (0x81 & bitFlag);
- if (bitStartEnd == (byte) 0x81) {
- outputKmer.set(key);
- mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
- }
}
}
+
public void close() throws IOException {
// TODO Auto-generated method stub
mos.close();
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiSeqOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathMultiSeqOutputFormat.java
similarity index 91%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiSeqOutputFormat.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathMultiSeqOutputFormat.java
index 5e8f1d8..7731503 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiSeqOutputFormat.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathMultiSeqOutputFormat.java
@@ -12,13 +12,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.pathmergingh1;
+package edu.uci.ics.genomix.hadoop.pathmergingh2;
import java.io.File;
import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-
+import edu.uci.ics.genomix.type.MergePathValueWritable;
public class MergePathMultiSeqOutputFormat extends MultipleSequenceFileOutputFormat<VKmerBytesWritable, MergePathValueWritable>{
@Override
protected String generateLeafFileName(String name) {
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiTextOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathMultiTextOutputFormat.java
similarity index 95%
copy from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiTextOutputFormat.java
copy to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathMultiTextOutputFormat.java
index ac88ce0..587008c 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiTextOutputFormat.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathMultiTextOutputFormat.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.pathmergingh1;
+package edu.uci.ics.genomix.hadoop.pathmergingh2;
import java.io.File;
import org.apache.hadoop.io.Text;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/SNodeInitialMapper.java
similarity index 84%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialMapper.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/SNodeInitialMapper.java
index 6270852..58bfd71 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/SNodeInitialMapper.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.pathmergingh2;
+package edu.uci.ics.genomix.hadoop.pathmergingh2;
import java.io.IOException;
import org.apache.hadoop.io.ByteWritable;
@@ -21,16 +21,19 @@
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+
import edu.uci.ics.genomix.type.GeneCode;
import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
@SuppressWarnings("deprecation")
public class SNodeInitialMapper extends MapReduceBase implements
Mapper<KmerBytesWritable, ByteWritable, KmerBytesWritable, MergePathValueWritable> {
- public int KMER_SIZE;
- public KmerBytesWritable outputKmer;
- public MergePathValueWritable outputAdjList;
+ private int KMER_SIZE;
+ private KmerBytesWritable outputKmer;
+ private MergePathValueWritable outputAdjList;
public void configure(JobConf job) {
KMER_SIZE = Integer.parseInt(job.get("sizeKmer"));
@@ -38,6 +41,10 @@
outputAdjList = new MergePathValueWritable();
}
+ /**
+ * @param adjacent the high 4 bits are useless, we just use the lower 4 bits
+ * @return if the degree == 1 then return false, else return true
+ */
boolean measureDegree(byte adjacent) {
boolean result = true;
switch (adjacent) {
@@ -105,13 +112,17 @@
succeed = (byte) (succeed & adjBitMap);
boolean inDegree = measureDegree(precursor);
boolean outDegree = measureDegree(succeed);
+ //if indegree == 1 and outdegree == 1, then it assigns these records' flag to 2
if (inDegree == false && outDegree == false) {
outputKmer.set(key);
- System.out.println(outputKmer.hashCode());
- bitFlag = (byte) 2;
+ bitFlag = (byte) 0x02;
outputAdjList.set(adjBitMap, bitFlag, null);
output.collect(outputKmer, outputAdjList);
} else {
+ // other records maps its precursor neighbors
+ /**
+ * eg. ACT CTA|CA, it maps CAC, TAC, AAC, all the 3 pairs marked 0x80
+ */
for (int i = 0; i < 4; i++) {
byte temp = (byte) 0x01;
byte shiftedCode = 0;
@@ -127,6 +138,10 @@
key.shiftKmerWithNextCode(shiftedCode);
}
}
+ //and also maps its succeeding neighbors
+ /**
+ * eg. kmer:ACT bitMap: CTA|CA, it maps CTC, CTA, all the 2 pairs marked 0x01
+ */
for (int i = 0; i < 4; i++) {
byte temp = (byte) 0x01;
byte shiftedCode = 0;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/SNodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/SNodeInitialReducer.java
new file mode 100644
index 0000000..fc9e80d
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/pathmergingh2/SNodeInitialReducer.java
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.hadoop.pathmergingh2;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
+
+@SuppressWarnings("deprecation")
+public class SNodeInitialReducer extends MapReduceBase implements
+ Reducer<KmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
+ private VKmerBytesWritable outputKmer = new VKmerBytesWritable();
+ private MergePathValueWritable outputValue = new MergePathValueWritable();
+ MultipleOutputs mos = null;
+ public void configure(JobConf job) {
+ mos = new MultipleOutputs(job);
+ }
+ @SuppressWarnings("unchecked")
+ @Override
+ public void reduce(KmerBytesWritable key, Iterator<MergePathValueWritable> values,
+ OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+ outputKmer.set(key);
+ outputValue = values.next();
+ byte startPointFlag = 0x00;
+ byte endPointFlag = 0x00;
+ /**
+ * targetPoint marks the record with indegree 1 and outdegree 1 that we want to find in a group containing multiple records
+ */
+ byte targetPointFlag = 0x00;
+ byte targetAdjList = 0x00;
+ //if we find the start or end point, we will use outputFlag to mark them
+ byte outputFlag = 0x00;
+ if (values.hasNext() == true) {
+ //find startPointFlag, endPointFlag, targetPointFlag
+ switch (outputValue.getFlag()) {
+ case (byte) 0x01:
+ startPointFlag = (byte) 0x01;
+ break;
+ case (byte) 0x80:
+ endPointFlag = (byte) 0x80;
+ break;
+ case (byte) 0x02:
+ targetPointFlag = (byte) 0x02;
+ targetAdjList = outputValue.getAdjBitMap();
+ break;
+ }
+ while (values.hasNext()) {
+ outputValue = values.next();
+ switch (outputValue.getFlag()) {
+ case (byte) 0x01:
+ startPointFlag = (byte) 0x01;
+ break;
+ case (byte) 0x80:
+ endPointFlag = (byte) 0x80;
+ break;
+ case (byte) 0x02:
+ targetPointFlag = (byte) 0x02;
+ targetAdjList = outputValue.getAdjBitMap();
+ break;
+ }
+ if(startPointFlag != (byte) 0x00 && endPointFlag!= (byte) 0x00 && targetPointFlag != (byte) 0x00)
+ break;
+ }
+ //find the start-point or end-point
+ if(targetPointFlag == (byte) 0x02) {
+ // a single-point path (both start and end point) is diverted to the "comSinglePath0" output
+ if(startPointFlag == (byte) 0x01 && endPointFlag == (byte) 0x80) {
+ outputFlag = (byte) (outputFlag | startPointFlag);
+ outputFlag = (byte) (outputFlag | endPointFlag);
+ outputValue.set(targetAdjList, outputFlag, null);
+ mos.getCollector("comSinglePath0", reporter).collect(outputKmer, outputValue);
+ }
+ else {
+ if(startPointFlag == (byte) 0x01) {
+ outputFlag = (byte) (outputFlag | startPointFlag);
+ }
+ if(endPointFlag == (byte) 0x80) {
+ outputFlag = (byte) (outputFlag | endPointFlag);
+ }
+ outputValue.set(targetAdjList, outputFlag, null);
+ output.collect(outputKmer, outputValue);
+ }
+ }
+ } else {
+ // a lone 0x02 record that is neither a start nor an end point is kept in the main output with its flag reset to 0
+ if (outputValue.getFlag() == (byte)0x02) {
+ byte bitFlag = 0;
+ outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);
+ output.collect(outputKmer, outputValue);
+ }
+ }
+ }
+ public void close() throws IOException {
+ // close all named outputs opened through MultipleOutputs
+ mos.close();
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatDriver.java
similarity index 98%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatDriver.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatDriver.java
index 7390d06..e7400be 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatDriver.java
@@ -13,7 +13,7 @@
* limitations under the License.
*/
-package edu.uci.ics.genomix.statistics;
+package edu.uci.ics.genomix.hadoop.statistics;
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatMapper.java
similarity index 98%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatMapper.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatMapper.java
index bb94c5d..623a923 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatMapper.java
@@ -13,7 +13,7 @@
* limitations under the License.
*/
-package edu.uci.ics.genomix.statistics;
+package edu.uci.ics.genomix.hadoop.statistics;
import java.io.IOException;
import org.apache.hadoop.io.ByteWritable;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatReducer.java
similarity index 96%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatReducer.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatReducer.java
index d5ce11c..090e680 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/statistics/GenomixStatReducer.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.statistics;
+package edu.uci.ics.genomix.hadoop.statistics;
import java.io.IOException;
import java.util.Iterator;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Driver.java
deleted file mode 100644
index a8e5f7c..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Driver.java
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.pathmergingh1;
-
-import java.io.IOException;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.mapred.TextOutputFormat;
-import org.apache.hadoop.mapred.lib.MultipleOutputs;
-import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
-import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
-import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.Option;
-
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-@SuppressWarnings("deprecation")
-public class MergePathH1Driver {
-
- private static class Options {
- @Option(name = "-inputpath", usage = "the input path", required = true)
- public String inputPath;
-
- @Option(name = "-outputpath", usage = "the output path", required = true)
- public String outputPath;
-
- @Option(name = "-mergeresultpath", usage = "the merging results path", required = true)
- public String mergeResultPath;
-
- @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
- public int numReducers;
-
- @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
- public int sizeKmer;
-
- @Option(name = "-merge-rounds", usage = "the while rounds of merging", required = true)
- public int mergeRound;
-
- }
-
-
- public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer, int mergeRound, String defaultConfPath)
- throws IOException{
-
- JobConf conf = new JobConf(MergePathH1Driver.class);
- conf.setInt("sizeKmer", sizeKmer);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
- conf.setJobName("Initial Path-Starting-Points Table");
- conf.setMapperClass(SNodeInitialMapper.class);
- conf.setReducerClass(SNodeInitialReducer.class);
-
- conf.setMapOutputKeyClass(KmerBytesWritable.class);
- conf.setMapOutputValueClass(MergePathValueWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
- conf.setOutputFormat(SequenceFileOutputFormat.class);
-
- conf.setOutputKeyClass(VKmerBytesWritable.class);
- conf.setOutputValueClass(MergePathValueWritable.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath));
- FileOutputFormat.setOutputPath(conf, new Path(inputPath + "-step1"));
- conf.setNumReduceTasks(numReducers);
- FileSystem dfs = FileSystem.get(conf);
- dfs.delete(new Path(inputPath + "-step1"), true);
- JobClient.runJob(conf);
- int iMerge = 0;
-/*----------------------------------------------------------------------*/
- for(iMerge = 0; iMerge < mergeRound; iMerge ++){
-
- conf = new JobConf(MergePathH1Driver.class);
- conf.setInt("sizeKmer", sizeKmer);
- conf.setInt("iMerge", iMerge);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
- conf.setJobName("Path Merge");
-
- conf.setMapperClass(MergePathH1Mapper.class);
- conf.setReducerClass(MergePathH1Reducer.class);
-
- conf.setMapOutputKeyClass(VKmerBytesWritable.class);
- conf.setMapOutputValueClass(MergePathValueWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
-
- String uncomplete = "uncomplete" + iMerge;
- String complete = "complete" + iMerge;
-
- MultipleOutputs.addNamedOutput(conf, uncomplete,
- MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
- MergePathValueWritable.class);
-
- MultipleOutputs.addNamedOutput(conf, complete,
- MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
- MergePathValueWritable.class);
-
- conf.setOutputKeyClass(VKmerBytesWritable.class);
- conf.setOutputValueClass(MergePathValueWritable.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- conf.setNumReduceTasks(numReducers);
- dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- dfs.delete(new Path(inputPath + "-step1"), true);
- dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
- dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));
- }
- /*----------------------------------------*/
-/* conf = new JobConf(MergePathH1Driver.class);
- conf.setInt("sizeKmer", sizeKmer);
- conf.setInt("iMerge", iMerge);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
- conf.setJobName("Path Merge");
-
- conf.setMapperClass(MergePathH1Mapper.class);
- conf.setReducerClass(MergePathH1Reducer.class);
-
- conf.setMapOutputKeyClass(VKmerBytesWritable.class);
- conf.setMapOutputValueClass(MergePathValueWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
-
- String uncomplete = "uncomplete" + iMerge;
- String complete = "complete" + iMerge;
-
- MultipleOutputs.addNamedOutput(conf, uncomplete,
- MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
- MergePathValueWritable.class);
-
- MultipleOutputs.addNamedOutput(conf, complete,
- MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
- MergePathValueWritable.class);
-
- conf.setOutputKeyClass(VKmerBytesWritable.class);
- conf.setOutputValueClass(MergePathValueWritable.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- conf.setNumReduceTasks(numReducers);
- dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- dfs.delete(new Path(inputPath + "-step1"), true);
- dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
- dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));*/
- }
-
- public static void main(String[] args) throws Exception {
- Options options = new Options();
- CmdLineParser parser = new CmdLineParser(options);
- parser.parseArgument(args);
- MergePathH1Driver driver = new MergePathH1Driver();
- driver.run(options.inputPath, options.outputPath, options.mergeResultPath, options.numReducers, options.sizeKmer, options.mergeRound, null);
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Reducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Reducer.java
deleted file mode 100644
index f5c9c8d..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Reducer.java
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.pathmergingh1;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.lib.MultipleOutputs;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
-
-@SuppressWarnings("deprecation")
-public class MergePathH1Reducer extends MapReduceBase implements
- Reducer<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
- private VKmerBytesWritableFactory kmerFactory;
- private VKmerBytesWritable outputKmer;
- private VKmerBytesWritable tmpKmer;
- private int KMER_SIZE;
- private MergePathValueWritable outputValue;
- private MergePathValueWritable tmpOutputValue;
- MultipleOutputs mos = null;
- private int I_MERGE;
-
- public void configure(JobConf job) {
- mos = new MultipleOutputs(job);
- I_MERGE = Integer.parseInt(job.get("iMerge"));
- KMER_SIZE = job.getInt("sizeKmer", 0);
- outputValue = new MergePathValueWritable();
- tmpOutputValue = new MergePathValueWritable();
- kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
- outputKmer = new VKmerBytesWritable(KMER_SIZE);
- tmpKmer = new VKmerBytesWritable(KMER_SIZE);
- }
-
- @SuppressWarnings("unchecked")
- @Override
- public void reduce(VKmerBytesWritable key, Iterator<MergePathValueWritable> values,
- OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- outputValue = values.next();
- if (values.hasNext() == true) {
- if (outputValue.getFlag() != 1) {
- byte nextAdj = outputValue.getAdjBitMap();
- byte succeed = (byte) 0x0F;
- succeed = (byte) (succeed & nextAdj);
-
- outputValue = values.next();
- byte adjBitMap = outputValue.getAdjBitMap();
- byte flag = outputValue.getFlag();
- if (outputValue.getKmerLength() != 0)
- outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), key));
- else
- outputKmer.set(key);
-
- adjBitMap = (byte) (adjBitMap & 0xF0);
- adjBitMap = (byte) (adjBitMap | succeed);
- outputValue.set(adjBitMap, flag, null);
- mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
- } else {
- tmpOutputValue.set(outputValue);
- byte tmpAdjMap = tmpOutputValue.getAdjBitMap();
-
- outputValue = values.next();
- if (outputValue.getFlag() != 1) {
- if (tmpOutputValue.getKmerLength() != 0)
- outputKmer.set(kmerFactory.mergeTwoKmer(tmpOutputValue.getKmer(), key));
- else
- outputKmer.set(key);
-
- byte nextAdj = outputValue.getAdjBitMap();
- byte succeed = (byte) 0x0F;
- succeed = (byte) (succeed & nextAdj);
- tmpAdjMap = (byte) (tmpAdjMap & 0xF0);
- tmpAdjMap = (byte) (tmpAdjMap | succeed);
- outputValue.set(tmpAdjMap, tmpOutputValue.getFlag(), null);
- mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
- } else {
-
- tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
- if (tmpOutputValue.getKmerLength() != 0)
- outputKmer.set(kmerFactory.mergeTwoKmer(tmpOutputValue.getKmer(), tmpKmer));
- else
- outputKmer.set(tmpKmer);
- tmpOutputValue.set(tmpAdjMap, tmpOutputValue.getFlag(), null);
- mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, tmpOutputValue);
-
- tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
- if (outputValue.getKmerLength() != 0)
- outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), tmpKmer));
- else
- outputKmer.set(tmpKmer);
- outputValue.set(outputValue.getAdjBitMap(), outputValue.getFlag(), null);
- mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
-
- while (values.hasNext()) {
- outputValue = values.next();
- tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
- if (outputValue.getKmerLength() != 0)
- outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), tmpKmer));
- else
- outputKmer.set(tmpKmer);
- outputValue.set(outputValue.getAdjBitMap(), outputValue.getFlag(), null);
- mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
- }
- }
- }
- } else {
- if (outputValue.getFlag() != 0) {
- tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
- if (outputValue.getKmerLength() != 0)
- outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), tmpKmer));
- else
- outputKmer.set(tmpKmer);
- outputValue.set(outputValue.getAdjBitMap(), outputValue.getFlag(), null);
- mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
-
- } else
- mos.getCollector("uncomplete" + I_MERGE, reporter).collect(key, outputValue);
- }
- }
-
- public void close() throws IOException {
- // TODO Auto-generated method stub
- mos.close();
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathValueWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathValueWritable.java
deleted file mode 100644
index f14e5f2..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathValueWritable.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.pathmergingh1;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import org.apache.hadoop.io.BinaryComparable;
-import org.apache.hadoop.io.WritableComparable;
-import edu.uci.ics.genomix.type.GeneCode;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-public class MergePathValueWritable extends BinaryComparable implements WritableComparable<BinaryComparable> {
-
- private static final byte[] EMPTY_BYTES = {};
- private byte adjBitMap;
- private byte flag;
- private VKmerBytesWritable kmer;
-
- public MergePathValueWritable() {
- this((byte) 0, (byte) 0, 0, EMPTY_BYTES);
- }
-
- public MergePathValueWritable(byte adjBitMap, byte flag, int kmerSize, byte[] bytes) {
- this.adjBitMap = adjBitMap;
- this.flag = flag;
- this.kmer = new VKmerBytesWritable(kmerSize, bytes);
- kmer.set(bytes, 0, bytes.length);
- }
-
- public void set(MergePathValueWritable right) {
- set(right.getAdjBitMap(), right.getFlag(), right.getKmer());
- }
-
- public void set(byte adjBitMap, byte flag, VKmerBytesWritable kmer) {
- this.kmer.set(kmer);
- this.adjBitMap = adjBitMap;
- this.flag = flag;
- }
-
- @Override
- public void readFields(DataInput arg0) throws IOException {
- // TODO Auto-generated method stub
- kmer.readFields(arg0);
- adjBitMap = arg0.readByte();
- flag = arg0.readByte();
- }
-
- @Override
- public void write(DataOutput arg0) throws IOException {
- // TODO Auto-generated method stub
-
- kmer.write(arg0);
- arg0.writeByte(adjBitMap);
- arg0.writeByte(flag);
- }
-
- public VKmerBytesWritable getKmer() {
- if (kmer.getLength() != 0) {
- return kmer;
- }
- return null;
- }
-
- public byte getAdjBitMap() {
- return this.adjBitMap;
- }
-
- public byte getFlag() {
- return this.flag;
- }
-
- public String toString() {
- return GeneCode.getSymbolFromBitMap(adjBitMap) + '\t' + String.valueOf(flag);
- }
-
- @Override
- public byte[] getBytes() {
- // TODO Auto-generated method stub
- if (kmer.getLength() != 0) {
- return kmer.getBytes();
- } else
- return null;
-
- }
-
- public int getKmerLength() {
- return kmer.getKmerLength();
- }
-
- @Override
- public int getLength() {
- return kmer.getLength();
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/SNodeInitialMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/SNodeInitialMapper.java
deleted file mode 100644
index 1c12f63..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/SNodeInitialMapper.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.pathmergingh1;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.GeneCode;
-
-@SuppressWarnings("deprecation")
-public class SNodeInitialMapper extends MapReduceBase implements
- Mapper<KmerBytesWritable, ByteWritable, KmerBytesWritable, MergePathValueWritable> {
-
- public int KMER_SIZE;
- public KmerBytesWritable outputKmer;
- public MergePathValueWritable outputAdjList;
-
- public void configure(JobConf job) {
- KMER_SIZE = Integer.parseInt(job.get("sizeKmer"));
- outputKmer = new KmerBytesWritable(KMER_SIZE);
- outputAdjList = new MergePathValueWritable();
- }
-
- boolean measureDegree(byte adjacent) {
- boolean result = true;
- switch (adjacent) {
- case 0:
- result = true;
- break;
- case 1:
- result = false;
- break;
- case 2:
- result = false;
- break;
- case 3:
- result = true;
- break;
- case 4:
- result = false;
- break;
- case 5:
- result = true;
- break;
- case 6:
- result = true;
- break;
- case 7:
- result = true;
- break;
- case 8:
- result = false;
- break;
- case 9:
- result = true;
- break;
- case 10:
- result = true;
- break;
- case 11:
- result = true;
- break;
- case 12:
- result = true;
- break;
- case 13:
- result = true;
- break;
- case 14:
- result = true;
- break;
- case 15:
- result = true;
- break;
- }
- return result;
- }
-
- @Override
- public void map(KmerBytesWritable key, ByteWritable value,
- OutputCollector<KmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- byte precursor = (byte) 0xF0;
- byte succeed = (byte) 0x0F;
- byte adjBitMap = value.get();
- byte bitFlag = (byte) 0;
- precursor = (byte) (precursor & adjBitMap);
- precursor = (byte) ((precursor & 0xff) >> 4);
- succeed = (byte) (succeed & adjBitMap);
- boolean inDegree = measureDegree(precursor);
- boolean outDegree = measureDegree(succeed);
- if (inDegree == false && outDegree == false) {
- outputKmer.set(key);
- bitFlag = (byte) 2;
- outputAdjList.set(adjBitMap, bitFlag, null);///~~~~~kmersize----->0
- output.collect(outputKmer, outputAdjList);
- }
- else{
- for(int i = 0 ; i < 4; i ++){
- byte temp = 0x01;
- byte shiftedCode = 0;
- temp = (byte)(temp << i);
- temp = (byte) (succeed & temp);
- if(temp != 0 ){
- byte succeedCode = GeneCode.getGeneCodeFromBitMap(temp);
- shiftedCode = key.shiftKmerWithNextCode(succeedCode);
- outputKmer.set(key);
- outputAdjList.set((byte)0, bitFlag, null);
- output.collect(outputKmer, outputAdjList);
- key.shiftKmerWithPreCode(shiftedCode);
- }
- }
- }
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/SNodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/SNodeInitialReducer.java
deleted file mode 100644
index 1426fba..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/SNodeInitialReducer.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.pathmergingh1;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-@SuppressWarnings("deprecation")
-public class SNodeInitialReducer extends MapReduceBase implements
- Reducer<KmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
- private VKmerBytesWritable outputKmer = new VKmerBytesWritable();
- private MergePathValueWritable outputValue = new MergePathValueWritable();
-
-
- @Override
- public void reduce(KmerBytesWritable key, Iterator<MergePathValueWritable> values,
- OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- outputKmer.set(key);
- outputValue = values.next();
- if (values.hasNext() == true) {
- if (outputValue.getFlag() == 2) {
- byte bitFlag = 1;
- outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);///outputValue.getKmerLength()
- output.collect(outputKmer, outputValue);
- } else {
- boolean flag = false;
- while (values.hasNext()) {
- outputValue = values.next();
- if (outputValue.getFlag() == 2) {
- flag = true;
- break;
- }
- }
- if (flag == true) {
- byte bitFlag = 1;
- outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);
- output.collect(outputKmer, outputValue);
- }
- }
- } else {
- if (outputValue.getFlag() == 2) {
- byte bitFlag = 0;
- outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);
- output.collect(outputKmer, outputValue);
- }
- }
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathMultiSeqOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathMultiSeqOutputFormat.java
deleted file mode 100644
index 66d3b6b..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathMultiSeqOutputFormat.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.pathmergingh2;
-
-import java.io.File;
-import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-public class MergePathMultiSeqOutputFormat extends MultipleSequenceFileOutputFormat<VKmerBytesWritable, MergePathValueWritable>{
- @Override
- protected String generateLeafFileName(String name) {
- // TODO Auto-generated method stub System.out.println(name);
- String[] names = name.split("-");
- return names[0] + File.separator + name;
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathMultiTextOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathMultiTextOutputFormat.java
deleted file mode 100644
index bca9695..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathMultiTextOutputFormat.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.pathmergingh2;
-
-import java.io.File;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
-
-public class MergePathMultiTextOutputFormat extends MultipleTextOutputFormat<Text, Text>{
- @Override
- protected String generateLeafFileName(String name) {
- // TODO Auto-generated method stub System.out.println(name);
- String[] names = name.split("-");
- return names[0] + File.separator + name;
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialReducer.java
deleted file mode 100644
index 8ba5aa8..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialReducer.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.pathmergingh2;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-@SuppressWarnings("deprecation")
-public class SNodeInitialReducer extends MapReduceBase implements
- Reducer<KmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
- private VKmerBytesWritable outputKmer = new VKmerBytesWritable();
- private MergePathValueWritable outputValue = new MergePathValueWritable();
-
- @Override
- public void reduce(KmerBytesWritable key, Iterator<MergePathValueWritable> values,
- OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- outputKmer.set(key);
- outputValue = values.next();
- byte startFlag = 0x00;
- byte endFlag = 0x00;
- byte targetPointFlag = 0x00;
- byte targetAdjList = 0x00;
- byte outputFlag = 0x00;
- if (values.hasNext() == true) {
- switch (outputValue.getFlag()) {
- case (byte) 0x01:
- startFlag = (byte) 0x01;
- break;
- case (byte) 0x80:
- endFlag = (byte) 0x80;
- break;
- case (byte) 0x02:
- targetPointFlag = (byte) 0x02;
- targetAdjList = outputValue.getAdjBitMap();
- break;
- }
- while (values.hasNext()) {
- outputValue = values.next();
- switch (outputValue.getFlag()) {
- case (byte) 0x01:
- startFlag = (byte) 0x01;
- break;
- case (byte) 0x80:
- endFlag = (byte) 0x80;
- break;
- case (byte) 0x02:
- targetPointFlag = (byte) 0x02;
- targetAdjList = outputValue.getAdjBitMap();
- break;
- }
- if(startFlag != (byte) 0x00 && endFlag!= (byte) 0x00 && targetPointFlag != (byte) 0x00)
- break;
- }
- if(targetPointFlag == (byte) 0x02) {
- if(startFlag == (byte) 0x01) {
- outputFlag = (byte) (outputFlag | startFlag);
- }
- if(endFlag == (byte) 0x80) {
- outputFlag = (byte) (outputFlag | endFlag);
- }
- outputValue.set(targetAdjList, outputFlag, null);
- output.collect(outputKmer, outputValue);
- }
- } else {
- if (outputValue.getFlag() == 2) {
- byte bitFlag = 0;
- outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);
- output.collect(outputKmer, outputValue);
- }
- }
- }
-}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingTest.java
similarity index 96%
rename from genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingTest.java
rename to genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingTest.java
index acca4e7..ea05e53 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/gbresultschecking/ResultsCheckingTest.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.gbresultschecking;
+package edu.uci.ics.genomix.hadoop.gbresultschecking;
import java.io.DataOutputStream;
import java.io.File;
@@ -27,7 +27,7 @@
import org.apache.hadoop.mapred.MiniMRCluster;
import org.junit.Test;
-import edu.uci.ics.genomix.gbresultschecking.ResultsCheckingDriver;
+import edu.uci.ics.genomix.hadoop.gbresultschecking.ResultsCheckingDriver;
@SuppressWarnings("deprecation")
public class ResultsCheckingTest {
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/graphbuilding/GraphBuildingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphbuilding/GraphBuildingTest.java
similarity index 96%
rename from genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/graphbuilding/GraphBuildingTest.java
rename to genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphbuilding/GraphBuildingTest.java
index efd3619..ad14d33 100755
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/graphbuilding/GraphBuildingTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphbuilding/GraphBuildingTest.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.graphbuilding;
+package edu.uci.ics.genomix.hadoop.graphbuilding;
import java.io.BufferedWriter;
import java.io.DataOutputStream;
@@ -30,11 +30,12 @@
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.util.ReflectionUtils;
import org.junit.Test;
-import edu.uci.ics.genomix.graphbuilding.GenomixDriver;
+
+import edu.uci.ics.genomix.hadoop.graphbuilding.GenomixDriver;
+import edu.uci.ics.genomix.hadoop.utils.TestUtils;
import edu.uci.ics.genomix.type.GeneCode;
import edu.uci.ics.genomix.type.KmerBytesWritable;
import edu.uci.ics.genomix.type.KmerCountValue;
-import edu.uci.ics.genomix.utils.TestUtils;
/**
* This class test the correctness of graphbuilding program
*/
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/graphcountfilter/CountFilterTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterTest.java
similarity index 96%
rename from genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/graphcountfilter/CountFilterTest.java
rename to genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterTest.java
index bff0179..5f8b3db 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/graphcountfilter/CountFilterTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/graphcountfilter/CountFilterTest.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.graphcountfilter;
+package edu.uci.ics.genomix.hadoop.graphcountfilter;
import java.io.BufferedWriter;
import java.io.DataOutputStream;
@@ -31,9 +31,10 @@
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.util.ReflectionUtils;
import org.junit.Test;
-import edu.uci.ics.genomix.graphcountfilter.CountFilterDriver;
+
+import edu.uci.ics.genomix.hadoop.graphcountfilter.CountFilterDriver;
+import edu.uci.ics.genomix.hadoop.utils.TestUtils;
import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.utils.TestUtils;
@SuppressWarnings("deprecation")
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/pathmergingh1/MergePathTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathTest.java
similarity index 95%
rename from genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/pathmergingh1/MergePathTest.java
rename to genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathTest.java
index 109db50..a1fd3a4 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/pathmergingh1/MergePathTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh1/MergePathTest.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.pathmergingh1;
+package edu.uci.ics.genomix.hadoop.pathmergingh1;
import java.io.BufferedWriter;
import java.io.DataOutputStream;
@@ -31,10 +31,11 @@
import org.apache.hadoop.util.ReflectionUtils;
import org.junit.Test;
-import edu.uci.ics.genomix.pathmergingh1.MergePathH1Driver;
+import edu.uci.ics.genomix.hadoop.pathmergingh1.MergePathH1Driver;
+import edu.uci.ics.genomix.hadoop.utils.TestUtils;
import edu.uci.ics.genomix.type.KmerBytesWritable;
import edu.uci.ics.genomix.type.VKmerBytesWritable;
-import edu.uci.ics.genomix.utils.TestUtils;
+import edu.uci.ics.genomix.type.MergePathValueWritable;
@SuppressWarnings("deprecation")
public class MergePathTest {
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Test.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Test.java
similarity index 95%
rename from genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Test.java
rename to genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Test.java
index 7f871bd..def2592 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Test.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/pathmergingh2/MergePathH2Test.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.pathmergingh2;
+package edu.uci.ics.genomix.hadoop.pathmergingh2;
import java.io.BufferedWriter;
import java.io.DataOutputStream;
@@ -31,11 +31,11 @@
import org.apache.hadoop.util.ReflectionUtils;
import org.junit.Test;
-import edu.uci.ics.genomix.pathmergingh2.MergePathH2Driver;
+import edu.uci.ics.genomix.hadoop.pathmergingh2.MergePathH2Driver;
+import edu.uci.ics.genomix.hadoop.utils.TestUtils;
import edu.uci.ics.genomix.type.KmerBytesWritable;
import edu.uci.ics.genomix.type.VKmerBytesWritable;
-import edu.uci.ics.genomix.utils.TestUtils;
-
+import edu.uci.ics.genomix.type.MergePathValueWritable;
@SuppressWarnings("deprecation")
public class MergePathH2Test {
private static final String ACTUAL_RESULT_DIR = "actual4";
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/utils/TestUtils.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/utils/TestUtils.java
similarity index 98%
rename from genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/utils/TestUtils.java
rename to genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/utils/TestUtils.java
index 1488907..deb3b97 100755
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/utils/TestUtils.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/utils/TestUtils.java
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.genomix.utils;
+package edu.uci.ics.genomix.hadoop.utils;
import java.io.BufferedReader;
import java.io.File;