change new name for hadoop program
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/.DS_Store b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/.DS_Store
new file mode 100644
index 0000000..7880be8
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/.DS_Store b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/.DS_Store
new file mode 100644
index 0000000..f5eb144
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingDriver.java
new file mode 100644
index 0000000..067249a
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingDriver.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.gbresultschecking;
+
+import java.io.IOException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+@SuppressWarnings("deprecation")
+public class ResultsCheckingDriver {
+    /** Command-line arguments, bound by args4j; every option is mandatory. */
+    private static class Options {
+        @Option(name = "-inputpath1", usage = "the input path", required = true)
+        public String inputPath1;
+
+        @Option(name = "-inputpath2", usage = "the input path", required = true)
+        public String inputPath2;
+
+        @Option(name = "-outputpath", usage = "the output path", required = true)
+        public String outputPath;
+
+        @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+        public int numReducers;
+
+        @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+        public int sizeKmer;
+    }
+
+    /**
+     * Runs the "Results Checking" job over two sequence-file inputs and writes text output.
+     * Anything already present at {@code outputPath} is deleted recursively before submission.
+     * @param inputPath1 first input path
+     * @param inputPath2 second input path
+     * @param outputPath job output path
+     * @param numReducers number of reduce tasks
+     * @param sizeKmer value stored under the "sizeKmer" configuration key
+     * @param defaultConfPath extra configuration resource to load, or {@code null} for none
+     * @throws IOException if the file system call or the job fails
+     */
+    public void run(String inputPath1, String inputPath2, String outputPath, int numReducers, int sizeKmer,
+            String defaultConfPath) throws IOException {
+        JobConf job = new JobConf(ResultsCheckingDriver.class);
+        job.setInt("sizeKmer", sizeKmer);
+        if (defaultConfPath != null) {
+            job.addResource(new Path(defaultConfPath));
+        }
+
+        job.setJobName("Results Checking");
+        job.setMapperClass(ResultsCheckingMapper.class);
+        job.setReducerClass(ResultsCheckingReducer.class);
+        job.setNumReduceTasks(numReducers);
+
+        job.setInputFormat(SequenceFileInputFormat.class);
+        job.setOutputFormat(TextOutputFormat.class);
+        job.setMapOutputKeyClass(Text.class);
+        job.setMapOutputValueClass(Text.class);
+        job.setOutputKeyClass(Text.class);
+        job.setOutputValueClass(Text.class);
+
+        FileInputFormat.setInputPaths(job, new Path(inputPath1), new Path(inputPath2));
+        Path target = new Path(outputPath);
+        FileOutputFormat.setOutputPath(job, target);
+        // Remove any previous output at the target before submitting.
+        FileSystem.get(job).delete(target, true);
+        JobClient.runJob(job);
+    }
+
+    /** Parses the command line and runs the job without an extra configuration resource. */
+    public static void main(String[] args) throws Exception {
+        Options cli = new Options();
+        new CmdLineParser(cli).parseArgument(args);
+        new ResultsCheckingDriver().run(cli.inputPath1, cli.inputPath2, cli.outputPath, cli.numReducers,
+                cli.sizeKmer, null);
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingMapper.java
new file mode 100644
index 0000000..466d44c
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingMapper.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.gbresultschecking;
+
+import java.io.IOException;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+@SuppressWarnings({ "unused", "deprecation" })
+public class ResultsCheckingMapper extends MapReduceBase implements Mapper<KmerBytesWritable, KmerCountValue, Text, Text> {
+    private static final IntWritable one = new IntWritable(1);
+    public static Text textkey = new Text();
+    public static Text textvalue = new Text();
+    public static String INPUT_PATH;
+    public static int KMER_SIZE;
+    KmerBytesWritable valWriter;
+    /** Reads "sizeKmer" from the job (0 when absent) and creates {@code valWriter} from it. */
+    @Override
+    public void configure(JobConf job) {
+        KMER_SIZE = job.getInt("sizeKmer", 0);
+        valWriter = new KmerBytesWritable(KMER_SIZE);
+    }
+
+    /** Emits, per record, the text "key TAB value" paired with the name of the file holding the current split. */
+    @Override
+    public void map(KmerBytesWritable key, KmerCountValue value, OutputCollector<Text, Text> output, Reporter reporter)
+            throws IOException {
+        String sourceFile = ((FileSplit) reporter.getInputSplit()).getPath().getName();
+        textkey.set(key.toString() + "\t" + value.toString());
+        textvalue.set(sourceFile);
+        output.collect(textkey, textvalue);
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingReducer.java
new file mode 100644
index 0000000..6d7fbc0
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingReducer.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.gbresultschecking;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+@SuppressWarnings("deprecation")
+public class ResultsCheckingReducer extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
+    public static Text textkey = new Text();
+    public static Text textvalue = new Text();
+
+    /** Writes (key, first value) only when the group holds exactly one value; larger groups produce nothing. */
+    @Override
+    public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
+            throws IOException {
+        textkey.set(key);
+        textvalue.set(values.next());
+        if (!values.hasNext()) {
+            output.collect(textkey, textvalue);
+        }
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/.DS_Store b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/.DS_Store
new file mode 100644
index 0000000..2ee03fe
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixCombiner.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixCombiner.java
new file mode 100755
index 0000000..7029c86
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixCombiner.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.graphbuilding;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+/**
+ * Combiner of the graph-building job: folds all values of one k-mer into a single value whose adjacency
+ * bitmap is the bitwise OR of the inputs and whose count is their sum, capped at 127.
+ */
+@SuppressWarnings("deprecation")
+public class GenomixCombiner extends MapReduceBase implements
+        Reducer<KmerBytesWritable, KmerCountValue, KmerBytesWritable, KmerCountValue> {
+    private KmerCountValue vaWriter = new KmerCountValue();
+
+    /**
+     * Merges every value seen for {@code key} and emits exactly one combined record.
+     */
+    @Override
+    public void reduce(KmerBytesWritable key, Iterator<KmerCountValue> values,
+            OutputCollector<KmerBytesWritable, KmerCountValue> output, Reporter reporter) throws IOException {
+        byte mergedAdjacency = 0;
+        int total = 0;
+        while (values.hasNext()) {
+            KmerCountValue current = values.next();
+            mergedAdjacency |= current.getAdjBitMap();
+            total += current.getCount();
+        }
+        // The count is handed on as a byte, so clamp it to 127 first.
+        byte clamped = (byte) Math.min(total, 127);
+        vaWriter.set(mergedAdjacency, clamped);
+        output.collect(key, vaWriter);
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixDriver.java
new file mode 100755
index 0000000..60802eb
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixDriver.java
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.graphbuilding;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+/** Command-line driver that configures and starts the MapReduce job for graph building. */
+@SuppressWarnings("deprecation")
+public class GenomixDriver {
+    /** Command-line arguments, bound by args4j; every option is mandatory. */
+    private static class Options {
+        @Option(name = "-inputpath", usage = "the input path", required = true)
+        public String inputPath;
+
+        @Option(name = "-outputpath", usage = "the output path", required = true)
+        public String outputPath;
+
+        @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+        public int numReducers;
+
+        @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+        public int sizeKmer;
+    }
+
+    /**
+     * Runs the "Genomix Graph Building" job (text input, sequence-file output); {@code sizeKmer} is stored as "sizeKmer".
+     * Anything already present at {@code outputPath} is deleted recursively before submission.
+     * @param defaultConfPath extra configuration resource to load, or {@code null} for none
+     * @throws IOException if the file system call or the job fails
+     */
+    public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, String defaultConfPath)
+            throws IOException {
+        JobConf job = new JobConf(GenomixDriver.class);
+        job.setInt("sizeKmer", sizeKmer);
+        if (defaultConfPath != null) {
+            job.addResource(new Path(defaultConfPath));
+        }
+        job.setJobName("Genomix Graph Building");
+        job.setMapperClass(GenomixMapper.class);
+        job.setCombinerClass(GenomixCombiner.class);
+        job.setReducerClass(GenomixReducer.class);
+        job.setNumReduceTasks(numReducers);
+
+        job.setInputFormat(TextInputFormat.class);
+        job.setOutputFormat(SequenceFileOutputFormat.class);
+        job.setMapOutputKeyClass(KmerBytesWritable.class);
+        job.setMapOutputValueClass(KmerCountValue.class);
+        job.setOutputKeyClass(KmerBytesWritable.class);
+        job.setOutputValueClass(KmerCountValue.class);
+
+        FileInputFormat.setInputPaths(job, new Path(inputPath));
+        Path target = new Path(outputPath);
+        FileOutputFormat.setOutputPath(job, target);
+        // Remove any previous output at the target before submitting.
+        FileSystem.get(job).delete(target, true);
+        JobClient.runJob(job);
+    }
+
+    /** Parses the command line and runs the job without an extra configuration resource. */
+    public static void main(String[] args) throws Exception {
+        Options cli = new Options();
+        new CmdLineParser(cli).parseArgument(args);
+        new GenomixDriver().run(cli.inputPath, cli.outputPath, cli.numReducers, cli.sizeKmer, null);
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixMapper.java
new file mode 100755
index 0000000..b9b9aec
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixMapper.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.graphbuilding;
+
+import java.io.IOException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+import edu.uci.ics.genomix.type.GeneCode;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+/**
+ * Mapper of the graph-building job: slides a window of KMER_SIZE letters over each read and emits every k-mer
+ * with a one-byte adjacency bitmap and a count of 1. Lines containing anything but A, C, G, T are skipped.
+ */
+@SuppressWarnings("deprecation")
+public class GenomixMapper extends MapReduceBase implements
+        Mapper<LongWritable, Text, KmerBytesWritable, KmerCountValue> {
+
+    public class CurrenByte {
+        public byte curByte;
+        public byte preMarker;
+    }
+
+    /** A valid read is one or more of A, C, G, T. Compiled once rather than once per input line. */
+    private static final Pattern GENE_PATTERN = Pattern.compile("[AGCT]+");
+
+    public static int KMER_SIZE;
+    public KmerCountValue outputAdjList;
+    public KmerBytesWritable outputKmer;
+
+    @Override
+    public void configure(JobConf job) {
+        KMER_SIZE = Integer.parseInt(job.get("sizeKmer"));
+        outputAdjList = new KmerCountValue();
+        outputKmer = new KmerBytesWritable(KMER_SIZE);
+    }
+
+    /**
+     * Emits one (k-mer, adjacency bitmap + count) pair for every k-mer of the read held in {@code value}.
+     * Bitmap layout, one bit per neighbouring letter (two-bit letter codes: A 00, C 01, G 10, T 11):
+     * <pre>
+     * successor   A 00000001  C 00000010  G 00000100  T 00001000
+     * predecessor A 00010000  C 00100000  G 01000000  T 10000000
+     * </pre>
+     * Reads shorter than KMER_SIZE hold no k-mer and are skipped. A read of exactly KMER_SIZE letters yields
+     * its single k-mer with no neighbours; it used to fail on the out-of-range access array[KMER_SIZE].
+     */
+    @Override
+    public void map(LongWritable key, Text value, OutputCollector<KmerBytesWritable, KmerCountValue> output,
+            Reporter reporter) throws IOException {
+        String geneLine = value.toString();
+        if (!GENE_PATTERN.matcher(geneLine).matches()) {
+            return;
+        }
+        byte[] array = geneLine.getBytes();
+        if (array.length < KMER_SIZE) {
+            return;
+        }
+        final byte count = 1;
+
+        // First k-mer: nothing precedes it in the read.
+        outputKmer.setByRead(array, 0);
+        byte pre = 0;
+        byte next = successorBit(array, KMER_SIZE);
+        outputAdjList.set(GeneCode.mergePreNextAdj(pre, next), count);
+        output.collect(outputKmer, outputAdjList);
+
+        // Later k-mers, the last one included: shift in one letter; the final window has no successor.
+        for (int i = KMER_SIZE; i < array.length; i++) {
+            pre = GeneCode.getBitMapFromGeneCode(outputKmer.shiftKmerWithNextChar(array[i]));
+            next = successorBit(array, i + 1);
+            outputAdjList.set(GeneCode.mergePreNextAdj(pre, next), count);
+            output.collect(outputKmer, outputAdjList);
+        }
+    }
+
+    /** Adjacency bit of the letter at {@code index}, or 0 when the read ends before that position. */
+    private static byte successorBit(byte[] array, int index) {
+        return index < array.length ? GeneCode.getAdjBit(array[index]) : (byte) 0;
+    }
+}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixReducer.java
new file mode 100755
index 0000000..75add24
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphbuilding/GenomixReducer.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.graphbuilding;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+/**
+ * Reducer of the graph-building job: folds all values of one k-mer into a single value whose adjacency
+ * bitmap is the bitwise OR of the inputs and whose count is their sum, capped at 127.
+ */
+@SuppressWarnings("deprecation")
+public class GenomixReducer extends MapReduceBase implements
+        Reducer<KmerBytesWritable, KmerCountValue, KmerBytesWritable, KmerCountValue> {
+    static enum MyCounters { NUM_RECORDS };
+
+    KmerCountValue valWriter = new KmerCountValue();
+
+    /** Emits one merged record for {@code key} and increments the NUM_RECORDS counter by one. */
+    @Override
+    public void reduce(KmerBytesWritable key, Iterator<KmerCountValue> values,
+            OutputCollector<KmerBytesWritable, KmerCountValue> output, Reporter reporter) throws IOException {
+        byte mergedAdjacency = 0;
+        int total = 0;
+        while (values.hasNext()) {
+            KmerCountValue current = values.next();
+            mergedAdjacency |= current.getAdjBitMap();
+            total += current.getCount();
+        }
+
+        // The count is handed on as a byte, so anything from 127 upwards is stored as 127.
+        valWriter.set(mergedAdjacency, (byte) (total >= 127 ? 127 : total));
+        output.collect(key, valWriter);
+        reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterDriver.java
new file mode 100644
index 0000000..2f6dddd
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterDriver.java
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.graphcountfilter;
+
+import java.io.IOException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+
+@SuppressWarnings("deprecation")
+public class CountFilterDriver {
+    /** Command-line arguments, bound by args4j; every option is mandatory. */
+    private static class Options {
+        @Option(name = "-inputpath", usage = "the input path", required = true)
+        public String inputPath;
+
+        @Option(name = "-outputpath", usage = "the output path", required = true)
+        public String outputPath;
+
+        @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+        public int numReducers;
+
+        @Option(name = "-count-threshold", usage = "the threshold of count", required = true)
+        public int countThreshold;
+    }
+
+    /**
+     * Runs the "Count Filter" job, reading and writing sequence files; the reducer class doubles as combiner.
+     * {@code countThreshold} is stored as "countThreshold"; existing content at {@code outputPath} is deleted first.
+     * @param defaultConfPath extra configuration resource to load, or {@code null} for none
+     * @throws IOException if the file system call or the job fails
+     */
+    public void run(String inputPath, String outputPath, int numReducers, int countThreshold, String defaultConfPath)
+            throws IOException {
+        JobConf job = new JobConf(CountFilterDriver.class);
+        job.setInt("countThreshold", countThreshold);
+        if (defaultConfPath != null) {
+            job.addResource(new Path(defaultConfPath));
+        }
+        job.setJobName("Count Filter");
+        job.setMapperClass(CountFilterMapper.class);
+        job.setCombinerClass(CountFilterReducer.class);
+        job.setReducerClass(CountFilterReducer.class);
+        job.setNumReduceTasks(numReducers);
+
+        job.setInputFormat(SequenceFileInputFormat.class);
+        job.setOutputFormat(SequenceFileOutputFormat.class);
+        job.setMapOutputKeyClass(KmerBytesWritable.class);
+        job.setMapOutputValueClass(ByteWritable.class);
+        job.setOutputKeyClass(KmerBytesWritable.class);
+        job.setOutputValueClass(ByteWritable.class);
+
+        FileInputFormat.setInputPaths(job, new Path(inputPath));
+        Path target = new Path(outputPath);
+        FileOutputFormat.setOutputPath(job, target);
+        // Remove any previous output at the target before submitting.
+        FileSystem.get(job).delete(target, true);
+        JobClient.runJob(job);
+    }
+
+    /** Parses the command line and runs the job without an extra configuration resource. */
+    public static void main(String[] args) throws Exception {
+        Options cli = new Options();
+        new CmdLineParser(cli).parseArgument(args);
+        new CountFilterDriver().run(cli.inputPath, cli.outputPath, cli.numReducers, cli.countThreshold, null);
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterMapper.java
new file mode 100644
index 0000000..4a9a8a9
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterMapper.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.graphcountfilter;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+
+@SuppressWarnings({ "deprecation" })
+public class CountFilterMapper extends MapReduceBase implements
+        Mapper<KmerBytesWritable, KmerCountValue, KmerBytesWritable, ByteWritable> {
+    private final ByteWritable adjacency = new ByteWritable(); // output value, reused for every record
+    private int minCount; // smallest count a record needs in order to be kept; set in configure()
+    @Override
+    public void configure(JobConf job) {
+        minCount = Integer.parseInt(job.get("countThreshold"));
+    }
+    public void map(KmerBytesWritable key, KmerCountValue value, OutputCollector<KmerBytesWritable, ByteWritable> output,
+            Reporter reporter) throws IOException {
+        if (value.getCount() >= minCount) { // records below the threshold are dropped
+            adjacency.set(value.getAdjBitMap());
+            output.collect(key, adjacency);
+        }
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterReducer.java
new file mode 100644
index 0000000..6e61973
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/graphcountfilter/CountFilterReducer.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.graphcountfilter;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+
+@SuppressWarnings("deprecation")
+public class CountFilterReducer extends MapReduceBase implements
+ Reducer<KmerBytesWritable, ByteWritable, KmerBytesWritable, ByteWritable> {
+ @Override
+ public void reduce(KmerBytesWritable key, Iterator<ByteWritable> values,
+ OutputCollector<KmerBytesWritable, ByteWritable> output, Reporter reporter) throws IOException {
+ output.collect(key, values.next()); // emit the key with the first value of its group; any further values are not read
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/.DS_Store b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/.DS_Store
new file mode 100644
index 0000000..f9e3926
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Driver.java
new file mode 100644
index 0000000..2ddeb6b
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Driver.java
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pathmergingh1;
+
+import java.io.IOException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
+import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+@SuppressWarnings("deprecation")
+public class MergePathH1Driver {
+
+ private static class Options {
+ @Option(name = "-inputpath", usage = "the input path", required = true)
+ public String inputPath;
+
+ @Option(name = "-outputpath", usage = "the output path", required = true)
+ public String outputPath;
+
+ @Option(name = "-mergeresultpath", usage = "the merging results path", required = true)
+ public String mergeResultPath;
+
+ @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+ public int numReducers;
+
+ @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+ public int sizeKmer;
+
+ @Option(name = "-merge-rounds", usage = "the while rounds of merging", required = true)
+ public int mergeRound;
+
+ }
+
+
+ public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer, int mergeRound, String defaultConfPath)
+ throws IOException{
+
+ JobConf conf = new JobConf(MergePathH1Driver.class);
+ conf.setInt("sizeKmer", sizeKmer);
+
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath));
+ }
+ conf.setJobName("Initial Path-Starting-Points Table");
+ conf.setMapperClass(SNodeInitialMapper.class);
+ conf.setReducerClass(SNodeInitialReducer.class);
+
+ conf.setMapOutputKeyClass(KmerBytesWritable.class);
+ conf.setMapOutputValueClass(MergePathValueWritable.class);
+
+ conf.setInputFormat(SequenceFileInputFormat.class);
+ conf.setOutputFormat(SequenceFileOutputFormat.class);
+
+ conf.setOutputKeyClass(VKmerBytesWritable.class);
+ conf.setOutputValueClass(MergePathValueWritable.class);
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath));
+ FileOutputFormat.setOutputPath(conf, new Path(inputPath + "-step1"));
+ conf.setNumReduceTasks(numReducers);
+ FileSystem dfs = FileSystem.get(conf);
+ dfs.delete(new Path(inputPath + "-step1"), true);
+ JobClient.runJob(conf);
+ int iMerge = 0;
+/*----------------------------------------------------------------------*/
+ for(iMerge = 0; iMerge < mergeRound; iMerge ++){
+
+ conf = new JobConf(MergePathH1Driver.class);
+ conf.setInt("sizeKmer", sizeKmer);
+ conf.setInt("iMerge", iMerge);
+
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath));
+ }
+ conf.setJobName("Path Merge");
+
+ conf.setMapperClass(MergePathH1Mapper.class);
+ conf.setReducerClass(MergePathH1Reducer.class);
+
+ conf.setMapOutputKeyClass(VKmerBytesWritable.class);
+ conf.setMapOutputValueClass(MergePathValueWritable.class);
+
+ conf.setInputFormat(SequenceFileInputFormat.class);
+
+ String uncomplete = "uncomplete" + iMerge;
+ String complete = "complete" + iMerge;
+
+ MultipleOutputs.addNamedOutput(conf, uncomplete,
+ MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
+ MergePathValueWritable.class);
+
+ MultipleOutputs.addNamedOutput(conf, complete,
+ MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
+ MergePathValueWritable.class);
+
+ conf.setOutputKeyClass(VKmerBytesWritable.class);
+ conf.setOutputValueClass(MergePathValueWritable.class);
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
+ FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+ conf.setNumReduceTasks(numReducers);
+ dfs.delete(new Path(outputPath), true);
+ JobClient.runJob(conf);
+ dfs.delete(new Path(inputPath + "-step1"), true);
+ dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
+ dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));
+ }
+ /*----------------------------------------*/
+ conf = new JobConf(MergePathH1Driver.class);
+ conf.setInt("sizeKmer", sizeKmer);
+ conf.setInt("iMerge", iMerge);
+
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath));
+ }
+ conf.setJobName("Path Merge");
+
+ conf.setMapperClass(MergePathH1Mapper.class);
+ conf.setReducerClass(MergePathH1Reducer.class);
+
+ conf.setMapOutputKeyClass(VKmerBytesWritable.class);
+ conf.setMapOutputValueClass(MergePathValueWritable.class);
+
+ conf.setInputFormat(SequenceFileInputFormat.class);
+
+ String uncomplete = "uncomplete" + iMerge;
+ String complete = "complete" + iMerge;
+
+ MultipleOutputs.addNamedOutput(conf, uncomplete,
+ MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
+ MergePathValueWritable.class);
+
+ MultipleOutputs.addNamedOutput(conf, complete,
+ MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
+ MergePathValueWritable.class);
+
+ conf.setOutputKeyClass(VKmerBytesWritable.class);
+ conf.setOutputValueClass(MergePathValueWritable.class);
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
+ FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+ conf.setNumReduceTasks(numReducers);
+ dfs.delete(new Path(outputPath), true);
+ JobClient.runJob(conf);
+ dfs.delete(new Path(inputPath + "-step1"), true);
+ dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
+ dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));
+ }
+
+ public static void main(String[] args) throws Exception {
+ Options options = new Options();
+ CmdLineParser parser = new CmdLineParser(options);
+ parser.parseArgument(args);
+ MergePathH1Driver driver = new MergePathH1Driver();
+ driver.run(options.inputPath, options.outputPath, options.mergeResultPath, options.numReducers, options.sizeKmer, options.mergeRound, null);
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Mapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Mapper.java
new file mode 100644
index 0000000..6357483
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Mapper.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pathmergingh1;
+
+import java.io.IOException;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.GeneCode;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
+
+@SuppressWarnings("deprecation")
+public class MergePathH1Mapper extends MapReduceBase implements
+        Mapper<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
+    private int KMER_SIZE;
+    private VKmerBytesWritableFactory outputKmerFactory;
+    private MergePathValueWritable outputValue;
+    private VKmerBytesWritable tmpKmer;
+    private VKmerBytesWritable outputKmer;
+
+    /** Reads "sizeKmer" from the job configuration (0 when absent) and allocates the reusable buffers. */
+    @Override
+    public void configure(JobConf job) {
+        KMER_SIZE = job.getInt("sizeKmer", 0);
+        outputKmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
+        outputValue = new MergePathValueWritable();
+        tmpKmer = new VKmerBytesWritable(KMER_SIZE);
+        outputKmer = new VKmerBytesWritable(KMER_SIZE);
+    }
+
+    /**
+     * Re-keys records whose flag is 1: the new key is the chain's last KMER_SIZE kmer shifted with
+     * the gene code decoded from the low four bits of the adjacency bitmap, and the value keeps
+     * bitmap and flag plus the chain's leading part. Other records keep their key and value.
+     */
+    @Override
+    public void map(VKmerBytesWritable key, MergePathValueWritable value,
+            OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+        byte adjBitMap = value.getAdjBitMap();
+        byte bitFlag = value.getFlag();
+        if (bitFlag == 1) {
+            byte succeed = (byte) (0x0F & adjBitMap); // low nibble of the bitmap
+            byte succeedCode = GeneCode.getGeneCodeFromBitMap(succeed);
+            tmpKmer.set(outputKmerFactory.getLastKmerFromChain(KMER_SIZE, key));
+            outputKmer.set(outputKmerFactory.shiftKmerWithNextCode(tmpKmer, succeedCode));
+
+            tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(key.getKmerLength() - (KMER_SIZE - 1), key));
+            outputValue.set(adjBitMap, bitFlag, tmpKmer);
+            output.collect(outputKmer, outputValue);
+        } else {
+            outputValue.set(value); // the key is forwarded as-is, so no key copy is needed
+            output.collect(key, outputValue);
+        }
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Reducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Reducer.java
new file mode 100644
index 0000000..f5c9c8d
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathH1Reducer.java
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pathmergingh1;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
+
+@SuppressWarnings("deprecation")
+public class MergePathH1Reducer extends MapReduceBase implements
+ Reducer<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
+ private VKmerBytesWritableFactory kmerFactory;
+ private VKmerBytesWritable outputKmer;
+ private VKmerBytesWritable tmpKmer;
+ private int KMER_SIZE;
+ private MergePathValueWritable outputValue;
+ private MergePathValueWritable tmpOutputValue;
+ MultipleOutputs mos = null;
+ private int I_MERGE;
+ /** Reads "iMerge" and "sizeKmer" from the job configuration and allocates the reusable buffers. */
+ public void configure(JobConf job) {
+ mos = new MultipleOutputs(job);
+ I_MERGE = Integer.parseInt(job.get("iMerge")); // throws NumberFormatException when "iMerge" is not set
+ KMER_SIZE = job.getInt("sizeKmer", 0);
+ outputValue = new MergePathValueWritable();
+ tmpOutputValue = new MergePathValueWritable();
+ kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
+ outputKmer = new VKmerBytesWritable(KMER_SIZE);
+ tmpKmer = new VKmerBytesWritable(KMER_SIZE);
+ }
+ /** Routes each key group to the named output "uncomplete" + I_MERGE or "complete" + I_MERGE; the regular collector {@code output} is never written. */
+ @SuppressWarnings("unchecked")
+ @Override
+ public void reduce(VKmerBytesWritable key, Iterator<MergePathValueWritable> values,
+ OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+ outputValue = values.next(); // first value of the group
+ if (values.hasNext() == true) { // the key received at least two values
+ if (outputValue.getFlag() != 1) { // first value is not flagged 1: only its low nibble is used
+ byte nextAdj = outputValue.getAdjBitMap();
+ byte succeed = (byte) 0x0F;
+ succeed = (byte) (succeed & nextAdj);
+ // bitmap was read above, before the iterator is advanced to the second value
+ outputValue = values.next();
+ byte adjBitMap = outputValue.getAdjBitMap();
+ byte flag = outputValue.getFlag();
+ if (outputValue.getKmerLength() != 0)
+ outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), key));
+ else
+ outputKmer.set(key);
+ // new bitmap: high nibble of the second value, low nibble of the first value
+ adjBitMap = (byte) (adjBitMap & 0xF0);
+ adjBitMap = (byte) (adjBitMap | succeed);
+ outputValue.set(adjBitMap, flag, null);
+ mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ } else { // first value is flagged 1: copy it before the iterator is advanced
+ tmpOutputValue.set(outputValue);
+ byte tmpAdjMap = tmpOutputValue.getAdjBitMap();
+
+ outputValue = values.next();
+ if (outputValue.getFlag() != 1) { // second value is not flagged 1: same merge with the roles swapped
+ if (tmpOutputValue.getKmerLength() != 0)
+ outputKmer.set(kmerFactory.mergeTwoKmer(tmpOutputValue.getKmer(), key));
+ else
+ outputKmer.set(key);
+ // new bitmap: high nibble of the copied first value, low nibble of the second value
+ byte nextAdj = outputValue.getAdjBitMap();
+ byte succeed = (byte) 0x0F;
+ succeed = (byte) (succeed & nextAdj);
+ tmpAdjMap = (byte) (tmpAdjMap & 0xF0);
+ tmpAdjMap = (byte) (tmpAdjMap | succeed);
+ outputValue.set(tmpAdjMap, tmpOutputValue.getFlag(), null);
+ mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ } else { // first two values are both flagged 1: every value of the group goes to "complete"
+ // copied first value
+ tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
+ if (tmpOutputValue.getKmerLength() != 0)
+ outputKmer.set(kmerFactory.mergeTwoKmer(tmpOutputValue.getKmer(), tmpKmer));
+ else
+ outputKmer.set(tmpKmer);
+ tmpOutputValue.set(tmpAdjMap, tmpOutputValue.getFlag(), null);
+ mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, tmpOutputValue);
+ // second value
+ tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
+ if (outputValue.getKmerLength() != 0)
+ outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), tmpKmer));
+ else
+ outputKmer.set(tmpKmer);
+ outputValue.set(outputValue.getAdjBitMap(), outputValue.getFlag(), null);
+ mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ // all remaining values, handled the same way
+ while (values.hasNext()) {
+ outputValue = values.next();
+ tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
+ if (outputValue.getKmerLength() != 0)
+ outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), tmpKmer));
+ else
+ outputKmer.set(tmpKmer);
+ outputValue.set(outputValue.getAdjBitMap(), outputValue.getFlag(), null);
+ mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ }
+ }
+ }
+ } else { // the key received a single value
+ if (outputValue.getFlag() != 0) { // non-zero flag: written to "complete"
+ tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
+ if (outputValue.getKmerLength() != 0)
+ outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), tmpKmer));
+ else
+ outputKmer.set(tmpKmer);
+ outputValue.set(outputValue.getAdjBitMap(), outputValue.getFlag(), null);
+ mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
+
+ } else // flag 0: forwarded unchanged to "uncomplete"
+ mos.getCollector("uncomplete" + I_MERGE, reporter).collect(key, outputValue);
+ }
+ }
+ /** Closes the named outputs opened through {@code mos}. */
+ public void close() throws IOException {
+ // without this call the named-output writers are never closed
+ mos.close();
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiSeqOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiSeqOutputFormat.java
new file mode 100644
index 0000000..5ef8b4a
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiSeqOutputFormat.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pathmergingh1;
+
+import java.io.File;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+
+public class MergePathMultiSeqOutputFormat extends MultipleSequenceFileOutputFormat<VKmerBytesWritable, MergePathValueWritable>{
+    /** Places each leaf file in a sub-directory named after the part of its name before the first '-'. */
+    @Override
+    protected String generateLeafFileName(String name) {
+        final String directory = name.split("-")[0];
+        return directory + File.separator + name;
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiTextOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiTextOutputFormat.java
new file mode 100644
index 0000000..ac88ce0
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathMultiTextOutputFormat.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pathmergingh1;
+
+import java.io.File;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
+
+public class MergePathMultiTextOutputFormat extends MultipleTextOutputFormat<Text, Text>{
+    /** Places each leaf file in a sub-directory named after the part of its name before the first '-'. */
+    @Override
+    protected String generateLeafFileName(String name) {
+        final String directory = name.split("-")[0];
+        return directory + File.separator + name;
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathValueWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathValueWritable.java
new file mode 100644
index 0000000..0d94227
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/MergePathValueWritable.java
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pathmergingh1;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.BinaryComparable;
+import org.apache.hadoop.io.WritableComparable;
+
+import edu.uci.ics.genomix.type.GeneCode;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+public class MergePathValueWritable extends BinaryComparable implements WritableComparable<BinaryComparable> {
+ // Value type made of an adjacency-bitmap byte, a flag byte and a kmer; the BinaryComparable view exposes only the kmer bytes.
+ private static final byte[] EMPTY_BYTES = {};
+ private byte adjBitMap;
+ private byte flag;
+ private VKmerBytesWritable kmer;
+ /** Creates a value with bitmap 0, flag 0 and a kmer built from size 0 and an empty byte array. */
+ public MergePathValueWritable() {
+ this((byte) 0, (byte) 0, 0, EMPTY_BYTES);
+ }
+ /** Creates a value from the given bitmap and flag, with a kmer built from {@code kmerSize} and {@code bytes}. */
+ public MergePathValueWritable(byte adjBitMap, byte flag, int kmerSize, byte[] bytes) {
+ this.adjBitMap = adjBitMap;
+ this.flag = flag;
+ this.kmer = new VKmerBytesWritable(kmerSize, bytes);
+ kmer.set(bytes, 0, bytes.length);
+ }
+ /** Copies bitmap, flag and kmer from {@code right}; an empty kmer arrives here as null because of getKmer(). */
+ public void set(MergePathValueWritable right) {
+ set(right.getAdjBitMap(), right.getFlag(), right.getKmer());
+ }
+ /** Overwrites all three parts; callers pass a null kmer, which is handed to VKmerBytesWritable.set as-is (null handling lives there - TODO confirm). */
+ public void set(byte adjBitMap, byte flag, VKmerBytesWritable kmer) {
+ this.kmer.set(kmer);
+ this.adjBitMap = adjBitMap;
+ this.flag = flag;
+ }
+ /** Reads the kmer first, then the bitmap byte, then the flag byte. */
+ @Override
+ public void readFields(DataInput arg0) throws IOException {
+ // field order must stay in step with write()
+ kmer.readFields(arg0);
+ adjBitMap = arg0.readByte();
+ flag = arg0.readByte();
+ }
+ /** Writes the kmer first, then the bitmap byte, then the flag byte. */
+ @Override
+ public void write(DataOutput arg0) throws IOException {
+ // field order must stay in step with readFields()
+
+ kmer.write(arg0);
+ arg0.writeByte(adjBitMap);
+ arg0.writeByte(flag);
+ }
+ /** Returns the kmer, or null when the kmer's byte length is 0. */
+ public VKmerBytesWritable getKmer() {
+ if (kmer.getLength() != 0) {
+ return kmer;
+ }
+ return null;
+ }
+
+ public byte getAdjBitMap() {
+ return this.adjBitMap;
+ }
+
+ public byte getFlag() {
+ return this.flag;
+ }
+ /** Returns GeneCode.getSymbolFromBitMap(adjBitMap), a tab and the flag; the kmer is not part of the text. */
+ public String toString() {
+ return GeneCode.getSymbolFromBitMap(adjBitMap) + '\t' + String.valueOf(flag);
+ }
+ /** Returns the kmer's bytes, or null when the kmer's byte length is 0. */
+ @Override
+ public byte[] getBytes() {
+ // NOTE(review): BinaryComparable callers may not expect null here; EMPTY_BYTES is available - confirm
+ if (kmer.getLength() != 0) {
+ return kmer.getBytes();
+ } else
+ return null;
+
+ }
+ /** Returns the kmer's getKmerLength(); callers in this package compare it with 0 to detect a missing kmer. */
+ public int getKmerLength() {
+ return kmer.getKmerLength();
+ }
+ /** Returns the kmer's byte length (getLength()), which getKmer() and getBytes() test against 0. */
+ @Override
+ public int getLength() {
+ return kmer.getLength();
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/SNodeInitialMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/SNodeInitialMapper.java
new file mode 100644
index 0000000..1c12f63
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/SNodeInitialMapper.java
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pathmergingh1;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.GeneCode;
+
+@SuppressWarnings("deprecation")
+public class SNodeInitialMapper extends MapReduceBase implements
+ Mapper<KmerBytesWritable, ByteWritable, KmerBytesWritable, MergePathValueWritable> {
+ // The fields below are assigned in configure() and reused by every map() call.
+ public int KMER_SIZE;
+ public KmerBytesWritable outputKmer;
+ public MergePathValueWritable outputAdjList;
+ /** Reads "sizeKmer" from the job configuration and allocates the reusable output objects. */
+ public void configure(JobConf job) {
+ KMER_SIZE = Integer.parseInt(job.get("sizeKmer")); // throws NumberFormatException when "sizeKmer" is not set
+ outputKmer = new KmerBytesWritable(KMER_SIZE);
+ outputAdjList = new MergePathValueWritable();
+ }
+ /** Returns false only for 1, 2, 4 and 8 (exactly one of the four low bits set); returns true for every other value. */
+ boolean measureDegree(byte adjacent) {
+ boolean result = true; // also the answer for values the switch does not list
+ switch (adjacent) { // NOTE(review): equivalent to adjacent != 1 && != 2 && != 4 && != 8; could be collapsed
+ case 0:
+ result = true;
+ break;
+ case 1:
+ result = false;
+ break;
+ case 2:
+ result = false;
+ break;
+ case 3:
+ result = true;
+ break;
+ case 4:
+ result = false;
+ break;
+ case 5:
+ result = true;
+ break;
+ case 6:
+ result = true;
+ break;
+ case 7:
+ result = true;
+ break;
+ case 8:
+ result = false;
+ break;
+ case 9:
+ result = true;
+ break;
+ case 10:
+ result = true;
+ break;
+ case 11:
+ result = true;
+ break;
+ case 12:
+ result = true;
+ break;
+ case 13:
+ result = true;
+ break;
+ case 14:
+ result = true;
+ break;
+ case 15:
+ result = true;
+ break;
+ }
+ return result;
+ }
+ /** Emits the key with flag 2 when both nibbles of the bitmap have exactly one bit set; otherwise emits one bitmap-0, flag-0 record per set bit of the low nibble. */
+ @Override
+ public void map(KmerBytesWritable key, ByteWritable value,
+ OutputCollector<KmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+ byte precursor = (byte) 0xF0;
+ byte succeed = (byte) 0x0F;
+ byte adjBitMap = value.get();
+ byte bitFlag = (byte) 0;
+ precursor = (byte) (precursor & adjBitMap);
+ precursor = (byte) ((precursor & 0xff) >> 4); // high nibble moved into bits 0-3 (& 0xff avoids sign extension)
+ succeed = (byte) (succeed & adjBitMap); // low nibble
+ boolean inDegree = measureDegree(precursor); // false means exactly one bit set in the high nibble
+ boolean outDegree = measureDegree(succeed); // false means exactly one bit set in the low nibble
+ if (inDegree == false && outDegree == false) {
+ outputKmer.set(key);
+ bitFlag = (byte) 2;
+ outputAdjList.set(adjBitMap, bitFlag, null);///~~~~~kmersize----->0
+ output.collect(outputKmer, outputAdjList);
+ }
+ else{
+ for(int i = 0 ; i < 4; i ++){ // one pass per bit of the low nibble
+ byte temp = 0x01;
+ byte shiftedCode = 0;
+ temp = (byte)(temp << i);
+ temp = (byte) (succeed & temp); // non-zero only when bit i of the low nibble is set
+ if(temp != 0 ){
+ byte succeedCode = GeneCode.getGeneCodeFromBitMap(temp);
+ shiftedCode = key.shiftKmerWithNextCode(succeedCode); // changes key in place; the returned code is kept for the undo below
+ outputKmer.set(key);
+ outputAdjList.set((byte)0, bitFlag, null); // bitFlag is still 0 on this branch
+ output.collect(outputKmer, outputAdjList);
+ key.shiftKmerWithPreCode(shiftedCode); // presumably restores key for the next bit - TODO confirm
+ }
+ }
+ }
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/SNodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/SNodeInitialReducer.java
new file mode 100644
index 0000000..1426fba
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh1/SNodeInitialReducer.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pathmergingh1;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+@SuppressWarnings("deprecation")
+public class SNodeInitialReducer extends MapReduceBase implements
+        Reducer<KmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
+    /** Reused output key; overwritten with the input key on every call. */
+    private VKmerBytesWritable outputKmer = new VKmerBytesWritable();
+    /** Value most recently read from the iterator. */
+    private MergePathValueWritable outputValue = new MergePathValueWritable();
+
+    /**
+     * Forwards at most one value per key: the first value whose flag equals 2.
+     * <p>
+     * The forwarded value keeps its adjacency bitmap; its flag becomes 1 when the key received
+     * more than one value and 0 when the flag-2 value is the only one. Keys without a flag-2
+     * value produce no output.
+     *
+     * @param key      kmer shared by the grouped values; copied into the output key
+     * @param values   values grouped under {@code key}; iteration stops at the first flag-2 value
+     * @param output   receives the single forwarded pair, if any
+     * @param reporter unused
+     * @throws IOException if the collector fails
+     */
+    @Override
+    public void reduce(KmerBytesWritable key, Iterator<MergePathValueWritable> values,
+            OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+        outputKmer.set(key);
+        outputValue = values.next();
+        // The new flag depends only on whether the key has more than one value.
+        final byte bitFlag = values.hasNext() ? (byte) 1 : (byte) 0;
+
+        // Advance to the first flag-2 value, or to the last value when there is none.
+        while (outputValue.getFlag() != 2 && values.hasNext()) {
+            outputValue = values.next();
+        }
+
+        if (outputValue.getFlag() != 2) {
+            return;
+        }
+        outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);
+        output.collect(outputKmer, outputValue);
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/.DS_Store b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/.DS_Store
new file mode 100644
index 0000000..1802942
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/ENodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/ENodeInitialReducer.java
new file mode 100644
index 0000000..651476c
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/ENodeInitialReducer.java
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pathmergingh2;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.KmerUtil;
+
+@SuppressWarnings("deprecation")
+public class ENodeInitialReducer extends MapReduceBase implements
+        Reducer<BytesWritable, MergePathValueWritable, BytesWritable, MergePathValueWritable> {
+    /** Mask that clears bit 0 of a flag before it is compared with {@link #TARGET_FLAG}. */
+    private static final int IGNORE_LOW_BIT = 0xFE;
+    /** Masked flag value of the record this reducer forwards. */
+    private static final byte TARGET_FLAG = 2;
+    /** Bit OR-ed into the forwarded flag when the key received more than one value. */
+    private static final int MULTI_VALUE_BIT = 0x80;
+
+    public BytesWritable outputKmer = new BytesWritable();
+    public MergePathValueWritable outputAdjList = new MergePathValueWritable();
+
+    /**
+     * Forwards at most one value per key: the first one whose flag, ignoring bit 0, equals 2.
+     * <p>
+     * When the key has several values the forwarded flag is the original flag with 0x80 set;
+     * when the matching value is the only one, its flag is reset to 0.
+     * <p>
+     * The same masked comparison is applied to every value, so whether a value is forwarded
+     * does not depend on the order in which the values are iterated.
+     */
+    @Override
+    public void reduce(BytesWritable key, Iterator<MergePathValueWritable> values,
+            OutputCollector<BytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+        outputAdjList = values.next();
+        outputKmer.set(key);
+        final boolean multipleValues = values.hasNext();
+        // Scan forward until a matching value is found or the group is exhausted.
+        while (!hasTargetFlag(outputAdjList) && values.hasNext()) {
+            outputAdjList = values.next();
+        }
+        if (!hasTargetFlag(outputAdjList)) {
+            return;
+        }
+        byte bitFlag = multipleValues ? (byte) (MULTI_VALUE_BIT | outputAdjList.getFlag()) : (byte) 0;
+        outputAdjList.set(outputAdjList.getAdjBitMap(), bitFlag, null);
+        output.collect(outputKmer, outputAdjList);
+    }
+
+    /** Returns true when the value's flag, with bit 0 cleared, equals 2. */
+    private static boolean hasTargetFlag(MergePathValueWritable value) {
+        return (byte) (value.getFlag() & IGNORE_LOW_BIT) == TARGET_FLAG;
+    }
+}
+
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Driver.java
new file mode 100644
index 0000000..c196daa
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Driver.java
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pathmergingh2;
+
+import java.io.IOException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
+import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+
+@SuppressWarnings("deprecation")
+public class MergePathH2Driver {
+
+ private static class Options {
+ @Option(name = "-inputpath", usage = "the input path", required = true)
+ public String inputPath;
+
+ @Option(name = "-outputpath", usage = "the output path", required = true)
+ public String outputPath;
+
+ @Option(name = "-mergeresultpath", usage = "the merging results path", required = true)
+ public String mergeResultPath;
+
+ @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+ public int numReducers;
+
+ @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+ public int sizeKmer;
+
+ @Option(name = "-merge-rounds", usage = "the while rounds of merging", required = true)
+ public int mergeRound;
+
+ }
+
+
+ public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer, int mergeRound, String defaultConfPath)
+ throws IOException{
+
+ JobConf conf = new JobConf(MergePathH2Driver.class);
+ conf.setInt("sizeKmer", sizeKmer);
+
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath));
+ }
+ conf.setJobName("Initial Path-Starting-Points Table");
+ conf.setMapperClass(SNodeInitialMapper.class);
+ conf.setReducerClass(SNodeInitialReducer.class);
+
+ conf.setMapOutputKeyClass(KmerBytesWritable.class);
+ conf.setMapOutputValueClass(MergePathValueWritable.class);
+
+ conf.setInputFormat(SequenceFileInputFormat.class);
+ conf.setOutputFormat(SequenceFileOutputFormat.class);
+
+ conf.setOutputKeyClass(VKmerBytesWritable.class);
+ conf.setOutputValueClass(MergePathValueWritable.class);
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath));
+ FileOutputFormat.setOutputPath(conf, new Path(inputPath + "-step1"));
+ conf.setNumReduceTasks(numReducers);
+ FileSystem dfs = FileSystem.get(conf);
+ dfs.delete(new Path(inputPath + "-step1"), true);
+ JobClient.runJob(conf);
+ int iMerge = 0;
+/*----------------------------------------------------------------------*/
+ for(iMerge = 0; iMerge < mergeRound; iMerge ++){
+ if(!dfs.exists(new Path(inputPath + "-step1")))
+ break;
+ conf = new JobConf(MergePathH2Driver.class);
+ conf.setInt("sizeKmer", sizeKmer);
+ conf.setInt("iMerge", iMerge);
+
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath));
+ }
+ conf.setJobName("Path Merge");
+
+ conf.setMapperClass(MergePathH2Mapper.class);
+ conf.setReducerClass(MergePathH2Reducer.class);
+
+ conf.setMapOutputKeyClass(VKmerBytesWritable.class);
+ conf.setMapOutputValueClass(MergePathValueWritable.class);
+
+ conf.setInputFormat(SequenceFileInputFormat.class);
+
+ String uncomplete = "uncomplete" + iMerge;
+ String complete = "complete" + iMerge;
+
+ MultipleOutputs.addNamedOutput(conf, uncomplete,
+ MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
+ MergePathValueWritable.class);
+
+ MultipleOutputs.addNamedOutput(conf, complete,
+ MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
+ MergePathValueWritable.class);
+
+ conf.setOutputKeyClass(VKmerBytesWritable.class);
+ conf.setOutputValueClass(MergePathValueWritable.class);
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
+ FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+ conf.setNumReduceTasks(numReducers);
+ dfs.delete(new Path(outputPath), true);
+ JobClient.runJob(conf);
+ dfs.delete(new Path(inputPath + "-step1"), true);
+ dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
+ dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));
+ }
+/* conf = new JobConf(MergePathH2Driver.class);
+ conf.setInt("sizeKmer", sizeKmer);
+ conf.setInt("iMerge", iMerge);
+
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath));
+ }
+ conf.setJobName("Path Merge");
+
+ conf.setMapperClass(MergePathH2Mapper.class);
+ conf.setReducerClass(MergePathH2Reducer.class);
+
+ conf.setMapOutputKeyClass(VKmerBytesWritable.class);
+ conf.setMapOutputValueClass(MergePathValueWritable.class);
+
+ conf.setInputFormat(SequenceFileInputFormat.class);
+
+ String uncomplete = "uncomplete" + iMerge;
+ String complete = "complete" + iMerge;
+
+ MultipleOutputs.addNamedOutput(conf, uncomplete,
+ MergePathMultiTextOutputFormat.class, VKmerBytesWritable.class,
+ MergePathValueWritable.class);
+
+ MultipleOutputs.addNamedOutput(conf, complete,
+ MergePathMultiTextOutputFormat.class, VKmerBytesWritable.class,
+ MergePathValueWritable.class);
+
+ conf.setOutputKeyClass(VKmerBytesWritable.class);
+ conf.setOutputValueClass(MergePathValueWritable.class);
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
+ FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+ conf.setNumReduceTasks(numReducers);
+ dfs.delete(new Path(outputPath), true);
+ JobClient.runJob(conf);
+ dfs.delete(new Path(inputPath + "-step1"), true);
+ dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
+ dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));*/
+ }
+
+ public static void main(String[] args) throws Exception {
+ Options options = new Options();
+ CmdLineParser parser = new CmdLineParser(options);
+ parser.parseArgument(args);
+ MergePathH2Driver driver = new MergePathH2Driver();
+ driver.run(options.inputPath, options.outputPath, options.mergeResultPath, options.numReducers, options.sizeKmer, options.mergeRound, null);
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Mapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Mapper.java
new file mode 100644
index 0000000..726dd4c
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Mapper.java
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pathmergingh2;
+
+import java.io.IOException;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.GeneCode;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
+
+@SuppressWarnings("deprecation")
+public class MergePathH2Mapper extends MapReduceBase implements
+ Mapper<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
+ // Map side of one merge round: re-emits each path record under a k-mer derived from one of its ends.
+ private int KMER_SIZE;
+ private VKmerBytesWritableFactory outputKmerFactory;
+ private MergePathValueWritable outputValue;
+ private VKmerBytesWritable tmpKmer;
+ private VKmerBytesWritable outputKmer;
+ // Reads "sizeKmer" from the job (0 when absent) and allocates the factory and the reusable writables.
+ public void configure(JobConf job) {
+ KMER_SIZE = job.getInt("sizeKmer", 0);
+ outputKmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
+ outputValue = new MergePathValueWritable();
+ tmpKmer = new VKmerBytesWritable(KMER_SIZE);
+ outputKmer = new VKmerBytesWritable(KMER_SIZE);
+ }
+ // Dispatches on bits 0x80 and 0x01 of the value's flag; every emitted value keeps the input bitmap.
+ @Override
+ public void map(VKmerBytesWritable key, MergePathValueWritable value,
+ OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+ byte precursor = (byte) 0xF0;
+ byte succeed = (byte) 0x0F;
+ byte adjBitMap = value.getAdjBitMap();
+ byte bitFlag = value.getFlag();
+ precursor = (byte) (precursor & adjBitMap);
+ precursor = (byte) ((precursor & 0xff) >> 4); // high nibble of the bitmap; not read again in this method
+ succeed = (byte) (succeed & adjBitMap); // low nibble of the bitmap
+ byte bitStartEnd = (byte) (0x81 & bitFlag);
+ // Flag combinations other than the four cases below cannot occur after masking with 0x81.
+ switch (bitStartEnd) {
+ case (byte) 0x01: // only bit 0x01 set: key = last k-mer of the chain shifted with the successor code
+ byte succeedCode = GeneCode.getGeneCodeFromBitMap(succeed);
+ tmpKmer.set(outputKmerFactory.getLastKmerFromChain(KMER_SIZE, key));
+ outputKmer.set(outputKmerFactory.shiftKmerWithNextCode(tmpKmer, succeedCode));
+ // value k-mer = getFirstKmerFromChain(length - (KMER_SIZE - 1)); bit 0x08 is added to the flag
+ tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(key.getKmerLength() - (KMER_SIZE - 1), key));
+ bitFlag = (byte) (bitFlag | 0x08);
+ outputValue.set(adjBitMap, bitFlag, tmpKmer);
+ output.collect(outputKmer, outputValue);
+ break;
+ case (byte) 0x80: // only bit 0x80 set: key = first k-mer of the chain
+ tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(KMER_SIZE, key));
+ outputKmer.set(tmpKmer);
+ tmpKmer.set(outputKmerFactory.getLastKmerFromChain(key.getKmerLength() - KMER_SIZE, key)); // value k-mer
+ bitFlag = (byte) (bitFlag | 0x10); // bit 0x10 marks records emitted under the chain's first k-mer
+ outputValue.set(adjBitMap, bitFlag, tmpKmer);
+ output.collect(outputKmer, outputValue);
+ break;
+ case (byte) 0x00: // neither bit set: emit both kinds of record, first the 0x08 one, then the 0x10 one
+ succeedCode = GeneCode.getGeneCodeFromBitMap(succeed);
+ tmpKmer.set(outputKmerFactory.getLastKmerFromChain(KMER_SIZE, key));
+ outputKmer.set(outputKmerFactory.shiftKmerWithNextCode(tmpKmer, succeedCode));
+ // first record: same construction as in the 0x01 case
+ tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(key.getKmerLength() - (KMER_SIZE - 1), key));
+ bitFlag = (byte) (bitFlag | 0x08);
+ outputValue.set(adjBitMap, bitFlag, tmpKmer);
+ output.collect(outputKmer, outputValue);
+ // second record: clear 0x08 again, then same construction as in the 0x80 case
+ bitFlag = (byte) (bitFlag & 0xF7);
+ tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(KMER_SIZE, key));
+ outputKmer.set(tmpKmer);
+ tmpKmer.set(outputKmerFactory.getLastKmerFromChain(key.getKmerLength() - KMER_SIZE, key));
+ bitFlag = (byte) (bitFlag | 0x10);
+ outputValue.set(adjBitMap, bitFlag, tmpKmer);
+ output.collect(outputKmer, outputValue);
+ break;
+ case (byte) 0x81: // both bits set: emit under the unchanged key, with no k-mer in the value
+ outputKmer.set(key);
+ outputValue.set(adjBitMap, bitFlag, null);
+ output.collect(outputKmer, outputValue);
+ break;
+ }
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Reducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Reducer.java
new file mode 100644
index 0000000..63391b4
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Reducer.java
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pathmergingh2;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
+
+@SuppressWarnings("deprecation")
+public class MergePathH2Reducer extends MapReduceBase implements
+ Reducer<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
+ private VKmerBytesWritableFactory kmerFactory;
+ private VKmerBytesWritable outputKmer;
+ private VKmerBytesWritable tmpKmer1;
+ private VKmerBytesWritable tmpKmer2;
+ private int KMER_SIZE;
+ private MergePathValueWritable outputValue;
+ private MergePathValueWritable tmpOutputValue;
+ // All records are written through these named outputs ("uncomplete"/"complete" + I_MERGE).
+ MultipleOutputs mos = null;
+ private int I_MERGE;
+ // Reads "iMerge" (must be present) and "sizeKmer" (0 when absent) and allocates the reusable writables.
+ public void configure(JobConf job) {
+ mos = new MultipleOutputs(job);
+ I_MERGE = Integer.parseInt(job.get("iMerge"));
+ KMER_SIZE = job.getInt("sizeKmer", 0);
+ outputValue = new MergePathValueWritable();
+ tmpOutputValue = new MergePathValueWritable();
+ kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
+ outputKmer = new VKmerBytesWritable(KMER_SIZE);
+ tmpKmer1 = new VKmerBytesWritable(KMER_SIZE);
+ tmpKmer2 = new VKmerBytesWritable(KMER_SIZE);
+ }
+ // Joins the first two values of a key into one record; the output parameter itself is never written to.
+ @SuppressWarnings("unchecked")
+ public void reduce(VKmerBytesWritable key, Iterator<MergePathValueWritable> values,
+ OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+ outputValue = values.next(); // first value of the group
+ outputKmer.set(key);
+ if (values.hasNext() == true) { // two or more values: merge the first two, later ones are never read
+ byte bitFlag = outputValue.getFlag();
+ byte bitStartEnd = (byte) (0x81 & bitFlag); // bits 0x80/0x01 of the first value's flag
+ byte bitPosiNegative = (byte) (0x18 & bitFlag); // bits 0x10/0x08, the ones MergePathH2Mapper adds
+ byte succeed = (byte) 0x0F;
+ switch (bitPosiNegative) { // no default: any other bit combination emits nothing
+ case (byte) 0x08: // first value has 0x08: tmpKmer1 = mergeTwoKmer(its k-mer, key), or the key alone
+ if (outputValue.getKmerLength() != 0)
+ tmpKmer1.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), key));
+ else
+ tmpKmer1.set(key);
+ byte adjBitMap = outputValue.getAdjBitMap(); // first value's bitmap, read before the next value is fetched
+ outputValue = values.next(); // second value
+ bitStartEnd = (byte) (0x81 & outputValue.getFlag());
+ if (bitStartEnd == (byte) 0x80) { // second value has 0x80 without 0x01: also emit it on its own, flag 0x80
+ if (outputValue.getKmerLength() != 0)
+ tmpKmer2.set(kmerFactory.mergeTwoKmer(key, outputValue.getKmer()));
+ else
+ tmpKmer2.set(key);
+ byte tmpFlag = (byte) 0x80;
+ tmpOutputValue.set(outputValue.getAdjBitMap(), tmpFlag, null);
+ mos.getCollector("uncomplete" + I_MERGE, reporter).collect(tmpKmer2, tmpOutputValue);
+ }
+ if (outputValue.getKmerLength() != 0) // merged key: tmpKmer1 followed by the second value's k-mer
+ outputKmer.set(kmerFactory.mergeTwoKmer(tmpKmer1, outputValue.getKmer()));
+ else
+ outputKmer.set(tmpKmer1);
+ succeed = (byte) (succeed & outputValue.getAdjBitMap()); // low nibble from the second value
+ adjBitMap = (byte) (adjBitMap & 0xF0); // high nibble from the first value
+ adjBitMap = (byte) (adjBitMap | succeed);
+ byte outputFlag = (byte) (0x81 & bitFlag); // union of bits 0x80/0x01 of both flags
+ outputFlag = (byte) (outputFlag | ((byte) 0x81 & outputValue.getFlag()));
+ outputValue.set(adjBitMap, outputFlag, null);
+ mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ break;
+ case (byte) 0x10: // first value has 0x10: tmpKmer1 = mergeTwoKmer(key, its k-mer), or the key alone
+ if (outputValue.getKmerLength() != 0)
+ tmpKmer1.set(kmerFactory.mergeTwoKmer(key, outputValue.getKmer()));
+ else
+ tmpKmer1.set(key);
+ if (bitStartEnd == (byte) 0x80) { // first value has 0x80 without 0x01: also emit it on its own, flag 0x80
+ byte tmpFlag = (byte) 0x80;
+ tmpOutputValue.set(outputValue.getAdjBitMap(), tmpFlag, null);
+ mos.getCollector("uncomplete" + I_MERGE, reporter).collect(tmpKmer1, tmpOutputValue);
+ }
+ succeed = (byte) (succeed & outputValue.getAdjBitMap()); // low nibble from the first value
+ outputValue = values.next(); // second value
+ if (outputValue.getKmerLength() != 0) // merged key: the second value's k-mer followed by tmpKmer1
+ outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), tmpKmer1));
+ else
+ outputKmer.set(tmpKmer1);
+ adjBitMap = outputValue.getAdjBitMap(); // high nibble from the second value
+ adjBitMap = (byte) (adjBitMap & 0xF0);
+ adjBitMap = (byte) (adjBitMap | succeed);
+ outputFlag = (byte) (0x81 & bitFlag); // union of bits 0x80/0x01 of both flags
+ outputFlag = (byte) (outputFlag | ((byte) 0x81 & outputValue.getFlag()));
+ outputValue.set(adjBitMap, outputFlag, null);
+ mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ break;
+ }
+ } else { // single value: written only when both 0x80 and 0x01 are set, otherwise it produces no output
+ byte bitFlag = outputValue.getFlag();
+ byte bitStartEnd = (byte) (0x81 & bitFlag);
+ if (bitStartEnd == (byte) 0x81) {
+ outputKmer.set(key);
+ mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ }
+ }
+ }
+ public void close() throws IOException {
+ // Closes the named outputs opened through mos.
+ mos.close();
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathMultiSeqOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathMultiSeqOutputFormat.java
new file mode 100644
index 0000000..66d3b6b
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathMultiSeqOutputFormat.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pathmergingh2;
+
+import java.io.File;
+import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+public class MergePathMultiSeqOutputFormat extends MultipleSequenceFileOutputFormat<VKmerBytesWritable, MergePathValueWritable>{
+    /** Returns {@code <prefix>/<name>}, where prefix is the part of {@code name} before its first '-'. */
+    @Override
+    protected String generateLeafFileName(String name) {
+        // The driver looks the directory up with "/", so build it with "/" rather than File.separator.
+        return name.split("-")[0] + "/" + name;
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathMultiTextOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathMultiTextOutputFormat.java
new file mode 100644
index 0000000..bca9695
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathMultiTextOutputFormat.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pathmergingh2;
+
+import java.io.File;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
+
+public class MergePathMultiTextOutputFormat extends MultipleTextOutputFormat<Text, Text>{
+    /** Returns {@code <prefix>/<name>}, where prefix is the part of {@code name} before its first '-'. */
+    @Override
+    protected String generateLeafFileName(String name) {
+        // Hadoop paths use "/" on every platform, so do not build them with File.separator.
+        return name.split("-")[0] + "/" + name;
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathValueWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathValueWritable.java
new file mode 100644
index 0000000..67b168d
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/MergePathValueWritable.java
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pathmergingh2;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.BinaryComparable;
+import org.apache.hadoop.io.WritableComparable;
+
+import edu.uci.ics.genomix.type.GeneCode;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+/**
+ * Value type of the path-merging jobs: an adjacency bitmap, a flag byte and a k-mer that may be empty.
+ * The {@link BinaryComparable} view of a value (bytes and length) is taken from its k-mer.
+ */
+public class MergePathValueWritable extends BinaryComparable implements WritableComparable<BinaryComparable> {
+    private static final byte[] EMPTY_BYTES = {};
+    private byte adjBitMap;
+    private byte flag;
+    private VKmerBytesWritable kmer;
+
+    public MergePathValueWritable() {
+        this((byte) 0, (byte) 0, 0, EMPTY_BYTES);
+    }
+
+    /** Creates a value with zero bitmap and flag whose k-mer is constructed with {@code k}. */
+    public MergePathValueWritable(int k) {
+        this.kmer = new VKmerBytesWritable(k);
+        this.flag = 0;
+        this.adjBitMap = 0;
+    }
+
+    /** Creates a value from its parts; the k-mer is built from {@code kmerSize} and {@code bytes}. */
+    public MergePathValueWritable(byte adjBitMap, byte flag, int kmerSize, byte[] bytes) {
+        this.kmer = new VKmerBytesWritable(kmerSize, bytes);
+        this.kmer.set(bytes, 0, bytes.length);
+        this.flag = flag;
+        this.adjBitMap = adjBitMap;
+    }
+
+    /** Copies bitmap, flag and k-mer from {@code right}. */
+    public void set(MergePathValueWritable right) {
+        set(right.getAdjBitMap(), right.getFlag(), right.getKmer());
+    }
+
+    /** Replaces all three parts; the k-mer is copied first, so a failing copy leaves bitmap and flag as-is. */
+    public void set(byte adjBitMap, byte flag, VKmerBytesWritable kmer) {
+        this.kmer.set(kmer);
+        this.adjBitMap = adjBitMap;
+        this.flag = flag;
+    }
+
+    /** Reads the k-mer, then the bitmap byte, then the flag byte. */
+    @Override
+    public void readFields(DataInput arg0) throws IOException {
+        kmer.readFields(arg0);
+        adjBitMap = arg0.readByte();
+        flag = arg0.readByte();
+    }
+
+    /** Writes the k-mer, then the bitmap byte, then the flag byte. */
+    @Override
+    public void write(DataOutput arg0) throws IOException {
+        kmer.write(arg0);
+        arg0.writeByte(adjBitMap);
+        arg0.writeByte(flag);
+    }
+
+    /** Returns the k-mer, or {@code null} when its byte length is 0. */
+    public VKmerBytesWritable getKmer() {
+        return kmer.getLength() == 0 ? null : kmer;
+    }
+
+    public byte getAdjBitMap() {
+        return adjBitMap;
+    }
+
+    public byte getFlag() {
+        return flag;
+    }
+
+    public String toString() {
+        return GeneCode.getSymbolFromBitMap(adjBitMap) + '\t' + String.valueOf(flag);
+    }
+
+    /** Returns the k-mer's bytes, or {@code null} when its byte length is 0. */
+    @Override
+    public byte[] getBytes() {
+        return kmer.getLength() == 0 ? null : kmer.getBytes();
+    }
+
+    public int getKmerLength() {
+        return kmer.getKmerLength();
+    }
+
+    @Override
+    public int getLength() {
+        return kmer.getLength();
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialMapper.java
new file mode 100644
index 0000000..6270852
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialMapper.java
@@ -0,0 +1,147 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pathmergingh2;
+
+import java.io.IOException;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.GeneCode;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+
+/**
+ * Mapper of the initialisation job. For each (k-mer, adjacency bitmap) pair it emits either
+ * <ul>
+ * <li>the k-mer itself with flag 2 and its bitmap, when the precursor nibble and the successor
+ * nibble of the bitmap each have exactly one bit set, or</li>
+ * <li>one record per set bit of either nibble, keyed by the k-mer shifted with the corresponding
+ * gene code: flag 0x80 for precursor bits and flag 0x01 for successor bits, bitmap 0.</li>
+ * </ul>
+ * <p>
+ * The writables handed to the collector are reused between calls.
+ */
+@SuppressWarnings("deprecation")
+public class SNodeInitialMapper extends MapReduceBase implements
+        Mapper<KmerBytesWritable, ByteWritable, KmerBytesWritable, MergePathValueWritable> {
+
+    /** Flag of the record that carries the k-mer's own bitmap. */
+    private static final byte FLAG_SELF = (byte) 0x02;
+
+    /** Flag of a record keyed by the k-mer shifted with a precursor code. */
+    private static final byte FLAG_VIA_PRECURSOR = (byte) 0x80;
+
+    /** Flag of a record keyed by the k-mer shifted with a successor code. */
+    private static final byte FLAG_VIA_SUCCESSOR = (byte) 0x01;
+
+    /** Number of bits in each half of the adjacency bitmap. */
+    private static final int BITS_PER_NIBBLE = 4;
+
+    /** K-mer size read from the job configuration. */
+    public int KMER_SIZE;
+    /** Reusable output key. */
+    public KmerBytesWritable outputKmer;
+    /** Reusable output value. */
+    public MergePathValueWritable outputAdjList;
+
+    /**
+     * Reads the {@code sizeKmer} setting and allocates the reusable output writables.
+     *
+     * @param job job configuration; must define {@code sizeKmer} as an integer
+     * @throws NumberFormatException if {@code sizeKmer} is missing or not an integer
+     */
+    public void configure(JobConf job) {
+        KMER_SIZE = Integer.parseInt(job.get("sizeKmer"));
+        outputKmer = new KmerBytesWritable(KMER_SIZE);
+        outputAdjList = new MergePathValueWritable();
+    }
+
+    /**
+     * Classifies one 4-bit half of an adjacency bitmap.
+     *
+     * @param adjacent nibble value; callers in this class pass 0..15
+     * @return {@code false} when exactly one of the four bits is set (1, 2, 4 or 8), {@code true} for
+     *         every other value, including 0
+     */
+    boolean measureDegree(byte adjacent) {
+        switch (adjacent) {
+            case 1:
+            case 2:
+            case 4:
+            case 8:
+                return false;
+            default:
+                return true;
+        }
+    }
+
+    /**
+     * Emits the records for one input pair, as described in the class comment.
+     * <p>
+     * While neighbour keys are generated, {@code key} is shifted in place; after each emission it is
+     * shifted in the opposite direction with the code that the first shift returned.
+     *
+     * @param key      input k-mer
+     * @param value    adjacency bitmap; the high nibble is treated as precursors, the low as successors
+     * @param output   collector receiving the generated records
+     * @param reporter unused
+     * @throws IOException if the collector fails
+     */
+    @Override
+    public void map(KmerBytesWritable key, ByteWritable value,
+            OutputCollector<KmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+        byte adjBitMap = value.get();
+        byte precursor = (byte) ((adjBitMap & 0xF0) >> 4);
+        byte succeed = (byte) (adjBitMap & 0x0F);
+        boolean inDegree = measureDegree(precursor);
+        boolean outDegree = measureDegree(succeed);
+
+        if (!inDegree && !outDegree) {
+            // Exactly one precursor bit and one successor bit: emit the k-mer itself with its bitmap.
+            outputKmer.set(key);
+            outputAdjList.set(adjBitMap, FLAG_SELF, null);
+            output.collect(outputKmer, outputAdjList);
+            return;
+        }
+
+        // One record per precursor bit, keyed by the k-mer shifted with that precursor's code.
+        for (int i = 0; i < BITS_PER_NIBBLE; i++) {
+            byte bit = (byte) (precursor & (0x01 << i));
+            if (bit != 0) {
+                byte precurCode = GeneCode.getGeneCodeFromBitMap(bit);
+                byte shiftedCode = key.shiftKmerWithPreCode(precurCode);
+                outputKmer.set(key);
+                outputAdjList.set((byte) 0, FLAG_VIA_PRECURSOR, null);
+                output.collect(outputKmer, outputAdjList);
+                key.shiftKmerWithNextCode(shiftedCode); // shift back before the next bit
+            }
+        }
+
+        // One record per successor bit, keyed by the k-mer shifted with that successor's code.
+        for (int i = 0; i < BITS_PER_NIBBLE; i++) {
+            byte bit = (byte) (succeed & (0x01 << i));
+            if (bit != 0) {
+                byte succeedCode = GeneCode.getGeneCodeFromBitMap(bit);
+                byte shiftedCode = key.shiftKmerWithNextCode(succeedCode);
+                outputKmer.set(key);
+                outputAdjList.set((byte) 0, FLAG_VIA_SUCCESSOR, null);
+                output.collect(outputKmer, outputAdjList);
+                key.shiftKmerWithPreCode(shiftedCode); // shift back before the next bit
+            }
+        }
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialReducer.java
new file mode 100644
index 0000000..8ba5aa8
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/pathmergingh2/SNodeInitialReducer.java
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pathmergingh2;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+/**
+ * Reducer of the initialisation job. For every k-mer it looks for a value with flag 0x02 and, if one
+ * is found, emits the k-mer with that value's bitmap and a flag combining the 0x01 and 0x80 flags
+ * that were also seen for the k-mer.
+ */
+@SuppressWarnings("deprecation")
+public class SNodeInitialReducer extends MapReduceBase implements
+        Reducer<KmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
+    private VKmerBytesWritable outputKmer = new VKmerBytesWritable();
+    private MergePathValueWritable outputValue = new MergePathValueWritable();
+
+    /**
+     * Processes the values of one k-mer.
+     * <p>
+     * A single value is emitted (with flag 0) only when its flag is 0x02. With several values the scan
+     * stops early once flags 0x01, 0x80 and 0x02 have all been seen; nothing is emitted unless a 0x02
+     * value was among the scanned ones.
+     *
+     * @param key      k-mer shared by all values
+     * @param values   values of the k-mer; must contain at least one element
+     * @param output   collector receiving the result, if any
+     * @param reporter unused
+     * @throws IOException if the collector fails
+     */
+    @Override
+    public void reduce(KmerBytesWritable key, Iterator<MergePathValueWritable> values,
+            OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+        outputKmer.set(key);
+        outputValue = values.next();
+        if (!values.hasNext()) {
+            if (outputValue.getFlag() == 2) {
+                outputValue.set(outputValue.getAdjBitMap(), (byte) 0, null);
+                output.collect(outputKmer, outputValue);
+            }
+            return;
+        }
+
+        // Which of the three flag values have been seen so far, and the bitmap of the last 0x02 value.
+        boolean sawStart = false;
+        boolean sawEnd = false;
+        boolean sawTarget = false;
+        byte targetAdjList = 0x00;
+        while (true) {
+            byte flag = outputValue.getFlag();
+            if (flag == (byte) 0x01) {
+                sawStart = true;
+            } else if (flag == (byte) 0x80) {
+                sawEnd = true;
+            } else if (flag == (byte) 0x02) {
+                sawTarget = true;
+                targetAdjList = outputValue.getAdjBitMap();
+            }
+            // After the first value at most one flag is known, so this never ends the scan too early.
+            if ((sawStart && sawEnd && sawTarget) || !values.hasNext()) {
+                break;
+            }
+            outputValue = values.next();
+        }
+        if (sawTarget) {
+            int outputFlag = (sawStart ? 0x01 : 0x00) | (sawEnd ? 0x80 : 0x00);
+            outputValue.set(targetAdjList, (byte) outputFlag, null);
+            output.collect(outputKmer, outputValue);
+        }
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/.DS_Store b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/.DS_Store
new file mode 100644
index 0000000..a38b133
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatDriver.java
new file mode 100644
index 0000000..7390d06
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatDriver.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.statistics;
+
+import java.io.IOException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+/**
+ * Configures and launches the "Genomix Statistics" MapReduce job: sequence-file
+ * input and output keyed by {@link BytesWritable} with {@link KmerCountValue}
+ * values, {@link GenomixStatMapper} as mapper and {@link GenomixStatReducer} as
+ * both combiner and reducer.
+ */
+@SuppressWarnings("deprecation")
+public class GenomixStatDriver {
+
+    /** Command-line options populated by args4j in {@link #main(String[])}. */
+    private static class Options {
+        @Option(name = "-inputpath", usage = "the input path", required = true)
+        public String inputPath;
+
+        @Option(name = "-outputpath", usage = "the output path", required = true)
+        public String outputPath;
+
+        @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+        public int numReducers;
+    }
+
+    /**
+     * Builds the job configuration, deletes any existing output and runs the job.
+     *
+     * @param inputPath       input path handed to {@link FileInputFormat}
+     * @param outputPath      output path; removed recursively before the job is submitted
+     * @param numReducers     number of reduce tasks
+     * @param defaultConfPath extra configuration resource to load, or {@code null} for none
+     * @throws IOException if deleting the output path or running the job fails
+     */
+    public void run(String inputPath, String outputPath, int numReducers, String defaultConfPath)
+            throws IOException {
+        JobConf conf = new JobConf(GenomixStatDriver.class);
+        if (defaultConfPath != null) {
+            conf.addResource(new Path(defaultConfPath));
+        }
+
+        conf.setJobName("Genomix Statistics");
+        conf.setMapperClass(GenomixStatMapper.class);
+        conf.setReducerClass(GenomixStatReducer.class);
+        // NOTE(review): the combiner is the reducer class, which bumps its NUM_RECORDS
+        // counter on every call, so that counter also advances during combine passes.
+        conf.setCombinerClass(GenomixStatReducer.class);
+
+        conf.setMapOutputKeyClass(BytesWritable.class);
+        conf.setMapOutputValueClass(KmerCountValue.class);
+
+        conf.setInputFormat(SequenceFileInputFormat.class);
+        conf.setOutputFormat(SequenceFileOutputFormat.class);
+
+        conf.setOutputKeyClass(BytesWritable.class);
+        conf.setOutputValueClass(KmerCountValue.class);
+
+        Path output = new Path(outputPath);
+        FileInputFormat.setInputPaths(conf, new Path(inputPath));
+        FileOutputFormat.setOutputPath(conf, output);
+        conf.setNumReduceTasks(numReducers);
+
+        // Resolve the file system from the output path itself, so a fully qualified
+        // URI on a file system other than the configured default is deleted correctly.
+        FileSystem outputFs = output.getFileSystem(conf);
+        outputFs.delete(output, true);
+        JobClient.runJob(conf);
+    }
+
+    /**
+     * Parses the command-line options and runs the job with no extra configuration resource.
+     */
+    public static void main(String[] args) throws Exception {
+        Options options = new Options();
+        CmdLineParser parser = new CmdLineParser(options);
+        parser.parseArgument(args);
+        GenomixStatDriver driver = new GenomixStatDriver();
+        driver.run(options.inputPath, options.outputPath, options.numReducers, null);
+    }
+}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatMapper.java
new file mode 100644
index 0000000..bb94c5d
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatMapper.java
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.statistics;
+
+import java.io.IOException;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+/**
+ * Map-side filter that inspects the two nibbles of each record's adjacency
+ * bitmap and passes through only the records matching the pattern described on
+ * {@link #map}.
+ */
+@SuppressWarnings({ "unused", "deprecation" })
+public class GenomixStatMapper extends MapReduceBase implements
+        Mapper<BytesWritable, KmerCountValue, BytesWritable, KmerCountValue> {
+
+    /**
+     * Classifies an adjacency nibble.
+     *
+     * @param adjacent the nibble to classify; {@link #map} passes values in 0..15
+     * @return {@code false} when the value is exactly 1, 2, 4 or 8 (a single bit
+     *         set), {@code true} for every other value, including 0
+     */
+    boolean measureDegree(byte adjacent) {
+        boolean exactlyOneBit = adjacent == 1 || adjacent == 2 || adjacent == 4 || adjacent == 8;
+        return !exactlyOneBit;
+    }
+
+    /**
+     * Emits the record unchanged when the upper nibble of its adjacency bitmap is
+     * not a single bit while the lower nibble is exactly one bit; drops it otherwise.
+     * Presumably the upper nibble encodes predecessors and the lower nibble
+     * successors (TODO confirm against KmerCountValue).
+     */
+    @Override
+    public void map(BytesWritable key, KmerCountValue value, OutputCollector<BytesWritable, KmerCountValue> output,
+            Reporter reporter) throws IOException {
+        // Widen to an unsigned int first so the shift cannot drag in sign bits.
+        int adjBits = value.getAdjBitMap() & 0xFF;
+        byte upperNibble = (byte) (adjBits >> 4);
+        byte lowerNibble = (byte) (adjBits & 0x0F);
+        boolean upperIsNotSingle = measureDegree(upperNibble);
+        boolean lowerIsNotSingle = measureDegree(lowerNibble);
+        if (upperIsNotSingle && !lowerIsNotSingle) {
+            output.collect(key, value);
+        }
+    }
+}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatReducer.java
new file mode 100644
index 0000000..d5ce11c
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/statistics/GenomixStatReducer.java
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.statistics;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+/**
+ * Reducer that forwards one value per key and counts how many times it was invoked.
+ */
+@SuppressWarnings("deprecation")
+public class GenomixStatReducer extends MapReduceBase implements
+        Reducer<BytesWritable, KmerCountValue, BytesWritable, KmerCountValue> {
+
+    /** Counter incremented once per {@link #reduce} call. */
+    enum MyCounters {
+        NUM_RECORDS
+    }
+
+    /** Last value forwarded by {@link #reduce}; overwritten on every call. */
+    KmerCountValue valWriter = new KmerCountValue();
+
+    /**
+     * Increments {@code NUM_RECORDS}, then emits the key with the first value of
+     * the iterator. Any further values for the key are not read.
+     */
+    @Override
+    public void reduce(BytesWritable key, Iterator<KmerCountValue> values,
+            OutputCollector<BytesWritable, KmerCountValue> output, Reporter reporter) throws IOException {
+        reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
+        KmerCountValue first = values.next();
+        valWriter = first;
+        output.collect(key, first);
+    }
+}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/.DS_Store b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/.DS_Store
new file mode 100644
index 0000000..14d85eb
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingTest.java
new file mode 100644
index 0000000..d29c160
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/gbresultschecking/ResultsCheckingTest.java
@@ -0,0 +1,105 @@
+package edu.uci.ics.genomix.gbresultschecking;
+
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.junit.Test;
+
+import edu.uci.ics.genomix.gbresultschecking.ResultsCheckingDriver;
+
+/**
+ * Runs {@link ResultsCheckingDriver} against mini DFS/MR clusters and copies the
+ * job output to a local directory. No assertions are made on the output; the
+ * test fails only if one of the steps throws.
+ */
+@SuppressWarnings("deprecation")
+public class ResultsCheckingTest {
+    private static final String ACTUAL_RESULT_DIR = "actual4";
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+    private static final String DATA_PATH1 = "ResultsCheckingData" + "/part-00000";
+    private static final String DATA_PATH2 = "ResultsCheckingData" + "/part-00001";
+    private static final String HDFS_PATH1 = "/webmap1";
+    private static final String HDFS_PATH2 = "/webmap2";
+    private static final String RESULT_PATH = "/result4";
+    private static final int COUNT_REDUCER = 4;
+    private static final int SIZE_KMER = 3;
+
+    private JobConf conf = new JobConf();
+    private MiniDFSCluster dfsCluster;
+    private MiniMRCluster mrCluster;
+    private FileSystem dfs;
+
+    @Test
+    public void test() throws Exception {
+        FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+        FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+        try {
+            startHadoop();
+            ResultsCheckingDriver tldriver = new ResultsCheckingDriver();
+            tldriver.run(HDFS_PATH1, HDFS_PATH2, RESULT_PATH, COUNT_REDUCER, SIZE_KMER, HADOOP_CONF_PATH);
+            dumpResult();
+        } finally {
+            // Stop the mini clusters even when start-up, the job or the copy fails.
+            cleanupHadoop();
+        }
+    }
+
+    /**
+     * Starts the mini clusters, uploads both input files and writes the job
+     * configuration to {@code HADOOP_CONF_PATH}.
+     */
+    private void startHadoop() throws IOException {
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, 2, true, null);
+        dfs = dfsCluster.getFileSystem();
+        mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
+
+        Path src = new Path(DATA_PATH1);
+        Path dest = new Path(HDFS_PATH1 + "/");
+        dfs.mkdirs(dest);
+        dfs.copyFromLocalFile(src, dest);
+        // NOTE(review): unlike HDFS_PATH1, no mkdirs is issued for HDFS_PATH2 — confirm intended.
+        src = new Path(DATA_PATH2);
+        dest = new Path(HDFS_PATH2 + "/");
+        dfs.copyFromLocalFile(src, dest);
+
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        try {
+            conf.writeXml(confOutput);
+            confOutput.flush();
+        } finally {
+            // Release the file handle even if serialising the configuration fails.
+            confOutput.close();
+        }
+    }
+
+    /** Shuts down whichever mini clusters were started; safe after a partial start-up. */
+    private void cleanupHadoop() throws IOException {
+        try {
+            if (mrCluster != null) {
+                mrCluster.shutdown();
+            }
+        } finally {
+            if (dfsCluster != null) {
+                dfsCluster.shutdown();
+            }
+        }
+    }
+
+    /** Copies the job output directory from the mini DFS to {@code ACTUAL_RESULT_DIR}. */
+    private void dumpResult() throws IOException {
+        Path src = new Path(RESULT_PATH);
+        Path dest = new Path(ACTUAL_RESULT_DIR + "/");
+        dfs.copyToLocalFile(src, dest);
+    }
+}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/graphbuilding/GraphBuildingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/graphbuilding/GraphBuildingTest.java
new file mode 100755
index 0000000..d329c6a
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/graphbuilding/GraphBuildingTest.java
@@ -0,0 +1,178 @@
+package edu.uci.ics.genomix.graphbuilding;
+
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.BufferedWriter;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.junit.Test;
+
+import edu.uci.ics.genomix.graphbuilding.GenomixDriver;
+import edu.uci.ics.genomix.type.GeneCode;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.KmerCountValue;
+import edu.uci.ics.genomix.utils.TestUtils;
+
+/**
+ * Runs the graph-building job ({@link GenomixDriver}) on mini DFS/MR clusters and
+ * dumps its first output partition as text. The comparison against the expected
+ * result is currently commented out, so the test fails only if a step throws.
+ */
+@SuppressWarnings("deprecation")
+public class GraphBuildingTest {
+
+    private static final String ACTUAL_RESULT_DIR = "actual1";
+    private static final String COMPARE_DIR = "compare";
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+    private static final String DATA_PATH = "data/webmap/TreePath";
+    private static final String HDFS_PATH = "/webmap";
+    private static final String RESULT_PATH = "/result1";
+    private static final String EXPECTED_PATH = "expected/result1";
+    private static final String TEST_SOURCE_DIR = COMPARE_DIR + RESULT_PATH + "/comparesource.txt";
+    private static final int COUNT_REDUCER = 4;
+    private static final int SIZE_KMER = 5;
+    private static final String GRAPHVIZ = "Graphviz/GenomixSource.txt";
+
+    private JobConf conf = new JobConf();
+    private MiniDFSCluster dfsCluster;
+    private MiniMRCluster mrCluster;
+    private FileSystem dfs;
+
+    @Test
+    public void test() throws Exception {
+        FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+        FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+        try {
+            startHadoop();
+
+            // run graph transformation tests
+            GenomixDriver tldriver = new GenomixDriver();
+            tldriver.run(HDFS_PATH, RESULT_PATH, COUNT_REDUCER, SIZE_KMER, HADOOP_CONF_PATH);
+
+            writeTextDumps();
+            dumpResult();
+            // TestUtils.compareWithResult(new File(TEST_SOURCE_DIR), new File(EXPECTED_PATH));
+        } finally {
+            // Stop the mini clusters even when start-up, the job or the dump fails.
+            cleanupHadoop();
+        }
+    }
+
+    /**
+     * Reads {@code RESULT_PATH/part-00000} and writes two text files: one
+     * "key TAB value" line per record to {@code TEST_SOURCE_DIR}, and, for every bit
+     * set in the lower nibble of a record's adjacency bitmap, the key followed by
+     * the key shifted with that bit's gene code to {@code GRAPHVIZ}. The reader and
+     * both writers are closed even when reading or writing fails.
+     */
+    private void writeTextDumps() throws Exception {
+        SequenceFile.Reader reader = new SequenceFile.Reader(dfs, new Path(RESULT_PATH + "/part-00000"), conf);
+        try {
+            // KmerBytesWritable key = (KmerBytesWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+            KmerBytesWritable key = new KmerBytesWritable(SIZE_KMER);
+            KmerCountValue value = (KmerCountValue) ReflectionUtils.newInstance(reader.getValueClass(), conf);
+            BufferedWriter bw = new BufferedWriter(new FileWriter(new File(TEST_SOURCE_DIR)));
+            try {
+                BufferedWriter bw2 = new BufferedWriter(new FileWriter(new File(GRAPHVIZ)));
+                try {
+                    while (reader.next(key, value)) {
+                        byte succeed = (byte) (0x0F & value.getAdjBitMap());
+                        for (int i = 0; i < 4; i++) {
+                            byte temp = (byte) (succeed & (0x01 << i));
+                            if (temp != 0) {
+                                bw2.write(key.toString());
+                                bw2.newLine();
+                                byte succeedCode = GeneCode.getGeneCodeFromBitMap(temp);
+                                byte shiftedCode = key.shiftKmerWithNextCode(succeedCode);
+                                bw2.write(key.toString());
+                                bw2.newLine();
+                                // Presumably undoes the shift above so the next bit starts from the same k-mer.
+                                key.shiftKmerWithPreCode(shiftedCode);
+                            }
+                        }
+                        bw.write(key.toString() + "\t" + value.toString());
+                        bw.newLine();
+                    }
+                } finally {
+                    bw2.close();
+                }
+            } finally {
+                bw.close();
+            }
+        } finally {
+            reader.close();
+        }
+    }
+
+    /**
+     * Starts the mini clusters, uploads the input data and writes the job
+     * configuration to {@code HADOOP_CONF_PATH}.
+     */
+    private void startHadoop() throws IOException {
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, 2, true, null);
+        dfs = dfsCluster.getFileSystem();
+        mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
+
+        Path src = new Path(DATA_PATH);
+        Path dest = new Path(HDFS_PATH + "/");
+        dfs.mkdirs(dest);
+        dfs.copyFromLocalFile(src, dest);
+
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        try {
+            conf.writeXml(confOutput);
+            confOutput.flush();
+        } finally {
+            // Release the file handle even if serialising the configuration fails.
+            confOutput.close();
+        }
+    }
+
+    /** Shuts down whichever mini clusters were started; safe after a partial start-up. */
+    private void cleanupHadoop() throws IOException {
+        try {
+            if (mrCluster != null) {
+                mrCluster.shutdown();
+            }
+        } finally {
+            if (dfsCluster != null) {
+                dfsCluster.shutdown();
+            }
+        }
+    }
+
+    /** Copies the job output directory from the mini DFS to {@code ACTUAL_RESULT_DIR}. */
+    private void dumpResult() throws IOException {
+        Path src = new Path(RESULT_PATH);
+        Path dest = new Path(ACTUAL_RESULT_DIR);
+        dfs.copyToLocalFile(src, dest);
+    }
+}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/graphcountfilter/CountFilterTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/graphcountfilter/CountFilterTest.java
new file mode 100644
index 0000000..6eaced2
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/graphcountfilter/CountFilterTest.java
@@ -0,0 +1,139 @@
+package edu.uci.ics.genomix.graphcountfilter;
+
+import java.io.BufferedWriter;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.junit.Test;
+
+import edu.uci.ics.genomix.graphcountfilter.CountFilterDriver;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.utils.TestUtils;
+
+/**
+ * Runs {@link CountFilterDriver} on mini DFS/MR clusters, dumps the first output
+ * partition as text and compares that dump with the file at {@code EXPECTED_PATH}.
+ */
+@SuppressWarnings("deprecation")
+public class CountFilterTest {
+    private static final String ACTUAL_RESULT_DIR = "actual2";
+    private static final String COMPARE_DIR = "compare";
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+    private static final String DATA_PATH = "actual1" + "/result1" + "/part-00000";
+    private static final String HDFS_PATH = "/webmap";
+    private static final String RESULT_PATH = "/result2";
+    private static final String EXPECTED_PATH = "expected/result2";
+    private static final String TEST_SOURCE_DIR = COMPARE_DIR + RESULT_PATH + "/comparesource.txt";
+    private static final int COUNT_REDUCER = 4;
+    private static final int SIZE_KMER = 5;
+
+    private JobConf conf = new JobConf();
+    private MiniDFSCluster dfsCluster;
+    private MiniMRCluster mrCluster;
+    private FileSystem dfs;
+
+    @Test
+    public void test() throws Exception {
+        FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+        FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+        try {
+            startHadoop();
+
+            // run graph transformation tests
+            CountFilterDriver tldriver = new CountFilterDriver();
+            tldriver.run(HDFS_PATH, RESULT_PATH, COUNT_REDUCER, 1, HADOOP_CONF_PATH);
+
+            writeTextDump();
+            dumpResult();
+            // NOTE(review): TestUtils.compareWithResult declares (expectedFile, actualFile),
+            // but the generated dump is passed first here — confirm the intended order.
+            TestUtils.compareWithResult(new File(TEST_SOURCE_DIR), new File(EXPECTED_PATH));
+        } finally {
+            // Stop the mini clusters even when the job or the comparison fails.
+            cleanupHadoop();
+        }
+    }
+
+    /**
+     * Writes one "key TAB value" line per record of {@code RESULT_PATH/part-00000}
+     * to {@code TEST_SOURCE_DIR}, closing the reader and the writer even on failure.
+     */
+    private void writeTextDump() throws IOException {
+        SequenceFile.Reader reader = new SequenceFile.Reader(dfs, new Path(RESULT_PATH + "/part-00000"), conf);
+        try {
+            KmerBytesWritable key = (KmerBytesWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+            ByteWritable value = (ByteWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
+            BufferedWriter bw = new BufferedWriter(new FileWriter(new File(TEST_SOURCE_DIR)));
+            try {
+                while (reader.next(key, value)) {
+                    bw.write(key.toString() + "\t" + value.toString());
+                    bw.newLine();
+                }
+            } finally {
+                bw.close();
+            }
+        } finally {
+            reader.close();
+        }
+    }
+
+    /**
+     * Starts the mini clusters, uploads the input file and writes the job
+     * configuration to {@code HADOOP_CONF_PATH}.
+     */
+    private void startHadoop() throws IOException {
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, 2, true, null);
+        dfs = dfsCluster.getFileSystem();
+        mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
+
+        Path src = new Path(DATA_PATH);
+        Path dest = new Path(HDFS_PATH + "/");
+        dfs.mkdirs(dest);
+        dfs.copyFromLocalFile(src, dest);
+
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        try {
+            conf.writeXml(confOutput);
+            confOutput.flush();
+        } finally {
+            // Release the file handle even if serialising the configuration fails.
+            confOutput.close();
+        }
+    }
+
+    /** Shuts down whichever mini clusters were started; safe after a partial start-up. */
+    private void cleanupHadoop() throws IOException {
+        try {
+            if (mrCluster != null) {
+                mrCluster.shutdown();
+            }
+        } finally {
+            if (dfsCluster != null) {
+                dfsCluster.shutdown();
+            }
+        }
+    }
+
+    /** Copies the job output directory from the mini DFS to {@code ACTUAL_RESULT_DIR}. */
+    private void dumpResult() throws IOException {
+        Path src = new Path(RESULT_PATH);
+        Path dest = new Path(ACTUAL_RESULT_DIR + "/");
+        dfs.copyToLocalFile(src, dest);
+    }
+}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/pathmergingh1/MergePathTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/pathmergingh1/MergePathTest.java
new file mode 100644
index 0000000..c7a1213
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/pathmergingh1/MergePathTest.java
@@ -0,0 +1,133 @@
+package edu.uci.ics.genomix.pathmergingh1;
+
+import java.io.BufferedWriter;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.junit.Test;
+
+import edu.uci.ics.genomix.pathmergingh1.MergePathH1Driver;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.utils.TestUtils;
+
+/**
+ * Runs {@link MergePathH1Driver} on mini DFS/MR clusters and copies the job output
+ * to a local directory. The text dump and the comparison against the expected
+ * result are commented out, so the test fails only if a step throws.
+ */
+@SuppressWarnings("deprecation")
+public class MergePathTest {
+    private static final String ACTUAL_RESULT_DIR = "actual3";
+    private static final String COMPARE_DIR = "compare";
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+    private static final String DATA_PATH = "actual2" + "/result2" + "/part-00000";
+    private static final String HDFS_PATH = "/webmap";
+    private static final String HDFA_PATH_DATA = "/webmapdata";
+
+    private static final String RESULT_PATH = "/result3";
+    private static final String EXPECTED_PATH = "expected/result3";
+    private static final String TEST_SOURCE_DIR = COMPARE_DIR + RESULT_PATH + "/comparesource.txt";
+    private static final int COUNT_REDUCER = 1;
+    private static final int SIZE_KMER = 3;
+
+    private JobConf conf = new JobConf();
+    private MiniDFSCluster dfsCluster;
+    private MiniMRCluster mrCluster;
+    private FileSystem dfs;
+
+    @SuppressWarnings("resource")
+    @Test
+    public void test() throws Exception {
+        FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+        FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+        try {
+            startHadoop();
+
+            MergePathH1Driver tldriver = new MergePathH1Driver();
+            tldriver.run(HDFS_PATH, RESULT_PATH, HDFA_PATH_DATA, COUNT_REDUCER, SIZE_KMER, 1, HADOOP_CONF_PATH);
+
+/*            SequenceFile.Reader reader = null;
+            Path path = new Path(RESULT_PATH + "/part-00000");
+//            Path path = new Path(RESULT_PATH + "/uncomplete0" + "/uncomplete0-r-00000");
+            reader = new SequenceFile.Reader(dfs, path, conf);
+            VKmerBytesWritable key = (VKmerBytesWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+            MergePathValueWritable value = (MergePathValueWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
+            File filePathTo = new File(TEST_SOURCE_DIR);
+            BufferedWriter bw = new BufferedWriter(new FileWriter(filePathTo));
+            while (reader.next(key, value)) {
+                bw.write(key.toString() + "\t" + value.getAdjBitMap() + "\t" + value.getFlag());
+                bw.newLine();
+            }
+            bw.close();*/
+            dumpResult();
+
+//            TestUtils.compareWithResult(new File(TEST_SOURCE_DIR), new File(EXPECTED_PATH));
+        } finally {
+            // Stop the mini clusters even when start-up, the job or the copy fails.
+            cleanupHadoop();
+        }
+    }
+
+    /**
+     * Starts the mini clusters, uploads the input file, creates the
+     * {@code HDFA_PATH_DATA} directory and writes the job configuration to
+     * {@code HADOOP_CONF_PATH}.
+     */
+    private void startHadoop() throws IOException {
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, 2, true, null);
+        dfs = dfsCluster.getFileSystem();
+        mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
+
+        Path src = new Path(DATA_PATH);
+        Path dest = new Path(HDFS_PATH + "/");
+        dfs.mkdirs(dest);
+        dfs.copyFromLocalFile(src, dest);
+        Path data = new Path(HDFA_PATH_DATA + "/");
+        dfs.mkdirs(data);
+
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        try {
+            conf.writeXml(confOutput);
+            confOutput.flush();
+        } finally {
+            // Release the file handle even if serialising the configuration fails.
+            confOutput.close();
+        }
+    }
+
+    /** Shuts down whichever mini clusters were started; safe after a partial start-up. */
+    private void cleanupHadoop() throws IOException {
+        try {
+            if (mrCluster != null) {
+                mrCluster.shutdown();
+            }
+        } finally {
+            if (dfsCluster != null) {
+                dfsCluster.shutdown();
+            }
+        }
+    }
+
+    /** Copies the job output directory from the mini DFS to {@code ACTUAL_RESULT_DIR}. */
+    private void dumpResult() throws IOException {
+//        Path src = new Path(HDFA_PATH_DATA + "/" + "complete2");
+        Path src = new Path(RESULT_PATH);
+        Path dest = new Path(ACTUAL_RESULT_DIR + "/");
+        dfs.copyToLocalFile(src, dest);
+    }
+}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Test.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Test.java
new file mode 100644
index 0000000..e72c352
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/pathmergingh2/MergePathH2Test.java
@@ -0,0 +1,133 @@
+package edu.uci.ics.genomix.pathmergingh2;
+
+import java.io.BufferedWriter;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.junit.Test;
+
+import edu.uci.ics.genomix.pathmergingh2.MergePathH2Driver;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.utils.TestUtils;
+
+/**
+ * Runs {@link MergePathH2Driver} on mini DFS/MR clusters. The text dump, the copy
+ * of the job output and the comparison against the expected result are all
+ * commented out, so the test fails only if start-up or the job throws.
+ */
+@SuppressWarnings("deprecation")
+public class MergePathH2Test {
+    private static final String ACTUAL_RESULT_DIR = "actual4";
+    private static final String COMPARE_DIR = "compare";
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+    private static final String DATA_PATH = "actual2" + "/result2" + "/part-00000";
+    private static final String HDFS_PATH = "/webmap";
+    private static final String HDFA_PATH_DATA = "/webmapdata";
+
+    private static final String RESULT_PATH = "/result4";
+    private static final String EXPECTED_PATH = "expected/result4";
+    private static final String TEST_SOURCE_DIR = COMPARE_DIR + RESULT_PATH + "/comparesource.txt";
+    private static final int COUNT_REDUCER = 1;
+    private static final int SIZE_KMER = 3;
+
+    private JobConf conf = new JobConf();
+    private MiniDFSCluster dfsCluster;
+    private MiniMRCluster mrCluster;
+    private FileSystem dfs;
+
+    @SuppressWarnings("resource")
+    @Test
+    public void test() throws Exception {
+        FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+        FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+        try {
+            startHadoop();
+
+            MergePathH2Driver tldriver = new MergePathH2Driver();
+            tldriver.run(HDFS_PATH, RESULT_PATH, HDFA_PATH_DATA, COUNT_REDUCER, SIZE_KMER, 1, HADOOP_CONF_PATH);
+
+/*            SequenceFile.Reader reader = null;
+//            Path path = new Path(RESULT_PATH + "/part-00000");
+            Path path = new Path(RESULT_PATH + "/uncomplete0" + "/uncomplete0-r-00000");
+            reader = new SequenceFile.Reader(dfs, path, conf);
+            VKmerBytesWritable key = (VKmerBytesWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+            MergePathValueWritable value = (MergePathValueWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
+            File filePathTo = new File(TEST_SOURCE_DIR);
+            BufferedWriter bw = new BufferedWriter(new FileWriter(filePathTo));
+            while (reader.next(key, value)) {
+                bw.write(key.toString() + "\t" + value.getAdjBitMap() + "\t" + value.getFlag());
+                bw.newLine();
+            }
+            bw.close();*/
+//            dumpResult();
+
+//            TestUtils.compareWithResult(new File(TEST_SOURCE_DIR), new File(EXPECTED_PATH));
+        } finally {
+            // Stop the mini clusters even when start-up or the job fails.
+            cleanupHadoop();
+        }
+    }
+
+    /**
+     * Starts the mini clusters, uploads the input file, creates the
+     * {@code HDFA_PATH_DATA} directory and writes the job configuration to
+     * {@code HADOOP_CONF_PATH}.
+     */
+    private void startHadoop() throws IOException {
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, 2, true, null);
+        dfs = dfsCluster.getFileSystem();
+        mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
+
+        Path src = new Path(DATA_PATH);
+        Path dest = new Path(HDFS_PATH + "/");
+        dfs.mkdirs(dest);
+        dfs.copyFromLocalFile(src, dest);
+        Path data = new Path(HDFA_PATH_DATA + "/");
+        dfs.mkdirs(data);
+
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        try {
+            conf.writeXml(confOutput);
+            confOutput.flush();
+        } finally {
+            // Release the file handle even if serialising the configuration fails.
+            confOutput.close();
+        }
+    }
+
+    /** Shuts down whichever mini clusters were started; safe after a partial start-up. */
+    private void cleanupHadoop() throws IOException {
+        try {
+            if (mrCluster != null) {
+                mrCluster.shutdown();
+            }
+        } finally {
+            if (dfsCluster != null) {
+                dfsCluster.shutdown();
+            }
+        }
+    }
+
+    /** Copies the job output directory from the mini DFS to {@code ACTUAL_RESULT_DIR}. */
+    private void dumpResult() throws IOException {
+//        Path src = new Path(HDFA_PATH_DATA + "/" + "complete2");
+        Path src = new Path(RESULT_PATH);
+        Path dest = new Path(ACTUAL_RESULT_DIR + "/");
+        dfs.copyToLocalFile(src, dest);
+    }
+}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/utils/TestUtils.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/utils/TestUtils.java
new file mode 100755
index 0000000..73a7254
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/utils/TestUtils.java
@@ -0,0 +1,75 @@
+package edu.uci.ics.genomix.utils;
+
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+
+/**
+ * This class offers services for graphbuildingtest.class
+ */
+public class TestUtils {
+    /**
+     * Compares two text files line by line and throws on the first mismatch.
+     * <p>
+     * Each pair of lines is compared field by field after splitting on tab
+     * characters (see {@link #equalStrings(String, String)}). The comparison
+     * also fails when one file has more lines than the other.
+     *
+     * @param expectedFile
+     *            file containing the expected lines
+     * @param actualFile
+     *            file containing the lines to verify
+     * @throws Exception
+     *             if a line differs, if the files have different line counts,
+     *             or if either file cannot be opened or read
+     */
+    public static void compareWithResult(File expectedFile, File actualFile) throws Exception {
+        BufferedReader readerExpected = new BufferedReader(new FileReader(expectedFile));
+        try {
+            // Opened inside the outer try so the first reader is released if this open fails.
+            BufferedReader readerActual = new BufferedReader(new FileReader(actualFile));
+            try {
+                String lineExpected;
+                String lineActual;
+                int num = 1; // 1-based line number used in the failure messages
+                while ((lineExpected = readerExpected.readLine()) != null) {
+                    lineActual = readerActual.readLine();
+                    if (lineActual == null) {
+                        // The actual file ended before the expected one.
+                        throw new Exception("Actual result changed at line " + num + ":\n< " + lineExpected + "\n> ");
+                    }
+                    if (!equalStrings(lineExpected, lineActual)) {
+                        throw new Exception("Result for changed at line " + num + ":\n< " + lineExpected + "\n> "
+                                + lineActual);
+                    }
+                    ++num;
+                }
+                lineActual = readerActual.readLine();
+                if (lineActual != null) {
+                    // The actual file has extra lines after the expected one ended.
+                    throw new Exception("Actual result changed at line " + num + ":\n< \n> " + lineActual);
+                }
+            } finally {
+                readerActual.close();
+            }
+        } finally {
+            // Runs even if closing the other reader throws.
+            readerExpected.close();
+        }
+    }
+
+    /**
+     * Tells whether two lines hold the same tab-separated fields.
+     * <p>
+     * Both lines are split with {@code String.split("\t")}, which discards
+     * trailing empty fields, so trailing tab characters do not affect the result.
+     *
+     * @param s1
+     *            first line
+     * @param s2
+     *            second line
+     * @return {@code true} if both lines have the same number of fields and
+     *         every pair of fields is equal, {@code false} otherwise
+     */
+    private static boolean equalStrings(String s1, String s2) {
+        String[] rowsOne = s1.split("\t");
+        String[] rowsTwo = s2.split("\t");
+
+        if (rowsOne.length != rowsTwo.length) {
+            return false;
+        }
+
+        for (int i = 0; i < rowsOne.length; i++) {
+            if (!rowsOne[i].equals(rowsTwo[i])) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Command-line entry point; performs no work.
+     *
+     * @param args
+     *            ignored
+     * @throws Exception
+     *             never thrown; declared to keep the signature unchanged
+     */
+    public static void main(String[] args) throws Exception {
+        // Nothing to run: all functionality is exposed through the static helpers.
+    }
+}