change name for files
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/.DS_Store b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/.DS_Store
deleted file mode 100644
index f5eb144..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/.DS_Store
+++ /dev/null
Binary files differ
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingDriver.java
deleted file mode 100644
index 132f6e0..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingDriver.java
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.gbresultschecking;
-
-import java.io.IOException;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.TextOutputFormat;
-import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.Option;
-
-@SuppressWarnings("deprecation")
-public class ResultsCheckingDriver {
- private static class Options {
- @Option(name = "-inputpath1", usage = "the input path", required = true)
- public String inputPath1;
-
- @Option(name = "-inputpath2", usage = "the input path", required = true)
- public String inputPath2;
-
- @Option(name = "-outputpath", usage = "the output path", required = true)
- public String outputPath;
-
- @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
- public int numReducers;
-
- @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
- public int sizeKmer;
-
- }
-
- public void run(String inputPath1, String inputPath2, String outputPath, int numReducers, int sizeKmer,
- String defaultConfPath) throws IOException {
-
- JobConf conf = new JobConf(ResultsCheckingDriver.class);
-
- conf.setInt("sizeKmer", sizeKmer);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
-
- conf.setJobName("Results Checking");
- conf.setMapperClass(ResultsCheckingMapper.class);
- conf.setReducerClass(ResultsCheckingReducer.class);
-
- conf.setMapOutputKeyClass(Text.class);
- conf.setMapOutputValueClass(Text.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
- conf.setOutputFormat(TextOutputFormat.class);
-
- conf.setOutputKeyClass(Text.class);
- conf.setOutputValueClass(Text.class);
-
- Path[] inputList = new Path[2];
- inputList[0] = new Path(inputPath1);
- inputList[1] = new Path(inputPath2);
-
- FileInputFormat.setInputPaths(conf, inputList);
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- conf.setNumReduceTasks(numReducers);
-
- FileSystem dfs = FileSystem.get(conf);
- dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- }
-
- public static void main(String[] args) throws Exception {
- Options options = new Options();
- CmdLineParser parser = new CmdLineParser(options);
- parser.parseArgument(args);
- ResultsCheckingDriver driver = new ResultsCheckingDriver();
- driver.run(options.inputPath1, options.inputPath2, options.outputPath, options.numReducers, options.sizeKmer,
- null);
- }
-
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingMapper.java
deleted file mode 100644
index dca3808..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingMapper.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.gbresultschecking;
-
-import java.io.IOException;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.KmerCountValue;
-
-@SuppressWarnings({ "unused", "deprecation" })
-public class ResultsCheckingMapper extends MapReduceBase implements Mapper<KmerBytesWritable, KmerCountValue, Text, Text> {
- KmerBytesWritable valWriter;
- private final static IntWritable one = new IntWritable(1);
- public static Text textkey = new Text();
- public static Text textvalue = new Text();
- public static String INPUT_PATH;
- public static int KMER_SIZE;
-
- public void configure(JobConf job) {
- KMER_SIZE = job.getInt("sizeKmer", 0);
- valWriter= new KmerBytesWritable(KMER_SIZE);
- }
-
- @Override
- public void map(KmerBytesWritable key, KmerCountValue value, OutputCollector<Text, Text> output, Reporter reporter)
- throws IOException {
-
- FileSplit fileSplit = (FileSplit) reporter.getInputSplit();
- String filename = fileSplit.getPath().getName();
- textkey.set(key.toString() + "\t" + value.toString());
- textvalue.set(filename);
- output.collect(textkey, textvalue);
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingReducer.java
deleted file mode 100644
index 6f02136..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingReducer.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.gbresultschecking;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-
-@SuppressWarnings("deprecation")
-public class ResultsCheckingReducer extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
-
- public static Text textkey = new Text();
- public static Text textvalue = new Text();
-
- @Override
- public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
- throws IOException {
- textkey.set(key);
- textvalue.set(values.next());
- if (values.hasNext() == false) {
- output.collect(textkey, textvalue);
- }
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java
deleted file mode 100755
index 09a1bf1..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.graphbuilding;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.KmerCountValue;
-
-/**
- * This class implement the combiner operator of Mapreduce model
- */
-@SuppressWarnings("deprecation")
-public class GenomixCombiner extends MapReduceBase implements
- Reducer<KmerBytesWritable, KmerCountValue, KmerBytesWritable, KmerCountValue> {
- private KmerCountValue vaWriter = new KmerCountValue();
-
- @Override
- public void reduce(KmerBytesWritable key, Iterator<KmerCountValue> values,
- OutputCollector<KmerBytesWritable, KmerCountValue> output, Reporter reporter) throws IOException {
- byte groupByAdjList = 0;
- int count = 0;
- byte bytCount = 0;
- while (values.hasNext()) {
- //Merge By the all adjacent Nodes;
- KmerCountValue geneValue = values.next();
- groupByAdjList = (byte) (groupByAdjList | geneValue.getAdjBitMap());
- count = count + (int) geneValue.getCount();
- }
- if (count >= 127)
- bytCount = (byte) 127;
- else
- bytCount = (byte) count;
- vaWriter.set(groupByAdjList, bytCount);
- output.collect(key, vaWriter);
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java
deleted file mode 100755
index e2f0f85..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.graphbuilding;
-
-import java.io.IOException;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.mapred.TextInputFormat;
-import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.Option;
-
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.KmerCountValue;
-
-/**
- * This class implement driver which start the mapreduce program for graphbuilding
- */
-@SuppressWarnings("deprecation")
-public class GenomixDriver {
- private static class Options {
- @Option(name = "-inputpath", usage = "the input path", required = true)
- public String inputPath;
-
- @Option(name = "-outputpath", usage = "the output path", required = true)
- public String outputPath;
-
- @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
- public int numReducers;
-
- @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
- public int sizeKmer;
- }
-
- public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, String defaultConfPath)
- throws IOException {
-
- JobConf conf = new JobConf(GenomixDriver.class);
- conf.setInt("sizeKmer", sizeKmer);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
-
- conf.setJobName("Genomix Graph Building");
- conf.setMapperClass(GenomixMapper.class);
- conf.setReducerClass(GenomixReducer.class);
- conf.setCombinerClass(GenomixCombiner.class);
-
- conf.setMapOutputKeyClass(KmerBytesWritable.class);
- conf.setMapOutputValueClass(KmerCountValue.class);
-
- conf.setInputFormat(TextInputFormat.class);
- conf.setOutputFormat(SequenceFileOutputFormat.class);
- conf.setOutputKeyClass(KmerBytesWritable.class);
- conf.setOutputValueClass(KmerCountValue.class);
- FileInputFormat.setInputPaths(conf, new Path(inputPath));
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- conf.setNumReduceTasks(numReducers);
-
- FileSystem dfs = FileSystem.get(conf);
- dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- }
-
- public static void main(String[] args) throws Exception {
- Options options = new Options();
- CmdLineParser parser = new CmdLineParser(options);
- parser.parseArgument(args);
- GenomixDriver driver = new GenomixDriver();
- driver.run(options.inputPath, options.outputPath, options.numReducers, options.sizeKmer, null);
- }
-
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java
deleted file mode 100755
index f3f4584..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.graphbuilding;
-
-import java.io.IOException;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.type.GeneCode;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.KmerCountValue;
-
-/**
- * This class implement mapper operator of mapreduce model
- */
-@SuppressWarnings("deprecation")
-public class GenomixMapper extends MapReduceBase implements
- Mapper<LongWritable, Text, KmerBytesWritable, KmerCountValue> {
-
- public class CurrenByte {
- public byte curByte;
- public byte preMarker;
- }
-
- public static int KMER_SIZE;
- public KmerCountValue outputAdjList;
- public KmerBytesWritable outputKmer;
-
- @Override
- public void configure(JobConf job) {
- KMER_SIZE = Integer.parseInt(job.get("sizeKmer"));
- outputAdjList = new KmerCountValue();
- outputKmer = new KmerBytesWritable(KMER_SIZE);
- }
-
- /*succeed node
- A 00000001 1
- C 00000010 2
- G 00000100 4
- T 00001000 8
- precursor node
- A 00010000 16
- C 00100000 32
- G 01000000 64
- T 10000000 128*/
- @Override
- public void map(LongWritable key, Text value, OutputCollector<KmerBytesWritable, KmerCountValue> output,
- Reporter reporter) throws IOException {
- /* A 00
- C 01
- G 10
- T 11*/
- String geneLine = value.toString(); // Read the Real Gene Line
- Pattern genePattern = Pattern.compile("[AGCT]+");
- Matcher geneMatcher = genePattern.matcher(geneLine);
- boolean isValid = geneMatcher.matches();
- if (isValid == true) {
- /** first kmer */
- byte count = 1;
- byte[] array = geneLine.getBytes();
- outputKmer.setByRead( array, 0);
- byte pre = 0;
- byte next = GeneCode.getAdjBit(array[KMER_SIZE]);
- byte adj = GeneCode.mergePreNextAdj(pre, next);
- outputAdjList.set(adj, count);
- output.collect(outputKmer, outputAdjList);
- /** middle kmer */
- for (int i = KMER_SIZE; i < array.length - 1; i++) {
- pre = GeneCode.getBitMapFromGeneCode(outputKmer.shiftKmerWithNextChar(array[i]));
- next = GeneCode.getAdjBit(array[i + 1]);
- adj = GeneCode.mergePreNextAdj(pre, next);
- outputAdjList.set(adj, count);
- output.collect(outputKmer, outputAdjList);
- }
- /** last kmer */
- pre = GeneCode.getBitMapFromGeneCode(outputKmer.shiftKmerWithNextChar(array[array.length - 1]));
- next = 0;
- adj = GeneCode.mergePreNextAdj(pre, next);
- outputAdjList.set(adj, count);
- output.collect(outputKmer, outputAdjList);
- }
- }
-}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java
deleted file mode 100755
index 1fba709..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.graphbuilding;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.KmerCountValue;
-
-/**
- * This class implement reducer operator of mapreduce model
- */
-@SuppressWarnings("deprecation")
-public class GenomixReducer extends MapReduceBase implements
- Reducer<KmerBytesWritable, KmerCountValue, KmerBytesWritable, KmerCountValue> {
- KmerCountValue valWriter = new KmerCountValue();
- static enum MyCounters { NUM_RECORDS };
- @Override
- public void reduce(KmerBytesWritable key, Iterator<KmerCountValue> values,
- OutputCollector<KmerBytesWritable, KmerCountValue> output, Reporter reporter) throws IOException {
- byte groupByAdjList = 0;
- int count = 0;
- byte bytCount = 0;
- while (values.hasNext()) {
- //Merge By the all adjacent Nodes;
- KmerCountValue geneValue = values.next();
- groupByAdjList = (byte) (groupByAdjList | geneValue.getAdjBitMap());
- count = count + (int) geneValue.getCount();
- }
- if (count >= 127)
- bytCount = (byte) 127;
- else
- bytCount = (byte) count;
- valWriter.set(groupByAdjList, bytCount);
- output.collect(key, valWriter);
- reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterDriver.java
deleted file mode 100644
index d54eca2..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterDriver.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.graphcountfilter;
-
-import java.io.IOException;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.Option;
-
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-
-@SuppressWarnings("deprecation")
-public class CountFilterDriver {
- private static class Options {
- @Option(name = "-inputpath", usage = "the input path", required = true)
- public String inputPath;
-
- @Option(name = "-outputpath", usage = "the output path", required = true)
- public String outputPath;
-
- @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
- public int numReducers;
-
- @Option(name = "-count-threshold", usage = "the threshold of count", required = true)
- public int countThreshold;
- }
-
- public void run(String inputPath, String outputPath, int numReducers, int countThreshold, String defaultConfPath)
- throws IOException {
-
- JobConf conf = new JobConf(CountFilterDriver.class);
- conf.setInt("countThreshold", countThreshold);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
-
- conf.setJobName("Count Filter");
- conf.setMapperClass(CountFilterMapper.class);
- conf.setReducerClass(CountFilterReducer.class);
- conf.setCombinerClass(CountFilterReducer.class);
-
- conf.setMapOutputKeyClass(KmerBytesWritable.class);
- conf.setMapOutputValueClass(ByteWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
- conf.setOutputFormat(SequenceFileOutputFormat.class);
-
- conf.setOutputKeyClass(KmerBytesWritable.class);
- conf.setOutputValueClass(ByteWritable.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath));
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- conf.setNumReduceTasks(numReducers);
-
- FileSystem dfs = FileSystem.get(conf);
- dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- }
-
- public static void main(String[] args) throws Exception {
- Options options = new Options();
- CmdLineParser parser = new CmdLineParser(options);
- parser.parseArgument(args);
- CountFilterDriver driver = new CountFilterDriver();
- driver.run(options.inputPath, options.outputPath, options.numReducers, options.countThreshold, null);
- }
-
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterMapper.java
deleted file mode 100644
index 2a54217..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterMapper.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.graphcountfilter;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.KmerCountValue;
-
-
-@SuppressWarnings({ "deprecation" })
-public class CountFilterMapper extends MapReduceBase implements
- Mapper<KmerBytesWritable, KmerCountValue, KmerBytesWritable, ByteWritable> {
- private int THRESHOLD;
- private ByteWritable adjByte = new ByteWritable();
- @Override
- public void configure(JobConf job) {
- THRESHOLD = Integer.parseInt(job.get("countThreshold"));
- }
- public void map(KmerBytesWritable key, KmerCountValue value, OutputCollector<KmerBytesWritable, ByteWritable> output,
- Reporter reporter) throws IOException {
- if(value.getCount() >= THRESHOLD){
- adjByte.set(value.getAdjBitMap());
- output.collect(key, adjByte );
- }
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterReducer.java
deleted file mode 100644
index dd33451..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterReducer.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.graphcountfilter;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-
-@SuppressWarnings("deprecation")
-public class CountFilterReducer extends MapReduceBase implements
- Reducer<KmerBytesWritable, ByteWritable, KmerBytesWritable, ByteWritable> {
- @Override
- public void reduce(KmerBytesWritable key, Iterator<ByteWritable> values,
- OutputCollector<KmerBytesWritable, ByteWritable> output, Reporter reporter) throws IOException {
- output.collect(key, values.next()); //Output the Pair
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/.DS_Store b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/.DS_Store
deleted file mode 100644
index f9e3926..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/.DS_Store
+++ /dev/null
Binary files differ
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathDriver.java
deleted file mode 100644
index 36c12ae..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathDriver.java
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.pathmerging;
-
-import java.io.IOException;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.mapred.TextOutputFormat;
-import org.apache.hadoop.mapred.lib.MultipleOutputs;
-import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
-import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
-import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.Option;
-
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-@SuppressWarnings("deprecation")
-public class MergePathDriver {
-
- private static class Options {
- @Option(name = "-inputpath", usage = "the input path", required = true)
- public String inputPath;
-
- @Option(name = "-outputpath", usage = "the output path", required = true)
- public String outputPath;
-
- @Option(name = "-mergeresultpath", usage = "the merging results path", required = true)
- public String mergeResultPath;
-
- @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
- public int numReducers;
-
- @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
- public int sizeKmer;
-
- @Option(name = "-merge-rounds", usage = "the while rounds of merging", required = true)
- public int mergeRound;
-
- }
-
-
- public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer, int mergeRound, String defaultConfPath)
- throws IOException{
-
- JobConf conf = new JobConf(MergePathDriver.class);
- conf.setInt("sizeKmer", sizeKmer);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
- conf.setJobName("Initial Path-Starting-Points Table");
- conf.setMapperClass(SNodeInitialMapper.class);
- conf.setReducerClass(SNodeInitialReducer.class);
-
- conf.setMapOutputKeyClass(KmerBytesWritable.class);
- conf.setMapOutputValueClass(MergePathValueWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
- conf.setOutputFormat(SequenceFileOutputFormat.class);
-
- conf.setOutputKeyClass(VKmerBytesWritable.class);
- conf.setOutputValueClass(MergePathValueWritable.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath));
- FileOutputFormat.setOutputPath(conf, new Path(inputPath + "-step1"));
- conf.setNumReduceTasks(numReducers);
- FileSystem dfs = FileSystem.get(conf);
- dfs.delete(new Path(inputPath + "-step1"), true);
- JobClient.runJob(conf);
- int iMerge = 0;
-/*----------------------------------------------------------------------*/
- for(iMerge = 0; iMerge < mergeRound; iMerge ++){
-
- conf = new JobConf(MergePathDriver.class);
- conf.setInt("sizeKmer", sizeKmer);
- conf.setInt("iMerge", iMerge);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
- conf.setJobName("Path Merge");
-
- conf.setMapperClass(MergePathMapper.class);
- conf.setReducerClass(MergePathReducer.class);
-
- conf.setMapOutputKeyClass(VKmerBytesWritable.class);
- conf.setMapOutputValueClass(MergePathValueWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
-
- String uncomplete = "uncomplete" + iMerge;
- String complete = "complete" + iMerge;
-
- MultipleOutputs.addNamedOutput(conf, uncomplete,
- MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
- MergePathValueWritable.class);
-
- MultipleOutputs.addNamedOutput(conf, complete,
- MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
- MergePathValueWritable.class);
-
- conf.setOutputKeyClass(VKmerBytesWritable.class);
- conf.setOutputValueClass(MergePathValueWritable.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- conf.setNumReduceTasks(numReducers);
- dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- dfs.delete(new Path(inputPath + "-step1"), true);
- dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
- dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));
- }
- /*----------------------------------------*/
- conf = new JobConf(MergePathDriver.class);
- conf.setInt("sizeKmer", sizeKmer);
- conf.setInt("iMerge", iMerge);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
- conf.setJobName("Path Merge");
-
- conf.setMapperClass(MergePathMapper.class);
- conf.setReducerClass(MergePathReducer.class);
-
- conf.setMapOutputKeyClass(VKmerBytesWritable.class);
- conf.setMapOutputValueClass(MergePathValueWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
-
- String uncomplete = "uncomplete" + iMerge;
- String complete = "complete" + iMerge;
-
- MultipleOutputs.addNamedOutput(conf, uncomplete,
- MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
- MergePathValueWritable.class);
-
- MultipleOutputs.addNamedOutput(conf, complete,
- MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
- MergePathValueWritable.class);
-
- conf.setOutputKeyClass(VKmerBytesWritable.class);
- conf.setOutputValueClass(MergePathValueWritable.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- conf.setNumReduceTasks(numReducers);
- dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- dfs.delete(new Path(inputPath + "-step1"), true);
- dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
- dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));
- }
-
- public static void main(String[] args) throws Exception {
- Options options = new Options();
- CmdLineParser parser = new CmdLineParser(options);
- parser.parseArgument(args);
- MergePathDriver driver = new MergePathDriver();
- driver.run(options.inputPath, options.outputPath, options.mergeResultPath, options.numReducers, options.sizeKmer, options.mergeRound, null);
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMapper.java
deleted file mode 100644
index 0c7dcc1..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMapper.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.pathmerging;
-
-import java.io.IOException;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.GeneCode;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
-
-@SuppressWarnings("deprecation")
-public class MergePathMapper extends MapReduceBase implements
- Mapper<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
- private int KMER_SIZE;
- private VKmerBytesWritableFactory outputKmerFactory;
- private MergePathValueWritable outputValue;
- private VKmerBytesWritable tmpKmer;
- private VKmerBytesWritable outputKmer;
-
-
- public void configure(JobConf job) {
- KMER_SIZE = job.getInt("sizeKmer", 0);
- outputKmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
- outputValue = new MergePathValueWritable();
- tmpKmer = new VKmerBytesWritable(KMER_SIZE);
- outputKmer = new VKmerBytesWritable(KMER_SIZE);
- }
-
- @Override
- public void map(VKmerBytesWritable key, MergePathValueWritable value,
- OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
-
- byte precursor = (byte) 0xF0;
- byte succeed = (byte) 0x0F;
- byte adjBitMap = value.getAdjBitMap();
- byte bitFlag = value.getFlag();
- precursor = (byte) (precursor & adjBitMap);
- precursor = (byte) ((precursor & 0xff) >> 4);
- succeed = (byte) (succeed & adjBitMap);
- if (bitFlag == 1) {
- byte succeedCode = GeneCode.getGeneCodeFromBitMap(succeed);
- tmpKmer.set(outputKmerFactory.getLastKmerFromChain(KMER_SIZE, key));
- outputKmer.set(outputKmerFactory.shiftKmerWithNextCode(tmpKmer, succeedCode));
-
- tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(key.getKmerLength() - (KMER_SIZE - 1), key));
- outputValue.set(adjBitMap, bitFlag, tmpKmer);
- output.collect(outputKmer, outputValue);
- } else {
- outputKmer.set(key);
- outputValue.set(value);
- output.collect(key, outputValue);
- }
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiSeqOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiSeqOutputFormat.java
deleted file mode 100644
index bd4cd2a..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiSeqOutputFormat.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.pathmerging;
-
-import java.io.File;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-
-public class MergePathMultiSeqOutputFormat extends MultipleSequenceFileOutputFormat<VKmerBytesWritable, MergePathValueWritable>{
- @Override
- protected String generateLeafFileName(String name) {
- // TODO Auto-generated method stub System.out.println(name);
- String[] names = name.split("-");
- return names[0] + File.separator + name;
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiTextOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiTextOutputFormat.java
deleted file mode 100644
index 29d3b68..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiTextOutputFormat.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.pathmerging;
-
-import java.io.File;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
-
-public class MergePathMultiTextOutputFormat extends MultipleTextOutputFormat<Text, Text>{
- @Override
- protected String generateLeafFileName(String name) {
- // TODO Auto-generated method stub System.out.println(name);
- String[] names = name.split("-");
- return names[0] + File.separator + name;
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathReducer.java
deleted file mode 100644
index 52abc1c..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathReducer.java
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.pathmerging;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.lib.MultipleOutputs;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
-
-@SuppressWarnings("deprecation")
-public class MergePathReducer extends MapReduceBase implements
- Reducer<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
- private VKmerBytesWritableFactory kmerFactory;
- private VKmerBytesWritable outputKmer;
- private VKmerBytesWritable tmpKmer;
- private int KMER_SIZE;
- private MergePathValueWritable outputValue;
- private MergePathValueWritable tmpOutputValue;
- MultipleOutputs mos = null;
- private int I_MERGE;
-
- public void configure(JobConf job) {
- mos = new MultipleOutputs(job);
- I_MERGE = Integer.parseInt(job.get("iMerge"));
- KMER_SIZE = job.getInt("sizeKmer", 0);
- outputValue = new MergePathValueWritable();
- tmpOutputValue = new MergePathValueWritable();
- kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
- outputKmer = new VKmerBytesWritable(KMER_SIZE);
- tmpKmer = new VKmerBytesWritable(KMER_SIZE);
- }
-
- @SuppressWarnings("unchecked")
- @Override
- public void reduce(VKmerBytesWritable key, Iterator<MergePathValueWritable> values,
- OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- outputValue = values.next();
- if (values.hasNext() == true) {
- if (outputValue.getFlag() != 1) {
- byte nextAdj = outputValue.getAdjBitMap();
- byte succeed = (byte) 0x0F;
- succeed = (byte) (succeed & nextAdj);
-
- outputValue = values.next();
- byte adjBitMap = outputValue.getAdjBitMap();
- byte flag = outputValue.getFlag();
- if (outputValue.getKmerLength() != 0)
- outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), key));
- else
- outputKmer.set(key);
-
- adjBitMap = (byte) (adjBitMap & 0xF0);
- adjBitMap = (byte) (adjBitMap | succeed);
- outputValue.set(adjBitMap, flag, null);
- mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
- } else {
- tmpOutputValue.set(outputValue);
- byte tmpAdjMap = tmpOutputValue.getAdjBitMap();
-
- outputValue = values.next();
- if (outputValue.getFlag() != 1) {
- if (tmpOutputValue.getKmerLength() != 0)
- outputKmer.set(kmerFactory.mergeTwoKmer(tmpOutputValue.getKmer(), key));
- else
- outputKmer.set(key);
-
- byte nextAdj = outputValue.getAdjBitMap();
- byte succeed = (byte) 0x0F;
- succeed = (byte) (succeed & nextAdj);
- tmpAdjMap = (byte) (tmpAdjMap & 0xF0);
- tmpAdjMap = (byte) (tmpAdjMap | succeed);
- outputValue.set(tmpAdjMap, tmpOutputValue.getFlag(), null);
- mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
- } else {
-
- tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
- if (tmpOutputValue.getKmerLength() != 0)
- outputKmer.set(kmerFactory.mergeTwoKmer(tmpOutputValue.getKmer(), tmpKmer));
- else
- outputKmer.set(tmpKmer);
- tmpOutputValue.set(tmpAdjMap, tmpOutputValue.getFlag(), null);
- mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, tmpOutputValue);
-
- tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
- if (outputValue.getKmerLength() != 0)
- outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), tmpKmer));
- else
- outputKmer.set(tmpKmer);
- outputValue.set(outputValue.getAdjBitMap(), outputValue.getFlag(), null);
- mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
-
- while (values.hasNext()) {
- outputValue = values.next();
- tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
- if (outputValue.getKmerLength() != 0)
- outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), tmpKmer));
- else
- outputKmer.set(tmpKmer);
- outputValue.set(outputValue.getAdjBitMap(), outputValue.getFlag(), null);
- mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
- }
- }
- }
- } else {
- if (outputValue.getFlag() != 0) {
- tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
- if (outputValue.getKmerLength() != 0)
- outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), tmpKmer));
- else
- outputKmer.set(tmpKmer);
- outputValue.set(outputValue.getAdjBitMap(), outputValue.getFlag(), null);
- mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
-
- } else
- mos.getCollector("uncomplete" + I_MERGE, reporter).collect(key, outputValue);
- }
- }
-
- public void close() throws IOException {
- // TODO Auto-generated method stub
- mos.close();
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathValueWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathValueWritable.java
deleted file mode 100644
index 9686c18..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathValueWritable.java
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.pathmerging;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.io.BinaryComparable;
-import org.apache.hadoop.io.WritableComparable;
-
-import edu.uci.ics.genomix.type.GeneCode;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-public class MergePathValueWritable extends BinaryComparable implements WritableComparable<BinaryComparable> {
-
- private static final byte[] EMPTY_BYTES = {};
- private byte adjBitMap;
- private byte flag;
- private VKmerBytesWritable kmer;
-
- public MergePathValueWritable() {
- this((byte) 0, (byte) 0, 0, EMPTY_BYTES);
- }
-
- public MergePathValueWritable(byte adjBitMap, byte flag, int kmerSize, byte[] bytes) {
- this.adjBitMap = adjBitMap;
- this.flag = flag;
- this.kmer = new VKmerBytesWritable(kmerSize, bytes);
- kmer.set(bytes, 0, bytes.length);
- }
-
- public void set(MergePathValueWritable right) {
- set(right.getAdjBitMap(), right.getFlag(), right.getKmer());
- }
-
- public void set(byte adjBitMap, byte flag, VKmerBytesWritable kmer) {
- this.kmer.set(kmer);
- this.adjBitMap = adjBitMap;
- this.flag = flag;
- }
-
- @Override
- public void readFields(DataInput arg0) throws IOException {
- // TODO Auto-generated method stub
- kmer.readFields(arg0);
- adjBitMap = arg0.readByte();
- flag = arg0.readByte();
- }
-
- @Override
- public void write(DataOutput arg0) throws IOException {
- // TODO Auto-generated method stub
-
- kmer.write(arg0);
- arg0.writeByte(adjBitMap);
- arg0.writeByte(flag);
- }
-
- public VKmerBytesWritable getKmer() {
- if (kmer.getLength() != 0) {
- return kmer;
- }
- return null;
- }
-
- public byte getAdjBitMap() {
- return this.adjBitMap;
- }
-
- public byte getFlag() {
- return this.flag;
- }
-
- public String toString() {
- return GeneCode.getSymbolFromBitMap(adjBitMap) + '\t' + String.valueOf(flag);
- }
-
- @Override
- public byte[] getBytes() {
- // TODO Auto-generated method stub
- if (kmer.getLength() != 0) {
- return kmer.getBytes();
- } else
- return null;
-
- }
-
- public int getKmerLength() {
- return kmer.getKmerLength();
- }
-
- @Override
- public int getLength() {
- return kmer.getLength();
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialMapper.java
deleted file mode 100644
index 1058fda..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialMapper.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.pathmerging;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.GeneCode;
-
-@SuppressWarnings("deprecation")
-public class SNodeInitialMapper extends MapReduceBase implements
- Mapper<KmerBytesWritable, ByteWritable, KmerBytesWritable, MergePathValueWritable> {
-
- public int KMER_SIZE;
- public KmerBytesWritable outputKmer;
- public MergePathValueWritable outputAdjList;
-
- public void configure(JobConf job) {
- KMER_SIZE = Integer.parseInt(job.get("sizeKmer"));
- outputKmer = new KmerBytesWritable(KMER_SIZE);
- outputAdjList = new MergePathValueWritable();
- }
-
- boolean measureDegree(byte adjacent) {
- boolean result = true;
- switch (adjacent) {
- case 0:
- result = true;
- break;
- case 1:
- result = false;
- break;
- case 2:
- result = false;
- break;
- case 3:
- result = true;
- break;
- case 4:
- result = false;
- break;
- case 5:
- result = true;
- break;
- case 6:
- result = true;
- break;
- case 7:
- result = true;
- break;
- case 8:
- result = false;
- break;
- case 9:
- result = true;
- break;
- case 10:
- result = true;
- break;
- case 11:
- result = true;
- break;
- case 12:
- result = true;
- break;
- case 13:
- result = true;
- break;
- case 14:
- result = true;
- break;
- case 15:
- result = true;
- break;
- }
- return result;
- }
-
- @Override
- public void map(KmerBytesWritable key, ByteWritable value,
- OutputCollector<KmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- byte precursor = (byte) 0xF0;
- byte succeed = (byte) 0x0F;
- byte adjBitMap = value.get();
- byte bitFlag = (byte) 0;
- precursor = (byte) (precursor & adjBitMap);
- precursor = (byte) ((precursor & 0xff) >> 4);
- succeed = (byte) (succeed & adjBitMap);
- boolean inDegree = measureDegree(precursor);
- boolean outDegree = measureDegree(succeed);
- if (inDegree == false && outDegree == false) {
- outputKmer.set(key);
- bitFlag = (byte) 2;
- outputAdjList.set(adjBitMap, bitFlag, null);///~~~~~kmersize----->0
- output.collect(outputKmer, outputAdjList);
- }
- else{
- for(int i = 0 ; i < 4; i ++){
- byte temp = 0x01;
- byte shiftedCode = 0;
- temp = (byte)(temp << i);
- temp = (byte) (succeed & temp);
- if(temp != 0 ){
- byte succeedCode = GeneCode.getGeneCodeFromBitMap(temp);
- shiftedCode = key.shiftKmerWithNextCode(succeedCode);
- outputKmer.set(key);
- outputAdjList.set((byte)0, bitFlag, null);
- output.collect(outputKmer, outputAdjList);
- key.shiftKmerWithPreCode(shiftedCode);
- }
- }
- }
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialReducer.java
deleted file mode 100644
index 07cc32f..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialReducer.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.pathmerging;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-@SuppressWarnings("deprecation")
-public class SNodeInitialReducer extends MapReduceBase implements
- Reducer<KmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
- private VKmerBytesWritable outputKmer = new VKmerBytesWritable();
- private MergePathValueWritable outputValue = new MergePathValueWritable();
-
-
- @Override
- public void reduce(KmerBytesWritable key, Iterator<MergePathValueWritable> values,
- OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- outputKmer.set(key);
- outputValue = values.next();
- if (values.hasNext() == true) {
- if (outputValue.getFlag() == 2) {
- byte bitFlag = 1;
- outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);///outputValue.getKmerLength()
- output.collect(outputKmer, outputValue);
- } else {
- boolean flag = false;
- while (values.hasNext()) {
- outputValue = values.next();
- if (outputValue.getFlag() == 2) {
- flag = true;
- break;
- }
- }
- if (flag == true) {
- byte bitFlag = 1;
- outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);
- output.collect(outputKmer, outputValue);
- }
- }
- } else {
- if (outputValue.getFlag() == 2) {
- byte bitFlag = 0;
- outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);
- output.collect(outputKmer, outputValue);
- }
- }
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/.DS_Store b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/.DS_Store
deleted file mode 100644
index 7c4ae29..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/.DS_Store
+++ /dev/null
Binary files differ
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/ENodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/ENodeInitialReducer.java
deleted file mode 100644
index 1f9bc82..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/ENodeInitialReducer.java
+++ /dev/null
@@ -1,58 +0,0 @@
-package edu.uci.ics.pathmergingh2;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerUtil;
-
-@SuppressWarnings("deprecation")
-public class ENodeInitialReducer extends MapReduceBase implements
- Reducer<BytesWritable, MergePathValueWritable, BytesWritable, MergePathValueWritable> {
- public BytesWritable outputKmer = new BytesWritable();
- public MergePathValueWritable outputAdjList = new MergePathValueWritable();
-
- @Override
- public void reduce(BytesWritable key, Iterator<MergePathValueWritable> values,
- OutputCollector<BytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- outputAdjList = values.next();
- outputKmer.set(key);
- if (values.hasNext() == true) {
- byte bitFlag = outputAdjList.getFlag();
- bitFlag = (byte) (bitFlag & 0xFE);
- if (bitFlag == 2) {
- bitFlag = (byte) (0x80 | outputAdjList.getFlag());
- outputAdjList.set(outputAdjList.getAdjBitMap(), bitFlag, null);
- output.collect(outputKmer, outputAdjList);
-
- } else {
- boolean flag = false;
- while (values.hasNext()) {
- outputAdjList = values.next();
- if (outputAdjList.getFlag() == 2) {
- flag = true;
- break;
- }
- }
- if (flag == true) {
- bitFlag = (byte) (0x80 | outputAdjList.getFlag());
- outputAdjList.set(outputAdjList.getAdjBitMap(), bitFlag, null);
- output.collect(outputKmer, outputAdjList);
- }
- }
- } else {
- byte bitFlag = outputAdjList.getFlag();
- bitFlag = (byte) (bitFlag & 0xFE);
- if (bitFlag == 2) {
- bitFlag = 0;
- outputAdjList.set(outputAdjList.getAdjBitMap(), bitFlag, null);
- output.collect(outputKmer, outputAdjList);
- }
- }
- }
-}
-
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathH2Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathH2Driver.java
deleted file mode 100644
index 2898a90..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathH2Driver.java
+++ /dev/null
@@ -1,168 +0,0 @@
-package edu.uci.ics.pathmergingh2;
-
-import java.io.IOException;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.mapred.TextOutputFormat;
-import org.apache.hadoop.mapred.lib.MultipleOutputs;
-import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
-import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
-import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.Option;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-
-@SuppressWarnings("deprecation")
-public class MergePathH2Driver {
-
- private static class Options {
- @Option(name = "-inputpath", usage = "the input path", required = true)
- public String inputPath;
-
- @Option(name = "-outputpath", usage = "the output path", required = true)
- public String outputPath;
-
- @Option(name = "-mergeresultpath", usage = "the merging results path", required = true)
- public String mergeResultPath;
-
- @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
- public int numReducers;
-
- @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
- public int sizeKmer;
-
- @Option(name = "-merge-rounds", usage = "the while rounds of merging", required = true)
- public int mergeRound;
-
- }
-
-
- public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer, int mergeRound, String defaultConfPath)
- throws IOException{
-
- JobConf conf = new JobConf(MergePathH2Driver.class);
- conf.setInt("sizeKmer", sizeKmer);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
- conf.setJobName("Initial Path-Starting-Points Table");
- conf.setMapperClass(SNodeInitialMapper.class);
- conf.setReducerClass(SNodeInitialReducer.class);
-
- conf.setMapOutputKeyClass(KmerBytesWritable.class);
- conf.setMapOutputValueClass(MergePathValueWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
- conf.setOutputFormat(SequenceFileOutputFormat.class);
-
- conf.setOutputKeyClass(VKmerBytesWritable.class);
- conf.setOutputValueClass(MergePathValueWritable.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath));
- FileOutputFormat.setOutputPath(conf, new Path(inputPath + "-step1"));
- conf.setNumReduceTasks(numReducers);
- FileSystem dfs = FileSystem.get(conf);
- dfs.delete(new Path(inputPath + "-step1"), true);
- JobClient.runJob(conf);
- int iMerge = 0;
-/*----------------------------------------------------------------------*/
- for(iMerge = 0; iMerge < mergeRound; iMerge ++){
- if(!dfs.exists(new Path(inputPath + "-step1")))
- break;
- conf = new JobConf(MergePathH2Driver.class);
- conf.setInt("sizeKmer", sizeKmer);
- conf.setInt("iMerge", iMerge);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
- conf.setJobName("Path Merge");
-
- conf.setMapperClass(MergePathH2Mapper.class);
- conf.setReducerClass(MergePathH2Reducer.class);
-
- conf.setMapOutputKeyClass(VKmerBytesWritable.class);
- conf.setMapOutputValueClass(MergePathValueWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
-
- String uncomplete = "uncomplete" + iMerge;
- String complete = "complete" + iMerge;
-
- MultipleOutputs.addNamedOutput(conf, uncomplete,
- MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
- MergePathValueWritable.class);
-
- MultipleOutputs.addNamedOutput(conf, complete,
- MergePathMultiTextOutputFormat.class, VKmerBytesWritable.class,
- MergePathValueWritable.class);
-
- conf.setOutputKeyClass(VKmerBytesWritable.class);
- conf.setOutputValueClass(MergePathValueWritable.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- conf.setNumReduceTasks(numReducers);
- dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- dfs.delete(new Path(inputPath + "-step1"), true);
- dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
- dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));
- }
-/* conf = new JobConf(MergePathH2Driver.class);
- conf.setInt("sizeKmer", sizeKmer);
- conf.setInt("iMerge", iMerge);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
- conf.setJobName("Path Merge");
-
- conf.setMapperClass(MergePathH2Mapper.class);
- conf.setReducerClass(MergePathH2Reducer.class);
-
- conf.setMapOutputKeyClass(VKmerBytesWritable.class);
- conf.setMapOutputValueClass(MergePathValueWritable.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
-
- String uncomplete = "uncomplete" + iMerge;
- String complete = "complete" + iMerge;
-
- MultipleOutputs.addNamedOutput(conf, uncomplete,
- MergePathMultiTextOutputFormat.class, VKmerBytesWritable.class,
- MergePathValueWritable.class);
-
- MultipleOutputs.addNamedOutput(conf, complete,
- MergePathMultiTextOutputFormat.class, VKmerBytesWritable.class,
- MergePathValueWritable.class);
-
- conf.setOutputKeyClass(VKmerBytesWritable.class);
- conf.setOutputValueClass(MergePathValueWritable.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- conf.setNumReduceTasks(numReducers);
- dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- dfs.delete(new Path(inputPath + "-step1"), true);
- dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
- dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));*/
- }
-
- public static void main(String[] args) throws Exception {
- Options options = new Options();
- CmdLineParser parser = new CmdLineParser(options);
- parser.parseArgument(args);
- MergePathH2Driver driver = new MergePathH2Driver();
- driver.run(options.inputPath, options.outputPath, options.mergeResultPath, options.numReducers, options.sizeKmer, options.mergeRound, null);
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathH2Mapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathH2Mapper.java
deleted file mode 100644
index 6ea9dd3..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathH2Mapper.java
+++ /dev/null
@@ -1,87 +0,0 @@
-package edu.uci.ics.pathmergingh2;
-
-import java.io.IOException;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.GeneCode;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
-
-@SuppressWarnings("deprecation")
-public class MergePathH2Mapper extends MapReduceBase implements
- Mapper<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
-
- private int KMER_SIZE;
- private VKmerBytesWritableFactory outputKmerFactory;
- private MergePathValueWritable outputValue;
- private VKmerBytesWritable tmpKmer;
- private VKmerBytesWritable outputKmer;
-
- public void configure(JobConf job) {
- KMER_SIZE = job.getInt("sizeKmer", 0);
- outputKmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
- outputValue = new MergePathValueWritable();
- tmpKmer = new VKmerBytesWritable(KMER_SIZE);
- outputKmer = new VKmerBytesWritable(KMER_SIZE);
- }
-
- @Override
- public void map(VKmerBytesWritable key, MergePathValueWritable value,
- OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- byte precursor = (byte) 0xF0;
- byte succeed = (byte) 0x0F;
- byte adjBitMap = value.getAdjBitMap();
- byte bitFlag = value.getFlag();
- precursor = (byte) (precursor & adjBitMap);
- precursor = (byte) ((precursor & 0xff) >> 4);
- succeed = (byte) (succeed & adjBitMap);
- byte bitStartEnd = (byte) (0x81 & bitFlag);
-
- switch (bitStartEnd) {
- case (byte) 0x01:
- byte succeedCode = GeneCode.getGeneCodeFromBitMap(succeed);
- tmpKmer.set(outputKmerFactory.getLastKmerFromChain(KMER_SIZE, key));
- outputKmer.set(outputKmerFactory.shiftKmerWithNextCode(tmpKmer, succeedCode));
-
- tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(key.getKmerLength() - (KMER_SIZE - 1), key));
- bitFlag = (byte) (bitFlag | 0x08);
- outputValue.set(adjBitMap, bitFlag, tmpKmer);
- output.collect(outputKmer, outputValue);
- break;
- case (byte) 0x80:
- tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(KMER_SIZE, key));
- outputKmer.set(tmpKmer);
- tmpKmer.set(outputKmerFactory.getLastKmerFromChain(key.getKmerLength() - KMER_SIZE, key));
- bitFlag = (byte) (bitFlag | 0x10);
- outputValue.set(adjBitMap, bitFlag, tmpKmer);
- output.collect(outputKmer, outputValue);
- break;
- case (byte) 0x00:
- succeedCode = GeneCode.getGeneCodeFromBitMap(succeed);
- tmpKmer.set(outputKmerFactory.getLastKmerFromChain(KMER_SIZE, key));
- outputKmer.set(outputKmerFactory.shiftKmerWithNextCode(tmpKmer, succeedCode));
-
- tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(key.getKmerLength() - (KMER_SIZE - 1), key));
- bitFlag = (byte) (bitFlag | 0x08);
- outputValue.set(adjBitMap, bitFlag, tmpKmer);
- output.collect(outputKmer, outputValue);
-
- bitFlag = (byte) (bitFlag & 0xF7);
- tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(KMER_SIZE, key));
- outputKmer.set(tmpKmer);
- tmpKmer.set(outputKmerFactory.getLastKmerFromChain(key.getKmerLength() - KMER_SIZE, key));
- bitFlag = (byte) (bitFlag | 0x10);
- outputValue.set(adjBitMap, bitFlag, tmpKmer);
- output.collect(outputKmer, outputValue);
- break;
- case (byte) 0x81:
- outputKmer.set(key);
- outputValue.set(adjBitMap, bitFlag, null);
- output.collect(outputKmer, outputValue);
- break;
- }
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathH2Reducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathH2Reducer.java
deleted file mode 100644
index 0bf7708..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathH2Reducer.java
+++ /dev/null
@@ -1,119 +0,0 @@
-package edu.uci.ics.pathmergingh2;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.lib.MultipleOutputs;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
-
-@SuppressWarnings("deprecation")
-public class MergePathH2Reducer extends MapReduceBase implements
- Reducer<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
- private VKmerBytesWritableFactory kmerFactory;
- private VKmerBytesWritable outputKmer;
- private VKmerBytesWritable tmpKmer1;
- private VKmerBytesWritable tmpKmer2;
- private int KMER_SIZE;
- private MergePathValueWritable outputValue;
- private MergePathValueWritable tmpOutputValue;
-
- MultipleOutputs mos = null;
- private int I_MERGE;
-
- public void configure(JobConf job) {
- mos = new MultipleOutputs(job);
- I_MERGE = Integer.parseInt(job.get("iMerge"));
- KMER_SIZE = job.getInt("sizeKmer", 0);
- outputValue = new MergePathValueWritable();
- tmpOutputValue = new MergePathValueWritable();
- kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
- outputKmer = new VKmerBytesWritable(KMER_SIZE);
- tmpKmer1 = new VKmerBytesWritable(KMER_SIZE);
- tmpKmer2 = new VKmerBytesWritable(KMER_SIZE);
- }
-
- @SuppressWarnings("unchecked")
- public void reduce(VKmerBytesWritable key, Iterator<MergePathValueWritable> values,
- OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- outputValue = values.next();
- outputKmer.set(key);
- if (values.hasNext() == true) {
- byte bitFlag = outputValue.getFlag();
- byte bitStartEnd = (byte) (0x81 & bitFlag);
- byte bitPosiNegative = (byte) (0x18 & bitFlag);
- byte succeed = (byte) 0x0F;
- switch (bitPosiNegative) {
- case (byte) 0x08:
- if (outputValue.getKmerLength() != 0)
- tmpKmer1.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), key));
- else
- tmpKmer1.set(key);
- byte adjBitMap = outputValue.getAdjBitMap();
- outputValue = values.next();
- bitStartEnd = (byte) (0x81 & outputValue.getFlag());
- if (bitStartEnd == (byte) 0x80) {
- if (outputValue.getKmerLength() != 0)
- tmpKmer2.set(kmerFactory.mergeTwoKmer(key, outputValue.getKmer()));
- else
- tmpKmer2.set(key);
- byte tmpFlag = (byte) 0x80;
- tmpOutputValue.set(outputValue.getAdjBitMap(), tmpFlag, null);
- mos.getCollector("uncomplete" + I_MERGE, reporter).collect(tmpKmer2, tmpOutputValue);
- }
- if (outputValue.getKmerLength() != 0)
- outputKmer.set(kmerFactory.mergeTwoKmer(tmpKmer1, outputValue.getKmer()));
- else
- outputKmer.set(tmpKmer1);
- succeed = (byte) (succeed & outputValue.getAdjBitMap());
- adjBitMap = (byte) (adjBitMap & 0xF0);
- adjBitMap = (byte) (adjBitMap | succeed);
- byte outputFlag = (byte) (0x81 & bitFlag);
- outputFlag = (byte) (outputFlag | ((byte) 0x81 & outputValue.getFlag()));
- outputValue.set(adjBitMap, outputFlag, null);
- mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
- break;
- case (byte) 0x10:
- if (outputValue.getKmerLength() != 0)
- tmpKmer1.set(kmerFactory.mergeTwoKmer(key, outputValue.getKmer()));
- else
- tmpKmer1.set(key);
- if (bitStartEnd == (byte) 0x80) {
- byte tmpFlag = (byte) 0x80;
- tmpOutputValue.set(outputValue.getAdjBitMap(), tmpFlag, null);
- mos.getCollector("uncomplete" + I_MERGE, reporter).collect(tmpKmer1, tmpOutputValue);
- }
- succeed = (byte) (succeed & outputValue.getAdjBitMap());
- outputValue = values.next();
- if (outputValue.getKmerLength() != 0)
- outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), tmpKmer1));
- else
- outputKmer.set(tmpKmer1);
- adjBitMap = outputValue.getAdjBitMap();
- adjBitMap = (byte) (adjBitMap & 0xF0);
- adjBitMap = (byte) (adjBitMap | succeed);
- outputFlag = (byte) (0x81 & bitFlag);
- outputFlag = (byte) (outputFlag | ((byte) 0x81 & outputValue.getFlag()));
- outputValue.set(adjBitMap, outputFlag, null);
- mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
- break;
- }
- } else {
- byte bitFlag = outputValue.getFlag();
- byte bitStartEnd = (byte) (0x81 & bitFlag);
- if (bitStartEnd == (byte) 0x81) {
- outputKmer.set(key);
- mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
- }
- }
- }
- public void close() throws IOException {
- // TODO Auto-generated method stub
- mos.close();
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathMultiSeqOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathMultiSeqOutputFormat.java
deleted file mode 100644
index 5e6f008..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathMultiSeqOutputFormat.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.pathmergingh2;
-
-import java.io.File;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
-
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-
-public class MergePathMultiSeqOutputFormat extends MultipleSequenceFileOutputFormat<VKmerBytesWritable, MergePathValueWritable>{
- @Override
- protected String generateLeafFileName(String name) {
- // TODO Auto-generated method stub System.out.println(name);
- String[] names = name.split("-");
- return names[0] + File.separator + name;
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathMultiTextOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathMultiTextOutputFormat.java
deleted file mode 100644
index d6176e2..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathMultiTextOutputFormat.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.pathmergingh2;
-
-import java.io.File;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
-
-public class MergePathMultiTextOutputFormat extends MultipleTextOutputFormat<Text, Text>{
- @Override
- protected String generateLeafFileName(String name) {
- // TODO Auto-generated method stub System.out.println(name);
- String[] names = name.split("-");
- return names[0] + File.separator + name;
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathValueWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathValueWritable.java
deleted file mode 100644
index 2f1869d..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathValueWritable.java
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.pathmergingh2;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.io.BinaryComparable;
-import org.apache.hadoop.io.WritableComparable;
-
-import edu.uci.ics.genomix.type.GeneCode;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-public class MergePathValueWritable extends BinaryComparable implements WritableComparable<BinaryComparable> {
-
- private static final byte[] EMPTY_BYTES = {};
- private byte adjBitMap;
- private byte flag;
- private VKmerBytesWritable kmer;
-
- public MergePathValueWritable() {
- this((byte) 0, (byte) 0, 0, EMPTY_BYTES);
- }
-
- public MergePathValueWritable(int k) {
- this.adjBitMap = 0;
- this.flag = 0;
- this.kmer = new VKmerBytesWritable(k);
- }
-
- public MergePathValueWritable(byte adjBitMap, byte flag, int kmerSize, byte[] bytes) {
- this.adjBitMap = adjBitMap;
- this.flag = flag;
- this.kmer = new VKmerBytesWritable(kmerSize, bytes);
- kmer.set(bytes, 0, bytes.length);
- }
-
- public void set(MergePathValueWritable right) {
- set(right.getAdjBitMap(), right.getFlag(), right.getKmer());
- }
-
- public void set(byte adjBitMap, byte flag, VKmerBytesWritable kmer) {
- this.kmer.set(kmer);
- this.adjBitMap = adjBitMap;
- this.flag = flag;
- }
-
- @Override
- public void readFields(DataInput arg0) throws IOException {
- // TODO Auto-generated method stub
- kmer.readFields(arg0);
- adjBitMap = arg0.readByte();
- flag = arg0.readByte();
- }
-
- @Override
- public void write(DataOutput arg0) throws IOException {
- // TODO Auto-generated method stub
-
- kmer.write(arg0);
- arg0.writeByte(adjBitMap);
- arg0.writeByte(flag);
- }
-
- public VKmerBytesWritable getKmer() {
- if (kmer.getLength() != 0) {
- return kmer;
- }
- return null;
- }
-
- public byte getAdjBitMap() {
- return this.adjBitMap;
- }
-
- public byte getFlag() {
- return this.flag;
- }
-
- public String toString() {
- return GeneCode.getSymbolFromBitMap(adjBitMap) + '\t' + String.valueOf(flag);
- }
-
- @Override
- public byte[] getBytes() {
- // TODO Auto-generated method stub
- if (kmer.getLength() != 0) {
- return kmer.getBytes();
- } else
- return null;
-
- }
-
- public int getKmerLength() {
- return kmer.getKmerLength();
- }
-
- @Override
- public int getLength() {
- return kmer.getLength();
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/SNodeInitialMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/SNodeInitialMapper.java
deleted file mode 100644
index 4c05dac..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/SNodeInitialMapper.java
+++ /dev/null
@@ -1,141 +0,0 @@
-package edu.uci.ics.pathmergingh2;
-
-import java.io.IOException;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.GeneCode;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-
-@SuppressWarnings("deprecation")
-public class SNodeInitialMapper extends MapReduceBase implements
- Mapper<KmerBytesWritable, ByteWritable, KmerBytesWritable, MergePathValueWritable> {
-
- public int KMER_SIZE;
- public KmerBytesWritable outputKmer;
- public MergePathValueWritable outputAdjList;
-
- public void configure(JobConf job) {
- KMER_SIZE = Integer.parseInt(job.get("sizeKmer"));
- outputKmer = new KmerBytesWritable(KMER_SIZE);
- outputAdjList = new MergePathValueWritable();
- }
-
- boolean measureDegree(byte adjacent) {
- boolean result = true;
- switch (adjacent) {
- case 0:
- result = true;
- break;
- case 1:
- result = false;
- break;
- case 2:
- result = false;
- break;
- case 3:
- result = true;
- break;
- case 4:
- result = false;
- break;
- case 5:
- result = true;
- break;
- case 6:
- result = true;
- break;
- case 7:
- result = true;
- break;
- case 8:
- result = false;
- break;
- case 9:
- result = true;
- break;
- case 10:
- result = true;
- break;
- case 11:
- result = true;
- break;
- case 12:
- result = true;
- break;
- case 13:
- result = true;
- break;
- case 14:
- result = true;
- break;
- case 15:
- result = true;
- break;
- }
- return result;
- }
-
- @Override
- public void map(KmerBytesWritable key, ByteWritable value,
- OutputCollector<KmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- byte precursor = (byte) 0xF0;
- byte succeed = (byte) 0x0F;
- byte adjBitMap = value.get();
- byte bitFlag = (byte) 0;
- precursor = (byte) (precursor & adjBitMap);
- precursor = (byte) ((precursor & 0xff) >> 4);
- succeed = (byte) (succeed & adjBitMap);
- boolean inDegree = measureDegree(precursor);
- boolean outDegree = measureDegree(succeed);
- if (key.toString().equals("CGC")) {
- int a = 2;
- int b = a;
- }
- if (key.toString().equals("TCG")) {
- int a = 2;
- int b = a;
- }
- if (inDegree == false && outDegree == false) {
- outputKmer.set(key);
- System.out.println(outputKmer.hashCode());
- bitFlag = (byte) 2;
- outputAdjList.set(adjBitMap, bitFlag, null);
- output.collect(outputKmer, outputAdjList);
- } else {
- for (int i = 0; i < 4; i++) {
- byte temp = (byte) 0x01;
- byte shiftedCode = 0;
- temp = (byte) (temp << i);
- temp = (byte) (precursor & temp);
- if (temp != 0) {
- byte precurCode = GeneCode.getGeneCodeFromBitMap(temp);
- shiftedCode = key.shiftKmerWithPreCode(precurCode);
- outputKmer.set(key);
- bitFlag = (byte) 0x80;
- outputAdjList.set((byte) 0, bitFlag, null);
- output.collect(outputKmer, outputAdjList);
- key.shiftKmerWithNextCode(shiftedCode);
- }
- }
- for (int i = 0; i < 4; i++) {
- byte temp = (byte) 0x01;
- byte shiftedCode = 0;
- temp = (byte) (temp << i);
- temp = (byte) (succeed & temp);
- if (temp != 0) {
- byte succeedCode = GeneCode.getGeneCodeFromBitMap(temp);
- shiftedCode = key.shiftKmerWithNextCode(succeedCode);
- outputKmer.set(key);
- bitFlag = (byte) 0x01;
- outputAdjList.set((byte) 0, bitFlag, null);
- output.collect(outputKmer, outputAdjList);
- key.shiftKmerWithPreCode(shiftedCode);
- }
- }
- }
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/SNodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/SNodeInitialReducer.java
deleted file mode 100644
index 7fd7a2e..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/SNodeInitialReducer.java
+++ /dev/null
@@ -1,80 +0,0 @@
-package edu.uci.ics.pathmergingh2;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.VKmerBytesWritable;
-
-@SuppressWarnings("deprecation")
-public class SNodeInitialReducer extends MapReduceBase implements
- Reducer<KmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
- private VKmerBytesWritable outputKmer = new VKmerBytesWritable();
- private MergePathValueWritable outputValue = new MergePathValueWritable();
-
- @Override
- public void reduce(KmerBytesWritable key, Iterator<MergePathValueWritable> values,
- OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- outputKmer.set(key);
- outputValue = values.next();
- byte startFlag = 0x00;
- byte endFlag = 0x00;
- byte targetPointFlag = 0x00;
- byte targetAdjList = 0x00;
- byte outputFlag = 0x00;
- if(key.toString().equals("TCG")){
- int a = 2;
- int b = a;
- }
- if (values.hasNext() == true) {
- switch (outputValue.getFlag()) {
- case (byte) 0x01:
- startFlag = (byte) 0x01;
- break;
- case (byte) 0x80:
- endFlag = (byte) 0x80;
- break;
- case (byte) 0x02:
- targetPointFlag = (byte) 0x02;
- targetAdjList = outputValue.getAdjBitMap();
- break;
- }
- while (values.hasNext()) {
- outputValue = values.next();
- switch (outputValue.getFlag()) {
- case (byte) 0x01:
- startFlag = (byte) 0x01;
- break;
- case (byte) 0x80:
- endFlag = (byte) 0x80;
- break;
- case (byte) 0x02:
- targetPointFlag = (byte) 0x02;
- targetAdjList = outputValue.getAdjBitMap();
- break;
- }
- if(startFlag != (byte) 0x00 && endFlag!= (byte) 0x00 && targetPointFlag != (byte) 0x00)
- break;
- }
- if(targetPointFlag == (byte) 0x02) {
- if(startFlag == (byte) 0x01) {
- outputFlag = (byte) (outputFlag | startFlag);
- }
- if(endFlag == (byte) 0x80) {
- outputFlag = (byte) (outputFlag | endFlag);
- }
- outputValue.set(targetAdjList, outputFlag, null);
- output.collect(outputKmer, outputValue);
- }
- } else {
- if (outputValue.getFlag() == 2) {
- byte bitFlag = 0;
- outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);
- output.collect(outputKmer, outputValue);
- }
- }
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatDriver.java
deleted file mode 100644
index efe1589..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatDriver.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.statistics;
-
-import java.io.IOException;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.SequenceFileInputFormat;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.Option;
-
-import edu.uci.ics.genomix.type.KmerCountValue;
-
-@SuppressWarnings("deprecation")
-public class GenomixStatDriver {
- private static class Options {
- @Option(name = "-inputpath", usage = "the input path", required = true)
- public String inputPath;
-
- @Option(name = "-outputpath", usage = "the output path", required = true)
- public String outputPath;
-
- @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
- public int numReducers;
-
- }
-
- public void run(String inputPath, String outputPath, int numReducers, String defaultConfPath)
- throws IOException {
-
- JobConf conf = new JobConf(GenomixStatDriver.class);
-
- if (defaultConfPath != null) {
- conf.addResource(new Path(defaultConfPath));
- }
-
- conf.setJobName("Genomix Statistics");
- conf.setMapperClass(GenomixStatMapper.class);
- conf.setReducerClass(GenomixStatReducer.class);
- conf.setCombinerClass(GenomixStatReducer.class);
-
- conf.setMapOutputKeyClass(BytesWritable.class);
- conf.setMapOutputValueClass(KmerCountValue.class);
-
- conf.setInputFormat(SequenceFileInputFormat.class);
- conf.setOutputFormat(SequenceFileOutputFormat.class);
-
- conf.setOutputKeyClass(BytesWritable.class);
- conf.setOutputValueClass(KmerCountValue.class);
-
- FileInputFormat.setInputPaths(conf, new Path(inputPath));
- FileOutputFormat.setOutputPath(conf, new Path(outputPath));
- conf.setNumReduceTasks(numReducers);
-
- FileSystem dfs = FileSystem.get(conf);
- dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- }
-
- public static void main(String[] args) throws Exception {
- Options options = new Options();
- CmdLineParser parser = new CmdLineParser(options);
- parser.parseArgument(args);
- GenomixStatDriver driver = new GenomixStatDriver();
- driver.run(options.inputPath, options.outputPath, options.numReducers, null);
- }
-}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatMapper.java
deleted file mode 100644
index c5feefe..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatMapper.java
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.statistics;
-
-import java.io.IOException;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.KmerCountValue;
-
-@SuppressWarnings({ "unused", "deprecation" })
-public class GenomixStatMapper extends MapReduceBase implements
- Mapper<BytesWritable, KmerCountValue, BytesWritable, KmerCountValue> {
-
- boolean measureDegree(byte adjacent) {
- boolean result = true;
- switch (adjacent) {
- case 0:
- result = true;
- break;
- case 1:
- result = false;
- break;
- case 2:
- result = false;
- break;
- case 3:
- result = true;
- break;
- case 4:
- result = false;
- break;
- case 5:
- result = true;
- break;
- case 6:
- result = true;
- break;
- case 7:
- result = true;
- break;
- case 8:
- result = false;
- break;
- case 9:
- result = true;
- break;
- case 10:
- result = true;
- break;
- case 11:
- result = true;
- break;
- case 12:
- result = true;
- break;
- case 13:
- result = true;
- break;
- case 14:
- result = true;
- break;
- case 15:
- result = true;
- break;
- }
- return result;
- }
- @Override
- public void map(BytesWritable key, KmerCountValue value, OutputCollector<BytesWritable, KmerCountValue> output,
- Reporter reporter) throws IOException {
- byte precursor = (byte) 0xF0;
- byte succeed = (byte) 0x0F;
- byte adj = value.getAdjBitMap();
- precursor = (byte) (precursor & adj);
- precursor = (byte) ((precursor & 0xff) >> 4);
- succeed = (byte) (succeed & adj);
- boolean inDegree = measureDegree(precursor);
- boolean outDegree = measureDegree(succeed);
- if (inDegree == true && outDegree == false) {
- output.collect(key, value);
- }
- }
-}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatReducer.java
deleted file mode 100644
index ea9a915..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatReducer.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.statistics;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-
-import edu.uci.ics.genomix.type.KmerCountValue;
-
-@SuppressWarnings("deprecation")
-public class GenomixStatReducer extends MapReduceBase implements
- Reducer<BytesWritable, KmerCountValue, BytesWritable, KmerCountValue> {
- static enum MyCounters { NUM_RECORDS };
- KmerCountValue valWriter = new KmerCountValue();
- @Override
- public void reduce(BytesWritable key, Iterator<KmerCountValue> values,
- OutputCollector<BytesWritable, KmerCountValue> output, Reporter reporter) throws IOException {
- reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
- valWriter = values.next();
- output.collect(key, valWriter);
- }
-}
\ No newline at end of file