update the velvetgraphbuilding package
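
Rename the package from valvetgraphbuilding to velvetgraphbuilding, rewrite
DeepGraphBuildingMapper and DeepGraphBuildingReducer around the new
PositionListAndKmerWritable, and add GraphBuildingDriver and ReadIDPartitioner.
The two-stage pipeline can be invoked along these lines (jar path illustrative;
the option names come from GraphBuildingDriver.Options):

    hadoop jar genomix-hadoop.jar edu.uci.ics.genomix.hadoop.velvetgraphbuilding.GraphBuildingDriver \
        -inputpath <reads> -outputpath <graph-dir> -num-reducers <n> -kmer-size <k>
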
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
index 0276c3a..599e51f 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
@@ -51,6 +51,9 @@
outgoingList.set(outgoing);
}
+ public void setKmer(KmerBytesWritable right) {
+ this.kmer.set(right);
+ }
public void reset(int kmerSize) {
nodeID.set(0, (byte) 0);
incomingList.reset();
@@ -122,5 +125,11 @@
sbuilder.append(kmer.toString()).append(')');
return sbuilder.toString();
}
-
+
+ public boolean existsInSinglePath(){
+        return this.incomingList.getCountOfPosition() == 1 && this.outgoingList.getCountOfPosition() == 1;
+ }
}
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
index c19af12..787afec 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
@@ -44,6 +44,9 @@
storage[offset + INTBYTES] = posInRead;
}
+ public void set(PositionWritable right) {
+ this.set(right.getReadID(), right.getPosInRead());
+ }
public int getReadID() {
return Marshal.getInt(storage, offset);
}
@@ -118,6 +121,19 @@
}
}
+ public static class FirstComparator extends WritableComparator {
+ public FirstComparator() {
+ super(PositionWritable.class);
+ }
+
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ int thisValue = Marshal.getInt(b1, s1);
+ int thatValue = Marshal.getInt(b2, s2);
+            // compare by readID only; avoid subtraction so that large values cannot overflow
+            return (thisValue < thatValue) ? -1 : ((thisValue == thatValue) ? 0 : 1);
+ }
+ }
+
static { // register this comparator
WritableComparator.define(PositionWritable.class, new Comparator());
}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/DeepGraphBuildingMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/DeepGraphBuildingMapper.java
deleted file mode 100644
index 591e3c7..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/DeepGraphBuildingMapper.java
+++ /dev/null
@@ -1,21 +0,0 @@
-package edu.uci.ics.genomix.hadoop.valvetgraphbuilding;
-
-import java.io.IOException;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.PositionListWritable;
-
-@SuppressWarnings("deprecation")
-public class DeepGraphBuildingMapper extends MapReduceBase implements
- Mapper<KmerBytesWritable, PositionListWritable, IntWritable, LineBasedmappingWritable> {
- IntWritable numLine = new IntWritable();
- LineBasedmappingWritable lineBasedWriter = new LineBasedmappingWritable();
- @Override
- public void map(KmerBytesWritable key, PositionListWritable value, OutputCollector<IntWritable, LineBasedmappingWritable> output,
- Reporter reporter) throws IOException {
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/DeepGraphBuildingReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/DeepGraphBuildingReducer.java
deleted file mode 100644
index 4b971a7..0000000
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/DeepGraphBuildingReducer.java
+++ /dev/null
@@ -1,86 +0,0 @@
-package edu.uci.ics.genomix.hadoop.valvetgraphbuilding;
-
-import java.io.IOException;
-import java.util.Iterator;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-import edu.uci.ics.genomix.type.NodeWritable;
-
-@SuppressWarnings("deprecation")
-public class DeepGraphBuildingReducer extends MapReduceBase implements
- Reducer<IntWritable, LineBasedmappingWritable, NodeWritable, NullWritable> {
-
-/* public ArrayList<LineBasedmappingWritable> lineElementsSet = new ArrayList<LineBasedmappingWritable>();
- public Position outputVerID = new Position();
- public VertexAdjacentWritable outputAdjacentList = new VertexAdjacentWritable();
- public PositionList srcVtexAdjList = new PositionList();
- public PositionList desVtexAdjList = new PositionList();
- public VertexIDListWritable srcAdjListWritable = new VertexIDListWritable();
- public VKmerBytesWritable desKmer = new VKmerBytesWritable(1);
- public VKmerBytesWritableFactory kmerFactory = new VKmerBytesWritableFactory(1);
- public VKmerBytesWritable srcKmer = new VKmerBytesWritable(1);*/
- @Override
- public void reduce(IntWritable key, Iterator<LineBasedmappingWritable> values,
- OutputCollector<NodeWritable, NullWritable> output, Reporter reporter) throws IOException {
-/* while (values.hasNext()) {
- lineElementsSet.add(values.next());
- }
- int[] orderLineTable = new int[lineElementsSet.size()];
- for (int i = 0; i < lineElementsSet.size(); i++) {
- int posInInvertedIndex = lineElementsSet.get(i).getPosInInvertedIndex();
- orderLineTable[lineElementsSet.get(i).getAdjVertexList().get().getPosinReadListElement(posInInvertedIndex)] = i;
- }
- //the first node in this read
- int posInInvertedIndex = lineElementsSet.get(orderLineTable[0]).getPosInInvertedIndex();
- outputVerID.set(
- lineElementsSet.get(orderLineTable[0]).getAdjVertexList().get().getReadListElement(posInInvertedIndex),
- (byte) 0);
- desVtexAdjList.set(lineElementsSet.get(orderLineTable[1]).getAdjVertexList().get());
- for (int i = 0; i < desVtexAdjList.getUsedSize(); i++) {
- if (desVtexAdjList.getPosinReadListElement(i) == (byte) 0) {
- srcVtexAdjList.addELementToList(desVtexAdjList.getReadListElement(i), (byte) 0);
- }
- }
- srcVtexAdjList.addELementToList(key.get(), (byte) 1);
- outputVerID.set(
- lineElementsSet.get(orderLineTable[0]).getAdjVertexList().get().getReadListElement(posInInvertedIndex),
- (byte) 0);
- srcAdjListWritable.set(srcVtexAdjList);
- outputAdjacentList.set(srcAdjListWritable, lineElementsSet.get(orderLineTable[0]).getVkmer());
- output.collect(outputVerID, outputAdjacentList);
- //srcVtexAdjList reset!!!!
-
- for (int i = 1; i < lineElementsSet.size(); i++) {
- desVtexAdjList.set(lineElementsSet.get(orderLineTable[i + 1]).getAdjVertexList().get());
- boolean flag = false;
- for (int j = 0; j < desVtexAdjList.getUsedSize(); j++) {
- if (desVtexAdjList.getPosinReadListElement(j) == (byte) 0) {
- srcVtexAdjList.addELementToList(desVtexAdjList.getReadListElement(i), (byte) 0);
- flag = true;
- }
- }
- if (flag = true) {
- //doesm't merge
- srcVtexAdjList.addELementToList(key.get(), (byte) (i + 1));
- outputVerID.set(
- lineElementsSet.get(orderLineTable[i]).getAdjVertexList().get()
- .getReadListElement(posInInvertedIndex), lineElementsSet.get(orderLineTable[i])
- .getAdjVertexList().get().getPosinReadListElement(posInInvertedIndex));
- srcAdjListWritable.set(srcVtexAdjList);
- outputAdjacentList.set(srcAdjListWritable, lineElementsSet.get(orderLineTable[i]).getVkmer());
- }
- else {
- //merge
- desKmer.set(kmerFactory.getFirstKmerFromChain(1, lineElementsSet.get(orderLineTable[i+1]).getVkmer()));
- srcKmer.set(lineElementsSet.get(orderLineTable[i]).getVkmer());
- lineElementsSet.get(orderLineTable[i+1]).getVkmer().set(kmerFactory.mergeTwoKmer(srcKmer, desKmer));
- orderLineTable[i+1] = orderLineTable[i];
- }
- }*/
- }
-}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/DeepGraphBuildingMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/DeepGraphBuildingMapper.java
new file mode 100644
index 0000000..1a758c7
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/DeepGraphBuildingMapper.java
@@ -0,0 +1,55 @@
+package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
+
+import java.io.IOException;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.PositionListWritable;
+import edu.uci.ics.genomix.type.PositionWritable;
+
+@SuppressWarnings("deprecation")
+public class DeepGraphBuildingMapper extends MapReduceBase implements
+ Mapper<KmerBytesWritable, PositionListWritable, PositionWritable, PositionListAndKmerWritable> {
+
+ public PositionWritable VertexID;
+ public PositionListWritable listPosZeroInRead;
+ public PositionListWritable listPosNonZeroInRead;
+ public PositionListAndKmerWritable outputListAndKmer;
+ @Override
+ public void configure(JobConf job) {
+ VertexID = new PositionWritable();
+ listPosZeroInRead = new PositionListWritable();
+ listPosNonZeroInRead = new PositionListWritable();
+ outputListAndKmer = new PositionListAndKmerWritable();
+ }
+ @Override
+ public void map(KmerBytesWritable key, PositionListWritable value, OutputCollector<PositionWritable, PositionListAndKmerWritable> output,
+ Reporter reporter) throws IOException {
+ listPosZeroInRead.reset();
+ listPosNonZeroInRead.reset();
+ outputListAndKmer.reset();
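+        // split the positions covering this kmer into read-start positions (posInRead == 0)
+        // and all others; each group is then emitted with the opposite group as its neighbour list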
+        for (int i = 0; i < value.getCountOfPosition(); i++) {
+ VertexID.set(value.getPosition(i));
+ if(VertexID.getPosInRead() == 0) {
+ listPosZeroInRead.append(VertexID);
+ }
+ else {
+ listPosNonZeroInRead.append(VertexID);
+ }
+ }
+ for(int i = 0; i < listPosZeroInRead.getCountOfPosition(); i++) {
+ VertexID.set(listPosZeroInRead.getPosition(i));
+ outputListAndKmer.set(listPosNonZeroInRead, key);
+ output.collect(VertexID, outputListAndKmer);
+ }
+ for(int i = 0; i < listPosNonZeroInRead.getCountOfPosition(); i++) {
+ VertexID.set(listPosNonZeroInRead.getPosition(i));
+ outputListAndKmer.set(listPosZeroInRead, key);
+ output.collect(VertexID, outputListAndKmer);
+ }
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/DeepGraphBuildingReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/DeepGraphBuildingReducer.java
new file mode 100644
index 0000000..072a5c0
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/DeepGraphBuildingReducer.java
@@ -0,0 +1,140 @@
+package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.genomix.type.PositionListWritable;
+import edu.uci.ics.genomix.type.PositionWritable;
+
+@SuppressWarnings("deprecation")
+public class DeepGraphBuildingReducer extends MapReduceBase implements
+ Reducer<PositionWritable, PositionListAndKmerWritable, NodeWritable, NullWritable> {
+
+ private PositionListAndKmerWritable nodeListAndKmer = new PositionListAndKmerWritable();
+ private PositionListAndKmerWritable nodeSuccListAndKmer = new PositionListAndKmerWritable();
+ private NodeWritable startNodeInRead = new NodeWritable();
+ private NodeWritable nodeToMerge = new NodeWritable();
+ private NodeWritable nodeToBeMerged = new NodeWritable();
+ private PositionListWritable incomingList = new PositionListWritable();
+ private PositionListWritable outgoingList = new PositionListWritable();
+ private NullWritable nullWritable = NullWritable.get();
+
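+    /**
+     * State of the left-hand node while scanning one read:
+     * SRC_UPDATE_FROM_VALUES    - read a fresh pair of nodes from the values iterator;
+     * SRC_NODE_NON_UPDATE       - keep the current merged node and only read the next right-hand node;
+     * SRC_ASSIGNED_BY_RIGHTNODE - the previous right-hand node becomes the new left-hand node.
+     */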
+ public enum nodeToMergeState {
+ SRC_UPDATE_FROM_VALUES,
+ SRC_NODE_NON_UPDATE,
+ SRC_ASSIGNED_BY_RIGHTNODE;
+ };
+
+ @Override
+ public void reduce(PositionWritable key, Iterator<PositionListAndKmerWritable> values,
+ OutputCollector<NodeWritable, NullWritable> output, Reporter reporter) throws IOException {
+        // initialize the start node of the read
+ int readID = key.getReadID();
+        byte posInRead = 0;
+        outgoingList.reset(); // defensively clear any positions left from a previous reduce() call
+ if (values.hasNext()) {
+ nodeListAndKmer.set(values.next());
+            incomingList.set(nodeListAndKmer.getVertexIDList()); // the list just read for position 0 holds the start node's incoming neighbours
+ }
+ if (values.hasNext()) {
+ nodeSuccListAndKmer.set(values.next());
+ if (nodeSuccListAndKmer.getVertexIDList() != null)
+ outgoingList.set(nodeSuccListAndKmer.getVertexIDList());
+ }
+ outgoingList.append(readID, (byte) (posInRead + 1));
+ startNodeInRead.setNodeID(readID, posInRead);
+ startNodeInRead.setIncomingList(incomingList);
+ startNodeInRead.setOutgoingList(outgoingList);
+ startNodeInRead.setKmer(nodeListAndKmer.getKmer());
+ output.collect(startNodeInRead, nullWritable);
+ posInRead++;
+ //----initialize the nodeToMerge
+ nodeListAndKmer.set(nodeSuccListAndKmer);
+ incomingList.reset();
+ incomingList.append(key.getReadID(), key.getPosInRead());
+ if (values.hasNext()) {
+ nodeSuccListAndKmer.set(values.next());
+ if (nodeSuccListAndKmer.getVertexIDList() != null)
+ outgoingList.set(nodeSuccListAndKmer.getVertexIDList());
+ }
+ outgoingList.append(readID, (byte) (posInRead + 1));
+ nodeToMerge.setNodeID(readID, (byte) posInRead);
+ nodeToMerge.setIncomingList(incomingList);
+ nodeToMerge.setOutgoingList(outgoingList);
+ nodeToMerge.setKmer(nodeListAndKmer.getKmer());
+ posInRead++;
+        //---- walk the remaining kmers of this read, merging nodes that lie on a simple path
+ nodeToMergeState srcState = nodeToMergeState.SRC_NODE_NON_UPDATE;
+ boolean srcOrTarget = true;
+ while (values.hasNext()) {
+            switch (srcState) {
+                case SRC_UPDATE_FROM_VALUES:
+ srcOrTarget = true;
+ getNodeFromValues(readID, posInRead, values, srcOrTarget);
+ posInRead++;
+ srcOrTarget = false;
+ getNodeFromValues(readID, posInRead, values, srcOrTarget);
+ posInRead++;
+ break;
+                case SRC_NODE_NON_UPDATE:
+ srcOrTarget = false;
+ getNodeFromValues(readID, posInRead, values, srcOrTarget);
+ posInRead++;
+ break;
+                case SRC_ASSIGNED_BY_RIGHTNODE:
+ nodeToMerge.set(nodeToBeMerged);
+ srcOrTarget = false;
+ getNodeFromValues(readID, posInRead, values, srcOrTarget);
+ posInRead++;
+ break;
+ }
+            if (nodeToMerge.existsInSinglePath() && nodeToBeMerged.existsInSinglePath()) {
+ nodeToMerge.mergeNextWithinOneRead(nodeToBeMerged);
+ srcState = nodeToMergeState.SRC_NODE_NON_UPDATE;
+ }
+            else {
+                if (!nodeToMerge.existsInSinglePath() && nodeToBeMerged.existsInSinglePath()) {
+
+ srcState = nodeToMergeState.SRC_ASSIGNED_BY_RIGHTNODE;
+ output.collect(nodeToBeMerged, nullWritable);
+ }
+ else {
+ srcState = nodeToMergeState.SRC_UPDATE_FROM_VALUES;
+ output.collect(nodeToMerge, nullWritable);
+ output.collect(nodeToBeMerged, nullWritable);
+ }
+ }
+
+ }
+ }
+
+ public void getNodeFromValues(int readID, byte posInRead, Iterator<PositionListAndKmerWritable> values,
+ boolean srcOrTarget) {
+ if (values.hasNext()) {
+ nodeListAndKmer.set(values.next());
+ incomingList.reset();
+ incomingList.append(readID, (byte) (posInRead - 1));
+ }
+ if (values.hasNext()) {
+ nodeSuccListAndKmer.set(values.next());
+ if (nodeSuccListAndKmer.getVertexIDList() != null)
+ outgoingList.set(nodeSuccListAndKmer.getVertexIDList());
+ }
+ outgoingList.append(readID, (byte) (posInRead + 1));
+        if (srcOrTarget) {
+ nodeToMerge.setNodeID(readID, (byte) posInRead);
+ nodeToMerge.setIncomingList(incomingList);
+ nodeToMerge.setOutgoingList(outgoingList);
+ nodeToMerge.setKmer(nodeListAndKmer.getKmer());
+ } else {
+ nodeToBeMerged.setNodeID(readID, (byte) posInRead);
+ nodeToBeMerged.setIncomingList(incomingList);
+ nodeToBeMerged.setOutgoingList(outgoingList);
+ nodeToBeMerged.setKmer(nodeListAndKmer.getKmer());
+ }
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphBuildingDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphBuildingDriver.java
new file mode 100644
index 0000000..acbc3f1
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphBuildingDriver.java
@@ -0,0 +1,112 @@
+package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
+
+import java.io.IOException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Partitioner;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.NodeWritable;
+import edu.uci.ics.genomix.type.PositionListWritable;
+import edu.uci.ics.genomix.type.PositionWritable;
+
+@SuppressWarnings("deprecation")
+public class GraphBuildingDriver {
+
+ private static class Options {
+ @Option(name = "-inputpath", usage = "the input path", required = true)
+ public String inputPath;
+
+ @Option(name = "-outputpath", usage = "the output path", required = true)
+ public String outputPath;
+
+ @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+ public int numReducers;
+
+ @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+ public int sizeKmer;
+
+ }
+
+ public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, String defaultConfPath)
+ throws IOException {
+
+ JobConf conf = new JobConf(GraphBuildingDriver.class);
+ conf.setInt("sizeKmer", sizeKmer);
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath));
+ }
+
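+        // first job: build the inverted index mapping each kmer to the read positions that contain it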
+ conf.setJobName("graph building");
+ conf.setMapperClass(GraphInvertedIndexBuildingMapper.class);
+ conf.setReducerClass(GraphInvertedIndexBuildingReducer.class);
+
+ conf.setMapOutputKeyClass(KmerBytesWritable.class);
+ conf.setMapOutputValueClass(PositionWritable.class);
+
+ conf.setInputFormat(TextInputFormat.class);
+ conf.setOutputFormat(SequenceFileOutputFormat.class);
+
+ conf.setOutputKeyClass(KmerBytesWritable.class);
+ conf.setOutputValueClass(PositionListWritable.class);
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath));
+ FileOutputFormat.setOutputPath(conf, new Path(inputPath + "-step1"));
+ conf.setNumReduceTasks(numReducers);
+
+ FileSystem dfs = FileSystem.get(conf);
+ dfs.delete(new Path(inputPath + "-step1"), true);
+ JobClient.runJob(conf);
+
+        //------------- second job: group kmer positions by readID and merge nodes along each read
+ conf = new JobConf(GraphBuildingDriver.class);
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath));
+ }
+ conf.setJobName("deep build");
+
+ conf.setMapperClass(DeepGraphBuildingMapper.class);
+ conf.setReducerClass(DeepGraphBuildingReducer.class);
+
+ conf.setMapOutputKeyClass(PositionWritable.class);
+ conf.setMapOutputValueClass(PositionListAndKmerWritable.class);
+
+ conf.setPartitionerClass(ReadIDPartitioner.class);
+
+ conf.setOutputKeyComparatorClass(PositionWritable.Comparator.class);
+ conf.setOutputValueGroupingComparator(PositionWritable.FirstComparator.class);
+
+ conf.setInputFormat(SequenceFileInputFormat.class);
+ conf.setOutputFormat(TextOutputFormat.class);
+
+ conf.setOutputKeyClass(NodeWritable.class);
+ conf.setOutputValueClass(NullWritable.class);
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
+ FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+ conf.setNumReduceTasks(1);
+ dfs.delete(new Path(outputPath), true);
+ JobClient.runJob(conf);
+ }
+
+ public static void main(String[] args) throws Exception {
+ Options options = new Options();
+ CmdLineParser parser = new CmdLineParser(options);
+ parser.parseArgument(args);
+ GraphBuildingDriver driver = new GraphBuildingDriver();
+ driver.run(options.inputPath, options.outputPath, options.numReducers, options.sizeKmer, null);
+ }
+}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/GraphInvertedIndexBuildingMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphInvertedIndexBuildingMapper.java
similarity index 79%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/GraphInvertedIndexBuildingMapper.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphInvertedIndexBuildingMapper.java
index d3c2ff4..3c207f4 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/GraphInvertedIndexBuildingMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphInvertedIndexBuildingMapper.java
@@ -1,4 +1,4 @@
-package edu.uci.ics.genomix.hadoop.valvetgraphbuilding;
+package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
@@ -33,17 +33,17 @@
/** first kmer */
byte[] array = geneLine.getBytes();
outputKmer.setByRead(array, 0);
+ System.out.println(key.get());
outputVertexID.set((int)key.get(), (byte)0);
output.collect(outputKmer, outputVertexID);
/** middle kmer */
- for (int i = KMER_SIZE; i < array.length - 1; i++) {
- GeneCode.getBitMapFromGeneCode(outputKmer.shiftKmerWithNextChar(array[i]));
+ int i = 0;
+ for (i = KMER_SIZE; i < array.length; i++) {
+ outputKmer.shiftKmerWithNextChar(array[i]);
+ System.out.println((int)key.get());
outputVertexID.set((int)key.get(), (byte)(i - KMER_SIZE + 1));
output.collect(outputKmer, outputVertexID);
}
/** last kmer */
- GeneCode.getBitMapFromGeneCode(outputKmer.shiftKmerWithNextChar(array[array.length - 1]));
- outputVertexID.set((int)key.get(), (byte)(array.length - 1 + 1));
- output.collect(outputKmer, outputVertexID);
}
}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/GraphInvertedIndexBuildingReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphInvertedIndexBuildingReducer.java
similarity index 92%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/GraphInvertedIndexBuildingReducer.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphInvertedIndexBuildingReducer.java
index 72827f2..d2b6476 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/GraphInvertedIndexBuildingReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/GraphInvertedIndexBuildingReducer.java
@@ -1,4 +1,4 @@
-package edu.uci.ics.genomix.hadoop.valvetgraphbuilding;
+package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
import java.io.IOException;
import java.util.Iterator;
@@ -18,6 +18,7 @@
@Override
public void reduce(KmerBytesWritable key, Iterator<PositionWritable> values,
OutputCollector<KmerBytesWritable, PositionListWritable> output, Reporter reporter) throws IOException {
+ outputlist.reset();
while (values.hasNext()) {
outputlist.append(values.next());
}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/LineBasedmappingWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/LineBasedmappingWritable.java
similarity index 94%
rename from genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/LineBasedmappingWritable.java
rename to genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/LineBasedmappingWritable.java
index 1e44903..0b6b0a8 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/valvetgraphbuilding/LineBasedmappingWritable.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/LineBasedmappingWritable.java
@@ -1,4 +1,4 @@
-package edu.uci.ics.genomix.hadoop.valvetgraphbuilding;
+package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
import java.io.DataInput;
import java.io.DataOutput;
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/PositionListAndKmerWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/PositionListAndKmerWritable.java
new file mode 100644
index 0000000..fff3faf
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/PositionListAndKmerWritable.java
@@ -0,0 +1,81 @@
+package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import org.apache.hadoop.io.WritableComparable;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.PositionListWritable;
+
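+/**
+ * Pairs a kmer with the list of read positions that share it; used as the map output
+ * value of the second (deep graph building) job.
+ */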
+public class PositionListAndKmerWritable implements WritableComparable<PositionListAndKmerWritable> {
+
+ private PositionListWritable vertexIDList;
+ private KmerBytesWritable kmer;
+ private int countOfKmer;
+
+ public PositionListAndKmerWritable(){
+ countOfKmer = 0;
+ vertexIDList = new PositionListWritable();
+ kmer = new KmerBytesWritable();
+ }
+
+ public PositionListAndKmerWritable(int kmerSize) {
+ countOfKmer = 0;
+ vertexIDList = new PositionListWritable();
+ kmer = new KmerBytesWritable(kmerSize);
+ }
+
+ public int getCount() {
+ return countOfKmer;
+ }
+
+ public void setCount(int count) {
+ this.countOfKmer = count;
+ }
+
+ public void setvertexIDList(PositionListWritable posList) {
+ vertexIDList.set(posList);
+ }
+
+ public void reset() {
+ vertexIDList.reset();
+ countOfKmer = 0;
+ }
+
+ public PositionListWritable getVertexIDList() {
+ return vertexIDList;
+ }
+
+ public KmerBytesWritable getKmer() {
+ return kmer;
+ }
+
+ public void set(PositionListAndKmerWritable right) {
+ this.countOfKmer = right.countOfKmer;
+ this.vertexIDList.set(right.vertexIDList);
+ this.kmer.set(right.kmer);
+ }
+
+ public void set(PositionListWritable list, KmerBytesWritable kmer) {
+ this.vertexIDList.set(list);
+ this.kmer.set(kmer);
+ }
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ this.countOfKmer = in.readInt();
+ this.vertexIDList.readFields(in);
+ this.kmer.readFields(in);
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(this.countOfKmer);
+ this.vertexIDList.write(out);
+ this.kmer.write(out);
+ }
+
+ @Override
+ public int compareTo(PositionListAndKmerWritable o) {
+        return 0; // this type is only used as a map output value here, so no ordering is defined
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/ReadIDPartitioner.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/ReadIDPartitioner.java
new file mode 100644
index 0000000..362ebf3
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/velvetgraphbuilding/ReadIDPartitioner.java
@@ -0,0 +1,19 @@
+package edu.uci.ics.genomix.hadoop.velvetgraphbuilding;
+
+//import org.apache.hadoop.mapreduce.Partitioner;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Partitioner;
+import edu.uci.ics.genomix.type.PositionListWritable;
+import edu.uci.ics.genomix.type.PositionWritable;
+
+public class ReadIDPartitioner implements Partitioner<PositionWritable, PositionListWritable>{
+
+ @Override
+ public int getPartition(PositionWritable key, PositionListWritable value, int numPartitions){
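+        // partition by readID only; masking with Integer.MAX_VALUE keeps the index non-negative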
+ return (key.getReadID() & Integer.MAX_VALUE) % numPartitions;
+ }
+
+ @Override
+ public void configure(JobConf arg0) {
+ }
+}