create svn branch dir; rename the genomix-hadoop Maven coordinates and reformat the graph-building sources
git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_genomix@2738 123451ca-8445-de46-9d55-352943316053
diff --git a/genomix/genomix-hadoop/pom.xml b/genomix/genomix-hadoop/pom.xml
index 3e8cf5e..2bff6fb 100755
--- a/genomix/genomix-hadoop/pom.xml
+++ b/genomix/genomix-hadoop/pom.xml
@@ -2,8 +2,8 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
- <groupId>graphbuilding</groupId>
- <artifactId>graphbuilding</artifactId>
+ <groupId>hadoop</groupId>
+ <artifactId>hadoop</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>genomix</name>
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java
index a67d20e..5f4d991 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java
@@ -10,13 +10,15 @@
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
-public class GenomixCombiner extends MapReduceBase implements Reducer<LongWritable, IntWritable, LongWritable, IntWritable> {
- public void reduce(LongWritable key, Iterator<IntWritable> values, OutputCollector<LongWritable, IntWritable> output, Reporter reporter) throws IOException {
- int groupByAdjList = 0;
+public class GenomixCombiner extends MapReduceBase implements
+ Reducer<LongWritable, IntWritable, LongWritable, IntWritable> {
+ public void reduce(LongWritable key, Iterator<IntWritable> values,
+ OutputCollector<LongWritable, IntWritable> output, Reporter reporter) throws IOException {
+ int groupByAdjList = 0;
while (values.hasNext()) {
- //Merge By the all adjacent Nodes;
- groupByAdjList = groupByAdjList|values.next().get();
- }
- output.collect(key, new IntWritable(groupByAdjList));
+ //Merge all the adjacent nodes with a bitwise OR;
+ groupByAdjList = groupByAdjList | values.next().get();
}
- }
+ output.collect(key, new IntWritable(groupByAdjList));
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java
index 71c8733..9c8a689 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java
@@ -18,8 +18,8 @@
@SuppressWarnings("deprecation")
public class GenomixDriver {
- private static class Options {
- @Option(name = "-inputpath", usage = "the input path", required = true)
+ private static class Options {
+ @Option(name = "-inputpath", usage = "the input path", required = true)
public String inputPath;
@Option(name = "-outputpath", usage = "the output path", required = true)
@@ -28,39 +28,41 @@
@Option(name = "-num-reducers", usage = "the number of reducers", required = true)
public int numReducers;
}
- public void run(String inputPath, String outputPath, int numReducers, String defaultConfPath) throws IOException {
-
- JobConf conf = new JobConf(GenomixDriver.class);
+
+ public void run(String inputPath, String outputPath, int numReducers, String defaultConfPath) throws IOException {
+
+ JobConf conf = new JobConf(GenomixDriver.class);
if (defaultConfPath != null) {
conf.addResource(new Path(defaultConfPath));
}
- conf.setJobName("Genomix Graph Building");
- conf.setMapperClass(GenomixMapper.class);
- conf.setReducerClass(GenomixReducer.class);
- conf.setCombinerClass(GenomixCombiner.class);
-
- conf.setMapOutputKeyClass(LongWritable.class);
- conf.setMapOutputValueClass(IntWritable.class);
-
- conf.setInputFormat(TextInputFormat.class);
- conf.setOutputFormat(TextOutputFormat.class);
- conf.setOutputKeyClass(LongWritable.class);
- conf.setOutputValueClass(ValueWritable.class);
- FileInputFormat.setInputPaths(conf, new Path(inputPath));
+ conf.setJobName("Genomix Graph Building");
+ conf.setMapperClass(GenomixMapper.class);
+ conf.setReducerClass(GenomixReducer.class);
+ conf.setCombinerClass(GenomixCombiner.class);
+
+ conf.setMapOutputKeyClass(LongWritable.class);
+ conf.setMapOutputValueClass(IntWritable.class);
+
+ conf.setInputFormat(TextInputFormat.class);
+ conf.setOutputFormat(TextOutputFormat.class);
+ conf.setOutputKeyClass(LongWritable.class);
+ conf.setOutputValueClass(ValueWritable.class);
+ FileInputFormat.setInputPaths(conf, new Path(inputPath));
FileOutputFormat.setOutputPath(conf, new Path(outputPath));
conf.setNumReduceTasks(numReducers);
FileSystem dfs = FileSystem.get(conf);
dfs.delete(new Path(outputPath), true);
- JobClient.runJob(conf);
- }
- public static void main(String[] args) throws Exception {
- Options options = new Options();
+ JobClient.runJob(conf);
+ }
+
+ public static void main(String[] args) throws Exception {
+ Options options = new Options();
CmdLineParser parser = new CmdLineParser(options);
parser.parseArgument(args);
GenomixDriver driver = new GenomixDriver();
driver.run(options.inputPath, options.outputPath, options.numReducers, null);
}
-
+
}
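
For orientation, here is a minimal, hypothetical launcher showing how the driver above is wired together; the class name, paths, and reducer count are placeholders, and only the run(inputPath, outputPath, numReducers, defaultConfPath) signature comes from the code in this change.

import edu.uci.ics.graphbuilding.GenomixDriver;

public class LaunchGraphBuilding {
    public static void main(String[] args) throws Exception {
        GenomixDriver driver = new GenomixDriver();
        // A null defaultConfPath leaves the JobConf on the cluster defaults,
        // exactly as GenomixDriver.main() does after parsing -inputpath,
        // -outputpath and -num-reducers.
        driver.run("/data/webmap/text.txt", "/result2", 4, null);
    }
}
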
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java
index 7d0d75b..aff4e6d 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java
@@ -13,125 +13,129 @@
import org.apache.hadoop.mapred.Reporter;
public class GenomixMapper extends MapReduceBase implements Mapper<LongWritable, Text, LongWritable, IntWritable> {
-
+
public static final int KMER_SIZE = 3; //User Specify
-// private Text Map_Pair_Key = new Text();
+ // private Text Map_Pair_Key = new Text();
+
/*precursor node
A 00000001 1
G 00000010 2
- C 00000100 4
- T 00001000 8
+ C 00000100 4
+ T 00001000 8
succeed node
- A 00010000 16
- G 00100000 32
- C 01000000 64
- T 10000000 128*/
- public void map(LongWritable key, Text value, OutputCollector<LongWritable, IntWritable> output,
- Reporter reporter) throws IOException {
- /* A 00
- G 01
- C 10
- T 11*/
- try
- {
- String geneLine = value.toString(); // Read the Real Gene Line
- Pattern genePattern = Pattern.compile("[AGCT]+");
- Matcher geneMatcher = genePattern.matcher(geneLine);
- boolean isValid = geneMatcher.matches();
- if(isValid == true)
- {
- long kmerValue = 0;
- long PreMarker = -1;
- //Initialization: get the first kmer of this geneLine
- for(int i = 0; i < KMER_SIZE; i++)
- {
- kmerValue = (kmerValue << 2);
- switch(geneLine.charAt(i))
- {
- case 'A': kmerValue = kmerValue + 0;
- break;
- case 'G': kmerValue = kmerValue + 1;
- break;
- case 'C': kmerValue = kmerValue + 2;
- break;
- case 'T': kmerValue = kmerValue + 3;
- break;
- }
- }
- int i;
- //Get the next kmer by shiftint one letter every time
- for(i = KMER_SIZE; i < geneLine.length(); i++)
- {
- LongWritable outputKmer = new LongWritable(kmerValue);
- int kmerAdjList = 0;
- //Get the precursor node using the premarker
- switch((int)PreMarker)
- {
- case -1: kmerAdjList = kmerAdjList + 0;
- break;
- case 0: kmerAdjList = kmerAdjList + 16;
- break;
- case 16: kmerAdjList = kmerAdjList + 32;
- break;
- case 32: kmerAdjList = kmerAdjList + 64;
- break;
- case 48: kmerAdjList = kmerAdjList + 128;
- break;
- }
- //Update the premarker
- PreMarker = 3;
- PreMarker = PreMarker<<(KMER_SIZE-1)*2;
- PreMarker = PreMarker & kmerValue;
- //Reset the top two bits
- long reset = 3;
- kmerValue = kmerValue << 2;
- reset = ~(reset << KMER_SIZE*2);
- kmerValue = kmerValue & reset;
- switch(geneLine.charAt(i))
- {
- case 'A':
- kmerAdjList = kmerAdjList + 1;
- kmerValue = kmerValue + 0;
- break;
- case 'G':
- kmerAdjList = kmerAdjList + 2;
- kmerValue = kmerValue + 1;
- break;
- case 'C':
- kmerAdjList = kmerAdjList + 4;
- kmerValue = kmerValue + 2;
- break;
- case 'T':
- kmerAdjList = kmerAdjList + 8;
- kmerValue = kmerValue + 3;
- break;
- }
- IntWritable outputAdjList = new IntWritable(kmerAdjList);
- output.collect(outputKmer, outputAdjList);
- }
- // arrive the last letter of this gene line
- if(i == geneLine.length())
- {
- int kmerAdjList = 0;
- switch((int)PreMarker)
- {
- case 0: kmerAdjList = kmerAdjList + 16;
- break;
- case 16: kmerAdjList = kmerAdjList + 32;
- break;
- case 32: kmerAdjList = kmerAdjList + 64;
- break;
- case 48: kmerAdjList = kmerAdjList + 128;
- break;
- }
- IntWritable outputAdjList = new IntWritable(kmerAdjList);
- LongWritable outputKmer = new LongWritable(kmerValue);
- output.collect(outputKmer, outputAdjList);
- }
- }
- }
- catch( Exception e ) {
- System.out.println( "Exception:"+e );
- }
- }
- }
+ A 00010000 16
+ G 00100000 32
+ C 01000000 64
+ T 10000000 128*/
+ public void map(LongWritable key, Text value, OutputCollector<LongWritable, IntWritable> output, Reporter reporter)
+ throws IOException {
+ /* A 00
+ G 01
+ C 10
+ T 11*/
+ try {
+ String geneLine = value.toString(); // Read the Real Gene Line
+ Pattern genePattern = Pattern.compile("[AGCT]+");
+ Matcher geneMatcher = genePattern.matcher(geneLine);
+ boolean isValid = geneMatcher.matches();
+ if (isValid == true) {
+ long kmerValue = 0;
+ long PreMarker = -1;
+ //Initialization: get the first kmer of this geneLine
+ for (int i = 0; i < KMER_SIZE; i++) {
+ kmerValue = (kmerValue << 2);
+ switch (geneLine.charAt(i)) {
+ case 'A':
+ kmerValue = kmerValue + 0;
+ break;
+ case 'G':
+ kmerValue = kmerValue + 1;
+ break;
+ case 'C':
+ kmerValue = kmerValue + 2;
+ break;
+ case 'T':
+ kmerValue = kmerValue + 3;
+ break;
+ }
+ }
+ int i;
+ //Get the next kmer by shifting in one letter at a time
+ for (i = KMER_SIZE; i < geneLine.length(); i++) {
+ LongWritable outputKmer = new LongWritable(kmerValue);
+ int kmerAdjList = 0;
+ //Get the precursor node using the premarker
+ switch ((int) PreMarker) {
+ case -1:
+ kmerAdjList = kmerAdjList + 0;
+ break;
+ case 0:
+ kmerAdjList = kmerAdjList + 16;
+ break;
+ case 16:
+ kmerAdjList = kmerAdjList + 32;
+ break;
+ case 32:
+ kmerAdjList = kmerAdjList + 64;
+ break;
+ case 48:
+ kmerAdjList = kmerAdjList + 128;
+ break;
+ }
+ //Update the premarker
+ PreMarker = 3;
+ PreMarker = PreMarker << (KMER_SIZE - 1) * 2;
+ PreMarker = PreMarker & kmerValue;
+ //Reset the top two bits
+ long reset = 3;
+ kmerValue = kmerValue << 2;
+ reset = ~(reset << KMER_SIZE * 2);
+ kmerValue = kmerValue & reset;
+ switch (geneLine.charAt(i)) {
+ case 'A':
+ kmerAdjList = kmerAdjList + 1;
+ kmerValue = kmerValue + 0;
+ break;
+ case 'G':
+ kmerAdjList = kmerAdjList + 2;
+ kmerValue = kmerValue + 1;
+ break;
+ case 'C':
+ kmerAdjList = kmerAdjList + 4;
+ kmerValue = kmerValue + 2;
+ break;
+ case 'T':
+ kmerAdjList = kmerAdjList + 8;
+ kmerValue = kmerValue + 3;
+ break;
+ }
+ IntWritable outputAdjList = new IntWritable(kmerAdjList);
+ output.collect(outputKmer, outputAdjList);
+ }
+ // reached the last letter of this gene line
+ if (i == geneLine.length()) {
+ int kmerAdjList = 0;
+ switch ((int) PreMarker) {
+ case 0:
+ kmerAdjList = kmerAdjList + 16;
+ break;
+ case 16:
+ kmerAdjList = kmerAdjList + 32;
+ break;
+ case 32:
+ kmerAdjList = kmerAdjList + 64;
+ break;
+ case 48:
+ kmerAdjList = kmerAdjList + 128;
+ break;
+ }
+ IntWritable outputAdjList = new IntWritable(kmerAdjList);
+ LongWritable outputKmer = new LongWritable(kmerValue);
+ output.collect(outputKmer, outputAdjList);
+ }
+ }
+ } catch (Exception e) {
+ System.out.println("Exception:" + e);
+ }
+ }
+}
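
The mapper packs each k-mer into a long using two bits per base (A=00, G=01, C=10, T=11) and, for each occurrence, emits an int in which one nibble flags the base before the k-mer and the other flags the base after it, following the bit table in the class comment. Below is a stand-alone sketch of the 2-bit packing only, assuming KMER_SIZE = 3 as in the mapper; the class and method names are hypothetical and it is not part of this change.

// Stand-alone sketch of the 2-bit k-mer packing used by GenomixMapper
// (A=00, G=01, C=10, T=11); KMER_SIZE is assumed to be 3, as above.
public class KmerEncodingExample {
    static final int KMER_SIZE = 3;

    static long encode(String kmer) {
        long value = 0;
        for (int i = 0; i < kmer.length(); i++) {
            value <<= 2;
            switch (kmer.charAt(i)) {
                case 'A': value += 0; break;
                case 'G': value += 1; break;
                case 'C': value += 2; break;
                case 'T': value += 3; break;
            }
        }
        return value;
    }

    public static void main(String[] args) {
        // "AGC" -> 00 01 10 -> 6; sliding one base gives "GCT" -> 01 10 11 -> 27.
        System.out.println(encode("AGC")); // prints 6
        System.out.println(encode("GCT")); // prints 27
    }
}

Sliding the window by one base, as the mapper's inner loop does, is equivalent to ((value << 2) + newBase) & ((1L << 2 * KMER_SIZE) - 1); the reset mask ~(3 << KMER_SIZE * 2) applied after the left shift clears the same two bits that fall off the window.
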
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java
index 231e089..244d058 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java
@@ -10,16 +10,17 @@
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
-
-public class GenomixReducer extends MapReduceBase implements Reducer<LongWritable, IntWritable, LongWritable, ValueWritable> {
- public void reduce(LongWritable key, Iterator<IntWritable> values, OutputCollector<LongWritable, ValueWritable> output, Reporter reporter) throws IOException {
- int groupByAdjList = 0;
- int count = 0;
+public class GenomixReducer extends MapReduceBase implements
+ Reducer<LongWritable, IntWritable, LongWritable, ValueWritable> {
+ public void reduce(LongWritable key, Iterator<IntWritable> values,
+ OutputCollector<LongWritable, ValueWritable> output, Reporter reporter) throws IOException {
+ int groupByAdjList = 0;
+ int count = 0;
while (values.hasNext()) {
- //Merge By the all adjacent Nodes;
- groupByAdjList = groupByAdjList|values.next().get();
- count ++;
- }
- output.collect(key, new ValueWritable(groupByAdjList, count));
+ //Merge all the adjacent nodes with a bitwise OR;
+ groupByAdjList = groupByAdjList | values.next().get();
+ count++;
}
+ output.collect(key, new ValueWritable(groupByAdjList, count));
}
+}
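
Both GenomixCombiner and GenomixReducer fold the adjacency flags emitted for one k-mer into a single bitmask with bitwise OR; the reducer additionally counts the occurrences and wraps both into a ValueWritable. A minimal stand-alone sketch of that merge, using hypothetical input bitmasks rather than values from this change:

// Sketch of the OR-merge performed by GenomixCombiner/GenomixReducer.
// The three ints below are hypothetical adjacency bitmasks for one k-mer.
public class AdjListMergeExample {
    public static void main(String[] args) {
        int[] perOccurrence = { 0x11, 0x21, 0x14 };
        int groupByAdjList = 0;
        int count = 0;
        for (int adj : perOccurrence) {
            groupByAdjList |= adj; // union of all neighbour flags
            count++;               // multiplicity of the k-mer (reducer only)
        }
        System.out.println(Integer.toBinaryString(groupByAdjList)); // 110101
        System.out.println(count);                                  // 3
    }
}
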
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/ValueWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/ValueWritable.java
index 37775d7..3adac3c 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/ValueWritable.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/ValueWritable.java
@@ -6,71 +6,68 @@
import org.apache.hadoop.io.WritableComparable;
+public class ValueWritable implements WritableComparable<ValueWritable> {
+ private int first;
+ private int second;
-public class ValueWritable implements WritableComparable<ValueWritable>{
- private int first;
- private int second;
-
- public ValueWritable() {
- }
-
- public ValueWritable(int first, int second) {
- set(first,second);
- }
-
- public void set(int first, int second) {
- this.first = first;
- this.second = second;
- }
-
- public int getFirst() {
- return first;
- }
+ public ValueWritable() {
+ }
- public int getSecond() {
- return second;
- }
+ public ValueWritable(int first, int second) {
+ set(first, second);
+ }
- public void write(DataOutput out) throws IOException {
- out.writeInt(first);
- out.writeInt(second);
- }
+ public void set(int first, int second) {
+ this.first = first;
+ this.second = second;
+ }
- public void readFields(DataInput in) throws IOException {
- first = in.readInt();
- second = in.readInt();
- }
-
- public int hashCode() {
- return first+second;
- }
-
- public boolean equals(Object o) {
- if (o instanceof ValueWritable) {
- ValueWritable tp = (ValueWritable) o;
- return first == tp.first&&second==tp.second;
- }
- return false;
- }
+ public int getFirst() {
+ return first;
+ }
+ public int getSecond() {
+ return second;
+ }
- public String toString() {
- return Integer.toString(first) + "\t" + Integer.toString(second);
- }
-
- public int compareTo(ValueWritable tp) {
- int cmp;
- if(first == tp.first)
- cmp = 0;
- else
- cmp = 1;
- if(cmp != 0)
- return cmp;
- if(second == tp.second)
- return 0;
- else
- return 1;
- }
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(first);
+ out.writeInt(second);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ first = in.readInt();
+ second = in.readInt();
+ }
+
+ public int hashCode() {
+ return first + second;
+ }
+
+ public boolean equals(Object o) {
+ if (o instanceof ValueWritable) {
+ ValueWritable tp = (ValueWritable) o;
+ return first == tp.first && second == tp.second;
+ }
+ return false;
+ }
+
+ public String toString() {
+ return Integer.toString(first) + "\t" + Integer.toString(second);
+ }
+
+ public int compareTo(ValueWritable tp) {
+ int cmp;
+ if (first == tp.first)
+ cmp = 0;
+ else
+ cmp = 1;
+ if (cmp != 0)
+ return cmp;
+ if (second == tp.second)
+ return 0;
+ else
+ return 1;
+ }
}
-
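
One note on the class above (its behaviour is unchanged by this reformatting): compareTo returns 1 whenever the two objects differ, so two unequal values each compare as greater than the other, which violates the Comparable contract and could break sorting if ValueWritable were ever used as a key. A possible fix, shown only as a sketch and not part of this change:

// Hypothetical contract-respecting compareTo for ValueWritable:
// order by 'first', then by 'second', returning negative/zero/positive.
public int compareTo(ValueWritable tp) {
    if (first != tp.first) {
        return first < tp.first ? -1 : 1;
    }
    if (second != tp.second) {
        return second < tp.second ? -1 : 1;
    }
    return 0;
}
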
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java
index 090aa0b..783ecf4 100755
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java
@@ -16,9 +16,9 @@
import edu.uci.ics.utils.TestUtils;
-public class GraphBuildingTest{
-
- private static final String ACTUAL_RESULT_DIR = "actual";
+public class GraphBuildingTest {
+
+ private static final String ACTUAL_RESULT_DIR = "actual";
private JobConf conf = new JobConf();
private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
private static final String DATA_PATH = "data/webmap/text.txt";
@@ -26,14 +26,14 @@
private static final String RESULT_PATH = "/result2";
private static final String DUMPED_RESULT = ACTUAL_RESULT_DIR + RESULT_PATH + "/part-00000";
private static final String EXPECTED_PATH = "expected/result2";
-
- private MiniDFSCluster dfsCluster;
+
+ private MiniDFSCluster dfsCluster;
private MiniMRCluster mrCluster;
private FileSystem dfs;
-
+
@Test
- public void test() throws Exception{
- FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+ public void test() throws Exception {
+ FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
startHadoop();
@@ -46,6 +46,7 @@
cleanupHadoop();
}
+
private void startHadoop() throws IOException {
FileSystem lfs = FileSystem.getLocal(new Configuration());
lfs.delete(new Path("build"), true);
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java
index c2e30ed..0455b14 100755
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java
@@ -2,10 +2,10 @@
import java.io.BufferedReader;
import java.io.File;
-import java.io.FileReader;
+import java.io.FileReader;
public class TestUtils {
- public static void compareWithResult(File expectedFile, File actualFile) throws Exception {
+ public static void compareWithResult(File expectedFile, File actualFile) throws Exception {
BufferedReader readerExpected = new BufferedReader(new FileReader(expectedFile));
BufferedReader readerActual = new BufferedReader(new FileReader(actualFile));
String lineExpected, lineActual;
@@ -51,8 +51,10 @@
}
return true;
}
+
public static void main(String[] args) throws Exception {
- TestUtils TUtils = new TestUtils();
- TUtils.compareWithResult(new File("/Users/hadoop/Documents/workspace/Test/part-00000"), new File("/Users/hadoop/Documents/workspace/Test/test.txt"));
+ TestUtils TUtils = new TestUtils();
+ TUtils.compareWithResult(new File("/Users/hadoop/Documents/workspace/Test/part-00000"), new File(
+ "/Users/hadoop/Documents/workspace/Test/test.txt"));
}
}