create svn dir

git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_genomix@2738 123451ca-8445-de46-9d55-352943316053
diff --git a/genomix/genomix-hadoop/pom.xml b/genomix/genomix-hadoop/pom.xml
index 3e8cf5e..2bff6fb 100755
--- a/genomix/genomix-hadoop/pom.xml
+++ b/genomix/genomix-hadoop/pom.xml
@@ -2,8 +2,8 @@
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 	<modelVersion>4.0.0</modelVersion>
-	<groupId>graphbuilding</groupId>
-	<artifactId>graphbuilding</artifactId>
+	<groupId>hadoop</groupId>
+	<artifactId>hadoop</artifactId>
 	<version>0.0.1-SNAPSHOT</version>
 	<name>genomix</name>
 
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java
index a67d20e..5f4d991 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java
@@ -10,13 +10,15 @@
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 
-public class GenomixCombiner extends MapReduceBase implements Reducer<LongWritable, IntWritable, LongWritable, IntWritable> {
-	public void reduce(LongWritable key, Iterator<IntWritable> values, OutputCollector<LongWritable, IntWritable> output, Reporter reporter) throws IOException {
-    	int groupByAdjList = 0;
+public class GenomixCombiner extends MapReduceBase implements
+        Reducer<LongWritable, IntWritable, LongWritable, IntWritable> {
+    public void reduce(LongWritable key, Iterator<IntWritable> values,
+            OutputCollector<LongWritable, IntWritable> output, Reporter reporter) throws IOException {
+        int groupByAdjList = 0;
         while (values.hasNext()) {
-        	//Merge By the all adjacent Nodes;
-        	groupByAdjList = groupByAdjList|values.next().get(); 
-        	}
-        output.collect(key, new IntWritable(groupByAdjList)); 
+            //Merge the adjacency lists of all values with a bitwise OR
+            groupByAdjList = groupByAdjList | values.next().get();
         }
- 	}
+        output.collect(key, new IntWritable(groupByAdjList));
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java
index 71c8733..9c8a689 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java
@@ -18,8 +18,8 @@
 
 @SuppressWarnings("deprecation")
 public class GenomixDriver {
-	private static class Options {
-       @Option(name = "-inputpath", usage = "the input path", required = true)
+    private static class Options {
+        @Option(name = "-inputpath", usage = "the input path", required = true)
         public String inputPath;
 
         @Option(name = "-outputpath", usage = "the output path", required = true)
@@ -28,39 +28,41 @@
         @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
         public int numReducers;
     }
-	public void run(String inputPath, String outputPath, int numReducers, String defaultConfPath) throws IOException {
-		
-		JobConf conf = new JobConf(GenomixDriver.class);
+
+    public void run(String inputPath, String outputPath, int numReducers, String defaultConfPath) throws IOException {
+
+        JobConf conf = new JobConf(GenomixDriver.class);
         if (defaultConfPath != null) {
             conf.addResource(new Path(defaultConfPath));
         }
 
-		conf.setJobName("Genomix Graph Building");
-		conf.setMapperClass(GenomixMapper.class);
-		conf.setReducerClass(GenomixReducer.class);
-		conf.setCombinerClass(GenomixCombiner.class);	
-		
-		conf.setMapOutputKeyClass(LongWritable.class);
-		conf.setMapOutputValueClass(IntWritable.class);
-		
-		conf.setInputFormat(TextInputFormat.class);
-		conf.setOutputFormat(TextOutputFormat.class);
-		conf.setOutputKeyClass(LongWritable.class);
-		conf.setOutputValueClass(ValueWritable.class);
-		FileInputFormat.setInputPaths(conf, new Path(inputPath));
+        conf.setJobName("Genomix Graph Building");
+        conf.setMapperClass(GenomixMapper.class);
+        conf.setReducerClass(GenomixReducer.class);
+        conf.setCombinerClass(GenomixCombiner.class);
+
+        conf.setMapOutputKeyClass(LongWritable.class);
+        conf.setMapOutputValueClass(IntWritable.class);
+
+        conf.setInputFormat(TextInputFormat.class);
+        conf.setOutputFormat(TextOutputFormat.class);
+        conf.setOutputKeyClass(LongWritable.class);
+        conf.setOutputValueClass(ValueWritable.class);
+        FileInputFormat.setInputPaths(conf, new Path(inputPath));
         FileOutputFormat.setOutputPath(conf, new Path(outputPath));
         conf.setNumReduceTasks(numReducers);
 
         FileSystem dfs = FileSystem.get(conf);
         dfs.delete(new Path(outputPath), true);
-        JobClient.runJob(conf);		
-	}
-	public static void main(String[] args) throws Exception {
-		Options options = new Options();
+        JobClient.runJob(conf);
+    }
+
+    public static void main(String[] args) throws Exception {
+        Options options = new Options();
         CmdLineParser parser = new CmdLineParser(options);
         parser.parseArgument(args);
         GenomixDriver driver = new GenomixDriver();
         driver.run(options.inputPath, options.outputPath, options.numReducers, null);
     }
-	
+
 }
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java
index 7d0d75b..aff4e6d 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java
@@ -13,125 +13,129 @@
 import org.apache.hadoop.mapred.Reporter;
 
 public class GenomixMapper extends MapReduceBase implements Mapper<LongWritable, Text, LongWritable, IntWritable> {
-	
+
     public static final int KMER_SIZE = 3; //User Specify
-//    private Text Map_Pair_Key = new Text();
+    //    private Text Map_Pair_Key = new Text();
+
     /*precursor node
       A 00000001 1
       G 00000010 2
-	  C 00000100 4
-	  T 00001000 8
+      C 00000100 4
+      T 00001000 8
       succeed node
-	  A 00010000 16
-	  G 00100000 32
-	  C 01000000 64
-	  T 10000000 128*/
-    public void map(LongWritable key, Text value, OutputCollector<LongWritable, IntWritable> output, 
-    		Reporter reporter) throws IOException {
-    	/* A 00
-  	       G 01
-  	       C 10
-  	       T 11*/
-    	try
-      	{
-      		String geneLine = value.toString(); // Read the Real Gene Line
-          	Pattern genePattern = Pattern.compile("[AGCT]+");
-          	Matcher geneMatcher = genePattern.matcher(geneLine);
-          	boolean isValid = geneMatcher.matches();
-          	if(isValid == true)
-          	{
-    	    	long kmerValue = 0;
-          		long PreMarker = -1;
-          		//Initialization: get the first kmer of this geneLine 
-          		for(int i = 0; i < KMER_SIZE; i++)
-          		{
-          			kmerValue = (kmerValue << 2);
-          			switch(geneLine.charAt(i))
-          			{
-          			case 'A': kmerValue = kmerValue + 0;
-      				break;
-      				case 'G': kmerValue = kmerValue + 1;
-      				break;
-      				case 'C': kmerValue = kmerValue + 2;
-      				break;
-      				case 'T': kmerValue = kmerValue + 3;
-      				break;
-          			}
-          		}
-          		int i;
-          		//Get the next kmer by shiftint one letter every time
-          		for(i = KMER_SIZE; i < geneLine.length(); i++)
-          		{
-          			LongWritable outputKmer = new LongWritable(kmerValue);
-          			int kmerAdjList = 0;
-          			//Get the precursor node using the premarker
-              		switch((int)PreMarker)
-          			{
-              		case -1: kmerAdjList = kmerAdjList + 0;
-              		break;
-          			case 0: kmerAdjList = kmerAdjList + 16;
-          		    break;
-          		    case 16: kmerAdjList = kmerAdjList + 32;
-          			break;
-          			case 32: kmerAdjList = kmerAdjList + 64;
-          			break;
-          			case 48: kmerAdjList = kmerAdjList + 128;
-          			break;
-              		}
-              		//Update the premarker
-              		PreMarker = 3;
-              		PreMarker = PreMarker<<(KMER_SIZE-1)*2;
-              		PreMarker = PreMarker & kmerValue;
-              		//Reset the top two bits
-              		long reset = 3;
-              		kmerValue = kmerValue << 2;
-              		reset = ~(reset << KMER_SIZE*2);
-              		kmerValue = kmerValue & reset;
-          			switch(geneLine.charAt(i))
-      				{
-      				case 'A': 
-      					kmerAdjList = kmerAdjList + 1;
-      					kmerValue = kmerValue + 0;
-      				break;
-      				case 'G': 
-      					kmerAdjList = kmerAdjList + 2;
-      					kmerValue = kmerValue + 1;
-      				break;
-      				case 'C': 
-      					kmerAdjList = kmerAdjList + 4;
-      					kmerValue = kmerValue + 2;
-      				break;
-      				case 'T': 
-      					kmerAdjList = kmerAdjList + 8;
-      					kmerValue = kmerValue + 3;
-      				break;
-      				}
-          			IntWritable outputAdjList = new IntWritable(kmerAdjList);
-              		output.collect(outputKmer, outputAdjList);
-    	    	}
-          		// arrive the last letter of this gene line
-          		if(i == geneLine.length())
-          		{
-          			int kmerAdjList = 0;
-          			switch((int)PreMarker)
-          			{
-          			case 0: kmerAdjList = kmerAdjList + 16;
-          		    break;
-          		    case 16: kmerAdjList = kmerAdjList + 32;
-          			break;
-          			case 32: kmerAdjList = kmerAdjList + 64;
-          			break;
-          			case 48: kmerAdjList = kmerAdjList + 128;
-          			break;
-              		}
-          			IntWritable outputAdjList = new IntWritable(kmerAdjList);
-          			LongWritable outputKmer = new LongWritable(kmerValue);
-              		output.collect(outputKmer, outputAdjList);
-          		}
-          	}
-      	}
-      	catch( Exception e ) {     
-              System.out.println( "Exception:"+e );
-    	    }
-    	}
-  }
+      A 00010000 16
+      G 00100000 32
+      C 01000000 64
+      T 10000000 128*/
+    public void map(LongWritable key, Text value, OutputCollector<LongWritable, IntWritable> output, Reporter reporter)
+            throws IOException {
+        /* A 00
+           G 01
+           C 10
+           T 11*/
+        try {
+            String geneLine = value.toString(); // Read the Real Gene Line
+            Pattern genePattern = Pattern.compile("[AGCT]+");
+            Matcher geneMatcher = genePattern.matcher(geneLine);
+            boolean isValid = geneMatcher.matches();
+            if (isValid == true) {
+                long kmerValue = 0;
+                long PreMarker = -1;
+                //Initialization: get the first kmer of this geneLine 
+                for (int i = 0; i < KMER_SIZE; i++) {
+                    kmerValue = (kmerValue << 2);
+                    switch (geneLine.charAt(i)) {
+                        case 'A':
+                            kmerValue = kmerValue + 0;
+                            break;
+                        case 'G':
+                            kmerValue = kmerValue + 1;
+                            break;
+                        case 'C':
+                            kmerValue = kmerValue + 2;
+                            break;
+                        case 'T':
+                            kmerValue = kmerValue + 3;
+                            break;
+                    }
+                }
+                int i;
+                //Get the next kmer by shifting in one letter at a time
+                for (i = KMER_SIZE; i < geneLine.length(); i++) {
+                    LongWritable outputKmer = new LongWritable(kmerValue);
+                    int kmerAdjList = 0;
+                    //Get the precursor node using the premarker
+                    switch ((int) PreMarker) {
+                        case -1:
+                            kmerAdjList = kmerAdjList + 0;
+                            break;
+                        case 0:
+                            kmerAdjList = kmerAdjList + 16;
+                            break;
+                        case 16:
+                            kmerAdjList = kmerAdjList + 32;
+                            break;
+                        case 32:
+                            kmerAdjList = kmerAdjList + 64;
+                            break;
+                        case 48:
+                            kmerAdjList = kmerAdjList + 128;
+                            break;
+                    }
+                    //Update the premarker
+                    PreMarker = 3;
+                    PreMarker = PreMarker << (KMER_SIZE - 1) * 2;
+                    PreMarker = PreMarker & kmerValue;
+                    //Reset the top two bits
+                    long reset = 3;
+                    kmerValue = kmerValue << 2;
+                    reset = ~(reset << KMER_SIZE * 2);
+                    kmerValue = kmerValue & reset;
+                    switch (geneLine.charAt(i)) {
+                        case 'A':
+                            kmerAdjList = kmerAdjList + 1;
+                            kmerValue = kmerValue + 0;
+                            break;
+                        case 'G':
+                            kmerAdjList = kmerAdjList + 2;
+                            kmerValue = kmerValue + 1;
+                            break;
+                        case 'C':
+                            kmerAdjList = kmerAdjList + 4;
+                            kmerValue = kmerValue + 2;
+                            break;
+                        case 'T':
+                            kmerAdjList = kmerAdjList + 8;
+                            kmerValue = kmerValue + 3;
+                            break;
+                    }
+                    IntWritable outputAdjList = new IntWritable(kmerAdjList);
+                    output.collect(outputKmer, outputAdjList);
+                }
+                // reached the last letter of this gene line
+                if (i == geneLine.length()) {
+                    int kmerAdjList = 0;
+                    switch ((int) PreMarker) {
+                        case 0:
+                            kmerAdjList = kmerAdjList + 16;
+                            break;
+                        case 16:
+                            kmerAdjList = kmerAdjList + 32;
+                            break;
+                        case 32:
+                            kmerAdjList = kmerAdjList + 64;
+                            break;
+                        case 48:
+                            kmerAdjList = kmerAdjList + 128;
+                            break;
+                    }
+                    IntWritable outputAdjList = new IntWritable(kmerAdjList);
+                    LongWritable outputKmer = new LongWritable(kmerValue);
+                    output.collect(outputKmer, outputAdjList);
+                }
+            }
+        } catch (Exception e) {
+            System.out.println("Exception:" + e);
+        }
+    }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java
index 231e089..244d058 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java
@@ -10,16 +10,17 @@
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 
-
-public class GenomixReducer extends MapReduceBase implements Reducer<LongWritable, IntWritable, LongWritable, ValueWritable> {
-    public void reduce(LongWritable key, Iterator<IntWritable> values, OutputCollector<LongWritable, ValueWritable> output, Reporter reporter) throws IOException {
-    	int groupByAdjList = 0;
-    	int count = 0;
+public class GenomixReducer extends MapReduceBase implements
+        Reducer<LongWritable, IntWritable, LongWritable, ValueWritable> {
+    public void reduce(LongWritable key, Iterator<IntWritable> values,
+            OutputCollector<LongWritable, ValueWritable> output, Reporter reporter) throws IOException {
+        int groupByAdjList = 0;
+        int count = 0;
         while (values.hasNext()) {
-        	//Merge By the all adjacent Nodes;
-        	groupByAdjList = groupByAdjList|values.next().get(); 
-        	count ++;
-        	}
-        output.collect(key, new ValueWritable(groupByAdjList, count));
+            //Merge the adjacency lists of all values with a bitwise OR
+            groupByAdjList = groupByAdjList | values.next().get();
+            count++;
         }
+        output.collect(key, new ValueWritable(groupByAdjList, count));
     }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/ValueWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/ValueWritable.java
index 37775d7..3adac3c 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/ValueWritable.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/ValueWritable.java
@@ -6,71 +6,68 @@
 
 import org.apache.hadoop.io.WritableComparable;
 
+public class ValueWritable implements WritableComparable<ValueWritable> {
+    private int first;
+    private int second;
 
-public class ValueWritable implements WritableComparable<ValueWritable>{
-	private int first;
-	  private int second;
-	  
-	  public ValueWritable() {
-	  }
-	  
-	  public ValueWritable(int first, int second) {
-		  set(first,second);
-	  }	  
-	  
-	  public void set(int first, int second) {
-	    this.first = first;
-	    this.second = second;
-	  }
-	  
-	  public int getFirst() {
-	    return first;
-	  }
+    public ValueWritable() {
+    }
 
-	  public int getSecond() {
-	    return second;
-	  }
+    public ValueWritable(int first, int second) {
+        set(first, second);
+    }
 
-	  public void write(DataOutput out) throws IOException {
-		out.writeInt(first);
-	    out.writeInt(second);
-	  }
+    public void set(int first, int second) {
+        this.first = first;
+        this.second = second;
+    }
 
-	  public void readFields(DataInput in) throws IOException {
-		first = in.readInt();
-	    second = in.readInt();
-	  }
-	  
-	  public int hashCode() {
-	    return first+second;
-	  }
-	  
-	  public boolean equals(Object o) {
-	    if (o instanceof ValueWritable) {
-	      ValueWritable tp = (ValueWritable) o;
-	      return first == tp.first&&second==tp.second;
-	    }
-	    return false;
-	  }
+    public int getFirst() {
+        return first;
+    }
 
+    public int getSecond() {
+        return second;
+    }
 
-	  public String toString() {
-		return Integer.toString(first) + "\t" + Integer.toString(second);
-	  }
-	  
-	  public int compareTo(ValueWritable tp) {
-		 int cmp;
-		 if(first == tp.first)
-			 cmp = 0;
-	     else
-			cmp = 1;
-		 if(cmp != 0)
-			 return cmp;
-		 if(second == tp.second)
-			 return 0;
-		 else
-			 return 1;
-	  }
+    public void write(DataOutput out) throws IOException {
+        out.writeInt(first);
+        out.writeInt(second);
+    }
+
+    public void readFields(DataInput in) throws IOException {
+        first = in.readInt();
+        second = in.readInt();
+    }
+
+    public int hashCode() {
+        return first + second;
+    }
+
+    public boolean equals(Object o) {
+        if (o instanceof ValueWritable) {
+            ValueWritable tp = (ValueWritable) o;
+            return first == tp.first && second == tp.second;
+        }
+        return false;
+    }
+
+    public String toString() {
+        return Integer.toString(first) + "\t" + Integer.toString(second);
+    }
+
+    public int compareTo(ValueWritable tp) {
+        int cmp;
+        if (first == tp.first)
+            cmp = 0;
+        else
+            cmp = 1;
+        if (cmp != 0)
+            return cmp;
+        if (second == tp.second)
+            return 0;
+        else
+            return 1;
+    }
 
 }
-
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java
index 090aa0b..783ecf4 100755
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java
@@ -16,9 +16,9 @@
 
 import edu.uci.ics.utils.TestUtils;
 
-public class GraphBuildingTest{
-	
-	private static final String ACTUAL_RESULT_DIR = "actual";
+public class GraphBuildingTest {
+
+    private static final String ACTUAL_RESULT_DIR = "actual";
     private JobConf conf = new JobConf();
     private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
     private static final String DATA_PATH = "data/webmap/text.txt";
@@ -26,14 +26,14 @@
     private static final String RESULT_PATH = "/result2";
     private static final String DUMPED_RESULT = ACTUAL_RESULT_DIR + RESULT_PATH + "/part-00000";
     private static final String EXPECTED_PATH = "expected/result2";
-    
-	private MiniDFSCluster dfsCluster;
+
+    private MiniDFSCluster dfsCluster;
     private MiniMRCluster mrCluster;
     private FileSystem dfs;
-    
+
     @Test
-    public void test() throws Exception{
-    	FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+    public void test() throws Exception {
+        FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
         FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
         startHadoop();
 
@@ -46,6 +46,7 @@
         cleanupHadoop();
 
     }
+
     private void startHadoop() throws IOException {
         FileSystem lfs = FileSystem.getLocal(new Configuration());
         lfs.delete(new Path("build"), true);
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java
index c2e30ed..0455b14 100755
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java
@@ -2,10 +2,10 @@
 
 import java.io.BufferedReader;
 import java.io.File;
-import java.io.FileReader; 
+import java.io.FileReader;
 
 public class TestUtils {
-	public static void compareWithResult(File expectedFile, File actualFile) throws Exception {
+    public static void compareWithResult(File expectedFile, File actualFile) throws Exception {
         BufferedReader readerExpected = new BufferedReader(new FileReader(expectedFile));
         BufferedReader readerActual = new BufferedReader(new FileReader(actualFile));
         String lineExpected, lineActual;
@@ -51,8 +51,10 @@
         }
         return true;
     }
+
     public static void main(String[] args) throws Exception {
-    	TestUtils TUtils = new TestUtils();
-    	TUtils.compareWithResult(new File("/Users/hadoop/Documents/workspace/Test/part-00000"), new File("/Users/hadoop/Documents/workspace/Test/test.txt"));
+        TestUtils TUtils = new TestUtils();
+        TUtils.compareWithResult(new File("/Users/hadoop/Documents/workspace/Test/part-00000"), new File(
+                "/Users/hadoop/Documents/workspace/Test/test.txt"));
     }
 }