add test for graph build
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/text.txt b/genomix/genomix-pregelix/data/graphbuild.test/text.txt
new file mode 100755
index 0000000..f9e7e07
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/text.txt
@@ -0,0 +1,2 @@
+1	AATAGAAGG

+2	ATAGAAACC

diff --git a/genomix/genomix-pregelix/data/graphbuild.test/text.txt~ b/genomix/genomix-pregelix/data/graphbuild.test/text.txt~
new file mode 100755
index 0000000..ba651ae
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/text.txt~
@@ -0,0 +1,2 @@
+1	AATAGAA

+2	ATAGAAA

diff --git a/genomix/genomix-pregelix/pom.xml b/genomix/genomix-pregelix/pom.xml
index bee0907..f609d3b 100644
--- a/genomix/genomix-pregelix/pom.xml
+++ b/genomix/genomix-pregelix/pom.xml
@@ -7,9 +7,10 @@
   <version>0.2.6-SNAPSHOT</version>
   <name>genomix-pregelix</name>
 
   <properties>
     <jvm.extraargs/>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
   </properties>
 
   <profiles>
     <profile>
@@ -224,6 +225,13 @@
                 	<type>jar</type>
                 	<scope>compile</scope>
                 </dependency>
+                <dependency>
+                	<groupId>edu.uci.ics.hyracks</groupId>
+                	<artifactId>genomix-hadoop</artifactId>
+                	<version>0.2.6-SNAPSHOT</version>
+                	<type>jar</type>
+                	<scope>compile</scope>
+                </dependency>
         </dependencies>
 
   <scm>
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/graphbuilding/JobRunStepByStepTest.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/graphbuilding/JobRunStepByStepTest.java
new file mode 100644
index 0000000..adf1706
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/graphbuilding/JobRunStepByStepTest.java
@@ -0,0 +1,249 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pregelix.graphbuilding;
+
+import java.io.BufferedWriter;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+
+import junit.framework.Assert;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import edu.uci.ics.genomix.hyracks.driver.Driver;
+import edu.uci.ics.genomix.hyracks.driver.Driver.Plan;
+import edu.uci.ics.genomix.hyracks.job.GenomixJobConf;
+import edu.uci.ics.genomix.type.NodeWritable;
+
+/**
+ * Runs graph-building plans through the genomix Hyracks {@link Driver} against a
+ * {@link MiniDFSCluster} and compares the dumped output with the expected result files.
+ */
+@SuppressWarnings("deprecation")
+public class JobRunStepByStepTest {
+    private static final int KMER_SIZE = 5;
+    private static final int READ_LENGTH = 9;
+    private static final String ACTUAL_RESULT_DIR = "actual";
+    private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
+
+    private static final String DATA_INPUT_PATH = "data/graphbuild.test/text.txt";
+    private static final String HDFS_INPUT_PATH = "/webmap";
+    private static final String HDFS_OUTPUT_PATH = "/webmap_result";
+
+    private static final String EXPECTED_DIR = "src/test/resources/expected/";
+    private static final String EXPECTED_OUTPUT_NODE = EXPECTED_DIR + "result_after_generateNode";
+    private static final String EXPECTED_UNMERGED = EXPECTED_DIR + "result_unmerged";
+
+    private static final String DUMPED_RESULT = ACTUAL_RESULT_DIR + HDFS_OUTPUT_PATH + "/merged.txt";
+    private static final String CONVERT_RESULT = DUMPED_RESULT + ".txt";
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+
+    private static final int NUMBER_OF_NC = 2;
+    private static final int NUM_PARTITION_PER_MACHINE = 2;
+    /** Tab-separated output fields whose position lists may appear in any order. */
+    private static final int[] POSITION_LIST_FIELDS = { 1, 2, 3, 4 };
+
+    private final JobConf conf = new JobConf();
+    private MiniDFSCluster dfsCluster;
+    private Driver driver;
+
+    /** Single JUnit entry point; the scenarios below are plain methods it calls in turn. */
+    @Test
+    public void TestAll() throws Exception {
+        TestEndToEnd();
+        // TestUnMergedNode(); -- was already disabled when this test was added; reason not recorded.
+    }
+
+    /** Runs the BUILD_DEBRUJIN_GRAPH plan and compares its output with the expected node file. */
+    public void TestEndToEnd() throws Exception {
+        runPlanAndVerify(Plan.BUILD_DEBRUJIN_GRAPH, EXPECTED_OUTPUT_NODE);
+    }
+
+    /** Runs the BUILD_UNMERGED_GRAPH plan and compares its output with the expected unmerged file. */
+    public void TestUnMergedNode() throws Exception {
+        runPlanAndVerify(Plan.BUILD_UNMERGED_GRAPH, EXPECTED_UNMERGED);
+    }
+
+    /** Runs one plan with binary output and pre-cluster grouping, then checks the result. */
+    private void runPlanAndVerify(Plan plan, String expectedPath) throws Exception {
+        conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_BINARY);
+        cleanUpReEntry();
+        conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
+        driver.runJob(new GenomixJobConf(conf), plan, true);
+        Assert.assertTrue(checkResults(expectedPath, POSITION_LIST_FIELDS));
+    }
+
+    /** Resets local scratch directories, initialises HyracksUtils and HDFS, and configures the job. */
+    @Before
+    public void setUp() throws Exception {
+        cleanupStores();
+        edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.init();
+        FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+        FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+        startHDFS();
+
+        FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
+        FileOutputFormat.setOutputPath(conf, new Path(HDFS_OUTPUT_PATH));
+
+        conf.setInt(GenomixJobConf.KMER_LENGTH, KMER_SIZE);
+        conf.setInt(GenomixJobConf.READ_LENGTH, READ_LENGTH);
+        driver = new Driver(edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.CC_HOST,
+                edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT, NUM_PARTITION_PER_MACHINE);
+    }
+
+    /** Ensures the local "teststore" and "build" directories exist and are empty. */
+    private void cleanupStores() throws IOException {
+        FileUtils.forceMkdir(new File("teststore"));
+        FileUtils.forceMkdir(new File("build"));
+        FileUtils.cleanDirectory(new File("teststore"));
+        FileUtils.cleanDirectory(new File("build"));
+    }
+
+    /**
+     * Starts the mini DFS cluster, copies the input file into HDFS and writes the job
+     * configuration as XML to {@link #HADOOP_CONF_PATH}.
+     */
+    private void startHDFS() throws IOException {
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+        conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, NUMBER_OF_NC, true, null);
+        FileSystem dfs = FileSystem.get(conf);
+        Path dest = new Path(HDFS_INPUT_PATH);
+        dfs.mkdirs(dest);
+        dfs.copyFromLocalFile(new Path(DATA_INPUT_PATH), dest);
+
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        try {
+            conf.writeXml(confOutput);
+            confOutput.flush();
+        } finally {
+            confOutput.close();
+        }
+    }
+
+    /** Removes the previous local dump and HDFS output directory so a plan can be re-run. */
+    private void cleanUpReEntry() throws IOException {
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        if (lfs.exists(new Path(DUMPED_RESULT))) {
+            lfs.delete(new Path(DUMPED_RESULT), true);
+        }
+        FileSystem dfs = FileSystem.get(conf);
+        if (dfs.exists(new Path(HDFS_OUTPUT_PATH))) {
+            dfs.delete(new Path(HDFS_OUTPUT_PATH), true);
+        }
+    }
+
+    /**
+     * Dumps the job output to a local text file and compares it with {@code expectedPath}.
+     *
+     * @param expectedPath
+     *            local path of the expected result file
+     * @param poslistField
+     *            indexes of fields compared as unordered position lists, or null to compare
+     *            whole lines
+     * @return always true; a mismatch surfaces as an exception from {@link TestUtils}
+     */
+    private boolean checkResults(String expectedPath, int[] poslistField) throws Exception {
+        File dumped;
+        String format = conf.get(GenomixJobConf.OUTPUT_FORMAT);
+        if (GenomixJobConf.OUTPUT_FORMAT_TEXT.equalsIgnoreCase(format)) {
+            FileUtil.copyMerge(FileSystem.get(conf), new Path(HDFS_OUTPUT_PATH),
+                    FileSystem.getLocal(new Configuration()), new Path(DUMPED_RESULT), false, conf, null);
+            dumped = new File(DUMPED_RESULT);
+        } else {
+            dumped = dumpBinaryResult();
+        }
+
+        if (poslistField != null) {
+            TestUtils.compareWithUnSortedPosition(new File(expectedPath), dumped, poslistField);
+        } else {
+            TestUtils.compareWithSortedResult(new File(expectedPath), dumped);
+        }
+        return true;
+    }
+
+    /**
+     * Reads every non-empty {@code part-N} sequence file of the job output and writes one
+     * {@link NodeWritable#toString()} line per record to {@link #CONVERT_RESULT}.
+     */
+    private File dumpBinaryResult() throws IOException {
+        FileSystem.getLocal(new Configuration()).mkdirs(new Path(ACTUAL_RESULT_DIR + HDFS_OUTPUT_PATH));
+        File converted = new File(CONVERT_RESULT);
+        FileSystem dfs = FileSystem.get(conf);
+        BufferedWriter bw = new BufferedWriter(new FileWriter(converted));
+        try {
+            for (int i = 0; i < NUM_PARTITION_PER_MACHINE * NUMBER_OF_NC; i++) {
+                Path part = new Path(HDFS_OUTPUT_PATH + "/part-" + i);
+                if (dfs.getFileStatus(part).getLen() == 0) {
+                    continue;
+                }
+                SequenceFile.Reader reader = new SequenceFile.Reader(dfs, part, conf);
+                try {
+                    NodeWritable node = new NodeWritable(conf.getInt(GenomixJobConf.KMER_LENGTH, KMER_SIZE));
+                    NullWritable value = NullWritable.get();
+                    while (reader.next(node, value)) {
+                        String line = node.toString();
+                        bw.write(line);
+                        System.out.println(line);
+                        bw.newLine();
+                    }
+                } finally {
+                    reader.close();
+                }
+            }
+        } finally {
+            bw.close();
+        }
+        return converted;
+    }
+
+    /** Shuts down the mini DFS cluster even when HyracksUtils.deinit() fails. */
+    @After
+    public void tearDown() throws Exception {
+        try {
+            edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.deinit();
+        } finally {
+            cleanupHDFS();
+        }
+    }
+
+    private void cleanupHDFS() throws Exception {
+        // dfsCluster is null when setUp failed before startHDFS(); @After still runs then.
+        if (dfsCluster != null) {
+            dfsCluster.shutdown();
+        }
+    }
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/graphbuilding/TestUtils.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/graphbuilding/TestUtils.java
new file mode 100644
index 0000000..ca7755e
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/graphbuilding/TestUtils.java
@@ -0,0 +1,297 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.pregelix.graphbuilding;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * File-comparison helpers for the graph-building tests. A mismatch is reported by throwing a
+ * plain {@link Exception} whose message shows the expected line after {@code <} and the actual
+ * line after {@code >}.
+ */
+public class TestUtils {
+    /** Decides whether an expected line and an actual line count as the same record. */
+    private interface LineMatcher {
+        boolean matches(String expected, String actual);
+    }
+
+    /**
+     * Compare with the sorted expected file.
+     * The actual file may not be sorted; its lines are sorted before the comparison.
+     *
+     * @param expectedFile
+     *            file holding the expected lines, already in sorted order
+     * @param actualFile
+     *            file holding the produced lines, in any order
+     * @throws Exception
+     *             if a line differs or one file has more lines than the other
+     */
+    public static void compareWithSortedResult(File expectedFile, File actualFile) throws Exception {
+        compareSortedActual(expectedFile, actualFile, new LineMatcher() {
+            @Override
+            public boolean matches(String expected, String actual) {
+                return equalStrings(expected, actual);
+            }
+        });
+    }
+
+    /**
+     * Sorts the actual lines like {@link #compareWithSortedResult(File, File)}, but matches each
+     * pair of lines field by field: the tab-separated fields named in {@code poslistField} only
+     * need to hold the same "(...)" groups in any order, every other field must be identical.
+     *
+     * @param expectedFile
+     *            file holding the expected lines, already in sorted order
+     * @param actualFile
+     *            file holding the produced lines, in any order
+     * @param poslistField
+     *            zero-based indexes of the position-list fields; may be null for none
+     * @throws Exception
+     *             if a line differs or one file has more lines than the other
+     */
+    public static void compareWithUnSortedPosition(File expectedFile, File actualFile, final int[] poslistField)
+            throws Exception {
+        compareSortedActual(expectedFile, actualFile, new LineMatcher() {
+            @Override
+            public boolean matches(String expected, String actual) {
+                return containStrings(expected, actual, poslistField);
+            }
+        });
+    }
+
+    /**
+     * Compares two files line by line, in file order, using the same line matching as
+     * {@link #compareWithSortedResult(File, File)}.
+     *
+     * @throws Exception
+     *             if a line differs or one file has more lines than the other
+     */
+    public static void compareWithResult(File expectedFile, File actualFile) throws Exception {
+        BufferedReader readerExpected = null;
+        BufferedReader readerActual = null;
+        try {
+            readerExpected = new BufferedReader(new FileReader(expectedFile));
+            readerActual = new BufferedReader(new FileReader(actualFile));
+            String lineExpected;
+            int num = 1;
+            while ((lineExpected = readerExpected.readLine()) != null) {
+                String lineActual = readerActual.readLine();
+                if (lineActual == null) {
+                    throw new Exception("Actual result changed at line " + num + ":\n< " + lineExpected + "\n> ");
+                }
+                if (!equalStrings(lineExpected, lineActual)) {
+                    throw new Exception("Result for changed at line " + num + ":\n< " + lineExpected + "\n> "
+                            + lineActual);
+                }
+                ++num;
+            }
+            String extraActual = readerActual.readLine();
+            if (extraActual != null) {
+                throw new Exception("Actual result changed at line " + num + ":\n< \n> " + extraActual);
+            }
+        } finally {
+            closeBoth(readerExpected, readerActual);
+        }
+    }
+
+    /**
+     * Shared body of the "sorted" comparisons: loads and sorts the actual lines, then walks the
+     * expected file in order and applies {@code matcher} to each pair.
+     */
+    private static void compareSortedActual(File expectedFile, File actualFile, LineMatcher matcher)
+            throws Exception {
+        BufferedReader readerActual = null;
+        BufferedReader readerExpected = null;
+        try {
+            // Opened inside the try so the first reader is closed if the second file is missing.
+            readerActual = new BufferedReader(new FileReader(actualFile));
+            readerExpected = new BufferedReader(new FileReader(expectedFile));
+            List<String> actualLines = new ArrayList<String>();
+            String lineActual;
+            while ((lineActual = readerActual.readLine()) != null) {
+                actualLines.add(lineActual);
+            }
+            Collections.sort(actualLines);
+            int num = 1;
+            for (String actualLine : actualLines) {
+                String lineExpected = readerExpected.readLine();
+                if (lineExpected == null) {
+                    // Expected file ran out: the surplus line belongs on the actual (">") side.
+                    throw new Exception("Actual result changed at line " + num + ":\n< \n> " + actualLine);
+                }
+                if (!matcher.matches(lineExpected, actualLine)) {
+                    throw new Exception("Result for changed at line " + num + ":\n< " + lineExpected + "\n> "
+                            + actualLine);
+                }
+                ++num;
+            }
+            String extraExpected = readerExpected.readLine();
+            if (extraExpected != null) {
+                // Actual lines ran out: the missing line belongs on the expected ("<") side.
+                throw new Exception("Actual result changed at line " + num + ":\n< " + extraExpected + "\n> ");
+            }
+        } finally {
+            closeBoth(readerActual, readerExpected);
+        }
+    }
+
+    /** Closes both readers (either may be null); the second is closed even if the first fails. */
+    private static void closeBoth(BufferedReader first, BufferedReader second) throws IOException {
+        try {
+            if (first != null) {
+                first.close();
+            }
+        } finally {
+            if (second != null) {
+                second.close();
+            }
+        }
+    }
+
+    /** Returns true when both strings have the same number of rows and every row pair matches. */
+    private static boolean equalStrings(String s1, String s2) {
+        String[] rowsOne = s1.split("\n");
+        String[] rowsTwo = s2.split("\n");
+        if (rowsOne.length != rowsTwo.length) {
+            return false;
+        }
+        for (int i = 0; i < rowsOne.length; i++) {
+            if (!equalRows(rowsOne[i], rowsTwo[i])) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Rows match when identical, or when they have the same number of comma-separated fields
+     * and each differing pair is a numerically equal {@code key=value} pair.
+     */
+    private static boolean equalRows(String row1, String row2) {
+        if (row1.equals(row2)) {
+            return true;
+        }
+        String[] fields1 = row1.split(",");
+        String[] fields2 = row2.split(",");
+        if (fields1.length != fields2.length) {
+            // Previously a longer second row was accepted and a shorter one threw.
+            return false;
+        }
+        for (int j = 0; j < fields1.length; j++) {
+            if (fields1[j].equals(fields2[j])) {
+                continue;
+            }
+            // Only fields that look like decimals (contain a '.') get the numeric comparison.
+            if (fields1[j].indexOf('.') < 0 || !equalNumericFields(fields1[j], fields2[j])) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Compares two {@code key=value} fields: the keys must be identical and the values must
+     * parse to the same number once narrowed to float. Malformed fields never match.
+     */
+    private static boolean equalNumericFields(String field1, String field2) {
+        String[] parts1 = field1.split("=");
+        String[] parts2 = field2.split("=");
+        if (parts1.length < 2 || parts2.length < 2 || !parts1[0].equals(parts2[0])) {
+            return false;
+        }
+        try {
+            float float1 = (float) Double.parseDouble(parts1[1]);
+            float float2 = (float) Double.parseDouble(parts2[1]);
+            return Math.abs(float1 - float2) == 0;
+        } catch (NumberFormatException e) {
+            return false;
+        }
+    }
+
+    /**
+     * Matches two tab-separated lines. Fields whose index is in {@code poslistField} match when
+     * they hold the same "(...)" groups in any order; all other fields must be identical.
+     * Indexes beyond the last field are ignored.
+     */
+    private static boolean containStrings(String lineExpected, String actualLine, int[] poslistField) {
+        if (lineExpected.equals(actualLine)) {
+            return true;
+        }
+        String[] fieldsExp = lineExpected.split("\t");
+        String[] fieldsAct = actualLine.split("\t");
+        if (fieldsAct.length != fieldsExp.length) {
+            return false;
+        }
+        for (int i = 0; i < fieldsAct.length; i++) {
+            boolean same = isPositionField(i, poslistField) ? samePositions(fieldsExp[i], fieldsAct[i])
+                    : fieldsAct[i].equals(fieldsExp[i]);
+            if (!same) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /** Returns true when {@code index} is listed in {@code poslistField} (null means none). */
+    private static boolean isPositionField(int index, int[] poslistField) {
+        if (poslistField == null) {
+            return false;
+        }
+        for (int x : poslistField) {
+            if (x == index) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /** Returns true when both values contain the same "(...)" groups, ignoring their order. */
+    private static boolean samePositions(String valueExp, String valueAct) {
+        List<String> posExp = extractPositions(valueExp);
+        List<String> posAct = extractPositions(valueAct);
+        Collections.sort(posExp);
+        Collections.sort(posAct);
+        return posExp.equals(posAct);
+    }
+
+    /**
+     * Collects the text between each '(' and the following ')' of {@code value}. The first and
+     * last characters are skipped (presumably the list's enclosing brackets; verify against the
+     * node's toString format).
+     */
+    private static List<String> extractPositions(String value) {
+        List<String> positions = new ArrayList<String>();
+        int last = value.length() - 1;
+        int i = 1;
+        while (i < last) {
+            if (value.charAt(i) == '(') {
+                int start = ++i;
+                while (i < last && value.charAt(i) != ')') {
+                    i++;
+                }
+                positions.add(value.substring(start, i));
+            }
+            i++;
+        }
+        return positions;
+    }
+}
diff --git a/genomix/genomix-pregelix/src/test/resources/data/webmap/text.txt b/genomix/genomix-pregelix/src/test/resources/data/webmap/text.txt
new file mode 100755
index 0000000..01c49e5
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/data/webmap/text.txt
@@ -0,0 +1,6 @@
+1	AATAGAAG

+2	AATAGCTT

+3	AATAGAAG

+4	AATAGCTT

+5	AATAGAAG

+6	AGAAGAAG