add test for graph build
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/text.txt b/genomix/genomix-pregelix/data/graphbuild.test/text.txt
new file mode 100755
index 0000000..f9e7e07
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/text.txt
@@ -0,0 +1,2 @@
+1 AATAGAAGG
+2 ATAGAAACC
diff --git a/genomix/genomix-pregelix/data/graphbuild.test/text.txt~ b/genomix/genomix-pregelix/data/graphbuild.test/text.txt~
new file mode 100755
index 0000000..ba651ae
--- /dev/null
+++ b/genomix/genomix-pregelix/data/graphbuild.test/text.txt~
@@ -0,0 +1,2 @@
+1 AATAGAA
+2 ATAGAAA
diff --git a/genomix/genomix-pregelix/pom.xml b/genomix/genomix-pregelix/pom.xml
index bee0907..f609d3b 100644
--- a/genomix/genomix-pregelix/pom.xml
+++ b/genomix/genomix-pregelix/pom.xml
@@ -7,9 +7,9 @@
<version>0.2.6-SNAPSHOT</version>
<name>genomix-pregelix</name>
- <properties>
- <jvm.extraargs/>
- </properties>
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ </properties>
<profiles>
<profile>
@@ -224,6 +224,13 @@
<type>jar</type>
<scope>compile</scope>
</dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>genomix-hadoop</artifactId>
+ <version>0.2.6-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
</dependencies>
<scm>
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/graphbuilding/JobRunStepByStepTest.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/graphbuilding/JobRunStepByStepTest.java
new file mode 100644
index 0000000..adf1706
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/graphbuilding/JobRunStepByStepTest.java
@@ -0,0 +1,211 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.pregelix.graphbuilding;
+
+import java.io.BufferedWriter;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+
+import junit.framework.Assert;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import edu.uci.ics.genomix.hyracks.driver.Driver;
+import edu.uci.ics.genomix.hyracks.driver.Driver.Plan;
+import edu.uci.ics.genomix.hyracks.job.GenomixJobConf;
+import edu.uci.ics.genomix.type.NodeWritable;
+
+@SuppressWarnings("deprecation")
+public class JobRunStepByStepTest {
+ private static final int KmerSize = 5;
+ private static final int ReadLength = 9;
+ private static final String ACTUAL_RESULT_DIR = "actual";
+ private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
+
+ private static final String DATA_INPUT_PATH = "data/graphbuild.test/text.txt";
+ private static final String HDFS_INPUT_PATH = "/webmap";
+ private static final String HDFS_OUTPUT_PATH = "/webmap_result";
+
+ private static final String EXPECTED_DIR = "src/test/resources/expected/";
+ private static final String EXPECTED_OUPUT_NODE = EXPECTED_DIR + "result_after_generateNode";
+ private static final String EXPECTED_UNMERGED = EXPECTED_DIR + "result_unmerged";
+
+ private static final String DUMPED_RESULT = ACTUAL_RESULT_DIR + HDFS_OUTPUT_PATH + "/merged.txt";
+ private static final String CONVERT_RESULT = DUMPED_RESULT + ".txt";
+ private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+ private MiniDFSCluster dfsCluster;
+
+ private JobConf conf = new JobConf();
+ private int numberOfNC = 2;
+ private int numPartitionPerMachine = 2;
+
+ private Driver driver;
+
+ @Test
+ public void TestAll() throws Exception {
+ TestEndToEnd();
+ //TestUnMergedNode();
+ }
+
+ public void TestEndToEnd() throws Exception {
+ //conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_TEXT);
+ conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_BINARY);
+ cleanUpReEntry();
+ conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
+ driver.runJob(new GenomixJobConf(conf), Plan.BUILD_DEBRUJIN_GRAPH, true);
+ Assert.assertEquals(true, checkResults(EXPECTED_OUPUT_NODE, new int[] { 1, 2, 3, 4 }));
+ }
+
+ public void TestUnMergedNode() throws Exception {
+ conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_BINARY);
+ cleanUpReEntry();
+ conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
+ driver.runJob(new GenomixJobConf(conf), Plan.BUILD_UNMERGED_GRAPH, true);
+ Assert.assertEquals(true, checkResults(EXPECTED_UNMERGED, new int[] { 1, 2, 3, 4 }));
+ }
+
+ @Before
+ public void setUp() throws Exception {
+ cleanupStores();
+ edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.init();
+ FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+ FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+ startHDFS();
+
+ FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
+ FileOutputFormat.setOutputPath(conf, new Path(HDFS_OUTPUT_PATH));
+
+ conf.setInt(GenomixJobConf.KMER_LENGTH, KmerSize);
+ conf.setInt(GenomixJobConf.READ_LENGTH, ReadLength);
+ driver = new Driver(edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.CC_HOST,
+ edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT, numPartitionPerMachine);
+ }
+
+ private void cleanupStores() throws IOException {
+ FileUtils.forceMkdir(new File("teststore"));
+ FileUtils.forceMkdir(new File("build"));
+ FileUtils.cleanDirectory(new File("teststore"));
+ FileUtils.cleanDirectory(new File("build"));
+ }
+
+ private void startHDFS() throws IOException {
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+
+ FileSystem lfs = FileSystem.getLocal(new Configuration());
+ lfs.delete(new Path("build"), true);
+ System.setProperty("hadoop.log.dir", "logs");
+ dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
+ FileSystem dfs = FileSystem.get(conf);
+ Path src = new Path(DATA_INPUT_PATH);
+ Path dest = new Path(HDFS_INPUT_PATH);
+ dfs.mkdirs(dest);
+ // dfs.mkdirs(result);
+ dfs.copyFromLocalFile(src, dest);
+
+ DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+ conf.writeXml(confOutput);
+ confOutput.flush();
+ confOutput.close();
+ }
+
+ private void cleanUpReEntry() throws IOException {
+ FileSystem lfs = FileSystem.getLocal(new Configuration());
+ if (lfs.exists(new Path(DUMPED_RESULT))) {
+ lfs.delete(new Path(DUMPED_RESULT), true);
+ }
+ FileSystem dfs = FileSystem.get(conf);
+ if (dfs.exists(new Path(HDFS_OUTPUT_PATH))) {
+ dfs.delete(new Path(HDFS_OUTPUT_PATH), true);
+ }
+ }
+
+ private boolean checkResults(String expectedPath, int[] poslistField) throws Exception {
+ File dumped = null;
+ String format = conf.get(GenomixJobConf.OUTPUT_FORMAT);
+ if (GenomixJobConf.OUTPUT_FORMAT_TEXT.equalsIgnoreCase(format)) {
+ FileUtil.copyMerge(FileSystem.get(conf), new Path(HDFS_OUTPUT_PATH),
+ FileSystem.getLocal(new Configuration()), new Path(DUMPED_RESULT), false, conf, null);
+ dumped = new File(DUMPED_RESULT);
+ } else {
+
+ FileSystem.getLocal(new Configuration()).mkdirs(new Path(ACTUAL_RESULT_DIR + HDFS_OUTPUT_PATH));
+ File filePathTo = new File(CONVERT_RESULT);
+ BufferedWriter bw = new BufferedWriter(new FileWriter(filePathTo));
+ for (int i = 0; i < numPartitionPerMachine * numberOfNC; i++) {
+ String partname = "/part-" + i;
+ // FileUtil.copy(FileSystem.get(conf), new Path(HDFS_OUTPUT_PATH
+ // + partname), FileSystem.getLocal(new Configuration()),
+ // new Path(ACTUAL_RESULT_DIR + HDFS_OUTPUT_PATH + partname),
+ // false, conf);
+
+ Path path = new Path(HDFS_OUTPUT_PATH + partname);
+ FileSystem dfs = FileSystem.get(conf);
+ if (dfs.getFileStatus(path).getLen() == 0) {
+ continue;
+ }
+ SequenceFile.Reader reader = new SequenceFile.Reader(dfs, path, conf);
+
+ NodeWritable node = new NodeWritable(conf.getInt(GenomixJobConf.KMER_LENGTH, KmerSize));
+ NullWritable value = NullWritable.get();
+ while (reader.next(node, value)) {
+ if (node == null) {
+ break;
+ }
+ bw.write(node.toString());
+ System.out.println(node.toString());
+ bw.newLine();
+ }
+ reader.close();
+ }
+ bw.close();
+ dumped = new File(CONVERT_RESULT);
+ }
+
+ if (poslistField != null) {
+ TestUtils.compareWithUnSortedPosition(new File(expectedPath), dumped, poslistField);
+ } else {
+ TestUtils.compareWithSortedResult(new File(expectedPath), dumped);
+ }
+ return true;
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.deinit();
+ cleanupHDFS();
+ }
+
+ private void cleanupHDFS() throws Exception {
+ dfsCluster.shutdown();
+ }
+}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/graphbuilding/TestUtils.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/graphbuilding/TestUtils.java
new file mode 100644
index 0000000..ca7755e
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/graphbuilding/TestUtils.java
@@ -0,0 +1,235 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.pregelix.graphbuilding;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.util.ArrayList;
+import java.util.Collections;
+
+public class TestUtils {
+ /**
+ * Compare with the sorted expected file.
+ * The actual file may not be sorted;
+ *
+ * @param expectedFile
+ * @param actualFile
+ */
+ public static void compareWithSortedResult(File expectedFile, File actualFile) throws Exception {
+ BufferedReader readerActual = new BufferedReader(new FileReader(actualFile));
+ BufferedReader readerExpected = new BufferedReader(new FileReader(expectedFile));
+ ArrayList<String> actualLines = new ArrayList<String>();
+ String lineExpected, lineActual;
+ try {
+ while ((lineActual = readerActual.readLine()) != null) {
+ actualLines.add(lineActual);
+ }
+ Collections.sort(actualLines);
+ int num = 1;
+ for (String actualLine : actualLines) {
+ lineExpected = readerExpected.readLine();
+ if (lineExpected == null) {
+ throw new Exception("Actual result changed at line " + num + ":\n< " + actualLine + "\n> ");
+ }
+ if (!equalStrings(lineExpected, actualLine)) {
+ throw new Exception("Result for changed at line " + num + ":\n< " + lineExpected + "\n> "
+ + actualLine);
+ }
+ ++num;
+ }
+ lineExpected = readerExpected.readLine();
+ if (lineExpected != null) {
+ throw new Exception("Actual result changed at line " + num + ":\n< \n> " + lineExpected);
+ }
+ } finally {
+ readerActual.close();
+ readerExpected.close();
+ }
+ }
+
+ public static void compareWithUnSortedPosition(File expectedFile, File actualFile, int[] poslistField)
+ throws Exception {
+ BufferedReader readerActual = new BufferedReader(new FileReader(actualFile));
+ BufferedReader readerExpected = new BufferedReader(new FileReader(expectedFile));
+ ArrayList<String> actualLines = new ArrayList<String>();
+ String lineExpected, lineActual;
+ try {
+ while ((lineActual = readerActual.readLine()) != null) {
+ actualLines.add(lineActual);
+ }
+ Collections.sort(actualLines);
+ int num = 1;
+ for (String actualLine : actualLines) {
+ lineExpected = readerExpected.readLine();
+ if (lineExpected == null) {
+ throw new Exception("Actual result changed at line " + num + ":\n< " + actualLine + "\n> ");
+ }
+ if (!containStrings(lineExpected, actualLine, poslistField)) {
+ throw new Exception("Result for changed at line " + num + ":\n< " + lineExpected + "\n> "
+ + actualLine);
+ }
+ ++num;
+ }
+ lineExpected = readerExpected.readLine();
+ if (lineExpected != null) {
+ throw new Exception("Actual result changed at line " + num + ":\n< \n> " + lineExpected);
+ }
+ } finally {
+ readerActual.close();
+ readerExpected.close();
+ }
+ }
+
+ public static void compareWithResult(File expectedFile, File actualFile) throws Exception {
+ BufferedReader readerExpected = new BufferedReader(new FileReader(expectedFile));
+ BufferedReader readerActual = new BufferedReader(new FileReader(actualFile));
+ String lineExpected, lineActual;
+ int num = 1;
+ try {
+ while ((lineExpected = readerExpected.readLine()) != null) {
+ lineActual = readerActual.readLine();
+ // Assert.assertEquals(lineExpected, lineActual);
+ if (lineActual == null) {
+ throw new Exception("Actual result changed at line " + num + ":\n< " + lineExpected + "\n> ");
+ }
+ if (!equalStrings(lineExpected, lineActual)) {
+ throw new Exception("Result for changed at line " + num + ":\n< " + lineExpected + "\n> "
+ + lineActual);
+ }
+ ++num;
+ }
+ lineActual = readerActual.readLine();
+ if (lineActual != null) {
+ throw new Exception("Actual result changed at line " + num + ":\n< \n> " + lineActual);
+ }
+ } finally {
+ readerExpected.close();
+ readerActual.close();
+ }
+ }
+
+ private static boolean equalStrings(String s1, String s2) {
+ String[] rowsOne = s1.split("\n");
+ String[] rowsTwo = s2.split("\n");
+
+ if (rowsOne.length != rowsTwo.length)
+ return false;
+
+ for (int i = 0; i < rowsOne.length; i++) {
+ String row1 = rowsOne[i];
+ String row2 = rowsTwo[i];
+
+ if (row1.equals(row2))
+ continue;
+
+ String[] fields1 = row1.split(",");
+ String[] fields2 = row2.split(",");
+
+ for (int j = 0; j < fields1.length; j++) {
+ if (fields1[j].equals(fields2[j])) {
+ continue;
+ } else if (fields1[j].indexOf('.') < 0) {
+ return false;
+ } else {
+ fields1[j] = fields1[j].split("=")[1];
+ fields2[j] = fields2[j].split("=")[1];
+ Double double1 = Double.parseDouble(fields1[j]);
+ Double double2 = Double.parseDouble(fields2[j]);
+ float float1 = (float) double1.doubleValue();
+ float float2 = (float) double2.doubleValue();
+
+ if (Math.abs(float1 - float2) == 0)
+ continue;
+ else {
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+ }
+
+ private static boolean containStrings(String lineExpected, String actualLine, int[] poslistField) {
+ if (lineExpected.equals(actualLine)) {
+ return true;
+ }
+ String[] fieldsExp = lineExpected.split("\\\t");
+ String[] fieldsAct = actualLine.split("\\\t");
+ if (fieldsAct.length != fieldsExp.length) {
+ return false;
+ }
+ for (int i = 0; i < fieldsAct.length; i++) {
+ boolean cont = false;
+ for (int x : poslistField) {
+ if (i == x) {
+ cont = true;
+ break;
+ }
+ }
+ if (cont) {
+ continue;
+ }
+ if (!fieldsAct[i].equals(fieldsExp[i])) {
+ return false;
+ }
+ }
+
+ ArrayList<String> posExp = new ArrayList<String>();
+ ArrayList<String> posAct = new ArrayList<String>();
+
+ for (int x : poslistField) {
+ String valueExp = lineExpected.split("\\\t")[x];
+ for (int i = 1; i < valueExp.length() - 1;) {
+ if (valueExp.charAt(i) == '(') {
+ String str = "";
+ i++;
+ while (i < valueExp.length() - 1 && valueExp.charAt(i) != ')') {
+ str += valueExp.charAt(i);
+ i++;
+ }
+ posExp.add(str);
+ }
+ i++;
+ }
+ String valueAct = actualLine.split("\\\t")[x];
+ for (int i = 1; i < valueAct.length() - 1;) {
+ if (valueAct.charAt(i) == '(') {
+ String str = "";
+ i++;
+ while (i < valueAct.length() - 1 && valueAct.charAt(i) != ')') {
+ str += valueAct.charAt(i);
+ i++;
+ }
+ posAct.add(str);
+ }
+ i++;
+ }
+
+ if (posExp.size() != posAct.size()) {
+ return false;
+ }
+ Collections.sort(posExp);
+ Collections.sort(posAct);
+ for (int i = 0; i < posExp.size(); i++) {
+ if (!posExp.get(i).equals(posAct.get(i))) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+}
diff --git a/genomix/genomix-pregelix/src/test/resources/data/webmap/text.txt b/genomix/genomix-pregelix/src/test/resources/data/webmap/text.txt
new file mode 100755
index 0000000..01c49e5
--- /dev/null
+++ b/genomix/genomix-pregelix/src/test/resources/data/webmap/text.txt
@@ -0,0 +1,6 @@
+1 AATAGAAG
+2 AATAGCTT
+3 AATAGAAG
+4 AATAGCTT
+5 AATAGAAG
+6 AGAAGAAG