Make TestSuite for graph building (Hadoop)
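
Remove the -read-length option: the driver no longer needs the read
length, so run() loses its readLength parameter. In the reducer, merge
node-id lists with unionUpdate instead of appendList so duplicate ids
are not inserted. Add GraphBuildingTestCase/GraphBuildingTestSuite to
run the graph builder over several test directories on a MiniDFS/MiniMR
cluster. In SplitRepeatVertex, allocate createdVertexId with kmerSize + 1
and derive new vertex ids from the current kmer plus a random suffix
instead of the hard-coded "AAA"/"GGG". Refresh the SplitRepeat test data
and drop the unused sequencefile utilities (ConvertNodeToIdValue,
ConvertToSequenceFile, GenerateSmallFile).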
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixDriver.java
index 2553d16..d9e4876 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixDriver.java
@@ -34,15 +34,14 @@
@Option(name = "-kmer-size", usage = "the size of kmer", required = true)
public int sizeKmer;
- @Option(name = "-read-length", usage = "the length of read", required = true)
- public int readLength;
+// @Option(name = "-read-length", usage = "the length of read", required = true)
+// public int readLength;
}
- public void run(String inputPath, String outputPath, int numReducers, int sizeKmer, int readLength,
+ public void run(String inputPath, String outputPath, int numReducers, int sizeKmer,
boolean seqOutput, String defaultConfPath) throws IOException{
JobConf conf = new JobConf(GenomixDriver.class);
conf.setInt("sizeKmer", sizeKmer);
- conf.setInt("readLength", readLength);
if (defaultConfPath != null) {
conf.addResource(new Path(defaultConfPath));
}
@@ -79,7 +78,6 @@
CmdLineParser parser = new CmdLineParser(options);
parser.parseArgument(args);
GenomixDriver driver = new GenomixDriver();
- driver.run(options.inputPath, options.outputPath, options.numReducers, options.sizeKmer,
- options.readLength, true, null);
+ driver.run(options.inputPath, options.outputPath, options.numReducers, options.sizeKmer, true, null);
}
}
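
For reference, a minimal sketch of calling the narrowed run() signature;
the paths and counts below are illustrative, not taken from this patch:

    // Hypothetical invocation: note there is no readLength argument anymore.
    GenomixDriver driver = new GenomixDriver();
    driver.run("/webmap", "/result", 1 /* reducers */, 3 /* kmer size */,
            true /* sequence-file output */, null /* default conf path */);
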
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
index 6404f0d..4b79ea4 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GenomixReducer.java
@@ -35,7 +35,7 @@
while (values.hasNext()) {
tmpNode.set(values.next());
- outputNode.getNodeIdList().appendList(tmpNode.getNodeIdList());
+ outputNode.getNodeIdList().unionUpdate(tmpNode.getNodeIdList());
outputNode.getFFList().unionUpdate(tmpNode.getFFList()); //appendList need to check if insert node exists
outputNode.getFRList().unionUpdate(tmpNode.getFRList());
outputNode.getRFList().unionUpdate(tmpNode.getRFList());
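
The reducer change matters because appendList concatenates blindly,
while unionUpdate (per the comment above) checks whether the inserted
node already exists. A simplified model of the difference in plain Java
collections, not the real writable-list API:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.LinkedHashSet;
    import java.util.List;

    List<Integer> append = new ArrayList<Integer>(Arrays.asList(1, 2));
    append.addAll(Arrays.asList(2, 3));        // appendList-like: [1, 2, 2, 3]

    LinkedHashSet<Integer> union = new LinkedHashSet<Integer>(Arrays.asList(1, 2));
    union.addAll(Arrays.asList(2, 3));         // unionUpdate-like: [1, 2, 3]
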
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java
index 498a87d..f16e0a5 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTest.java
@@ -22,13 +22,12 @@
private JobConf conf = new JobConf();
private static final String ACTUAL_RESULT_DIR = "actual";
private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
- private static final String DATA_PATH = "data/webmap/pathmerge_TestSet/5";
+ private static final String DATA_PATH = "data/webmap/AdjSplitRepeat.txt";
private static final String HDFS_PATH = "/webmap";
private static final String RESULT_PATH = "/result";
-// private static final int COUNT_REDUCER = 2;
+ private static final int COUNT_REDUCER = 1;
private static final int SIZE_KMER = 3;
- private static final int READ_LENGTH = 7;
private MiniDFSCluster dfsCluster;
private MiniMRCluster mrCluster;
@@ -45,7 +44,7 @@
public void TestMapKmerToNode() throws Exception {
GenomixDriver driver = new GenomixDriver();
- driver.run(HDFS_PATH, RESULT_PATH, 1, SIZE_KMER, READ_LENGTH, true, HADOOP_CONF_PATH);
+ driver.run(HDFS_PATH, RESULT_PATH, COUNT_REDUCER, SIZE_KMER, true, HADOOP_CONF_PATH);
dumpResult();
}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestCase.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestCase.java
new file mode 100644
index 0000000..cb163a7
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestCase.java
@@ -0,0 +1,60 @@
+package edu.uci.ics.genomix.hadoop.contrailgraphbuilding;
+
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.junit.Test;
+
+import edu.uci.ics.genomix.hadoop.pmcommon.HadoopMiniClusterTest;
+
+@SuppressWarnings("deprecation")
+public class GraphBuildingTestCase extends TestCase{
+
+ private final String RESULT_PATH;
+ private final String HADOOP_CONF_PATH;
+ private final String HDFS_INPUTPATH;
+ private FileSystem dfs;
+
+ private static final int COUNT_REDUCER = 1;
+ private final int SIZE_KMER;
+
+    public GraphBuildingTestCase(String resultFileDir, String hadoopConfPath,
+            String hdfsInputPath, int kmerSize, FileSystem dfs){
+        super("test"); // JUnit 3 dispatches on the test name, so point it at test()
+        this.RESULT_PATH = resultFileDir;
+ this.HADOOP_CONF_PATH = hadoopConfPath;
+ this.HDFS_INPUTPATH = hdfsInputPath;
+ this.SIZE_KMER = kmerSize;
+ this.dfs = dfs;
+ }
+
+ @Test
+ public void test() throws Exception {
+ TestMapKmerToNode();
+ }
+
+ public void TestMapKmerToNode() throws Exception {
+ GenomixDriver driver = new GenomixDriver();
+ driver.run(HDFS_INPUTPATH, RESULT_PATH, COUNT_REDUCER, SIZE_KMER, true, HADOOP_CONF_PATH);
+ dumpResult();
+ }
+
+
+
+ private void dumpResult() throws IOException {
+        Path src = new Path(RESULT_PATH);  // the job's output dir in the mini DFS
+        Path dest = new Path(RESULT_PATH); // the same relative path on the local FS
+        dfs.copyToLocalFile(src, dest);
+ }
+}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestSuite.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestSuite.java
new file mode 100644
index 0000000..67a6e3b
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/genomix/hadoop/contrailgraphbuilding/GraphBuildingTestSuite.java
@@ -0,0 +1,109 @@
+package edu.uci.ics.genomix.hadoop.contrailgraphbuilding;
+
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import junit.framework.TestSuite;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+
+@SuppressWarnings("deprecation")
+public class GraphBuildingTestSuite extends TestSuite{
+
+ private static int SIZE_KMER = 3;
+ public static final String PreFix = "data/webmap/pathmerge_TestSet";
+ public static final String[] TestDir = { PreFix + File.separator
+ + "2", PreFix + File.separator
+ + "3"};
+
+ private JobConf conf = new JobConf();
+ private static final String ACTUAL_RESULT_DIR = "actual";
+ private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+ private static final String HDFS_INPUTPATH = "/webmap";
+
+ private MiniDFSCluster dfsCluster;
+ private MiniMRCluster mrCluster;
+ private static FileSystem dfs;
+
+ public void setup() throws Exception{
+ FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+ FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+ startHadoop();
+ }
+
+ private void startHadoop() throws IOException {
+ FileSystem lfs = FileSystem.getLocal(new Configuration());
+ lfs.delete(new Path("build"), true);
+ System.setProperty("hadoop.log.dir", "logs");
+ dfsCluster = new MiniDFSCluster(conf, 1, true, null);
+ dfs = dfsCluster.getFileSystem();
+ mrCluster = new MiniMRCluster(1, dfs.getUri().toString(), 1);
+
+ for (String testDir : TestDir) {
+ File src = new File(testDir);
+ Path dest = new Path(HDFS_INPUTPATH + File.separator + src.getName());
+ dfs.mkdirs(dest);
+ for (File f : src.listFiles()) {
+ dfs.copyFromLocalFile(new Path(f.getAbsolutePath()), dest);
+ }
+ }
+
+ DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+ conf.writeXml(confOutput);
+ confOutput.flush();
+ confOutput.close();
+ }
+
+ public void tearDown() throws Exception {
+ cleanupHadoop();
+ }
+
+ private void cleanupHadoop() throws IOException {
+ mrCluster.shutdown();
+ dfsCluster.shutdown();
+ }
+
+    public static Test suite() throws Exception {
+        GraphBuildingTestSuite testSuite = new GraphBuildingTestSuite();
+        testSuite.setup(); // start the mini clusters so the static dfs is set before cases are built
+ for (String testPathStr : TestDir) {
+ File testDir = new File(testPathStr);
+ String resultFileName = ACTUAL_RESULT_DIR + File.separator +
+ "bin" + File.separator + testDir.getName();
+ testSuite.addTest(new GraphBuildingTestCase(resultFileName, HADOOP_CONF_PATH,
+ HDFS_INPUTPATH + File.separator + testDir.getName(), SIZE_KMER, dfs));
+ }
+ return testSuite;
+ }
+
+ /**
+ * Runs the tests and collects their result in a TestResult.
+ */
+ @Override
+ public void run(TestResult result) {
+ try {
+ int testCount = countTestCases();
+ for (int i = 0; i < testCount; i++) {
+ // cleanupStores();
+ Test each = this.testAt(i);
+ if (result.shouldStop())
+ break;
+ runTest(each, result);
+ }
+ tearDown();
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+}
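
A note on wiring: the suite uses JUnit 3's static suite() hook, with
suite() expected to start the mini clusters before any case runs. A
minimal sketch of driving it with the stock JUnit 3 text runner (the
main method below is hypothetical, not part of this patch):

    // Sketch: run the whole suite with JUnit 3's text-based runner.
    public static void main(String[] args) throws Exception {
        junit.textui.TestRunner.run(GraphBuildingTestSuite.suite());
    }
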
diff --git a/genomix/genomix-pregelix/data/SplitRepeat/1/part-00000 b/genomix/genomix-pregelix/data/SplitRepeat/1/part-00000
deleted file mode 100755
index a983577..0000000
--- a/genomix/genomix-pregelix/data/SplitRepeat/1/part-00000
+++ /dev/null
Binary files differ
diff --git a/genomix/genomix-pregelix/data/SplitRepeat/AdjSplitRepeat/part-00000 b/genomix/genomix-pregelix/data/SplitRepeat/AdjSplitRepeat/part-00000
index a187c64..665db46 100755
--- a/genomix/genomix-pregelix/data/SplitRepeat/AdjSplitRepeat/part-00000
+++ b/genomix/genomix-pregelix/data/SplitRepeat/AdjSplitRepeat/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/SplitRepeat/SimpleTest/part-00000 b/genomix/genomix-pregelix/data/SplitRepeat/SimpleTest/part-00000
deleted file mode 100755
index 4977247..0000000
--- a/genomix/genomix-pregelix/data/SplitRepeat/SimpleTest/part-00000
+++ /dev/null
Binary files differ
diff --git a/genomix/genomix-pregelix/data/SplitRepeat/SplitOnce/part-00000 b/genomix/genomix-pregelix/data/SplitRepeat/SplitOnce/part-00000
new file mode 100755
index 0000000..cd574bb
--- /dev/null
+++ b/genomix/genomix-pregelix/data/SplitRepeat/SplitOnce/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/data/SplitRepeat/SplitTwice/part-00000 b/genomix/genomix-pregelix/data/SplitRepeat/SplitTwice/part-00000
new file mode 100755
index 0000000..e56b813
--- /dev/null
+++ b/genomix/genomix-pregelix/data/SplitRepeat/SplitTwice/part-00000
Binary files differ
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/splitrepeat/SplitRepeatVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/splitrepeat/SplitRepeatVertex.java
index 83958fe..ee493f9 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/splitrepeat/SplitRepeatVertex.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/operator/splitrepeat/SplitRepeatVertex.java
@@ -96,9 +96,11 @@
if(outgoingEdgeList == null)
outgoingEdgeList = new VKmerListWritable();
if(createdVertexId == null)
- createdVertexId = new VKmerBytesWritable(kmerSize);//kmerSize + 1
+ createdVertexId = new VKmerBytesWritable(kmerSize + 1);
if(destVertexId == null)
destVertexId = new VKmerBytesWritable(kmerSize);
+ if(tmpKmer == null)
+ tmpKmer = new VKmerBytesWritable();
}
/**
@@ -132,6 +134,11 @@
return "GGG";
}
+ public void randomGenerateVertexId(int numOfSuffix){
+        String newVertexId = getVertexId().toString() + generaterRandomString(numOfSuffix); // append a random suffix
+ createdVertexId.setByRead(kmerSize + numOfSuffix, newVertexId.getBytes(), 0);
+ }
+
public void generateKmerMap(Iterator<MessageWritable> msgIterator){
kmerMap.clear();
while(msgIterator.hasNext()){
@@ -322,7 +329,6 @@
/** set self readId set **/
setSelfReadIdSet();
- int count = 0;
//A set storing deleted edges
Set<DeletedEdge> deletedEdges = new HashSet<DeletedEdge>();
/** process connectedTable **/
@@ -330,8 +336,8 @@
/** set edgeList and edgeDir based on connectedTable **/
setEdgeListAndEdgeDir(i);
- VKmerBytesWritable incomingEdge = new VKmerBytesWritable(kmerSize);
- VKmerBytesWritable outgoingEdge = new VKmerBytesWritable(kmerSize);
+ VKmerBytesWritable incomingEdge = new VKmerBytesWritable();
+ VKmerBytesWritable outgoingEdge = new VKmerBytesWritable();
for(int x = 0; x < incomingEdgeList.getCountOfPosition(); x++){
for(int y = 0; y < outgoingEdgeList.getCountOfPosition(); y++){
incomingEdge.setAsCopy(incomingEdgeList.getPosition(x));
@@ -340,11 +346,8 @@
setNeighborEdgeIntersection(incomingEdge, outgoingEdge);
if(!neighborEdgeIntersection.isEmpty()){
- if(count == 0)
- createdVertexId.setByRead("AAA".length(), "AAA".getBytes(), 0);//kmerSize + 1 generaterRandomString(kmerSize).getBytes()
- else
- createdVertexId.setByRead("GGG".length(), "GGG".getBytes(), 0);
- count++;
+                        /** randomly generate the vertexId of the new vertex **/
+ randomGenerateVertexId(3);
/** create new/created vertex **/
createNewVertex(i, incomingEdge, outgoingEdge);
@@ -356,31 +359,7 @@
storeDeletedEdge(deletedEdges, i, incomingEdge, outgoingEdge);
}
}
- }
-
-// for(KmerBytesWritable incomingEdge : incomingEdgeList){
-// for(KmerBytesWritable outgoingEdge : outgoingEdgeList){
-// /** set neighborEdge readId intersection **/
-// setNeighborEdgeIntersection(incomingEdge, outgoingEdge);
-//
-// if(!neighborEdgeIntersection.isEmpty()){
-// if(count == 0)
-// createdVertexId.setByRead("AAA".getBytes(), 0);//kmerSize + 1 generaterRandomString(kmerSize).getBytes()
-// else
-// createdVertexId.setByRead("GGG".getBytes(), 0);
-// count++;
-//
-// /** create new/created vertex **/
-// createNewVertex(i, incomingEdge, outgoingEdge);
-//
-// /** send msg to neighbors to update their edges to new vertex **/
-// sendMsgToUpdateEdge(incomingEdge, outgoingEdge);
-//
-// /** store deleted edge **/
-// storeDeletedEdge(deletedEdges, i, incomingEdge, outgoingEdge);
-// }
-// }
-// }
+ }
}
/** delete extra edges from old vertex **/
for(DeletedEdge deletedEdge : deletedEdges){
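
The net effect of randomGenerateVertexId is an id of length kmerSize +
numOfSuffix built from the current kmer plus a random suffix. A sketch
of the string mechanics with hypothetical values (generaterRandomString
is the class's existing helper):

    String vertexId = "ACG";                 // current kmer, kmerSize = 3
    String suffix = "GTA";                   // e.g. generaterRandomString(3)
    String newVertexId = vertexId + suffix;  // "ACGGTA"
    // createdVertexId.setByRead(3 + 3, newVertexId.getBytes(), 0) then
    // parses the concatenation as a 6-mer.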
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/ConvertNodeToIdValue.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/ConvertNodeToIdValue.java
deleted file mode 100644
index 4e728e2..0000000
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/ConvertNodeToIdValue.java
+++ /dev/null
@@ -1,58 +0,0 @@
-package edu.uci.ics.genomix.pregelix.sequencefile;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.SequenceFile.CompressionType;
-
-import edu.uci.ics.genomix.type.NodeWritable;
-import edu.uci.ics.genomix.type.PositionWritable;
-import edu.uci.ics.genomix.pregelix.io.VertexValueWritable;
-import edu.uci.ics.genomix.pregelix.io.VertexValueWritable.State;
-
-
-public class ConvertNodeToIdValue {
-
- public static void convert(Path inFile, Path outFile)
- throws IOException {
- Configuration conf = new Configuration();
- FileSystem fileSys = FileSystem.get(conf);
-
- SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
- SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf, outFile, PositionWritable.class,
- VertexValueWritable.class, CompressionType.NONE);
- NodeWritable node = new NodeWritable();
- NullWritable value = NullWritable.get();
- PositionWritable outputKey = new PositionWritable();
- VertexValueWritable outputValue = new VertexValueWritable();
-
- while(reader.next(node, value)) {
-// System.out.println(node.getNodeID().toString());
-// outputKey.set(node.getNodeID());
- outputValue.setFFList(node.getFFList());
- outputValue.setFRList(node.getFRList());
- outputValue.setRFList(node.getRFList());
- outputValue.setRRList(node.getRRList());
- outputValue.setActualKmer(node.getKmer());
- outputValue.setState(State.IS_HEAD);
- writer.append(outputKey, outputValue);
- }
- writer.close();
- reader.close();
- }
-
- public static void main(String[] args) throws IOException {
- Path dir = new Path("data/test");
- Path outDir = new Path("data/input");
- FileUtils.cleanDirectory(new File("data/input"));
- Path inFile = new Path(dir, "result.graphbuild.txt.bin");
- Path outFile = new Path(outDir, "out");
- convert(inFile,outFile);
- }
-}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/ConvertToSequenceFile.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/ConvertToSequenceFile.java
deleted file mode 100644
index 2a7d668..0000000
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/ConvertToSequenceFile.java
+++ /dev/null
@@ -1,41 +0,0 @@
-package edu.uci.ics.genomix.pregelix.sequencefile;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
-
-public class ConvertToSequenceFile {
- public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
-
- Configuration conf = new Configuration();
- Job job = new Job(conf);
- job.setJobName("Convert Text");
- job.setJarByClass(Mapper.class);
-
- job.setMapperClass(Mapper.class);
- job.setReducerClass(Reducer.class);
-
- // increase if you need sorting or a special number of files
- job.setNumReduceTasks(0);
-
- job.setOutputKeyClass(LongWritable.class);
- job.setOutputValueClass(Text.class);
-
- job.setOutputFormatClass(SequenceFileOutputFormat.class);
- job.setInputFormatClass(TextInputFormat.class);
-
- TextInputFormat.addInputPath(job, new Path("data/webmap/part-00000"));
- SequenceFileOutputFormat.setOutputPath(job, new Path("folder_seq"));
-
- // submit and wait for completion
- job.waitForCompletion(true);
- }
-}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateSmallFile.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateSmallFile.java
deleted file mode 100644
index d3180c8..0000000
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateSmallFile.java
+++ /dev/null
@@ -1,97 +0,0 @@
-package edu.uci.ics.genomix.pregelix.sequencefile;
-
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.SequenceFile.CompressionType;
-
-import edu.uci.ics.genomix.type.KmerBytesWritable;
-
-public class GenerateSmallFile {
-
- public static void generateNumOfLinesFromGraphBuildResuiltBigFile(Path inFile, Path outFile, int numOfLines)
- throws IOException {
- Configuration conf = new Configuration();
- FileSystem fileSys = FileSystem.get(conf);
-
- SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
- SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf, outFile, KmerBytesWritable.class,
- NullWritable.class, CompressionType.NONE);
- KmerBytesWritable.setGlobalKmerLength(55);
- KmerBytesWritable outKey = new KmerBytesWritable();
- int i = 0;
-
- for (i = 0; i < numOfLines; i++) {
- // System.out.println(i);
- reader.next(outKey, null);
- writer.append(outKey, null);
- }
- writer.close();
- reader.close();
- }
-
- public static void generateNumOfLinesFromGraphBuildResuiltBigFile(String inFile, String outFile, int numOfLines)
- throws IOException {
- String lines = readTextFile(inFile, numOfLines);
- writeTextFile(outFile, lines);
- }
-
- public static String readTextFile(String fileName, int numOfLines) {
- String returnValue = "";
- FileReader file;
- String line = "";
- try {
- file = new FileReader(fileName);
- BufferedReader reader = new BufferedReader(file);
- try {
- while ((numOfLines > 0) && (line = reader.readLine()) != null) {
- returnValue += line + "\n";
- numOfLines--;
- }
- } finally {
- reader.close();
- }
- } catch (FileNotFoundException e) {
- throw new RuntimeException("File not found");
- } catch (IOException e) {
- throw new RuntimeException("IO Error occured");
- }
- return returnValue;
-
- }
-
- public static void writeTextFile(String fileName, String s) {
- FileWriter output;
- try {
- output = new FileWriter(fileName);
- BufferedWriter writer = new BufferedWriter(output);
- writer.write(s);
- writer.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- public static void main(String[] args) throws IOException {
- Path dir = new Path("data/split.aa");
- Path outDir = new Path("data/input");
- FileUtils.cleanDirectory(new File("data/input"));
- Path inFile = new Path(dir, "part-0");
- Path outFile = new Path(outDir, "part-0-out-1000");
- generateNumOfLinesFromGraphBuildResuiltBigFile(inFile, outFile, 1000);
- /* String inFile = "data/shortjump_1.head8M.fastq";
- String outFile = "data/testGeneFile";
- generateNumOfLinesFromGraphBuildResuiltBigFile(inFile, outFile, 100000);*/
- }
-}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/SplitRepeatSmallTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/SplitRepeatSmallTestSuite.java
index 43f4788..e3a33fa 100644
--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/SplitRepeatSmallTestSuite.java
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/SplitRepeatSmallTestSuite.java
@@ -43,9 +43,10 @@
public class SplitRepeatSmallTestSuite extends TestSuite {
private static final Logger LOGGER = Logger.getLogger(SplitRepeatSmallTestSuite.class.getName());
//P4ForMergeGraph/bin/read
- public static final String PreFix = "data/SplitRepeat"; //"graphbuildresult";
+ public static final String PreFix = "data/SplitRepeat";
public static final String[] TestDir = { PreFix + File.separator
- + "AdjSplitRepeat"};
+ + "SplitOnce", PreFix + File.separator
+ + "SplitTwice"};
private static final String ACTUAL_RESULT_DIR = "data/actual/splitrepeat";
private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
private static final String PATH_TO_CLUSTER_STORE = "src/test/resources/cluster/stores.properties";