ADD generateSmallFile.java
git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_genomix@3280 123451ca-8445-de46-9d55-352943316053
diff --git a/genomix/genomix-pregelix/pom.xml b/genomix/genomix-pregelix/pom.xml
index cb74294..ebed72c 100644
--- a/genomix/genomix-pregelix/pom.xml
+++ b/genomix/genomix-pregelix/pom.xml
@@ -111,6 +111,13 @@
<type>jar</type>
<scope>compile</scope>
</dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>genomix-data</artifactId>
+ <version>0.2.4-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
</dependencies>
<scm>
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/BinaryLoadGraphInputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/BinaryLoadGraphInputFormat.java
index f203297..82fe3d9 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/BinaryLoadGraphInputFormat.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/BinaryLoadGraphInputFormat.java
@@ -15,7 +15,7 @@
import edu.uci.ics.pregelix.api.util.BspUtils;
import edu.uci.ics.pregelix.bitwise.BitwiseOperation;
import edu.uci.ics.pregelix.example.io.MessageWritable;
-import edu.uci.ics.pregelix.type.KmerCountValue;
+import edu.uci.ics.genomix.type.KmerCountValue;
public class BinaryLoadGraphInputFormat extends
BinaryVertexInputFormat<BytesWritable, ByteWritable, NullWritable, MessageWritable>{
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/GraphVertexOperation.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/GraphVertexOperation.java
index 8197ea8..37f348c 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/GraphVertexOperation.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/GraphVertexOperation.java
@@ -24,7 +24,7 @@
import edu.uci.ics.pregelix.type.KmerCountValue;
public class GraphVertexOperation {
- public static final int k = 3; //kmer, k: the length of kmer
+ public static final int k = 15; //kmer, k: the length of kmer
static private final Path TMP_DIR = new Path(
GenerateSequenceFile.class.getSimpleName() + "_INTERIM");
/**
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/LogAlgorithmForMergeGraphInputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/LogAlgorithmForMergeGraphInputFormat.java
index a6e4a6c..4044a06 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/LogAlgorithmForMergeGraphInputFormat.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/LogAlgorithmForMergeGraphInputFormat.java
@@ -15,7 +15,7 @@
import edu.uci.ics.pregelix.bitwise.BitwiseOperation;
import edu.uci.ics.pregelix.example.io.LogAlgorithmMessageWritable;
import edu.uci.ics.pregelix.example.io.ValueStateWritable;
-import edu.uci.ics.pregelix.type.KmerCountValue;
+import edu.uci.ics.genomix.type.KmerCountValue;
import edu.uci.ics.pregelix.type.State;
public class LogAlgorithmForMergeGraphInputFormat extends
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/LogAlgorithmForMergeGraphVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/LogAlgorithmForMergeGraphVertex.java
index c341e0d..a3d5e3f 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/LogAlgorithmForMergeGraphVertex.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/LogAlgorithmForMergeGraphVertex.java
@@ -55,6 +55,7 @@
*/
public class LogAlgorithmForMergeGraphVertex extends Vertex<BytesWritable, ValueStateWritable, NullWritable, LogAlgorithmMessageWritable>{
+ private byte[] tmpVertextId;
private byte[] tmpSourceVertextId;
private byte[] tmpDestVertexId;
private byte[] tmpChainVertexId;
@@ -74,8 +75,9 @@
@Override
public void compute(Iterator<LogAlgorithmMessageWritable> msgIterator) {
try {
- writer = new OutputStreamWriter(new FileOutputStream("test/check",true));
+ writer = new OutputStreamWriter(new FileOutputStream("test/check_Log",true));
} catch (FileNotFoundException e1) { e1.printStackTrace();}
+ tmpVertextId = GraphVertexOperation.generateValidDataFromBytesWritable(getVertexId());
if (getSuperstep() == 1) {
tmpVal = getVertexValue();
tmpVertexValue = tmpVal.getValue();
@@ -83,32 +85,31 @@
tmpMsg.setChainVertexId(tmpChainVertexId);
if(GraphVertexOperation.isHead(new ByteWritable(tmpVertexValue))){
tmpMsg.setMessage(Message.START);
- tmpDestVertexId = GraphVertexOperation.getDestVertexId(getVertexId().getBytes(), tmpVertexValue);
+ tmpDestVertexId = GraphVertexOperation.getDestVertexId(tmpVertextId, tmpVertexValue);
sendMsg(new BytesWritable(tmpDestVertexId),tmpMsg);
//test
GraphVertexOperation.testLogMessageCommunication(writer, getSuperstep(),
- getVertexId().getBytes(), tmpDestVertexId, tmpMsg);
- voteToHalt();
+ tmpVertextId, tmpDestVertexId, tmpMsg);
+ //voteToHalt();
}
else if(GraphVertexOperation.isRear(new ByteWritable(tmpVertexValue))){
tmpMsg.setMessage(Message.END);
- tmpDestVertexId = GraphVertexOperation.getLeftDestVertexId(getVertexId().getBytes(), tmpVertexValue);
+ tmpDestVertexId = GraphVertexOperation.getLeftDestVertexId(tmpVertextId, tmpVertexValue);
sendMsg(new BytesWritable(tmpDestVertexId),tmpMsg);
//test
GraphVertexOperation.testSetVertexState(writer, getSuperstep(), getVertexId().getBytes(),
tmpDestVertexId, tmpMsg, tmpVal);
- voteToHalt();
+ //voteToHalt();
}
else if(GraphVertexOperation.isPathVertex(new ByteWritable(tmpVertexValue))){
tmpVal = getVertexValue();
tmpVal.setState(State.MID_VERTEX);
setVertexValue(tmpVal);
//test
- GraphVertexOperation.testSetVertexState(writer, getSuperstep(), getVertexId().getBytes(),
- null, null, tmpVal);
+ GraphVertexOperation.testSetVertexState(writer, getSuperstep(),tmpVertextId ,
+ null, null, tmpVal);
}
- else
- voteToHalt();
+ voteToHalt();
}
else if(getSuperstep() == 2){
if(msgIterator.hasNext()){
@@ -120,16 +121,17 @@
tmpVal.setState(State.START_VERTEX);
setVertexValue(tmpVal);
//test
- GraphVertexOperation.testSetVertexState(writer, getSuperstep(), getVertexId().getBytes(),
+ GraphVertexOperation.testSetVertexState(writer, getSuperstep(), tmpVertextId,
null, null, tmpVal);
}
else if(tmpMessage == Message.END && tmpVertexState == State.MID_VERTEX){
tmpVal.setState(State.END_VERTEX);
setVertexValue(tmpVal);
//test
- GraphVertexOperation.testSetVertexState(writer, getSuperstep(), getVertexId().getBytes(),
+ GraphVertexOperation.testSetVertexState(writer, getSuperstep(), tmpVertextId,
null, null, tmpVal);
}
+ voteToHalt();
}
}
//head node sends message to path node
@@ -148,7 +150,7 @@
tmpMsg.setSourceVertexId(tmpSourceVertextId);
sendMsg(new BytesWritable(tmpDestVertexId),tmpMsg);
//test
- GraphVertexOperation.testMessageCommunication2(writer, getSuperstep(), getVertexId().getBytes(),
+ GraphVertexOperation.testMessageCommunication2(writer, getSuperstep(), tmpVertextId,
tmpDestVertexId, tmpMsg, null);
}
else if(tmpVertexState != State.END_VERTEX){
@@ -156,7 +158,7 @@
tmpMsg.setSourceVertexId(tmpSourceVertextId);
sendMsg(new BytesWritable(tmpDestVertexId),tmpMsg);
//test
- GraphVertexOperation.testMessageCommunication2(writer, getSuperstep(), getVertexId().getBytes(),
+ GraphVertexOperation.testMessageCommunication2(writer, getSuperstep(), tmpVertextId,
tmpDestVertexId, tmpMsg, null);
}
}
@@ -174,7 +176,7 @@
tmpMsg.setSourceVertexId(tmpSourceVertextId);
sendMsg(new BytesWritable(tmpDestVertexId),tmpMsg);
//test
- GraphVertexOperation.testMessageCommunication2(writer, getSuperstep(), getVertexId().getBytes(),
+ GraphVertexOperation.testMessageCommunication2(writer, getSuperstep(), tmpVertextId,
tmpDestVertexId, tmpMsg, null);
}
else if(tmpVertexState != State.END_VERTEX){
@@ -182,13 +184,14 @@
tmpMsg.setSourceVertexId(tmpSourceVertextId);
sendMsg(new BytesWritable(tmpDestVertexId),tmpMsg);
//test
- GraphVertexOperation.testMessageCommunication2(writer, getSuperstep(), getVertexId().getBytes(),
+ GraphVertexOperation.testMessageCommunication2(writer, getSuperstep(), tmpVertextId,
tmpDestVertexId, tmpMsg, null);
}
}
else
voteToHalt();
}
+ voteToHalt();
}
//path node sends message back to head node
else if(getSuperstep()%3 == 1){
@@ -199,7 +202,7 @@
tmpSourceVertextId = tmpMsg.getSourceVertexId();
if(tmpVal.getLengthOfMergeChain() == 0){
tmpVal.setLengthOfMergeChain(GraphVertexOperation.k);
- tmpVal.setMergeChain(getVertexId().getBytes());
+ tmpVal.setMergeChain(tmpVertextId);
setVertexValue(tmpVal);
}
tmpMsg.setLengthOfChain(tmpVal.getLengthOfMergeChain());
@@ -214,7 +217,7 @@
sendMsg(new BytesWritable(tmpSourceVertextId),tmpMsg);
//test
- GraphVertexOperation.testMessageCommunication2(writer, getSuperstep(), getVertexId().getBytes(),
+ GraphVertexOperation.testMessageCommunication2(writer, getSuperstep(), tmpVertextId,
tmpSourceVertextId, tmpMsg, tmpSourceVertextId);
//kill Message because it has been merged by the head
if(tmpMessage == Message.START){
@@ -227,7 +230,7 @@
&& getVertexValue().getState() != State.END_VERTEX
&& tmpMessage != Message.END && tmpMessage != Message.START){
- GraphVertexOperation.testDeleteVertexInfo(writer, getSuperstep(), getVertexId().getBytes(), "not receive any message");
+ GraphVertexOperation.testDeleteVertexInfo(writer, getSuperstep(), tmpVertextId, "not receive any message");
deleteVertex(getVertexId()); //killSelf because it doesn't receive any message
}
}
@@ -237,7 +240,7 @@
tmpMsg = msgIterator.next();
tmpVal = getVertexValue();
tmpVertexState = tmpVal.getState();
- tmpSourceVertextId = getVertexId().getBytes();
+ tmpSourceVertextId = tmpVertextId;
if(tmpVertexState == State.TODELETE){
GraphVertexOperation.testDeleteVertexInfo(writer, getSuperstep(),
tmpSourceVertextId, "already merged by head");
@@ -254,7 +257,7 @@
tmpVal.setState(tmpVertexState);
if(getSuperstep() == 5){
lengthOfMergeChainVertex = GraphVertexOperation.k;
- mergeChainVertexId = getVertexId().getBytes();
+ mergeChainVertexId = tmpVertextId;
}
else{
lengthOfMergeChainVertex = tmpVal.getLengthOfMergeChain();
@@ -280,20 +283,20 @@
sendMsg(getVertexId(),tmpMsg);
//test
GraphVertexOperation.testMessageCommunication2(writer, getSuperstep(), getVertexId().getBytes(),
- getVertexId().getBytes(), tmpMsg, null);
+ tmpVertextId, tmpMsg, null);
}
}
if(tmpVertexState == State.END_VERTEX){
voteToHalt();
//test
- GraphVertexOperation.testVoteVertexInfo(writer, getSuperstep(), getVertexId().getBytes(),
+ GraphVertexOperation.testVoteVertexInfo(writer, getSuperstep(), tmpVertextId,
" it is the rear!");
}
if(tmpVertexState == State.FINAL_VERTEX){
voteToHalt();
try {
GraphVertexOperation.flushChainToFile(tmpVal.getMergeChain(),
- tmpVal.getLengthOfMergeChain(),getVertexId().getBytes());
+ tmpVal.getLengthOfMergeChain(),tmpVertextId);
writer.write("Step: " + getSuperstep() + "\r\n");
writer.write("Flush! " + "\r\n");
} catch (IOException e) { e.printStackTrace(); }
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/MergeGraphVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/MergeGraphVertex.java
index 3e02d2f..5d8d199 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/MergeGraphVertex.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/MergeGraphVertex.java
@@ -67,7 +67,7 @@
@Override
public void compute(Iterator<MessageWritable> msgIterator) {
try {
- writer = new OutputStreamWriter(new FileOutputStream("test/check",true));
+ writer = new OutputStreamWriter(new FileOutputStream("test/check_Naive",true));
} catch (FileNotFoundException e1) { e1.printStackTrace();}
tmpVertextId = GraphVertexOperation.generateValidDataFromBytesWritable(getVertexId());
if (getSuperstep() == 1) {
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/SequenceFile/GenerateSequenceFile.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/SequenceFile/GenerateSequenceFile.java
index e2f7b19..9cae8b2 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/SequenceFile/GenerateSequenceFile.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/SequenceFile/GenerateSequenceFile.java
@@ -13,7 +13,7 @@
import edu.uci.ics.pregelix.GraphVertexOperation;
import edu.uci.ics.pregelix.bitwise.BitwiseOperation;
-import edu.uci.ics.pregelix.type.KmerCountValue;
+import edu.uci.ics.genomix.type.KmerCountValue;
public class GenerateSequenceFile {
@@ -349,7 +349,10 @@
public static void generateNumOfLinesFromBigFile(Path inFile, Path outFile, int numOfLines) throws IOException{
Configuration conf = new Configuration();
FileSystem fileSys = FileSystem.get(conf);
- SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
+
+ ClassLoader ctxLoader = Thread.currentThread().getContextClassLoader();
+ Thread.currentThread().setContextClassLoader(GenerateSequenceFile.class.getClassLoader());
+ SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
outFile, BytesWritable.class, KmerCountValue.class,
CompressionType.NONE);
@@ -364,13 +367,14 @@
}
writer.close();
reader.close();
+ Thread.currentThread().setContextClassLoader(ctxLoader);
}
public static void main(String[] argv) throws Exception {
//createTestDat();
//createMergeTest();
//createTestDat();
- /* Path dir = new Path("data/webmap");
+ /*Path dir = new Path("data/webmap");
Path inFile = new Path(dir, "part-1");
Path outFile = new Path(dir, "part-1-out-100");
generateNumOfLinesFromBigFile(inFile,outFile,100);*/
@@ -410,11 +414,12 @@
* AGCATGCTAT
*/
- generateSequenceFileFromGeneCode3("AGCATGGCCTGCTAT");//GTCGATT //before T: GGACG
+ //generateSequenceFileFromGeneCode3("AGCATGCTAT");//GTCGATT //before T: GGACG
+ generateSequenceFileFromGeneCode15("AAAAAAAAAAAAAGCATGCTATAAAAAAAAAAAA");
}
public static void generateSequenceFileFromGeneCode3(String s) throws IOException{
Configuration conf = new Configuration();
- Path outFile = new Path(outDir, "11");//sequenceShortFileMergeTest
+ Path outFile = new Path(outDir, "sequenceShortFileMergeTest");//sequenceShortFileMergeTest
FileSystem fileSys = FileSystem.get(conf);
SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
outFile, BytesWritable.class, KmerCountValue.class,
@@ -562,7 +567,7 @@
}
public static void generateSequenceFileFromGeneCode5(String s) throws IOException{
Configuration conf = new Configuration();
- Path outFile = new Path(outDir, "sequenceFileMergeTest4");
+ Path outFile = new Path(outDir, "sequenceFileMergeTest5");
FileSystem fileSys = FileSystem.get(conf);
SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
outFile, BytesWritable.class, KmerCountValue.class,
@@ -626,7 +631,7 @@
writer.close();
//read outputs
- Path inFile = new Path(outDir, "sequenceFileMergeTest4");
+ Path inFile = new Path(outDir, "sequenceFileMergeTest5");
outKey = new BytesWritable();
outValue = new KmerCountValue();
SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
@@ -645,10 +650,9 @@
reader.close();
}
}
-
public static void generateSequenceFileFromGeneCode8(String s) throws IOException{
Configuration conf = new Configuration();
- Path outFile = new Path(outDir, "sequenceFileMergeTest4");
+ Path outFile = new Path(outDir, "sequenceFileMergeTest8");
FileSystem fileSys = FileSystem.get(conf);
SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
outFile, BytesWritable.class, KmerCountValue.class,
@@ -712,7 +716,7 @@
writer.close();
//read outputs
- Path inFile = new Path(outDir, "sequenceFileMergeTest4");
+ Path inFile = new Path(outDir, "sequenceFileMergeTest8");
outKey = new BytesWritable();
outValue = new KmerCountValue();
SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
@@ -731,7 +735,91 @@
reader.close();
}
}
-
+ /**
+  * Writes the test file "sequenceFileMergeTest15" under outDir with one record
+  * per k-mer of {@code s} (k = GraphVertexOperation.k), then reads the file
+  * back and prints every record.
+  *
+  * The k-mers starting at positions 1 .. s.length()-k-1 are derived from
+  * {@code s}. The first and last k-mers are not derived: their keys and
+  * adjacency bitmaps are hard-coded below and correspond to the 15-mer
+  * sample sequence that main() passes in.
+  *
+  * @param s gene code string to cut into k-mers
+  * @throws IOException if the sequence file cannot be written or read back
+  */
+ public static void generateSequenceFileFromGeneCode15(String s) throws IOException{
+     Configuration conf = new Configuration();
+     Path outFile = new Path(outDir, "sequenceFileMergeTest15");
+     FileSystem fileSys = FileSystem.get(conf);
+
+     // hard-coded boundary k-mers (binary keys) and their adjacency bitmaps
+     String headKmer = "000000000000000000000000001001"; //AAAAAAAAAAAAAGC
+     String tailKmer = "110011000000000000000000000000"; //TATAAAAAAAAAAAA
+     String[] boundaryKmers = { headKmer, tailKmer };
+     String[] boundaryAdjMaps = { "00000001", "00100000" };
+
+     SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
+             outFile, BytesWritable.class, KmerCountValue.class,
+             CompressionType.NONE);
+     try {
+         // k-mers already emitted or reserved; a repeat aborts the generation
+         ArrayList<String> seen = new ArrayList<String>();
+         seen.add(headKmer);
+         seen.add(tailKmer);
+         for(int i = 1; i < s.length()-GraphVertexOperation.k; i++){
+             String binaryString = GraphVertexOperation.convertGeneCodeToBinaryString(
+                     s.substring(i,i+GraphVertexOperation.k));
+             if(seen.contains(binaryString)){
+                 System.out.println("error: " + binaryString);
+                 return; // the finally block below still closes the writer
+             }
+             seen.add(binaryString);
+             // bitmap built from the letter before the k-mer and the letter after it
+             byte adjBitMap = GraphVertexOperation.getPrecursorFromGeneCode((byte)0, s.charAt(i-1));
+             adjBitMap = GraphVertexOperation.getSucceedFromGeneCode(
+                     adjBitMap, s.charAt(i+GraphVertexOperation.k));
+             KmerCountValue interiorValue = new KmerCountValue();
+             interiorValue.setAdjBitMap(adjBitMap);
+             writer.append(
+                     new BytesWritable(BitwiseOperation.convertBinaryStringToBytes(binaryString)),
+                     interiorValue);
+         }
+
+         //write the boundary k-mers to the sequence file, after the interior ones
+         for(int i = 0; i < boundaryKmers.length; i++){
+             KmerCountValue boundaryValue = new KmerCountValue();
+             boundaryValue.setAdjBitMap(
+                     BitwiseOperation.convertBinaryStringToByte(boundaryAdjMaps[i]));
+             writer.append(
+                     new BytesWritable(BitwiseOperation.convertBinaryStringToBytes(boundaryKmers[i])),
+                     boundaryValue);
+         }
+     } finally {
+         // close on every path, including the early return and append failures
+         writer.close();
+     }
+
+     //read outputs
+     BytesWritable outKey = new BytesWritable();
+     KmerCountValue outValue = new KmerCountValue();
+     SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, outFile, conf);
+     int iteration = 1;
+     try {
+         while(reader.next(outKey, outValue)){
+             System.out.println(iteration);
+             String kmer = BitwiseOperation.convertBytesToBinaryStringKmer(
+                     outKey.getBytes(),GraphVertexOperation.k);
+             System.out.println("key: " + kmer);
+             System.out.println("code: " + GraphVertexOperation.convertBinaryStringToGenecode(kmer));
+             System.out.println("value: " + BitwiseOperation.convertByteToBinaryString(outValue.getAdjBitMap()));
+             System.out.println();
+             iteration++;
+         }
+     } finally {
+         reader.close();
+     }
+ }
public static void generateSequenceFileFromTwoGeneCode3(String s, String s2) throws IOException{
Configuration conf = new Configuration();
Path outFile = new Path(outDir, "sequenceFileMergeTest4");
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/SequenceFile/generateSmallFile.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/SequenceFile/generateSmallFile.java
new file mode 100644
index 0000000..bde6f43
--- /dev/null
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/SequenceFile/generateSmallFile.java
@@ -0,0 +1,51 @@
+package edu.uci.ics.pregelix.SequenceFile;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
+
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+/** Copies the leading records of a BytesWritable/KmerCountValue SequenceFile into a smaller file. */
+public class generateSmallFile {
+    /**
+     * Copies at most {@code numOfLines} records from {@code inFile} to
+     * {@code outFile}, stopping early if the input runs out of records.
+     * @throws IOException if either file cannot be opened, read or written
+     */
+    public static void generateNumOfLinesFromBigFile(Path inFile, Path outFile, int numOfLines) throws IOException{
+        Configuration conf = new Configuration();
+        FileSystem fileSys = FileSystem.get(conf);
+        SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
+        try {
+            SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
+                    outFile, BytesWritable.class, KmerCountValue.class, CompressionType.NONE);
+            try {
+                BytesWritable outKey = new BytesWritable();
+                KmerCountValue outValue = new KmerCountValue();
+                // next() returns false at end of input; never re-append a stale record
+                for(int i = 0; i < numOfLines && reader.next(outKey, outValue); i++){
+                    System.out.println(i);
+                    writer.append(outKey, outValue);
+                }
+            } finally {
+                writer.close();
+            }
+        } finally {
+            reader.close();
+        }
+    }
+
+    /** Extracts the first 200000 records of data/webmap/part-1 into part-1-out-200000. */
+    public static void main(String[] args) throws IOException {
+        Path dir = new Path("data/webmap");
+        Path inFile = new Path(dir, "part-1");
+        Path outFile = new Path(dir, "part-1-out-200000");
+        generateNumOfLinesFromBigFile(inFile,outFile,200000);
+    }
+}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/api/io/binary/BinaryVertexInputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/api/io/binary/BinaryVertexInputFormat.java
index c5db334..31274ac 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/api/io/binary/BinaryVertexInputFormat.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/api/io/binary/BinaryVertexInputFormat.java
@@ -3,7 +3,6 @@
import java.io.IOException;
import java.util.List;
-import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
@@ -15,7 +14,7 @@
import edu.uci.ics.pregelix.api.io.VertexInputFormat;
import edu.uci.ics.pregelix.api.io.VertexReader;
-import edu.uci.ics.pregelix.type.KmerCountValue;
+import edu.uci.ics.genomix.type.KmerCountValue;
public class BinaryVertexInputFormat <I extends WritableComparable, V extends Writable, E extends Writable, M extends Writable>
extends VertexInputFormat<I, V, E, M>{
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobGen/JobGenerator.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobGen/JobGenerator.java
index 7affcbf..325ebdb 100644
--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobGen/JobGenerator.java
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobGen/JobGenerator.java
@@ -47,7 +47,7 @@
private static void generateBinaryLoadGraphJob(String jobName, String outputPath) throws IOException {
PregelixJob job = new PregelixJob(jobName);
- job.setVertexClass(TestLoadGraphVertex.class);
+ job.setVertexClass(MergeGraphVertex.class);
job.setVertexInputFormatClass(BinaryLoadGraphInputFormat.class);
job.setVertexOutputFormatClass(BinaryLoadGraphOutputFormat.class);
job.setOutputKeyClass(BytesWritable.class);
@@ -84,8 +84,8 @@
*/
public static void main(String[] args) throws IOException {
// TODO Auto-generated method stub
- //genLogAlgorithmForMergeGraph();
genBinaryLoadGraph();
+ //genLogAlgorithmForMergeGraph();
//genSequenceLoadGraph();
//genBasicBinaryLoadGraph();
}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobRun/RunJobTestCase.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobRun/RunJobTestCase.java
index e3fa41c..7af814c 100644
--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobRun/RunJobTestCase.java
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobRun/RunJobTestCase.java
@@ -1,6 +1,9 @@
package edu.uci.ics.pregelix.JobRun;
import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
import junit.framework.TestCase;
@@ -68,6 +71,7 @@
@Test
public void test() throws Exception {
setUp();
+
for (JobGen jobGen : giraphJobGens) {
FileSystem dfs = FileSystem.get(job.getConfiguration());
dfs.delete(new Path(HDFS_OUTPUTPAH), true);
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobRun/RunJobTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobRun/RunJobTestSuite.java
index 58e4ea3..879cc20 100644
--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobRun/RunJobTestSuite.java
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobRun/RunJobTestSuite.java
@@ -7,6 +7,7 @@
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
+import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
@@ -40,7 +41,7 @@
private static final String PATH_TO_ONLY = "src/test/resources/only.txt";
private static final String FILE_EXTENSION_OF_RESULTS = "result";
- private static final String DATA_PATH = "data/webmap/part-1-out";//sequenceFileMergeTest
+ private static final String DATA_PATH = "data/webmap/part-1-out-200000";//sequenceFileMergeTest
private static final String HDFS_PATH = "/webmap/";
private static final String HYRACKS_APP_NAME = "pregelix";
@@ -144,6 +145,12 @@
*/
@Override
public void run(TestResult result) {
+ OutputStreamWriter writer = null;
+ try {
+ writer = new OutputStreamWriter(new FileOutputStream("test/time",true));
+ } catch (FileNotFoundException e1) { e1.printStackTrace();}
+ long startTime = System.currentTimeMillis();
+
try {
int testCount = countTestCases();
for (int i = 0; i < testCount; i++) {
@@ -157,6 +164,15 @@
} catch (Exception e) {
throw new IllegalStateException(e);
}
+
+ long endTime = System.currentTimeMillis();
+ long totalTime = endTime - startTime;
+ System.out.println(totalTime);
+ try {
+ writer.write("Time: " + totalTime);
+ writer.close();
+ } catch (IOException e) { // TODO Auto-generated catch block
+ e.printStackTrace();}
}
protected static List<String> getFileList(String ignorePath)