Update
git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_genomix@3101 123451ca-8445-de46-9d55-352943316053
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/BinaryLoadGraphInputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/BinaryLoadGraphInputFormat.java
index eb78050..7cf8711 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/BinaryLoadGraphInputFormat.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/BinaryLoadGraphInputFormat.java
@@ -13,6 +13,7 @@
import edu.uci.ics.pregelix.api.io.VertexReader;
import edu.uci.ics.pregelix.api.io.binary.BinaryVertexInputFormat;
import edu.uci.ics.pregelix.api.util.BspUtils;
+import edu.uci.ics.pregelix.bitwise.BitwiseOperation;
import edu.uci.ics.pregelix.example.io.MessageWritable;
import edu.uci.ics.pregelix.type.KmerCountValue;
@@ -60,15 +61,32 @@
/**
* set the src vertex id
*/
- vertexId = getRecordReader().getCurrentKey();
- vertex.setVertexId(vertexId);
+ /*vertexId = getRecordReader().getCurrentKey();
+ byte[] vertexBytes = vertexId.getBytes();
+ int numOfByte = (2*GraphVertexOperation.k-1)/8 + 1;
+ if(vertexBytes.length == numOfByte)
+ vertex.setVertexId(vertexId);
+ else{
+ byte[] tmp = new byte[numOfByte];
+ for(int i = 0; i < numOfByte; i++)
+ tmp[i] = vertexBytes[i];
+ vertex.setVertexId(new BytesWritable(tmp));
+ }*/
+ vertexId = getRecordReader().getCurrentKey();
+ vertex.setVertexId(vertexId);
/**
* set the vertex value
*/
KmerCountValue kmerCountValue = getRecordReader().getCurrentValue();
vertexValue.set(kmerCountValue.getAdjBitMap());
vertex.setVertexValue(vertexValue);
+
+ String kmer = BitwiseOperation.convertBytesToBinaryStringKmer(vertexId.getBytes(),GraphVertexOperation.k);
+ System.out.println("key: " + kmer);
+ System.out.println("code: " + GraphVertexOperation.convertBinaryStringToGenecode(kmer));
+ System.out.println("value: " + BitwiseOperation.convertByteToBinaryString(kmerCountValue.getAdjBitMap()));
+ System.out.println();
}
return vertex;
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/GraphVertexOperation.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/GraphVertexOperation.java
index 076b1be..9260a2f 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/GraphVertexOperation.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/GraphVertexOperation.java
@@ -1,6 +1,10 @@
package edu.uci.ics.pregelix;
+import java.io.DataOutputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.OutputStreamWriter;
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
@@ -13,6 +17,9 @@
import edu.uci.ics.pregelix.SequenceFile.GenerateSequenceFile;
import edu.uci.ics.pregelix.bitwise.BitwiseOperation;
+import edu.uci.ics.pregelix.example.io.MessageWritable;
+import edu.uci.ics.pregelix.hdfs.HDFSOperation;
+import edu.uci.ics.pregelix.type.KmerCountValue;
public class GraphVertexOperation {
public static final int k = 3; //kmer, k: the length of kmer
@@ -167,7 +174,7 @@
* Ex. 01 10 00(nothing) -> 01 10 00(A)/01(C)/10(G)/11(T)
*/
public static byte[] replaceLastTwoBits(byte[] vertexId, int n){
- String binaryStringVertexId = BitwiseOperation.convertBytesToBinaryStringKmer(vertexId, 3);
+ String binaryStringVertexId = BitwiseOperation.convertBytesToBinaryStringKmer(vertexId, k);
String resultString = "";
for(int i = 0; i < binaryStringVertexId.length()-2; i++)
resultString += binaryStringVertexId.charAt(i);
@@ -315,4 +322,103 @@
}
return result;
}
+ /**
+  * Writes vertexId, then lengthOfChain as a 4-byte int, then chainVertexId to the local file
+  * "data/ChainVertex", replacing its previous content. The HDFS copy step is currently disabled.
+  * @throws IOException if the file cannot be opened or a write fails
+  */
+ public static void flushChainToFile(byte[] chainVertexId, int lengthOfChain, byte[] vertexId) throws IOException{
+     DataOutputStream out = new DataOutputStream(new FileOutputStream("data/ChainVertex"));
+     try {
+         out.write(vertexId);
+         out.writeInt(lengthOfChain);
+         out.write(chainVertexId);
+     } finally {
+         out.close(); // release the file handle even when a write fails
+     }
+ }
+ /**
+  * Converts a binary string to gene code, two bits per base: 00 -> A, 01 -> C, 10 -> G, 11 -> T.
+  * Pairs that are not two binary digits are skipped, and so is a trailing unpaired character.
+  */
+ public static String convertBinaryStringToGenecode(String kmer){
+     StringBuilder result = new StringBuilder(kmer.length() / 2);
+     for(int i = 0; i + 2 <= kmer.length(); i += 2){
+         String pair = kmer.substring(i, i + 2);
+         if(pair.equals("00"))
+             result.append('A');
+         else if(pair.equals("01"))
+             result.append('C');
+         else if(pair.equals("10"))
+             result.append('G');
+         else if(pair.equals("11"))
+             result.append('T');
+     }
+     return result.toString();
+ }
+ /**
+  * Copies the first getLength() bytes of a BytesWritable's buffer into a new, exactly-sized array.
+  */
+ public static byte[] generateValidDataFromBytesWritable(BytesWritable bw){
+     // the backing buffer can be longer than the payload; only getLength() bytes are valid
+     byte[] buffer = bw.getBytes();
+     int payloadLength = bw.getLength();
+     byte[] payload = new byte[payloadLength];
+     System.arraycopy(buffer, 0, payload, 0, payloadLength);
+     return payload;
+ }
+ /**
+  * Debug trace for one sent message: appends the superstep, the source kmer (bits and gene code),
+  * the destination gene code and the message's chain and chain length to writer. I/O errors are only printed.
+  */
+ public static void testMessageCommunication(OutputStreamWriter writer, long step, byte[] tmpSourceVertextId,
+         byte[] tmpDestVertexId, MessageWritable tmpMsg){
+     String sourceBits = BitwiseOperation.convertBytesToBinaryStringKmer(tmpSourceVertextId, GraphVertexOperation.k);
+     try {
+         writer.write("Step: " + step + "\r\n");
+         writer.write("Source Key: " + sourceBits + "\r\n");
+
+         String sourceCode = GraphVertexOperation.convertBinaryStringToGenecode(sourceBits);
+         writer.write("Source Code: " + sourceCode + "\r\n");
+
+         String destBits = BitwiseOperation.convertBytesToBinaryStringKmer(tmpDestVertexId, GraphVertexOperation.k);
+         String destCode = GraphVertexOperation.convertBinaryStringToGenecode(destBits);
+         writer.write("Send Message to: " + destCode + "\r\n");
+
+         String chainBits = BitwiseOperation.convertBytesToBinaryString(tmpMsg.getChainVertexId());
+         String chainCode = GraphVertexOperation.convertBinaryStringToGenecode(chainBits);
+         writer.write("Chain Message: " + chainCode + "\r\n");
+         writer.write("Chain Length: " + tmpMsg.getLengthOfChain() + "\r\n");
+         writer.write("\r\n");
+     } catch (IOException e) {
+         e.printStackTrace();
+     }
+ }
+ /**
+  * Debug trace for the final step of a chain: appends the superstep, an "Over!" marker, this
+  * vertex's kmer (bits and gene code) and the flushed chain with its length to writer.
+  * tmpSourceVertextId is accepted but not used. I/O errors are only printed.
+  */
+ public static void testLastMessageCommunication(OutputStreamWriter writer, long step, byte[] tmpVertextId,
+         byte[] tmpSourceVertextId, MessageWritable tmpMsg){
+     String vertexBits = BitwiseOperation.convertBytesToBinaryStringKmer(tmpVertextId, GraphVertexOperation.k);
+     try {
+         writer.write("Step: " + step + "\r\n");
+         writer.write("Over!" + "\r\n");
+         writer.write("Source Key: " + vertexBits + "\r\n");
+
+         String vertexCode = GraphVertexOperation.convertBinaryStringToGenecode(vertexBits);
+         writer.write("Source Code: " + vertexCode + "\r\n");
+
+         String chainBits = BitwiseOperation.convertBytesToBinaryString(tmpMsg.getChainVertexId());
+         String chainCode = GraphVertexOperation.convertBinaryStringToGenecode(chainBits);
+         writer.write("Flush Chain Message: " + chainCode + "\r\n");
+
+         writer.write("Chain Length: " + tmpMsg.getLengthOfChain() + "\r\n");
+         writer.write("\r\n");
+     } catch (IOException e) {
+         e.printStackTrace();
+     }
+ }
+
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/LogAlgorithmForMergeGraphVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/LogAlgorithmForMergeGraphVertex.java
index 4061d4f..3561550 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/LogAlgorithmForMergeGraphVertex.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/LogAlgorithmForMergeGraphVertex.java
@@ -59,7 +59,6 @@
private int tmpMessage;
private ValueStateWritable tmpVal = new ValueStateWritable();
private LogAlgorithmMessageWritable tmpMsg = new LogAlgorithmMessageWritable();
- public static final int k = 3; //kmer, k = 3
/**
* For test, in compute method, make each vertexValue shift 1 to left.
* It will be modified when going forward to next step.
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/MergeGraphVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/MergeGraphVertex.java
index 1261a0e..d8d7555 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/MergeGraphVertex.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/MergeGraphVertex.java
@@ -1,6 +1,9 @@
package edu.uci.ics.pregelix;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.OutputStreamWriter;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
@@ -14,6 +17,7 @@
import edu.uci.ics.pregelix.api.graph.Vertex;
import edu.uci.ics.pregelix.api.job.PregelixJob;
import edu.uci.ics.pregelix.api.util.BspUtils;
+import edu.uci.ics.pregelix.bitwise.BitwiseOperation;
import edu.uci.ics.pregelix.dataflow.util.IterationUtils;
import edu.uci.ics.pregelix.example.client.Client;
import edu.uci.ics.pregelix.example.io.MessageWritable;
@@ -49,27 +53,35 @@
*/
public class MergeGraphVertex extends Vertex<BytesWritable, ByteWritable, NullWritable, MessageWritable>{
+ private byte[] tmpVertextId;
private byte[] tmpSourceVertextId;
private byte[] tmpDestVertexId;
private byte[] tmpChainVertexId;
- private byte[] tmpNeighberBytes = new byte[1];
+ private byte tmpNeighberByte;
private byte tmpVertexValue;
private MessageWritable tmpMsg = new MessageWritable();
- public static final int k = 3; //kmer, k = 3
+ OutputStreamWriter writer;
/**
* Naive Algorithm for merge graph
*/
@Override
public void compute(Iterator<MessageWritable> msgIterator) {
+ try {
+ writer = new OutputStreamWriter(new FileOutputStream("test/check",true));
+ } catch (FileNotFoundException e1) { e1.printStackTrace();}
+ tmpVertextId = GraphVertexOperation.generateValidDataFromBytesWritable(getVertexId());
if (getSuperstep() == 1) {
if(GraphVertexOperation.isHead(getVertexValue())){
- tmpSourceVertextId = getVertexId().getBytes();
+ tmpSourceVertextId = tmpVertextId;
tmpDestVertexId = GraphVertexOperation.getDestVertexId(tmpSourceVertextId,
getVertexValue().get());
- tmpMsg.setSourceVertexIdOrNeighberInfo(tmpSourceVertextId);
+ tmpMsg.setSourceVertexId(tmpSourceVertextId);
tmpChainVertexId = new byte[0];
tmpMsg.setChainVertexId(tmpChainVertexId);
sendMsg(new BytesWritable(tmpDestVertexId),tmpMsg);
+ //test
+ GraphVertexOperation.testMessageCommunication(writer,getSuperstep(),tmpSourceVertextId,
+ tmpDestVertexId,tmpMsg);
}
}
//path node sends message back to head node
@@ -78,65 +90,88 @@
tmpMsg = msgIterator.next();
if(!tmpMsg.isRear()){
if(GraphVertexOperation.isPathVertex(getVertexValue())){
- tmpSourceVertextId = tmpMsg.getSourceVertexIdOrNeighberInfo();
- //GraphVertexOperation.getDestVertexId(getVertexId().getBytes(), getVertexValue().get());
- tmpNeighberBytes[0] = getVertexValue().get();
- tmpMsg.setSourceVertexIdOrNeighberInfo(tmpNeighberBytes); //set neighber
+ tmpSourceVertextId = tmpMsg.getSourceVertexId();
+ tmpNeighberByte = getVertexValue().get();
+ tmpMsg.setNeighberInfo(tmpNeighberByte); //set neighber
tmpChainVertexId = tmpMsg.getChainVertexId();
if(tmpChainVertexId.length == 0){
- tmpMsg.setChainVertexId(getVertexId().getBytes());
- tmpMsg.setLengthOfChain(k);
+ tmpMsg.setLengthOfChain(GraphVertexOperation.k);
+ tmpMsg.setChainVertexId(tmpVertextId);
}
else{
- tmpMsg.setChainVertexId(GraphVertexOperation.updateChainVertexId(tmpChainVertexId,
- tmpMsg.getLengthOfChain(),getVertexId().getBytes()));
tmpMsg.incrementLength();
+ tmpMsg.setChainVertexId(GraphVertexOperation.updateChainVertexId(
+ tmpChainVertexId,
+ tmpMsg.getLengthOfChain()-1,
+ tmpVertextId));
//deleteVertex(getVertexId());
}
sendMsg(new BytesWritable(tmpSourceVertextId),tmpMsg);
-
+ //test
+ GraphVertexOperation.testMessageCommunication(writer,getSuperstep(),tmpVertextId,
+ tmpSourceVertextId,tmpMsg);
}
else if(GraphVertexOperation.isRear(getVertexValue())){
- tmpSourceVertextId = tmpMsg.getSourceVertexIdOrNeighberInfo();
- tmpMsg.setSourceVertexIdOrNeighberInfo(getVertexId().getBytes());
+ tmpSourceVertextId = tmpMsg.getSourceVertexId();
+ tmpMsg.setSourceVertexId(tmpVertextId);
tmpMsg.setRear(true);
sendMsg(new BytesWritable(tmpSourceVertextId),tmpMsg);
+ //test
+ try {
+ writer.write("It is Rear!\r\n");
+ } catch (IOException e) { e.printStackTrace(); }
+ GraphVertexOperation.testMessageCommunication(writer,getSuperstep(),tmpVertextId,
+ tmpSourceVertextId,tmpMsg);
}
- voteToHalt();
}
else{
tmpVertexValue = GraphVertexOperation.updateRightNeighberByVertexId(getVertexValue().get(),
- tmpMsg.getSourceVertexIdOrNeighberInfo());
- setVertexValue(new ByteWritable(tmpVertexValue));
- setVertexId(new BytesWritable(tmpMsg.getChainVertexId()));
- signalTerminate();
+ tmpMsg.getSourceVertexId());
+ //setVertexValue(new ByteWritable(tmpVertexValue));
+ //setVertexId(new BytesWritable(tmpMsg.getChainVertexId()));
+ //addVertex(new BytesWritable(tmpMsg.getChainVertexId()),new ByteWritable(tmpVertexValue));
+ try {
+ GraphVertexOperation.flushChainToFile(tmpMsg.getChainVertexId(),
+ tmpMsg.getLengthOfChain(),tmpVertextId);
+ } catch (IOException e) { e.printStackTrace(); }
+ //test
+ GraphVertexOperation.testLastMessageCommunication(writer,getSuperstep(),tmpVertextId,
+ tmpSourceVertextId,tmpMsg);
+
}
}
- else voteToHalt();
}
//head node sends message to path node
else if(getSuperstep()%2 == 1){
if (msgIterator.hasNext()){
tmpMsg = msgIterator.next();
- tmpNeighberBytes = tmpMsg.getSourceVertexIdOrNeighberInfo();
+ tmpNeighberByte = tmpMsg.getNeighberInfo();
tmpChainVertexId = tmpMsg.getChainVertexId();
if(!tmpMsg.isRear()){
byte[] lastKmer = GraphVertexOperation.getLastKmer(tmpChainVertexId,
tmpMsg.getLengthOfChain());
- tmpDestVertexId = GraphVertexOperation.getDestVertexId(lastKmer, tmpNeighberBytes[0]);
- tmpSourceVertextId = getVertexId().getBytes();
- tmpMsg.setSourceVertexIdOrNeighberInfo(tmpSourceVertextId);
+ tmpDestVertexId = GraphVertexOperation.getDestVertexId(lastKmer, tmpNeighberByte);
+ tmpSourceVertextId = tmpVertextId;
+ tmpMsg.setSourceVertexId(tmpSourceVertextId);
sendMsg(new BytesWritable(tmpDestVertexId),tmpMsg);
+ //test
+ GraphVertexOperation.testMessageCommunication(writer,getSuperstep(),tmpVertextId,
+ tmpDestVertexId,tmpMsg);
}
else{
- tmpDestVertexId = GraphVertexOperation.getDestVertexId(getVertexId().getBytes(),
+ tmpDestVertexId = GraphVertexOperation.getDestVertexId(tmpVertextId,
getVertexValue().get());
sendMsg(new BytesWritable(tmpDestVertexId),tmpMsg);
- voteToHalt();
+ //test
+ GraphVertexOperation.testMessageCommunication(writer,getSuperstep(),tmpVertextId,
+ tmpDestVertexId,tmpMsg);
}
}
- voteToHalt();
}
+ try {
+ writer.close();
+ } catch (IOException e) { e.printStackTrace(); }
+ voteToHalt();
}
private void signalTerminate() {
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/SequenceFile/GenerateSequenceFile.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/SequenceFile/GenerateSequenceFile.java
index c7a4556..891fb8d 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/SequenceFile/GenerateSequenceFile.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/SequenceFile/GenerateSequenceFile.java
@@ -20,7 +20,6 @@
static private final Path TMP_DIR = new Path(
GenerateSequenceFile.class.getSimpleName() + "_TMP");
private static Path outDir = new Path("data/webmap");
- private final static int k = 3;
/**
* create test.dat
@@ -229,7 +228,7 @@
try {
while(reader.next(outKey, outValue)){
System.out.println(iteration);
- System.out.println("key: " + BitwiseOperation.convertBytesToBinaryStringKmer(outKey.getBytes(),k));
+ System.out.println("key: " + BitwiseOperation.convertBytesToBinaryStringKmer(outKey.getBytes(),GraphVertexOperation.k));
System.out.println("value: " + BitwiseOperation.convertByteToBinaryString(outValue.getAdjBitMap()));
System.out.println();
iteration++;
@@ -337,7 +336,7 @@
try {
while(reader.next(outKey, outValue)){
System.out.println(iteration);
- System.out.println("key: " + BitwiseOperation.convertBytesToBinaryStringKmer(outKey.getBytes(),k));
+ System.out.println("key: " + BitwiseOperation.convertBytesToBinaryStringKmer(outKey.getBytes(),GraphVertexOperation.k));
System.out.println("value: " + BitwiseOperation.convertByteToBinaryString(outValue.getAdjBitMap()));
System.out.println();
iteration++;
@@ -377,14 +376,105 @@
generateNumOfLinesFromBigFile(inFile,outFile,10000);*/
/**
* AGC - A C - TAT
+ * AGCATGCTAT
* "AGCAAACACGAC T TGCC TAT"
* problem "AGCATGGACGTCGATTCTAT"
+ * problem "AGCAAACACGATTGCCTAT"
+ * problem "AGCACGTAACTTGCTCTAT"
+ * problem "AGCAACGATTGCCTAT"
+ * problem "AGCATTTAAACTCTAT"
+ *
* "AGCACTTAT"
* "AGCAAACACTTGCTGTACCGTGGCCTAT"
- */
- generateSequenceFileFromGeneCode("AGCATGCGGGTCTAT");//GTCGATT //before T: GGACG
+ * "AGCAAACACTTGCTGTACCCTAT"
+ *
+ * compare "AGCAACTAT"
+ * "AGCACCTAT"
+ * "AGCATCTAT"
+ *
+ * k = 5
+ * AGCGC - A C - TATAT
+ *
+ * k = 5
+ * AAAGCAGCTTGCTACTATAA
+ *
+ * k = 8
+ * AAAAAAGCAGCTTGCTACTATAAAAA
+ *
+ * k=4
+ * AAGCATGCTATA
+ *
+ * Two strings
+ * "AGCATGCTAT","TTCAGTACCCGC"
+ */
+
+ generateSequenceFileFromGeneCode3("AGCATGCTAT");//GTCGATT //before T: GGACG
}
- public static void generateSequenceFileFromGeneCode(String s) throws IOException{
+ /** Writes the interior kmers of s with their adjacency bitmaps to outDir/"sequenceShortFileMergeTest",
+  *  followed by fixed records for the head AGC and the tail TAT (6-bit keys, so k = 3 is assumed).
+  *  Prints "error: ..." and stops when an interior kmer repeats. */
+ public static void generateSequenceFileFromGeneCode3(String s) throws IOException{
+     Configuration conf = new Configuration();
+     Path outFile = new Path(outDir, "sequenceShortFileMergeTest");
+     FileSystem fileSys = FileSystem.get(conf);
+     SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
+             outFile, BytesWritable.class, KmerCountValue.class,
+             CompressionType.NONE);
+     // try/finally so the duplicate-kmer return and any failed append still close the writer
+     try {
+         BytesWritable outKey = null;
+         KmerCountValue outValue;
+         byte adjBitMap;
+         ArrayList<String> lists = new ArrayList<String>();
+         lists.add("001001"); //AGC
+         lists.add("110011"); //TAT
+         String binaryString = "";
+         for(int i = 1; i < s.length()-GraphVertexOperation.k; i++){
+             binaryString = GraphVertexOperation.convertGeneCodeToBinaryString(s.substring(i,i+GraphVertexOperation.k));
+             if(lists.contains(binaryString)){
+                 System.out.println("error: " + binaryString);
+                 return;
+             }
+             lists.add(binaryString);
+             outKey = new BytesWritable(BitwiseOperation.convertBinaryStringToBytes(binaryString));
+             outValue = new KmerCountValue();
+             adjBitMap = GraphVertexOperation.getPrecursorFromGeneCode((byte)0, s.charAt(i-1));
+             adjBitMap = GraphVertexOperation.getSucceedFromGeneCode(adjBitMap, s.charAt(i+GraphVertexOperation.k));
+             outValue.setAdjBitMap(adjBitMap);
+             writer.append(outKey, outValue);
+         }
+         // AGC
+         String tmpKey = "001001";
+         byte[] key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
+         String tmpValue = "00000001";
+         byte value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
+         BytesWritable keyWritable = new BytesWritable(key);
+         ByteWritable valueWritable = new ByteWritable(value);
+         ArrayList<BytesWritable> arrayOfKeys = new ArrayList<BytesWritable>();
+         arrayOfKeys.add(keyWritable);
+         ArrayList<ByteWritable> arrayOfValues = new ArrayList<ByteWritable>();
+         arrayOfValues.add(valueWritable);
+         // TAT
+         tmpKey = "110011";
+         key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
+         tmpValue = "00100000";
+         value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
+         keyWritable = new BytesWritable(key);
+         valueWritable = new ByteWritable(value);
+         arrayOfKeys.add(keyWritable);
+         arrayOfValues.add(valueWritable);
+         KmerCountValue kmerCountValue = null;
+         //write to sequence file
+         for(int i = 0; i < arrayOfKeys.size(); i++){
+             kmerCountValue = new KmerCountValue();
+             kmerCountValue.setAdjBitMap(arrayOfValues.get(i).get());
+             writer.append(arrayOfKeys.get(i), kmerCountValue);
+         }
+     } finally {
+         writer.close();
+     }
+ }
+ public static void generateSequenceFileFromGeneCode4(String s) throws IOException{
Configuration conf = new Configuration();
Path outFile = new Path(outDir, "sequenceFileMergeTest4");
FileSystem fileSys = FileSystem.get(conf);
@@ -395,15 +485,12 @@
KmerCountValue outValue;
byte adjBitMap;
ArrayList<String> lists = new ArrayList<String>();
- lists.add("010010"); //CAG
- lists.add("100010"); //GAG
- lists.add("001001"); //AGC
- lists.add("110011"); //TAT
- lists.add("001100"); //ATA
- lists.add("001101"); //ATC
+
+ lists.add("00001001"); //AAGC
+ lists.add("11001100"); //TATA
String binaryString = "";
- for(int i = 1; i < s.length()-k; i++){
- binaryString = GraphVertexOperation.convertGeneCodeToBinaryString(s.substring(i,i+k));
+ for(int i = 1; i < s.length()-GraphVertexOperation.k; i++){
+ binaryString = GraphVertexOperation.convertGeneCodeToBinaryString(s.substring(i,i+GraphVertexOperation.k));
if(lists.contains(binaryString)){
System.out.println("error: " + binaryString);
return;
@@ -412,7 +499,91 @@
outKey = new BytesWritable(BitwiseOperation.convertBinaryStringToBytes(binaryString));
outValue = new KmerCountValue();
adjBitMap = GraphVertexOperation.getPrecursorFromGeneCode((byte)0, s.charAt(i-1));
- adjBitMap = GraphVertexOperation.getSucceedFromGeneCode(adjBitMap, s.charAt(i+k));
+ adjBitMap = GraphVertexOperation.getSucceedFromGeneCode(adjBitMap, s.charAt(i+GraphVertexOperation.k));
+ outValue.setAdjBitMap(adjBitMap);
+ writer.append(outKey, outValue);
+ }
+
+ /**
+ * CAG - AGC ------ TAT - ATA
+ * GAG ATC
+ */
+ // AAGC
+ String tmpKey = "00001001";
+ byte[] key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
+ String tmpValue = "00000001";
+ byte value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
+ BytesWritable keyWritable = new BytesWritable(key);
+ ByteWritable valueWritable = new ByteWritable(value);
+
+ ArrayList<BytesWritable> arrayOfKeys = new ArrayList<BytesWritable>();
+ arrayOfKeys.add(keyWritable);
+ ArrayList<ByteWritable> arrayOfValues = new ArrayList<ByteWritable>();
+ arrayOfValues.add(valueWritable);
+
+ // TATA
+ tmpKey = "11001100";
+ key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
+ tmpValue = "00100000";
+ value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
+ keyWritable = new BytesWritable(key);
+ valueWritable = new ByteWritable(value);
+ arrayOfKeys.add(keyWritable);
+ arrayOfValues.add(valueWritable);
+
+ KmerCountValue kmerCountValue = null;
+ //write to sequence file
+ for(int i = 0; i < arrayOfKeys.size(); i++){
+ kmerCountValue = new KmerCountValue();
+ kmerCountValue.setAdjBitMap(arrayOfValues.get(i).get());
+ writer.append(arrayOfKeys.get(i), kmerCountValue);
+ }
+ writer.close();
+
+ //read outputs
+ Path inFile = new Path(outDir, "sequenceFileMergeTest4");
+ outKey = new BytesWritable();
+ outValue = new KmerCountValue();
+ SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
+ int iteration = 1;
+ try {
+ while(reader.next(outKey, outValue)){
+ System.out.println(iteration);
+ System.out.println("key: " + BitwiseOperation.convertBytesToBinaryStringKmer(outKey.getBytes(),GraphVertexOperation.k));
+ System.out.println("value: " + BitwiseOperation.convertByteToBinaryString(outValue.getAdjBitMap()));
+ System.out.println();
+ iteration++;
+ }
+ } finally {
+ reader.close();
+ }
+ }
+ public static void generateSequenceFileFromGeneCode5(String s) throws IOException{
+ Configuration conf = new Configuration();
+ Path outFile = new Path(outDir, "sequenceFileMergeTest4");
+ FileSystem fileSys = FileSystem.get(conf);
+ SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
+ outFile, BytesWritable.class, KmerCountValue.class,
+ CompressionType.NONE);
+ BytesWritable outKey = null;
+ KmerCountValue outValue;
+ byte adjBitMap;
+ ArrayList<String> lists = new ArrayList<String>();
+
+ lists.add("0000001001"); //AAAGC
+ lists.add("1100110000"); //TATAA
+ String binaryString = "";
+ for(int i = 1; i < s.length()-GraphVertexOperation.k; i++){
+ binaryString = GraphVertexOperation.convertGeneCodeToBinaryString(s.substring(i,i+GraphVertexOperation.k));
+ if(lists.contains(binaryString)){
+ System.out.println("error: " + binaryString);
+ return;
+ }
+ lists.add(binaryString);
+ outKey = new BytesWritable(BitwiseOperation.convertBinaryStringToBytes(binaryString));
+ outValue = new KmerCountValue();
+ adjBitMap = GraphVertexOperation.getPrecursorFromGeneCode((byte)0, s.charAt(i-1));
+ adjBitMap = GraphVertexOperation.getSucceedFromGeneCode(adjBitMap, s.charAt(i+GraphVertexOperation.k));
outValue.setAdjBitMap(adjBitMap);
writer.append(outKey, outValue);
}
@@ -420,10 +591,10 @@
* CAG - AGC ------ TAT - ATA
* GAG ATC
*/
- // CAG
- String tmpKey = "010010";
+ // AAAGC
+ String tmpKey = "0000001001";
byte[] key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- String tmpValue = "00000010";
+ String tmpValue = "00000001";
byte value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
BytesWritable keyWritable = new BytesWritable(key);
ByteWritable valueWritable = new ByteWritable(value);
@@ -433,49 +604,181 @@
ArrayList<ByteWritable> arrayOfValues = new ArrayList<ByteWritable>();
arrayOfValues.add(valueWritable);
- // AGC
- tmpKey = "001001";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "01100001";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- // GAG
- tmpKey = "100010";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "00000010";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
// TAT
- tmpKey = "110011";
+ tmpKey = "1100110000";
key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "00100011";
+ tmpValue = "00100000";
value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
keyWritable = new BytesWritable(key);
valueWritable = new ByteWritable(value);
arrayOfKeys.add(keyWritable);
arrayOfValues.add(valueWritable);
- // ATA
- tmpKey = "001100";
+ KmerCountValue kmerCountValue = null;
+ //write to sequence file
+ for(int i = 0; i < arrayOfKeys.size(); i++){
+ kmerCountValue = new KmerCountValue();
+ kmerCountValue.setAdjBitMap(arrayOfValues.get(i).get());
+ writer.append(arrayOfKeys.get(i), kmerCountValue);
+ }
+ writer.close();
+
+ //read outputs
+ Path inFile = new Path(outDir, "sequenceFileMergeTest4");
+ outKey = new BytesWritable();
+ outValue = new KmerCountValue();
+ SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
+ int iteration = 1;
+ try {
+ while(reader.next(outKey, outValue)){
+ System.out.println(iteration);
+ String kmer = BitwiseOperation.convertBytesToBinaryStringKmer(outKey.getBytes(),GraphVertexOperation.k);
+ System.out.println("key: " + kmer);
+ System.out.println("code: " + GraphVertexOperation.convertBinaryStringToGenecode(kmer));
+ System.out.println("value: " + BitwiseOperation.convertByteToBinaryString(outValue.getAdjBitMap()));
+ System.out.println();
+ iteration++;
+ }
+ } finally {
+ reader.close();
+ }
+ }
+
+ public static void generateSequenceFileFromGeneCode8(String s) throws IOException{
+ Configuration conf = new Configuration();
+ Path outFile = new Path(outDir, "sequenceFileMergeTest4");
+ FileSystem fileSys = FileSystem.get(conf);
+ SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
+ outFile, BytesWritable.class, KmerCountValue.class,
+ CompressionType.NONE);
+ BytesWritable outKey = null;
+ KmerCountValue outValue;
+ byte adjBitMap;
+ ArrayList<String> lists = new ArrayList<String>();
+
+ lists.add("0000000000001001"); //AAAAAAGC
+ lists.add("1100110000000000"); //TATAAAAA
+ String binaryString = "";
+ for(int i = 1; i < s.length()-GraphVertexOperation.k; i++){
+ binaryString = GraphVertexOperation.convertGeneCodeToBinaryString(s.substring(i,i+GraphVertexOperation.k));
+ if(lists.contains(binaryString)){
+ System.out.println("error: " + binaryString);
+ return;
+ }
+ lists.add(binaryString);
+ outKey = new BytesWritable(BitwiseOperation.convertBinaryStringToBytes(binaryString));
+ outValue = new KmerCountValue();
+ adjBitMap = GraphVertexOperation.getPrecursorFromGeneCode((byte)0, s.charAt(i-1));
+ adjBitMap = GraphVertexOperation.getSucceedFromGeneCode(adjBitMap, s.charAt(i+GraphVertexOperation.k));
+ outValue.setAdjBitMap(adjBitMap);
+ writer.append(outKey, outValue);
+ }
+ /**
+ * CAG - AGC ------ TAT - ATA
+ * GAG ATC
+ */
+ // AAAAAAGC
+ String tmpKey = "0000000000001001";
+ byte[] key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
+ String tmpValue = "00000001";
+ byte value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
+ BytesWritable keyWritable = new BytesWritable(key);
+ ByteWritable valueWritable = new ByteWritable(value);
+
+ ArrayList<BytesWritable> arrayOfKeys = new ArrayList<BytesWritable>();
+ arrayOfKeys.add(keyWritable);
+ ArrayList<ByteWritable> arrayOfValues = new ArrayList<ByteWritable>();
+ arrayOfValues.add(valueWritable);
+
+ // TATAAAAA
+ tmpKey = "1100110000000000";
key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "10000000";
+ tmpValue = "00100000";
value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
keyWritable = new BytesWritable(key);
valueWritable = new ByteWritable(value);
arrayOfKeys.add(keyWritable);
arrayOfValues.add(valueWritable);
- // ATC
- tmpKey = "001101";
+
+ KmerCountValue kmerCountValue = null;
+ //write to sequence file
+ for(int i = 0; i < arrayOfKeys.size(); i++){
+ kmerCountValue = new KmerCountValue();
+ kmerCountValue.setAdjBitMap(arrayOfValues.get(i).get());
+ writer.append(arrayOfKeys.get(i), kmerCountValue);
+ }
+ writer.close();
+
+ //read outputs
+ Path inFile = new Path(outDir, "sequenceFileMergeTest4");
+ outKey = new BytesWritable();
+ outValue = new KmerCountValue();
+ SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
+ int iteration = 1;
+ try {
+ while(reader.next(outKey, outValue)){
+ System.out.println(iteration);
+ String kmer = BitwiseOperation.convertBytesToBinaryStringKmer(outKey.getBytes(),GraphVertexOperation.k);
+ System.out.println("key: " + kmer);
+ System.out.println("code: " + GraphVertexOperation.convertBinaryStringToGenecode(kmer));
+ System.out.println("value: " + BitwiseOperation.convertByteToBinaryString(outValue.getAdjBitMap()));
+ System.out.println();
+ iteration++;
+ }
+ } finally {
+ reader.close();
+ }
+ }
+
+ public static void generateSequenceFileFromTwoGeneCode3(String s, String s2) throws IOException{
+ Configuration conf = new Configuration();
+ Path outFile = new Path(outDir, "sequenceFileMergeTest4");
+ FileSystem fileSys = FileSystem.get(conf);
+ SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
+ outFile, BytesWritable.class, KmerCountValue.class,
+ CompressionType.NONE);
+ BytesWritable outKey = null;
+ KmerCountValue outValue;
+ byte adjBitMap;
+ ArrayList<String> lists = new ArrayList<String>();
+ lists.add("001001"); //AGC
+ lists.add("110011"); //TAT
+ String binaryString = "";
+ for(int i = 1; i < s.length()-GraphVertexOperation.k; i++){
+ binaryString = GraphVertexOperation.convertGeneCodeToBinaryString(s.substring(i,i+GraphVertexOperation.k));
+ if(lists.contains(binaryString)){
+ System.out.println("error: " + binaryString);
+ return;
+ }
+ lists.add(binaryString);
+ outKey = new BytesWritable(BitwiseOperation.convertBinaryStringToBytes(binaryString));
+ outValue = new KmerCountValue();
+ adjBitMap = GraphVertexOperation.getPrecursorFromGeneCode((byte)0, s.charAt(i-1));
+ adjBitMap = GraphVertexOperation.getSucceedFromGeneCode(adjBitMap, s.charAt(i+GraphVertexOperation.k));
+ outValue.setAdjBitMap(adjBitMap);
+ writer.append(outKey, outValue);
+ }
+ /**
+ * CAG - AGC ------ TAT - ATA
+ * GAG ATC
+ */
+ // AGC
+ String tmpKey = "001001";
+ byte[] key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
+ String tmpValue = "00000001";
+ byte value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
+ BytesWritable keyWritable = new BytesWritable(key);
+ ByteWritable valueWritable = new ByteWritable(value);
+
+ ArrayList<BytesWritable> arrayOfKeys = new ArrayList<BytesWritable>();
+ arrayOfKeys.add(keyWritable);
+ ArrayList<ByteWritable> arrayOfValues = new ArrayList<ByteWritable>();
+ arrayOfValues.add(valueWritable);
+
+ // TAT
+ tmpKey = "110011";
key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "10000000";
+ tmpValue = "00100000";
value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
keyWritable = new BytesWritable(key);
valueWritable = new ByteWritable(value);
@@ -489,6 +792,58 @@
kmerCountValue.setAdjBitMap(arrayOfValues.get(i).get());
writer.append(arrayOfKeys.get(i), kmerCountValue);
}
+
+ lists.add("111101"); //TTC
+ lists.add("011001"); //CGC
+ binaryString = "";
+ for(int i = 1; i < s2.length()-GraphVertexOperation.k; i++){
+ binaryString = GraphVertexOperation.convertGeneCodeToBinaryString(s2.substring(i,i+GraphVertexOperation.k));
+ if(lists.contains(binaryString)){
+ System.out.println("error: " + binaryString);
+ return;
+ }
+ lists.add(binaryString);
+ outKey = new BytesWritable(BitwiseOperation.convertBinaryStringToBytes(binaryString));
+ outValue = new KmerCountValue();
+ adjBitMap = GraphVertexOperation.getPrecursorFromGeneCode((byte)0, s2.charAt(i-1));
+ adjBitMap = GraphVertexOperation.getSucceedFromGeneCode(adjBitMap, s2.charAt(i+GraphVertexOperation.k));
+ outValue.setAdjBitMap(adjBitMap);
+ writer.append(outKey, outValue);
+ }
+ /**
+ * CAG - AGC ------ TAT - ATA
+ * GAG ATC
+ */
+ // TTC
+ tmpKey = "111101";
+ key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
+ tmpValue = "00000001";
+ value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
+ keyWritable = new BytesWritable(key);
+ valueWritable = new ByteWritable(value);
+
+ arrayOfKeys = new ArrayList<BytesWritable>();
+ arrayOfKeys.add(keyWritable);
+ arrayOfValues = new ArrayList<ByteWritable>();
+ arrayOfValues.add(valueWritable);
+
+ // CGC
+ tmpKey = "011001";
+ key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
+ tmpValue = "00100000";
+ value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
+ keyWritable = new BytesWritable(key);
+ valueWritable = new ByteWritable(value);
+ arrayOfKeys.add(keyWritable);
+ arrayOfValues.add(valueWritable);
+
+ kmerCountValue = null;
+ //write to sequence file
+ for(int i = 0; i < arrayOfKeys.size(); i++){
+ kmerCountValue = new KmerCountValue();
+ kmerCountValue.setAdjBitMap(arrayOfValues.get(i).get());
+ writer.append(arrayOfKeys.get(i), kmerCountValue);
+ }
writer.close();
}
public static byte[] hexStringToByteArray(String s) {
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/TestLoadGraphVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/TestLoadGraphVertex.java
index d238e9d..733d683 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/TestLoadGraphVertex.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/TestLoadGraphVertex.java
@@ -1,5 +1,6 @@
package edu.uci.ics.pregelix;
+import java.util.Arrays;
import java.util.Iterator;
import org.apache.hadoop.io.ByteWritable;
@@ -8,6 +9,8 @@
import edu.uci.ics.pregelix.api.graph.Vertex;
import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.bitwise.BitwiseOperation;
+import edu.uci.ics.pregelix.example.GraphMutationVertex;
import edu.uci.ics.pregelix.example.client.Client;
import edu.uci.ics.pregelix.example.io.MessageWritable;
@@ -41,11 +44,49 @@
*/
public class TestLoadGraphVertex extends Vertex<BytesWritable, ByteWritable, NullWritable, MessageWritable>{
+ private byte[] tmpVertexId;
+ private BytesWritable vid;
+ private TestLoadGraphVertex newVertex;
+ private MessageWritable tmpMsg = new MessageWritable();
/**
* For test, just output original file
*/
@Override
public void compute(Iterator<MessageWritable> msgIterator) {
+ deleteVertex(getVertexId());
+ /*tmpVertexId = getVertexId().getBytes();
+ String a1 = "100100";
+ byte[] b1 = BitwiseOperation.convertBinaryStringToBytes(a1);
+ String a2 = "000000"; //"001001";
+ byte[] b2 = BitwiseOperation.convertBinaryStringToBytes(a2);
+ String valueString = "00000000";
+ byte value = BitwiseOperation.convertBinaryStringToByte(valueString);
+ if(getSuperstep() == 1 && Arrays.equals(b1,tmpVertexId)){
+ newVertex = new TestLoadGraphVertex();
+ vid.set(new BytesWritable(b2));
+ newVertex.setVertexId(vid);
+ newVertex.setVertexValue(getVertexValue());
+ addVertex(vid, newVertex);
+ //vertex.initialize(new BytesWritable(b2), new ByteWritable(value), null, null);
+ //addVertex(new BytesWritable(b2),this.createdNewLiveVertex());
+ //deleteVertex(getVertexId());
+ }*/
+ /*String a2 = "100111";
+ byte[] b2 = BitwiseOperation.convertBinaryStringToBytes(a2);
+ String a3 = "11111111";
+ byte[] b3 = BitwiseOperation.convertBinaryStringToBytes(a3);
+ byte[] bb1 = getVertexId().getBytes();
+ if(getSuperstep() == 1 && Arrays.equals(b1,bb1)){
+ //addVertex(new BytesWritable(b3),new ByteWritable(bb1[0]));
+ deleteVertex(new BytesWritable(b1));
+ }
+ else if(getSuperstep() == 2){
+ if(msgIterator.hasNext()){
+ tmpMsg = msgIterator.next();
+ byte[] b = tmpMsg.getChainVertexId();
+ setVertexValue(new ByteWritable(b[0]));
+ }
+ }*/
voteToHalt();
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/example/io/LogAlgorithmMessageWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/example/io/LogAlgorithmMessageWritable.java
index b7ac6ee..e2d33c1 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/example/io/LogAlgorithmMessageWritable.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/example/io/LogAlgorithmMessageWritable.java
@@ -7,6 +7,8 @@
import org.apache.hadoop.io.WritableComparable;
+import edu.uci.ics.pregelix.GraphVertexOperation;
+
public class LogAlgorithmMessageWritable implements WritableComparable<LogAlgorithmMessageWritable>{
/**
* sourceVertexId stores source vertexId when headVertex sends the message
@@ -20,7 +22,6 @@
private File file;
private int message;
private int sourceVertexState;
- private static int k = 3;
public LogAlgorithmMessageWritable(){
}
@@ -107,7 +108,7 @@
else
chainVertexId = new byte[0];
if(lengthOfChain % 2 == 0)
- sourceVertexIdOrNeighberInfo = new byte[(k-1)/4 + 1];
+ sourceVertexIdOrNeighberInfo = new byte[(GraphVertexOperation.k-1)/4 + 1];
else
sourceVertexIdOrNeighberInfo = new byte[1];
in.readFully(sourceVertexIdOrNeighberInfo);
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/example/io/MessageWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/example/io/MessageWritable.java
index eed352d..7861247 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/example/io/MessageWritable.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/pregelix/example/io/MessageWritable.java
@@ -7,6 +7,9 @@
import org.apache.hadoop.io.WritableComparable;
+import edu.uci.ics.pregelix.GraphVertexOperation;
+import edu.uci.ics.pregelix.api.graph.Vertex;
+
public class MessageWritable implements WritableComparable<MessageWritable>{
/**
* sourceVertexId stores source vertexId when headVertex sends the message
@@ -14,30 +17,39 @@
* chainVertexId stores the chains of connected DNA
* file stores the point to the file that stores the chains of connected DNA
*/
- private byte[] sourceVertexIdOrNeighberInfo;
+ private byte[] sourceVertexId;
+ private byte neighberInfo;
private byte[] chainVertexId;
private File file;
private boolean isRear;
private int lengthOfChain;
- private static int k = 3;
public MessageWritable(){
}
- public void set(byte[] sourceVertexIdOrNeighberInfo, byte[] chainVertexId, File file){
- this.sourceVertexIdOrNeighberInfo = sourceVertexIdOrNeighberInfo;
+ public void set(byte[] sourceVertexId, byte neighberInfo, byte[] chainVertexId, File file){
+ this.sourceVertexId = sourceVertexId;
+ this.neighberInfo = neighberInfo;
this.chainVertexId = chainVertexId;
this.file = file;
this.isRear = false;
this.lengthOfChain = 0;
}
- public byte[] getSourceVertexIdOrNeighberInfo() {
- return sourceVertexIdOrNeighberInfo;
+ public byte[] getSourceVertexId() {
+ return sourceVertexId;
}
- public void setSourceVertexIdOrNeighberInfo(byte[] sourceVertexIdOrNeighberInfo) {
- this.sourceVertexIdOrNeighberInfo = sourceVertexIdOrNeighberInfo;
+ public void setSourceVertexId(byte[] sourceVertexId) {
+ this.sourceVertexId = sourceVertexId;
+ }
+
+ public byte getNeighberInfo() {
+ return neighberInfo;
+ }
+
+ public void setNeighberInfo(byte neighberInfo) {
+ this.neighberInfo = neighberInfo;
}
public byte[] getChainVertexId() {
@@ -45,6 +57,20 @@
}
public void setChainVertexId(byte[] chainVertexId) {
+ /*if(lengthOfChain == 0){
+ this.chainVertexId = chainVertexId;
+ return;
+ }
+ int numOfByte = (2*lengthOfChain-1)/8 + 1;
+ if(chainVertexId.length == numOfByte)
+ this.chainVertexId = chainVertexId;
+ else{
+ byte[] tmp = new byte[numOfByte];
+ for(int i = 0; i < numOfByte; i++)
+ tmp[i] = chainVertexId[i];
+ this.chainVertexId = tmp;
+ }*/
+
this.chainVertexId = chainVertexId;
}
@@ -82,7 +108,8 @@
out.writeInt(lengthOfChain);
if(lengthOfChain != 0)
out.write(chainVertexId);
- out.write(sourceVertexIdOrNeighberInfo);
+ out.write(sourceVertexId);
+ out.write(neighberInfo);
out.writeBoolean(isRear);
}
@@ -96,11 +123,9 @@
}
else
chainVertexId = new byte[0];
- if(lengthOfChain % 2 == 0)
- sourceVertexIdOrNeighberInfo = new byte[(k-1)/4 + 1];
- else
- sourceVertexIdOrNeighberInfo = new byte[1];
- in.readFully(sourceVertexIdOrNeighberInfo);
+ sourceVertexId = new byte[(GraphVertexOperation.k-1)/4 + 1];
+ in.readFully(sourceVertexId);
+ neighberInfo = in.readByte();
isRear = in.readBoolean();
}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobGen/JobGenerator.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobGen/JobGenerator.java
index 7399a3b..52e9f38 100644
--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobGen/JobGenerator.java
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobGen/JobGenerator.java
@@ -15,7 +15,6 @@
import edu.uci.ics.pregelix.LoadGraphVertex;
import edu.uci.ics.pregelix.MergeGraphVertex;
import edu.uci.ics.pregelix.LoadGraphVertex.SimpleLoadGraphVertexOutputFormat;
-import edu.uci.ics.pregelix.TestLoadGraphVertex;
import edu.uci.ics.pregelix.TextLoadGraphInputFormat;
import edu.uci.ics.pregelix.api.job.PregelixJob;
@@ -64,6 +63,7 @@
public static void main(String[] args) throws IOException {
// TODO Auto-generated method stub
genBinaryLoadGraph();
+ //genSequenceLoadGraph();
//genBasicBinaryLoadGraph();
}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobRun/RunJobTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobRun/RunJobTestSuite.java
index 1649e79..090c57a 100644
--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobRun/RunJobTestSuite.java
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/pregelix/JobRun/RunJobTestSuite.java
@@ -40,7 +40,7 @@
private static final String PATH_TO_ONLY = "src/test/resources/only.txt";
private static final String FILE_EXTENSION_OF_RESULTS = "result";
- private static final String DATA_PATH = "data/webmap/sequenceFileMergeTest";//sequenceFileMergeTest
+ private static final String DATA_PATH = "data/webmap/sequenceShortFileMergeTest";//sequenceFileMergeTest
private static final String HDFS_PATH = "/webmap/";
private static final String HYRACKS_APP_NAME = "pregelix";
diff --git a/genomix/genomix-pregelix/src/test/resources/expected/BinaryLoadGraph.result b/genomix/genomix-pregelix/src/test/resources/expected/BinaryLoadGraph.result
index 2a98362..676e5c8 100644
--- a/genomix/genomix-pregelix/src/test/resources/expected/BinaryLoadGraph.result
+++ b/genomix/genomix-pregelix/src/test/resources/expected/BinaryLoadGraph.result
@@ -1,9 +1,8 @@
-Vertex(id=24,value=100, edges=())
-Vertex(id=30,value=-128, edges=())
-Vertex(id=34,value=-128, edges=())
-Vertex(id=48,value=2, edges=())
-Vertex(id=6c,value=65, edges=())
-Vertex(id=88,value=2, edges=())
-Vertex(id=9b 00,value=24, edges=())
-Vertex(id=b0,value=40, edges=())
-Vertex(id=cc,value=67, edges=())
+Vertex(id=24,value=1, edges=())
+Vertex(id=38,value=34, edges=())
+Vertex(id=4c,value=68, edges=())
+Vertex(id=70,value=72, edges=())
+Vertex(id=90,value=24, edges=())
+Vertex(id=9c,value=-127, edges=())
+Vertex(id=cc,value=32, edges=())
+Vertex(id=e4,value=24, edges=())