Delete unused functions
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/GraphVertexOperation.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/GraphVertexOperation.java
index c8aea7c..6a5299b 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/GraphVertexOperation.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/GraphVertexOperation.java
@@ -1,32 +1,23 @@
package edu.uci.ics.genomix.pregelix;
-import java.io.DataOutputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.util.ArrayList;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.SequenceFile.CompressionType;
import edu.uci.ics.genomix.type.Kmer;
import edu.uci.ics.genomix.type.KmerUtil;
-import edu.uci.ics.genomix.pregelix.bitwise.BitwiseOperation;
-import edu.uci.ics.genomix.pregelix.io.LogAlgorithmMessageWritable;
-import edu.uci.ics.genomix.pregelix.io.MessageWritable;
-import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
-import edu.uci.ics.genomix.pregelix.sequencefile.GenerateSequenceFile;
public class GraphVertexOperation {
- public static final int k = 5; //kmer, k: the length of kmer
- public static final int numBytes = (GraphVertexOperation.k-1)/4 + 1;
- static private final Path TMP_DIR = new Path(
- GenerateSequenceFile.class.getSimpleName() + "_INTERIM");
+
+ /**
+ * Copy the valid data of a BytesWritable, i.e. the first getLength() bytes of the array returned by getBytes(), into a new byte[] of exactly that length
+ */
+ public static byte[] generateValidDataFromBytesWritable(BytesWritable bw){
+ byte[] wholeBytes = bw.getBytes(); // array as exposed by the writable; only its first getLength() bytes are treated as valid here
+ int validNum = bw.getLength(); // number of valid bytes to copy
+ byte[] validBytes = new byte[validNum]; // result is sized to the valid length, not to wholeBytes.length
+ for(int i = 0; i < validNum; i++)
+ validBytes[i] = wholeBytes[i];
+ return validBytes; // fresh array, so later writes to it do not touch bw's array
+ }
/**
* Single Vertex: in-degree = out-degree = 1
* @param vertexValue
@@ -54,218 +45,15 @@
return true;
return false;
}
- /**
- * Head Vertex: in-degree != 1, out-degree = 1,
- * @param vertexValue
- */
- public static boolean isHead(byte value){
- if(KmerUtil.inDegree(value) != 1 && KmerUtil.outDegree(value) == 1)
- return true;
- return false;
- }
/**
- * Rear Vertex: in-degree = 1, out-degree != 1,
- * @param vertexValue
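+ * update the right-neighbor bits (low 4 bits) of oldVertexValue from the last gene code of the kmer of length k stored in neighberVertexId; the high 4 bits are kept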
*/
- public static boolean isRear(byte value){
- if(KmerUtil.inDegree(value) == 1 && KmerUtil.outDegree(value) != 1)
- return true;
- return false;
- }
- /**
- * write Kmer to Sequence File for test
- * @param arrayOfKeys
- * @param arrayOfValues
- * @param step
- * @throws IOException
- */
- public void writeKmerToSequenceFile(ArrayList<BytesWritable> arrayOfKeys, ArrayList<ByteWritable> arrayOfValues, long step) throws IOException{
+ public static byte updateRightNeighberByVertexId(byte oldVertexValue, byte[] neighberVertexId, int k){
- Configuration conf = new Configuration();
- Path outDir = new Path(TMP_DIR, "out");
- Path outFile = new Path(outDir, "B" + Long.toString(step));
- FileSystem fileSys = FileSystem.get(conf);
- SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
- outFile, BytesWritable.class, ByteWritable.class,
- CompressionType.NONE);
-
- //wirte to sequence file
- for(int i = 0; i < arrayOfKeys.size(); i++)
- writer.append(arrayOfKeys.get(i), arrayOfValues.get(i));
- writer.close();
- }
- /**
- * check what kind of succeed node
- * return 0:A 1:C 2:G 3:T 4:nothing
- */
- public static int findSucceedNode(byte vertexValue){
- String firstBit = "00000001"; //A
- String secondBit = "00000010"; //C
- String thirdBit = "00000100"; //G
- String fourthBit = "00001000"; //T
- int first = BitwiseOperation.convertBinaryStringToByte(firstBit) & 0xff;
- int second = BitwiseOperation.convertBinaryStringToByte(secondBit) & 0xff;
- int third = BitwiseOperation.convertBinaryStringToByte(thirdBit) & 0xff;
- int fourth = BitwiseOperation.convertBinaryStringToByte(fourthBit) & 0xff;
- int value = vertexValue & 0xff;
- int tmp = value & first;
- if(tmp != 0)
- return Kmer.GENE_CODE.A;
- else{
- tmp = value & second;
- if(tmp != 0)
- return Kmer.GENE_CODE.C;
- else{
- tmp = value & third;
- if(tmp != 0)
- return Kmer.GENE_CODE.G;
- else{
- tmp = value & fourth;
- if(tmp != 0)
- return Kmer.GENE_CODE.T;
- else
- return 4;
- }
- }
- }
- }
- /**
- * check what kind of precursor node
- * return 0:A 1:C 2:G 3:T 4:nothing
- */
- public static int findPrecursorNode(byte vertexValue){
- String firstBit = "00010000"; //A
- String secondBit = "00100000"; //C
- String thirdBit = "01000000"; //G
- String fourthBit = "10000000"; //T
- int first = BitwiseOperation.convertBinaryStringToByte(firstBit) & 0xff;
- int second = BitwiseOperation.convertBinaryStringToByte(secondBit) & 0xff;
- int third = BitwiseOperation.convertBinaryStringToByte(thirdBit) & 0xff;
- int fourth = BitwiseOperation.convertBinaryStringToByte(fourthBit) & 0xff;
- int value = vertexValue & 0xff;
- int tmp = value & first;
- if(tmp != 0)
- return Kmer.GENE_CODE.A;
- else{
- tmp = value & second;
- if(tmp != 0)
- return Kmer.GENE_CODE.C;
- else{
- tmp = value & third;
- if(tmp != 0)
- return Kmer.GENE_CODE.G;
- else{
- tmp = value & fourth;
- if(tmp != 0)
- return Kmer.GENE_CODE.T;
- else
- return 4;
- }
- }
- }
- }
- /**
- * replace last two bits based on n
- * Ex. 01 10 00(nothing) -> 01 10 00(A)/01(C)/10(G)/11(T)
- */
- public static byte[] replaceLastTwoBits(byte[] vertexId, int n){
- String binaryStringVertexId = BitwiseOperation.convertBytesToBinaryStringKmer(vertexId, k);
- String resultString = "";
- for(int i = 0; i < binaryStringVertexId.length()-2; i++)
- resultString += binaryStringVertexId.charAt(i);
- switch(n){
- case 0:
- resultString += "00";
- break;
- case 1:
- resultString += "01";
- break;
- case 2:
- resultString += "10";
- break;
- case 3:
- resultString += "11";
- break;
- default:
- break;
- }
-
- return BitwiseOperation.convertBinaryStringToBytes(resultString);
- }
- /**
- * replace first two bits based on n
- * Ex. 01 10 00(nothing) -> 00(A)/01(C)/10(G)/11(T) 10 00
- */
- public static byte[] replaceFirstTwoBits(byte[] vertexId, int n){
- String binaryStringVertexId = BitwiseOperation.convertBytesToBinaryStringKmer(vertexId, k);
- String resultString = "";
- switch(n){
- case 0:
- resultString += "00";
- break;
- case 1:
- resultString += "01";
- break;
- case 2:
- resultString += "10";
- break;
- case 3:
- resultString += "11";
- break;
- default:
- break;
- }
- for(int i = 2; i < binaryStringVertexId.length(); i++)
- resultString += binaryStringVertexId.charAt(i);
- return BitwiseOperation.convertBinaryStringToBytes(resultString);
- }
- /**
- * find the vertexId of the destination node - left neighber
- */
- public static byte[] getDestVertexId(byte[] sourceVertexId, byte vertexValue){
- byte[] destVertexId = BitwiseOperation.shiftBitsLeft(sourceVertexId, 2);
- return replaceLastTwoBits(destVertexId, findSucceedNode(vertexValue));
- }
- /**
- * find the vertexId of the destination node - right neighber
- */
- public static byte[] getLeftDestVertexId(byte[] sourceVertexId, byte vertexValue){
- byte[] destVertexId = BitwiseOperation.shiftBitsRight(sourceVertexId, 2);
- return replaceFirstTwoBits(destVertexId, findPrecursorNode(vertexValue));
- }
- /**
- * update the chain vertexId
- */
- public static byte[] updateChainVertexId(byte[] chainVertexId, int lengthOfChainVertex, byte[] newVertexId){
- return BitwiseOperation.addLastTwoBits(chainVertexId,lengthOfChainVertex,BitwiseOperation.getLastTwoBits(newVertexId,k));
- }
- /**
- * get the first kmer from chainVertexId
- */
- public static byte[] getFirstKmer(byte[] chainVertexId){
- String originalVertexId = BitwiseOperation.convertBytesToBinaryString(chainVertexId);
- return BitwiseOperation.convertBinaryStringToBytes(originalVertexId.substring(0,k-1));
- }
- /**
- * get the last kmer from chainVertexId
- */
- public static byte[] getLastKmer(byte[] chainVertexId, int lengthOfChainVertex){
- String originalVertexId = BitwiseOperation.convertBytesToBinaryString(chainVertexId);
- return BitwiseOperation.convertBinaryStringToBytes(originalVertexId.substring(2*(lengthOfChainVertex-k),2*lengthOfChainVertex));
- }
- /**
- * read vertexId from RecordReader
- */
- public static BytesWritable readVertexIdFromRecordReader(BytesWritable currentKey){
- String finalBinaryString = BitwiseOperation.convertBytesToBinaryStringKmer(currentKey.getBytes(),k);
- return new BytesWritable(BitwiseOperation.convertBinaryStringToBytes(finalBinaryString));
- }
- /**
- * merge two BytesWritable. Ex. merge two vertexId
- */
- public static byte[] mergeTwoChainVertex(byte[] b1, int length, byte[] b2, int length2){
- String s2 = BitwiseOperation.convertBytesToBinaryString(b2).substring(2*k-2,2*length2);
- return BitwiseOperation.mergeTwoBytesArray(b1, length, BitwiseOperation.convertBinaryStringToBytes(s2), length2-k+1);
+ String neighberVertex = Kmer.recoverKmerFrom(k, neighberVertexId, 0, neighberVertexId.length);
+
+ byte newBit = Kmer.GENE_CODE.getAdjBit((byte)neighberVertex.charAt(neighberVertex.length() - 1));
+ return (byte) ((byte)(oldVertexValue & 0xF0) | (byte) (newBit & 0x0F));
}
/**
* update right neighber
@@ -273,321 +61,4 @@
public static byte updateRightNeighber(byte oldVertexValue, byte newVertexValue){
return (byte) ((byte)(oldVertexValue & 0xF0) | (byte) (newVertexValue & 0x0F));
}
- /**
- * update right neighber based on next vertexId
- */
- public static byte updateRightNeighberByVertexId(byte oldVertexValue, byte[] neighberVertexId){
-
- String neighberVertex = Kmer.recoverKmerFrom(GraphVertexOperation.k, neighberVertexId, 0, neighberVertexId.length);
-
- byte newBit = Kmer.GENE_CODE.getAdjBit((byte)neighberVertex.charAt(neighberVertex.length() - 1));
- return (byte) ((byte)(oldVertexValue & 0xF0) | (byte) (newBit & 0x0F));
- /*String oldVertex = BitwiseOperation.convertByteToBinaryString(oldVertexValue);
- String neighber = BitwiseOperation.convertBytesToBinaryStringKmer(neighberVertexId, k);
- String lastTwoBits = neighber.substring(2*k-2,2*k);
- if(lastTwoBits.compareTo("00") == 0)
- return BitwiseOperation.convertBinaryStringToByte(oldVertex.substring(0,4) + "0001");
- else if(lastTwoBits.compareTo("01") == 0)
- return BitwiseOperation.convertBinaryStringToByte(oldVertex.substring(0,4) + "0010");
- else if(lastTwoBits.compareTo("10") == 0)
- return BitwiseOperation.convertBinaryStringToByte(oldVertex.substring(0,4) + "0100");
- else if(lastTwoBits.compareTo("11") == 0)
- return BitwiseOperation.convertBinaryStringToByte(oldVertex.substring(0,4) + "1000");
-
- return (Byte) null;*/
- }
- /**
- * get precursor in vertexValue from gene code
- */
- public static byte getPrecursorFromGeneCode(byte vertexValue, char precursor){
- String oldVertex = BitwiseOperation.convertByteToBinaryString(vertexValue);
- switch(precursor){
- case 'A':
- return BitwiseOperation.convertBinaryStringToByte("0001" + oldVertex.substring(0,4));
- case 'C':
- return BitwiseOperation.convertBinaryStringToByte("0010" + oldVertex.substring(0,4));
- case 'G':
- return BitwiseOperation.convertBinaryStringToByte("0100" + oldVertex.substring(0,4));
- case 'T':
- return BitwiseOperation.convertBinaryStringToByte("1000" + oldVertex.substring(0,4));
- default:
- return (Byte) null;
- }
- }
- /**
- * get succeed in vertexValue from gene code
- */
- public static byte getSucceedFromGeneCode(byte vertexValue, char succeed){
- String oldVertex = BitwiseOperation.convertByteToBinaryString(vertexValue);
- switch(succeed){
- case 'A':
- return BitwiseOperation.convertBinaryStringToByte(oldVertex.substring(0,4) + "0001");
- case 'C':
- return BitwiseOperation.convertBinaryStringToByte(oldVertex.substring(0,4) + "0010");
- case 'G':
- return BitwiseOperation.convertBinaryStringToByte(oldVertex.substring(0,4) + "0100");
- case 'T':
- return BitwiseOperation.convertBinaryStringToByte(oldVertex.substring(0,4) + "1000");
- default:
- return (Byte) null;
- }
- }
- /**
- * convert gene code to binary string
- */
- public static String convertGeneCodeToBinaryString(String gene){
- String result = "";
- for(int i = 0; i < gene.length(); i++){
- switch(gene.charAt(i)){
- case 'A':
- result += "00";
- break;
- case 'C':
- result += "01";
- break;
- case 'G':
- result += "10";
- break;
- case 'T':
- result += "11";
- break;
- default:
- break;
- }
- }
- return result;
- }
- /**
- * flush chainVertexId to file -- local file and hdfs file
- * @throws IOException
- */
- public static void flushChainToFile(byte[] chainVertexId, int lengthOfChain, byte[] vertexId) throws IOException{
- DataOutputStream out = new DataOutputStream(new
- FileOutputStream("data/ChainVertex"));
- out.write(vertexId);
- out.writeInt(lengthOfChain);
- out.write(chainVertexId);
- out.close();
- //String srcFile = "data/ChainVertex";
- //String dstFile = "testHDFS/output/ChainVertex";
- //HDFSOperation.copyFromLocalFile(srcFile, dstFile);
- }
- /**
- * convert binaryString to geneCode
- */
- public static String convertBinaryStringToGenecode(String kmer){
- String result = "";
- for(int i = 0; i < kmer.length() ; ){
- String substring = kmer.substring(i,i+2);
- if(substring.compareTo("00") == 0)
- result += "A";
- else if(substring.compareTo("01") == 0)
- result += "C";
- else if(substring.compareTo("10") == 0)
- result += "G";
- else if(substring.compareTo("11") == 0)
- result += "T";
- i = i+2;
- }
- return result;
- }
- /**
- * generate the valid data(byte[]) from BytesWritable
- */
- public static byte[] generateValidDataFromBytesWritable(BytesWritable bw){
- byte[] wholeBytes = bw.getBytes();
- int validNum = bw.getLength();
- byte[] validBytes = new byte[validNum];
- for(int i = 0; i < validNum; i++)
- validBytes[i] = wholeBytes[i];
- return validBytes;
- }
- /**
- * output test for message communication
- */
- public static void testMessageCommunication(OutputStreamWriter writer, long step, byte[] tmpSourceVertextId,
- byte[] tmpDestVertexId, MessageWritable tmpMsg){
- //test
- String kmer = BitwiseOperation.convertBytesToBinaryStringKmer(
- tmpSourceVertextId,GraphVertexOperation.k);
- try {
- writer.write("Step: " + step + "\r\n");
- writer.write("Source Key: " + kmer + "\r\n");
-
- writer.write("Source Code: " +
- GraphVertexOperation.convertBinaryStringToGenecode(kmer) + "\r\n");
- writer.write("Send Message to: " +
- GraphVertexOperation.convertBinaryStringToGenecode(
- BitwiseOperation.convertBytesToBinaryStringKmer(
- tmpDestVertexId,GraphVertexOperation.k)) + "\r\n");
- writer.write("Chain Message: " +
- GraphVertexOperation.convertBinaryStringToGenecode(
- BitwiseOperation.convertBytesToBinaryString(
- tmpMsg.getChainVertexId())) + "\r\n");
- writer.write("Chain Length: " + tmpMsg.getLengthOfChain() + "\r\n");
- writer.write("\r\n");
- } catch (IOException e) { e.printStackTrace(); }
- return;
- }
- /**
- * output test for message communication
- */
- public static void testMessageCommunication2(OutputStreamWriter writer, long step, byte[] tmpSourceVertextId,
- byte[] tmpDestVertexId, LogAlgorithmMessageWritable tmpMsg, byte[] myownId){
- //test
- String kmer = BitwiseOperation.convertBytesToBinaryStringKmer(
- tmpSourceVertextId,GraphVertexOperation.k);
- try {
- writer.write("Step: " + step + "\r\n");
- writer.write("Source Key: " + kmer + "\r\n");
-
- writer.write("Source Code: " +
- GraphVertexOperation.convertBinaryStringToGenecode(kmer) + "\r\n");
- writer.write("Send Message to: " +
- GraphVertexOperation.convertBinaryStringToGenecode(
- BitwiseOperation.convertBytesToBinaryStringKmer(
- tmpDestVertexId,GraphVertexOperation.k)) + "\r\n");
- if(tmpMsg.getLengthOfChain() != 0){
- writer.write("Chain Message: " +
- GraphVertexOperation.convertBinaryStringToGenecode(
- BitwiseOperation.convertBytesToBinaryString(
- tmpMsg.getChainVertexId())) + "\r\n");
- writer.write("Chain Length: " + tmpMsg.getLengthOfChain() + "\r\n");
- }
- if(myownId != null)
- writer.write("My own Id is: " +
- GraphVertexOperation.convertBinaryStringToGenecode(
- BitwiseOperation.convertBytesToBinaryStringKmer(
- myownId,GraphVertexOperation.k)) + "\r\n");
- if(tmpMsg.getMessage() != 0)
- writer.write("Message is: " + tmpMsg.getMessage() + "\r\n");
- writer.write("\r\n");
- } catch (IOException e) { e.printStackTrace(); }
- return;
- }
- /**
- * output test for last message communication -- flush
- */
- public static void testLastMessageCommunication(OutputStreamWriter writer, long step, byte[] tmpVertextId,
- byte[] tmpSourceVertextId, MessageWritable tmpMsg){
- String kmer = BitwiseOperation.convertBytesToBinaryStringKmer(
- tmpVertextId,GraphVertexOperation.k);
- try {
- writer.write("Step: " + step + "\r\n");
- writer.write("Over!" + "\r\n");
- writer.write("Source Key: " + kmer + "\r\n");
-
- writer.write("Source Code: " +
- GraphVertexOperation.convertBinaryStringToGenecode(kmer) + "\r\n");
-
- writer.write("Flush Chain Message: " +
- GraphVertexOperation.convertBinaryStringToGenecode(
- BitwiseOperation.convertBytesToBinaryString(
- tmpMsg.getChainVertexId())) + "\r\n");
- writer.write("Chain Length: " + tmpMsg.getLengthOfChain() + "\r\n");
- writer.write("\r\n");
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- }
- /**
- * output test for log message communication
- */
- public static void testLogMessageCommunication(OutputStreamWriter writer, long step, byte[] tmpSourceVertextId,
- byte[] tmpDestVertexId, LogAlgorithmMessageWritable tmpMsg){
- //test
- String kmer = BitwiseOperation.convertBytesToBinaryStringKmer(
- tmpSourceVertextId,GraphVertexOperation.k);
- try {
- writer.write("Step: " + step + "\r\n");
- writer.write("Source Key: " + kmer + "\r\n");
-
- writer.write("Source Code: " +
- GraphVertexOperation.convertBinaryStringToGenecode(kmer) + "\r\n");
- writer.write("Send Message to: " +
- GraphVertexOperation.convertBinaryStringToGenecode(
- BitwiseOperation.convertBytesToBinaryStringKmer(
- tmpDestVertexId,GraphVertexOperation.k)) + "\r\n");
- writer.write("Message is: " +
- tmpMsg.getMessage() + "\r\n");
- writer.write("\r\n");
- } catch (IOException e) { e.printStackTrace(); }
- return;
- }
- /**
- * test set vertex state
- */
- public static void testSetVertexState(OutputStreamWriter writer, long step,byte[] tmpSourceVertextId,
- byte[] tmpDestVertexId, LogAlgorithmMessageWritable tmpMsg, ValueStateWritable tmpVal){
- //test
- String kmer = BitwiseOperation.convertBytesToBinaryStringKmer(
- tmpSourceVertextId,GraphVertexOperation.k);
- try {
- writer.write("Step: " + step + "\r\n");
- writer.write("Source Key: " + kmer + "\r\n");
-
- writer.write("Source Code: " +
- GraphVertexOperation.convertBinaryStringToGenecode(kmer) + "\r\n");
- if(tmpDestVertexId != null && tmpMsg != null){
- writer.write("Send Message to: " +
- GraphVertexOperation.convertBinaryStringToGenecode(
- BitwiseOperation.convertBytesToBinaryStringKmer(
- tmpDestVertexId,GraphVertexOperation.k)) + "\r\n");
- writer.write("Message is: " +
- tmpMsg.getMessage() + "\r\n");
- }
- writer.write("Set vertex state to " +
- tmpVal.getState() + "\r\n");
- writer.write("\r\n");
-
- } catch (IOException e) { e.printStackTrace(); }
- return;
- }
- /**
- * test delete vertex information
- */
- public static void testDeleteVertexInfo(OutputStreamWriter writer, long step, byte[] vertexId, String reason){
- try {
- writer.write("Step: " + step + "\r\n");
- writer.write(reason + "\r\n");
- writer.write("delete " + BitwiseOperation.convertBytesToBinaryStringKmer(vertexId, GraphVertexOperation.k)
- + "\t" + GraphVertexOperation.convertBinaryStringToGenecode(
- BitwiseOperation.convertBytesToBinaryStringKmer(
- vertexId,GraphVertexOperation.k)) + "\r\n");
- writer.write("\r\n");
- } catch (IOException e) { e.printStackTrace(); }
- return;
- }
- /**
- * test voteToHalt vertex information
- */
- public static void testVoteVertexInfo(OutputStreamWriter writer, long step, byte[] vertexId, String reason){
- try {
- writer.write("Step: " + step + "\r\n");
- writer.write(reason + "\r\n");
- writer.write("voteToHalt " + BitwiseOperation.convertBytesToBinaryStringKmer(vertexId, GraphVertexOperation.k)
- + "\t" + GraphVertexOperation.convertBinaryStringToGenecode(
- BitwiseOperation.convertBytesToBinaryStringKmer(
- vertexId,GraphVertexOperation.k)) + "\r\n");
- writer.write("\r\n");
- } catch (IOException e) { e.printStackTrace(); }
- return;
- }
- /**
- * test merge chain vertex
- */
- public static void testMergeChainVertex(OutputStreamWriter writer, long step, byte[] mergeChain,
- int lengthOfChain){
- try {
- writer.write("Step: " + step + "\r\n");
- writer.write("Merge Chain: " +
- GraphVertexOperation.convertBinaryStringToGenecode(
- BitwiseOperation.convertBytesToBinaryString(
- mergeChain)) + "\r\n");
- writer.write("Chain Length: " + lengthOfChain + "\r\n");
- writer.write("\r\n");
- } catch (IOException e) { e.printStackTrace(); }
- return;
- }
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/LogAlgorithmForMergeGraphVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/LogAlgorithmForMergeGraphVertex.java
index 2264301..7034239 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/LogAlgorithmForMergeGraphVertex.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/LogAlgorithmForMergeGraphVertex.java
@@ -46,7 +46,9 @@
* The details about message are in edu.uci.ics.pregelix.example.io.MessageWritable.
*/
public class LogAlgorithmForMergeGraphVertex extends Vertex<BytesWritable, ValueStateWritable, NullWritable, LogAlgorithmMessageWritable>{
-
+ public static final String KMER_SIZE = "LogAlgorithmForMergeGraphVertex.kmerSize";
+ public static int kmerSize = -1;
+
private byte[] tmpVertexId;
private byte[] tmpDestVertexId;
private BytesWritable destVertexId = new BytesWritable();
@@ -59,9 +61,17 @@
* Log Algorithm for path merge graph
*/
+ /**
+ * No-arg constructor; performs no initialization. kmerSize is not loaded here: compute() reads it from the job configuration (KMER_SIZE, default 5) while it is still -1
+ */
+ public LogAlgorithmForMergeGraphVertex(){
+
+ }
+
@Override
public void compute(Iterator<LogAlgorithmMessageWritable> msgIterator) {
-
+ if(kmerSize == -1)
+ kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
tmpVertexId = GraphVertexOperation.generateValidDataFromBytesWritable(getVertexId());
tmpVal = getVertexValue();
if (getSuperstep() == 1) {
@@ -70,7 +80,7 @@
tmpMsg.setMessage(Message.START);
for(byte x = Kmer.GENE_CODE.A; x<= Kmer.GENE_CODE.T ; x++){
if((tmpVal.getValue() & (1 << x)) != 0){
- tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(GraphVertexOperation.k, tmpVertexId, 0, tmpVertexId.length,x);
+ tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(kmerSize, tmpVertexId, 0, tmpVertexId.length, x);
destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
sendMsg(destVertexId,tmpMsg);
}
@@ -82,7 +92,7 @@
for(byte x = Kmer.GENE_CODE.A; x<= Kmer.GENE_CODE.T ; x++){
if(((tmpVal.getValue()>> 4) & (1 << x)) != 0){
- tmpDestVertexId = KmerUtil.shiftKmerWithPreCode(GraphVertexOperation.k, tmpVertexId, 0, tmpVertexId.length, x);
+ tmpDestVertexId = KmerUtil.shiftKmerWithPreCode(kmerSize, tmpVertexId, 0, tmpVertexId.length, x);
destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
sendMsg(destVertexId,tmpMsg);
}
@@ -120,7 +130,8 @@
else if(getSuperstep()%3 == 0){
if(getSuperstep() == 3){
tmpMsg = new LogAlgorithmMessageWritable();
- tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(GraphVertexOperation.k, tmpVertexId, 0, tmpVertexId.length,
+ tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(kmerSize, tmpVertexId,
+ 0, tmpVertexId.length,
Kmer.GENE_CODE.getGeneCodeFromBitMap((byte)(tmpVal.getValue() & 0x0F)));
destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
if(tmpVal.getState() == State.START_VERTEX){
@@ -139,10 +150,12 @@
else{
if(msgIterator.hasNext()){
tmpMsg = msgIterator.next();
- byte[] lastKmer = KmerUtil.getLastKmerFromChain(GraphVertexOperation.k,
+ byte[] lastKmer = KmerUtil.getLastKmerFromChain(kmerSize,
tmpVal.getLengthOfMergeChain(),
- tmpVal.getMergeChain(),0,tmpVal.getMergeChain().length);
- tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(GraphVertexOperation.k, lastKmer, 0, lastKmer.length,
+ tmpVal.getMergeChain(),
+ 0, tmpVal.getMergeChain().length);
+ tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(kmerSize, lastKmer,
+ 0, lastKmer.length,
Kmer.GENE_CODE.getGeneCodeFromBitMap((byte)(tmpVal.getValue() & 0x0F))); //tmpMsg.getNeighberInfo()
destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
if(tmpVal.getState() == State.START_VERTEX){
@@ -166,7 +179,7 @@
tmpMsg = msgIterator.next();
int message = tmpMsg.getMessage();
if(tmpVal.getLengthOfMergeChain() == 0){
- tmpVal.setLengthOfMergeChain(GraphVertexOperation.k);
+ tmpVal.setLengthOfMergeChain(kmerSize);
tmpVal.setMergeChain(tmpVertexId);
setVertexValue(tmpVal);
}
@@ -215,22 +228,22 @@
}
if(getSuperstep() == 5){
- lengthOfMergeChainVertex = GraphVertexOperation.k;
+ lengthOfMergeChainVertex = kmerSize;
mergeChainVertexId = tmpVertexId;
}
else{
lengthOfMergeChainVertex = tmpVal.getLengthOfMergeChain();
mergeChainVertexId = tmpVal.getMergeChain();
}
- byte[] tmplastKmer = KmerUtil.getLastKmerFromChain(tmpMsg.getLengthOfChain() - GraphVertexOperation.k + 1,
+ byte[] tmplastKmer = KmerUtil.getLastKmerFromChain(tmpMsg.getLengthOfChain() - kmerSize + 1,
tmpMsg.getLengthOfChain(), tmpMsg.getChainVertexId(),0, tmpMsg.getChainVertexId().length);
mergeChainVertexId = KmerUtil.mergeTwoKmer(lengthOfMergeChainVertex,
- mergeChainVertexId, 0, mergeChainVertexId.length,
- tmpMsg.getLengthOfChain() - GraphVertexOperation.k + 1,
- tmplastKmer, 0 , tmplastKmer.length
- );
+ mergeChainVertexId,
+ 0, mergeChainVertexId.length,
+ tmpMsg.getLengthOfChain() - kmerSize + 1,
+ tmplastKmer, 0, tmplastKmer.length);
lengthOfMergeChainVertex = lengthOfMergeChainVertex + tmpMsg.getLengthOfChain()
- - GraphVertexOperation.k + 1;
+ - kmerSize + 1;
tmpVal.setLengthOfMergeChain(lengthOfMergeChainVertex);
tmpVal.setMergeChain(mergeChainVertexId);
@@ -258,8 +271,8 @@
* @param args
*/
public static void main(String[] args) throws Exception {
- PregelixJob job = new PregelixJob(MergeGraphVertex.class.getSimpleName());
- job.setVertexClass(MergeGraphVertex.class);
+ PregelixJob job = new PregelixJob(LogAlgorithmForMergeGraphVertex.class.getSimpleName());
+ job.setVertexClass(LogAlgorithmForMergeGraphVertex.class);
/**
* BinaryInput and BinaryOutput~/
*/
@@ -267,6 +280,7 @@
job.setVertexOutputFormatClass(LogAlgorithmForMergeGraphOutputFormat.class);
job.setOutputKeyClass(BytesWritable.class);
job.setOutputValueClass(ValueStateWritable.class);
+ job.setDynamicVertexValueSize(true);
Client.run(args, job);
}
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/MergeGraphVertex.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/MergeGraphVertex.java
index 7f2cc43..12450b4 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/MergeGraphVertex.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/MergeGraphVertex.java
@@ -1,10 +1,12 @@
package edu.uci.ics.genomix.pregelix;
-import java.io.IOException;
import java.util.Iterator;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import edu.uci.ics.genomix.type.Kmer;
import edu.uci.ics.genomix.type.KmerUtil;
@@ -47,6 +49,9 @@
*/
public class MergeGraphVertex extends Vertex<BytesWritable, ValueStateWritable, NullWritable, MessageWritable>{
+ public static final String KMER_SIZE = "MergeGraphVertex.kmerSize";
+ public static int kmerSize = -1;
+
private byte[] tmpVertexId;
private byte[] tmpDestVertexId;
private BytesWritable destVertexId = new BytesWritable();
@@ -59,9 +64,13 @@
* @throws
*/
+ /**
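+ * Loads kmerSize lazily: while it is still -1, compute() reads it from the job configuration (KMER_SIZE, default 5)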
+ */
@Override
public void compute(Iterator<MessageWritable> msgIterator) {
-
+ if(kmerSize == -1)
+ kmerSize = getContext().getConfiguration().getInt(KMER_SIZE, 5);
tmpVertexId = GraphVertexOperation.generateValidDataFromBytesWritable(getVertexId());
if (getSuperstep() == 1) {
if(GraphVertexOperation.isHeadVertex(getVertexValue().getValue())){
@@ -71,7 +80,7 @@
tmpMsg.setChainVertexId(tmpChainVertexId.getBytes());
for(byte x = Kmer.GENE_CODE.A; x<= Kmer.GENE_CODE.T ; x++){
if((getVertexValue().getValue() & (1 << x)) != 0){
- tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(GraphVertexOperation.k, tmpVertexId, 0, tmpVertexId.length, x);
+ tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(kmerSize, tmpVertexId, 0, tmpVertexId.length, x);
destVertexId.set(tmpDestVertexId, 0, tmpDestVertexId.length);
sendMsg(destVertexId,tmpMsg);
}
@@ -92,14 +101,15 @@
tmpDestVertexId = tmpMsg.getSourceVertexId();
tmpMsg.setNeighberInfo(getVertexValue().getValue()); //set neighber
if(tmpMsg.getLengthOfChain() == 0){
- tmpMsg.setLengthOfChain(GraphVertexOperation.k);
+ tmpMsg.setLengthOfChain(kmerSize);
tmpMsg.setChainVertexId(tmpVertexId);
}
else{
- String source = Kmer.recoverKmerFrom(GraphVertexOperation.k, tmpVertexId, 0, tmpVertexId.length);
+ String source = Kmer.recoverKmerFrom(kmerSize, tmpVertexId, 0, tmpVertexId.length);
tmpMsg.setChainVertexId(KmerUtil.mergeKmerWithNextCode(
tmpMsg.getLengthOfChain(),
- tmpMsg.getChainVertexId(),0, tmpMsg.getChainVertexId().length,
+ tmpMsg.getChainVertexId(),
+ 0, tmpMsg.getChainVertexId().length,
Kmer.GENE_CODE.getCodeFromSymbol((byte)source.charAt(source.length() - 1))));
tmpMsg.incrementLength();
deleteVertex(getVertexId());
@@ -122,15 +132,17 @@
else{
tmpVertexValue.setState(State.START_VERTEX);
tmpVertexValue.setValue(GraphVertexOperation.updateRightNeighberByVertexId(getVertexValue().getValue(),
- tmpMsg.getSourceVertexId()));
+ tmpMsg.getSourceVertexId(), kmerSize));
tmpVertexValue.setLengthOfMergeChain(tmpMsg.getLengthOfChain());
tmpVertexValue.setMergeChain(tmpMsg.getChainVertexId());
setVertexValue(tmpVertexValue);
- try {
- //String source = Kmer.recoverKmerFrom(tmpMsg.getLengthOfChain(), tmpMsg.getChainVertexId(), 0, tmpMsg.getChainVertexId().length);
+ //String source = Kmer.recoverKmerFrom(tmpMsg.getLengthOfChain(), tmpMsg.getChainVertexId(), 0, tmpMsg.getChainVertexId().length);
+ //System.out.print("");
+ /*try {
+
GraphVertexOperation.flushChainToFile(tmpMsg.getChainVertexId(),
tmpMsg.getLengthOfChain(),tmpVertexId);
- } catch (IOException e) { e.printStackTrace(); }
+ } catch (IOException e) { e.printStackTrace(); }*/
}
}
}
@@ -139,10 +151,12 @@
while (msgIterator.hasNext()){
tmpMsg = msgIterator.next();
if(!tmpMsg.isRear()){
- byte[] lastKmer = KmerUtil.getLastKmerFromChain(GraphVertexOperation.k,
+ byte[] lastKmer = KmerUtil.getLastKmerFromChain(kmerSize,
tmpMsg.getLengthOfChain(),
- tmpMsg.getChainVertexId(), 0 , tmpMsg.getChainVertexId().length);
- tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(GraphVertexOperation.k, lastKmer, 0, lastKmer.length,
+ tmpMsg.getChainVertexId(),
+ 0, tmpMsg.getChainVertexId().length);
+ tmpDestVertexId = KmerUtil.shiftKmerWithNextCode(kmerSize, lastKmer,
+ 0, lastKmer.length,
Kmer.GENE_CODE.getGeneCodeFromBitMap((byte)(tmpMsg.getNeighberInfo() & 0x0F)));
tmpMsg.setSourceVertexId(tmpVertexId);
@@ -170,6 +184,7 @@
*/
job.setVertexInputFormatClass(BinaryLoadGraphInputFormat.class);
job.setVertexOutputFormatClass(BinaryLoadGraphOutputFormat.class);
+ job.setDynamicVertexValueSize(true);
job.setOutputKeyClass(BytesWritable.class);
job.setOutputValueClass(ValueStateWritable.class);
Client.run(args, job);
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/TestMemory.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/TestMemory.java
deleted file mode 100644
index 247ca42..0000000
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/TestMemory.java
+++ /dev/null
@@ -1,32 +0,0 @@
-package edu.uci.ics.genomix.pregelix;
-/**
-* Class: TestMemory
-* @author: Anbang Xu
-* @description: Prints JVM memory utilization statistics
-*/
-public class TestMemory {
-
- public static void main(String [] args) {
-
- int mb = 1024*1024;
-
- //Getting the runtime reference from system
- Runtime runtime = Runtime.getRuntime();
-
- System.out.println("##### Heap utilization statistics [MB] #####");
-
- //Print used memory
- System.out.println("Used Memory:"
- + (runtime.totalMemory() - runtime.freeMemory()) / mb);
-
- //Print free memory
- System.out.println("Free Memory:"
- + runtime.freeMemory() / mb);
-
- //Print total available memory
- System.out.println("Total Memory:" + runtime.totalMemory() / mb);
-
- //Print Maximum available memory
- System.out.println("Max Memory:" + runtime.maxMemory() / mb);
- }
-}
\ No newline at end of file
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/client/Client.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/client/Client.java
index 128b0dc..ab0fb57 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/client/Client.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/client/Client.java
@@ -10,6 +10,8 @@
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
+import edu.uci.ics.genomix.pregelix.LogAlgorithmForMergeGraphVertex;
+import edu.uci.ics.genomix.pregelix.MergeGraphVertex;
import edu.uci.ics.pregelix.api.job.PregelixJob;
import edu.uci.ics.pregelix.core.base.IDriver.Plan;
import edu.uci.ics.pregelix.core.driver.Driver;
@@ -31,6 +33,9 @@
@Option(name = "-plan", usage = "query plan choice", required = false)
public Plan planChoice = Plan.OUTER_JOIN;
+
+ @Option(name = "-kmer-size", usage = "the size of kmer", required = false)
+ public int sizeKmer;
@Option(name = "-runtime-profiling", usage = "whether to do runtime profifling", required = false)
public String profiling = "false";
@@ -52,6 +57,8 @@
for (int i = 1; i < inputs.length; i++)
FileInputFormat.addInputPaths(job, inputs[0]);
FileOutputFormat.setOutputPath(job, new Path(options.outputPath));
+ job.getConfiguration().setInt(MergeGraphVertex.KMER_SIZE, options.sizeKmer);
+ job.getConfiguration().setInt(LogAlgorithmForMergeGraphVertex.KMER_SIZE, options.sizeKmer);
return options;
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/BinaryLoadGraphOutputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/BinaryLoadGraphOutputFormat.java
index f02d426..2b87379 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/BinaryLoadGraphOutputFormat.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/BinaryLoadGraphOutputFormat.java
@@ -2,7 +2,6 @@
import java.io.IOException;
-import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.RecordWriter;
@@ -10,7 +9,6 @@
import edu.uci.ics.genomix.pregelix.api.io.binary.BinaryVertexOutputFormat;
import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
-import edu.uci.ics.genomix.pregelix.io.ValueWritable;
import edu.uci.ics.pregelix.api.graph.Vertex;
import edu.uci.ics.pregelix.api.io.VertexWriter;
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/LogAlgorithmForMergeGraphInputFormat.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/LogAlgorithmForMergeGraphInputFormat.java
index 4834e56..0e74c2d 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/LogAlgorithmForMergeGraphInputFormat.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/format/LogAlgorithmForMergeGraphInputFormat.java
@@ -1,7 +1,6 @@
package edu.uci.ics.genomix.pregelix.format;
import java.io.IOException;
-import java.util.logging.FileHandler;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
@@ -9,9 +8,7 @@
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import edu.uci.ics.genomix.pregelix.GraphVertexOperation;
import edu.uci.ics.genomix.pregelix.api.io.binary.BinaryVertexInputFormat;
-import edu.uci.ics.genomix.pregelix.bitwise.BitwiseOperation;
import edu.uci.ics.genomix.pregelix.io.LogAlgorithmMessageWritable;
import edu.uci.ics.genomix.pregelix.io.ValueStateWritable;
import edu.uci.ics.genomix.pregelix.type.State;
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/graph/Graph.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/graph/Graph.java
index a63c307..df09a64 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/graph/Graph.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/graph/Graph.java
@@ -1,11 +1,8 @@
package edu.uci.ics.genomix.pregelix.graph;
import java.io.BufferedReader;
-import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
public class Graph {
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/hdfs/HDFSOperation.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/hdfs/HDFSOperation.java
deleted file mode 100644
index 824722a..0000000
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/hdfs/HDFSOperation.java
+++ /dev/null
@@ -1,123 +0,0 @@
-package edu.uci.ics.genomix.pregelix.hdfs;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
-public class HDFSOperation {
-
- private static Configuration conf;
- private static FileSystem hdfs;
- private static Path path;
-
- public HDFSOperation() throws IOException{
- conf = new Configuration();
- hdfs = FileSystem.get(conf);
- path = null;
- }
-
- public static boolean insertHDFSFile(String fileName, int length, byte[] buffer) throws IOException{
- path = new Path(fileName);
- if (!hdfs.exists(path))
- createHDFSFile(fileName,length,buffer);
- else
- appendHDFSFile(fileName,length,buffer);
- return true;
- }
-
- public static boolean createHDFSFile(String fileName, int length, byte[] buffer) throws IOException{
- path = new Path(fileName);
- if (hdfs.exists(path)){
- System.out.println("Output already exists");
- return false;
- }
- /*if (!hdfs.isFile(path)){
- System.out.println("Output should be a file");
- return false;
- }*/
- FSDataOutputStream outputStream = hdfs.create(path);
- outputStream.writeInt(length);
- outputStream.write(buffer);
- outputStream.close();
- return true;
- }
-
- public static boolean appendHDFSFile(String fileName, int length, byte[] buffer) throws IOException{
- path = new Path(fileName);
- if (!hdfs.exists(path)){
- System.out.println("Output not found");
- return false;
- }
- if (!hdfs.isFile(path)){
- System.out.println("Output should be a file");
- return false;
- }
- FSDataOutputStream outputStream = hdfs.append(path);
- outputStream.writeInt(length);
- outputStream.write(buffer);
- outputStream.close();
- return true;
- }
-
- public static boolean deleteHDFSFile(String fileName) throws IOException{
- path = new Path(fileName);
- if (!hdfs.exists(path)){
- System.out.println("Input file not found");
- return false;
- }
- if (!hdfs.isFile(path)){
- System.out.println("Input should be a file");
- return false;
- }
- return hdfs.delete(path,true);
- }
-
- public static boolean copyFromLocalFile(String srcFile, String dstFile) throws IOException{
- Path srcPath = new Path(srcFile);
- path = new Path(dstFile);
- if (!hdfs.exists(path)){
- System.out.println("Input file not found");
- return false;
- }
- if (!hdfs.isFile(path)){
- System.out.println("Input should be a file");
- return false;
- }
- hdfs.copyFromLocalFile(srcPath, path);
- return true;
- }
-
- public static void testReadAndWriteHDFS() throws Exception{
- String inFileName = "testHDFS/testInput";
- String outFileName = "testHDFS/testOutput";
- Configuration conf = new Configuration();
- FileSystem fs = FileSystem.get(conf);
- Path inFile = new Path(inFileName);
- Path outFile = new Path(outFileName);
- if (!fs.exists(inFile)){
- System.out.println("Input file not found");
- return;
- }
- if (!fs.isFile(inFile)){
- System.out.println("Input should be a file");
- return;
- }
- if (fs.exists(outFile)){
- System.out.println("Output already exists");
- return;
- }
- FSDataInputStream in = fs.open(inFile);
- FSDataOutputStream out = fs.create(outFile);
- byte[] buffer = new byte[1024];
- int bytesRead = 0;
- while ((bytesRead = in.read(buffer)) > 0) {
- out.write(buffer, 0, bytesRead);
- }
- in.close();
- out.close();
- }
-}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/LogAlgorithmMessageWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/LogAlgorithmMessageWritable.java
index 4be98bc..42f1269 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/LogAlgorithmMessageWritable.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/LogAlgorithmMessageWritable.java
@@ -6,8 +6,7 @@
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
-
-import edu.uci.ics.genomix.pregelix.GraphVertexOperation;
+import edu.uci.ics.genomix.pregelix.LogAlgorithmForMergeGraphVertex;
public class LogAlgorithmMessageWritable implements WritableComparable<LogAlgorithmMessageWritable>{
/**
@@ -25,7 +24,7 @@
private int sourceVertexState;
public LogAlgorithmMessageWritable(){
- sourceVertexId = new byte[(GraphVertexOperation.k-1)/4 + 1];
+ sourceVertexId = new byte[(LogAlgorithmForMergeGraphVertex.kmerSize-1)/4 + 1];
}
public void set(byte[] sourceVertexId,byte neighberInfo, byte[] chainVertexId, File file){
@@ -37,7 +36,7 @@
}
public void reset(){
- sourceVertexId = new byte[(GraphVertexOperation.k-1)/4 + 1];
+ sourceVertexId = new byte[(LogAlgorithmForMergeGraphVertex.kmerSize-1)/4 + 1];
neighberInfo = (Byte) null;
lengthOfChain = 0;
chainVertexId = null;
@@ -133,7 +132,7 @@
message = in.readInt();
sourceVertexState = in.readInt();
- sourceVertexId = new byte[(GraphVertexOperation.k-1)/4 + 1];
+ sourceVertexId = new byte[(LogAlgorithmForMergeGraphVertex.kmerSize-1)/4 + 1];
in.readFully(sourceVertexId);
neighberInfo = in.readByte();
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/MessageWritable.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/MessageWritable.java
index 3146088..2976c87 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/MessageWritable.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/io/MessageWritable.java
@@ -6,8 +6,7 @@
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
-
-import edu.uci.ics.genomix.pregelix.GraphVertexOperation;
+import edu.uci.ics.genomix.pregelix.MergeGraphVertex;
public class MessageWritable implements WritableComparable<MessageWritable>{
/**
@@ -120,9 +119,9 @@
}
else
chainVertexId = new byte[0];
- sourceVertexId = new byte[(GraphVertexOperation.k-1)/4 + 1];
+ sourceVertexId = new byte[(MergeGraphVertex.kmerSize-1)/4 + 1];
in.readFully(sourceVertexId);
- head = new byte[(GraphVertexOperation.k-1)/4 + 1];
+ head = new byte[(MergeGraphVertex.kmerSize-1)/4 + 1];
in.readFully(head);
neighberInfo = in.readByte();
isRear = in.readBoolean();
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/LogAlgorithmLogFormatter.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/LogAlgorithmLogFormatter.java
index 2e4fd68..e0d241a 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/LogAlgorithmLogFormatter.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/LogAlgorithmLogFormatter.java
@@ -2,9 +2,6 @@
import java.util.logging.*;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.io.BytesWritable;
-
import edu.uci.ics.genomix.pregelix.io.LogAlgorithmMessageWritable;
import edu.uci.ics.genomix.pregelix.type.Message;
import edu.uci.ics.genomix.pregelix.type.State;
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/NaiveAlgorithmLogFormatter.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/NaiveAlgorithmLogFormatter.java
index c9abd6e..ef0d96f 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/NaiveAlgorithmLogFormatter.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/log/NaiveAlgorithmLogFormatter.java
@@ -2,9 +2,6 @@
import java.util.logging.*;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.io.BytesWritable;
-
import edu.uci.ics.genomix.pregelix.io.MessageWritable;
import edu.uci.ics.genomix.type.Kmer;
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/CombineSequenceFile.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/CombineSequenceFile.java
index bbbb84f..8924302 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/CombineSequenceFile.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/CombineSequenceFile.java
@@ -1,6 +1,6 @@
package edu.uci.ics.genomix.pregelix.sequencefile;
+
import java.io.File;
-import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@@ -9,7 +9,6 @@
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
-import edu.uci.ics.genomix.pregelix.GraphVertexOperation;
import edu.uci.ics.genomix.type.Kmer;
import edu.uci.ics.genomix.type.KmerCountValue;
@@ -22,10 +21,11 @@
*/
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub
+ int kmerSize = 5;
Configuration conf = new Configuration();
FileSystem fileSys = FileSystem.get(conf);
- Path p = new Path("data/ThreeKmer");
+ Path p = new Path("data/SinglePath_55");
Path p2 = new Path("data/result");
Path outFile = new Path(p2, "output");
SequenceFile.Reader reader;
@@ -35,13 +35,13 @@
BytesWritable key = new BytesWritable();
KmerCountValue value = new KmerCountValue();
- File dir = new File("data/ThreeKmer");
+ File dir = new File("data/SinglePath_55");
for(File child : dir.listFiles()){
String name = child.getAbsolutePath();
Path inFile = new Path(p, name);
reader = new SequenceFile.Reader(fileSys, inFile, conf);
while (reader.next(key, value)) {
- System.out.println(Kmer.recoverKmerFrom(GraphVertexOperation.k, key.getBytes(), 0,
+ System.out.println(Kmer.recoverKmerFrom(kmerSize, key.getBytes(), 0,
key.getLength())
+ "\t" + value.toString());
writer.append(key, value);
@@ -53,7 +53,7 @@
reader = new SequenceFile.Reader(fileSys, outFile, conf);
while (reader.next(key, value)) {
- System.err.println(Kmer.recoverKmerFrom(GraphVertexOperation.k, key.getBytes(), 0,
+ System.err.println(Kmer.recoverKmerFrom(kmerSize, key.getBytes(), 0,
key.getLength())
+ "\t" + value.toString());
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateSequenceFile.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateSequenceFile.java
deleted file mode 100644
index 5c740e4..0000000
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/GenerateSequenceFile.java
+++ /dev/null
@@ -1,948 +0,0 @@
-package edu.uci.ics.genomix.pregelix.sequencefile;
-
-import java.io.IOException;
-import java.util.ArrayList;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.SequenceFile.CompressionType;
-
-import edu.uci.ics.genomix.type.KmerCountValue;
-import edu.uci.ics.genomix.pregelix.GraphVertexOperation;
-import edu.uci.ics.genomix.pregelix.bitwise.BitwiseOperation;
-
-public class GenerateSequenceFile {
-
- static private final Path TMP_DIR = new Path(
- GenerateSequenceFile.class.getSimpleName() + "_TMP");
- private static Path outDir = new Path("data/webmap");
-
- /**
- * create test.dat
- * A - ACG - A 000110 00010001 06 11
- * C - ACT - C 000111 00100010 07 22
- * G - CGT - G 011011 01000100 1B 44
- * T - GTC - T 101101 10001000 2D 88
- */
- public static void createTestDat() throws IOException{
- //write output to a file
- Configuration conf = new Configuration();
- Path outFile = new Path(outDir, "test-out.dat");
- FileSystem fileSys = FileSystem.get(conf);
- SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
- outFile, BytesWritable.class, ByteWritable.class,
- CompressionType.NONE);
-
-
- //Generate <key,value> <BytesWritable, ByteWritable>
- byte[] key = hexStringToByteArray("06"); //000110
- byte[] value = hexStringToByteArray("11"); //00010001
- System.out.println(Integer.toHexString(key[0]));
- System.out.println(Integer.toHexString(value[0]));
- BytesWritable keyWritable = new BytesWritable(key);
- ByteWritable valueWritable = new ByteWritable(value[0]);
-
- ArrayList<BytesWritable> arrayOfKeys = new ArrayList<BytesWritable>();
- arrayOfKeys.add(keyWritable);
- ArrayList<ByteWritable> arrayOfValues = new ArrayList<ByteWritable>();
- arrayOfValues.add(valueWritable);
-
- key = hexStringToByteArray("07"); //000111
- value = hexStringToByteArray("22"); //00100010
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value[0]);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- key = hexStringToByteArray("1B"); //011010
- value = hexStringToByteArray("44"); //01000100
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value[0]);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- key = hexStringToByteArray("2D"); //100011
- value = hexStringToByteArray("88"); //10001000
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value[0]);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- //wirte to sequence file
- for(int i = 0; i < arrayOfKeys.size(); i++)
- writer.append(arrayOfKeys.get(i), arrayOfValues.get(i));
- writer.close();
-
- //read outputs
- Path inFile = new Path(outDir, "test-out.dat");
- BytesWritable outKey = new BytesWritable();
- ByteWritable outValue = new ByteWritable();
- SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
- try {
- reader.next(outKey, outValue);
- System.out.println(outKey.getBytes());
- System.out.println(outValue.get());
- } finally {
- reader.close();
- }
- }
-
- /**
- * create a mergeTest SequenceFile
- * CAG - AGC - GCG - CGT - GTA - TAT - ATA
- * GAG ATC
- *
- * CAG 010010 00000010
- * AGC 001001 01100100
- * GCG 100110 00011000
- * CGT 011011 01000001
- * GTA 101100 00101000
- * TAT 110011 01000011
- * ATA 001100 10000000
- * GAG 100010 00000010
- * ATC 001101 10000000
- */
- public static void createMergeTest() throws IOException{
- //write output to a file
- Configuration conf = new Configuration();
- Path outFile = new Path(outDir, "sequenceFileMergeTest");
- FileSystem fileSys = FileSystem.get(conf);
- SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
- outFile, BytesWritable.class, KmerCountValue.class,
- CompressionType.NONE);
-
-
- //Generate <key,value> <BytesWritable, ByteWritable>
- // 1
- String tmpKey = "010010";
- byte[] key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- String tmpValue = "00000010";
- byte value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- BytesWritable keyWritable = new BytesWritable(key);
- ByteWritable valueWritable = new ByteWritable(value);
-
- ArrayList<BytesWritable> arrayOfKeys = new ArrayList<BytesWritable>();
- arrayOfKeys.add(keyWritable);
- ArrayList<ByteWritable> arrayOfValues = new ArrayList<ByteWritable>();
- arrayOfValues.add(valueWritable);
-
- // 2
- tmpKey = "001001";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "01100100";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- // 3
- tmpKey = "100110";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "00011000";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- // 4
- tmpKey = "011011";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "01000001";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- // 5
- tmpKey = "101100";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "00101000";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- // 6
- tmpKey = "110011";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "01000011";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- // 7
- tmpKey = "001100";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "10000000";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- // 8
- tmpKey = "100010";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "00000010";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- // 9
- tmpKey = "001101";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "10000000";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- KmerCountValue kmerCountValue = null;
- //wirte to sequence file
- for(int i = 0; i < arrayOfKeys.size(); i++){
- kmerCountValue = new KmerCountValue();
- kmerCountValue.setAdjBitMap(arrayOfValues.get(i).get());
- writer.append(arrayOfKeys.get(i), kmerCountValue);
- }
- writer.close();
-
- //read outputs
- Path inFile = new Path(outDir, "sequenceFileMergeTest");
- BytesWritable outKey = new BytesWritable();
- KmerCountValue outValue = new KmerCountValue();
- SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
- int iteration = 1;
- try {
- while(reader.next(outKey, outValue)){
- System.out.println(iteration);
- System.out.println("key: " + BitwiseOperation.convertBytesToBinaryStringKmer(outKey.getBytes(),GraphVertexOperation.k));
- System.out.println("value: " + BitwiseOperation.convertByteToBinaryString(outValue.getAdjBitMap()));
- System.out.println();
- iteration++;
- }
- } finally {
- reader.close();
- }
- }
-
- /**
- * create a mergeTest SequenceFile
- * CAG - AGC - GCG - CGT - GTA - TAT - ATA
- * GAG ATC
- *
- */
- public static void createLongMergeTest() throws IOException{
- //write output to a file
- Configuration conf = new Configuration();
- Path outFile = new Path(outDir, "sequenceFileMergeTest");
- FileSystem fileSys = FileSystem.get(conf);
- SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
- outFile, BytesWritable.class, KmerCountValue.class,
- CompressionType.NONE);
-
-
- //Generate <key,value> <BytesWritable, ByteWritable>
- // CAG
- String tmpKey = "010010";
- byte[] key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- String tmpValue = "00000010";
- byte value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- BytesWritable keyWritable = new BytesWritable(key);
- ByteWritable valueWritable = new ByteWritable(value);
-
- ArrayList<BytesWritable> arrayOfKeys = new ArrayList<BytesWritable>();
- arrayOfKeys.add(keyWritable);
- ArrayList<ByteWritable> arrayOfValues = new ArrayList<ByteWritable>();
- arrayOfValues.add(valueWritable);
-
- // AGC
- tmpKey = "001001";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "01100001";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- // GAG
- tmpKey = "100010";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "00000010";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- // TAT
- tmpKey = "110011";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "00100011";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- // ATA
- tmpKey = "001100";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "10000000";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- // ATC
- tmpKey = "001101";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "10000000";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- KmerCountValue kmerCountValue = null;
- //wirte to sequence file
- for(int i = 0; i < arrayOfKeys.size(); i++){
- kmerCountValue = new KmerCountValue();
- kmerCountValue.setAdjBitMap(arrayOfValues.get(i).get());
- writer.append(arrayOfKeys.get(i), kmerCountValue);
- }
- writer.close();
-
- //read outputs
- Path inFile = new Path(outDir, "sequenceFileMergeTest");
- BytesWritable outKey = new BytesWritable();
- KmerCountValue outValue = new KmerCountValue();
- SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
- int iteration = 1;
- try {
- while(reader.next(outKey, outValue)){
- System.out.println(iteration);
- System.out.println("key: " + BitwiseOperation.convertBytesToBinaryStringKmer(outKey.getBytes(),GraphVertexOperation.k));
- System.out.println("value: " + BitwiseOperation.convertByteToBinaryString(outValue.getAdjBitMap()));
- System.out.println();
- iteration++;
- }
- } finally {
- reader.close();
- }
- }
-
- public static void generateNumOfLinesFromBigFile(Path inFile, Path outFile, int numOfLines) throws IOException{
- Configuration conf = new Configuration();
- FileSystem fileSys = FileSystem.get(conf);
-
- ClassLoader ctxLoader = Thread.currentThread().getContextClassLoader();
- Thread.currentThread().setContextClassLoader(GenerateSequenceFile.class.getClassLoader());
- SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
- SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
- outFile, BytesWritable.class, KmerCountValue.class,
- CompressionType.NONE);
- BytesWritable outKey = new BytesWritable();
- KmerCountValue outValue = new KmerCountValue();
- int i = 0;
-
- for(i = 0; i < numOfLines; i++){
- System.out.println(i);
- reader.next(outKey, outValue);
- writer.append(outKey, outValue);
- }
- writer.close();
- reader.close();
- Thread.currentThread().setContextClassLoader(ctxLoader);
- }
-
- public static void main(String[] argv) throws Exception {
- //createTestDat();
- //createMergeTest();
- //createTestDat();
- /*Path dir = new Path("data/webmap");
- Path inFile = new Path(dir, "part-1");
- Path outFile = new Path(dir, "part-1-out-100");
- generateNumOfLinesFromBigFile(inFile,outFile,100);*/
- /**
- * AGC - A C - TAT
- * AGCATGCTAT
- * "AGCAAACACGAC T TGCC TAT"
- * problem "AGCATGGACGTCGATTCTAT"
- * problem "AGCAAACACGATTGCCTAT"
- * problem "AGCACGTAACTTGCTCTAT"
- * problem "AGCAACGATTGCCTAT"
- * problem "AGCATTTAAACTCTAT"
- *
- * "AGCACTTAT"
- * "AGCAAACACTTGCTGTACCGTGGCCTAT"
- * "AGCAAACACTTGCTGTACCCTAT"
- *
- * compare "AGCAACTAT"
- * "AGCACCTAT"
- * "AGCATCTAT"
- *
- * k = 5
- * AGCGC - A C - TATAT
- *
- * k = 5
- * AAAGCAGCTTGCTACTATAA
- *
- * k = 8
- * AAAAAAGCAGCTTGCTACTATAAAAA
- *
- * k=4
- * AAGCATGCTATA
- *
- * Two strings
- * "AGCATGCTAT","TTCAGTACCCGC"
- *
- * AGCATGCTAT
- */
-
- generateSequenceFileFromGeneCode3("AGCATGCTAT");//GTCGATT //before T: GGACG
- //generateSequenceFileFromGeneCode15("AAAAAAAAAAAAAGCATGCTATAAAAAAAAAAAA");
- }
- public static void generateSequenceFileFromGeneCode3(String s) throws IOException{
- Configuration conf = new Configuration();
- Path outFile = new Path(outDir, "sequenceShortFileMergeTest");//sequenceShortFileMergeTest
- FileSystem fileSys = FileSystem.get(conf);
- SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
- outFile, BytesWritable.class, KmerCountValue.class,
- CompressionType.NONE);
- BytesWritable outKey = null;
- KmerCountValue outValue;
- byte adjBitMap;
- ArrayList<String> lists = new ArrayList<String>();
- lists.add("001001"); //AGC
- lists.add("110011"); //TAT
- String binaryString = "";
- for(int i = 1; i < s.length()-GraphVertexOperation.k; i++){
- binaryString = GraphVertexOperation.convertGeneCodeToBinaryString(s.substring(i,i+GraphVertexOperation.k));
- if(lists.contains(binaryString)){
- System.out.println("error: " + binaryString);
- return;
- }
- lists.add(binaryString);
- outKey = new BytesWritable(BitwiseOperation.convertBinaryStringToBytes(binaryString));
- outValue = new KmerCountValue();
- adjBitMap = GraphVertexOperation.getPrecursorFromGeneCode((byte)0, s.charAt(i-1));
- adjBitMap = GraphVertexOperation.getSucceedFromGeneCode(adjBitMap, s.charAt(i+GraphVertexOperation.k));
- outValue.setAdjBitMap(adjBitMap);
- writer.append(outKey, outValue);
- }
- /**
- * CAG - AGC ------ TAT - ATA
- * GAG ATC
- */
- // AGC
- String tmpKey = "001001";
- byte[] key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- String tmpValue = "00000001";
- byte value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- BytesWritable keyWritable = new BytesWritable(key);
- ByteWritable valueWritable = new ByteWritable(value);
-
- ArrayList<BytesWritable> arrayOfKeys = new ArrayList<BytesWritable>();
- arrayOfKeys.add(keyWritable);
- ArrayList<ByteWritable> arrayOfValues = new ArrayList<ByteWritable>();
- arrayOfValues.add(valueWritable);
-
- // TAT
- tmpKey = "110011";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "00100000";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- KmerCountValue kmerCountValue = null;
- //wirte to sequence file
- for(int i = 0; i < arrayOfKeys.size(); i++){
- kmerCountValue = new KmerCountValue();
- kmerCountValue.setAdjBitMap(arrayOfValues.get(i).get());
- writer.append(arrayOfKeys.get(i), kmerCountValue);
- }
- writer.close();
- }
- public static void generateSequenceFileFromGeneCode4(String s) throws IOException{
- Configuration conf = new Configuration();
- Path outFile = new Path(outDir, "sequenceFileMergeTest4");
- FileSystem fileSys = FileSystem.get(conf);
- SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
- outFile, BytesWritable.class, KmerCountValue.class,
- CompressionType.NONE);
- BytesWritable outKey = null;
- KmerCountValue outValue;
- byte adjBitMap;
- ArrayList<String> lists = new ArrayList<String>();
-
- lists.add("00001001"); //AAGC
- lists.add("11001100"); //TATA
- String binaryString = "";
- for(int i = 1; i < s.length()-GraphVertexOperation.k; i++){
- binaryString = GraphVertexOperation.convertGeneCodeToBinaryString(s.substring(i,i+GraphVertexOperation.k));
- if(lists.contains(binaryString)){
- System.out.println("error: " + binaryString);
- return;
- }
- lists.add(binaryString);
- outKey = new BytesWritable(BitwiseOperation.convertBinaryStringToBytes(binaryString));
- outValue = new KmerCountValue();
- adjBitMap = GraphVertexOperation.getPrecursorFromGeneCode((byte)0, s.charAt(i-1));
- adjBitMap = GraphVertexOperation.getSucceedFromGeneCode(adjBitMap, s.charAt(i+GraphVertexOperation.k));
- outValue.setAdjBitMap(adjBitMap);
- writer.append(outKey, outValue);
- }
-
- /**
- * CAG - AGC ------ TAT - ATA
- * GAG ATC
- */
- // AGC
- String tmpKey = "00001001";
- byte[] key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- String tmpValue = "00000001";
- byte value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- BytesWritable keyWritable = new BytesWritable(key);
- ByteWritable valueWritable = new ByteWritable(value);
-
- ArrayList<BytesWritable> arrayOfKeys = new ArrayList<BytesWritable>();
- arrayOfKeys.add(keyWritable);
- ArrayList<ByteWritable> arrayOfValues = new ArrayList<ByteWritable>();
- arrayOfValues.add(valueWritable);
-
- // TAT
- tmpKey = "11001100";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "00100000";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- KmerCountValue kmerCountValue = null;
- //wirte to sequence file
- for(int i = 0; i < arrayOfKeys.size(); i++){
- kmerCountValue = new KmerCountValue();
- kmerCountValue.setAdjBitMap(arrayOfValues.get(i).get());
- writer.append(arrayOfKeys.get(i), kmerCountValue);
- }
- writer.close();
-
- //read outputs
- Path inFile = new Path(outDir, "sequenceFileMergeTest4");
- outKey = new BytesWritable();
- outValue = new KmerCountValue();
- SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
- int iteration = 1;
- try {
- while(reader.next(outKey, outValue)){
- System.out.println(iteration);
- System.out.println("key: " + BitwiseOperation.convertBytesToBinaryStringKmer(outKey.getBytes(),GraphVertexOperation.k));
- System.out.println("value: " + BitwiseOperation.convertByteToBinaryString(outValue.getAdjBitMap()));
- System.out.println();
- iteration++;
- }
- } finally {
- reader.close();
- }
- }
- public static void generateSequenceFileFromGeneCode5(String s) throws IOException{
- Configuration conf = new Configuration();
- Path outFile = new Path(outDir, "sequenceFileMergeTest5");
- FileSystem fileSys = FileSystem.get(conf);
- SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
- outFile, BytesWritable.class, KmerCountValue.class,
- CompressionType.NONE);
- BytesWritable outKey = null;
- KmerCountValue outValue;
- byte adjBitMap;
- ArrayList<String> lists = new ArrayList<String>();
-
- lists.add("0000001001"); //AAAGC
- lists.add("1100110000"); //TATAA
- String binaryString = "";
- for(int i = 1; i < s.length()-GraphVertexOperation.k; i++){
- binaryString = GraphVertexOperation.convertGeneCodeToBinaryString(s.substring(i,i+GraphVertexOperation.k));
- if(lists.contains(binaryString)){
- System.out.println("error: " + binaryString);
- return;
- }
- lists.add(binaryString);
- outKey = new BytesWritable(BitwiseOperation.convertBinaryStringToBytes(binaryString));
- outValue = new KmerCountValue();
- adjBitMap = GraphVertexOperation.getPrecursorFromGeneCode((byte)0, s.charAt(i-1));
- adjBitMap = GraphVertexOperation.getSucceedFromGeneCode(adjBitMap, s.charAt(i+GraphVertexOperation.k));
- outValue.setAdjBitMap(adjBitMap);
- writer.append(outKey, outValue);
- }
- /**
- * CAG - AGC ------ TAT - ATA
- * GAG ATC
- */
- // AGC
- String tmpKey = "0000001001";
- byte[] key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- String tmpValue = "00000001";
- byte value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- BytesWritable keyWritable = new BytesWritable(key);
- ByteWritable valueWritable = new ByteWritable(value);
-
- ArrayList<BytesWritable> arrayOfKeys = new ArrayList<BytesWritable>();
- arrayOfKeys.add(keyWritable);
- ArrayList<ByteWritable> arrayOfValues = new ArrayList<ByteWritable>();
- arrayOfValues.add(valueWritable);
-
- // TAT
- tmpKey = "1100110000";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "00100000";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- KmerCountValue kmerCountValue = null;
- //wirte to sequence file
- for(int i = 0; i < arrayOfKeys.size(); i++){
- kmerCountValue = new KmerCountValue();
- kmerCountValue.setAdjBitMap(arrayOfValues.get(i).get());
- writer.append(arrayOfKeys.get(i), kmerCountValue);
- }
- writer.close();
-
- //read outputs
- Path inFile = new Path(outDir, "sequenceFileMergeTest5");
- outKey = new BytesWritable();
- outValue = new KmerCountValue();
- SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
- int iteration = 1;
- try {
- while(reader.next(outKey, outValue)){
- System.out.println(iteration);
- String kmer = BitwiseOperation.convertBytesToBinaryStringKmer(outKey.getBytes(),GraphVertexOperation.k);
- System.out.println("key: " + kmer);
- System.out.println("code: " + GraphVertexOperation.convertBinaryStringToGenecode(kmer));
- System.out.println("value: " + BitwiseOperation.convertByteToBinaryString(outValue.getAdjBitMap()));
- System.out.println();
- iteration++;
- }
- } finally {
- reader.close();
- }
- }
- public static void generateSequenceFileFromGeneCode8(String s) throws IOException{
- Configuration conf = new Configuration();
- Path outFile = new Path(outDir, "sequenceFileMergeTest8");
- FileSystem fileSys = FileSystem.get(conf);
- SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
- outFile, BytesWritable.class, KmerCountValue.class,
- CompressionType.NONE);
- BytesWritable outKey = null;
- KmerCountValue outValue;
- byte adjBitMap;
- ArrayList<String> lists = new ArrayList<String>();
-
- lists.add("0000000000001001"); //AAAAAAGC
- lists.add("1100110000000000"); //TATAAAAA
- String binaryString = "";
- for(int i = 1; i < s.length()-GraphVertexOperation.k; i++){
- binaryString = GraphVertexOperation.convertGeneCodeToBinaryString(s.substring(i,i+GraphVertexOperation.k));
- if(lists.contains(binaryString)){
- System.out.println("error: " + binaryString);
- return;
- }
- lists.add(binaryString);
- outKey = new BytesWritable(BitwiseOperation.convertBinaryStringToBytes(binaryString));
- outValue = new KmerCountValue();
- adjBitMap = GraphVertexOperation.getPrecursorFromGeneCode((byte)0, s.charAt(i-1));
- adjBitMap = GraphVertexOperation.getSucceedFromGeneCode(adjBitMap, s.charAt(i+GraphVertexOperation.k));
- outValue.setAdjBitMap(adjBitMap);
- writer.append(outKey, outValue);
- }
- /**
- * CAG - AGC ------ TAT - ATA
- * GAG ATC
- */
- // AGC
- String tmpKey = "0000000000001001";
- byte[] key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- String tmpValue = "00000001";
- byte value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- BytesWritable keyWritable = new BytesWritable(key);
- ByteWritable valueWritable = new ByteWritable(value);
-
- ArrayList<BytesWritable> arrayOfKeys = new ArrayList<BytesWritable>();
- arrayOfKeys.add(keyWritable);
- ArrayList<ByteWritable> arrayOfValues = new ArrayList<ByteWritable>();
- arrayOfValues.add(valueWritable);
-
- // TAT
- tmpKey = "1100110000000000";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "00100000";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- KmerCountValue kmerCountValue = null;
- //wirte to sequence file
- for(int i = 0; i < arrayOfKeys.size(); i++){
- kmerCountValue = new KmerCountValue();
- kmerCountValue.setAdjBitMap(arrayOfValues.get(i).get());
- writer.append(arrayOfKeys.get(i), kmerCountValue);
- }
- writer.close();
-
- //read outputs
- Path inFile = new Path(outDir, "sequenceFileMergeTest8");
- outKey = new BytesWritable();
- outValue = new KmerCountValue();
- SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
- int iteration = 1;
- try {
- while(reader.next(outKey, outValue)){
- System.out.println(iteration);
- String kmer = BitwiseOperation.convertBytesToBinaryStringKmer(outKey.getBytes(),GraphVertexOperation.k);
- System.out.println("key: " + kmer);
- System.out.println("code: " + GraphVertexOperation.convertBinaryStringToGenecode(kmer));
- System.out.println("value: " + BitwiseOperation.convertByteToBinaryString(outValue.getAdjBitMap()));
- System.out.println();
- iteration++;
- }
- } finally {
- reader.close();
- }
- }
- public static void generateSequenceFileFromGeneCode15(String s) throws IOException{
- Configuration conf = new Configuration();
- Path outFile = new Path(outDir, "sequenceFileMergeTest15");
- FileSystem fileSys = FileSystem.get(conf);
- SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
- outFile, BytesWritable.class, KmerCountValue.class,
- CompressionType.NONE);
- BytesWritable outKey = null;
- KmerCountValue outValue;
- byte adjBitMap;
- ArrayList<String> lists = new ArrayList<String>();
-
- lists.add("000000000000000000000000001001"); //AAAAAAAAAAAAAGC
- lists.add("110011000000000000000000000000"); //TATAAAAAAAAAAAA
- String binaryString = "";
- for(int i = 1; i < s.length()-GraphVertexOperation.k; i++){
- binaryString = GraphVertexOperation.convertGeneCodeToBinaryString(s.substring(i,i+GraphVertexOperation.k));
- if(lists.contains(binaryString)){
- System.out.println("error: " + binaryString);
- return;
- }
- lists.add(binaryString);
- outKey = new BytesWritable(BitwiseOperation.convertBinaryStringToBytes(binaryString));
- outValue = new KmerCountValue();
- adjBitMap = GraphVertexOperation.getPrecursorFromGeneCode((byte)0, s.charAt(i-1));
- adjBitMap = GraphVertexOperation.getSucceedFromGeneCode(adjBitMap, s.charAt(i+GraphVertexOperation.k));
- outValue.setAdjBitMap(adjBitMap);
- writer.append(outKey, outValue);
- }
- /**
- * CAG - AGC ------ TAT - ATA
- * GAG ATC
- */
- // AGC
- String tmpKey = "000000000000000000000000001001";
- byte[] key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- String tmpValue = "00000001";
- byte value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- BytesWritable keyWritable = new BytesWritable(key);
- ByteWritable valueWritable = new ByteWritable(value);
-
- ArrayList<BytesWritable> arrayOfKeys = new ArrayList<BytesWritable>();
- arrayOfKeys.add(keyWritable);
- ArrayList<ByteWritable> arrayOfValues = new ArrayList<ByteWritable>();
- arrayOfValues.add(valueWritable);
-
- // TAT
- tmpKey = "110011000000000000000000000000";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "00100000";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- KmerCountValue kmerCountValue = null;
- //wirte to sequence file
- for(int i = 0; i < arrayOfKeys.size(); i++){
- kmerCountValue = new KmerCountValue();
- kmerCountValue.setAdjBitMap(arrayOfValues.get(i).get());
- writer.append(arrayOfKeys.get(i), kmerCountValue);
- }
- writer.close();
-
- //read outputs
- Path inFile = new Path(outDir, "sequenceFileMergeTest15");
- outKey = new BytesWritable();
- outValue = new KmerCountValue();
- SequenceFile.Reader reader = new SequenceFile.Reader(fileSys, inFile, conf);
- int iteration = 1;
- try {
- while(reader.next(outKey, outValue)){
- System.out.println(iteration);
- String kmer = BitwiseOperation.convertBytesToBinaryStringKmer(outKey.getBytes(),GraphVertexOperation.k);
- System.out.println("key: " + kmer);
- System.out.println("code: " + GraphVertexOperation.convertBinaryStringToGenecode(kmer));
- System.out.println("value: " + BitwiseOperation.convertByteToBinaryString(outValue.getAdjBitMap()));
- System.out.println();
- iteration++;
- }
- } finally {
- reader.close();
- }
- }
- public static void generateSequenceFileFromTwoGeneCode3(String s, String s2) throws IOException{
- Configuration conf = new Configuration();
- Path outFile = new Path(outDir, "sequenceFileMergeTest4");
- FileSystem fileSys = FileSystem.get(conf);
- SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
- outFile, BytesWritable.class, KmerCountValue.class,
- CompressionType.NONE);
- BytesWritable outKey = null;
- KmerCountValue outValue;
- byte adjBitMap;
- ArrayList<String> lists = new ArrayList<String>();
- lists.add("001001"); //AGC
- lists.add("110011"); //TAT
- String binaryString = "";
- for(int i = 1; i < s.length()-GraphVertexOperation.k; i++){
- binaryString = GraphVertexOperation.convertGeneCodeToBinaryString(s.substring(i,i+GraphVertexOperation.k));
- if(lists.contains(binaryString)){
- System.out.println("error: " + binaryString);
- return;
- }
- lists.add(binaryString);
- outKey = new BytesWritable(BitwiseOperation.convertBinaryStringToBytes(binaryString));
- outValue = new KmerCountValue();
- adjBitMap = GraphVertexOperation.getPrecursorFromGeneCode((byte)0, s.charAt(i-1));
- adjBitMap = GraphVertexOperation.getSucceedFromGeneCode(adjBitMap, s.charAt(i+GraphVertexOperation.k));
- outValue.setAdjBitMap(adjBitMap);
- writer.append(outKey, outValue);
- }
- /**
- * CAG - AGC ------ TAT - ATA
- * GAG ATC
- */
- // AGC
- String tmpKey = "001001";
- byte[] key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- String tmpValue = "00000001";
- byte value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- BytesWritable keyWritable = new BytesWritable(key);
- ByteWritable valueWritable = new ByteWritable(value);
-
- ArrayList<BytesWritable> arrayOfKeys = new ArrayList<BytesWritable>();
- arrayOfKeys.add(keyWritable);
- ArrayList<ByteWritable> arrayOfValues = new ArrayList<ByteWritable>();
- arrayOfValues.add(valueWritable);
-
- // TAT
- tmpKey = "110011";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "00100000";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- KmerCountValue kmerCountValue = null;
- //wirte to sequence file
- for(int i = 0; i < arrayOfKeys.size(); i++){
- kmerCountValue = new KmerCountValue();
- kmerCountValue.setAdjBitMap(arrayOfValues.get(i).get());
- writer.append(arrayOfKeys.get(i), kmerCountValue);
- }
-
- lists.add("111101"); //TTC
- lists.add("011001"); //CGC
- binaryString = "";
- for(int i = 1; i < s2.length()-GraphVertexOperation.k; i++){
- binaryString = GraphVertexOperation.convertGeneCodeToBinaryString(s2.substring(i,i+GraphVertexOperation.k));
- if(lists.contains(binaryString)){
- System.out.println("error: " + binaryString);
- return;
- }
- lists.add(binaryString);
- outKey = new BytesWritable(BitwiseOperation.convertBinaryStringToBytes(binaryString));
- outValue = new KmerCountValue();
- adjBitMap = GraphVertexOperation.getPrecursorFromGeneCode((byte)0, s2.charAt(i-1));
- adjBitMap = GraphVertexOperation.getSucceedFromGeneCode(adjBitMap, s2.charAt(i+GraphVertexOperation.k));
- outValue.setAdjBitMap(adjBitMap);
- writer.append(outKey, outValue);
- }
- /**
- * CAG - AGC ------ TAT - ATA
- * GAG ATC
- */
- // TTC
- tmpKey = "111101";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "00000001";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
-
- arrayOfKeys = new ArrayList<BytesWritable>();
- arrayOfKeys.add(keyWritable);
- arrayOfValues = new ArrayList<ByteWritable>();
- arrayOfValues.add(valueWritable);
-
- // CGC
- tmpKey = "011001";
- key = BitwiseOperation.convertBinaryStringToBytes(tmpKey);
- tmpValue = "00100000";
- value = BitwiseOperation.convertBinaryStringToByte(tmpValue);
- keyWritable = new BytesWritable(key);
- valueWritable = new ByteWritable(value);
- arrayOfKeys.add(keyWritable);
- arrayOfValues.add(valueWritable);
-
- kmerCountValue = null;
- //wirte to sequence file
- for(int i = 0; i < arrayOfKeys.size(); i++){
- kmerCountValue = new KmerCountValue();
- kmerCountValue.setAdjBitMap(arrayOfValues.get(i).get());
- writer.append(arrayOfKeys.get(i), kmerCountValue);
- }
- writer.close();
- }
- public static byte[] hexStringToByteArray(String s) {
- int len = s.length();
- byte[] data = new byte[len / 2];
- for (int i = 0; i < len; i += 2) {
- data[i / 2] = (byte) ((Character.digit(s.charAt(i), 16) << 4)
- + Character.digit(s.charAt(i+1), 16));
- }
- return data;
- }
-}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/generateSmallFile.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/generateSmallFile.java
index f44781f..45329d0 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/generateSmallFile.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/sequencefile/generateSmallFile.java
@@ -40,9 +40,9 @@
public static void main(String[] args) throws IOException {
// TODO Auto-generated method stub
Path dir = new Path("data/webmap");
- Path inFile = new Path(dir, "part-1");
- Path outFile = new Path(dir, "part-1-out-20000000");
- generateNumOfLinesFromBigFile(inFile,outFile,20000000);
+ Path inFile = new Path(dir, "part-2");
+ Path outFile = new Path(dir, "part-2-out-20000");
+ generateNumOfLinesFromBigFile(inFile,outFile,20000);
}
}
diff --git a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/testcase/GenerateTestInput.java b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/testcase/GenerateTestInput.java
index 2dac10d..8ff5589 100644
--- a/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/testcase/GenerateTestInput.java
+++ b/genomix/genomix-pregelix/src/main/java/edu/uci/ics/genomix/pregelix/testcase/GenerateTestInput.java
@@ -55,13 +55,13 @@
// TODO Auto-generated method stub
OutputStreamWriter writer;
try {
- writer = new OutputStreamWriter(new FileOutputStream("SinglePath"));
- writer.write(simplePath(5,10,1));
+ writer = new OutputStreamWriter(new FileOutputStream("graph/55/SinglePath_55"));
+ writer.write(simplePath(55,60,1));
writer.close();
- writer = new OutputStreamWriter(new FileOutputStream("SimplePath"));
- writer.write(simplePath(5,10,3));
+ writer = new OutputStreamWriter(new FileOutputStream("graph/55/SimplePath_55"));
+ writer.write(simplePath(55,60,3));
writer.close();
- writer = new OutputStreamWriter(new FileOutputStream("TreePath"));
+ /*writer = new OutputStreamWriter(new FileOutputStream("TreePath"));
writer.write(treePath(5, 5, 5, 3));
writer.close();
writer = new OutputStreamWriter(new FileOutputStream("CyclePath"));
@@ -69,7 +69,7 @@
writer.close();
writer = new OutputStreamWriter(new FileOutputStream("BridgePath"));
writer.write(bridgePath(5,2));
- writer.close();
+ writer.close();*/
} catch (IOException e) {
e.printStackTrace();
}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java
index 3cfe4c8..53340dc 100644
--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobGen/JobGenerator.java
@@ -48,10 +48,12 @@
job.setVertexClass(MergeGraphVertex.class);
job.setVertexInputFormatClass(BinaryLoadGraphInputFormat.class);
job.setVertexOutputFormatClass(BinaryLoadGraphOutputFormat.class);
+ job.setDynamicVertexValueSize(true);
job.setOutputKeyClass(BytesWritable.class);
job.setOutputValueClass(ValueStateWritable.class);
FileInputFormat.setInputPaths(job, HDFS_INPUTPATH);
FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH));
+ job.getConfiguration().setInt(MergeGraphVertex.KMER_SIZE, 55);
job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
}
@@ -69,6 +71,7 @@
job.setOutputValueClass(ValueStateWritable.class);
FileInputFormat.setInputPaths(job, HDFS_INPUTPATH);
FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH));
+ job.getConfiguration().setInt(LogAlgorithmForMergeGraphVertex.KMER_SIZE, 55);
job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
}
diff --git a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/RunJobTestSuite.java b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/RunJobTestSuite.java
index dac588d..0238ca0 100644
--- a/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/RunJobTestSuite.java
+++ b/genomix/genomix-pregelix/src/test/java/edu/uci/ics/genomix/pregelix/JobRun/RunJobTestSuite.java
@@ -40,7 +40,7 @@
private static final String PATH_TO_ONLY = "src/test/resources/only.txt";
private static final String FILE_EXTENSION_OF_RESULTS = "result";
- private static final String DATA_PATH = "data/test_55_small/part-0";//sequenceShortFileMergeTest
+ private static final String DATA_PATH = "data/input/part-1-out-500000";//sequenceShortFileMergeTest
private static final String HDFS_PATH = "/webmap/";
private static final String HYRACKS_APP_NAME = "pregelix";
diff --git a/genomix/genomix-pregelix/src/test/resources/expected/BinaryLoadGraph.result b/genomix/genomix-pregelix/src/test/resources/expected/BinaryLoadGraph.result
deleted file mode 100644
index 676e5c8..0000000
--- a/genomix/genomix-pregelix/src/test/resources/expected/BinaryLoadGraph.result
+++ /dev/null
@@ -1,8 +0,0 @@
-Vertex(id=24,value=1, edges=())
-Vertex(id=38,value=34, edges=())
-Vertex(id=4c,value=68, edges=())
-Vertex(id=70,value=72, edges=())
-Vertex(id=90,value=24, edges=())
-Vertex(id=9c,value=-127, edges=())
-Vertex(id=cc,value=32, edges=())
-Vertex(id=e4,value=24, edges=())
diff --git a/genomix/genomix-pregelix/src/test/resources/expected/LoadGraph.result b/genomix/genomix-pregelix/src/test/resources/expected/LoadGraph.result
deleted file mode 100644
index 6595d86..0000000
--- a/genomix/genomix-pregelix/src/test/resources/expected/LoadGraph.result
+++ /dev/null
@@ -1,4 +0,0 @@
-Vertex(id=06,value=34, edges=())
-Vertex(id=07,value=68, edges=())
-Vertex(id=1b,value=-120, edges=())
-Vertex(id=2d,value=-34, edges=())