create svn dir
git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_genomix@2899 123451ca-8445-de46-9d55-352943316053
diff --git a/genomix/genomix-hadoop/actual/result2/.part-00000.crc b/genomix/genomix-hadoop/actual/result2/.part-00000.crc
index 8871afc..ea0c7ed 100755
--- a/genomix/genomix-hadoop/actual/result2/.part-00000.crc
+++ b/genomix/genomix-hadoop/actual/result2/.part-00000.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/actual/result2/part-00000 b/genomix/genomix-hadoop/actual/result2/part-00000
index 9d64c60..882a5db 100755
--- a/genomix/genomix-hadoop/actual/result2/part-00000
+++ b/genomix/genomix-hadoop/actual/result2/part-00000
@@ -1,7 +1,4 @@
-01 33 1
-03 1 1
-04 -103 2
-0c 18 1
-10 18 1
-13 16 1
-31 17 1
+10 03 18 1
+31 00 1 1
+41 00 -128 1
+c4 00 17 1
diff --git a/genomix/genomix-hadoop/data/webmap/text.txt b/genomix/genomix-hadoop/data/webmap/text.txt
index c7fb713..f63a141 100755
--- a/genomix/genomix-hadoop/data/webmap/text.txt
+++ b/genomix/genomix-hadoop/data/webmap/text.txt
@@ -1,4 +1,4 @@
@625E1AAXX100810:1:100:10000:10271/1
-AATAGAAGAT
+AATAGAAG
+
-EDBDB?BEEEDGGEGGGDGGGA>DG@GGD;GD@DG@F?<B<BFFD?
\ No newline at end of file
+EDBDB?BEEEDGGEGGGDGGGA>DG@GGD;GD@DG@F?<B<BFFD?
diff --git a/genomix/genomix-hadoop/expected/result2 b/genomix/genomix-hadoop/expected/result2
index aa56fbf..2c44be3 100755
--- a/genomix/genomix-hadoop/expected/result2
+++ b/genomix/genomix-hadoop/expected/result2
@@ -1,7 +1,4 @@
-01 33 1
-03 1 1
-04 -103 2
-0c 18 1
-10 18 1
-13 16 1
-31 17 1
+10 03 18 1
+31 00 1 1
+41 00 -128 1
+c4 00 17 1
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java
index 3eb5e12..d8d0ff8 100755
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java
@@ -103,7 +103,12 @@
boolean isValid = geneMatcher.matches();
int i = 0;
if (isValid == true) {
- byte[] kmerValue = new byte[KMER_SIZE * 2 / 8 + 1];
+ int size = 0;
+ if (KMER_SIZE * 2 % 8 == 0)
+ size = KMER_SIZE * 2 / 8;
+ else
+ size = KMER_SIZE * 2 / 8 + 1;
+ byte[] kmerValue = new byte[size];
for (int k = 0; k < kmerValue.length; k++)
kmerValue[i] = 0x00;
CurrenByte currentByte = new CurrenByte();
@@ -114,7 +119,7 @@
byte kmerAdjList = 0;
byte initial;
if (i >= KMER_SIZE) {
- outputKmer.set(kmerValue, 0, KMER_SIZE * 2 / 8 + 1);
+ outputKmer.set(kmerValue, 0, size);
switch ((int) preMarker) {
case -1:
kmerAdjList = (byte) (kmerAdjList + 0);
@@ -136,7 +141,6 @@
switch (geneLine.charAt(i)) {
case 'A':
kmerAdjList = (byte) (kmerAdjList + 1);
-
initial = (byte) 0x00;
if (kmerValue.length == 1) {
currentByte = lastByteShift(kmerValue[kmerValue.length - 1], initial, KMER_SIZE);
@@ -159,7 +163,6 @@
break;
case 'G':
kmerAdjList = (byte) (kmerAdjList + 2);
-
initial = (byte) 0x01;
if (kmerValue.length == 1) {
currentByte = lastByteShift(kmerValue[kmerValue.length - 1], initial, KMER_SIZE);
@@ -181,7 +184,6 @@
break;
case 'C':
kmerAdjList = (byte) (kmerAdjList + 4);
-
initial = (byte) 0x02;
if (kmerValue.length == 1) {
currentByte = lastByteShift(kmerValue[kmerValue.length - 1], initial, KMER_SIZE);
@@ -217,6 +219,9 @@
preMarker = currentByte.preMarker;
kmerValue[j] = currentByte.curByte;
}
+ currentByte = lastByteShift(kmerValue[kmerValue.length - 1], preMarker, KMER_SIZE);
+ preMarker = currentByte.preMarker;
+ kmerValue[kmerValue.length - 1] = currentByte.curByte;
}
break;
}
@@ -231,21 +236,24 @@
if (i == geneLine.length()) {
byte kmerAdjList = 0;
switch ((int) preMarker) {
+ case -1:
+ kmerAdjList = (byte) (kmerAdjList + 0);
+ break;
case 0:
kmerAdjList = (byte) (kmerAdjList + 16);
break;
- case 16:
+ case 1:
kmerAdjList = (byte) (kmerAdjList + 32);
break;
- case 32:
+ case 2:
kmerAdjList = (byte) (kmerAdjList + 64);
break;
- case 48:
+ case 3:
kmerAdjList = (byte) (kmerAdjList + 128);
break;
}
outputAdjList.set(kmerAdjList, count);
- outputKmer.set(kmerValue, 0, KMER_SIZE * 2 / 8 + 1);
+ outputKmer.set(kmerValue, 0, size);
output.collect(outputKmer, outputAdjList);
}
}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java
index d04cbdb..49126d6 100755
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java
@@ -56,7 +56,7 @@
// run graph transformation tests
GenomixDriver tldriver = new GenomixDriver();
- tldriver.run(HDFS_PATH, RESULT_PATH, 2, 3, HADOOP_CONF_PATH);
+ tldriver.run(HDFS_PATH, RESULT_PATH, 2, 5, HADOOP_CONF_PATH);
dumpResult();
TestUtils.compareWithResult(new File(DUMPED_RESULT), new File(EXPECTED_PATH));
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java
index e63aa5b..237a764 100755
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java
@@ -1,4 +1,5 @@
package edu.uci.ics.utils;
+
/*
* Copyright 2009-2012 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,6 +17,7 @@
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
+
/**
* This class offer the service for graphbuildingtest.class
*/
@@ -50,7 +52,7 @@
private static boolean equalStrings(String s1, String s2) {
String[] rowsOne = s1.split("\t");
- String[] rowsTwo = s2.split(" ");
+ String[] rowsTwo = s2.split("\t");
if (rowsOne.length != rowsTwo.length)
return false;