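Revised genomix: hoist k-mer bit filters into fields and fix MoveKmer shifting

- FileScanDescriptor, ReadsKeyValueParserFactory: compute filter0/filter1/filter2
  once (at open / in the constructor) instead of on every MoveKmer call.
- MoveKmer: mask the carried bits with `f &= 3` after the signed right shift,
  stop the carry loop at index 1 (`i > 0`), add the missing `i -= 1` in
  ReadsKeyValueParserFactory, and OR the new symbol into bytes[1].
- K-mer packing loop: fill bytes from index byteNum downwards, reset the
  accumulator after each full byte, and store the final partial byte in bytes[1].
- FileScanDescriptor: comment out the per-read debug counter output.
- PrinterOperatorDescriptor: write "\r\n" line endings when writing to a file.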
git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_genomix@2913 123451ca-8445-de46-9d55-352943316053
diff --git a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/FileScanDescriptor.java b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/FileScanDescriptor.java
index f0b277a..f44c790 100644
--- a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/FileScanDescriptor.java
+++ b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/FileScanDescriptor.java
@@ -71,6 +71,10 @@
private ArrayTupleBuilder tupleBuilder;
private ByteBuffer outputBuffer;
private FrameTupleAppender outputAppender;
+
+ private byte filter0;
+ private byte filter1;
+ private byte filter2;
@SuppressWarnings("resource")
@Override
@@ -80,6 +84,19 @@
outputBuffer = ctx.allocateFrame();
outputAppender = new FrameTupleAppender(ctx.getFrameSize());
outputAppender.reset(outputBuffer, true);
+
+ filter0 = (byte) 0xC0;
+ filter1 = (byte) 0xFC;
+ filter2 = 0;
+
+ int r = byteNum * 8 - 2 * k;
+ r = 8 - r;
+ for (int i = 0; i < r; i++) {
+ filter2 <<= 1;
+ filter2 |= 1;
+ }
+
+
try {// one try with multiple catch?
writer.open();
String s = pathSurfix + String.valueOf(temp);
@@ -93,7 +110,7 @@
new InputStreamReader(
new FileInputStream(fa[i])));
String read = readsfile.readLine();
- int count = 0;
+ //int count = 0;
while (read != null) {
read = readsfile.readLine();
//if(count % 4 == 1)
@@ -104,8 +121,8 @@
read = readsfile.readLine();
read = readsfile.readLine();
- count += 1;
- System.err.println(count);
+ //count += 1;
+ //System.err.println(count);
}
}
if (outputAppender.getTupleCount() > 0) {
@@ -131,7 +148,7 @@
int count = 0;
int bcount = 0;
- for (int i = start; i < start + k; i++) {
+ for (int i = start; i < start+k ; i++) {
l <<= 2;
switch (array[i]) {
case 'A':
@@ -152,13 +169,14 @@
break;
}
count += 2;
- if (count % 8 == 0) {
+ if (count % 8 == 0 && byteNum != bcount + 1) {
bcount += 1;
- bytes[bcount] = l;
+ bytes[byteNum-bcount] = l;
count = 0;
+ l = 0;
}
}
- bytes[bcount + 1] = l;
+ bytes[1] = l;
return bytes;
}
@@ -209,30 +227,20 @@
}
void MoveKmer(byte[] bytes, byte c) {
- byte filter0 = (byte) 0xC0;
- byte filter1 = (byte) 0xFC;
- byte filter2 = 0;
-
- int r = byteNum * 8 - 2 * k;
- r = 8 - r;
- for (int i = 0; i < r; i++) {
- filter2 <<= 1;
- filter2 |= 1;
- }
-
int i = byteNum;
bytes[i] <<= 2;
bytes[i] &= filter2;
i -= 1;
- while (i >= 0) {
+ while (i > 0) {
byte f = (byte) (bytes[i] & filter0);
f >>= 6;
+ f &= 3;
bytes[i + 1] |= f;
bytes[i] <<= 2;
bytes[i] &= filter1;
i -= 1;
}
- bytes[0] |= ConvertSymbol(c);
+ bytes[1] |= ConvertSymbol(c);
}
private void SplitReads(byte[] array) {
diff --git a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/PrinterOperatorDescriptor.java b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/PrinterOperatorDescriptor.java
index 9070a35..f8decc3 100644
--- a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/PrinterOperatorDescriptor.java
+++ b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/PrinterOperatorDescriptor.java
@@ -118,9 +118,9 @@
PrintBytes(j);
}
if (true == writeFile) {
- twriter.write("\n");
+ twriter.write("\r\n");
} else {
- System.err.println();
+ System.err.println("");
}
}
} catch (IOException e) {
diff --git a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/ReadsKeyValueParserFactory.java b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/ReadsKeyValueParserFactory.java
index 60eaa35..be7ada4 100644
--- a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/ReadsKeyValueParserFactory.java
+++ b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/ReadsKeyValueParserFactory.java
@@ -23,10 +23,23 @@
private int k;
private int byteNum;
+ private byte filter0;
+ private byte filter1;
+ private byte filter2;
public ReadsKeyValueParserFactory(int k) {
this.k = k;
byteNum = (byte) Math.ceil((double) k / 4.0);
+ filter0 = (byte) 0xC0;
+ filter1 = (byte) 0xFC;
+ filter2 = 0;
+
+ int r = byteNum * 8 - 2 * k;
+ r = 8 - r;
+ for (int i = 0; i < r; i++) {
+ filter2 <<= 1;
+ filter2 |= 1;
+ }
}
@Override
@@ -69,7 +82,7 @@
int count = 0;
int bcount = 0;
- for (int i = start; i < start + k; i++) {
+ for (int i = start; i < start+k ; i++) {
l <<= 2;
switch (array[i]) {
case 'A':
@@ -90,13 +103,14 @@
break;
}
count += 2;
- if (count % 8 == 0) {
+ if (count % 8 == 0 && byteNum != bcount + 1) {
bcount += 1;
- bytes[bcount] = l;
+ bytes[byteNum-bcount] = l;
count = 0;
+ l = 0;
}
}
- bytes[bcount + 1] = l;
+ bytes[1] = l;
return bytes;
}
@@ -147,17 +161,6 @@
}
void MoveKmer(byte[] bytes, byte c) {
- byte filter0 = (byte) 0xC0;
- byte filter1 = (byte) 0xFC;
- byte filter2 = 0;
-
- int r = byteNum * 8 - 2 * k;
- r = 8 - r;
- for (int i = 0; i < r; i++) {
- filter2 <<= 1;
- filter2 |= 1;
- }
-
int i = byteNum;
bytes[i] <<= 2;
bytes[i] &= filter2;
@@ -165,13 +168,16 @@
while (i > 0) {
byte f = (byte) (bytes[i] & filter0);
f >>= 6;
+ f &= 3;
bytes[i + 1] |= f;
bytes[i] <<= 2;
bytes[i] &= filter1;
+ i -= 1;
}
- bytes[i + 1] |= ConvertSymbol(c);
+ bytes[1] |= ConvertSymbol(c);
}
+
private void SplitReads(byte[] array, IFrameWriter writer) {
try {
byte[] bytes = null;
diff --git a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/Tester.java b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/Tester.java
index 22cb0f7..30a2277 100644
--- a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/Tester.java
+++ b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/Tester.java
@@ -282,7 +282,7 @@
//PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, cross_grouper, NC1_ID, NC2_ID,NC3_ID,NC4_ID);
spec.connect(conn_partition, single_grouper, 0, cross_grouper, 0);
- //PrinterOperatorDescriptor printer = new PrinterOperatorDescriptor(spec, "G:\\data\\result");
+ //PrinterOperatorDescriptor printer = new PrinterOperatorDescriptor(spec);
PrinterOperatorDescriptor printer = new PrinterOperatorDescriptor(spec, "G:\\data\\result");
//PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID, NC2_ID,NC3_ID,NC4_ID);
PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);