Revised genomix

git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_genomix@2913 123451ca-8445-de46-9d55-352943316053
diff --git a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/FileScanDescriptor.java b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/FileScanDescriptor.java
index f0b277a..f44c790 100644
--- a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/FileScanDescriptor.java
+++ b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/FileScanDescriptor.java
@@ -71,6 +71,10 @@
 			private ArrayTupleBuilder tupleBuilder;

 			private ByteBuffer outputBuffer;

 			private FrameTupleAppender outputAppender;

+			

+			private byte filter0;

+			private byte filter1;

+			private byte filter2;

 

 			@SuppressWarnings("resource")

 			@Override

@@ -80,6 +84,19 @@
 				outputBuffer = ctx.allocateFrame();

 				outputAppender = new FrameTupleAppender(ctx.getFrameSize());

 				outputAppender.reset(outputBuffer, true);

+				

+				filter0 = (byte) 0xC0;

+				filter1 = (byte) 0xFC;

+				filter2 = 0;

+

+				int r = byteNum * 8 - 2 * k;

+				r = 8 - r;

+				for (int i = 0; i < r; i++) {

+					filter2 <<= 1;

+					filter2 |= 1;

+				}

+				

+				

 				try {// one try with multiple catch?

 					writer.open();

 					String s = pathSurfix + String.valueOf(temp);

@@ -93,7 +110,7 @@
 								new InputStreamReader(

 										new FileInputStream(fa[i])));

 						String read = readsfile.readLine();

-						int count  = 0;

+						//int count  = 0;

 						while (read != null) {

 							read = readsfile.readLine();

 							//if(count % 4 == 1)

@@ -104,8 +121,8 @@
 							read = readsfile.readLine();

 

 							read = readsfile.readLine();

-							count += 1;

-							System.err.println(count);

+							//count += 1;

+							//System.err.println(count);

 						}

 					}

 					if (outputAppender.getTupleCount() > 0) {

@@ -131,7 +148,7 @@
 				int count = 0;

 				int bcount = 0;

 

-				for (int i = start; i < start + k; i++) {

+				for (int i = start; i < start+k ; i++) {

 					l <<= 2;

 					switch (array[i]) {

 					case 'A':

@@ -152,13 +169,14 @@
 						break;

 					}

 					count += 2;

-					if (count % 8 == 0) {

+					if (count % 8 == 0 && byteNum != bcount + 1) {

 						bcount += 1;

-						bytes[bcount] = l;

+						bytes[byteNum-bcount] = l;

 						count = 0;

+						l = 0;

 					}

 				}

-				bytes[bcount + 1] = l;

+				bytes[1] = l;

 				return bytes;

 			}

 

@@ -209,30 +227,20 @@
 			}

 

 			void MoveKmer(byte[] bytes, byte c) {

-				byte filter0 = (byte) 0xC0;

-				byte filter1 = (byte) 0xFC;

-				byte filter2 = 0;

-

-				int r = byteNum * 8 - 2 * k;

-				r = 8 - r;

-				for (int i = 0; i < r; i++) {

-					filter2 <<= 1;

-					filter2 |= 1;

-				}

-

 				int i = byteNum;

 				bytes[i] <<= 2;

 				bytes[i] &= filter2;

 				i -= 1;

-				while (i >= 0) {

+				while (i > 0) {

 					byte f = (byte) (bytes[i] & filter0);

 					f >>= 6;

+					f &= 3;

 					bytes[i + 1] |= f;

 					bytes[i] <<= 2;

 					bytes[i] &= filter1;

 					i -= 1;

 				}

-				bytes[0] |= ConvertSymbol(c);

+				bytes[1] |= ConvertSymbol(c);

 			}

 

 			private void SplitReads(byte[] array) {

diff --git a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/PrinterOperatorDescriptor.java b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/PrinterOperatorDescriptor.java
index 9070a35..f8decc3 100644
--- a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/PrinterOperatorDescriptor.java
+++ b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/PrinterOperatorDescriptor.java
@@ -118,9 +118,9 @@
 							PrintBytes(j);

 						}

 						if (true == writeFile) {

-							twriter.write("\n");

+							twriter.write("\r\n");

 						} else {

-							System.err.println();

+							System.err.println("");

 						}

 					}

 				} catch (IOException e) {

diff --git a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/ReadsKeyValueParserFactory.java b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/ReadsKeyValueParserFactory.java
index 60eaa35..be7ada4 100644
--- a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/ReadsKeyValueParserFactory.java
+++ b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/ReadsKeyValueParserFactory.java
@@ -23,10 +23,23 @@
 

 	private int k;

 	private int byteNum;

+	private byte filter0;

+	private byte filter1;

+	private byte filter2;

 

 	public ReadsKeyValueParserFactory(int k) {

 		this.k = k;

 		byteNum = (byte) Math.ceil((double) k / 4.0);

+		filter0 = (byte) 0xC0;

+		filter1 = (byte) 0xFC;

+		filter2 = 0;

+

+		int r = byteNum * 8 - 2 * k;

+		r = 8 - r;

+		for (int i = 0; i < r; i++) {

+			filter2 <<= 1;

+			filter2 |= 1;

+		}

 	}

 

 	@Override

@@ -69,7 +82,7 @@
 				int count = 0;

 				int bcount = 0;

 

-				for (int i = start; i < start + k; i++) {

+				for (int i = start; i < start+k ; i++) {

 					l <<= 2;

 					switch (array[i]) {

 					case 'A':

@@ -90,13 +103,14 @@
 						break;

 					}

 					count += 2;

-					if (count % 8 == 0) {

+					if (count % 8 == 0 && byteNum != bcount + 1) {

 						bcount += 1;

-						bytes[bcount] = l;

+						bytes[byteNum-bcount] = l;

 						count = 0;

+						l = 0;

 					}

 				}

-				bytes[bcount + 1] = l;

+				bytes[1] = l;

 				return bytes;

 			}

 

@@ -147,17 +161,6 @@
 			}

 

 			void MoveKmer(byte[] bytes, byte c) {

-				byte filter0 = (byte) 0xC0;

-				byte filter1 = (byte) 0xFC;

-				byte filter2 = 0;

-

-				int r = byteNum * 8 - 2 * k;

-				r = 8 - r;

-				for (int i = 0; i < r; i++) {

-					filter2 <<= 1;

-					filter2 |= 1;

-				}

-

 				int i = byteNum;

 				bytes[i] <<= 2;

 				bytes[i] &= filter2;

@@ -165,13 +168,16 @@
 				while (i > 0) {

 					byte f = (byte) (bytes[i] & filter0);

 					f >>= 6;

+					f &= 3;

 					bytes[i + 1] |= f;

 					bytes[i] <<= 2;

 					bytes[i] &= filter1;

+					i -= 1;

 				}

-				bytes[i + 1] |= ConvertSymbol(c);

+				bytes[1] |= ConvertSymbol(c);

 			}

 

+

 			private void SplitReads(byte[] array, IFrameWriter writer) {

 				try {

 					byte[] bytes = null;

diff --git a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/Tester.java b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/Tester.java
index 22cb0f7..30a2277 100644
--- a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/Tester.java
+++ b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/Tester.java
@@ -282,7 +282,7 @@
         //PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, cross_grouper, NC1_ID, NC2_ID,NC3_ID,NC4_ID);

         spec.connect(conn_partition, single_grouper, 0, cross_grouper, 0);

 

-        //PrinterOperatorDescriptor printer = new PrinterOperatorDescriptor(spec, "G:\\data\\result");

+        //PrinterOperatorDescriptor printer = new PrinterOperatorDescriptor(spec);

         PrinterOperatorDescriptor printer = new PrinterOperatorDescriptor(spec, "G:\\data\\result");

         //PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID, NC2_ID,NC3_ID,NC4_ID);

         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);