Replace ByteBuffer-based state access with direct byte-array access in MergeKmerAggregateFactory and DistributedMergeLmerAggregateFactory

git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_genomix@2993 123451ca-8445-de46-9d55-352943316053
diff --git a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/FileScanDescriptor.java b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/FileScanDescriptor.java
index d8f0122..ec71111 100644
--- a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/FileScanDescriptor.java
+++ b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/FileScanDescriptor.java
@@ -5,6 +5,8 @@
 import java.io.FileInputStream;

 import java.io.InputStreamReader;

 import java.nio.ByteBuffer;

+import java.util.regex.Matcher;

+import java.util.regex.Pattern;

 

 import org.apache.hadoop.fs.Path;

 

@@ -30,7 +32,6 @@
 

 	private static final long serialVersionUID = 1L;

 	private int k;

-	private Path[] filesplit = null;

 	private String pathSurfix;

 	private int byteNum;

 

@@ -52,7 +53,6 @@
 			Path[] inputPaths) {

 		super(jobSpec, 0, 1);

 		this.k = kmers;

-		this.filesplit = inputPaths;

 		this.pathSurfix = inputPaths[0].toString();

 		// recordDescriptors[0] = news RecordDescriptor(

 		// new ISerializerDeserializer[] {

@@ -75,7 +75,6 @@
 			private ByteBuffer outputBuffer;

 			private FrameTupleAppender outputAppender;

 

-			@SuppressWarnings("resource")

 			@Override

 			public void initialize() {

 

@@ -101,13 +100,12 @@
 						while (read != null) {

 							read = readsfile.readLine();

 							// if(count % 4 == 1)

-							SplitReads(read.getBytes(),writer);

-							// read.getBytes();

-							read = readsfile.readLine();

-

-							read = readsfile.readLine();

-

-							read = readsfile.readLine();

+							Pattern genePattern = Pattern.compile("[AGCT]+");

+							Matcher geneMatcher = genePattern.matcher(read);

+							boolean isValid = geneMatcher.matches();

+							if (isValid) {

+								SplitReads(read.getBytes(),writer);

+							}

 							// count += 1;

 							// System.err.println(count);

 						}

diff --git a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/ReadsKeyValueParserFactory.java b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/ReadsKeyValueParserFactory.java
index bc65204..1f0cd73 100644
--- a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/ReadsKeyValueParserFactory.java
+++ b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/ReadsKeyValueParserFactory.java
@@ -34,8 +34,6 @@
 	@Override

 	public IKeyValueParser<LongWritable, Text> createKeyValueParser(

 			final IHyracksTaskContext ctx) {

-		;

-

 		final ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(2);

 		final ByteBuffer outputBuffer = ctx.allocateFrame();

 		final FrameTupleAppender outputAppender = new FrameTupleAppender(

diff --git a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/Tester.java b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/Tester.java
index 35702cf..6256f86 100644
--- a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/Tester.java
+++ b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/Tester.java
@@ -140,30 +140,30 @@
 		nc1 = new NodeControllerService(ncConfig1);

 		nc1.start();

 

-		NCConfig ncConfig2 = new NCConfig();

-		ncConfig2.ccHost = "localhost";

-		ncConfig2.ccPort = 39001;

-		ncConfig2.clusterNetIPAddress = "127.0.0.1";

-		ncConfig2.dataIPAddress = "127.0.0.1";

-		ncConfig2.nodeId = NC2_ID;

-		nc2 = new NodeControllerService(ncConfig2);

-		nc2.start();

-		NCConfig ncConfig3 = new NCConfig();

-		ncConfig3.ccHost = "localhost";

-		ncConfig3.ccPort = 39001;

-		ncConfig3.clusterNetIPAddress = "127.0.0.1";

-		ncConfig3.dataIPAddress = "127.0.0.1";

-		ncConfig3.nodeId = NC3_ID;

-		nc3 = new NodeControllerService(ncConfig3);

-		nc3.start();

-		NCConfig ncConfig4 = new NCConfig();

-		ncConfig4.ccHost = "localhost";

-		ncConfig4.ccPort = 39001;

-		ncConfig4.clusterNetIPAddress = "127.0.0.1";

-		ncConfig4.dataIPAddress = "127.0.0.1";

-		ncConfig4.nodeId = NC4_ID;

-		nc4 = new NodeControllerService(ncConfig4);

-		nc4.start();

+//		NCConfig ncConfig2 = new NCConfig();

+//		ncConfig2.ccHost = "localhost";

+//		ncConfig2.ccPort = 39001;

+//		ncConfig2.clusterNetIPAddress = "127.0.0.1";

+//		ncConfig2.dataIPAddress = "127.0.0.1";

+//		ncConfig2.nodeId = NC2_ID;

+//		nc2 = new NodeControllerService(ncConfig2);

+//		nc2.start();

+//		NCConfig ncConfig3 = new NCConfig();

+//		ncConfig3.ccHost = "localhost";

+//		ncConfig3.ccPort = 39001;

+//		ncConfig3.clusterNetIPAddress = "127.0.0.1";

+//		ncConfig3.dataIPAddress = "127.0.0.1";

+//		ncConfig3.nodeId = NC3_ID;

+//		nc3 = new NodeControllerService(ncConfig3);

+//		nc3.start();

+//		NCConfig ncConfig4 = new NCConfig();

+//		ncConfig4.ccHost = "localhost";

+//		ncConfig4.ccPort = 39001;

+//		ncConfig4.clusterNetIPAddress = "127.0.0.1";

+//		ncConfig4.dataIPAddress = "127.0.0.1";

+//		ncConfig4.nodeId = NC4_ID;

+//		nc4 = new NodeControllerService(ncConfig4);

+//		nc4.start();

 

 		hcc = new HyracksConnection(ccConfig.clientNetIpAddress,

 				ccConfig.clientNetPort);

@@ -181,10 +181,10 @@
 		spec.setFrameSize(32768);

 

 		FileScanDescriptor scan = new FileScanDescriptor(spec, k, filename);

-		PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scan,

-				NC1_ID, NC2_ID, NC3_ID, NC4_ID);

-		// PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scan,

-		// NC1_ID);

+//		PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scan,

+//				NC1_ID, NC2_ID, NC3_ID, NC4_ID);

+		 PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, scan,

+		 NC1_ID);

 

 		RecordDescriptor outputRec = new RecordDescriptor(

 				new ISerializerDeserializer[] { null,

@@ -319,30 +319,30 @@
 					new DistributedMergeLmerAggregateFactory(), outputRec, true);

 		}

 

-		// PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,

-		// single_grouper, NC1_ID);

-		PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,

-				single_grouper, NC1_ID, NC2_ID, NC3_ID, NC4_ID);

+		 PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,

+		 single_grouper, NC1_ID);

+//		PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,

+//				single_grouper, NC1_ID, NC2_ID, NC3_ID, NC4_ID);

 

 		IConnectorDescriptor readfileConn = new OneToOneConnectorDescriptor(

 				spec);

 		spec.connect(readfileConn, scan, 0, single_grouper, 0);

 

-		// PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,

-		// cross_grouper, NC1_ID);

-		PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,

-				cross_grouper, NC1_ID, NC2_ID, NC3_ID, NC4_ID);

+		 PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,

+		 cross_grouper, NC1_ID);

+//		PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,

+//				cross_grouper, NC1_ID, NC2_ID, NC3_ID, NC4_ID);

 		spec.connect(conn_partition, single_grouper, 0, cross_grouper, 0);

 

 		// PrinterOperatorDescriptor printer = new

 		// PrinterOperatorDescriptor(spec);

 		PrinterOperatorDescriptor printer = new PrinterOperatorDescriptor(spec,

 				"result");

-		PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,

-				NC1_ID, NC2_ID, NC3_ID, NC4_ID);

-		// PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,

-		// printer,

-		// NC1_ID);

+//		PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer,

+//				NC1_ID, NC2_ID, NC3_ID, NC4_ID);

+		 PartitionConstraintHelper.addAbsoluteLocationConstraint(spec,

+		 printer,

+		 NC1_ID);

 

 		IConnectorDescriptor printConn = new OneToOneConnectorDescriptor(spec);

 		spec.connect(printConn, cross_grouper, 0, printer, 0);

diff --git a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/DistributedMergeLmerAggregateFactory.java b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/DistributedMergeLmerAggregateFactory.java
index ba61d18..00f4256 100644
--- a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/DistributedMergeLmerAggregateFactory.java
+++ b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/DistributedMergeLmerAggregateFactory.java
@@ -2,7 +2,6 @@
 

 import java.io.DataOutput;

 import java.io.IOException;

-import java.nio.ByteBuffer;

 

 import edu.uci.ics.genomix.data.serde.ByteSerializerDeserializer;

 import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;

@@ -21,7 +20,7 @@
 public class DistributedMergeLmerAggregateFactory implements

 		IAggregatorDescriptorFactory {

 	private static final long serialVersionUID = 1L;

-	private static final int max = 127;

+	private static final int MAX = 127;

 

 	public DistributedMergeLmerAggregateFactory() {

 	}

@@ -50,6 +49,17 @@
 				});

 			}

 

+			private byte getField(IFrameTupleAccessor accessor, int tIndex,

+					int fieldId) {

+				int tupleOffset = accessor.getTupleStartOffset(tIndex);

+				int fieldStart = accessor.getFieldStartOffset(tIndex, fieldId);

+				int offset = tupleOffset + fieldStart

+						+ accessor.getFieldSlotsLength();

+				byte data = ByteSerializerDeserializer.getByte(accessor

+						.getBuffer().array(), offset);

+				return data;

+			}

+

 			/**

 			 * met a new kmer

 			 */

@@ -57,23 +67,9 @@
 			public void init(ArrayTupleBuilder tupleBuilder,

 					IFrameTupleAccessor accessor, int tIndex,

 					AggregateState state) throws HyracksDataException {

-				byte bitmap = 0;

-				byte count = 0;

-				int tupleOffset = accessor.getTupleStartOffset(tIndex);

-				int fieldStart = accessor.getFieldStartOffset(tIndex, 1);

-				bitmap |= ByteSerializerDeserializer.getByte(accessor

-						.getBuffer().array(),

-						tupleOffset + accessor.getFieldSlotsLength()

-								+ fieldStart);

 

-				tupleOffset = accessor.getTupleStartOffset(tIndex);

-				fieldStart = accessor.getFieldStartOffset(tIndex, 2);

-				int offset = tupleOffset + fieldStart

-						+ accessor.getFieldSlotsLength();

-

-				count += ByteSerializerDeserializer.getByte(accessor

-						.getBuffer().array(), offset);

-

+				byte bitmap = getField(accessor, tIndex, 1);

+				byte count = getField(accessor, tIndex, 2);

 				DataOutput fieldOutput = tupleBuilder.getDataOutput();

 				try {

 					fieldOutput.writeByte(bitmap);

@@ -84,31 +80,14 @@
 					throw new HyracksDataException(

 							"I/O exception when initializing the aggregator.");

 				}

-

 			}

 

 			@Override

 			public void aggregate(IFrameTupleAccessor accessor, int tIndex,

 					IFrameTupleAccessor stateAccessor, int stateTupleIndex,

 					AggregateState state) throws HyracksDataException {

-				// TODO Auto-generated method stub

-

-				byte bitmap = 0;

-				byte count = 0;

-

-				int tupleOffset = accessor.getTupleStartOffset(tIndex);

-				int fieldStart = accessor.getFieldStartOffset(tIndex, 1);

-				int offset = tupleOffset + accessor.getFieldSlotsLength()

-						+ fieldStart;

-				bitmap |= ByteSerializerDeserializer.getByte(accessor

-						.getBuffer().array(), offset);

-

-				tupleOffset = accessor.getTupleStartOffset(tIndex);

-				fieldStart = accessor.getFieldStartOffset(tIndex, 2);

-				offset = tupleOffset + fieldStart

-						+ accessor.getFieldSlotsLength();

-				count = ByteSerializerDeserializer.getByte(accessor.getBuffer()

-						.array(), offset);

+				byte bitmap = getField(accessor, tIndex, 1);

+				byte count = getField(accessor, tIndex, 2);

 

 				int statetupleOffset = stateAccessor

 						.getTupleStartOffset(stateTupleIndex);

@@ -116,39 +95,25 @@
 						stateTupleIndex, 1);

 				int stateoffset = statetupleOffset

 						+ stateAccessor.getFieldSlotsLength() + statefieldStart;

-

+				

 				byte[] data = stateAccessor.getBuffer().array();

 

-				ByteBuffer buf = ByteBuffer.wrap(data);

-				bitmap |= buf.getChar(stateoffset);

-				buf.position(stateoffset + 1);

-				count += buf.get();

-

-				if (count > max) {

-					count = (byte) max;

+				bitmap |= data[stateoffset];

+				count += data[stateoffset + 1];

+				if (count >= MAX) {

+					count = (byte) MAX;

 				}

-

-				buf.put(stateoffset, bitmap);

-				buf.put(stateoffset + 1, count);

+				data[stateoffset] = bitmap;

+				data[stateoffset + 1] = (byte) count;

 			}

 

 			@Override

 			public void outputPartialResult(ArrayTupleBuilder tupleBuilder,

 					IFrameTupleAccessor accessor, int tIndex,

 					AggregateState state) throws HyracksDataException {

-				// TODO Auto-generated method stub

-				byte bitmap;

-				byte count;

+				byte bitmap = getField(accessor, tIndex, 1);

+				byte count = getField(accessor, tIndex, 2);

 				DataOutput fieldOutput = tupleBuilder.getDataOutput();

-				byte[] data = accessor.getBuffer().array();

-				int tupleOffset = accessor.getTupleStartOffset(tIndex);

-				int fieldOffset = accessor.getFieldStartOffset(tIndex, 1);

-

-				int offset = fieldOffset + accessor.getFieldSlotsLength()

-						+ tupleOffset;

-				bitmap = ByteSerializerDeserializer.getByte(data, offset);

-

-				count = ByteSerializerDeserializer.getByte(data, offset + 1);

 				try {

 					fieldOutput.writeByte(bitmap);

 					tupleBuilder.addFieldEndOffset();

@@ -165,29 +130,7 @@
 			public void outputFinalResult(ArrayTupleBuilder tupleBuilder,

 					IFrameTupleAccessor accessor, int tIndex,

 					AggregateState state) throws HyracksDataException {

-				// TODO Auto-generated method stub

-				byte bitmap;

-				byte count;

-

-				byte[] data = accessor.getBuffer().array();

-				int tupleOffset = accessor.getTupleStartOffset(tIndex);

-				int fieldOffset = accessor.getFieldStartOffset(tIndex, 1);

-				int offset = tupleOffset + accessor.getFieldSlotsLength()

-						+ fieldOffset;

-

-				bitmap = ByteSerializerDeserializer.getByte(data, offset);

-				count = ByteSerializerDeserializer.getByte(data, offset + 1);

-

-				DataOutput fieldOutput = tupleBuilder.getDataOutput();

-				try {

-					fieldOutput.writeByte(bitmap);

-					tupleBuilder.addFieldEndOffset();

-					fieldOutput.writeByte(count);

-					tupleBuilder.addFieldEndOffset();

-				} catch (IOException e) {

-					throw new HyracksDataException(

-							"I/O exception when writing aggregation to the output buffer.");

-				}

+				outputPartialResult(tupleBuilder, accessor, tIndex, state);

 			}

 

 		};

diff --git a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/MergeKmerAggregateFactory.java b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/MergeKmerAggregateFactory.java
index 427ad4f..32c50bb 100644
--- a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/MergeKmerAggregateFactory.java
+++ b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/dataflow/aggregators/MergeKmerAggregateFactory.java
@@ -2,7 +2,6 @@
 

 import java.io.DataOutput;

 import java.io.IOException;

-import java.nio.ByteBuffer;

 

 import edu.uci.ics.genomix.data.serde.ByteSerializerDeserializer;

 import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;

@@ -20,7 +19,7 @@
  */

 public class MergeKmerAggregateFactory implements IAggregatorDescriptorFactory {

 	private static final long serialVersionUID = 1L;

-	private static final int max = 127;

+	private static final int MAX = 127;

 

 	public MergeKmerAggregateFactory() {

 	}

@@ -49,20 +48,23 @@
 				});

 			}

 

+			private byte getField(IFrameTupleAccessor accessor, int tIndex,

+					int fieldId) {

+				int tupleOffset = accessor.getTupleStartOffset(tIndex);

+				int fieldStart = accessor.getFieldStartOffset(tIndex, fieldId);

+				int offset = tupleOffset + fieldStart

+						+ accessor.getFieldSlotsLength();

+				byte data = ByteSerializerDeserializer.getByte(accessor

+						.getBuffer().array(), offset);

+				return data;

+			}

+

 			@Override

 			public void init(ArrayTupleBuilder tupleBuilder,

 					IFrameTupleAccessor accessor, int tIndex,

 					AggregateState state) throws HyracksDataException {

-				byte bitmap = 0;

-				byte count = 0;

-				int tupleOffset = accessor.getTupleStartOffset(tIndex);

-				int fieldStart = accessor.getFieldStartOffset(tIndex, 1);

-

-				bitmap |= accessor.getBuffer().get(

-						tupleOffset + accessor.getFieldSlotsLength()

-								+ fieldStart);

-

-				count += 1;

+				byte bitmap = getField(accessor, tIndex, 1);

+				byte count = 1;

 

 				DataOutput fieldOutput = tupleBuilder.getDataOutput();

 				try {

@@ -81,16 +83,8 @@
 			public void aggregate(IFrameTupleAccessor accessor, int tIndex,

 					IFrameTupleAccessor stateAccessor, int stateTupleIndex,

 					AggregateState state) throws HyracksDataException {

-				// TODO Auto-generated method stub

-				byte bitmap = 0;

-				byte count = 0;

-

-				int tupleOffset = accessor.getTupleStartOffset(tIndex);

-				int fieldStart = accessor.getFieldStartOffset(tIndex, 1);

-

-				bitmap |= accessor.getBuffer().get(

-						tupleOffset + accessor.getFieldSlotsLength()

-								+ fieldStart);

+				byte bitmap = getField(accessor, tIndex, 1);

+				short count = 1;

 

 				int statetupleOffset = stateAccessor

 						.getTupleStartOffset(stateTupleIndex);

@@ -99,42 +93,24 @@
 				int stateoffset = statetupleOffset

 						+ stateAccessor.getFieldSlotsLength() + statefieldStart;

 

-				count += 1;

-				if (count > max) {

-					count = max;

-				}

-

 				byte[] data = stateAccessor.getBuffer().array();

 

-				ByteBuffer buf = ByteBuffer.wrap(data);

-				bitmap |= buf.getChar(stateoffset);

-				buf.position(stateoffset + 1);

-				count += buf.get();

-

-				if (count > max) {

-					count = (byte) max;

+				bitmap |= data[stateoffset];

+				count += data[stateoffset + 1];

+				if (count >= MAX) {

+					count = (byte) MAX;

 				}

-

-				buf.put(stateoffset, bitmap);

-				buf.put(stateoffset + 1, count);

+				data[stateoffset] = bitmap;

+				data[stateoffset + 1] = (byte) count;

 			}

 

 			@Override

 			public void outputPartialResult(ArrayTupleBuilder tupleBuilder,

 					IFrameTupleAccessor accessor, int tIndex,

 					AggregateState state) throws HyracksDataException {

-				// TODO Auto-generated method stub

-				byte bitmap;

-				byte count;

+				byte bitmap = getField(accessor, tIndex, 1);

+				byte count = getField(accessor, tIndex, 2);

 				DataOutput fieldOutput = tupleBuilder.getDataOutput();

-				byte[] data = accessor.getBuffer().array();

-				int tupleOffset = accessor.getTupleStartOffset(tIndex);

-				int fieldOffset = accessor.getFieldStartOffset(tIndex, 1);

-

-				int offset = fieldOffset + accessor.getFieldSlotsLength()

-						+ tupleOffset;

-				bitmap = ByteSerializerDeserializer.getByte(data, offset);

-				count = ByteSerializerDeserializer.getByte(data, offset + 1);

 				try {

 					fieldOutput.writeByte(bitmap);

 					tupleBuilder.addFieldEndOffset();

@@ -151,29 +127,7 @@
 			public void outputFinalResult(ArrayTupleBuilder tupleBuilder,

 					IFrameTupleAccessor accessor, int tIndex,

 					AggregateState state) throws HyracksDataException {

-				// TODO Auto-generated method stub

-				byte bitmap;

-				byte count;

-

-				byte[] data = accessor.getBuffer().array();

-				int tupleOffset = accessor.getTupleStartOffset(tIndex);

-				int fieldOffset = accessor.getFieldStartOffset(tIndex, 1);

-				int offset = tupleOffset + accessor.getFieldSlotsLength()

-						+ fieldOffset;

-

-				bitmap = ByteSerializerDeserializer.getByte(data, offset);

-				count = ByteSerializerDeserializer.getByte(data, offset + 1);

-

-				DataOutput fieldOutput = tupleBuilder.getDataOutput();

-				try {

-					fieldOutput.writeByte(bitmap);

-					tupleBuilder.addFieldEndOffset();

-					fieldOutput.writeByte(count);

-					tupleBuilder.addFieldEndOffset();

-				} catch (IOException e) {

-					throw new HyracksDataException(

-							"I/O exception when writing aggregation to the output buffer.");

-				}

+				outputPartialResult(tupleBuilder, accessor, tIndex, state);

 			}

 

 		};

diff --git a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/job/JobGenBrujinGraph.java b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/job/JobGenBrujinGraph.java
index baffdcb..95562a6 100644
--- a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/job/JobGenBrujinGraph.java
+++ b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/job/JobGenBrujinGraph.java
@@ -91,7 +91,7 @@
 			System.arraycopy(nodes, 0, ncNodeNames, i * nodes.length,
 					nodes.length);
 		}
-		LOG.info("nc nodes:" + ncNodeNames.length + ncNodeNames.toString());
+		LOG.info("nc nodes:" + ncNodeNames.length + " " + ncNodeNames.toString());
 	}
 
 	private ExternalGroupOperatorDescriptor newExternalGroupby(
@@ -190,6 +190,7 @@
 			InputSplit[] splits = job.getInputFormat().getSplits(job,
 					ncNodeNames.length);
 
+			LOG.info("HDFS read into " + splits.length + " splits");
 			String[] readSchedule = scheduler.getLocationConstraints(splits);
 			return new HDFSReadOperatorDescriptor(jobSpec, readOutputRec, job,
 					splits, readSchedule, new ReadsKeyValueParserFactory(kmers));
@@ -299,6 +300,12 @@
 			outputFormat = OutputFormat.BINARY;
 		}
 		job = new JobConf(conf);
+		LOG.info("Genomix Graph Build Configuration");
+		LOG.info("Kmer:" + kmers);
+		LOG.info("Groupby type:" + type);
+		LOG.info("Output format:" + output);
+		LOG.info("Frame limit" + frameLimits);
+		LOG.info("Frame size" + frameSize);
 	}
 
 }
diff --git a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/type/Kmer.java b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/type/Kmer.java
index 1fbcc22..7dd310c 100644
--- a/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/type/Kmer.java
+++ b/genomix/genomix-core/src/main/java/edu/uci/ics/genomix/type/Kmer.java
@@ -69,7 +69,7 @@
 		}
 
 		public static byte mergePreNextAdj(byte pre, byte next) {
-			return (byte) (pre << 4 | next & 0x0f);
+			return (byte) (pre << 4 | (next & 0x0f));
 		}
 
 		public static String getSymbolFromBitMap(byte code) {
@@ -149,7 +149,7 @@
 	 * @return the shiftout gene, in gene code format
 	 */
 	public static byte MoveKmer(int k, byte[] kmer, byte c) {
-		int byteNum = (byte) Math.ceil((double) k / 4.0);
+		int byteNum = kmer.length;
 		byte output = (byte) (kmer[byteNum - 1] & 0x03);
 		for (int i = byteNum - 1; i > 0; i--) {
 			byte in = (byte) (kmer[i - 1] & 0x03);
@@ -158,7 +158,7 @@
 
 		int pos = ((k - 1) % 4) * 2;
 		byte code = (byte) (GENE_CODE.getCodeFromSymbol(c) << pos);
-		kmer[0] = (byte) ((kmer[0] >>> 2) | code);
+		kmer[0] = (byte) (((kmer[0] >>> 2) & 0x3f) | code);
 		return (byte) (1 << output);
 	}