clear unset bit
diff --git a/genomix/genomix-data/.classpath b/genomix/genomix-data/.classpath
new file mode 100644
index 0000000..e43402f
--- /dev/null
+++ b/genomix/genomix-data/.classpath
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+	<classpathentry kind="src" output="target/classes" path="src/main/java">
+		<attributes>
+			<attribute name="optional" value="true"/>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="src" output="target/test-classes" path="src/test/java">
+		<attributes>
+			<attribute name="optional" value="true"/>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="output" path="target/classes"/>
+</classpath>
diff --git a/genomix/genomix-data/.project b/genomix/genomix-data/.project
new file mode 100644
index 0000000..f22376e
--- /dev/null
+++ b/genomix/genomix-data/.project
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>genomix-data</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+		<buildCommand>
+			<name>org.eclipse.m2e.core.maven2Builder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+		<nature>org.eclipse.m2e.core.maven2Nature</nature>
+	</natures>
+</projectDescription>
diff --git a/genomix/genomix-data/.settings/org.eclipse.core.resources.prefs b/genomix/genomix-data/.settings/org.eclipse.core.resources.prefs
new file mode 100644
index 0000000..609d3ca
--- /dev/null
+++ b/genomix/genomix-data/.settings/org.eclipse.core.resources.prefs
@@ -0,0 +1,4 @@
+eclipse.preferences.version=1
+encoding//src/main/resources=UTF-8
+encoding//src/test/resources=UTF-8
+encoding/<project>=UTF-8
diff --git a/genomix/genomix-data/.settings/org.eclipse.jdt.core.prefs b/genomix/genomix-data/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..ec4300d
--- /dev/null
+++ b/genomix/genomix-data/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,5 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
+org.eclipse.jdt.core.compiler.compliance=1.7
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.source=1.7
diff --git a/genomix/genomix-data/.settings/org.eclipse.m2e.core.prefs b/genomix/genomix-data/.settings/org.eclipse.m2e.core.prefs
new file mode 100644
index 0000000..f897a7f
--- /dev/null
+++ b/genomix/genomix-data/.settings/org.eclipse.m2e.core.prefs
@@ -0,0 +1,4 @@
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerUtil.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerUtil.java
index 697f537..1c8c46e 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerUtil.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerUtil.java
@@ -176,7 +176,7 @@
 						- j - 1] << posNeedToMove));
 			}
 			if ( nextK % 4 == 0 || (nextK % 4) * 2 + posNeedToMove > 8) {
-				mergedKmer[0] = (byte) (kmerNext[offsetNext] >> (8 - posNeedToMove));
+				mergedKmer[0] = (byte) ((0xff & kmerNext[offsetNext] )>> (8 - posNeedToMove));
 			}
 		}
 		return mergedKmer;
diff --git a/genomix/genomix-data/target/classes/META-INF/MANIFEST.MF b/genomix/genomix-data/target/classes/META-INF/MANIFEST.MF
new file mode 100644
index 0000000..da18195
--- /dev/null
+++ b/genomix/genomix-data/target/classes/META-INF/MANIFEST.MF
@@ -0,0 +1,5 @@
+Manifest-Version: 1.0

+Built-By: jiajianfeng

+Build-Jdk: 1.6.0_43

+Created-By: Maven Integration for Eclipse

+

diff --git a/genomix/genomix-data/target/classes/META-INF/maven/edu.uci.ics.hyracks/genomix-data/pom.properties b/genomix/genomix-data/target/classes/META-INF/maven/edu.uci.ics.hyracks/genomix-data/pom.properties
new file mode 100644
index 0000000..e59f3fc
--- /dev/null
+++ b/genomix/genomix-data/target/classes/META-INF/maven/edu.uci.ics.hyracks/genomix-data/pom.properties
@@ -0,0 +1,7 @@
+#Generated by Maven Integration for Eclipse
+#Fri Apr 12 22:12:36 PDT 2013
+version=0.2.4-SNAPSHOT
+groupId=edu.uci.ics.hyracks
+m2e.projectName=genomix-data
+m2e.projectLocation=/Users/jiajianfeng/workplace/code/asterix/fullstack/genomix/genomix-data
+artifactId=genomix-data
diff --git a/genomix/genomix-data/target/classes/META-INF/maven/edu.uci.ics.hyracks/genomix-data/pom.xml b/genomix/genomix-data/target/classes/META-INF/maven/edu.uci.ics.hyracks/genomix-data/pom.xml
new file mode 100644
index 0000000..5ce538f
--- /dev/null
+++ b/genomix/genomix-data/target/classes/META-INF/maven/edu.uci.ics.hyracks/genomix-data/pom.xml
@@ -0,0 +1,47 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+	<artifactId>genomix-data</artifactId>
+	<name>genomix-data</name>
+
+	<parent>
+		<groupId>edu.uci.ics.hyracks</groupId>
+		<artifactId>genomix</artifactId>
+		<version>0.2.4-SNAPSHOT</version>
+	</parent>
+
+	<properties>
+		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+	</properties>
+
+	<build>
+		<plugins>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-compiler-plugin</artifactId>
+				<version>2.0.2</version>
+				<configuration>
+					<source>1.7</source>
+					<target>1.7</target>
+					<fork>true</fork>
+				</configuration>
+			</plugin>
+		</plugins>
+	</build>
+
+
+	<dependencies>
+		<dependency>
+			<groupId>junit</groupId>
+			<artifactId>junit</artifactId>
+			<version>4.8.1</version>
+			<scope>test</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-core</artifactId>
+			<version>0.20.2</version>
+		</dependency>
+	</dependencies>
+</project>
diff --git a/genomix/genomix-data/target/classes/conf/cluster.properties b/genomix/genomix-data/target/classes/conf/cluster.properties
new file mode 100644
index 0000000..eabd81b
--- /dev/null
+++ b/genomix/genomix-data/target/classes/conf/cluster.properties
@@ -0,0 +1,40 @@
+#The CC port for Hyracks clients
+CC_CLIENTPORT=3099
+
+#The CC port for Hyracks cluster management
+CC_CLUSTERPORT=1099
+
+#The directory of hyracks binaries
+HYRACKS_HOME="../../../../hyracks"
+
+#The tmp directory for cc to install jars
+CCTMP_DIR=/tmp/t1
+
+#The tmp directory for nc to install jars
+NCTMP_DIR=/tmp/t2
+
+#The directory to put cc logs
+CCLOGS_DIR=$CCTMP_DIR/logs
+
+#The directory to put nc logs
+NCLOGS_DIR=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS="/tmp/t3,/tmp/t4"
+
+#The JAVA_HOME
+JAVA_HOME=$JAVA_HOME
+
+#HADOOP_HOME 
+CLASSPATH="${HADOOP_HOME}:${CLASSPATH}:."
+
+#The frame size of the internal dataflow engine
+FRAME_SIZE=65536
+
+#CC JAVA_OPTS
+CCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7001,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+# Yourkit option: -agentpath:/grid/0/dev/vborkar/tools/yjp-10.0.4/bin/linux-x86-64/libyjpagent.so=port=20001"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7002,server=y,suspend=n -Xmx10g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/genomix/genomix-data/target/classes/conf/debugnc.properties b/genomix/genomix-data/target/classes/conf/debugnc.properties
new file mode 100644
index 0000000..27afa26
--- /dev/null
+++ b/genomix/genomix-data/target/classes/conf/debugnc.properties
@@ -0,0 +1,12 @@
+#The tmp directory for nc to install jars
+NCTMP_DIR2=/tmp/t-1
+
+#The directory to put nc logs
+NCLOGS_DIR2=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS2="/tmp/t-2,/tmp/t-3"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS2="-Xdebug -Xrunjdwp:transport=dt_socket,address=7003,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/genomix/genomix-data/target/classes/conf/master b/genomix/genomix-data/target/classes/conf/master
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/genomix/genomix-data/target/classes/conf/master
@@ -0,0 +1 @@
+localhost
diff --git a/genomix/genomix-data/target/classes/conf/slaves b/genomix/genomix-data/target/classes/conf/slaves
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/genomix/genomix-data/target/classes/conf/slaves
@@ -0,0 +1 @@
+localhost
diff --git a/genomix/genomix-data/target/classes/edu/uci/ics/genomix/type/Kmer$GENE_CODE.class b/genomix/genomix-data/target/classes/edu/uci/ics/genomix/type/Kmer$GENE_CODE.class
new file mode 100644
index 0000000..623f0c2
--- /dev/null
+++ b/genomix/genomix-data/target/classes/edu/uci/ics/genomix/type/Kmer$GENE_CODE.class
Binary files differ
diff --git a/genomix/genomix-data/target/classes/edu/uci/ics/genomix/type/Kmer.class b/genomix/genomix-data/target/classes/edu/uci/ics/genomix/type/Kmer.class
new file mode 100644
index 0000000..ffeb20f
--- /dev/null
+++ b/genomix/genomix-data/target/classes/edu/uci/ics/genomix/type/Kmer.class
Binary files differ
diff --git a/genomix/genomix-data/target/classes/edu/uci/ics/genomix/type/KmerBytesWritable$Comparator.class b/genomix/genomix-data/target/classes/edu/uci/ics/genomix/type/KmerBytesWritable$Comparator.class
new file mode 100644
index 0000000..c94d38c
--- /dev/null
+++ b/genomix/genomix-data/target/classes/edu/uci/ics/genomix/type/KmerBytesWritable$Comparator.class
Binary files differ
diff --git a/genomix/genomix-data/target/classes/edu/uci/ics/genomix/type/KmerBytesWritable.class b/genomix/genomix-data/target/classes/edu/uci/ics/genomix/type/KmerBytesWritable.class
new file mode 100644
index 0000000..0620a64
--- /dev/null
+++ b/genomix/genomix-data/target/classes/edu/uci/ics/genomix/type/KmerBytesWritable.class
Binary files differ
diff --git a/genomix/genomix-data/target/classes/edu/uci/ics/genomix/type/KmerCountValue.class b/genomix/genomix-data/target/classes/edu/uci/ics/genomix/type/KmerCountValue.class
new file mode 100644
index 0000000..67f9303
--- /dev/null
+++ b/genomix/genomix-data/target/classes/edu/uci/ics/genomix/type/KmerCountValue.class
Binary files differ
diff --git a/genomix/genomix-data/target/classes/edu/uci/ics/genomix/type/KmerUtil.class b/genomix/genomix-data/target/classes/edu/uci/ics/genomix/type/KmerUtil.class
new file mode 100644
index 0000000..8324f12
--- /dev/null
+++ b/genomix/genomix-data/target/classes/edu/uci/ics/genomix/type/KmerUtil.class
Binary files differ
diff --git a/genomix/genomix-data/target/classes/scripts/genomix b/genomix/genomix-data/target/classes/scripts/genomix
new file mode 100644
index 0000000..bdd7f20
--- /dev/null
+++ b/genomix/genomix-data/target/classes/scripts/genomix
@@ -0,0 +1,113 @@
+#!/bin/sh
+# ----------------------------------------------------------------------------
+#  Copyright 2001-2006 The Apache Software Foundation.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+# ----------------------------------------------------------------------------
+#
+#   Copyright (c) 2001-2006 The Apache Software Foundation.  All rights
+#   reserved.
+
+
+# resolve links - $0 may be a softlink
+PRG="$0"
+
+while [ -h "$PRG" ]; do
+  ls=`ls -ld "$PRG"`
+  link=`expr "$ls" : '.*-> \(.*\)$'`
+  if expr "$link" : '/.*' > /dev/null; then
+    PRG="$link"
+  else
+    PRG=`dirname "$PRG"`/"$link"
+  fi
+done
+
+PRGDIR=`dirname "$PRG"`
+BASEDIR=`cd "$PRGDIR/.." >/dev/null; pwd`
+
+
+
+# OS specific support.  $var _must_ be set to either true or false.
+cygwin=false;
+darwin=false;
+case "`uname`" in
+  CYGWIN*) cygwin=true ;;
+  Darwin*) darwin=true
+           if [ -z "$JAVA_VERSION" ] ; then
+             JAVA_VERSION="CurrentJDK"
+           else
+             echo "Using Java version: $JAVA_VERSION"
+           fi
+           if [ -z "$JAVA_HOME" ] ; then
+             JAVA_HOME=/System/Library/Frameworks/JavaVM.framework/Versions/${JAVA_VERSION}/Home
+           fi
+           ;;
+esac
+
+if [ -z "$JAVA_HOME" ] ; then
+  if [ -r /etc/gentoo-release ] ; then
+    JAVA_HOME=`java-config --jre-home`
+  fi
+fi
+
+# For Cygwin, ensure paths are in UNIX format before anything is touched
+if $cygwin ; then
+  [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
+  [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
+fi
+
+# If a specific java binary isn't specified search for the standard 'java' binary
+if [ -z "$JAVACMD" ] ; then
+  if [ -n "$JAVA_HOME"  ] ; then
+    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+      # IBM's JDK on AIX uses strange locations for the executables
+      JAVACMD="$JAVA_HOME/jre/sh/java"
+    else
+      JAVACMD="$JAVA_HOME/bin/java"
+    fi
+  else
+    JAVACMD=`which java`
+  fi
+fi
+
+if [ ! -x "$JAVACMD" ] ; then
+  echo "Error: JAVA_HOME is not defined correctly." 1>&2
+  echo "  We cannot execute $JAVACMD" 1>&2
+  exit 1
+fi
+
+if [ -z "$REPO" ]
+then
+  REPO="$BASEDIR"/lib
+fi
+
+CLASSPATH=$CLASSPATH_PREFIX:"$BASEDIR"/etc:"$REPO"/hyracks-dataflow-std-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-api-0.2.3-SNAPSHOT.jar:"$REPO"/json-20090211.jar:"$REPO"/httpclient-4.1-alpha2.jar:"$REPO"/httpcore-4.1-beta1.jar:"$REPO"/commons-logging-1.1.1.jar:"$REPO"/commons-codec-1.4.jar:"$REPO"/args4j-2.0.12.jar:"$REPO"/commons-lang3-3.1.jar:"$REPO"/hyracks-dataflow-common-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-data-std-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-control-cc-0.2.3-SNAPSHOT.jar:"$REPO"/hyracks-control-common-0.2.3-SNAPSHOT.jar:"$REPO"/jetty-server-8.0.0.RC0.jar:"$REPO"/servlet-api-3.0.20100224.jar:"$REPO"/jetty-continuation-8.0.0.RC0.jar:"$REPO"/jetty-http-8.0.0.RC0.jar:"$REPO"/jetty-io-8.0.0.RC0.jar:"$REPO"/jetty-webapp-8.0.0.RC0.jar:"$REPO"/jetty-xml-8.0.0.RC0.jar:"$REPO"/jetty-util-8.0.0.RC0.jar:"$REPO"/jetty-servlet-8.0.0.RC0.jar:"$REPO"/jetty-security-8.0.0.RC0.jar:"$REPO"/wicket-core-1.5.2.jar:"$REPO"/wicket-util-1.5.2.jar:"$REPO"/wicket-request-1.5.2.jar:"$REPO"/slf4j-api-1.6.1.jar:"$REPO"/slf4j-jcl-1.6.3.jar:"$REPO"/hyracks-control-nc-0.2.3-SNAPSHOT.jar:"$REPO"/dcache-client-0.0.1.jar:"$REPO"/jetty-client-8.0.0.M0.jar:"$REPO"/hyracks-net-0.2.3-SNAPSHOT.jar:"$REPO"/commons-io-1.3.1.jar:"$REPO"/hyracks-ipc-0.2.3-SNAPSHOT.jar:"$REPO"/genomix-0.2.3-SNAPSHOT.pom
+
+# For Cygwin, switch paths to Windows format before running java
+if $cygwin; then
+  [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
+  [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"`
+  [ -n "$HOME" ] && HOME=`cygpath --path --windows "$HOME"`
+  [ -n "$BASEDIR" ] && BASEDIR=`cygpath --path --windows "$BASEDIR"`
+  [ -n "$REPO" ] && REPO=`cygpath --path --windows "$REPO"`
+fi
+
+exec "$JAVACMD" $JAVA_OPTS  \
+  -classpath "$CLASSPATH" \
+  -Dapp.name="genomix" \
+  -Dapp.pid="$$" \
+  -Dapp.repo="$REPO" \
+  -Dapp.home="$BASEDIR" \
+  -Dbasedir="$BASEDIR" \
+  edu.uci.ics.genomix.driver.Driver \
+  "$@"
diff --git a/genomix/genomix-data/target/classes/scripts/genomix.bat b/genomix/genomix-data/target/classes/scripts/genomix.bat
new file mode 100644
index 0000000..1bd2098
--- /dev/null
+++ b/genomix/genomix-data/target/classes/scripts/genomix.bat
@@ -0,0 +1,108 @@
+@REM ----------------------------------------------------------------------------

+@REM  Copyright 2001-2006 The Apache Software Foundation.

+@REM

+@REM  Licensed under the Apache License, Version 2.0 (the "License");

+@REM  you may not use this file except in compliance with the License.

+@REM  You may obtain a copy of the License at

+@REM

+@REM       http://www.apache.org/licenses/LICENSE-2.0

+@REM

+@REM  Unless required by applicable law or agreed to in writing, software

+@REM  distributed under the License is distributed on an "AS IS" BASIS,

+@REM  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+@REM  See the License for the specific language governing permissions and

+@REM  limitations under the License.

+@REM ----------------------------------------------------------------------------

+@REM

+@REM   Copyright (c) 2001-2006 The Apache Software Foundation.  All rights

+@REM   reserved.

+

+@echo off

+

+set ERROR_CODE=0

+

+:init

+@REM Decide how to startup depending on the version of windows

+

+@REM -- Win98ME

+if NOT "%OS%"=="Windows_NT" goto Win9xArg

+

+@REM set local scope for the variables with windows NT shell

+if "%OS%"=="Windows_NT" @setlocal

+

+@REM -- 4NT shell

+if "%eval[2+2]" == "4" goto 4NTArgs

+

+@REM -- Regular WinNT shell

+set CMD_LINE_ARGS=%*

+goto WinNTGetScriptDir

+

+@REM The 4NT Shell from jp software

+:4NTArgs

+set CMD_LINE_ARGS=%$

+goto WinNTGetScriptDir

+

+:Win9xArg

+@REM Slurp the command line arguments.  This loop allows for an unlimited number

+@REM of arguments (up to the command line limit, anyway).

+set CMD_LINE_ARGS=

+:Win9xApp

+if %1a==a goto Win9xGetScriptDir

+set CMD_LINE_ARGS=%CMD_LINE_ARGS% %1

+shift

+goto Win9xApp

+

+:Win9xGetScriptDir

+set SAVEDIR=%CD%

+%0\

+cd %0\..\.. 

+set BASEDIR=%CD%

+cd %SAVEDIR%

+set SAVE_DIR=

+goto repoSetup

+

+:WinNTGetScriptDir

+set BASEDIR=%~dp0\..

+

+:repoSetup

+

+

+if "%JAVACMD%"=="" set JAVACMD=java

+

+if "%REPO%"=="" set REPO=%BASEDIR%\lib

+

+set CLASSPATH="%BASEDIR%"\etc;"%REPO%"\hyracks-dataflow-std-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-api-0.2.3-SNAPSHOT.jar;"%REPO%"\json-20090211.jar;"%REPO%"\httpclient-4.1-alpha2.jar;"%REPO%"\httpcore-4.1-beta1.jar;"%REPO%"\commons-logging-1.1.1.jar;"%REPO%"\commons-codec-1.4.jar;"%REPO%"\args4j-2.0.12.jar;"%REPO%"\commons-lang3-3.1.jar;"%REPO%"\hyracks-dataflow-common-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-data-std-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-control-cc-0.2.3-SNAPSHOT.jar;"%REPO%"\hyracks-control-common-0.2.3-SNAPSHOT.jar;"%REPO%"\jetty-server-8.0.0.RC0.jar;"%REPO%"\servlet-api-3.0.20100224.jar;"%REPO%"\jetty-continuation-8.0.0.RC0.jar;"%REPO%"\jetty-http-8.0.0.RC0.jar;"%REPO%"\jetty-io-8.0.0.RC0.jar;"%REPO%"\jetty-webapp-8.0.0.RC0.jar;"%REPO%"\jetty-xml-8.0.0.RC0.jar;"%REPO%"\jetty-util-8.0.0.RC0.jar;"%REPO%"\jetty-servlet-8.0.0.RC0.jar;"%REPO%"\jetty-security-8.0.0.RC0.jar;"%REPO%"\wicket-core-1.5.2.jar;"%REPO%"\wicket-util-1.5.2.jar;"%REPO%"\wicket-request-1.5.2.jar;"%REPO%"\slf4j-api-1.6.1.jar;"%REPO%"\slf4j-jcl-1.6.3.jar;"%REPO%"\hyracks-control-nc-0.2.3-SNAPSHOT.jar;"%REPO%"\dcache-client-0.0.1.jar;"%REPO%"\jetty-client-8.0.0.M0.jar;"%REPO%"\hyracks-net-0.2.3-SNAPSHOT.jar;"%REPO%"\commons-io-1.3.1.jar;"%REPO%"\hyracks-ipc-0.2.3-SNAPSHOT.jar;"%REPO%"\genomix-0.2.3-SNAPSHOT.pom

+goto endInit

+

+@REM Reaching here means variables are defined and arguments have been captured

+:endInit

+

+%JAVACMD% %JAVA_OPTS%  -classpath %CLASSPATH_PREFIX%;%CLASSPATH% -Dapp.name="genomix" -Dapp.repo="%REPO%" -Dapp.home="%BASEDIR%" -Dbasedir="%BASEDIR%" edu.uci.ics.genomix.driver.Driver %CMD_LINE_ARGS%

+if ERRORLEVEL 1 goto error

+goto end

+

+:error

+if "%OS%"=="Windows_NT" @endlocal

+set ERROR_CODE=%ERRORLEVEL%

+

+:end

+@REM set local scope for the variables with windows NT shell

+if "%OS%"=="Windows_NT" goto endNT

+

+@REM For old DOS remove the set variables from ENV - we assume they were not set

+@REM before we started - at least we don't leave any baggage around

+set CMD_LINE_ARGS=

+goto postExec

+

+:endNT

+@REM If error code is set to 1 then the endlocal was done already in :error.

+if %ERROR_CODE% EQU 0 @endlocal

+

+

+:postExec

+

+if "%FORCE_EXIT_ON_ERROR%" == "on" (

+  if %ERROR_CODE% NEQ 0 exit %ERROR_CODE%

+)

+

+exit /B %ERROR_CODE%

diff --git a/genomix/genomix-data/target/classes/scripts/getip.sh b/genomix/genomix-data/target/classes/scripts/getip.sh
new file mode 100644
index 0000000..e0cdf73
--- /dev/null
+++ b/genomix/genomix-data/target/classes/scripts/getip.sh
@@ -0,0 +1,21 @@
+#get the OS
+OS_NAME=`uname -a|awk '{print $1}'`
+LINUX_OS='Linux'
+
+if [ $OS_NAME = $LINUX_OS ];
+then
+        #Get IP Address
+        IPADDR=`/sbin/ifconfig eth0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+	if [ "$IPADDR" = "" ]
+        then
+		IPADDR=`/sbin/ifconfig lo | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+        fi 
+else
+        IPADDR=`/sbin/ifconfig en1 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+	if [ "$IPADDR" = "" ]
+        then
+                IPADDR=`/sbin/ifconfig lo0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+        fi
+
+fi
+echo $IPADDR
diff --git a/genomix/genomix-data/target/classes/scripts/startAllNCs.sh b/genomix/genomix-data/target/classes/scripts/startAllNCs.sh
new file mode 100644
index 0000000..5e38c40
--- /dev/null
+++ b/genomix/genomix-data/target/classes/scripts/startAllNCs.sh
@@ -0,0 +1,6 @@
+GENOMIX_PATH=`pwd`
+
+for i in `cat conf/slaves`
+do
+   ssh $i "cd ${GENOMIX_PATH}; bin/startnc.sh"
+done
diff --git a/genomix/genomix-data/target/classes/scripts/startCluster.sh b/genomix/genomix-data/target/classes/scripts/startCluster.sh
new file mode 100644
index 0000000..4727764
--- /dev/null
+++ b/genomix/genomix-data/target/classes/scripts/startCluster.sh
@@ -0,0 +1,19 @@
+bin/startcc.sh
+sleep 5
+bin/startAllNCs.sh
+
+. conf/cluster.properties
+# do we need to specify the version somewhere?
+hyrackcmd=`ls ${HYRACKS_HOME}/hyracks-cli/target/hyracks-cli-*-binary-assembly/bin/hyrackscli`
+# find zip file
+appzip=`ls $PWD/../genomix-*-binary-assembly.zip`
+
+[ -f $hyrackcmd ] || { echo "Hyracks commandline is missing"; exit -1;}
+[ -f $appzip ] || { echo "Genomix binary-assembly.zip is missing"; exit -1;}
+
+CCHOST_NAME=`cat conf/master`
+
+IPADDR=`bin/getip.sh`
+echo "connect to \"${IPADDR}:${CC_CLIENTPORT}\"; create application genomix \"$appzip\";" | $hyrackcmd 
+echo ""
+
diff --git a/genomix/genomix-data/target/classes/scripts/startDebugNc.sh b/genomix/genomix-data/target/classes/scripts/startDebugNc.sh
new file mode 100644
index 0000000..fe6cf27
--- /dev/null
+++ b/genomix/genomix-data/target/classes/scripts/startDebugNc.sh
@@ -0,0 +1,50 @@
+hostname
+
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CURRENT_PATH=`pwd`
+CCHOST=`ssh ${CCHOST_NAME} "cd ${CURRENT_PATH}; bin/getip.sh"`
+
+#Import cluster properties
+. conf/cluster.properties
+. conf/debugnc.properties
+
+#Clean up temp dir
+
+rm -rf $NCTMP_DIR2
+mkdir $NCTMP_DIR2
+
+#Clean up log dir
+rm -rf $NCLOGS_DIR2
+mkdir $NCLOGS_DIR2
+
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS2 | tr "," "\n")
+for io_dir in $io_dirs
+do
+	rm -rf $io_dir
+	mkdir $io_dir
+done
+
+#Set JAVA_HOME
+export JAVA_HOME=$JAVA_HOME
+
+#Get OS
+IPADDR=`bin/getip.sh`
+
+#Get node ID
+NODEID=`hostname | cut -d '.' -f 1`
+NODEID=${NODEID}2
+
+#Set JAVA_OPTS
+export JAVA_OPTS=$NCJAVA_OPTS2
+
+cd $HYRACKS_HOME
+HYRACKS_HOME=`pwd`
+
+#Enter the temp dir
+cd $NCTMP_DIR2
+
+#Launch hyracks nc
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyracksnc -cc-host $CCHOST -cc-port $CC_CLUSTERPORT -cluster-net-ip-address $IPADDR  -data-ip-address $IPADDR -node-id $NODEID -iodevices "${IO_DIRS2}" &> $NCLOGS_DIR2/$NODEID.log &
diff --git a/genomix/genomix-data/target/classes/scripts/startcc.sh b/genomix/genomix-data/target/classes/scripts/startcc.sh
new file mode 100644
index 0000000..fe2551d
--- /dev/null
+++ b/genomix/genomix-data/target/classes/scripts/startcc.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+hostname
+
+#Import cluster properties
+. conf/cluster.properties
+
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CCHOST=`bin/getip.sh`
+
+#Remove the temp dir
+rm -rf $CCTMP_DIR
+mkdir $CCTMP_DIR
+
+#Remove the logs dir
+rm -rf $CCLOGS_DIR
+mkdir $CCLOGS_DIR
+
+#Export JAVA_HOME and JAVA_OPTS
+export JAVA_HOME=$JAVA_HOME
+export JAVA_OPTS=$CCJAVA_OPTS
+
+#Launch hyracks cc script
+chmod -R 755 $HYRACKS_HOME
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyrackscc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -max-heartbeat-lapse-periods 999999 -default-max-job-attempts 0 -job-history-size 3 &> $CCLOGS_DIR/cc.log &
diff --git a/genomix/genomix-data/target/classes/scripts/startnc.sh b/genomix/genomix-data/target/classes/scripts/startnc.sh
new file mode 100644
index 0000000..6e0f90e
--- /dev/null
+++ b/genomix/genomix-data/target/classes/scripts/startnc.sh
@@ -0,0 +1,49 @@
+hostname
+
+MY_NAME=`hostname`
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CURRENT_PATH=`pwd`
+CCHOST=`ssh ${CCHOST_NAME} "cd ${CURRENT_PATH}; bin/getip.sh"`
+
+#Import cluster properties
+. conf/cluster.properties
+
+#Clean up temp dir
+
+rm -rf $NCTMP_DIR
+mkdir $NCTMP_DIR
+
+#Clean up log dir
+rm -rf $NCLOGS_DIR
+mkdir $NCLOGS_DIR
+
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS | tr "," "\n")
+for io_dir in $io_dirs
+do
+	rm -rf $io_dir
+	mkdir $io_dir
+done
+
+#Set JAVA_HOME
+export JAVA_HOME=$JAVA_HOME
+
+IPADDR=`bin/getip.sh`
+#echo $IPADDR
+
+#Get node ID
+NODEID=`hostname | cut -d '.' -f 1`
+
+#Set JAVA_OPTS
+export JAVA_OPTS=$NCJAVA_OPTS
+
+cd $HYRACKS_HOME
+HYRACKS_HOME=`pwd`
+
+#Enter the temp dir
+cd $NCTMP_DIR
+
+#Launch hyracks nc
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyracksnc -cc-host $CCHOST -cc-port $CC_CLUSTERPORT -cluster-net-ip-address $IPADDR  -data-ip-address $IPADDR -node-id $NODEID -iodevices "${IO_DIRS}" &> $NCLOGS_DIR/$NODEID.log &
diff --git a/genomix/genomix-data/target/classes/scripts/stopAllNCs.sh b/genomix/genomix-data/target/classes/scripts/stopAllNCs.sh
new file mode 100644
index 0000000..66ed866
--- /dev/null
+++ b/genomix/genomix-data/target/classes/scripts/stopAllNCs.sh
@@ -0,0 +1,6 @@
+GENOMIX_PATH=`pwd`
+
+for i in `cat conf/slaves`
+do
+   ssh $i "cd ${GENOMIX_PATH}; bin/stopnc.sh"
+done
diff --git a/genomix/genomix-data/target/classes/scripts/stopCluster.sh b/genomix/genomix-data/target/classes/scripts/stopCluster.sh
new file mode 100644
index 0000000..4889934
--- /dev/null
+++ b/genomix/genomix-data/target/classes/scripts/stopCluster.sh
@@ -0,0 +1,3 @@
+bin/stopAllNCs.sh
+sleep 2
+bin/stopcc.sh
diff --git a/genomix/genomix-data/target/classes/scripts/stopcc.sh b/genomix/genomix-data/target/classes/scripts/stopcc.sh
new file mode 100644
index 0000000..1865054
--- /dev/null
+++ b/genomix/genomix-data/target/classes/scripts/stopcc.sh
@@ -0,0 +1,10 @@
+hostname
+. conf/cluster.properties
+
+#Kill process
+PID=`ps -ef|grep ${USER}|grep java|grep hyracks|awk '{print $2}'`
+echo $PID
+[ "$PID" != "" ] && kill -9 $PID
+
+#Clean up CC temp dir
+rm -rf $CCTMP_DIR/*
diff --git a/genomix/genomix-data/target/classes/scripts/stopnc.sh b/genomix/genomix-data/target/classes/scripts/stopnc.sh
new file mode 100644
index 0000000..3928bb7
--- /dev/null
+++ b/genomix/genomix-data/target/classes/scripts/stopnc.sh
@@ -0,0 +1,23 @@
+hostname
+. conf/cluster.properties
+
+#Kill process
+PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=hyracksnc'|awk '{print $2}'`
+
+if [ "$PID" == "" ]; then
+  USERID=`id | sed 's/^uid=//;s/(.*$//'`
+  PID=`ps -ef|grep ${USERID}|grep java|grep 'Dapp.name=hyracksnc'|awk '{print $2}'`
+fi
+
+echo $PID
+[ "$PID" != "" ] && kill -9 $PID
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS | tr "," "\n")
+for io_dir in $io_dirs
+do
+	rm -rf $io_dir/*
+done
+
+#Clean up NC temp dir
+rm -rf $NCTMP_DIR/*
diff --git a/genomix/genomix-data/target/test-classes/data/0/text.txt b/genomix/genomix-data/target/test-classes/data/0/text.txt
new file mode 100644
index 0000000..f63a141
--- /dev/null
+++ b/genomix/genomix-data/target/test-classes/data/0/text.txt
@@ -0,0 +1,4 @@
+@625E1AAXX100810:1:100:10000:10271/1

+AATAGAAG

++

+EDBDB?BEEEDGGEGGGDGGGA>DG@GGD;GD@DG@F?<B<BFFD?

diff --git a/genomix/genomix-data/target/test-classes/data/webmap/text.txt b/genomix/genomix-data/target/test-classes/data/webmap/text.txt
new file mode 100644
index 0000000..f63a141
--- /dev/null
+++ b/genomix/genomix-data/target/test-classes/data/webmap/text.txt
@@ -0,0 +1,4 @@
+@625E1AAXX100810:1:100:10000:10271/1

+AATAGAAG

++

+EDBDB?BEEEDGGEGGGDGGGA>DG@GGD;GD@DG@F?<B<BFFD?

diff --git a/genomix/genomix-data/target/test-classes/edu/uci/ics/genomix/example/kmer/KmerTest.class b/genomix/genomix-data/target/test-classes/edu/uci/ics/genomix/example/kmer/KmerTest.class
new file mode 100644
index 0000000..69fd8d3
--- /dev/null
+++ b/genomix/genomix-data/target/test-classes/edu/uci/ics/genomix/example/kmer/KmerTest.class
Binary files differ
diff --git a/genomix/genomix-data/target/test-classes/edu/uci/ics/genomix/example/kmer/KmerUtilTest.class b/genomix/genomix-data/target/test-classes/edu/uci/ics/genomix/example/kmer/KmerUtilTest.class
new file mode 100644
index 0000000..122107c
--- /dev/null
+++ b/genomix/genomix-data/target/test-classes/edu/uci/ics/genomix/example/kmer/KmerUtilTest.class
Binary files differ
diff --git a/genomix/genomix-data/target/test-classes/expected/result2 b/genomix/genomix-data/target/test-classes/expected/result2
new file mode 100644
index 0000000..5e76458
--- /dev/null
+++ b/genomix/genomix-data/target/test-classes/expected/result2
@@ -0,0 +1,4 @@
+AATAG	|A	1
+AGAAG	T|	1
+ATAGA	A|A	1
+TAGAA	A|G	1
diff --git a/genomix/genomix-data/target/test-classes/hadoop/conf/core-site.xml b/genomix/genomix-data/target/test-classes/hadoop/conf/core-site.xml
new file mode 100644
index 0000000..47dfac5
--- /dev/null
+++ b/genomix/genomix-data/target/test-classes/hadoop/conf/core-site.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+<property>
+    <name>fs.default.name</name>
+    <value>hdfs://127.0.0.1:31888</value>
+</property>
+<property>
+    <name>hadoop.tmp.dir</name>
+    <value>/tmp/hadoop</value>
+</property>
+
+
+</configuration>
diff --git a/genomix/genomix-data/target/test-classes/hadoop/conf/hdfs-site.xml b/genomix/genomix-data/target/test-classes/hadoop/conf/hdfs-site.xml
new file mode 100644
index 0000000..8d29b1d
--- /dev/null
+++ b/genomix/genomix-data/target/test-classes/hadoop/conf/hdfs-site.xml
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+<property>
+   <name>dfs.replication</name>
+   <value>1</value>
+</property>
+
+<property>
+	<name>dfs.block.size</name>
+	<value>65536</value>
+</property>
+
+</configuration>
diff --git a/genomix/genomix-data/target/test-classes/hadoop/conf/log4j.properties b/genomix/genomix-data/target/test-classes/hadoop/conf/log4j.properties
new file mode 100644
index 0000000..d5e6004
--- /dev/null
+++ b/genomix/genomix-data/target/test-classes/hadoop/conf/log4j.properties
@@ -0,0 +1,94 @@
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=FATAL,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshhold=FATAL
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Rollver at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this 
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+# Rolling File Appender
+#
+
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Logfile size and and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+#
+# FSNamesystem Audit logging
+# All audit events are logged at INFO level
+#
+log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.metrics.jvm.EventCounter
diff --git a/genomix/genomix-data/target/test-classes/hadoop/conf/mapred-site.xml b/genomix/genomix-data/target/test-classes/hadoop/conf/mapred-site.xml
new file mode 100644
index 0000000..39b6505
--- /dev/null
+++ b/genomix/genomix-data/target/test-classes/hadoop/conf/mapred-site.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+  <property>
+    <name>mapred.job.tracker</name>
+    <value>localhost:29007</value>
+  </property>
+  <property>
+     <name>mapred.tasktracker.map.tasks.maximum</name>
+     <value>20</value>
+  </property>
+   <property>
+      <name>mapred.tasktracker.reduce.tasks.maximum</name>
+      <value>20</value>
+   </property>
+   <property>
+      <name>mapred.max.split.size</name>
+      <value>2048</value>
+   </property>
+
+</configuration>