Add Hadoop pathmerge H1 algorithm
git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_genomix@3378 123451ca-8445-de46-9d55-352943316053
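
Note: the conf.xml files added below are serialized Hadoop JobConf dumps written by the test harness, not hand-maintained configs. A minimal sketch of how such an artifact is typically produced, assuming Hadoop 0.20-era APIs (JobConf, MiniDFSCluster, Configuration.writeXml); the class name and ACTUAL_RESULT_DIR path are hypothetical, for illustration only:

    import java.io.File;
    import java.io.FileOutputStream;

    import org.apache.hadoop.hdfs.MiniDFSCluster;
    import org.apache.hadoop.mapred.JobConf;

    public class DumpConfSketch {
        // Hypothetical output location, mirroring the actual1/conf.xml layout below.
        private static final String ACTUAL_RESULT_DIR = "actual1";
        private static final String HADOOP_CONF_PATH =
                ACTUAL_RESULT_DIR + File.separator + "conf.xml";

        public static void main(String[] args) throws Exception {
            JobConf conf = new JobConf();
            // Start a 2-datanode mini cluster; this is what pins
            // fs.default.name to hdfs://localhost:<ephemeral port> and
            // dfs.replication to 2 in the dumped configuration, while
            // mapred.job.tracker stays "local".
            MiniDFSCluster dfsCluster = new MiniDFSCluster(conf, 2, true, null);

            // Dump the effective configuration as the conf.xml artifact.
            FileOutputStream out = new FileOutputStream(new File(HADOOP_CONF_PATH));
            conf.writeXml(out);
            out.close();

            dfsCluster.shutdown();
        }
    }

Because the NameNode binds an ephemeral port on each run, fs.default.name and dfs.http.address are the only values that differ across the actual1/actual2/actual3/actual5 dumps.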
diff --git a/genomix/genomix-hadoop/actual1/conf.xml b/genomix/genomix-hadoop/actual1/conf.xml
new file mode 100644
index 0000000..506913d
--- /dev/null
+++ b/genomix/genomix-hadoop/actual1/conf.xml
@@ -0,0 +1,179 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>dfs.namenode.decommission.nodes.per.interval</name><value>5</value></property>
+<property><name>dfs.https.need.client.auth</name><value>false</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>dfs.namenode.logging.level</name><value>info</value></property>
+<property><name>dfs.datanode.address</name><value>127.0.0.1:0</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>fs.default.name</name><value>hdfs://localhost:61115</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>dfs.safemode.threshold.pct</name><value>0.999f</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>dfs.namenode.handler.count</name><value>10</value></property>
+<property><name>dfs.blockreport.initialDelay</name><value>0</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>dfs.safemode.extension</name><value>0</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.StaticMapping</value></property>
+<property><name>dfs.https.server.keystore.resource</name><value>ssl-server.xml</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>dfs.name.edits.dir</name><value>${dfs.name.dir}</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>hadoop.job.ugi</name><value>hadoop,staff,everyone,localaccounts</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>dfs.block.size</name><value>67108864</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>dfs.datanode.ipc.address</name><value>127.0.0.1:0</value></property>
+<property><name>dfs.permissions</name><value>true</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>dfs.datanode.https.address</name><value>0.0.0.0:50475</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>dfs.secondary.http.address</name><value>0.0.0.0:50090</value></property>
+<property><name>dfs.replication.max</name><value>512</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>dfs.https.client.keystore.resource</name><value>ssl-client.xml</value></property>
+<property><name>dfs.namenode.startup</name><value>REGULAR</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>dfs.https.address</name><value>0.0.0.0:50470</value></property>
+<property><name>dfs.balance.bandwidthPerSec</name><value>1048576</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>fs.checkpoint.dir</name><value>build/test/data/dfs/namesecondary1,build/test/data/dfs/namesecondary2</value></property>
+<property><name>mapred.reduce.tasks</name><value>1</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>dfs.max.objects</name><value>0</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>dfs.datanode.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>dfs.blockreport.intervalMsec</name><value>3600000</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>dfs.client.block.write.retries</name><value>3</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>dfs.https.enable</name><value>false</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>dfs.default.chunk.view.size</name><value>32768</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>dfs.datanode.du.reserved</name><value>0</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>dfs.web.ugi</name><value>webuser,webgroup</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>dfs.df.interval</name><value>60000</value></property>
+<property><name>dfs.data.dir</name><value>${hadoop.tmp.dir}/dfs/data</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>dfs.datanode.dns.interface</name><value>default</value></property>
+<property><name>dfs.support.append</name><value>false</value></property>
+<property><name>dfs.permissions.supergroup</name><value>supergroup</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>dfs.replication.min</name><value>1</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>dfs.namenode.decommission.interval</name><value>3</value></property>
+<property><name>dfs.http.address</name><value>localhost:61116</value></property>
+<property><name>dfs.heartbeat.interval</name><value>3</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>dfs.name.dir</name><value>build/test/data/dfs/name1,build/test/data/dfs/name2</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>dfs.datanode.http.address</name><value>127.0.0.1:0</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>dfs.replication.interval</name><value>3</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>dfs.replication</name><value>2</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>dfs.access.time.precision</name><value>3600000</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>dfs.datanode.handler.count</name><value>3</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>dfs.replication.considerLoad</name><value>true</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+</configuration>
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/actual1/result1/.part-00000.crc b/genomix/genomix-hadoop/actual1/result1/.part-00000.crc
new file mode 100644
index 0000000..3422e04
--- /dev/null
+++ b/genomix/genomix-hadoop/actual1/result1/.part-00000.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/actual1/result1/part-00000 b/genomix/genomix-hadoop/actual1/result1/part-00000
new file mode 100755
index 0000000..c21f5f6
--- /dev/null
+++ b/genomix/genomix-hadoop/actual1/result1/part-00000
Binary files differ
diff --git a/genomix/genomix-hadoop/actual2/conf.xml b/genomix/genomix-hadoop/actual2/conf.xml
new file mode 100644
index 0000000..ff11b9e
--- /dev/null
+++ b/genomix/genomix-hadoop/actual2/conf.xml
@@ -0,0 +1,179 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>dfs.namenode.decommission.nodes.per.interval</name><value>5</value></property>
+<property><name>dfs.https.need.client.auth</name><value>false</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>dfs.namenode.logging.level</name><value>info</value></property>
+<property><name>dfs.datanode.address</name><value>127.0.0.1:0</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>fs.default.name</name><value>hdfs://localhost:61195</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>dfs.safemode.threshold.pct</name><value>0.999f</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>dfs.namenode.handler.count</name><value>10</value></property>
+<property><name>dfs.blockreport.initialDelay</name><value>0</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>dfs.safemode.extension</name><value>0</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.StaticMapping</value></property>
+<property><name>dfs.https.server.keystore.resource</name><value>ssl-server.xml</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>dfs.name.edits.dir</name><value>${dfs.name.dir}</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>hadoop.job.ugi</name><value>hadoop,staff,everyone,localaccounts</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>dfs.block.size</name><value>67108864</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>dfs.datanode.ipc.address</name><value>127.0.0.1:0</value></property>
+<property><name>dfs.permissions</name><value>true</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>dfs.datanode.https.address</name><value>0.0.0.0:50475</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>dfs.secondary.http.address</name><value>0.0.0.0:50090</value></property>
+<property><name>dfs.replication.max</name><value>512</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>dfs.https.client.keystore.resource</name><value>ssl-client.xml</value></property>
+<property><name>dfs.namenode.startup</name><value>REGULAR</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>dfs.https.address</name><value>0.0.0.0:50470</value></property>
+<property><name>dfs.balance.bandwidthPerSec</name><value>1048576</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>fs.checkpoint.dir</name><value>build/test/data/dfs/namesecondary1,build/test/data/dfs/namesecondary2</value></property>
+<property><name>mapred.reduce.tasks</name><value>1</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>dfs.max.objects</name><value>0</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>dfs.datanode.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>dfs.blockreport.intervalMsec</name><value>3600000</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>dfs.client.block.write.retries</name><value>3</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>dfs.https.enable</name><value>false</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>dfs.default.chunk.view.size</name><value>32768</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>dfs.datanode.du.reserved</name><value>0</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>dfs.web.ugi</name><value>webuser,webgroup</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>dfs.df.interval</name><value>60000</value></property>
+<property><name>dfs.data.dir</name><value>${hadoop.tmp.dir}/dfs/data</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>dfs.datanode.dns.interface</name><value>default</value></property>
+<property><name>dfs.support.append</name><value>false</value></property>
+<property><name>dfs.permissions.supergroup</name><value>supergroup</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>dfs.replication.min</name><value>1</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>dfs.namenode.decommission.interval</name><value>3</value></property>
+<property><name>dfs.http.address</name><value>localhost:61196</value></property>
+<property><name>dfs.heartbeat.interval</name><value>3</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>dfs.name.dir</name><value>build/test/data/dfs/name1,build/test/data/dfs/name2</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>dfs.datanode.http.address</name><value>127.0.0.1:0</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>dfs.replication.interval</name><value>3</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>dfs.replication</name><value>2</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>dfs.access.time.precision</name><value>3600000</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>dfs.datanode.handler.count</name><value>3</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>dfs.replication.considerLoad</name><value>true</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+</configuration>
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/actual2/result2/.part-00000.crc b/genomix/genomix-hadoop/actual2/result2/.part-00000.crc
new file mode 100644
index 0000000..3f8c2c5
--- /dev/null
+++ b/genomix/genomix-hadoop/actual2/result2/.part-00000.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/actual2/result2/part-00000 b/genomix/genomix-hadoop/actual2/result2/part-00000
new file mode 100755
index 0000000..ea3e875
--- /dev/null
+++ b/genomix/genomix-hadoop/actual2/result2/part-00000
Binary files differ
diff --git a/genomix/genomix-hadoop/actual3/complete2/.complete2-r-00000.crc b/genomix/genomix-hadoop/actual3/complete2/.complete2-r-00000.crc
new file mode 100644
index 0000000..b0b2753
--- /dev/null
+++ b/genomix/genomix-hadoop/actual3/complete2/.complete2-r-00000.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/actual3/complete2/complete2-r-00000 b/genomix/genomix-hadoop/actual3/complete2/complete2-r-00000
new file mode 100755
index 0000000..d3d3667
--- /dev/null
+++ b/genomix/genomix-hadoop/actual3/complete2/complete2-r-00000
Binary files differ
diff --git a/genomix/genomix-hadoop/actual3/conf.xml b/genomix/genomix-hadoop/actual3/conf.xml
new file mode 100644
index 0000000..16a0edc
--- /dev/null
+++ b/genomix/genomix-hadoop/actual3/conf.xml
@@ -0,0 +1,179 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>dfs.namenode.decommission.nodes.per.interval</name><value>5</value></property>
+<property><name>dfs.https.need.client.auth</name><value>false</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>dfs.namenode.logging.level</name><value>info</value></property>
+<property><name>dfs.datanode.address</name><value>127.0.0.1:0</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>fs.default.name</name><value>hdfs://localhost:62106</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>dfs.safemode.threshold.pct</name><value>0.999f</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>dfs.namenode.handler.count</name><value>10</value></property>
+<property><name>dfs.blockreport.initialDelay</name><value>0</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>dfs.safemode.extension</name><value>0</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.StaticMapping</value></property>
+<property><name>dfs.https.server.keystore.resource</name><value>ssl-server.xml</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>dfs.name.edits.dir</name><value>${dfs.name.dir}</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>hadoop.job.ugi</name><value>hadoop,staff,everyone,localaccounts</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>dfs.block.size</name><value>67108864</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>dfs.datanode.ipc.address</name><value>127.0.0.1:0</value></property>
+<property><name>dfs.permissions</name><value>true</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>dfs.datanode.https.address</name><value>0.0.0.0:50475</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>dfs.secondary.http.address</name><value>0.0.0.0:50090</value></property>
+<property><name>dfs.replication.max</name><value>512</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>dfs.https.client.keystore.resource</name><value>ssl-client.xml</value></property>
+<property><name>dfs.namenode.startup</name><value>REGULAR</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>dfs.https.address</name><value>0.0.0.0:50470</value></property>
+<property><name>dfs.balance.bandwidthPerSec</name><value>1048576</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>fs.checkpoint.dir</name><value>build/test/data/dfs/namesecondary1,build/test/data/dfs/namesecondary2</value></property>
+<property><name>mapred.reduce.tasks</name><value>1</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>dfs.max.objects</name><value>0</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>dfs.datanode.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>dfs.blockreport.intervalMsec</name><value>3600000</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>dfs.client.block.write.retries</name><value>3</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>dfs.https.enable</name><value>false</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>dfs.default.chunk.view.size</name><value>32768</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>dfs.datanode.du.reserved</name><value>0</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>dfs.web.ugi</name><value>webuser,webgroup</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>dfs.df.interval</name><value>60000</value></property>
+<property><name>dfs.data.dir</name><value>${hadoop.tmp.dir}/dfs/data</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>dfs.datanode.dns.interface</name><value>default</value></property>
+<property><name>dfs.support.append</name><value>false</value></property>
+<property><name>dfs.permissions.supergroup</name><value>supergroup</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>dfs.replication.min</name><value>1</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>dfs.namenode.decommission.interval</name><value>3</value></property>
+<property><name>dfs.http.address</name><value>localhost:62107</value></property>
+<property><name>dfs.heartbeat.interval</name><value>3</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>dfs.name.dir</name><value>build/test/data/dfs/name1,build/test/data/dfs/name2</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>dfs.datanode.http.address</name><value>127.0.0.1:0</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>dfs.replication.interval</name><value>3</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>dfs.replication</name><value>2</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>dfs.access.time.precision</name><value>3600000</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>dfs.datanode.handler.count</name><value>3</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>dfs.replication.considerLoad</name><value>true</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+</configuration>
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/actual5/conf.xml b/genomix/genomix-hadoop/actual5/conf.xml
new file mode 100644
index 0000000..d19b061
--- /dev/null
+++ b/genomix/genomix-hadoop/actual5/conf.xml
@@ -0,0 +1,179 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>dfs.namenode.decommission.nodes.per.interval</name><value>5</value></property>
+<property><name>dfs.https.need.client.auth</name><value>false</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>dfs.namenode.logging.level</name><value>info</value></property>
+<property><name>dfs.datanode.address</name><value>127.0.0.1:0</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>fs.default.name</name><value>hdfs://localhost:58289</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>dfs.safemode.threshold.pct</name><value>0.999f</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>dfs.namenode.handler.count</name><value>10</value></property>
+<property><name>dfs.blockreport.initialDelay</name><value>0</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>dfs.safemode.extension</name><value>0</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.StaticMapping</value></property>
+<property><name>dfs.https.server.keystore.resource</name><value>ssl-server.xml</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>dfs.name.edits.dir</name><value>${dfs.name.dir}</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>hadoop.job.ugi</name><value>hadoop,staff,everyone,localaccounts</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>dfs.block.size</name><value>67108864</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>dfs.datanode.ipc.address</name><value>127.0.0.1:0</value></property>
+<property><name>dfs.permissions</name><value>true</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>dfs.datanode.https.address</name><value>0.0.0.0:50475</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>dfs.secondary.http.address</name><value>0.0.0.0:50090</value></property>
+<property><name>dfs.replication.max</name><value>512</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>dfs.https.client.keystore.resource</name><value>ssl-client.xml</value></property>
+<property><name>dfs.namenode.startup</name><value>REGULAR</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>dfs.https.address</name><value>0.0.0.0:50470</value></property>
+<property><name>dfs.balance.bandwidthPerSec</name><value>1048576</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>fs.checkpoint.dir</name><value>build/test/data/dfs/namesecondary1,build/test/data/dfs/namesecondary2</value></property>
+<property><name>mapred.reduce.tasks</name><value>1</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>dfs.max.objects</name><value>0</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>dfs.datanode.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>dfs.blockreport.intervalMsec</name><value>3600000</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>dfs.client.block.write.retries</name><value>3</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>dfs.https.enable</name><value>false</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>dfs.default.chunk.view.size</name><value>32768</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>dfs.datanode.du.reserved</name><value>0</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>dfs.web.ugi</name><value>webuser,webgroup</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>dfs.df.interval</name><value>60000</value></property>
+<property><name>dfs.data.dir</name><value>${hadoop.tmp.dir}/dfs/data</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>dfs.datanode.dns.interface</name><value>default</value></property>
+<property><name>dfs.support.append</name><value>false</value></property>
+<property><name>dfs.permissions.supergroup</name><value>supergroup</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>dfs.replication.min</name><value>1</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>dfs.namenode.decommission.interval</name><value>3</value></property>
+<property><name>dfs.http.address</name><value>localhost:58290</value></property>
+<property><name>dfs.heartbeat.interval</name><value>3</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>dfs.name.dir</name><value>build/test/data/dfs/name1,build/test/data/dfs/name2</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>dfs.datanode.http.address</name><value>127.0.0.1:0</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>dfs.replication.interval</name><value>3</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>dfs.replication</name><value>2</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>dfs.access.time.precision</name><value>3600000</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>dfs.datanode.handler.count</name><value>3</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>dfs.replication.considerLoad</name><value>true</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+</configuration>
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/actual5/result5/.part-00000.crc b/genomix/genomix-hadoop/actual5/result5/.part-00000.crc
new file mode 100644
index 0000000..dafaae3
--- /dev/null
+++ b/genomix/genomix-hadoop/actual5/result5/.part-00000.crc
Binary files differ
diff --git a/genomix/genomix-hadoop/actual5/result5/part-00000 b/genomix/genomix-hadoop/actual5/result5/part-00000
new file mode 100755
index 0000000..deeff28
--- /dev/null
+++ b/genomix/genomix-hadoop/actual5/result5/part-00000
Binary files differ
diff --git a/genomix/genomix-hadoop/compare/result1/comparesource.txt b/genomix/genomix-hadoop/compare/result1/comparesource.txt
new file mode 100644
index 0000000..ba52008
--- /dev/null
+++ b/genomix/genomix-hadoop/compare/result1/comparesource.txt
@@ -0,0 +1,8 @@
+GCA ACT|T 3
+AGC |A 1
+CGC T|AT 2
+TGC |A 1
+ATC C|G 2
+TCG A|C 2
+CAT G|C 2
+GCT C| 1
diff --git a/genomix/genomix-hadoop/compare/result2/comparesource.txt b/genomix/genomix-hadoop/compare/result2/comparesource.txt
new file mode 100644
index 0000000..db55a38
--- /dev/null
+++ b/genomix/genomix-hadoop/compare/result2/comparesource.txt
@@ -0,0 +1,8 @@
+GCA -72
+AGC 1
+CGC -119
+TGC 1
+ATC 36
+TCG 18
+CAT 66
+GCT 32
diff --git a/genomix/genomix-hadoop/compare/result3/comparesource.txt b/genomix/genomix-hadoop/compare/result3/comparesource.txt
new file mode 100644
index 0000000..5f9dd78
--- /dev/null
+++ b/genomix/genomix-hadoop/compare/result3/comparesource.txt
@@ -0,0 +1 @@
+02 71 66 1
diff --git a/genomix/genomix-hadoop/compare/result5/comparesource.txt b/genomix/genomix-hadoop/compare/result5/comparesource.txt
new file mode 100644
index 0000000..6f4bd5e
--- /dev/null
+++ b/genomix/genomix-hadoop/compare/result5/comparesource.txt
@@ -0,0 +1,3 @@
+GCA AT|T 2
+AGC |A 1
+TGC |A 1
diff --git a/genomix/genomix-hadoop/data/webmap/Test.txt b/genomix/genomix-hadoop/data/webmap/Test.txt
new file mode 100755
index 0000000..6d02b25
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/Test.txt
@@ -0,0 +1,10 @@
+@625E1AAXX100810:1:100:10000:10271/1
+AGCATCGCA
++
+EDBDB?BEEEDGGEGGGDGGGA>DG@GGD;GD@DG@F?<B<BFFD?
+@625E1AAXX100810:1:100:10000:10271/1
+TGCATCGCT
++
+EDBDB?BEEEDGGEGGGDGGGA>DG@GGD;GD@DG@F?<B<BFFD?
+
+
diff --git a/genomix/genomix-hadoop/expected/result1 b/genomix/genomix-hadoop/expected/result1
new file mode 100644
index 0000000..ba52008
--- /dev/null
+++ b/genomix/genomix-hadoop/expected/result1
@@ -0,0 +1,8 @@
+GCA ACT|T 3
+AGC |A 1
+CGC T|AT 2
+TGC |A 1
+ATC C|G 2
+TCG A|C 2
+CAT G|C 2
+GCT C| 1
diff --git a/genomix/genomix-hadoop/expected/result3 b/genomix/genomix-hadoop/expected/result3
new file mode 100644
index 0000000..5f9dd78
--- /dev/null
+++ b/genomix/genomix-hadoop/expected/result3
@@ -0,0 +1 @@
+02 71 66 1
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingDriver.java
new file mode 100644
index 0000000..132f6e0
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingDriver.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.gbresultschecking;
+
+import java.io.IOException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+@SuppressWarnings("deprecation")
+public class ResultsCheckingDriver {
+ private static class Options {
+ @Option(name = "-inputpath1", usage = "the input path", required = true)
+ public String inputPath1;
+
+ @Option(name = "-inputpath2", usage = "the input path", required = true)
+ public String inputPath2;
+
+ @Option(name = "-outputpath", usage = "the output path", required = true)
+ public String outputPath;
+
+ @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+ public int numReducers;
+
+ @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+ public int sizeKmer;
+
+ }
+
+ public void run(String inputPath1, String inputPath2, String outputPath, int numReducers, int sizeKmer,
+ String defaultConfPath) throws IOException {
+
+ JobConf conf = new JobConf(ResultsCheckingDriver.class);
+
+ conf.setInt("sizeKmer", sizeKmer);
+
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath));
+ }
+
+ conf.setJobName("Results Checking");
+ conf.setMapperClass(ResultsCheckingMapper.class);
+ conf.setReducerClass(ResultsCheckingReducer.class);
+
+ conf.setMapOutputKeyClass(Text.class);
+ conf.setMapOutputValueClass(Text.class);
+
+ conf.setInputFormat(SequenceFileInputFormat.class);
+ conf.setOutputFormat(TextOutputFormat.class);
+
+ conf.setOutputKeyClass(Text.class);
+ conf.setOutputValueClass(Text.class);
+
+ Path[] inputList = new Path[2];
+ inputList[0] = new Path(inputPath1);
+ inputList[1] = new Path(inputPath2);
+
+ FileInputFormat.setInputPaths(conf, inputList);
+ FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+ conf.setNumReduceTasks(numReducers);
+
+ FileSystem dfs = FileSystem.get(conf);
+ dfs.delete(new Path(outputPath), true);
+ JobClient.runJob(conf);
+ }
+
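+    // Example invocation (jar name and paths are hypothetical); compares two
+    // k-mer result sets and reports the records they disagree on:
+    //
+    //   hadoop jar genomix-hadoop.jar edu.uci.ics.gbresultschecking.ResultsCheckingDriver \
+    //     -inputpath1 /results/runA -inputpath2 /results/runB \
+    //     -outputpath /results/diff -num-reducers 1 -kmer-size 3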
+ public static void main(String[] args) throws Exception {
+ Options options = new Options();
+ CmdLineParser parser = new CmdLineParser(options);
+ parser.parseArgument(args);
+ ResultsCheckingDriver driver = new ResultsCheckingDriver();
+ driver.run(options.inputPath1, options.inputPath2, options.outputPath, options.numReducers, options.sizeKmer,
+ null);
+ }
+
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingMapper.java
new file mode 100644
index 0000000..fe56e1a
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingMapper.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.gbresultschecking;
+
+import java.io.IOException;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+import edu.uci.ics.genomix.type.Kmer;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+@SuppressWarnings({ "unused", "deprecation" })
+public class ResultsCheckingMapper extends MapReduceBase implements Mapper<BytesWritable, KmerCountValue, Text, Text> {
+ BytesWritable valWriter = new BytesWritable();
+ private final static IntWritable one = new IntWritable(1);
+ public static Text textkey = new Text();
+ public static Text textvalue = new Text();
+ public static String INPUT_PATH;
+ public static int KMER_SIZE;
+
+ public void configure(JobConf job) {
+ KMER_SIZE = job.getInt("sizeKmer", 0);
+ }
+
+ @Override
+ public void map(BytesWritable key, KmerCountValue value, OutputCollector<Text, Text> output, Reporter reporter)
+ throws IOException {
+
+ FileSplit fileSplit = (FileSplit) reporter.getInputSplit();
+ String filename = fileSplit.getPath().getName();
+ textkey.set(Kmer.recoverKmerFrom(KMER_SIZE, key.getBytes(), 0, key.getLength()) + "\t" + value.toString());
+ textvalue.set(filename);
+ output.collect(textkey, textvalue);
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingReducer.java
new file mode 100644
index 0000000..6f02136
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/ResultsCheckingReducer.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.gbresultschecking;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+@SuppressWarnings("deprecation")
+public class ResultsCheckingReducer extends MapReduceBase implements Reducer<Text, Text, Text, Text> {
+
+ public static Text textkey = new Text();
+ public static Text textvalue = new Text();
+
+ @Override
+ public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
+ throws IOException {
+        textkey.set(key);
+        textvalue.set(values.next());
+        // A key with exactly one value occurs in only one of the two inputs,
+        // i.e. the result sets disagree on it, so it is reported.
+        if (!values.hasNext()) {
+            output.collect(textkey, textvalue);
+        }
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterDriver.java
new file mode 100644
index 0000000..b3a6102
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterDriver.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.graphcountfilter;
+
+import java.io.IOException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+@SuppressWarnings("deprecation")
+public class CountFilterDriver {
+ private static class Options {
+ @Option(name = "-inputpath", usage = "the input path", required = true)
+ public String inputPath;
+
+ @Option(name = "-outputpath", usage = "the output path", required = true)
+ public String outputPath;
+
+ @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+ public int numReducers;
+
+ @Option(name = "-count-threshold", usage = "the threshold of count", required = true)
+ public int countThreshold;
+ }
+
+ public void run(String inputPath, String outputPath, int numReducers, int countThreshold, String defaultConfPath)
+ throws IOException {
+
+ JobConf conf = new JobConf(CountFilterDriver.class);
+ conf.setInt("countThreshold", countThreshold);
+
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath));
+ }
+
+ conf.setJobName("Count Filter");
+ conf.setMapperClass(CountFilterMapper.class);
+ conf.setReducerClass(CountFilterReducer.class);
+ conf.setCombinerClass(CountFilterReducer.class);
+
+ conf.setMapOutputKeyClass(BytesWritable.class);
+ conf.setMapOutputValueClass(ByteWritable.class);
+
+ conf.setInputFormat(SequenceFileInputFormat.class);
+ conf.setOutputFormat(SequenceFileOutputFormat.class);
+
+ conf.setOutputKeyClass(BytesWritable.class);
+ conf.setOutputValueClass(ByteWritable.class);
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath));
+ FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+ conf.setNumReduceTasks(numReducers);
+
+ FileSystem dfs = FileSystem.get(conf);
+ dfs.delete(new Path(outputPath), true);
+ JobClient.runJob(conf);
+ }
+
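+    // Example invocation (jar name and paths are hypothetical); keeps only
+    // k-mers whose coverage count is at least the threshold:
+    //
+    //   hadoop jar genomix-hadoop.jar edu.uci.ics.graphcountfilter.CountFilterDriver \
+    //     -inputpath /graph/kmers -outputpath /graph/filtered -num-reducers 2 -count-threshold 2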
+ public static void main(String[] args) throws Exception {
+ Options options = new Options();
+ CmdLineParser parser = new CmdLineParser(options);
+ parser.parseArgument(args);
+ CountFilterDriver driver = new CountFilterDriver();
+ driver.run(options.inputPath, options.outputPath, options.numReducers, options.countThreshold, null);
+ }
+
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterMapper.java
new file mode 100644
index 0000000..80557e9
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterMapper.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.graphcountfilter;
+
+import java.io.IOException;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+
+@SuppressWarnings({ "unused", "deprecation" })
+public class CountFilterMapper extends MapReduceBase implements
+ Mapper<BytesWritable, KmerCountValue, BytesWritable, ByteWritable> {
+ public static int THRESHOLD;
+ public BytesWritable outputKmer = new BytesWritable();
+ public ByteWritable outputAdjList = new ByteWritable();
+ @Override
+ public void configure(JobConf job) {
+ THRESHOLD = Integer.parseInt(job.get("countThreshold"));
+ }
+
+    @Override
+    public void map(BytesWritable key, KmerCountValue value, OutputCollector<BytesWritable, ByteWritable> output,
+            Reporter reporter) throws IOException {
+        // Keep only k-mers whose coverage count reaches the threshold; the
+        // count itself is dropped and only the adjacency bitmap is forwarded.
+        if (value.getCount() >= THRESHOLD) {
+            outputKmer.set(key);
+            outputAdjList.set(value.getAdjBitMap());
+            output.collect(outputKmer, outputAdjList);
+        }
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterReducer.java
new file mode 100644
index 0000000..c692336
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/graphcountfilter/CountFilterReducer.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.graphcountfilter;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+@SuppressWarnings("deprecation")
+public class CountFilterReducer extends MapReduceBase implements
+ Reducer<BytesWritable, ByteWritable, BytesWritable, ByteWritable> {
+ @Override
+ public void reduce(BytesWritable key, Iterator<ByteWritable> values,
+ OutputCollector<BytesWritable, ByteWritable> output, Reporter reporter) throws IOException {
+        output.collect(key, values.next()); // emit one (k-mer, adjacency) pair per key
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathDriver.java
new file mode 100644
index 0000000..5025a7b
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathDriver.java
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pathmerging;
+
+import java.io.IOException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+@SuppressWarnings("deprecation")
+public class MergePathDriver {
+
+ private static class Options {
+ @Option(name = "-inputpath", usage = "the input path", required = true)
+ public String inputPath;
+
+ @Option(name = "-outputpath", usage = "the output path", required = true)
+ public String outputPath;
+
+ @Option(name = "-mergeresultpath", usage = "the merging results path", required = true)
+ public String mergeResultPath;
+
+ @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+ public int numReducers;
+
+ @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+ public int sizeKmer;
+
+ @Option(name = "-merge-rounds", usage = "the while rounds of merging", required = true)
+ public int mergeRound;
+
+ }
+
+ public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer, int mergeRound, String defaultConfPath)
+            throws IOException {
+
+ JobConf conf = new JobConf(MergePathDriver.class);
+ conf.setInt("sizeKmer", sizeKmer);
+
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath));
+ }
+ conf.setJobName("Initial Path-Starting-Points Table");
+ conf.setMapperClass(SNodeInitialMapper.class);
+ conf.setReducerClass(SNodeInitialReducer.class);
+
+ conf.setMapOutputKeyClass(BytesWritable.class);
+ conf.setMapOutputValueClass(MergePathValueWritable.class);
+
+ conf.setInputFormat(SequenceFileInputFormat.class);
+ conf.setOutputFormat(SequenceFileOutputFormat.class);
+
+ conf.setOutputKeyClass(BytesWritable.class);
+ conf.setOutputValueClass(MergePathValueWritable.class);
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath));
+ FileOutputFormat.setOutputPath(conf, new Path(inputPath + "-step1"));
+ conf.setNumReduceTasks(numReducers);
+ FileSystem dfs = FileSystem.get(conf);
+ dfs.delete(new Path(inputPath + "-step1"), true);
+ JobClient.runJob(conf);
+/*----------------------------------------------------------------------*/
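+        // Each round re-runs the path-merge job on the records that are still
+        // mergeable. MultipleOutputs splits the results: "uncomplete<i>" feeds
+        // the next round, while "complete<i>" holds finished paths and is moved
+        // under mergeResultPath at the end of the round.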
+ for(int iMerge = 0; iMerge < mergeRound; iMerge ++){
+
+ conf = new JobConf(MergePathDriver.class);
+ conf.setInt("sizeKmer", sizeKmer);
+ conf.setInt("iMerge", iMerge);
+
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath));
+ }
+ conf.setJobName("Path Merge");
+
+ conf.setMapperClass(MergePathMapper.class);
+ conf.setReducerClass(MergePathReducer.class);
+
+ conf.setMapOutputKeyClass(BytesWritable.class);
+ conf.setMapOutputValueClass(MergePathValueWritable.class);
+
+ conf.setInputFormat(SequenceFileInputFormat.class);
+ conf.setOutputFormat(MultipleSequenceFileOutputFormat.class);
+
+ String uncomplete = "uncomplete" + iMerge;
+ String complete = "complete" + iMerge;
+
+ MultipleOutputs.addNamedOutput(conf, uncomplete,
+ MergePathMultiSeqOutputFormat.class, BytesWritable.class,
+ MergePathValueWritable.class);
+
+ MultipleOutputs.addNamedOutput(conf, complete,
+ MergePathMultiSeqOutputFormat.class, BytesWritable.class,
+ MergePathValueWritable.class);
+
+ conf.setOutputKeyClass(BytesWritable.class);
+ conf.setOutputValueClass(MergePathValueWritable.class);
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
+ FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+ conf.setNumReduceTasks(numReducers);
+ dfs.delete(new Path(outputPath), true);
+ JobClient.runJob(conf);
+ dfs.delete(new Path(inputPath + "-step1"), true);
+ dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
+ dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));
+ }
+ }
+
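+    // Example invocation (jar name and paths are hypothetical):
+    //
+    //   hadoop jar genomix-hadoop.jar edu.uci.ics.pathmerging.MergePathDriver \
+    //     -inputpath /graph/kmers -outputpath /graph/merge-tmp \
+    //     -mergeresultpath /graph/merged -num-reducers 4 -kmer-size 5 -merge-rounds 3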
+ public static void main(String[] args) throws Exception {
+ Options options = new Options();
+ CmdLineParser parser = new CmdLineParser(options);
+ parser.parseArgument(args);
+ MergePathDriver driver = new MergePathDriver();
+ driver.run(options.inputPath, options.outputPath, options.mergeResultPath, options.numReducers, options.sizeKmer, options.mergeRound, null);
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMapper.java
new file mode 100644
index 0000000..50146ad
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMapper.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pathmerging;
+
+import java.io.IOException;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.Kmer;
+import edu.uci.ics.genomix.type.KmerUtil;
+import edu.uci.ics.genomix.type.Kmer.GENE_CODE;
+
+@SuppressWarnings("deprecation")
+public class MergePathMapper extends MapReduceBase implements
+ Mapper<BytesWritable, MergePathValueWritable, BytesWritable, MergePathValueWritable> {
+ public static int KMER_SIZE;
+ public BytesWritable outputKmer = new BytesWritable();
+ public MergePathValueWritable outputAdjList = new MergePathValueWritable();
+
+ public void configure(JobConf job) {
+ KMER_SIZE = job.getInt("sizeKmer", 0);
+ }
+
+ @Override
+ public void map(BytesWritable key, MergePathValueWritable value,
+ OutputCollector<BytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+
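+        // The adjacency byte packs predecessor nucleotides into the high nibble
+        // and successor nucleotides into the low nibble; the two masks below
+        // split those edge sets apart.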
+ byte precursor = (byte) 0xF0;
+ byte succeed = (byte) 0x0F;
+ byte adjBitMap = value.getAdjBitMap();
+ byte bitFlag = value.getFlag();
+ precursor = (byte) (precursor & adjBitMap);
+ precursor = (byte) ((precursor & 0xff) >> 4);
+ succeed = (byte) (succeed & adjBitMap);
+
+ byte[] kmerValue = new byte[key.getLength()];
+ for (int i = 0; i < kmerValue.length; i++) {
+ kmerValue[i] = key.getBytes()[i];
+ }
+ if (bitFlag == 1) {
+ byte succeedCode = GENE_CODE.getGeneCodeFromBitMap(succeed);
+ int originalByteNum = Kmer.getByteNumFromK(KMER_SIZE);
+ byte[] tmpKmer = KmerUtil.getLastKmerFromChain(KMER_SIZE, value.getKmerSize(), kmerValue);
+ byte[] newKmer = KmerUtil.shiftKmerWithNextCode(KMER_SIZE, tmpKmer, succeedCode);
+ outputKmer.set(newKmer, 0, originalByteNum);
+
+ int mergeByteNum = Kmer.getByteNumFromK(value.getKmerSize() - (KMER_SIZE - 1));
+ byte[] mergeKmer = KmerUtil.getFirstKmerFromChain(value.getKmerSize() - (KMER_SIZE - 1),
+ value.getKmerSize(), kmerValue);
+ outputAdjList.set(mergeKmer, 0, mergeByteNum, adjBitMap, bitFlag, value.getKmerSize() - (KMER_SIZE - 1));
+ output.collect(outputKmer, outputAdjList);
+ } else {
+ outputKmer.set(key);
+ outputAdjList.set(value);
+ output.collect(outputKmer, outputAdjList);
+ }
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiSeqOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiSeqOutputFormat.java
new file mode 100644
index 0000000..64fbb91
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiSeqOutputFormat.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pathmerging;
+
+import java.io.File;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
+
+
+public class MergePathMultiSeqOutputFormat extends MultipleSequenceFileOutputFormat<BytesWritable, MergePathValueWritable>{
+ @Override
+ protected String generateLeafFileName(String name) {
+        // Route each part file into a subdirectory named after its named-output
+        // prefix, e.g. "uncomplete0-r-00000" -> "uncomplete0/uncomplete0-r-00000".
+ String[] names = name.split("-");
+ return names[0] + File.separator + name;
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiTextOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiTextOutputFormat.java
new file mode 100644
index 0000000..29d3b68
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiTextOutputFormat.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pathmerging;
+
+import java.io.File;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
+
+public class MergePathMultiTextOutputFormat extends MultipleTextOutputFormat<Text, Text>{
+ @Override
+ protected String generateLeafFileName(String name) {
+        // Route each part file into a subdirectory named after its named-output
+        // prefix (the portion of the file name before the first '-').
+ String[] names = name.split("-");
+ return names[0] + File.separator + name;
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathReducer.java
new file mode 100644
index 0000000..0ffc6e0
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathReducer.java
@@ -0,0 +1,129 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pathmerging;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+
+import edu.uci.ics.genomix.type.Kmer;
+import edu.uci.ics.genomix.type.KmerUtil;
+
+@SuppressWarnings("deprecation")
+public class MergePathReducer extends MapReduceBase implements
+ Reducer<BytesWritable, MergePathValueWritable, BytesWritable, MergePathValueWritable> {
+ public BytesWritable outputKmer = new BytesWritable();
+ public static int KMER_SIZE;
+ public MergePathValueWritable outputAdjList = new MergePathValueWritable();
+ MultipleOutputs mos = null;
+ public static int I_MERGE;
+
+ public void configure(JobConf job) {
+ mos = new MultipleOutputs(job);
+ I_MERGE = Integer.parseInt(job.get("iMerge"));
+ KMER_SIZE = job.getInt("sizeKmer", 0);
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public void reduce(BytesWritable key, Iterator<MergePathValueWritable> values,
+ OutputCollector<BytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+ outputAdjList = values.next();
+
+        if (values.hasNext()) {
+
+ byte[] keyBytes = new byte[key.getLength()];
+ for (int i = 0; i < keyBytes.length; i++) {
+ keyBytes[i] = key.getBytes()[i];
+ }
+ if (outputAdjList.getFlag() == 1) {
+ byte adjBitMap = outputAdjList.getAdjBitMap();
+ byte bitFlag = outputAdjList.getFlag();
+ int kmerSize = outputAdjList.getKmerSize();
+ int mergeByteNum = Kmer.getByteNumFromK(KMER_SIZE + kmerSize);
+ byte[] valueBytes = new byte[outputAdjList.getLength()];
+ for (int i = 0; i < valueBytes.length; i++) {
+ valueBytes[i] = outputAdjList.getBytes()[i];
+ }
+ byte[] mergeKmer = KmerUtil.mergeTwoKmer(outputAdjList.getKmerSize(), valueBytes, KMER_SIZE, keyBytes);
+ outputKmer.set(mergeKmer, 0, mergeByteNum);
+
+ outputAdjList = values.next();
+ byte nextAdj = outputAdjList.getAdjBitMap();
+ byte succeed = (byte) 0x0F;
+ succeed = (byte) (succeed & nextAdj);
+ adjBitMap = (byte) (adjBitMap & 0xF0);
+ adjBitMap = (byte) (adjBitMap | succeed);
+ outputAdjList.set(null, 0, 0, adjBitMap, bitFlag, KMER_SIZE + kmerSize);
+
+ mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputAdjList);
+ } else {
+ byte nextAdj = outputAdjList.getAdjBitMap();
+ byte succeed = (byte) 0x0F;
+ succeed = (byte) (succeed & nextAdj);
+ outputAdjList = values.next();
+ byte adjBitMap = outputAdjList.getAdjBitMap();
+ byte flag = outputAdjList.getFlag();
+ int kmerSize = outputAdjList.getKmerSize();
+ int mergeByteNum = Kmer.getByteNumFromK(KMER_SIZE + kmerSize);
+ byte[] valueBytes = new byte[outputAdjList.getLength()];
+ for (int i = 0; i < valueBytes.length; i++) {
+ valueBytes[i] = outputAdjList.getBytes()[i];
+ }
+ byte[] mergeKmer = KmerUtil.mergeTwoKmer(outputAdjList.getKmerSize(), valueBytes, KMER_SIZE, keyBytes);
+ outputKmer.set(mergeKmer, 0, mergeByteNum);
+
+ adjBitMap = (byte) (adjBitMap & 0xF0);
+ adjBitMap = (byte) (adjBitMap | succeed);
+ outputAdjList.set(null, 0, 0, adjBitMap, flag, KMER_SIZE + kmerSize);
+
+ mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputAdjList);
+ }
+ } else {
+ byte[] keyBytes = new byte[key.getLength()];
+ for (int i = 0; i < keyBytes.length; i++) {
+ keyBytes[i] = key.getBytes()[i];
+ }
+ if (outputAdjList.getFlag() != 0) {
+ byte adjBitMap = outputAdjList.getAdjBitMap();
+ byte flag = outputAdjList.getFlag();
+ int kmerSize = outputAdjList.getKmerSize();
+ int mergeByteNum = Kmer.getByteNumFromK(KMER_SIZE - 1 + kmerSize);
+ byte[] tmpKmer = KmerUtil.getFirstKmerFromChain(KMER_SIZE - 1, KMER_SIZE, keyBytes);
+ byte[] valueBytes = new byte[outputAdjList.getLength()];
+ for (int i = 0; i < valueBytes.length; i++) {
+ valueBytes[i] = outputAdjList.getBytes()[i];
+ }
+ byte[] mergeKmer = KmerUtil.mergeTwoKmer(outputAdjList.getKmerSize(), valueBytes, KMER_SIZE - 1,
+ tmpKmer);
+ outputKmer.set(mergeKmer, 0, mergeByteNum);
+ outputAdjList.set(null, 0, 0, adjBitMap, flag, KMER_SIZE + kmerSize);
+ mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputAdjList);
+ } else
+ mos.getCollector("uncomplete" + I_MERGE, reporter).collect(key, outputAdjList);
+ }
+ }
+
+ public void close() throws IOException {
+        // Flush and close all named outputs.
+ mos.close();
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathValueWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathValueWritable.java
new file mode 100644
index 0000000..f1dee39
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathValueWritable.java
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pathmerging;
+
+import java.io.IOException;
+import java.io.DataInput;
+import java.io.DataOutput;
+import org.apache.hadoop.io.BinaryComparable;
+import org.apache.hadoop.io.WritableComparable;
+
+import edu.uci.ics.genomix.type.Kmer;
+
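+/**
+ * Value type passed between the path-merge jobs: a variable-length k-mer chain
+ * plus per-node metadata. The wire format (see write/readFields) is: payload
+ * length (int), payload bytes, adjacency bitmap (byte), flag (byte), and the
+ * chain's k-mer size (int).
+ */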
+public class MergePathValueWritable extends BinaryComparable implements WritableComparable<BinaryComparable> {
+
+ private static final byte[] EMPTY_BYTES = {};
+ private int size;
+ private byte[] bytes;
+
+ private byte adjBitMap;
+ private byte flag;
+ private int kmerSize;
+
+ public MergePathValueWritable() {
+ this((byte) 0, (byte) 0, (byte) 0, EMPTY_BYTES);
+ }
+
+ public MergePathValueWritable(byte adjBitMap, byte flag, byte kmerSize, byte[] bytes) {
+ this.adjBitMap = adjBitMap;
+ this.flag = flag;
+ this.kmerSize = kmerSize;
+
+ this.bytes = bytes;
+ this.size = bytes.length;
+ }
+
+ public void setSize(int size) {
+ if (size > getCapacity()) {
+ setCapacity(size * 3 / 2);
+ }
+ this.size = size;
+ }
+
+ public int getCapacity() {
+ return bytes.length;
+ }
+
+ public void setCapacity(int new_cap) {
+ if (new_cap != getCapacity()) {
+ byte[] new_data = new byte[new_cap];
+ if (new_cap < size) {
+ size = new_cap;
+ }
+ if (size != 0) {
+ System.arraycopy(bytes, 0, new_data, 0, size);
+ }
+ bytes = new_data;
+ }
+ }
+
+ public void set(MergePathValueWritable newData) {
+ set(newData.bytes, 0, newData.size, newData.adjBitMap, newData.flag, newData.kmerSize);
+ }
+
+ public void set(byte[] newData, int offset, int length, byte adjBitMap, byte flag, int kmerSize) {
+ setSize(0);
+ if (length != 0) {
+ setSize(length);
+ System.arraycopy(newData, offset, bytes, 0, size);
+ }
+ this.adjBitMap = adjBitMap;
+ this.flag = flag;
+ this.kmerSize = kmerSize;
+ }
+
+ @Override
+ public void readFields(DataInput arg0) throws IOException {
+ setSize(0); // clear the old data
+ setSize(arg0.readInt());
+ if(size != 0){
+ arg0.readFully(bytes, 0, size);
+ }
+ adjBitMap = arg0.readByte();
+ flag = arg0.readByte();
+ kmerSize = arg0.readInt();
+ }
+
+ @Override
+ public void write(DataOutput arg0) throws IOException {
+ arg0.writeInt(size);
+ arg0.write(bytes, 0, size);
+ arg0.writeByte(adjBitMap);
+ arg0.writeByte(flag);
+ arg0.writeInt(kmerSize);
+ }
+
+ @Override
+ public byte[] getBytes() {
+ return bytes;
+ }
+
+ @Override
+ public int getLength() {
+ return size;
+ }
+
+ public byte getAdjBitMap() {
+ return this.adjBitMap;
+ }
+
+ public byte getFlag() {
+ return this.flag;
+ }
+
+ public int getKmerSize() {
+ return this.kmerSize;
+ }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder(3 * size);
+ for (int idx = 0; idx < size; idx++) {
+ // if not the first, put a blank separator in
+ if (idx != 0) {
+ sb.append(' ');
+ }
+ String num = Integer.toHexString(0xff & bytes[idx]);
+ // if it is only one digit, add a leading 0.
+ if (num.length() < 2) {
+ sb.append('0');
+ }
+ sb.append(num);
+ }
+ return Kmer.GENE_CODE.getSymbolFromBitMap(adjBitMap) + '\t' + String.valueOf(flag) + '\t' + sb.toString();
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialMapper.java
new file mode 100644
index 0000000..3552681
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialMapper.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pathmerging;
+
+import java.io.IOException;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.KmerUtil;
+
+@SuppressWarnings("deprecation")
+public class SNodeInitialMapper extends MapReduceBase implements
+ Mapper<BytesWritable, ByteWritable, BytesWritable, MergePathValueWritable> {
+
+ public static int KMER_SIZE;
+ public BytesWritable outputKmer = new BytesWritable();
+ public MergePathValueWritable outputAdjList = new MergePathValueWritable();
+
+ public void configure(JobConf job) {
+ KMER_SIZE = Integer.parseInt(job.get("sizeKmer"));
+ }
+
+    /**
+     * Returns true unless the 4-bit adjacency nibble encodes exactly one
+     * neighbor (exactly one bit set), i.e. "true" means the degree on that
+     * side is not exactly 1.
+     */
+    boolean measureDegree(byte adjacent) {
+        return Integer.bitCount(adjacent & 0xFF) != 1;
+    }
+
+ @Override
+ public void map(BytesWritable key, ByteWritable value,
+ OutputCollector<BytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+
+ byte precursor = (byte) 0xF0;
+ byte succeed = (byte) 0x0F;
+ byte adjBitMap = value.get();
+ byte flag = (byte) 0;
+ precursor = (byte) (precursor & adjBitMap);
+ precursor = (byte) ((precursor & 0xff) >> 4);
+ succeed = (byte) (succeed & adjBitMap);
+ boolean inDegree = measureDegree(precursor);
+ boolean outDegree = measureDegree(succeed);
+ byte initial = 0;
+ byte[] kmerValue = new byte[key.getLength()];
+ for (int i = 0; i < kmerValue.length; i++) {
+ kmerValue[i] = key.getBytes()[i];
+ }
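+        // A node with exactly one successor but in-degree != 1 appears to mark
+        // the start of a mergeable path: shift the k-mer one base toward its
+        // single successor and tag it with flag 2 so the reducer can pair it
+        // with that successor node.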
+        if (inDegree && !outDegree) {
+ flag = (byte) 2;
+ switch (succeed) {
+ case 1:
+ initial = (byte) 0x00;
+ break;
+ case 2:
+ initial = (byte) 0x01;
+ break;
+ case 4:
+ initial = (byte) 0x02;
+ break;
+ case 8:
+ initial = (byte) 0x03;
+ break;
+ }
+ byte[] newKmer = KmerUtil.shiftKmerWithNextCode(KMER_SIZE, kmerValue, initial);
+ outputKmer.set(newKmer, 0, kmerValue.length);
+ adjBitMap = (byte) (adjBitMap & 0xF0);
+ outputAdjList.set(null, 0, 0, adjBitMap, flag, KMER_SIZE);
+ output.collect(outputKmer, outputAdjList);
+ }
+        // Interior path nodes (in- and out-degree exactly 1) pass through
+        // unchanged with flag 0.
+        if (!inDegree && !outDegree) {
+ outputKmer.set(key);
+ outputAdjList.set(null, 0, 0, adjBitMap, flag, KMER_SIZE);
+ output.collect(outputKmer, outputAdjList);
+ }
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialReducer.java
new file mode 100644
index 0000000..44a0af2
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialReducer.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pathmerging;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+@SuppressWarnings("deprecation")
+public class SNodeInitialReducer extends MapReduceBase implements
+ Reducer<BytesWritable, MergePathValueWritable, BytesWritable, MergePathValueWritable> {
+ public BytesWritable outputKmer = new BytesWritable();
+ public MergePathValueWritable outputAdjList = new MergePathValueWritable();
+
+ @Override
+ public void reduce(BytesWritable key, Iterator<MergePathValueWritable> values,
+ OutputCollector<BytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+ outputAdjList = values.next();
+ outputKmer.set(key);
+        if (values.hasNext()) {
+ if (outputAdjList.getFlag() != 2) {
+ byte adjBitMap = outputAdjList.getAdjBitMap();
+ int kmerSize = outputAdjList.getKmerSize();
+ byte bitFlag = 1;
+ outputAdjList.set(null, 0, 0, adjBitMap, bitFlag, kmerSize);
+ output.collect(outputKmer, outputAdjList);
+
+ } else {
+ boolean flag = false;
+ while (values.hasNext()) {
+ outputAdjList = values.next();
+ if (outputAdjList.getFlag() != 2) {
+ flag = true;
+ break;
+ }
+ }
+            if (flag) {
+ byte adjBitMap = outputAdjList.getAdjBitMap();
+ int kmerSize = outputAdjList.getKmerSize();
+ byte bitFlag = 1;
+ outputAdjList.set(null, 0, 0, adjBitMap, bitFlag, kmerSize);
+ output.collect(outputKmer, outputAdjList);
+ }
+ }
+        } else {
+ output.collect(outputKmer, outputAdjList);
+ }
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatDriver.java
new file mode 100644
index 0000000..efe1589
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatDriver.java
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.statistics;
+
+import java.io.IOException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+@SuppressWarnings("deprecation")
+public class GenomixStatDriver {
+ private static class Options {
+ @Option(name = "-inputpath", usage = "the input path", required = true)
+ public String inputPath;
+
+ @Option(name = "-outputpath", usage = "the output path", required = true)
+ public String outputPath;
+
+ @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+ public int numReducers;
+
+ }
+
+ public void run(String inputPath, String outputPath, int numReducers, String defaultConfPath)
+ throws IOException {
+
+ JobConf conf = new JobConf(GenomixStatDriver.class);
+
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath));
+ }
+
+ conf.setJobName("Genomix Statistics");
+ conf.setMapperClass(GenomixStatMapper.class);
+ conf.setReducerClass(GenomixStatReducer.class);
+ conf.setCombinerClass(GenomixStatReducer.class);
+
+ conf.setMapOutputKeyClass(BytesWritable.class);
+ conf.setMapOutputValueClass(KmerCountValue.class);
+
+ conf.setInputFormat(SequenceFileInputFormat.class);
+ conf.setOutputFormat(SequenceFileOutputFormat.class);
+
+ conf.setOutputKeyClass(BytesWritable.class);
+ conf.setOutputValueClass(KmerCountValue.class);
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath));
+ FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+ conf.setNumReduceTasks(numReducers);
+
+        FileSystem dfs = FileSystem.get(conf);
+        // Clear any previous output so re-runs do not fail on an existing path.
+        dfs.delete(new Path(outputPath), true);
+        JobClient.runJob(conf);
+ }
+
+ public static void main(String[] args) throws Exception {
+ Options options = new Options();
+ CmdLineParser parser = new CmdLineParser(options);
+ parser.parseArgument(args);
+ GenomixStatDriver driver = new GenomixStatDriver();
+ driver.run(options.inputPath, options.outputPath, options.numReducers, null);
+ }
+}
\ No newline at end of file
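
As orientation for the driver above, a minimal sketch of how it might be invoked. Every argument below is a placeholder chosen for illustration, not a value taken from this patch:

    // Hypothetical invocation of GenomixStatDriver; run() deletes the output
    // path before submitting the job, so re-runs are safe.
    public class GenomixStatDriverUsage {
        public static void main(String[] args) throws Exception {
            edu.uci.ics.statistics.GenomixStatDriver.main(new String[] {
                    "-inputpath", "/graphbuild-output",  // sequence files of <BytesWritable, KmerCountValue>
                    "-outputpath", "/stat-output",
                    "-num-reducers", "4" });
        }
    }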
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatMapper.java
new file mode 100644
index 0000000..c5feefe
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatMapper.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.statistics;
+
+import java.io.IOException;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+@SuppressWarnings({ "unused", "deprecation" })
+public class GenomixStatMapper extends MapReduceBase implements
+ Mapper<BytesWritable, KmerCountValue, BytesWritable, KmerCountValue> {
+
+    // True when the nibble encodes a degree other than exactly one, i.e.
+    // when the number of set bits (one per adjacent nucleotide) is not 1.
+    // Equivalent to the original 16-case table over the values 0-15.
+    boolean measureDegree(byte adjacent) {
+        return Integer.bitCount(adjacent & 0x0F) != 1;
+    }
+
+    @Override
+    public void map(BytesWritable key, KmerCountValue value, OutputCollector<BytesWritable, KmerCountValue> output,
+            Reporter reporter) throws IOException {
+        byte adj = value.getAdjBitMap();
+        // High nibble: predecessor (incoming) edges; low nibble: successor
+        // (outgoing) edges.
+        byte precursor = (byte) ((adj & 0xF0) >> 4);
+        byte succeed = (byte) (adj & 0x0F);
+        boolean inDegree = measureDegree(precursor);
+        boolean outDegree = measureDegree(succeed);
+        // Keep only k-mers whose in-degree is not 1 and whose out-degree is
+        // exactly 1 (in de Bruijn terms, likely path heads).
+        if (inDegree && !outDegree) {
+            output.collect(key, value);
+        }
+    }
+}
\ No newline at end of file
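
A worked example of the nibble arithmetic in map() above; the bitmap value is invented for illustration, assuming the layout used in this patch (high nibble = predecessor edges, low nibble = successor edges, one bit per nucleotide):

    // Standalone illustration of the adjacency-bitmap split used by the mapper.
    public class AdjBitMapSketch {
        public static void main(String[] args) {
            byte adj = (byte) 0b0011_0100;                   // 0011 in, 0100 out (invented)
            int precursor = (adj & 0xF0) >> 4;               // 0b0011
            int succeed = adj & 0x0F;                        // 0b0100
            System.out.println(Integer.bitCount(precursor)); // 2: in-degree is not 1
            System.out.println(Integer.bitCount(succeed));   // 1: exactly one successor
            // The mapper keeps this k-mer: in-degree != 1 and out-degree == 1.
        }
    }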
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatReducer.java
new file mode 100644
index 0000000..ea9a915
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/statistics/GenomixStatReducer.java
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.statistics;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+import edu.uci.ics.genomix.type.KmerCountValue;
+
+@SuppressWarnings("deprecation")
+public class GenomixStatReducer extends MapReduceBase implements
+ Reducer<BytesWritable, KmerCountValue, BytesWritable, KmerCountValue> {
+    static enum MyCounters { NUM_RECORDS };
+    KmerCountValue valWriter = new KmerCountValue();
+
+    @Override
+    public void reduce(BytesWritable key, Iterator<KmerCountValue> values,
+            OutputCollector<BytesWritable, KmerCountValue> output, Reporter reporter) throws IOException {
+        reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
+        // Emit a single representative value per key group.
+        valWriter = values.next();
+        output.collect(key, valWriter);
+    }
+}
\ No newline at end of file
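
GenomixStatReducer doubles as the job's combiner (see the driver above). That reuse is safe here because keeping one value per group is stable under repeated application, assuming any single value is an acceptable representative of its k-mer; a sketch of the property, with invented values:

    import java.util.Arrays;
    import java.util.Iterator;
    import java.util.List;

    // Sketch: keeping the first element of a group is idempotent, which is
    // the property that makes the same class safe to reuse as a combiner.
    public class KeepFirstSketch {
        static <T> T keepFirst(Iterator<T> values) {
            return values.next();
        }

        public static void main(String[] args) {
            List<String> group = Arrays.asList("v1", "v2", "v3");
            String once = keepFirst(group.iterator());
            String twice = keepFirst(Arrays.asList(once).iterator());
            System.out.println(once.equals(twice)); // true
        }
    }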
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/gbresultschecking/ResultsCheckingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/gbresultschecking/ResultsCheckingTest.java
new file mode 100644
index 0000000..72e9b45
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/gbresultschecking/ResultsCheckingTest.java
@@ -0,0 +1,76 @@
+package edu.uci.ics.gbresultschecking;
+
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.junit.Test;
+
+@SuppressWarnings("deprecation")
+public class ResultsCheckingTest {
+ private static final String ACTUAL_RESULT_DIR = "actual4";
+ private JobConf conf = new JobConf();
+ private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+ private static final String DATA_PATH1 = "ResultsCheckingData" + "/part-00000";
+ private static final String DATA_PATH2 = "ResultsCheckingData" + "/part-00001";
+ private static final String HDFS_PATH1 = "/webmap1";
+ private static final String HDFS_PATH2 = "/webmap2";
+ private static final String RESULT_PATH = "/result4";
+ private static final int COUNT_REDUCER = 4;
+ private static final int SIZE_KMER = 3;
+ private MiniDFSCluster dfsCluster;
+ private MiniMRCluster mrCluster;
+ private FileSystem dfs;
+
+ @Test
+ public void test() throws Exception {
+ FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+ FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+ startHadoop();
+ ResultsCheckingDriver tldriver = new ResultsCheckingDriver();
+ tldriver.run(HDFS_PATH1, HDFS_PATH2, RESULT_PATH, COUNT_REDUCER, SIZE_KMER, HADOOP_CONF_PATH);
+ dumpResult();
+        cleanupHadoop();
+    }
+
+    private void startHadoop() throws IOException {
+ FileSystem lfs = FileSystem.getLocal(new Configuration());
+ lfs.delete(new Path("build"), true);
+ System.setProperty("hadoop.log.dir", "logs");
+ dfsCluster = new MiniDFSCluster(conf, 2, true, null);
+ dfs = dfsCluster.getFileSystem();
+ mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
+
+ Path src = new Path(DATA_PATH1);
+ Path dest = new Path(HDFS_PATH1 + "/");
+ dfs.mkdirs(dest);
+ dfs.copyFromLocalFile(src, dest);
+ src = new Path(DATA_PATH2);
+ dest = new Path(HDFS_PATH2 + "/");
+ dfs.copyFromLocalFile(src, dest);
+
+ DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+ conf.writeXml(confOutput);
+ confOutput.flush();
+ confOutput.close();
+ }
+
+ private void cleanupHadoop() throws IOException {
+ mrCluster.shutdown();
+ dfsCluster.shutdown();
+ }
+
+ private void dumpResult() throws IOException {
+ Path src = new Path(RESULT_PATH);
+ Path dest = new Path(ACTUAL_RESULT_DIR + "/");
+ dfs.copyToLocalFile(src, dest);
+ }
+}
+
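
All three test classes in this patch share the same harness: a two-datanode MiniDFSCluster plus a MiniMRCluster, whose merged configuration is written to conf.xml so the driver under test can pick up the mini-cluster's filesystem and job-tracker addresses via addResource(). A condensed sketch of that pattern (cluster sizes mirror the values used above; the conf.xml path is illustrative):

    import java.io.DataOutputStream;
    import java.io.File;
    import java.io.FileOutputStream;

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.hdfs.MiniDFSCluster;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.MiniMRCluster;

    // Condensed sketch of the mini-cluster harness shared by these tests.
    public class MiniClusterHarnessSketch {
        public static void main(String[] args) throws Exception {
            JobConf conf = new JobConf();
            System.setProperty("hadoop.log.dir", "logs");
            MiniDFSCluster dfsCluster = new MiniDFSCluster(conf, 2, true, null);        // 2 datanodes
            FileSystem dfs = dfsCluster.getFileSystem();
            MiniMRCluster mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2); // 4 tasktrackers
            // Persist the merged configuration for drivers to load via addResource().
            DataOutputStream out = new DataOutputStream(new FileOutputStream(new File("conf.xml")));
            conf.writeXml(out);
            out.close();
            mrCluster.shutdown();
            dfsCluster.shutdown();
        }
    }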
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphcountfilter/CountFilterTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphcountfilter/CountFilterTest.java
new file mode 100644
index 0000000..4bf0be7
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphcountfilter/CountFilterTest.java
@@ -0,0 +1,102 @@
+package edu.uci.ics.graphcountfilter;
+
+import java.io.BufferedWriter;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.junit.Test;
+import edu.uci.ics.genomix.type.Kmer;
+import edu.uci.ics.utils.TestUtils;
+
+
+@SuppressWarnings("deprecation")
+public class CountFilterTest {
+ private static final String ACTUAL_RESULT_DIR = "actual2";
+ private static final String COMPARE_DIR = "compare";
+ private JobConf conf = new JobConf();
+ private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+ private static final String DATA_PATH = "actual1" + "/result1" + "/part-00000";
+ private static final String HDFS_PATH = "/webmap";
+ private static final String RESULT_PATH = "/result2";
+ private static final String EXPECTED_PATH = "expected/result2";
+ private static final String TEST_SOURCE_DIR = COMPARE_DIR + RESULT_PATH + "/comparesource.txt";
+ private static final int COUNT_REDUCER = 4;
+ private static final int SIZE_KMER = 3;
+ private MiniDFSCluster dfsCluster;
+ private MiniMRCluster mrCluster;
+ private FileSystem dfs;
+
+ @SuppressWarnings("resource")
+ @Test
+ public void test() throws Exception {
+ FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+ FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+ startHadoop();
+
+ // run graph transformation tests
+ CountFilterDriver tldriver = new CountFilterDriver();
+ tldriver.run(HDFS_PATH, RESULT_PATH, COUNT_REDUCER, 1, HADOOP_CONF_PATH);
+
+        Path path = new Path(RESULT_PATH + "/part-00000");
+        SequenceFile.Reader reader = new SequenceFile.Reader(dfs, path, conf);
+        BytesWritable key = (BytesWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+        ByteWritable value = (ByteWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
+        File filePathTo = new File(TEST_SOURCE_DIR);
+        BufferedWriter bw = new BufferedWriter(new FileWriter(filePathTo));
+        while (reader.next(key, value)) {
+            bw.write(Kmer.recoverKmerFrom(SIZE_KMER, key.getBytes(), 0, key.getLength()) + "\t" + value.toString());
+            bw.newLine();
+        }
+        bw.close();
+        reader.close();
+
+ dumpResult();
+ TestUtils.compareWithResult(new File(TEST_SOURCE_DIR), new File(EXPECTED_PATH));
+
+        cleanupHadoop();
+    }
+
+    private void startHadoop() throws IOException {
+ FileSystem lfs = FileSystem.getLocal(new Configuration());
+ lfs.delete(new Path("build"), true);
+ System.setProperty("hadoop.log.dir", "logs");
+ dfsCluster = new MiniDFSCluster(conf, 2, true, null);
+ dfs = dfsCluster.getFileSystem();
+ mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
+
+ Path src = new Path(DATA_PATH);
+ Path dest = new Path(HDFS_PATH + "/");
+ dfs.mkdirs(dest);
+ dfs.copyFromLocalFile(src, dest);
+
+ DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+ conf.writeXml(confOutput);
+ confOutput.flush();
+ confOutput.close();
+ }
+
+ private void cleanupHadoop() throws IOException {
+ mrCluster.shutdown();
+ dfsCluster.shutdown();
+ }
+
+ private void dumpResult() throws IOException {
+ Path src = new Path(RESULT_PATH);
+ Path dest = new Path(ACTUAL_RESULT_DIR + "/");
+ dfs.copyToLocalFile(src, dest);
+ }
+}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/pathmerging/MergePathTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/pathmerging/MergePathTest.java
new file mode 100644
index 0000000..7b8d285
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/pathmerging/MergePathTest.java
@@ -0,0 +1,102 @@
+package edu.uci.ics.pathmerging;
+
+import java.io.BufferedWriter;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.junit.Test;
+import edu.uci.ics.utils.TestUtils;
+
+@SuppressWarnings("deprecation")
+public class MergePathTest {
+ private static final String ACTUAL_RESULT_DIR = "actual3";
+ private static final String COMPARE_DIR = "compare";
+ private JobConf conf = new JobConf();
+ private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+ private static final String DATA_PATH = "actual2" + "/result2" + "/part-00000";
+ private static final String HDFS_PATH = "/webmap";
+    private static final String HDFS_PATH_DATA = "/webmapdata";
+
+ private static final String RESULT_PATH = "/result3";
+ private static final String EXPECTED_PATH = "expected/result3";
+ private static final String TEST_SOURCE_DIR = COMPARE_DIR + RESULT_PATH + "/comparesource.txt";
+ private static final int COUNT_REDUCER = 4;
+ private static final int SIZE_KMER = 3;
+
+ private MiniDFSCluster dfsCluster;
+ private MiniMRCluster mrCluster;
+ private FileSystem dfs;
+
+ @SuppressWarnings("resource")
+ @Test
+ public void test() throws Exception {
+ FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+ FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+ startHadoop();
+
+ MergePathDriver tldriver = new MergePathDriver();
+        tldriver.run(HDFS_PATH, RESULT_PATH, HDFS_PATH_DATA, COUNT_REDUCER, SIZE_KMER, 3, HADOOP_CONF_PATH);
+
+        Path path = new Path(HDFS_PATH_DATA + "/complete2" + "/complete2-r-00000");
+        SequenceFile.Reader reader = new SequenceFile.Reader(dfs, path, conf);
+        BytesWritable key = (BytesWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+        MergePathValueWritable value = (MergePathValueWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
+        File filePathTo = new File(TEST_SOURCE_DIR);
+        BufferedWriter bw = new BufferedWriter(new FileWriter(filePathTo));
+        while (reader.next(key, value)) {
+            bw.write(key.toString() + "\t" + value.getAdjBitMap() + "\t" + value.getFlag());
+            bw.newLine();
+        }
+        bw.close();
+        reader.close();
+
+ dumpResult();
+ TestUtils.compareWithResult(new File(TEST_SOURCE_DIR), new File(EXPECTED_PATH));
+
+        cleanupHadoop();
+    }
+
+    private void startHadoop() throws IOException {
+ FileSystem lfs = FileSystem.getLocal(new Configuration());
+ lfs.delete(new Path("build"), true);
+ System.setProperty("hadoop.log.dir", "logs");
+ dfsCluster = new MiniDFSCluster(conf, 2, true, null);
+ dfs = dfsCluster.getFileSystem();
+ mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
+
+ Path src = new Path(DATA_PATH);
+ Path dest = new Path(HDFS_PATH + "/");
+ dfs.mkdirs(dest);
+ dfs.copyFromLocalFile(src, dest);
+        Path data = new Path(HDFS_PATH_DATA + "/");
+ dfs.mkdirs(data);
+
+ DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+ conf.writeXml(confOutput);
+ confOutput.flush();
+ confOutput.close();
+ }
+
+ private void cleanupHadoop() throws IOException {
+ mrCluster.shutdown();
+ dfsCluster.shutdown();
+ }
+
+ private void dumpResult() throws IOException {
+        Path src = new Path(HDFS_PATH_DATA + "/" + "complete2");
+ Path dest = new Path(ACTUAL_RESULT_DIR + "/");
+ dfs.copyToLocalFile(src, dest);
+ }
+}