create svn dir

git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_genomix@2733 123451ca-8445-de46-9d55-352943316053
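
The patch below checks in generated Hadoop mini-cluster test artifacts: a captured configuration (hadoop/actual/conf.xml), the job output (result2/part-00000), and the DFS data-node storage files, among them an HDFS block copy of the submitted job configuration (blk_7390568412602717794) and one FASTQ input read. The captured job configuration records the wiring of the "Genomix Graph Building" job. For reference only, here is a minimal sketch, not part of this patch, of an old-API (org.apache.hadoop.mapred) driver that would produce those recorded settings; the driver class name GraphBuildingDriver is hypothetical, while the mapper/combiner/reducer and key/value classes are taken verbatim from the conf:

    // Hypothetical reconstruction from the recorded job conf; not in this patch.
    package edu.uci.ics.graphbuilding;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.FileOutputFormat;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.TextInputFormat;
    import org.apache.hadoop.mapred.TextOutputFormat;

    public class GraphBuildingDriver {
        public static void main(String[] args) throws Exception {
            JobConf conf = new JobConf(GraphBuildingDriver.class);
            conf.setJobName("Genomix Graph Building");         // mapred.job.name

            conf.setMapperClass(GenomixMapper.class);          // mapred.mapper.class
            conf.setCombinerClass(GenomixCombiner.class);      // mapred.combiner.class
            conf.setReducerClass(GenomixReducer.class);        // mapred.reducer.class

            conf.setMapOutputKeyClass(LongWritable.class);     // mapred.mapoutput.key.class
            conf.setMapOutputValueClass(IntWritable.class);    // mapred.mapoutput.value.class
            conf.setOutputKeyClass(LongWritable.class);        // mapred.output.key.class
            conf.setOutputValueClass(ValueWritable.class);     // mapred.output.value.class

            conf.setInputFormat(TextInputFormat.class);        // mapred.input.format.class
            conf.setOutputFormat(TextOutputFormat.class);      // mapred.output.format.class
            conf.setNumReduceTasks(2);                         // mapred.reduce.tasks (job conf)

            FileInputFormat.setInputPaths(conf, new Path("/webmap"));   // mapred.input.dir
            FileOutputFormat.setOutputPath(conf, new Path("/result2")); // mapred.output.dir

            JobClient.runJob(conf);
        }
    }

Running a driver wired this way against the mini-cluster writes to /result2, which matches the result2/part-00000 k-mer counts checked in below.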
diff --git a/hadoop/actual/conf.xml b/hadoop/actual/conf.xml
new file mode 100755
index 0000000..b3ca482
--- /dev/null
+++ b/hadoop/actual/conf.xml
@@ -0,0 +1,179 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>dfs.namenode.decommission.nodes.per.interval</name><value>5</value></property>
+<property><name>dfs.https.need.client.auth</name><value>false</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>dfs.namenode.logging.level</name><value>info</value></property>
+<property><name>dfs.datanode.address</name><value>127.0.0.1:0</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>fs.default.name</name><value>hdfs://localhost:49297</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>dfs.safemode.threshold.pct</name><value>0.999f</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>dfs.namenode.handler.count</name><value>10</value></property>
+<property><name>dfs.blockreport.initialDelay</name><value>0</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>dfs.safemode.extension</name><value>0</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.StaticMapping</value></property>
+<property><name>dfs.https.server.keystore.resource</name><value>ssl-server.xml</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>dfs.name.edits.dir</name><value>${dfs.name.dir}</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>hadoop.job.ugi</name><value>hadoop,staff,everyone,localaccounts</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>dfs.block.size</name><value>67108864</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>dfs.datanode.ipc.address</name><value>127.0.0.1:0</value></property>
+<property><name>dfs.permissions</name><value>true</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>dfs.datanode.https.address</name><value>0.0.0.0:50475</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>dfs.secondary.http.address</name><value>0.0.0.0:50090</value></property>
+<property><name>dfs.replication.max</name><value>512</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>dfs.https.client.keystore.resource</name><value>ssl-client.xml</value></property>
+<property><name>dfs.namenode.startup</name><value>REGULAR</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>dfs.https.address</name><value>0.0.0.0:50470</value></property>
+<property><name>dfs.balance.bandwidthPerSec</name><value>1048576</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>fs.checkpoint.dir</name><value>build/test/data/dfs/namesecondary1,build/test/data/dfs/namesecondary2</value></property>
+<property><name>mapred.reduce.tasks</name><value>1</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>dfs.max.objects</name><value>0</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>dfs.datanode.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>dfs.blockreport.intervalMsec</name><value>3600000</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>dfs.client.block.write.retries</name><value>3</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>dfs.https.enable</name><value>false</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>dfs.default.chunk.view.size</name><value>32768</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>dfs.datanode.du.reserved</name><value>0</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>dfs.web.ugi</name><value>webuser,webgroup</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>dfs.df.interval</name><value>60000</value></property>
+<property><name>dfs.data.dir</name><value>${hadoop.tmp.dir}/dfs/data</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>dfs.datanode.dns.interface</name><value>default</value></property>
+<property><name>dfs.support.append</name><value>false</value></property>
+<property><name>dfs.permissions.supergroup</name><value>supergroup</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>dfs.replication.min</name><value>1</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>dfs.namenode.decommission.interval</name><value>3</value></property>
+<property><name>dfs.http.address</name><value>localhost:49298</value></property>
+<property><name>dfs.heartbeat.interval</name><value>3</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>dfs.name.dir</name><value>build/test/data/dfs/name1,build/test/data/dfs/name2</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>dfs.datanode.http.address</name><value>127.0.0.1:0</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>dfs.replication.interval</name><value>3</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>dfs.replication</name><value>2</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>dfs.access.time.precision</name><value>3600000</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>dfs.datanode.handler.count</name><value>3</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>dfs.replication.considerLoad</name><value>true</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+</configuration>
\ No newline at end of file
diff --git a/hadoop/actual/result2/.part-00000.crc b/hadoop/actual/result2/.part-00000.crc
new file mode 100755
index 0000000..190ef20
--- /dev/null
+++ b/hadoop/actual/result2/.part-00000.crc
Binary files differ
diff --git a/hadoop/actual/result2/part-00000 b/hadoop/actual/result2/part-00000
new file mode 100755
index 0000000..bd04159
--- /dev/null
+++ b/hadoop/actual/result2/part-00000
@@ -0,0 +1,7 @@
+1	33	1
+3	1	1
+4	153	1
+12	18	1
+16	18	1
+19	16	1
+49	17	1
diff --git a/hadoop/build/test/data/dfs/data/data1/current/VERSION b/hadoop/build/test/data/dfs/data/data1/current/VERSION
new file mode 100755
index 0000000..e089f52
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data1/current/VERSION
@@ -0,0 +1,6 @@
+#Mon Jan 28 16:24:49 PST 2013
+namespaceID=1113662599
+storageID=DS-51694310-169.234.14.178-49301-1359419089185
+cTime=0
+storageType=DATA_NODE
+layoutVersion=-18
diff --git a/hadoop/build/test/data/dfs/data/data1/current/blk_-1799015858217215231 b/hadoop/build/test/data/dfs/data/data1/current/blk_-1799015858217215231
new file mode 100755
index 0000000..bd04159
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data1/current/blk_-1799015858217215231
@@ -0,0 +1,7 @@
+1	33	1
+3	1	1
+4	153	1
+12	18	1
+16	18	1
+19	16	1
+49	17	1
diff --git a/hadoop/build/test/data/dfs/data/data1/current/blk_-1799015858217215231_1005.meta b/hadoop/build/test/data/dfs/data/data1/current/blk_-1799015858217215231_1005.meta
new file mode 100755
index 0000000..3db5645
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data1/current/blk_-1799015858217215231_1005.meta
Binary files differ
diff --git a/hadoop/build/test/data/dfs/data/data1/current/blk_-6101674499548096236 b/hadoop/build/test/data/dfs/data/data1/current/blk_-6101674499548096236
new file mode 100755
index 0000000..593f470
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data1/current/blk_-6101674499548096236
Binary files differ
diff --git a/hadoop/build/test/data/dfs/data/data1/current/blk_-6101674499548096236_1001.meta b/hadoop/build/test/data/dfs/data/data1/current/blk_-6101674499548096236_1001.meta
new file mode 100755
index 0000000..a6d5932
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data1/current/blk_-6101674499548096236_1001.meta
Binary files differ
diff --git a/hadoop/build/test/data/dfs/data/data1/current/blk_5252270394021184001 b/hadoop/build/test/data/dfs/data/data1/current/blk_5252270394021184001
new file mode 100755
index 0000000..7f3e555
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data1/current/blk_5252270394021184001
Binary files differ
diff --git a/hadoop/build/test/data/dfs/data/data1/current/blk_5252270394021184001_1003.meta b/hadoop/build/test/data/dfs/data/data1/current/blk_5252270394021184001_1003.meta
new file mode 100755
index 0000000..408e149
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data1/current/blk_5252270394021184001_1003.meta
Binary files differ
diff --git a/hadoop/build/test/data/dfs/data/data1/current/dncp_block_verification.log.curr b/hadoop/build/test/data/dfs/data/data1/current/dncp_block_verification.log.curr
new file mode 100755
index 0000000..148a02e
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data1/current/dncp_block_verification.log.curr
@@ -0,0 +1,2 @@
+
+date="2013-01-28 16:24:51,611"	 time="1359419091611"	 genstamp="1004"	 id="7390568412602717794"
\ No newline at end of file
diff --git a/hadoop/build/test/data/dfs/data/data1/storage b/hadoop/build/test/data/dfs/data/data1/storage
new file mode 100755
index 0000000..60e7000
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data1/storage
Binary files differ
diff --git a/hadoop/build/test/data/dfs/data/data2/current/VERSION b/hadoop/build/test/data/dfs/data/data2/current/VERSION
new file mode 100755
index 0000000..e089f52
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data2/current/VERSION
@@ -0,0 +1,6 @@
+#Mon Jan 28 16:24:49 PST 2013
+namespaceID=1113662599
+storageID=DS-51694310-169.234.14.178-49301-1359419089185
+cTime=0
+storageType=DATA_NODE
+layoutVersion=-18
diff --git a/hadoop/build/test/data/dfs/data/data2/current/blk_-7373744087367611078 b/hadoop/build/test/data/dfs/data/data2/current/blk_-7373744087367611078
new file mode 100755
index 0000000..c7fb713
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data2/current/blk_-7373744087367611078
@@ -0,0 +1,4 @@
+@625E1AAXX100810:1:100:10000:10271/1
+AATAGAAGAT
++
+EDBDB?BEEEDGGEGGGDGGGA>DG@GGD;GD@DG@F?<B<BFFD?
\ No newline at end of file
diff --git a/hadoop/build/test/data/dfs/data/data2/current/blk_-7373744087367611078_1002.meta b/hadoop/build/test/data/dfs/data/data2/current/blk_-7373744087367611078_1002.meta
new file mode 100755
index 0000000..58ae5ca
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data2/current/blk_-7373744087367611078_1002.meta
Binary files differ
diff --git a/hadoop/build/test/data/dfs/data/data2/current/blk_7390568412602717794 b/hadoop/build/test/data/dfs/data/data2/current/blk_7390568412602717794
new file mode 100755
index 0000000..03bbf67
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data2/current/blk_7390568412602717794
@@ -0,0 +1,195 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>dfs.namenode.decommission.nodes.per.interval</name><value>5</value></property>
+<property><name>dfs.https.need.client.auth</name><value>false</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>dfs.namenode.logging.level</name><value>info</value></property>
+<property><name>dfs.datanode.address</name><value>127.0.0.1:0</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>fs.default.name</name><value>hdfs://localhost:49297</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>dfs.safemode.threshold.pct</name><value>0.999f</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>dfs.namenode.handler.count</name><value>10</value></property>
+<property><name>dfs.blockreport.initialDelay</name><value>0</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>mapred.mapoutput.key.class</name><value>org.apache.hadoop.io.LongWritable</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>mapred.output.key.class</name><value>org.apache.hadoop.io.LongWritable</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>dfs.safemode.extension</name><value>0</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>user.name</name><value>hadoop</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.StaticMapping</value></property>
+<property><name>dfs.https.server.keystore.resource</name><value>ssl-server.xml</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>dfs.name.edits.dir</name><value>${dfs.name.dir}</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>hadoop.job.ugi</name><value>hadoop,staff,everyone,localaccounts</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>dfs.block.size</name><value>67108864</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>dfs.datanode.ipc.address</name><value>127.0.0.1:0</value></property>
+<property><name>dfs.permissions</name><value>true</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>dfs.datanode.https.address</name><value>0.0.0.0:50475</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>dfs.secondary.http.address</name><value>0.0.0.0:50090</value></property>
+<property><name>dfs.replication.max</name><value>512</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>mapred.output.value.class</name><value>edu.uci.ics.graphbuilding.ValueWritable</value></property>
+<property><name>dfs.https.client.keystore.resource</name><value>ssl-client.xml</value></property>
+<property><name>dfs.namenode.startup</name><value>REGULAR</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>dfs.https.address</name><value>0.0.0.0:50470</value></property>
+<property><name>dfs.balance.bandwidthPerSec</name><value>1048576</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>group.name</name><value>staff</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>fs.checkpoint.dir</name><value>build/test/data/dfs/namesecondary1,build/test/data/dfs/namesecondary2</value></property>
+<property><name>mapred.reduce.tasks</name><value>2</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>dfs.max.objects</name><value>0</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>mapred.job.split.file</name><value>hdfs://localhost:49297/tmp/hadoop-hadoop/mapred/system/job_local_0001/job.split</value></property>
+<property><name>mapred.job.name</name><value>Genomix Graph Building</value></property>
+<property><name>dfs.datanode.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>dfs.blockreport.intervalMsec</name><value>3600000</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>dfs.client.block.write.retries</name><value>3</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>mapred.input.format.class</name><value>org.apache.hadoop.mapred.TextInputFormat</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>dfs.https.enable</name><value>false</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>mapred.output.format.class</name><value>org.apache.hadoop.mapred.TextOutputFormat</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>mapred.working.dir</name><value>hdfs://localhost:49297/user/hadoop</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.reducer.class</name><value>edu.uci.ics.graphbuilding.GenomixReducer</value></property>
+<property><name>dfs.default.chunk.view.size</name><value>32768</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>dfs.datanode.du.reserved</name><value>0</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>mapred.combiner.class</name><value>edu.uci.ics.graphbuilding.GenomixCombiner</value></property>
+<property><name>dfs.web.ugi</name><value>webuser,webgroup</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>dfs.df.interval</name><value>60000</value></property>
+<property><name>dfs.data.dir</name><value>${hadoop.tmp.dir}/dfs/data</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>dfs.datanode.dns.interface</name><value>default</value></property>
+<property><name>mapred.mapoutput.value.class</name><value>org.apache.hadoop.io.IntWritable</value></property>
+<property><name>dfs.support.append</name><value>false</value></property>
+<property><name>dfs.permissions.supergroup</name><value>supergroup</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>mapred.mapper.class</name><value>edu.uci.ics.graphbuilding.GenomixMapper</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>dfs.replication.min</name><value>1</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>dfs.namenode.decommission.interval</name><value>3</value></property>
+<property><name>dfs.http.address</name><value>localhost:49298</value></property>
+<property><name>dfs.heartbeat.interval</name><value>3</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>dfs.name.dir</name><value>build/test/data/dfs/name1,build/test/data/dfs/name2</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>dfs.datanode.http.address</name><value>127.0.0.1:0</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>dfs.replication.interval</name><value>3</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>dfs.replication</name><value>2</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>dfs.access.time.precision</name><value>3600000</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>mapred.output.dir</name><value>hdfs://localhost:49297/result2</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>mapred.input.dir</name><value>hdfs://localhost:49297/webmap</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>dfs.datanode.handler.count</name><value>3</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>dfs.replication.considerLoad</name><value>true</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+</configuration>
\ No newline at end of file
diff --git a/hadoop/build/test/data/dfs/data/data2/current/blk_7390568412602717794_1004.meta b/hadoop/build/test/data/dfs/data/data2/current/blk_7390568412602717794_1004.meta
new file mode 100755
index 0000000..b7bad35
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data2/current/blk_7390568412602717794_1004.meta
Binary files differ
diff --git a/hadoop/build/test/data/dfs/data/data2/storage b/hadoop/build/test/data/dfs/data/data2/storage
new file mode 100755
index 0000000..60e7000
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data2/storage
Binary files differ
diff --git a/hadoop/build/test/data/dfs/data/data3/current/VERSION b/hadoop/build/test/data/dfs/data/data3/current/VERSION
new file mode 100755
index 0000000..4055c1f
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data3/current/VERSION
@@ -0,0 +1,6 @@
+#Mon Jan 28 16:24:49 PST 2013
+namespaceID=1113662599
+storageID=DS-1199309190-169.234.14.178-49304-1359419089463
+cTime=0
+storageType=DATA_NODE
+layoutVersion=-18
diff --git a/hadoop/build/test/data/dfs/data/data3/current/blk_-1799015858217215231 b/hadoop/build/test/data/dfs/data/data3/current/blk_-1799015858217215231
new file mode 100755
index 0000000..bd04159
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data3/current/blk_-1799015858217215231
@@ -0,0 +1,7 @@
+1	33	1
+3	1	1
+4	153	1
+12	18	1
+16	18	1
+19	16	1
+49	17	1
diff --git a/hadoop/build/test/data/dfs/data/data3/current/blk_-1799015858217215231_1005.meta b/hadoop/build/test/data/dfs/data/data3/current/blk_-1799015858217215231_1005.meta
new file mode 100755
index 0000000..3db5645
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data3/current/blk_-1799015858217215231_1005.meta
Binary files differ
diff --git a/hadoop/build/test/data/dfs/data/data3/current/blk_-6101674499548096236 b/hadoop/build/test/data/dfs/data/data3/current/blk_-6101674499548096236
new file mode 100755
index 0000000..593f470
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data3/current/blk_-6101674499548096236
Binary files differ
diff --git a/hadoop/build/test/data/dfs/data/data3/current/blk_-6101674499548096236_1001.meta b/hadoop/build/test/data/dfs/data/data3/current/blk_-6101674499548096236_1001.meta
new file mode 100755
index 0000000..a6d5932
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data3/current/blk_-6101674499548096236_1001.meta
Binary files differ
diff --git a/hadoop/build/test/data/dfs/data/data3/current/blk_5252270394021184001 b/hadoop/build/test/data/dfs/data/data3/current/blk_5252270394021184001
new file mode 100755
index 0000000..7f3e555
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data3/current/blk_5252270394021184001
Binary files differ
diff --git a/hadoop/build/test/data/dfs/data/data3/current/blk_5252270394021184001_1003.meta b/hadoop/build/test/data/dfs/data/data3/current/blk_5252270394021184001_1003.meta
new file mode 100755
index 0000000..408e149
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data3/current/blk_5252270394021184001_1003.meta
Binary files differ
diff --git a/hadoop/build/test/data/dfs/data/data3/current/dncp_block_verification.log.curr b/hadoop/build/test/data/dfs/data/data3/current/dncp_block_verification.log.curr
new file mode 100755
index 0000000..0f203b9
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data3/current/dncp_block_verification.log.curr
@@ -0,0 +1,3 @@
+
+date="2013-01-28 16:24:51,797"	 time="1359419091797"	 genstamp="1002"	 id="-7373744087367611078"
+date="2013-01-28 16:24:52,650"	 time="1359419092650"	 genstamp="1005"	 id="-1799015858217215231"
\ No newline at end of file
diff --git a/hadoop/build/test/data/dfs/data/data3/storage b/hadoop/build/test/data/dfs/data/data3/storage
new file mode 100755
index 0000000..60e7000
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data3/storage
Binary files differ
diff --git a/hadoop/build/test/data/dfs/data/data4/current/VERSION b/hadoop/build/test/data/dfs/data/data4/current/VERSION
new file mode 100755
index 0000000..4055c1f
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data4/current/VERSION
@@ -0,0 +1,6 @@
+#Mon Jan 28 16:24:49 PST 2013
+namespaceID=1113662599
+storageID=DS-1199309190-169.234.14.178-49304-1359419089463
+cTime=0
+storageType=DATA_NODE
+layoutVersion=-18
diff --git a/hadoop/build/test/data/dfs/data/data4/current/blk_-7373744087367611078 b/hadoop/build/test/data/dfs/data/data4/current/blk_-7373744087367611078
new file mode 100755
index 0000000..c7fb713
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data4/current/blk_-7373744087367611078
@@ -0,0 +1,4 @@
+@625E1AAXX100810:1:100:10000:10271/1
+AATAGAAGAT
++
+EDBDB?BEEEDGGEGGGDGGGA>DG@GGD;GD@DG@F?<B<BFFD?
\ No newline at end of file
diff --git a/hadoop/build/test/data/dfs/data/data4/current/blk_-7373744087367611078_1002.meta b/hadoop/build/test/data/dfs/data/data4/current/blk_-7373744087367611078_1002.meta
new file mode 100755
index 0000000..58ae5ca
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data4/current/blk_-7373744087367611078_1002.meta
Binary files differ
diff --git a/hadoop/build/test/data/dfs/data/data4/current/blk_7390568412602717794 b/hadoop/build/test/data/dfs/data/data4/current/blk_7390568412602717794
new file mode 100755
index 0000000..03bbf67
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data4/current/blk_7390568412602717794
@@ -0,0 +1,195 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>dfs.namenode.decommission.nodes.per.interval</name><value>5</value></property>
+<property><name>dfs.https.need.client.auth</name><value>false</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>dfs.namenode.logging.level</name><value>info</value></property>
+<property><name>dfs.datanode.address</name><value>127.0.0.1:0</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>fs.default.name</name><value>hdfs://localhost:49297</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>dfs.safemode.threshold.pct</name><value>0.999f</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>dfs.namenode.handler.count</name><value>10</value></property>
+<property><name>dfs.blockreport.initialDelay</name><value>0</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>mapred.mapoutput.key.class</name><value>org.apache.hadoop.io.LongWritable</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>mapred.output.key.class</name><value>org.apache.hadoop.io.LongWritable</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>dfs.safemode.extension</name><value>0</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>user.name</name><value>hadoop</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.StaticMapping</value></property>
+<property><name>dfs.https.server.keystore.resource</name><value>ssl-server.xml</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>dfs.name.edits.dir</name><value>${dfs.name.dir}</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>hadoop.job.ugi</name><value>hadoop,staff,everyone,localaccounts</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>dfs.block.size</name><value>67108864</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>dfs.datanode.ipc.address</name><value>127.0.0.1:0</value></property>
+<property><name>dfs.permissions</name><value>true</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>dfs.datanode.https.address</name><value>0.0.0.0:50475</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>dfs.secondary.http.address</name><value>0.0.0.0:50090</value></property>
+<property><name>dfs.replication.max</name><value>512</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>mapred.output.value.class</name><value>edu.uci.ics.graphbuilding.ValueWritable</value></property>
+<property><name>dfs.https.client.keystore.resource</name><value>ssl-client.xml</value></property>
+<property><name>dfs.namenode.startup</name><value>REGULAR</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>dfs.https.address</name><value>0.0.0.0:50470</value></property>
+<property><name>dfs.balance.bandwidthPerSec</name><value>1048576</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>group.name</name><value>staff</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>fs.checkpoint.dir</name><value>build/test/data/dfs/namesecondary1,build/test/data/dfs/namesecondary2</value></property>
+<property><name>mapred.reduce.tasks</name><value>2</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>dfs.max.objects</name><value>0</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>mapred.job.split.file</name><value>hdfs://localhost:49297/tmp/hadoop-hadoop/mapred/system/job_local_0001/job.split</value></property>
+<property><name>mapred.job.name</name><value>Genomix Graph Building</value></property>
+<property><name>dfs.datanode.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>dfs.blockreport.intervalMsec</name><value>3600000</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>dfs.client.block.write.retries</name><value>3</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>mapred.input.format.class</name><value>org.apache.hadoop.mapred.TextInputFormat</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>dfs.https.enable</name><value>false</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>mapred.output.format.class</name><value>org.apache.hadoop.mapred.TextOutputFormat</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>mapred.working.dir</name><value>hdfs://localhost:49297/user/hadoop</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.reducer.class</name><value>edu.uci.ics.graphbuilding.GenomixReducer</value></property>
+<property><name>dfs.default.chunk.view.size</name><value>32768</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>dfs.datanode.du.reserved</name><value>0</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>mapred.combiner.class</name><value>edu.uci.ics.graphbuilding.GenomixCombiner</value></property>
+<property><name>dfs.web.ugi</name><value>webuser,webgroup</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>dfs.df.interval</name><value>60000</value></property>
+<property><name>dfs.data.dir</name><value>${hadoop.tmp.dir}/dfs/data</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>dfs.datanode.dns.interface</name><value>default</value></property>
+<property><name>mapred.mapoutput.value.class</name><value>org.apache.hadoop.io.IntWritable</value></property>
+<property><name>dfs.support.append</name><value>false</value></property>
+<property><name>dfs.permissions.supergroup</name><value>supergroup</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>mapred.mapper.class</name><value>edu.uci.ics.graphbuilding.GenomixMapper</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>dfs.replication.min</name><value>1</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>dfs.namenode.decommission.interval</name><value>3</value></property>
+<property><name>dfs.http.address</name><value>localhost:49298</value></property>
+<property><name>dfs.heartbeat.interval</name><value>3</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>dfs.name.dir</name><value>build/test/data/dfs/name1,build/test/data/dfs/name2</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>dfs.datanode.http.address</name><value>127.0.0.1:0</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>dfs.replication.interval</name><value>3</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>dfs.replication</name><value>2</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>dfs.access.time.precision</name><value>3600000</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>mapred.output.dir</name><value>hdfs://localhost:49297/result2</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>mapred.input.dir</name><value>hdfs://localhost:49297/webmap</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>dfs.datanode.handler.count</name><value>3</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>dfs.replication.considerLoad</name><value>true</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+</configuration>
\ No newline at end of file
diff --git a/hadoop/build/test/data/dfs/data/data4/current/blk_7390568412602717794_1004.meta b/hadoop/build/test/data/dfs/data/data4/current/blk_7390568412602717794_1004.meta
new file mode 100755
index 0000000..b7bad35
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data4/current/blk_7390568412602717794_1004.meta
Binary files differ
diff --git a/hadoop/build/test/data/dfs/data/data4/storage b/hadoop/build/test/data/dfs/data/data4/storage
new file mode 100755
index 0000000..60e7000
--- /dev/null
+++ b/hadoop/build/test/data/dfs/data/data4/storage
Binary files differ
diff --git a/hadoop/build/test/data/dfs/name1/current/VERSION b/hadoop/build/test/data/dfs/name1/current/VERSION
new file mode 100755
index 0000000..048d57a
--- /dev/null
+++ b/hadoop/build/test/data/dfs/name1/current/VERSION
@@ -0,0 +1,5 @@
+#Mon Jan 28 16:24:48 PST 2013
+namespaceID=1113662599
+cTime=0
+storageType=NAME_NODE
+layoutVersion=-18
diff --git a/hadoop/build/test/data/dfs/name1/current/edits b/hadoop/build/test/data/dfs/name1/current/edits
new file mode 100755
index 0000000..88e5d71
--- /dev/null
+++ b/hadoop/build/test/data/dfs/name1/current/edits
Binary files differ
diff --git a/hadoop/build/test/data/dfs/name1/current/fsimage b/hadoop/build/test/data/dfs/name1/current/fsimage
new file mode 100755
index 0000000..18adf06
--- /dev/null
+++ b/hadoop/build/test/data/dfs/name1/current/fsimage
Binary files differ
diff --git a/hadoop/build/test/data/dfs/name1/current/fstime b/hadoop/build/test/data/dfs/name1/current/fstime
new file mode 100755
index 0000000..171e022
--- /dev/null
+++ b/hadoop/build/test/data/dfs/name1/current/fstime
Binary files differ
diff --git a/hadoop/build/test/data/dfs/name1/image/fsimage b/hadoop/build/test/data/dfs/name1/image/fsimage
new file mode 100755
index 0000000..60e7000
--- /dev/null
+++ b/hadoop/build/test/data/dfs/name1/image/fsimage
Binary files differ
diff --git a/hadoop/build/test/data/dfs/name2/current/VERSION b/hadoop/build/test/data/dfs/name2/current/VERSION
new file mode 100755
index 0000000..048d57a
--- /dev/null
+++ b/hadoop/build/test/data/dfs/name2/current/VERSION
@@ -0,0 +1,5 @@
+#Mon Jan 28 16:24:48 PST 2013
+namespaceID=1113662599
+cTime=0
+storageType=NAME_NODE
+layoutVersion=-18
diff --git a/hadoop/build/test/data/dfs/name2/current/edits b/hadoop/build/test/data/dfs/name2/current/edits
new file mode 100755
index 0000000..88e5d71
--- /dev/null
+++ b/hadoop/build/test/data/dfs/name2/current/edits
Binary files differ
diff --git a/hadoop/build/test/data/dfs/name2/current/fsimage b/hadoop/build/test/data/dfs/name2/current/fsimage
new file mode 100755
index 0000000..18adf06
--- /dev/null
+++ b/hadoop/build/test/data/dfs/name2/current/fsimage
Binary files differ
diff --git a/hadoop/build/test/data/dfs/name2/current/fstime b/hadoop/build/test/data/dfs/name2/current/fstime
new file mode 100755
index 0000000..171e022
--- /dev/null
+++ b/hadoop/build/test/data/dfs/name2/current/fstime
Binary files differ
diff --git a/hadoop/build/test/data/dfs/name2/image/fsimage b/hadoop/build/test/data/dfs/name2/image/fsimage
new file mode 100755
index 0000000..60e7000
--- /dev/null
+++ b/hadoop/build/test/data/dfs/name2/image/fsimage
Binary files differ
diff --git a/hadoop/data/webmap/text.txt b/hadoop/data/webmap/text.txt
new file mode 100755
index 0000000..c7fb713
--- /dev/null
+++ b/hadoop/data/webmap/text.txt
@@ -0,0 +1,4 @@
+@625E1AAXX100810:1:100:10000:10271/1
+AATAGAAGAT
++
+EDBDB?BEEEDGGEGGGDGGGA>DG@GGD;GD@DG@F?<B<BFFD?
\ No newline at end of file
diff --git a/hadoop/expected/result2 b/hadoop/expected/result2
new file mode 100755
index 0000000..6ead1cc
--- /dev/null
+++ b/hadoop/expected/result2
@@ -0,0 +1,7 @@
+1 33 1
+3 1 1
+4 153 1
+12 18 1
+16 18 1
+19 16 1
+49 17 1
diff --git a/hadoop/pom.xml b/hadoop/pom.xml
new file mode 100755
index 0000000..3e8cf5e
--- /dev/null
+++ b/hadoop/pom.xml
@@ -0,0 +1,142 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+	<groupId>graphbuilding</groupId>
+	<artifactId>graphbuilding</artifactId>
+	<version>0.0.1-SNAPSHOT</version>
+	<name>genomix</name>
+
+	<properties>
+		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+	</properties>
+	
+	<build>
+		<plugins>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-compiler-plugin</artifactId>
+				<version>2.0.2</version>
+				<configuration>
+					<source>1.6</source>
+					<target>1.6</target>
+				</configuration>
+			</plugin>
+			<plugin>
+				<artifactId>maven-assembly-plugin</artifactId>
+				<configuration>
+					<descriptorRefs>
+						<descriptorRef>jar-with-dependencies</descriptorRef>
+					</descriptorRefs>
+				</configuration>
+				<executions>
+					<execution>
+						<id>make-my-jar-with-dependencies</id>
+						<phase>package</phase>
+						<goals>
+							<goal>single</goal>
+						</goals>
+					</execution>
+				</executions>
+			</plugin>
+			<plugin>
+				<groupId>org.codehaus.mojo</groupId>
+				<artifactId>appassembler-maven-plugin</artifactId>
+				<executions>
+					<execution>
+						<configuration>
+							<programs>
+								<program>
+									<mainClass>edu.uci.ics.graphbuilding.GenomixDriver</mainClass>
+									<name>graphbuilding</name>
+								</program>
+							</programs>
+							<repositoryLayout>flat</repositoryLayout>
+							<repositoryName>lib</repositoryName>
+						</configuration>
+						<phase>package</phase>
+						<goals>
+							<goal>assemble</goal>
+						</goals>
+					</execution>
+				</executions>
+			</plugin>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-surefire-plugin</artifactId>
+				<version>2.7.2</version>
+				<configuration>
+					<forkMode>pertest</forkMode>
+					<argLine>-enableassertions -Xmx512m -XX:MaxPermSize=300m
+						-Dfile.encoding=UTF-8
+						-Djava.util.logging.config.file=src/test/resources/logging.properties</argLine>
+					<includes>
+						<include>**/*TestSuite.java</include>
+						<include>**/*Test.java</include>
+					</includes>
+				</configuration>
+			</plugin>
+			<plugin>
+				<artifactId>maven-clean-plugin</artifactId>
+				<configuration>
+					<filesets>
+						<fileset>
+							<directory>.</directory>
+							<includes>
+								<include>teststore*</include>
+								<include>edu*</include>
+								<include>actual*</include>
+								<include>build*</include>
+								<include>log*</include>
+								<include>ClusterController*</include>
+							</includes>
+						</fileset>
+					</filesets>
+				</configuration>
+			</plugin>
+		</plugins>
+	</build>
+	
+	<dependencies>
+		<dependency>
+			<groupId>junit</groupId>
+			<artifactId>junit</artifactId>
+			<version>4.8.1</version>
+			<scope>test</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-core</artifactId>
+			<version>0.20.2</version>
+			<scope>compile</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.hadoop</groupId>
+			<artifactId>hadoop-test</artifactId>
+			<version>0.20.2</version>
+			<scope>test</scope>
+		</dependency>
+		<dependency>
+			<groupId>com.kenai.nbpwr</groupId>
+			<artifactId>org-apache-commons-io</artifactId>
+			<version>1.3.1-201002241208</version>
+			<type>nbm</type>
+			<scope>test</scope>
+		</dependency>
+		<dependency>
+			<groupId>org.slf4j</groupId>
+			<artifactId>slf4j-jcl</artifactId>
+			<version>1.6.3</version>
+		</dependency>
+		<dependency>
+			<groupId>org.slf4j</groupId>
+			<artifactId>slf4j-api</artifactId>
+			<version>1.6.3</version>
+		</dependency>
+		<dependency>
+			<groupId>args4j</groupId>
+			<artifactId>args4j</artifactId>
+			<version>2.0.16</version>
+		</dependency>
+	</dependencies>
+</project>
diff --git a/hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java b/hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java
new file mode 100755
index 0000000..a67d20e
--- /dev/null
+++ b/hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixCombiner.java
@@ -0,0 +1,29 @@
+package edu.uci.ics.graphbuilding;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
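+/*
+ * Map-side pre-aggregation: duplicate k-mer keys emitted by a single mapper are
+ * merged early by OR-ing their adjacency bitmaps; e.g. values 8 (successor 'T')
+ * and 16 (predecessor 'A') combine into 24 before they reach the reducer.
+ */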
+public class GenomixCombiner extends MapReduceBase implements
+        Reducer<LongWritable, IntWritable, LongWritable, IntWritable> {
+    public void reduce(LongWritable key, Iterator<IntWritable> values,
+            OutputCollector<LongWritable, IntWritable> output, Reporter reporter) throws IOException {
+        int groupByAdjList = 0;
+        while (values.hasNext()) {
+            // Merge the adjacency bitmaps of all duplicate k-mers for this key.
+            groupByAdjList = groupByAdjList | values.next().get();
+        }
+        output.collect(key, new IntWritable(groupByAdjList));
+    }
+}
diff --git a/hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java b/hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java
new file mode 100755
index 0000000..71c8733
--- /dev/null
+++ b/hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixDriver.java
@@ -0,0 +1,70 @@
+package edu.uci.ics.graphbuilding;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
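+// Example invocation (illustrative; the input/output paths are placeholders):
+//   hadoop jar graphbuilding-0.0.1-SNAPSHOT-jar-with-dependencies.jar \
+//       edu.uci.ics.graphbuilding.GenomixDriver \
+//       -inputpath /webmap -outputpath /result2 -num-reducers 2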
+@SuppressWarnings("deprecation")
+public class GenomixDriver {
+	private static class Options {
+       @Option(name = "-inputpath", usage = "the input path", required = true)
+        public String inputPath;
+
+        @Option(name = "-outputpath", usage = "the output path", required = true)
+        public String outputPath;
+
+        @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+        public int numReducers;
+    }
+	public void run(String inputPath, String outputPath, int numReducers, String defaultConfPath) throws IOException {
+		
+		JobConf conf = new JobConf(GenomixDriver.class);
+        if (defaultConfPath != null) {
+            conf.addResource(new Path(defaultConfPath));
+        }
+
+		conf.setJobName("Genomix Graph Building");
+		conf.setMapperClass(GenomixMapper.class);
+		conf.setReducerClass(GenomixReducer.class);
+		conf.setCombinerClass(GenomixCombiner.class);	
+		
+		conf.setMapOutputKeyClass(LongWritable.class);
+		conf.setMapOutputValueClass(IntWritable.class);
+		
+		conf.setInputFormat(TextInputFormat.class);
+		conf.setOutputFormat(TextOutputFormat.class);
+		conf.setOutputKeyClass(LongWritable.class);
+		conf.setOutputValueClass(ValueWritable.class);
+		FileInputFormat.setInputPaths(conf, new Path(inputPath));
+        FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+        conf.setNumReduceTasks(numReducers);
+
+        FileSystem dfs = FileSystem.get(conf);
+        dfs.delete(new Path(outputPath), true);
+        JobClient.runJob(conf);		
+	}
+	public static void main(String[] args) throws Exception {
+		Options options = new Options();
+        CmdLineParser parser = new CmdLineParser(options);
+        parser.parseArgument(args);
+        GenomixDriver driver = new GenomixDriver();
+        driver.run(options.inputPath, options.outputPath, options.numReducers, null);
+    }
+	
+}
diff --git a/hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java b/hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java
new file mode 100755
index 0000000..7d0d75b
--- /dev/null
+++ b/hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixMapper.java
@@ -0,0 +1,115 @@
+package edu.uci.ics.graphbuilding;
+
+import java.io.IOException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
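+/*
+ * Worked example (illustrative, assuming KMER_SIZE = 3): for the read "AGCT"
+ * the mapper emits two (k-mer, adjacency) pairs:
+ *   "AGC" -> key 0b000110 = 6,  value 8  (successor 'T', low-nibble bit)
+ *   "GCT" -> key 0b011011 = 27, value 16 (predecessor 'A', high-nibble bit)
+ * The combiner and reducer then OR these bitmaps across all occurrences.
+ */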
+public class GenomixMapper extends MapReduceBase implements Mapper<LongWritable, Text, LongWritable, IntWritable> {
+
+    public static final int KMER_SIZE = 3; // user-specified k-mer length
+
+    /* Adjacency bitmap layout (OR-ed across reads):
+       successor letter (low nibble)     predecessor letter (high nibble)
+       A 00000001   1                    A 00010000  16
+       G 00000010   2                    G 00100000  32
+       C 00000100   4                    C 01000000  64
+       T 00001000   8                    T 10000000 128 */
+    public void map(LongWritable key, Text value, OutputCollector<LongWritable, IntWritable> output,
+            Reporter reporter) throws IOException {
+        /* 2-bit letter codes inside the k-mer key:
+           A 00, G 01, C 10, T 11 */
+        try {
+            String geneLine = value.toString(); // read one line of sequence data
+            Pattern genePattern = Pattern.compile("[AGCT]+");
+            Matcher geneMatcher = genePattern.matcher(geneLine);
+            boolean isValid = geneMatcher.matches();
+            if (isValid) {
+                long kmerValue = 0;
+                long PreMarker = -1;
+                // Initialization: pack the first k-mer of this geneLine into kmerValue
+                for (int i = 0; i < KMER_SIZE; i++) {
+                    kmerValue = kmerValue << 2;
+                    switch (geneLine.charAt(i)) {
+                        case 'A': kmerValue = kmerValue + 0; break;
+                        case 'G': kmerValue = kmerValue + 1; break;
+                        case 'C': kmerValue = kmerValue + 2; break;
+                        case 'T': kmerValue = kmerValue + 3; break;
+                    }
+                }
+                int i;
+                // Slide the window one letter at a time to get each following k-mer
+                for (i = KMER_SIZE; i < geneLine.length(); i++) {
+                    LongWritable outputKmer = new LongWritable(kmerValue);
+                    int kmerAdjList = 0;
+                    // Set the predecessor bit from the marker of the previous window
+                    switch ((int) PreMarker) {
+                        case -1: kmerAdjList = kmerAdjList + 0; break;
+                        case 0: kmerAdjList = kmerAdjList + 16; break;
+                        case 16: kmerAdjList = kmerAdjList + 32; break;
+                        case 32: kmerAdjList = kmerAdjList + 64; break;
+                        case 48: kmerAdjList = kmerAdjList + 128; break;
+                    }
+                    // Update the marker: remember the letter about to slide out of the window
+                    PreMarker = 3;
+                    PreMarker = PreMarker << (KMER_SIZE - 1) * 2;
+                    PreMarker = PreMarker & kmerValue;
+                    // Shift the window and clear the two bits that slid out
+                    long reset = 3;
+                    kmerValue = kmerValue << 2;
+                    reset = ~(reset << KMER_SIZE * 2);
+                    kmerValue = kmerValue & reset;
+                    switch (geneLine.charAt(i)) {
+                        case 'A':
+                            kmerAdjList = kmerAdjList + 1;
+                            kmerValue = kmerValue + 0;
+                            break;
+                        case 'G':
+                            kmerAdjList = kmerAdjList + 2;
+                            kmerValue = kmerValue + 1;
+                            break;
+                        case 'C':
+                            kmerAdjList = kmerAdjList + 4;
+                            kmerValue = kmerValue + 2;
+                            break;
+                        case 'T':
+                            kmerAdjList = kmerAdjList + 8;
+                            kmerValue = kmerValue + 3;
+                            break;
+                    }
+                    IntWritable outputAdjList = new IntWritable(kmerAdjList);
+                    output.collect(outputKmer, outputAdjList);
+                }
+                // Emit the last k-mer of this gene line; it has a predecessor but no successor
+                if (i == geneLine.length()) {
+                    int kmerAdjList = 0;
+                    switch ((int) PreMarker) {
+                        case 0: kmerAdjList = kmerAdjList + 16; break;
+                        case 16: kmerAdjList = kmerAdjList + 32; break;
+                        case 32: kmerAdjList = kmerAdjList + 64; break;
+                        case 48: kmerAdjList = kmerAdjList + 128; break;
+                    }
+                    IntWritable outputAdjList = new IntWritable(kmerAdjList);
+                    LongWritable outputKmer = new LongWritable(kmerValue);
+                    output.collect(outputKmer, outputAdjList);
+                }
+            }
+        } catch (Exception e) {
+            System.out.println("Exception: " + e);
+        }
+    }
+}
diff --git a/hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java b/hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java
new file mode 100755
index 0000000..231e089
--- /dev/null
+++ b/hadoop/src/main/java/edu/uci/ics/graphbuilding/GenomixReducer.java
@@ -0,0 +1,31 @@
+package edu.uci.ics.graphbuilding;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
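+/*
+ * Caveat: when the combiner (GenomixCombiner) runs, several map outputs for the
+ * same k-mer arrive here pre-merged as a single value, so "count" reflects the
+ * number of reduce-input values, not necessarily raw k-mer occurrences.
+ */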
+public class GenomixReducer extends MapReduceBase implements
+        Reducer<LongWritable, IntWritable, LongWritable, ValueWritable> {
+    public void reduce(LongWritable key, Iterator<IntWritable> values,
+            OutputCollector<LongWritable, ValueWritable> output, Reporter reporter) throws IOException {
+        int groupByAdjList = 0;
+        int count = 0;
+        while (values.hasNext()) {
+            // Merge the adjacency bitmaps of all values for this k-mer.
+            groupByAdjList = groupByAdjList | values.next().get();
+            count++;
+        }
+        output.collect(key, new ValueWritable(groupByAdjList, count));
+    }
+}
diff --git a/hadoop/src/main/java/edu/uci/ics/graphbuilding/ValueWritable.java b/hadoop/src/main/java/edu/uci/ics/graphbuilding/ValueWritable.java
new file mode 100755
index 0000000..37775d7
--- /dev/null
+++ b/hadoop/src/main/java/edu/uci/ics/graphbuilding/ValueWritable.java
@@ -0,0 +1,71 @@
+package edu.uci.ics.graphbuilding;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+
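+/*
+ * Reduce-side output value: "first" holds the merged adjacency bitmap of a k-mer
+ * and "second" its value count, serialized as two consecutive ints.
+ */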
+public class ValueWritable implements WritableComparable<ValueWritable> {
+    private int first;
+    private int second;
+
+    public ValueWritable() {
+    }
+
+    public ValueWritable(int first, int second) {
+        set(first, second);
+    }
+
+    public void set(int first, int second) {
+        this.first = first;
+        this.second = second;
+    }
+
+    public int getFirst() {
+        return first;
+    }
+
+    public int getSecond() {
+        return second;
+    }
+
+    public void write(DataOutput out) throws IOException {
+        out.writeInt(first);
+        out.writeInt(second);
+    }
+
+    public void readFields(DataInput in) throws IOException {
+        first = in.readInt();
+        second = in.readInt();
+    }
+
+    public int hashCode() {
+        return first + second;
+    }
+
+    public boolean equals(Object o) {
+        if (o instanceof ValueWritable) {
+            ValueWritable tp = (ValueWritable) o;
+            return first == tp.first && second == tp.second;
+        }
+        return false;
+    }
+
+    public String toString() {
+        return Integer.toString(first) + "\t" + Integer.toString(second);
+    }
+
+    public int compareTo(ValueWritable tp) {
+        // Order by first, then by second.
+        if (first != tp.first)
+            return first < tp.first ? -1 : 1;
+        if (second != tp.second)
+            return second < tp.second ? -1 : 1;
+        return 0;
+    }
+}
diff --git a/hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java b/hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java
new file mode 100755
index 0000000..090aa0b
--- /dev/null
+++ b/hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java
@@ -0,0 +1,83 @@
+package edu.uci.ics.graphbuilding;
+
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.junit.Test;
+
+import edu.uci.ics.utils.TestUtils;
+
+public class GraphBuildingTest {
+
+	private static final String ACTUAL_RESULT_DIR = "actual";
+    private JobConf conf = new JobConf();
+    private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+    private static final String DATA_PATH = "data/webmap/text.txt";
+    private static final String HDFS_PATH = "/webmap";
+    private static final String RESULT_PATH = "/result2";
+    private static final String DUMPED_RESULT = ACTUAL_RESULT_DIR + RESULT_PATH + "/part-00000";
+    private static final String EXPECTED_PATH = "expected/result2";
+    
+	private MiniDFSCluster dfsCluster;
+    private MiniMRCluster mrCluster;
+    private FileSystem dfs;
+    
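+    /*
+     * End-to-end check: starts a MiniDFSCluster and MiniMRCluster, copies the
+     * sample read file into HDFS, runs the graph-building job with 2 reducers,
+     * and diffs the dumped part file against expected/result2.
+     */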
+    @Test
+    public void test() throws Exception {
+        FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+        FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+        startHadoop();
+
+        // run graph transformation tests
+        GenomixDriver tldriver = new GenomixDriver();
+        tldriver.run(HDFS_PATH, RESULT_PATH, 2, HADOOP_CONF_PATH);
+        dumpResult();
+        TestUtils.compareWithResult(new File(DUMPED_RESULT), new File(EXPECTED_PATH));
+
+        cleanupHadoop();
+    }
+
+    private void startHadoop() throws IOException {
+        FileSystem lfs = FileSystem.getLocal(new Configuration());
+        lfs.delete(new Path("build"), true);
+        System.setProperty("hadoop.log.dir", "logs");
+        dfsCluster = new MiniDFSCluster(conf, 2, true, null);
+        dfs = dfsCluster.getFileSystem();
+        mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
+
+        Path src = new Path(DATA_PATH);
+        Path dest = new Path(HDFS_PATH + "/");
+        dfs.mkdirs(dest);
+        dfs.copyFromLocalFile(src, dest);
+
+        DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+        conf.writeXml(confOutput);
+        confOutput.flush();
+        confOutput.close();
+    }
+
+    private void cleanupHadoop() throws IOException {
+        mrCluster.shutdown();
+        dfsCluster.shutdown();
+    }
+
+    private void dumpResult() throws IOException {
+        Path src = new Path(RESULT_PATH);
+        Path dest = new Path(ACTUAL_RESULT_DIR + "/");
+        dfs.copyToLocalFile(src, dest);
+    }
+}
diff --git a/hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java b/hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java
new file mode 100755
index 0000000..c2e30ed
--- /dev/null
+++ b/hadoop/src/test/java/edu/uci/ics/utils/TestUtils.java
@@ -0,0 +1,59 @@
+package edu.uci.ics.utils;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+
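+/*
+ * Line-by-line comparison helper: the dumped Hadoop part file is tab-separated
+ * (key \t adjacency \t count), while the checked-in expected file is
+ * space-separated, hence the two different split patterns in equalStrings().
+ */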
+public class TestUtils {
+    /** Compares the dumped Hadoop output (first argument) with the expected file (second argument). */
+    public static void compareWithResult(File dumpedFile, File expectedFile) throws Exception {
+        BufferedReader readerDumped = new BufferedReader(new FileReader(dumpedFile));
+        BufferedReader readerExpected = new BufferedReader(new FileReader(expectedFile));
+        String lineDumped, lineExpected;
+        int num = 1;
+        try {
+            while ((lineDumped = readerDumped.readLine()) != null) {
+                lineExpected = readerExpected.readLine();
+                if (lineExpected == null) {
+                    throw new Exception("Actual result changed at line " + num + ":\n< " + lineDumped + "\n> ");
+                }
+                if (!equalStrings(lineDumped, lineExpected)) {
+                    throw new Exception("Result changed at line " + num + ":\n< " + lineDumped + "\n> "
+                            + lineExpected);
+                }
+                ++num;
+            }
+            lineExpected = readerExpected.readLine();
+            if (lineExpected != null) {
+                throw new Exception("Actual result changed at line " + num + ":\n< \n> " + lineExpected);
+            }
+        } finally {
+            readerDumped.close();
+            readerExpected.close();
+        }
+    }
+
+    private static boolean equalStrings(String s1, String s2) {
+        // s1 comes from the dumped part file (tab-separated); s2 from the expected file (space-separated).
+        String[] fieldsDumped = s1.split("\t");
+        String[] fieldsExpected = s2.split(" ");
+
+        if (fieldsDumped.length != fieldsExpected.length)
+            return false;
+
+        for (int i = 0; i < fieldsDumped.length; i++) {
+            if (!fieldsDumped[i].equals(fieldsExpected[i]))
+                return false;
+        }
+        return true;
+    }
+
+    public static void main(String[] args) throws Exception {
+        compareWithResult(new File("/Users/hadoop/Documents/workspace/Test/part-00000"), new File("/Users/hadoop/Documents/workspace/Test/test.txt"));
+    }
+}