Merge branch 'fullstack_genomix' of https://code.google.com/p/hyracks into fullstack_genomix
diff --git a/genomix/genomix-hadoop/actual1/conf.xml b/genomix/genomix-hadoop/actual1/conf.xml
deleted file mode 100644
index 506913d..0000000
--- a/genomix/genomix-hadoop/actual1/conf.xml
+++ /dev/null
@@ -1,179 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
-<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
-<property><name>mapred.task.cache.levels</name><value>2</value></property>
-<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
-<property><name>hadoop.native.lib</name><value>true</value></property>
-<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
-<property><name>dfs.namenode.decommission.nodes.per.interval</name><value>5</value></property>
-<property><name>dfs.https.need.client.auth</name><value>false</value></property>
-<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
-<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
-<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
-<property><name>dfs.namenode.logging.level</name><value>info</value></property>
-<property><name>dfs.datanode.address</name><value>127.0.0.1:0</value></property>
-<property><name>io.skip.checksum.errors</name><value>false</value></property>
-<property><name>fs.default.name</name><value>hdfs://localhost:61115</value></property>
-<property><name>mapred.child.tmp</name><value>./tmp</value></property>
-<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
-<property><name>dfs.safemode.threshold.pct</name><value>0.999f</value></property>
-<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
-<property><name>dfs.namenode.handler.count</name><value>10</value></property>
-<property><name>dfs.blockreport.initialDelay</name><value>0</value></property>
-<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
-<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
-<property><name>io.sort.factor</name><value>10</value></property>
-<property><name>mapred.task.timeout</name><value>600000</value></property>
-<property><name>mapred.max.tracker.failures</name><value>4</value></property>
-<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
-<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
-<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
-<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
-<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
-<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
-<property><name>dfs.safemode.extension</name><value>0</value></property>
-<property><name>tasktracker.http.threads</name><value>40</value></property>
-<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
-<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
-<property><name>mapred.output.compress</name><value>false</value></property>
-<property><name>io.bytes.per.checksum</name><value>512</value></property>
-<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.StaticMapping</value></property>
-<property><name>dfs.https.server.keystore.resource</name><value>ssl-server.xml</value></property>
-<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
-<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
-<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
-<property><name>dfs.name.edits.dir</name><value>${dfs.name.dir}</value></property>
-<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
-<property><name>hadoop.job.ugi</name><value>hadoop,staff,everyone,localaccounts</value></property>
-<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
-<property><name>dfs.block.size</name><value>67108864</value></property>
-<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
-<property><name>job.end.retry.attempts</name><value>0</value></property>
-<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
-<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
-<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
-<property><name>dfs.datanode.ipc.address</name><value>127.0.0.1:0</value></property>
-<property><name>dfs.permissions</name><value>true</value></property>
-<property><name>topology.script.number.args</name><value>100</value></property>
-<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
-<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
-<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
-<property><name>dfs.datanode.https.address</name><value>0.0.0.0:50475</value></property>
-<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
-<property><name>dfs.secondary.http.address</name><value>0.0.0.0:50090</value></property>
-<property><name>dfs.replication.max</name><value>512</value></property>
-<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
-<property><name>hadoop.security.authorization</name><value>false</value></property>
-<property><name>local.cache.size</name><value>10737418240</value></property>
-<property><name>mapred.min.split.size</name><value>0</value></property>
-<property><name>mapred.map.tasks</name><value>2</value></property>
-<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
-<property><name>dfs.https.client.keystore.resource</name><value>ssl-client.xml</value></property>
-<property><name>dfs.namenode.startup</name><value>REGULAR</value></property>
-<property><name>mapred.job.queue.name</name><value>default</value></property>
-<property><name>dfs.https.address</name><value>0.0.0.0:50470</value></property>
-<property><name>dfs.balance.bandwidthPerSec</name><value>1048576</value></property>
-<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
-<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
-<property><name>job.end.retry.interval</name><value>30000</value></property>
-<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
-<property><name>fs.checkpoint.dir</name><value>build/test/data/dfs/namesecondary1,build/test/data/dfs/namesecondary2</value></property>
-<property><name>mapred.reduce.tasks</name><value>1</value></property>
-<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
-<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
-<property><name>dfs.max.objects</name><value>0</value></property>
-<property><name>webinterface.private.actions</name><value>false</value></property>
-<property><name>io.sort.spill.percent</name><value>0.80</value></property>
-<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
-<property><name>dfs.datanode.dns.nameserver</name><value>default</value></property>
-<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
-<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
-<property><name>dfs.blockreport.intervalMsec</name><value>3600000</value></property>
-<property><name>mapred.map.max.attempts</name><value>4</value></property>
-<property><name>dfs.client.block.write.retries</name><value>3</value></property>
-<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
-<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
-<property><name>dfs.https.enable</name><value>false</value></property>
-<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
-<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
-<property><name>keep.failed.task.files</name><value>false</value></property>
-<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
-<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
-<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
-<property><name>io.map.index.skip</name><value>0</value></property>
-<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
-<property><name>dfs.default.chunk.view.size</name><value>32768</value></property>
-<property><name>hadoop.logfile.size</name><value>10000000</value></property>
-<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
-<property><name>dfs.datanode.du.reserved</name><value>0</value></property>
-<property><name>fs.checkpoint.period</name><value>3600</value></property>
-<property><name>dfs.web.ugi</name><value>webuser,webgroup</value></property>
-<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
-<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
-<property><name>dfs.df.interval</name><value>60000</value></property>
-<property><name>dfs.data.dir</name><value>${hadoop.tmp.dir}/dfs/data</value></property>
-<property><name>fs.s3.maxRetries</name><value>4</value></property>
-<property><name>dfs.datanode.dns.interface</name><value>default</value></property>
-<property><name>dfs.support.append</name><value>false</value></property>
-<property><name>dfs.permissions.supergroup</name><value>supergroup</value></property>
-<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
-<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
-<property><name>fs.trash.interval</name><value>0</value></property>
-<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
-<property><name>dfs.replication.min</name><value>1</value></property>
-<property><name>mapred.submit.replication</name><value>10</value></property>
-<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
-<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
-<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
-<property><name>dfs.namenode.decommission.interval</name><value>3</value></property>
-<property><name>dfs.http.address</name><value>localhost:61116</value></property>
-<property><name>dfs.heartbeat.interval</name><value>3</value></property>
-<property><name>mapred.job.tracker</name><value>local</value></property>
-<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
-<property><name>dfs.name.dir</name><value>build/test/data/dfs/name1,build/test/data/dfs/name2</value></property>
-<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
-<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
-<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
-<property><name>dfs.datanode.http.address</name><value>127.0.0.1:0</value></property>
-<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
-<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
-<property><name>dfs.replication.interval</name><value>3</value></property>
-<property><name>io.sort.record.percent</name><value>0.05</value></property>
-<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
-<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
-<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
-<property><name>dfs.replication</name><value>2</value></property>
-<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
-<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
-<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
-<property><name>hadoop.logfile.count</name><value>10</value></property>
-<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
-<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
-<property><name>fs.s3.block.size</name><value>67108864</value></property>
-<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
-<property><name>mapred.acls.enabled</name><value>false</value></property>
-<property><name>mapred.queue.names</name><value>default</value></property>
-<property><name>dfs.access.time.precision</name><value>3600000</value></property>
-<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
-<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
-<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
-<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
-<property><name>io.sort.mb</name><value>100</value></property>
-<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
-<property><name>mapred.compress.map.output</name><value>false</value></property>
-<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
-<property><name>ipc.client.kill.max</name><value>10</value></property>
-<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
-<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
-<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
-<property><name>io.file.buffer.size</name><value>4096</value></property>
-<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
-<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
-<property><name>dfs.datanode.handler.count</name><value>3</value></property>
-<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
-<property><name>mapred.task.profile</name><value>false</value></property>
-<property><name>dfs.replication.considerLoad</name><value>true</value></property>
-<property><name>jobclient.output.filter</name><value>FAILED</value></property>
-<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
-<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
-<property><name>fs.checkpoint.size</name><value>67108864</value></property>
-</configuration>
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/actual1/result1/.part-00000.crc b/genomix/genomix-hadoop/actual1/result1/.part-00000.crc
deleted file mode 100644
index 3422e04..0000000
--- a/genomix/genomix-hadoop/actual1/result1/.part-00000.crc
+++ /dev/null
Binary files differ
diff --git a/genomix/genomix-hadoop/actual1/result1/part-00000 b/genomix/genomix-hadoop/actual1/result1/part-00000
deleted file mode 100755
index c21f5f6..0000000
--- a/genomix/genomix-hadoop/actual1/result1/part-00000
+++ /dev/null
Binary files differ
diff --git a/genomix/genomix-hadoop/actual2/conf.xml b/genomix/genomix-hadoop/actual2/conf.xml
deleted file mode 100644
index ff11b9e..0000000
--- a/genomix/genomix-hadoop/actual2/conf.xml
+++ /dev/null
@@ -1,179 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
-<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
-<property><name>mapred.task.cache.levels</name><value>2</value></property>
-<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
-<property><name>hadoop.native.lib</name><value>true</value></property>
-<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
-<property><name>dfs.namenode.decommission.nodes.per.interval</name><value>5</value></property>
-<property><name>dfs.https.need.client.auth</name><value>false</value></property>
-<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
-<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
-<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
-<property><name>dfs.namenode.logging.level</name><value>info</value></property>
-<property><name>dfs.datanode.address</name><value>127.0.0.1:0</value></property>
-<property><name>io.skip.checksum.errors</name><value>false</value></property>
-<property><name>fs.default.name</name><value>hdfs://localhost:61195</value></property>
-<property><name>mapred.child.tmp</name><value>./tmp</value></property>
-<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
-<property><name>dfs.safemode.threshold.pct</name><value>0.999f</value></property>
-<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
-<property><name>dfs.namenode.handler.count</name><value>10</value></property>
-<property><name>dfs.blockreport.initialDelay</name><value>0</value></property>
-<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
-<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
-<property><name>io.sort.factor</name><value>10</value></property>
-<property><name>mapred.task.timeout</name><value>600000</value></property>
-<property><name>mapred.max.tracker.failures</name><value>4</value></property>
-<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
-<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
-<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
-<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
-<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
-<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
-<property><name>dfs.safemode.extension</name><value>0</value></property>
-<property><name>tasktracker.http.threads</name><value>40</value></property>
-<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
-<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
-<property><name>mapred.output.compress</name><value>false</value></property>
-<property><name>io.bytes.per.checksum</name><value>512</value></property>
-<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.StaticMapping</value></property>
-<property><name>dfs.https.server.keystore.resource</name><value>ssl-server.xml</value></property>
-<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
-<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
-<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
-<property><name>dfs.name.edits.dir</name><value>${dfs.name.dir}</value></property>
-<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
-<property><name>hadoop.job.ugi</name><value>hadoop,staff,everyone,localaccounts</value></property>
-<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
-<property><name>dfs.block.size</name><value>67108864</value></property>
-<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
-<property><name>job.end.retry.attempts</name><value>0</value></property>
-<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
-<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
-<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
-<property><name>dfs.datanode.ipc.address</name><value>127.0.0.1:0</value></property>
-<property><name>dfs.permissions</name><value>true</value></property>
-<property><name>topology.script.number.args</name><value>100</value></property>
-<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
-<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
-<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
-<property><name>dfs.datanode.https.address</name><value>0.0.0.0:50475</value></property>
-<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
-<property><name>dfs.secondary.http.address</name><value>0.0.0.0:50090</value></property>
-<property><name>dfs.replication.max</name><value>512</value></property>
-<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
-<property><name>hadoop.security.authorization</name><value>false</value></property>
-<property><name>local.cache.size</name><value>10737418240</value></property>
-<property><name>mapred.min.split.size</name><value>0</value></property>
-<property><name>mapred.map.tasks</name><value>2</value></property>
-<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
-<property><name>dfs.https.client.keystore.resource</name><value>ssl-client.xml</value></property>
-<property><name>dfs.namenode.startup</name><value>REGULAR</value></property>
-<property><name>mapred.job.queue.name</name><value>default</value></property>
-<property><name>dfs.https.address</name><value>0.0.0.0:50470</value></property>
-<property><name>dfs.balance.bandwidthPerSec</name><value>1048576</value></property>
-<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
-<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
-<property><name>job.end.retry.interval</name><value>30000</value></property>
-<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
-<property><name>fs.checkpoint.dir</name><value>build/test/data/dfs/namesecondary1,build/test/data/dfs/namesecondary2</value></property>
-<property><name>mapred.reduce.tasks</name><value>1</value></property>
-<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
-<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
-<property><name>dfs.max.objects</name><value>0</value></property>
-<property><name>webinterface.private.actions</name><value>false</value></property>
-<property><name>io.sort.spill.percent</name><value>0.80</value></property>
-<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
-<property><name>dfs.datanode.dns.nameserver</name><value>default</value></property>
-<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
-<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
-<property><name>dfs.blockreport.intervalMsec</name><value>3600000</value></property>
-<property><name>mapred.map.max.attempts</name><value>4</value></property>
-<property><name>dfs.client.block.write.retries</name><value>3</value></property>
-<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
-<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
-<property><name>dfs.https.enable</name><value>false</value></property>
-<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
-<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
-<property><name>keep.failed.task.files</name><value>false</value></property>
-<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
-<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
-<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
-<property><name>io.map.index.skip</name><value>0</value></property>
-<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
-<property><name>dfs.default.chunk.view.size</name><value>32768</value></property>
-<property><name>hadoop.logfile.size</name><value>10000000</value></property>
-<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
-<property><name>dfs.datanode.du.reserved</name><value>0</value></property>
-<property><name>fs.checkpoint.period</name><value>3600</value></property>
-<property><name>dfs.web.ugi</name><value>webuser,webgroup</value></property>
-<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
-<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
-<property><name>dfs.df.interval</name><value>60000</value></property>
-<property><name>dfs.data.dir</name><value>${hadoop.tmp.dir}/dfs/data</value></property>
-<property><name>fs.s3.maxRetries</name><value>4</value></property>
-<property><name>dfs.datanode.dns.interface</name><value>default</value></property>
-<property><name>dfs.support.append</name><value>false</value></property>
-<property><name>dfs.permissions.supergroup</name><value>supergroup</value></property>
-<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
-<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
-<property><name>fs.trash.interval</name><value>0</value></property>
-<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
-<property><name>dfs.replication.min</name><value>1</value></property>
-<property><name>mapred.submit.replication</name><value>10</value></property>
-<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
-<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
-<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
-<property><name>dfs.namenode.decommission.interval</name><value>3</value></property>
-<property><name>dfs.http.address</name><value>localhost:61196</value></property>
-<property><name>dfs.heartbeat.interval</name><value>3</value></property>
-<property><name>mapred.job.tracker</name><value>local</value></property>
-<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
-<property><name>dfs.name.dir</name><value>build/test/data/dfs/name1,build/test/data/dfs/name2</value></property>
-<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
-<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
-<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
-<property><name>dfs.datanode.http.address</name><value>127.0.0.1:0</value></property>
-<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
-<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
-<property><name>dfs.replication.interval</name><value>3</value></property>
-<property><name>io.sort.record.percent</name><value>0.05</value></property>
-<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
-<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
-<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
-<property><name>dfs.replication</name><value>2</value></property>
-<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
-<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
-<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
-<property><name>hadoop.logfile.count</name><value>10</value></property>
-<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
-<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
-<property><name>fs.s3.block.size</name><value>67108864</value></property>
-<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
-<property><name>mapred.acls.enabled</name><value>false</value></property>
-<property><name>mapred.queue.names</name><value>default</value></property>
-<property><name>dfs.access.time.precision</name><value>3600000</value></property>
-<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
-<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
-<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
-<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
-<property><name>io.sort.mb</name><value>100</value></property>
-<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
-<property><name>mapred.compress.map.output</name><value>false</value></property>
-<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
-<property><name>ipc.client.kill.max</name><value>10</value></property>
-<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
-<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
-<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
-<property><name>io.file.buffer.size</name><value>4096</value></property>
-<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
-<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
-<property><name>dfs.datanode.handler.count</name><value>3</value></property>
-<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
-<property><name>mapred.task.profile</name><value>false</value></property>
-<property><name>dfs.replication.considerLoad</name><value>true</value></property>
-<property><name>jobclient.output.filter</name><value>FAILED</value></property>
-<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
-<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
-<property><name>fs.checkpoint.size</name><value>67108864</value></property>
-</configuration>
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/actual2/result2/.part-00000.crc b/genomix/genomix-hadoop/actual2/result2/.part-00000.crc
deleted file mode 100644
index 3f8c2c5..0000000
--- a/genomix/genomix-hadoop/actual2/result2/.part-00000.crc
+++ /dev/null
Binary files differ
diff --git a/genomix/genomix-hadoop/actual2/result2/part-00000 b/genomix/genomix-hadoop/actual2/result2/part-00000
deleted file mode 100755
index ea3e875..0000000
--- a/genomix/genomix-hadoop/actual2/result2/part-00000
+++ /dev/null
Binary files differ
diff --git a/genomix/genomix-hadoop/actual3/complete2/.complete2-r-00000.crc b/genomix/genomix-hadoop/actual3/complete2/.complete2-r-00000.crc
deleted file mode 100644
index b0b2753..0000000
--- a/genomix/genomix-hadoop/actual3/complete2/.complete2-r-00000.crc
+++ /dev/null
Binary files differ
diff --git a/genomix/genomix-hadoop/actual3/complete2/complete2-r-00000 b/genomix/genomix-hadoop/actual3/complete2/complete2-r-00000
deleted file mode 100755
index d3d3667..0000000
--- a/genomix/genomix-hadoop/actual3/complete2/complete2-r-00000
+++ /dev/null
Binary files differ
diff --git a/genomix/genomix-hadoop/actual3/conf.xml b/genomix/genomix-hadoop/actual3/conf.xml
deleted file mode 100644
index 16a0edc..0000000
--- a/genomix/genomix-hadoop/actual3/conf.xml
+++ /dev/null
@@ -1,179 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
-<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
-<property><name>mapred.task.cache.levels</name><value>2</value></property>
-<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
-<property><name>hadoop.native.lib</name><value>true</value></property>
-<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
-<property><name>dfs.namenode.decommission.nodes.per.interval</name><value>5</value></property>
-<property><name>dfs.https.need.client.auth</name><value>false</value></property>
-<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
-<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
-<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
-<property><name>dfs.namenode.logging.level</name><value>info</value></property>
-<property><name>dfs.datanode.address</name><value>127.0.0.1:0</value></property>
-<property><name>io.skip.checksum.errors</name><value>false</value></property>
-<property><name>fs.default.name</name><value>hdfs://localhost:62106</value></property>
-<property><name>mapred.child.tmp</name><value>./tmp</value></property>
-<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
-<property><name>dfs.safemode.threshold.pct</name><value>0.999f</value></property>
-<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
-<property><name>dfs.namenode.handler.count</name><value>10</value></property>
-<property><name>dfs.blockreport.initialDelay</name><value>0</value></property>
-<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
-<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
-<property><name>io.sort.factor</name><value>10</value></property>
-<property><name>mapred.task.timeout</name><value>600000</value></property>
-<property><name>mapred.max.tracker.failures</name><value>4</value></property>
-<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
-<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
-<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
-<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
-<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
-<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
-<property><name>dfs.safemode.extension</name><value>0</value></property>
-<property><name>tasktracker.http.threads</name><value>40</value></property>
-<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
-<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
-<property><name>mapred.output.compress</name><value>false</value></property>
-<property><name>io.bytes.per.checksum</name><value>512</value></property>
-<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.StaticMapping</value></property>
-<property><name>dfs.https.server.keystore.resource</name><value>ssl-server.xml</value></property>
-<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
-<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
-<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
-<property><name>dfs.name.edits.dir</name><value>${dfs.name.dir}</value></property>
-<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
-<property><name>hadoop.job.ugi</name><value>hadoop,staff,everyone,localaccounts</value></property>
-<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
-<property><name>dfs.block.size</name><value>67108864</value></property>
-<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
-<property><name>job.end.retry.attempts</name><value>0</value></property>
-<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
-<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
-<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
-<property><name>dfs.datanode.ipc.address</name><value>127.0.0.1:0</value></property>
-<property><name>dfs.permissions</name><value>true</value></property>
-<property><name>topology.script.number.args</name><value>100</value></property>
-<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
-<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
-<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
-<property><name>dfs.datanode.https.address</name><value>0.0.0.0:50475</value></property>
-<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
-<property><name>dfs.secondary.http.address</name><value>0.0.0.0:50090</value></property>
-<property><name>dfs.replication.max</name><value>512</value></property>
-<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
-<property><name>hadoop.security.authorization</name><value>false</value></property>
-<property><name>local.cache.size</name><value>10737418240</value></property>
-<property><name>mapred.min.split.size</name><value>0</value></property>
-<property><name>mapred.map.tasks</name><value>2</value></property>
-<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
-<property><name>dfs.https.client.keystore.resource</name><value>ssl-client.xml</value></property>
-<property><name>dfs.namenode.startup</name><value>REGULAR</value></property>
-<property><name>mapred.job.queue.name</name><value>default</value></property>
-<property><name>dfs.https.address</name><value>0.0.0.0:50470</value></property>
-<property><name>dfs.balance.bandwidthPerSec</name><value>1048576</value></property>
-<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
-<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
-<property><name>job.end.retry.interval</name><value>30000</value></property>
-<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
-<property><name>fs.checkpoint.dir</name><value>build/test/data/dfs/namesecondary1,build/test/data/dfs/namesecondary2</value></property>
-<property><name>mapred.reduce.tasks</name><value>1</value></property>
-<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
-<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
-<property><name>dfs.max.objects</name><value>0</value></property>
-<property><name>webinterface.private.actions</name><value>false</value></property>
-<property><name>io.sort.spill.percent</name><value>0.80</value></property>
-<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
-<property><name>dfs.datanode.dns.nameserver</name><value>default</value></property>
-<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
-<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
-<property><name>dfs.blockreport.intervalMsec</name><value>3600000</value></property>
-<property><name>mapred.map.max.attempts</name><value>4</value></property>
-<property><name>dfs.client.block.write.retries</name><value>3</value></property>
-<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
-<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
-<property><name>dfs.https.enable</name><value>false</value></property>
-<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
-<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
-<property><name>keep.failed.task.files</name><value>false</value></property>
-<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
-<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
-<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
-<property><name>io.map.index.skip</name><value>0</value></property>
-<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
-<property><name>dfs.default.chunk.view.size</name><value>32768</value></property>
-<property><name>hadoop.logfile.size</name><value>10000000</value></property>
-<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
-<property><name>dfs.datanode.du.reserved</name><value>0</value></property>
-<property><name>fs.checkpoint.period</name><value>3600</value></property>
-<property><name>dfs.web.ugi</name><value>webuser,webgroup</value></property>
-<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
-<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
-<property><name>dfs.df.interval</name><value>60000</value></property>
-<property><name>dfs.data.dir</name><value>${hadoop.tmp.dir}/dfs/data</value></property>
-<property><name>fs.s3.maxRetries</name><value>4</value></property>
-<property><name>dfs.datanode.dns.interface</name><value>default</value></property>
-<property><name>dfs.support.append</name><value>false</value></property>
-<property><name>dfs.permissions.supergroup</name><value>supergroup</value></property>
-<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
-<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
-<property><name>fs.trash.interval</name><value>0</value></property>
-<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
-<property><name>dfs.replication.min</name><value>1</value></property>
-<property><name>mapred.submit.replication</name><value>10</value></property>
-<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
-<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
-<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
-<property><name>dfs.namenode.decommission.interval</name><value>3</value></property>
-<property><name>dfs.http.address</name><value>localhost:62107</value></property>
-<property><name>dfs.heartbeat.interval</name><value>3</value></property>
-<property><name>mapred.job.tracker</name><value>local</value></property>
-<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
-<property><name>dfs.name.dir</name><value>build/test/data/dfs/name1,build/test/data/dfs/name2</value></property>
-<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
-<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
-<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
-<property><name>dfs.datanode.http.address</name><value>127.0.0.1:0</value></property>
-<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
-<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
-<property><name>dfs.replication.interval</name><value>3</value></property>
-<property><name>io.sort.record.percent</name><value>0.05</value></property>
-<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
-<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
-<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
-<property><name>dfs.replication</name><value>2</value></property>
-<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
-<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
-<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
-<property><name>hadoop.logfile.count</name><value>10</value></property>
-<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
-<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
-<property><name>fs.s3.block.size</name><value>67108864</value></property>
-<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
-<property><name>mapred.acls.enabled</name><value>false</value></property>
-<property><name>mapred.queue.names</name><value>default</value></property>
-<property><name>dfs.access.time.precision</name><value>3600000</value></property>
-<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
-<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
-<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
-<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
-<property><name>io.sort.mb</name><value>100</value></property>
-<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
-<property><name>mapred.compress.map.output</name><value>false</value></property>
-<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
-<property><name>ipc.client.kill.max</name><value>10</value></property>
-<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
-<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
-<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
-<property><name>io.file.buffer.size</name><value>4096</value></property>
-<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
-<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
-<property><name>dfs.datanode.handler.count</name><value>3</value></property>
-<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
-<property><name>mapred.task.profile</name><value>false</value></property>
-<property><name>dfs.replication.considerLoad</name><value>true</value></property>
-<property><name>jobclient.output.filter</name><value>FAILED</value></property>
-<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
-<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
-<property><name>fs.checkpoint.size</name><value>67108864</value></property>
-</configuration>
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/actual5/conf.xml b/genomix/genomix-hadoop/actual5/conf.xml
deleted file mode 100644
index d19b061..0000000
--- a/genomix/genomix-hadoop/actual5/conf.xml
+++ /dev/null
@@ -1,179 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
-<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
-<property><name>mapred.task.cache.levels</name><value>2</value></property>
-<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
-<property><name>hadoop.native.lib</name><value>true</value></property>
-<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
-<property><name>dfs.namenode.decommission.nodes.per.interval</name><value>5</value></property>
-<property><name>dfs.https.need.client.auth</name><value>false</value></property>
-<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
-<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
-<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
-<property><name>dfs.namenode.logging.level</name><value>info</value></property>
-<property><name>dfs.datanode.address</name><value>127.0.0.1:0</value></property>
-<property><name>io.skip.checksum.errors</name><value>false</value></property>
-<property><name>fs.default.name</name><value>hdfs://localhost:58289</value></property>
-<property><name>mapred.child.tmp</name><value>./tmp</value></property>
-<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
-<property><name>dfs.safemode.threshold.pct</name><value>0.999f</value></property>
-<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
-<property><name>dfs.namenode.handler.count</name><value>10</value></property>
-<property><name>dfs.blockreport.initialDelay</name><value>0</value></property>
-<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
-<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
-<property><name>io.sort.factor</name><value>10</value></property>
-<property><name>mapred.task.timeout</name><value>600000</value></property>
-<property><name>mapred.max.tracker.failures</name><value>4</value></property>
-<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
-<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
-<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
-<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
-<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
-<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
-<property><name>dfs.safemode.extension</name><value>0</value></property>
-<property><name>tasktracker.http.threads</name><value>40</value></property>
-<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
-<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
-<property><name>mapred.output.compress</name><value>false</value></property>
-<property><name>io.bytes.per.checksum</name><value>512</value></property>
-<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.StaticMapping</value></property>
-<property><name>dfs.https.server.keystore.resource</name><value>ssl-server.xml</value></property>
-<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
-<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
-<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
-<property><name>dfs.name.edits.dir</name><value>${dfs.name.dir}</value></property>
-<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
-<property><name>hadoop.job.ugi</name><value>hadoop,staff,everyone,localaccounts</value></property>
-<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
-<property><name>dfs.block.size</name><value>67108864</value></property>
-<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
-<property><name>job.end.retry.attempts</name><value>0</value></property>
-<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
-<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
-<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
-<property><name>dfs.datanode.ipc.address</name><value>127.0.0.1:0</value></property>
-<property><name>dfs.permissions</name><value>true</value></property>
-<property><name>topology.script.number.args</name><value>100</value></property>
-<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
-<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
-<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
-<property><name>dfs.datanode.https.address</name><value>0.0.0.0:50475</value></property>
-<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
-<property><name>dfs.secondary.http.address</name><value>0.0.0.0:50090</value></property>
-<property><name>dfs.replication.max</name><value>512</value></property>
-<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
-<property><name>hadoop.security.authorization</name><value>false</value></property>
-<property><name>local.cache.size</name><value>10737418240</value></property>
-<property><name>mapred.min.split.size</name><value>0</value></property>
-<property><name>mapred.map.tasks</name><value>2</value></property>
-<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
-<property><name>dfs.https.client.keystore.resource</name><value>ssl-client.xml</value></property>
-<property><name>dfs.namenode.startup</name><value>REGULAR</value></property>
-<property><name>mapred.job.queue.name</name><value>default</value></property>
-<property><name>dfs.https.address</name><value>0.0.0.0:50470</value></property>
-<property><name>dfs.balance.bandwidthPerSec</name><value>1048576</value></property>
-<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
-<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
-<property><name>job.end.retry.interval</name><value>30000</value></property>
-<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
-<property><name>fs.checkpoint.dir</name><value>build/test/data/dfs/namesecondary1,build/test/data/dfs/namesecondary2</value></property>
-<property><name>mapred.reduce.tasks</name><value>1</value></property>
-<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
-<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
-<property><name>dfs.max.objects</name><value>0</value></property>
-<property><name>webinterface.private.actions</name><value>false</value></property>
-<property><name>io.sort.spill.percent</name><value>0.80</value></property>
-<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
-<property><name>dfs.datanode.dns.nameserver</name><value>default</value></property>
-<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
-<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
-<property><name>dfs.blockreport.intervalMsec</name><value>3600000</value></property>
-<property><name>mapred.map.max.attempts</name><value>4</value></property>
-<property><name>dfs.client.block.write.retries</name><value>3</value></property>
-<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
-<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
-<property><name>dfs.https.enable</name><value>false</value></property>
-<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
-<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
-<property><name>keep.failed.task.files</name><value>false</value></property>
-<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
-<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
-<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
-<property><name>io.map.index.skip</name><value>0</value></property>
-<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
-<property><name>dfs.default.chunk.view.size</name><value>32768</value></property>
-<property><name>hadoop.logfile.size</name><value>10000000</value></property>
-<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
-<property><name>dfs.datanode.du.reserved</name><value>0</value></property>
-<property><name>fs.checkpoint.period</name><value>3600</value></property>
-<property><name>dfs.web.ugi</name><value>webuser,webgroup</value></property>
-<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
-<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
-<property><name>dfs.df.interval</name><value>60000</value></property>
-<property><name>dfs.data.dir</name><value>${hadoop.tmp.dir}/dfs/data</value></property>
-<property><name>fs.s3.maxRetries</name><value>4</value></property>
-<property><name>dfs.datanode.dns.interface</name><value>default</value></property>
-<property><name>dfs.support.append</name><value>false</value></property>
-<property><name>dfs.permissions.supergroup</name><value>supergroup</value></property>
-<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
-<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
-<property><name>fs.trash.interval</name><value>0</value></property>
-<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
-<property><name>dfs.replication.min</name><value>1</value></property>
-<property><name>mapred.submit.replication</name><value>10</value></property>
-<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
-<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
-<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
-<property><name>dfs.namenode.decommission.interval</name><value>3</value></property>
-<property><name>dfs.http.address</name><value>localhost:58290</value></property>
-<property><name>dfs.heartbeat.interval</name><value>3</value></property>
-<property><name>mapred.job.tracker</name><value>local</value></property>
-<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
-<property><name>dfs.name.dir</name><value>build/test/data/dfs/name1,build/test/data/dfs/name2</value></property>
-<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
-<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
-<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
-<property><name>dfs.datanode.http.address</name><value>127.0.0.1:0</value></property>
-<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
-<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
-<property><name>dfs.replication.interval</name><value>3</value></property>
-<property><name>io.sort.record.percent</name><value>0.05</value></property>
-<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
-<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
-<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
-<property><name>dfs.replication</name><value>2</value></property>
-<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
-<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
-<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
-<property><name>hadoop.logfile.count</name><value>10</value></property>
-<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
-<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
-<property><name>fs.s3.block.size</name><value>67108864</value></property>
-<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
-<property><name>mapred.acls.enabled</name><value>false</value></property>
-<property><name>mapred.queue.names</name><value>default</value></property>
-<property><name>dfs.access.time.precision</name><value>3600000</value></property>
-<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
-<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
-<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
-<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
-<property><name>io.sort.mb</name><value>100</value></property>
-<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
-<property><name>mapred.compress.map.output</name><value>false</value></property>
-<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
-<property><name>ipc.client.kill.max</name><value>10</value></property>
-<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
-<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
-<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
-<property><name>io.file.buffer.size</name><value>4096</value></property>
-<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
-<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
-<property><name>dfs.datanode.handler.count</name><value>3</value></property>
-<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
-<property><name>mapred.task.profile</name><value>false</value></property>
-<property><name>dfs.replication.considerLoad</name><value>true</value></property>
-<property><name>jobclient.output.filter</name><value>FAILED</value></property>
-<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
-<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
-<property><name>fs.checkpoint.size</name><value>67108864</value></property>
-</configuration>
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/actual5/result5/.part-00000.crc b/genomix/genomix-hadoop/actual5/result5/.part-00000.crc
deleted file mode 100644
index dafaae3..0000000
--- a/genomix/genomix-hadoop/actual5/result5/.part-00000.crc
+++ /dev/null
Binary files differ
diff --git a/genomix/genomix-hadoop/actual5/result5/part-00000 b/genomix/genomix-hadoop/actual5/result5/part-00000
deleted file mode 100755
index deeff28..0000000
--- a/genomix/genomix-hadoop/actual5/result5/part-00000
+++ /dev/null
Binary files differ
diff --git a/genomix/genomix-hadoop/compare/result1/comparesource.txt b/genomix/genomix-hadoop/compare/result1/comparesource.txt
deleted file mode 100644
index ba52008..0000000
--- a/genomix/genomix-hadoop/compare/result1/comparesource.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-GCA ACT|T 3
-AGC |A 1
-CGC T|AT 2
-TGC |A 1
-ATC C|G 2
-TCG A|C 2
-CAT G|C 2
-GCT C| 1
diff --git a/genomix/genomix-hadoop/compare/result2/comparesource.txt b/genomix/genomix-hadoop/compare/result2/comparesource.txt
deleted file mode 100644
index db55a38..0000000
--- a/genomix/genomix-hadoop/compare/result2/comparesource.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-GCA -72
-AGC 1
-CGC -119
-TGC 1
-ATC 36
-TCG 18
-CAT 66
-GCT 32
diff --git a/genomix/genomix-hadoop/compare/result3/comparesource.txt b/genomix/genomix-hadoop/compare/result3/comparesource.txt
deleted file mode 100644
index 5f9dd78..0000000
--- a/genomix/genomix-hadoop/compare/result3/comparesource.txt
+++ /dev/null
@@ -1 +0,0 @@
-02 71 66 1
diff --git a/genomix/genomix-hadoop/compare/result5/comparesource.txt b/genomix/genomix-hadoop/compare/result5/comparesource.txt
deleted file mode 100644
index 6f4bd5e..0000000
--- a/genomix/genomix-hadoop/compare/result5/comparesource.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-GCA AT|T 2
-AGC |A 1
-TGC |A 1
diff --git a/genomix/genomix-hadoop/data/.DS_Store b/genomix/genomix-hadoop/data/.DS_Store
new file mode 100644
index 0000000..07f4ca0
--- /dev/null
+++ b/genomix/genomix-hadoop/data/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/data/webmap/.DS_Store b/genomix/genomix-hadoop/data/webmap/.DS_Store
new file mode 100644
index 0000000..17172c4
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/data/webmap/BridgePath b/genomix/genomix-hadoop/data/webmap/BridgePath
new file mode 100644
index 0000000..0717611
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/BridgePath
@@ -0,0 +1,2 @@
+TTTCCACTCCGTG
+TTTCCACCCCGTG
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/data/webmap/CyclePath b/genomix/genomix-hadoop/data/webmap/CyclePath
new file mode 100644
index 0000000..04080f4
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/CyclePath
@@ -0,0 +1 @@
+GCAACTTCATCAACT
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/data/webmap/SimplePath b/genomix/genomix-hadoop/data/webmap/SimplePath
new file mode 100644
index 0000000..80c03af
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/SimplePath
@@ -0,0 +1,3 @@
+ATATCGCATC
+AAGACAGCAC
+GCGGCAAGAA
diff --git a/genomix/genomix-hadoop/data/webmap/SinglePath b/genomix/genomix-hadoop/data/webmap/SinglePath
new file mode 100644
index 0000000..56ef5f8
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/SinglePath
@@ -0,0 +1 @@
+AGACAACAGT
diff --git a/genomix/genomix-hadoop/data/webmap/TreePath b/genomix/genomix-hadoop/data/webmap/TreePath
new file mode 100644
index 0000000..f3c13ce
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/TreePath
@@ -0,0 +1,3 @@
+GGCCTGGCTATCCC
+GGCCTCAGTAACTAAAC
+GGCCTCAGTACGCCCGG
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/data/webmap/simplePath~ b/genomix/genomix-hadoop/data/webmap/simplePath~
new file mode 100644
index 0000000..6615392
--- /dev/null
+++ b/genomix/genomix-hadoop/data/webmap/simplePath~
@@ -0,0 +1 @@
+AGCATGCTATAT
diff --git a/genomix/genomix-hadoop/data/webmap/text.txt b/genomix/genomix-hadoop/data/webmap/text.txt
deleted file mode 100755
index c6cd7fe..0000000
--- a/genomix/genomix-hadoop/data/webmap/text.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-@625E1AAXX100810:1:100:10000:10271/1
-AATAGAAGATCGAT
-+
-EDBDB?BEEEDGGEGGGDGGGA>DG@GGD;GD@DG@F?<B<BFFD?
diff --git a/genomix/genomix-hadoop/pom.xml b/genomix/genomix-hadoop/pom.xml
index f791f5b..ff6a7ee 100755
--- a/genomix/genomix-hadoop/pom.xml
+++ b/genomix/genomix-hadoop/pom.xml
@@ -22,8 +22,9 @@
<artifactId>maven-compiler-plugin</artifactId>
<version>2.0.2</version>
<configuration>
- <source>1.6</source>
- <target>1.6</target>
+ <source>1.7</source>
+ <target>1.7</target>
+ <fork>true</fork>
</configuration>
</plugin>
<plugin>
diff --git a/genomix/genomix-hadoop/src/.DS_Store b/genomix/genomix-hadoop/src/.DS_Store
new file mode 100644
index 0000000..e0bf627
--- /dev/null
+++ b/genomix/genomix-hadoop/src/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/main/.DS_Store b/genomix/genomix-hadoop/src/main/.DS_Store
new file mode 100644
index 0000000..325c6de
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/main/java/.DS_Store b/genomix/genomix-hadoop/src/main/java/.DS_Store
new file mode 100644
index 0000000..dd6c872
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/main/java/edu/.DS_Store b/genomix/genomix-hadoop/src/main/java/edu/.DS_Store
new file mode 100644
index 0000000..5e0c641
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/.DS_Store b/genomix/genomix-hadoop/src/main/java/edu/uci/.DS_Store
new file mode 100644
index 0000000..4f27e83
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/.DS_Store b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/.DS_Store
new file mode 100644
index 0000000..8f46380
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/.DS_Store b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/.DS_Store
new file mode 100644
index 0000000..f5eb144
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/gbresultschecking/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/.DS_Store b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/.DS_Store
new file mode 100644
index 0000000..f9e3926
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathDriver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathDriver.java
index cfdf8d5..36c12ae 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathDriver.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathDriver.java
@@ -23,12 +23,15 @@
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.MultipleOutputs;
import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
+import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
@SuppressWarnings("deprecation")
public class MergePathDriver {
@@ -74,7 +77,7 @@
conf.setInputFormat(SequenceFileInputFormat.class);
conf.setOutputFormat(SequenceFileOutputFormat.class);
- conf.setOutputKeyClass(KmerBytesWritable.class);
+ conf.setOutputKeyClass(VKmerBytesWritable.class);
conf.setOutputValueClass(MergePathValueWritable.class);
FileInputFormat.setInputPaths(conf, new Path(inputPath));
@@ -83,8 +86,9 @@
FileSystem dfs = FileSystem.get(conf);
dfs.delete(new Path(inputPath + "-step1"), true);
JobClient.runJob(conf);
+ int iMerge = 0;
/*----------------------------------------------------------------------*/
- for(int iMerge = 0; iMerge < mergeRound; iMerge ++){
+ for(iMerge = 0; iMerge < mergeRound; iMerge ++){
conf = new JobConf(MergePathDriver.class);
conf.setInt("sizeKmer", sizeKmer);
@@ -98,24 +102,23 @@
conf.setMapperClass(MergePathMapper.class);
conf.setReducerClass(MergePathReducer.class);
- conf.setMapOutputKeyClass(KmerBytesWritable.class);
+ conf.setMapOutputKeyClass(VKmerBytesWritable.class);
conf.setMapOutputValueClass(MergePathValueWritable.class);
conf.setInputFormat(SequenceFileInputFormat.class);
- conf.setOutputFormat(MultipleSequenceFileOutputFormat.class);
String uncomplete = "uncomplete" + iMerge;
String complete = "complete" + iMerge;
MultipleOutputs.addNamedOutput(conf, uncomplete,
- MergePathMultiSeqOutputFormat.class, KmerBytesWritable.class,
+ MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
MergePathValueWritable.class);
MultipleOutputs.addNamedOutput(conf, complete,
- MergePathMultiSeqOutputFormat.class, KmerBytesWritable.class,
+ MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
MergePathValueWritable.class);
- conf.setOutputKeyClass(KmerBytesWritable.class);
+ conf.setOutputKeyClass(VKmerBytesWritable.class);
conf.setOutputValueClass(MergePathValueWritable.class);
FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
@@ -127,6 +130,46 @@
dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));
}
+ /*----------------------------------------*/
+ conf = new JobConf(MergePathDriver.class);
+ conf.setInt("sizeKmer", sizeKmer);
+ conf.setInt("iMerge", iMerge);
+
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath));
+ }
+ conf.setJobName("Path Merge");
+
+ conf.setMapperClass(MergePathMapper.class);
+ conf.setReducerClass(MergePathReducer.class);
+
+ conf.setMapOutputKeyClass(VKmerBytesWritable.class);
+ conf.setMapOutputValueClass(MergePathValueWritable.class);
+
+ conf.setInputFormat(SequenceFileInputFormat.class);
+
+ String uncomplete = "uncomplete" + iMerge;
+ String complete = "complete" + iMerge;
+
+ MultipleOutputs.addNamedOutput(conf, uncomplete,
+ MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
+ MergePathValueWritable.class);
+
+ MultipleOutputs.addNamedOutput(conf, complete,
+ MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
+ MergePathValueWritable.class);
+
+ conf.setOutputKeyClass(VKmerBytesWritable.class);
+ conf.setOutputValueClass(MergePathValueWritable.class);
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
+ FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+ conf.setNumReduceTasks(numReducers);
+ dfs.delete(new Path(outputPath), true);
+ JobClient.runJob(conf);
+ dfs.delete(new Path(inputPath + "-step1"), true);
+ dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
+ dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));
}
public static void main(String[] args) throws Exception {
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMapper.java
index c3255f2..0c7dcc1 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMapper.java
@@ -20,32 +20,31 @@
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
-
import edu.uci.ics.genomix.type.GeneCode;
-import edu.uci.ics.genomix.type.KmerBytesWritable;
import edu.uci.ics.genomix.type.VKmerBytesWritable;
import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
@SuppressWarnings("deprecation")
public class MergePathMapper extends MapReduceBase implements
- Mapper<KmerBytesWritable, MergePathValueWritable, KmerBytesWritable, MergePathValueWritable> {
+ Mapper<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
private int KMER_SIZE;
private VKmerBytesWritableFactory outputKmerFactory;
- private MergePathValueWritable outputAdjList;
+ private MergePathValueWritable outputValue;
private VKmerBytesWritable tmpKmer;
private VKmerBytesWritable outputKmer;
+
public void configure(JobConf job) {
KMER_SIZE = job.getInt("sizeKmer", 0);
outputKmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
- outputAdjList = new MergePathValueWritable();
+ outputValue = new MergePathValueWritable();
tmpKmer = new VKmerBytesWritable(KMER_SIZE);
outputKmer = new VKmerBytesWritable(KMER_SIZE);
}
@Override
- public void map(KmerBytesWritable key, MergePathValueWritable value,
- OutputCollector<KmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+ public void map(VKmerBytesWritable key, MergePathValueWritable value,
+ OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
byte precursor = (byte) 0xF0;
byte succeed = (byte) 0x0F;
@@ -54,19 +53,18 @@
precursor = (byte) (precursor & adjBitMap);
precursor = (byte) ((precursor & 0xff) >> 4);
succeed = (byte) (succeed & adjBitMap);
-
- if (bitFlag == 1) {
+ if (bitFlag == 1) {
byte succeedCode = GeneCode.getGeneCodeFromBitMap(succeed);
tmpKmer.set(outputKmerFactory.getLastKmerFromChain(KMER_SIZE, key));
outputKmer.set(outputKmerFactory.shiftKmerWithNextCode(tmpKmer, succeedCode));
-
- KmerBytesWritable mergedKmer = outputKmerFactory.getFirstKmerFromChain(value.getKmerSize()
- - (KMER_SIZE - 1), value.getKmer());
- outputAdjList.set(mergedKmer, adjBitMap, bitFlag);
- output.collect(outputKmer, outputAdjList);
+
+ tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(key.getKmerLength() - (KMER_SIZE - 1), key));
+ outputValue.set(adjBitMap, bitFlag, tmpKmer);
+ output.collect(outputKmer, outputValue);
} else {
- outputAdjList.set(value);
- output.collect(key, outputAdjList);
+ outputKmer.set(key);
+ outputValue.set(value);
+ output.collect(key, outputValue);
}
}
}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiSeqOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiSeqOutputFormat.java
index 64fbb91..bd4cd2a 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiSeqOutputFormat.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathMultiSeqOutputFormat.java
@@ -17,9 +17,10 @@
import java.io.File;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
-public class MergePathMultiSeqOutputFormat extends MultipleSequenceFileOutputFormat<BytesWritable, MergePathValueWritable>{
+public class MergePathMultiSeqOutputFormat extends MultipleSequenceFileOutputFormat<VKmerBytesWritable, MergePathValueWritable>{
@Override
protected String generateLeafFileName(String name) {
// TODO Auto-generated method stub System.out.println(name);
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathReducer.java
index cead0e8..52abc1c 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathReducer.java
@@ -22,19 +22,18 @@
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.MultipleOutputs;
-
-import edu.uci.ics.genomix.type.KmerBytesWritable;
import edu.uci.ics.genomix.type.VKmerBytesWritable;
import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
@SuppressWarnings("deprecation")
public class MergePathReducer extends MapReduceBase implements
- Reducer<KmerBytesWritable, MergePathValueWritable, KmerBytesWritable, MergePathValueWritable> {
+ Reducer<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
private VKmerBytesWritableFactory kmerFactory;
private VKmerBytesWritable outputKmer;
private VKmerBytesWritable tmpKmer;
private int KMER_SIZE;
- private MergePathValueWritable outputAdjList;
+ private MergePathValueWritable outputValue;
+ private MergePathValueWritable tmpOutputValue;
MultipleOutputs mos = null;
private int I_MERGE;
@@ -42,7 +41,8 @@
mos = new MultipleOutputs(job);
I_MERGE = Integer.parseInt(job.get("iMerge"));
KMER_SIZE = job.getInt("sizeKmer", 0);
- outputAdjList = new MergePathValueWritable();
+ outputValue = new MergePathValueWritable();
+ tmpOutputValue = new MergePathValueWritable();
kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
outputKmer = new VKmerBytesWritable(KMER_SIZE);
tmpKmer = new VKmerBytesWritable(KMER_SIZE);
@@ -50,55 +50,87 @@
@SuppressWarnings("unchecked")
@Override
- public void reduce(KmerBytesWritable key, Iterator<MergePathValueWritable> values,
- OutputCollector<KmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- outputAdjList = values.next();
-
-
+ public void reduce(VKmerBytesWritable key, Iterator<MergePathValueWritable> values,
+ OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+ outputValue = values.next();
if (values.hasNext() == true) {
-
- if (outputAdjList.getFlag() == 1) {
- byte adjBitMap = outputAdjList.getAdjBitMap();
- byte bitFlag = outputAdjList.getFlag();
- outputKmer.set(kmerFactory.mergeTwoKmer(outputAdjList.getKmer(), key));
-
- outputAdjList = values.next();
- byte nextAdj = outputAdjList.getAdjBitMap();
+ if (outputValue.getFlag() != 1) {
+ byte nextAdj = outputValue.getAdjBitMap();
byte succeed = (byte) 0x0F;
succeed = (byte) (succeed & nextAdj);
+
+ outputValue = values.next();
+ byte adjBitMap = outputValue.getAdjBitMap();
+ byte flag = outputValue.getFlag();
+ if (outputValue.getKmerLength() != 0)
+ outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), key));
+ else
+ outputKmer.set(key);
+
adjBitMap = (byte) (adjBitMap & 0xF0);
adjBitMap = (byte) (adjBitMap | succeed);
- outputAdjList.set(null, 0, 0, adjBitMap, bitFlag, KMER_SIZE + outputAdjList.getKmerSize());
-
- mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputAdjList);
+ outputValue.set(adjBitMap, flag, null);
+ mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
} else {
- byte nextAdj = outputAdjList.getAdjBitMap();
- byte succeed = (byte) 0x0F;
- succeed = (byte) (succeed & nextAdj);
- outputAdjList = values.next();
- byte adjBitMap = outputAdjList.getAdjBitMap();
- byte flag = outputAdjList.getFlag();
- int kmerSize = outputAdjList.getKmerSize();
-
- outputKmer.set(kmerFactory.mergeTwoKmer(outputAdjList.getKmer(), key));
- adjBitMap = (byte) (adjBitMap & 0xF0);
- adjBitMap = (byte) (adjBitMap | succeed);
- outputAdjList.set(null, 0, 0, adjBitMap, flag, KMER_SIZE + kmerSize);
+ tmpOutputValue.set(outputValue);
+ byte tmpAdjMap = tmpOutputValue.getAdjBitMap();
- mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputAdjList);
+ outputValue = values.next();
+ if (outputValue.getFlag() != 1) {
+ if (tmpOutputValue.getKmerLength() != 0)
+ outputKmer.set(kmerFactory.mergeTwoKmer(tmpOutputValue.getKmer(), key));
+ else
+ outputKmer.set(key);
+
+ byte nextAdj = outputValue.getAdjBitMap();
+ byte succeed = (byte) 0x0F;
+ succeed = (byte) (succeed & nextAdj);
+ tmpAdjMap = (byte) (tmpAdjMap & 0xF0);
+ tmpAdjMap = (byte) (tmpAdjMap | succeed);
+ outputValue.set(tmpAdjMap, tmpOutputValue.getFlag(), null);
+ mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ } else {
+
+ tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
+ if (tmpOutputValue.getKmerLength() != 0)
+ outputKmer.set(kmerFactory.mergeTwoKmer(tmpOutputValue.getKmer(), tmpKmer));
+ else
+ outputKmer.set(tmpKmer);
+ tmpOutputValue.set(tmpAdjMap, tmpOutputValue.getFlag(), null);
+ mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, tmpOutputValue);
+
+ tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
+ if (outputValue.getKmerLength() != 0)
+ outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), tmpKmer));
+ else
+ outputKmer.set(tmpKmer);
+ outputValue.set(outputValue.getAdjBitMap(), outputValue.getFlag(), null);
+ mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
+
+ while (values.hasNext()) {
+ outputValue = values.next();
+ tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
+ if (outputValue.getKmerLength() != 0)
+ outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), tmpKmer));
+ else
+ outputKmer.set(tmpKmer);
+ outputValue.set(outputValue.getAdjBitMap(), outputValue.getFlag(), null);
+ mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ }
+ }
}
} else {
- if (outputAdjList.getFlag() != 0) {
- byte adjBitMap = outputAdjList.getAdjBitMap();
- byte flag = outputAdjList.getFlag();
- int kmerSize = outputAdjList.getKmerSize();
-
+ if (outputValue.getFlag() != 0) {
tmpKmer.set(kmerFactory.getFirstKmerFromChain(KMER_SIZE - 1, key));
- outputKmer.set(kmerFactory.mergeTwoKmer(outputAdjList.getKmer(), tmpKmer));
- outputAdjList.set(null, 0, 0, adjBitMap, flag, KMER_SIZE + kmerSize);
- mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputAdjList);
+ if (outputValue.getKmerLength() != 0)
+ outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), tmpKmer));
+ else
+ outputKmer.set(tmpKmer);
+ outputValue.set(outputValue.getAdjBitMap(), outputValue.getFlag(), null);
+ mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
+
} else
- mos.getCollector("uncomplete" + I_MERGE, reporter).collect(key, outputAdjList);
+ mos.getCollector("uncomplete" + I_MERGE, reporter).collect(key, outputValue);
}
}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathValueWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathValueWritable.java
index c5ff116..9686c18 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathValueWritable.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/MergePathValueWritable.java
@@ -28,115 +28,53 @@
public class MergePathValueWritable extends BinaryComparable implements WritableComparable<BinaryComparable> {
private static final byte[] EMPTY_BYTES = {};
- private int size;
- private byte[] bytes;
-
private byte adjBitMap;
private byte flag;
- private int kmerSize;
-
private VKmerBytesWritable kmer;
public MergePathValueWritable() {
- this((byte) 0, (byte) 0, (byte) 0, EMPTY_BYTES);
+ this((byte) 0, (byte) 0, 0, EMPTY_BYTES);
}
- public MergePathValueWritable(byte adjBitMap, byte flag, byte kmerSize, byte[] bytes) {
+ public MergePathValueWritable(byte adjBitMap, byte flag, int kmerSize, byte[] bytes) {
this.adjBitMap = adjBitMap;
this.flag = flag;
- this.kmerSize = kmerSize;
-
- this.bytes = bytes;
- this.size = bytes.length;
- this.kmer = new VKmerBytesWritable(kmerSize);
+ this.kmer = new VKmerBytesWritable(kmerSize, bytes);
kmer.set(bytes, 0, bytes.length);
}
- public void setSize(int size) {
- if (size > getCapacity()) {
- setCapacity(size * 3 / 2);
- }
- this.size = size;
+ public void set(MergePathValueWritable right) {
+ set(right.getAdjBitMap(), right.getFlag(), right.getKmer());
}
- public int getCapacity() {
- return bytes.length;
- }
-
- public void setCapacity(int new_cap) {
- if (new_cap != getCapacity()) {
- byte[] new_data = new byte[new_cap];
- if (new_cap < size) {
- size = new_cap;
- }
- if (size != 0) {
- System.arraycopy(bytes, 0, new_data, 0, size);
- }
- bytes = new_data;
- }
- }
-
- public void set(MergePathValueWritable newData) {
- set(newData.bytes, 0, newData.size, newData.adjBitMap, newData.flag, newData.kmerSize);
- }
-
- public void set(KmerBytesWritable mergedKmer, byte adjBitMap, byte bitFlag) {
- set(mergedKmer.getBytes(),0,mergedKmer.getLength(), adjBitMap, bitFlag, mergedKmer.getKmerLength());
- }
-
- public void set(byte[] newData, int offset, int length, byte adjBitMap, byte flag, int kmerSize) {
- setSize(0);
- if (length != 0) {
- setSize(length);
- System.arraycopy(newData, offset, bytes, 0, size);
- kmer.set(kmerSize, newData, offset, length);
- }
- this.adjBitMap = adjBitMap;
- this.flag = flag;
- this.kmerSize = kmerSize;
- }
-
- public KmerBytesWritable getKmer(){
- if (size != 0){
- return kmer;
- }
- return null;
+ public void set(byte adjBitMap, byte flag, VKmerBytesWritable kmer) {
+ this.kmer.set(kmer);
+ this.adjBitMap = adjBitMap;
+ this.flag = flag;
}
@Override
public void readFields(DataInput arg0) throws IOException {
// TODO Auto-generated method stub
- setSize(0); // clear the old data
- setSize(arg0.readInt());
- if(size != 0){
- arg0.readFully(bytes, 0, size);
- kmer.set(bytes,0,size);
- }
+ kmer.readFields(arg0);
adjBitMap = arg0.readByte();
flag = arg0.readByte();
- kmerSize = arg0.readInt();
}
@Override
public void write(DataOutput arg0) throws IOException {
// TODO Auto-generated method stub
- arg0.writeInt(size);
- arg0.write(bytes, 0, size);
+
+ kmer.write(arg0);
arg0.writeByte(adjBitMap);
arg0.writeByte(flag);
- arg0.writeInt(kmerSize);
}
- @Override
- public byte[] getBytes() {
- // TODO Auto-generated method stub
- return bytes;
- }
-
- @Override
- public int getLength() {
- // TODO Auto-generated method stub
- return size;
+ public VKmerBytesWritable getKmer() {
+ if (kmer.getLength() != 0) {
+ return kmer;
+ }
+ return null;
}
public byte getAdjBitMap() {
@@ -147,26 +85,26 @@
return this.flag;
}
- public int getKmerSize() {
- return this.kmerSize;
- }
-
public String toString() {
- StringBuffer sb = new StringBuffer(3 * size);
- for (int idx = 0; idx < size; idx++) {
- // if not the first, put a blank separator in
- if (idx != 0) {
- sb.append(' ');
- }
- String num = Integer.toHexString(0xff & bytes[idx]);
- // if it is only one digit, add a leading 0.
- if (num.length() < 2) {
- sb.append('0');
- }
- sb.append(num);
- }
- return GeneCode.getSymbolFromBitMap(adjBitMap) + '\t' + String.valueOf(flag) + '\t' + sb.toString();
+ return GeneCode.getSymbolFromBitMap(adjBitMap) + '\t' + String.valueOf(flag);
}
-
+ @Override
+ public byte[] getBytes() {
+ // TODO Auto-generated method stub
+ if (kmer.getLength() != 0) {
+ return kmer.getBytes();
+ } else
+ return null;
+
+ }
+
+ public int getKmerLength() {
+ return kmer.getKmerLength();
+ }
+
+ @Override
+ public int getLength() {
+ return kmer.getLength();
+ }
}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialMapper.java
index ae824e7..1058fda 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialMapper.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialMapper.java
@@ -22,8 +22,8 @@
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
-
import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.GeneCode;
@SuppressWarnings("deprecation")
public class SNodeInitialMapper extends MapReduceBase implements
@@ -97,43 +97,36 @@
@Override
public void map(KmerBytesWritable key, ByteWritable value,
OutputCollector<KmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
-
byte precursor = (byte) 0xF0;
byte succeed = (byte) 0x0F;
byte adjBitMap = value.get();
- byte flag = (byte) 0;
+ byte bitFlag = (byte) 0;
precursor = (byte) (precursor & adjBitMap);
precursor = (byte) ((precursor & 0xff) >> 4);
succeed = (byte) (succeed & adjBitMap);
boolean inDegree = measureDegree(precursor);
boolean outDegree = measureDegree(succeed);
- byte initial = 0;
- if (inDegree == true && outDegree == false) {
- flag = (byte) 2;
- switch (succeed) {
- case 1:
- initial = (byte) 0x00;
- break;
- case 2:
- initial = (byte) 0x01;
- break;
- case 4:
- initial = (byte) 0x02;
- break;
- case 8:
- initial = (byte) 0x03;
- break;
- }
- outputKmer.set(key);
- outputKmer.shiftKmerWithNextCode(initial);
- adjBitMap = (byte) (adjBitMap & 0xF0);
- outputAdjList.set(null, 0, 0, adjBitMap, flag, KMER_SIZE);
- output.collect(outputKmer, outputAdjList);
- }
if (inDegree == false && outDegree == false) {
outputKmer.set(key);
- outputAdjList.set(null, 0, 0, adjBitMap, flag, KMER_SIZE);
+ bitFlag = (byte) 2;
+ outputAdjList.set(adjBitMap, bitFlag, null);///~~~~~kmersize----->0
output.collect(outputKmer, outputAdjList);
}
+ else{
+ for(int i = 0 ; i < 4; i ++){
+ byte temp = 0x01;
+ byte shiftedCode = 0;
+ temp = (byte)(temp << i);
+ temp = (byte) (succeed & temp);
+ if(temp != 0 ){
+ byte succeedCode = GeneCode.getGeneCodeFromBitMap(temp);
+ shiftedCode = key.shiftKmerWithNextCode(succeedCode);
+ outputKmer.set(key);
+ outputAdjList.set((byte)0, bitFlag, null);
+ output.collect(outputKmer, outputAdjList);
+ key.shiftKmerWithPreCode(shiftedCode);
+ }
+ }
+ }
}
}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialReducer.java
index 734abd6..07cc32f 100644
--- a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialReducer.java
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmerging/SNodeInitialReducer.java
@@ -16,50 +16,51 @@
import java.io.IOException;
import java.util.Iterator;
-
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
-
import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
@SuppressWarnings("deprecation")
public class SNodeInitialReducer extends MapReduceBase implements
- Reducer<KmerBytesWritable, MergePathValueWritable, KmerBytesWritable, MergePathValueWritable> {
- private MergePathValueWritable outputAdjList = new MergePathValueWritable();
+ Reducer<KmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
+ private VKmerBytesWritable outputKmer = new VKmerBytesWritable();
+ private MergePathValueWritable outputValue = new MergePathValueWritable();
+
@Override
public void reduce(KmerBytesWritable key, Iterator<MergePathValueWritable> values,
- OutputCollector<KmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
- outputAdjList = values.next();
+ OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+ outputKmer.set(key);
+ outputValue = values.next();
if (values.hasNext() == true) {
- if (outputAdjList.getFlag() != 2) {
- byte adjBitMap = outputAdjList.getAdjBitMap();
- int kmerSize = outputAdjList.getKmerSize();
+ if (outputValue.getFlag() == 2) {
byte bitFlag = 1;
- outputAdjList.set(null, 0, 0, adjBitMap, bitFlag, kmerSize);
- output.collect(key, outputAdjList);
-
+ outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);///outputValue.getKmerLength()
+ output.collect(outputKmer, outputValue);
} else {
boolean flag = false;
while (values.hasNext()) {
- outputAdjList = values.next();
- if (outputAdjList.getFlag() != 2) {
+ outputValue = values.next();
+ if (outputValue.getFlag() == 2) {
flag = true;
break;
}
}
if (flag == true) {
- byte adjBitMap = outputAdjList.getAdjBitMap();
- int kmerSize = outputAdjList.getKmerSize();
byte bitFlag = 1;
- outputAdjList.set(null, 0, 0, adjBitMap, bitFlag, kmerSize);
- output.collect(key, outputAdjList);
+ outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);
+ output.collect(outputKmer, outputValue);
}
}
} else {
- output.collect(key, outputAdjList);
+ if (outputValue.getFlag() == 2) {
+ byte bitFlag = 0;
+ outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);
+ output.collect(outputKmer, outputValue);
+ }
}
}
}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/.DS_Store b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/.DS_Store
new file mode 100644
index 0000000..7c4ae29
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/ENodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/ENodeInitialReducer.java
new file mode 100644
index 0000000..1f9bc82
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/ENodeInitialReducer.java
@@ -0,0 +1,58 @@
+package edu.uci.ics.pathmergingh2;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.KmerUtil;
+
+@SuppressWarnings("deprecation")
+public class ENodeInitialReducer extends MapReduceBase implements
+ Reducer<BytesWritable, MergePathValueWritable, BytesWritable, MergePathValueWritable> {
+ public BytesWritable outputKmer = new BytesWritable();
+ public MergePathValueWritable outputAdjList = new MergePathValueWritable();
+
+ @Override
+ public void reduce(BytesWritable key, Iterator<MergePathValueWritable> values,
+ OutputCollector<BytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+ outputAdjList = values.next();
+ outputKmer.set(key);
+ if (values.hasNext() == true) {
+ byte bitFlag = outputAdjList.getFlag();
+ bitFlag = (byte) (bitFlag & 0xFE);
+ if (bitFlag == 2) {
+ bitFlag = (byte) (0x80 | outputAdjList.getFlag());
+ outputAdjList.set(outputAdjList.getAdjBitMap(), bitFlag, null);
+ output.collect(outputKmer, outputAdjList);
+
+ } else {
+ boolean flag = false;
+ while (values.hasNext()) {
+ outputAdjList = values.next();
+ if (outputAdjList.getFlag() == 2) {
+ flag = true;
+ break;
+ }
+ }
+ if (flag == true) {
+ bitFlag = (byte) (0x80 | outputAdjList.getFlag());
+ outputAdjList.set(outputAdjList.getAdjBitMap(), bitFlag, null);
+ output.collect(outputKmer, outputAdjList);
+ }
+ }
+ } else {
+ byte bitFlag = outputAdjList.getFlag();
+ bitFlag = (byte) (bitFlag & 0xFE);
+ if (bitFlag == 2) {
+ bitFlag = 0;
+ outputAdjList.set(outputAdjList.getAdjBitMap(), bitFlag, null);
+ output.collect(outputKmer, outputAdjList);
+ }
+ }
+ }
+}
+
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathH2Driver.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathH2Driver.java
new file mode 100644
index 0000000..58849e2
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathH2Driver.java
@@ -0,0 +1,166 @@
+package edu.uci.ics.pathmergingh2;
+
+import java.io.IOException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
+import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+
+@SuppressWarnings("deprecation")
+public class MergePathH2Driver {
+
+ private static class Options {
+ @Option(name = "-inputpath", usage = "the input path", required = true)
+ public String inputPath;
+
+ @Option(name = "-outputpath", usage = "the output path", required = true)
+ public String outputPath;
+
+ @Option(name = "-mergeresultpath", usage = "the merging results path", required = true)
+ public String mergeResultPath;
+
+ @Option(name = "-num-reducers", usage = "the number of reducers", required = true)
+ public int numReducers;
+
+ @Option(name = "-kmer-size", usage = "the size of kmer", required = true)
+ public int sizeKmer;
+
+ @Option(name = "-merge-rounds", usage = "the while rounds of merging", required = true)
+ public int mergeRound;
+
+ }
+
+
+ public void run(String inputPath, String outputPath, String mergeResultPath, int numReducers, int sizeKmer, int mergeRound, String defaultConfPath)
+ throws IOException{
+
+ JobConf conf = new JobConf(MergePathH2Driver.class);
+ conf.setInt("sizeKmer", sizeKmer);
+
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath));
+ }
+ conf.setJobName("Initial Path-Starting-Points Table");
+ conf.setMapperClass(SNodeInitialMapper.class);
+ conf.setReducerClass(SNodeInitialReducer.class);
+
+ conf.setMapOutputKeyClass(KmerBytesWritable.class);
+ conf.setMapOutputValueClass(MergePathValueWritable.class);
+
+ conf.setInputFormat(SequenceFileInputFormat.class);
+ conf.setOutputFormat(SequenceFileOutputFormat.class);
+
+ conf.setOutputKeyClass(VKmerBytesWritable.class);
+ conf.setOutputValueClass(MergePathValueWritable.class);
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath));
+ FileOutputFormat.setOutputPath(conf, new Path(inputPath + "-step1"));
+ conf.setNumReduceTasks(numReducers);
+ FileSystem dfs = FileSystem.get(conf);
+ dfs.delete(new Path(inputPath + "-step1"), true);
+ JobClient.runJob(conf);
+ int iMerge = 0;
+/*----------------------------------------------------------------------*/
+ for(iMerge = 0; iMerge < mergeRound; iMerge ++){
+ conf = new JobConf(MergePathH2Driver.class);
+ conf.setInt("sizeKmer", sizeKmer);
+ conf.setInt("iMerge", iMerge);
+
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath));
+ }
+ conf.setJobName("Path Merge");
+
+ conf.setMapperClass(MergePathH2Mapper.class);
+ conf.setReducerClass(MergePathH2Reducer.class);
+
+ conf.setMapOutputKeyClass(VKmerBytesWritable.class);
+ conf.setMapOutputValueClass(MergePathValueWritable.class);
+
+ conf.setInputFormat(SequenceFileInputFormat.class);
+
+ String uncomplete = "uncomplete" + iMerge;
+ String complete = "complete" + iMerge;
+
+ MultipleOutputs.addNamedOutput(conf, uncomplete,
+ MergePathMultiSeqOutputFormat.class, VKmerBytesWritable.class,
+ MergePathValueWritable.class);
+
+ MultipleOutputs.addNamedOutput(conf, complete,
+ MergePathMultiTextOutputFormat.class, VKmerBytesWritable.class,
+ MergePathValueWritable.class);
+
+ conf.setOutputKeyClass(VKmerBytesWritable.class);
+ conf.setOutputValueClass(MergePathValueWritable.class);
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
+ FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+ conf.setNumReduceTasks(numReducers);
+ dfs.delete(new Path(outputPath), true);
+ JobClient.runJob(conf);
+ dfs.delete(new Path(inputPath + "-step1"), true);
+ dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
+ dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));
+ }
+ conf = new JobConf(MergePathH2Driver.class);
+ conf.setInt("sizeKmer", sizeKmer);
+ conf.setInt("iMerge", iMerge);
+
+ if (defaultConfPath != null) {
+ conf.addResource(new Path(defaultConfPath));
+ }
+ conf.setJobName("Path Merge");
+
+ conf.setMapperClass(MergePathH2Mapper.class);
+ conf.setReducerClass(MergePathH2Reducer.class);
+
+ conf.setMapOutputKeyClass(VKmerBytesWritable.class);
+ conf.setMapOutputValueClass(MergePathValueWritable.class);
+
+ conf.setInputFormat(SequenceFileInputFormat.class);
+
+ String uncomplete = "uncomplete" + iMerge;
+ String complete = "complete" + iMerge;
+
+ MultipleOutputs.addNamedOutput(conf, uncomplete,
+ MergePathMultiTextOutputFormat.class, VKmerBytesWritable.class,
+ MergePathValueWritable.class);
+
+ MultipleOutputs.addNamedOutput(conf, complete,
+ MergePathMultiTextOutputFormat.class, VKmerBytesWritable.class,
+ MergePathValueWritable.class);
+
+ conf.setOutputKeyClass(VKmerBytesWritable.class);
+ conf.setOutputValueClass(MergePathValueWritable.class);
+
+ FileInputFormat.setInputPaths(conf, new Path(inputPath + "-step1"));
+ FileOutputFormat.setOutputPath(conf, new Path(outputPath));
+ conf.setNumReduceTasks(numReducers);
+ dfs.delete(new Path(outputPath), true);
+ JobClient.runJob(conf);
+ dfs.delete(new Path(inputPath + "-step1"), true);
+ dfs.rename(new Path(outputPath + "/" + uncomplete), new Path(inputPath + "-step1"));
+ dfs.rename(new Path(outputPath + "/" + complete), new Path(mergeResultPath + "/" + complete));
+ }
+
+ public static void main(String[] args) throws Exception {
+ Options options = new Options();
+ CmdLineParser parser = new CmdLineParser(options);
+ parser.parseArgument(args);
+ MergePathH2Driver driver = new MergePathH2Driver();
+ driver.run(options.inputPath, options.outputPath, options.mergeResultPath, options.numReducers, options.sizeKmer, options.mergeRound, null);
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathH2Mapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathH2Mapper.java
new file mode 100644
index 0000000..6ea9dd3
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathH2Mapper.java
@@ -0,0 +1,87 @@
+package edu.uci.ics.pathmergingh2;
+
+import java.io.IOException;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.GeneCode;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
+
+@SuppressWarnings("deprecation")
+public class MergePathH2Mapper extends MapReduceBase implements
+ Mapper<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
+
+ private int KMER_SIZE;
+ private VKmerBytesWritableFactory outputKmerFactory;
+ private MergePathValueWritable outputValue;
+ private VKmerBytesWritable tmpKmer;
+ private VKmerBytesWritable outputKmer;
+
+ public void configure(JobConf job) {
+ KMER_SIZE = job.getInt("sizeKmer", 0);
+ outputKmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
+ outputValue = new MergePathValueWritable();
+ tmpKmer = new VKmerBytesWritable(KMER_SIZE);
+ outputKmer = new VKmerBytesWritable(KMER_SIZE);
+ }
+
+ @Override
+ public void map(VKmerBytesWritable key, MergePathValueWritable value,
+ OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+ byte precursor = (byte) 0xF0;
+ byte succeed = (byte) 0x0F;
+ byte adjBitMap = value.getAdjBitMap();
+ byte bitFlag = value.getFlag();
+ precursor = (byte) (precursor & adjBitMap);
+ precursor = (byte) ((precursor & 0xff) >> 4);
+ succeed = (byte) (succeed & adjBitMap);
+ byte bitStartEnd = (byte) (0x81 & bitFlag);
+
+ switch (bitStartEnd) {
+ case (byte) 0x01:
+ byte succeedCode = GeneCode.getGeneCodeFromBitMap(succeed);
+ tmpKmer.set(outputKmerFactory.getLastKmerFromChain(KMER_SIZE, key));
+ outputKmer.set(outputKmerFactory.shiftKmerWithNextCode(tmpKmer, succeedCode));
+
+ tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(key.getKmerLength() - (KMER_SIZE - 1), key));
+ bitFlag = (byte) (bitFlag | 0x08);
+ outputValue.set(adjBitMap, bitFlag, tmpKmer);
+ output.collect(outputKmer, outputValue);
+ break;
+ case (byte) 0x80:
+ tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(KMER_SIZE, key));
+ outputKmer.set(tmpKmer);
+ tmpKmer.set(outputKmerFactory.getLastKmerFromChain(key.getKmerLength() - KMER_SIZE, key));
+ bitFlag = (byte) (bitFlag | 0x10);
+ outputValue.set(adjBitMap, bitFlag, tmpKmer);
+ output.collect(outputKmer, outputValue);
+ break;
+ case (byte) 0x00:
+ succeedCode = GeneCode.getGeneCodeFromBitMap(succeed);
+ tmpKmer.set(outputKmerFactory.getLastKmerFromChain(KMER_SIZE, key));
+ outputKmer.set(outputKmerFactory.shiftKmerWithNextCode(tmpKmer, succeedCode));
+
+ tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(key.getKmerLength() - (KMER_SIZE - 1), key));
+ bitFlag = (byte) (bitFlag | 0x08);
+ outputValue.set(adjBitMap, bitFlag, tmpKmer);
+ output.collect(outputKmer, outputValue);
+
+ bitFlag = (byte) (bitFlag & 0xF7);
+ tmpKmer.set(outputKmerFactory.getFirstKmerFromChain(KMER_SIZE, key));
+ outputKmer.set(tmpKmer);
+ tmpKmer.set(outputKmerFactory.getLastKmerFromChain(key.getKmerLength() - KMER_SIZE, key));
+ bitFlag = (byte) (bitFlag | 0x10);
+ outputValue.set(adjBitMap, bitFlag, tmpKmer);
+ output.collect(outputKmer, outputValue);
+ break;
+ case (byte) 0x81:
+ outputKmer.set(key);
+ outputValue.set(adjBitMap, bitFlag, null);
+ output.collect(outputKmer, outputValue);
+ break;
+ }
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathH2Reducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathH2Reducer.java
new file mode 100644
index 0000000..ad8b3c2
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathH2Reducer.java
@@ -0,0 +1,119 @@
+package edu.uci.ics.pathmergingh2;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.lib.MultipleOutputs;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritableFactory;
+
+@SuppressWarnings("deprecation")
+public class MergePathH2Reducer extends MapReduceBase implements
+ Reducer<VKmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
+ private VKmerBytesWritableFactory kmerFactory;
+ private VKmerBytesWritable outputKmer;
+ private VKmerBytesWritable tmpKmer1;
+ private VKmerBytesWritable tmpKmer2;
+ private int KMER_SIZE;
+ private MergePathValueWritable outputValue;
+ private MergePathValueWritable tmpOutputValue;
+
+ MultipleOutputs mos = null;
+ private int I_MERGE;
+
+ public void configure(JobConf job) {
+ mos = new MultipleOutputs(job);
+ I_MERGE = Integer.parseInt(job.get("iMerge"));
+ KMER_SIZE = job.getInt("sizeKmer", 0);
+ outputValue = new MergePathValueWritable();
+ tmpOutputValue = new MergePathValueWritable();
+ kmerFactory = new VKmerBytesWritableFactory(KMER_SIZE);
+ outputKmer = new VKmerBytesWritable(KMER_SIZE);
+ tmpKmer1 = new VKmerBytesWritable(KMER_SIZE);
+ tmpKmer2 = new VKmerBytesWritable(KMER_SIZE);
+ }
+
+ @SuppressWarnings("unchecked")
+ public void reduce(VKmerBytesWritable key, Iterator<MergePathValueWritable> values,
+ OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+ outputValue = values.next();
+ outputKmer.set(key);
+ if (values.hasNext() == true) {
+ byte bitFlag = outputValue.getFlag();
+ byte bitStartEnd = (byte) (0x81 & bitFlag);
+ byte bitPosiNegative = (byte) (0x18 & bitFlag);
+ byte succeed = (byte) 0x0F;
+ switch (bitPosiNegative) {
+ case (byte) 0x08:
+ if (outputValue.getKmerLength() != 0)
+ tmpKmer1.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), key));
+ else
+ tmpKmer1.set(key);
+ byte adjBitMap = outputValue.getAdjBitMap();
+ outputValue = values.next();
+ bitStartEnd = (byte) (0x81 & outputValue.getFlag());
+ if (bitStartEnd == (byte) 0x80) {
+ if (outputValue.getKmerLength() != 0)
+ tmpKmer2.set(kmerFactory.mergeTwoKmer(key, outputValue.getKmer()));
+ else
+ tmpKmer2.set(key);
+ byte tmpFlag = (byte) 0x80;
+ tmpOutputValue.set(outputValue.getAdjBitMap(), outputValue.getFlag(), null);
+ mos.getCollector("uncomplete" + I_MERGE, reporter).collect(tmpKmer2, tmpOutputValue);
+ }
+ if (outputValue.getKmerLength() != 0)
+ outputKmer.set(kmerFactory.mergeTwoKmer(tmpKmer1, outputValue.getKmer()));
+ else
+ outputKmer.set(tmpKmer1);
+ succeed = (byte) (succeed & outputValue.getAdjBitMap());
+ adjBitMap = (byte) (adjBitMap & 0xF0);
+ adjBitMap = (byte) (adjBitMap | succeed);
+ byte outputFlag = (byte) (0x81 & bitFlag);
+ outputFlag = (byte) (outputFlag | ((byte) 0x81 & outputValue.getFlag()));
+ outputValue.set(adjBitMap, outputFlag, null);
+ mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ break;
+ case (byte) 0x10:
+ if (outputValue.getKmerLength() != 0)
+ tmpKmer1.set(kmerFactory.mergeTwoKmer(key, outputValue.getKmer()));
+ else
+ tmpKmer1.set(key);
+ if (bitStartEnd == (byte) 0x80) {
+ byte tmpFlag = (byte) 0x80;
+ tmpOutputValue.set(outputValue.getAdjBitMap(), tmpFlag, null);
+ mos.getCollector("uncomplete" + I_MERGE, reporter).collect(tmpKmer1, tmpOutputValue);
+ }
+ succeed = (byte) (succeed & outputValue.getAdjBitMap());
+ outputValue = values.next();
+ if (outputValue.getKmerLength() != 0)
+ outputKmer.set(kmerFactory.mergeTwoKmer(outputValue.getKmer(), tmpKmer1));
+ else
+ outputKmer.set(tmpKmer1);
+ adjBitMap = outputValue.getAdjBitMap();
+ adjBitMap = (byte) (adjBitMap & 0xF0);
+ adjBitMap = (byte) (adjBitMap | succeed);
+ outputFlag = (byte) (0x81 & bitFlag);
+ outputFlag = (byte) (outputFlag | ((byte) 0x81 & outputValue.getFlag()));
+ outputValue.set(adjBitMap, outputFlag, null);
+ mos.getCollector("uncomplete" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ break;
+ }
+ } else {
+ byte bitFlag = outputValue.getFlag();
+ byte bitStartEnd = (byte) (0x81 & bitFlag);
+ if (bitStartEnd == (byte) 0x81) {
+ outputKmer.set(key);
+ mos.getCollector("complete" + I_MERGE, reporter).collect(outputKmer, outputValue);
+ }
+ }
+ }
+ public void close() throws IOException {
+ // TODO Auto-generated method stub
+ mos.close();
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathMultiSeqOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathMultiSeqOutputFormat.java
new file mode 100644
index 0000000..5e6f008
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathMultiSeqOutputFormat.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pathmergingh2;
+
+import java.io.File;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
+
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+
+public class MergePathMultiSeqOutputFormat extends MultipleSequenceFileOutputFormat<VKmerBytesWritable, MergePathValueWritable>{
+ @Override
+ protected String generateLeafFileName(String name) {
+ // TODO Auto-generated method stub System.out.println(name);
+ String[] names = name.split("-");
+ return names[0] + File.separator + name;
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathMultiTextOutputFormat.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathMultiTextOutputFormat.java
new file mode 100644
index 0000000..d6176e2
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathMultiTextOutputFormat.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pathmergingh2;
+
+import java.io.File;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
+
+public class MergePathMultiTextOutputFormat extends MultipleTextOutputFormat<Text, Text>{
+ @Override
+ protected String generateLeafFileName(String name) {
+ // TODO Auto-generated method stub System.out.println(name);
+ String[] names = name.split("-");
+ return names[0] + File.separator + name;
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathValueWritable.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathValueWritable.java
new file mode 100644
index 0000000..2f1869d
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/MergePathValueWritable.java
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pathmergingh2;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.BinaryComparable;
+import org.apache.hadoop.io.WritableComparable;
+
+import edu.uci.ics.genomix.type.GeneCode;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+public class MergePathValueWritable extends BinaryComparable implements WritableComparable<BinaryComparable> {
+
+ private static final byte[] EMPTY_BYTES = {};
+ private byte adjBitMap;
+ private byte flag;
+ private VKmerBytesWritable kmer;
+
+ public MergePathValueWritable() {
+ this((byte) 0, (byte) 0, 0, EMPTY_BYTES);
+ }
+
+ public MergePathValueWritable(int k) {
+ this.adjBitMap = 0;
+ this.flag = 0;
+ this.kmer = new VKmerBytesWritable(k);
+ }
+
+ public MergePathValueWritable(byte adjBitMap, byte flag, int kmerSize, byte[] bytes) {
+ this.adjBitMap = adjBitMap;
+ this.flag = flag;
+ this.kmer = new VKmerBytesWritable(kmerSize, bytes);
+ kmer.set(bytes, 0, bytes.length);
+ }
+
+ public void set(MergePathValueWritable right) {
+ set(right.getAdjBitMap(), right.getFlag(), right.getKmer());
+ }
+
+ public void set(byte adjBitMap, byte flag, VKmerBytesWritable kmer) {
+ this.kmer.set(kmer);
+ this.adjBitMap = adjBitMap;
+ this.flag = flag;
+ }
+
+ @Override
+ public void readFields(DataInput arg0) throws IOException {
+ // TODO Auto-generated method stub
+ kmer.readFields(arg0);
+ adjBitMap = arg0.readByte();
+ flag = arg0.readByte();
+ }
+
+ @Override
+ public void write(DataOutput arg0) throws IOException {
+ // TODO Auto-generated method stub
+
+ kmer.write(arg0);
+ arg0.writeByte(adjBitMap);
+ arg0.writeByte(flag);
+ }
+
+ public VKmerBytesWritable getKmer() {
+ if (kmer.getLength() != 0) {
+ return kmer;
+ }
+ return null;
+ }
+
+ public byte getAdjBitMap() {
+ return this.adjBitMap;
+ }
+
+ public byte getFlag() {
+ return this.flag;
+ }
+
+ public String toString() {
+ return GeneCode.getSymbolFromBitMap(adjBitMap) + '\t' + String.valueOf(flag);
+ }
+
+ @Override
+ public byte[] getBytes() {
+ // TODO Auto-generated method stub
+ if (kmer.getLength() != 0) {
+ return kmer.getBytes();
+ } else
+ return null;
+
+ }
+
+ public int getKmerLength() {
+ return kmer.getKmerLength();
+ }
+
+ @Override
+ public int getLength() {
+ return kmer.getLength();
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/SNodeInitialMapper.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/SNodeInitialMapper.java
new file mode 100644
index 0000000..4c05dac
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/SNodeInitialMapper.java
@@ -0,0 +1,141 @@
+package edu.uci.ics.pathmergingh2;
+
+import java.io.IOException;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.GeneCode;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+
+@SuppressWarnings("deprecation")
+public class SNodeInitialMapper extends MapReduceBase implements
+ Mapper<KmerBytesWritable, ByteWritable, KmerBytesWritable, MergePathValueWritable> {
+
+ public int KMER_SIZE;
+ public KmerBytesWritable outputKmer;
+ public MergePathValueWritable outputAdjList;
+
+ public void configure(JobConf job) {
+ KMER_SIZE = Integer.parseInt(job.get("sizeKmer"));
+ outputKmer = new KmerBytesWritable(KMER_SIZE);
+ outputAdjList = new MergePathValueWritable();
+ }
+
+ boolean measureDegree(byte adjacent) {
+ boolean result = true;
+ switch (adjacent) {
+ case 0:
+ result = true;
+ break;
+ case 1:
+ result = false;
+ break;
+ case 2:
+ result = false;
+ break;
+ case 3:
+ result = true;
+ break;
+ case 4:
+ result = false;
+ break;
+ case 5:
+ result = true;
+ break;
+ case 6:
+ result = true;
+ break;
+ case 7:
+ result = true;
+ break;
+ case 8:
+ result = false;
+ break;
+ case 9:
+ result = true;
+ break;
+ case 10:
+ result = true;
+ break;
+ case 11:
+ result = true;
+ break;
+ case 12:
+ result = true;
+ break;
+ case 13:
+ result = true;
+ break;
+ case 14:
+ result = true;
+ break;
+ case 15:
+ result = true;
+ break;
+ }
+ return result;
+ }
+
+ @Override
+ public void map(KmerBytesWritable key, ByteWritable value,
+ OutputCollector<KmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+ byte precursor = (byte) 0xF0;
+ byte succeed = (byte) 0x0F;
+ byte adjBitMap = value.get();
+ byte bitFlag = (byte) 0;
+ precursor = (byte) (precursor & adjBitMap);
+ precursor = (byte) ((precursor & 0xff) >> 4);
+ succeed = (byte) (succeed & adjBitMap);
+ boolean inDegree = measureDegree(precursor);
+ boolean outDegree = measureDegree(succeed);
+ if (key.toString().equals("CGC")) {
+ int a = 2;
+ int b = a;
+ }
+ if (key.toString().equals("TCG")) {
+ int a = 2;
+ int b = a;
+ }
+ if (inDegree == false && outDegree == false) {
+ outputKmer.set(key);
+ System.out.println(outputKmer.hashCode());
+ bitFlag = (byte) 2;
+ outputAdjList.set(adjBitMap, bitFlag, null);
+ output.collect(outputKmer, outputAdjList);
+ } else {
+ for (int i = 0; i < 4; i++) {
+ byte temp = (byte) 0x01;
+ byte shiftedCode = 0;
+ temp = (byte) (temp << i);
+ temp = (byte) (precursor & temp);
+ if (temp != 0) {
+ byte precurCode = GeneCode.getGeneCodeFromBitMap(temp);
+ shiftedCode = key.shiftKmerWithPreCode(precurCode);
+ outputKmer.set(key);
+ bitFlag = (byte) 0x80;
+ outputAdjList.set((byte) 0, bitFlag, null);
+ output.collect(outputKmer, outputAdjList);
+ key.shiftKmerWithNextCode(shiftedCode);
+ }
+ }
+ for (int i = 0; i < 4; i++) {
+ byte temp = (byte) 0x01;
+ byte shiftedCode = 0;
+ temp = (byte) (temp << i);
+ temp = (byte) (succeed & temp);
+ if (temp != 0) {
+ byte succeedCode = GeneCode.getGeneCodeFromBitMap(temp);
+ shiftedCode = key.shiftKmerWithNextCode(succeedCode);
+ outputKmer.set(key);
+ bitFlag = (byte) 0x01;
+ outputAdjList.set((byte) 0, bitFlag, null);
+ output.collect(outputKmer, outputAdjList);
+ key.shiftKmerWithPreCode(shiftedCode);
+ }
+ }
+ }
+ }
+}
diff --git a/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/SNodeInitialReducer.java b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/SNodeInitialReducer.java
new file mode 100644
index 0000000..7fd7a2e
--- /dev/null
+++ b/genomix/genomix-hadoop/src/main/java/edu/uci/ics/pathmergingh2/SNodeInitialReducer.java
@@ -0,0 +1,80 @@
+package edu.uci.ics.pathmergingh2;
+
+import java.io.IOException;
+import java.util.Iterator;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+
+@SuppressWarnings("deprecation")
+public class SNodeInitialReducer extends MapReduceBase implements
+ Reducer<KmerBytesWritable, MergePathValueWritable, VKmerBytesWritable, MergePathValueWritable> {
+ private VKmerBytesWritable outputKmer = new VKmerBytesWritable();
+ private MergePathValueWritable outputValue = new MergePathValueWritable();
+
+ @Override
+ public void reduce(KmerBytesWritable key, Iterator<MergePathValueWritable> values,
+ OutputCollector<VKmerBytesWritable, MergePathValueWritable> output, Reporter reporter) throws IOException {
+ outputKmer.set(key);
+ outputValue = values.next();
+ byte startFlag = 0x00;
+ byte endFlag = 0x00;
+ byte targetPointFlag = 0x00;
+ byte targetAdjList = 0x00;
+ byte outputFlag = 0x00;
+ if(key.toString().equals("TCG")){
+ int a = 2;
+ int b = a;
+ }
+ if (values.hasNext() == true) {
+ switch (outputValue.getFlag()) {
+ case (byte) 0x01:
+ startFlag = (byte) 0x01;
+ break;
+ case (byte) 0x80:
+ endFlag = (byte) 0x80;
+ break;
+ case (byte) 0x02:
+ targetPointFlag = (byte) 0x02;
+ targetAdjList = outputValue.getAdjBitMap();
+ break;
+ }
+ while (values.hasNext()) {
+ outputValue = values.next();
+ switch (outputValue.getFlag()) {
+ case (byte) 0x01:
+ startFlag = (byte) 0x01;
+ break;
+ case (byte) 0x80:
+ endFlag = (byte) 0x80;
+ break;
+ case (byte) 0x02:
+ targetPointFlag = (byte) 0x02;
+ targetAdjList = outputValue.getAdjBitMap();
+ break;
+ }
+ if(startFlag != (byte) 0x00 && endFlag!= (byte) 0x00 && targetPointFlag != (byte) 0x00)
+ break;
+ }
+ if(targetPointFlag == (byte) 0x02) {
+ if(startFlag == (byte) 0x01) {
+ outputFlag = (byte) (outputFlag | startFlag);
+ }
+ if(endFlag == (byte) 0x80) {
+ outputFlag = (byte) (outputFlag | endFlag);
+ }
+ outputValue.set(targetAdjList, outputFlag, null);
+ output.collect(outputKmer, outputValue);
+ }
+ } else {
+ if (outputValue.getFlag() == 2) {
+ byte bitFlag = 0;
+ outputValue.set(outputValue.getAdjBitMap(), bitFlag, null);
+ output.collect(outputKmer, outputValue);
+ }
+ }
+ }
+}
diff --git a/genomix/genomix-hadoop/src/test/.DS_Store b/genomix/genomix-hadoop/src/test/.DS_Store
new file mode 100644
index 0000000..bfe14e8
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/java/.DS_Store b/genomix/genomix-hadoop/src/test/java/.DS_Store
new file mode 100644
index 0000000..fb3684c
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/java/edu/.DS_Store b/genomix/genomix-hadoop/src/test/java/edu/.DS_Store
new file mode 100644
index 0000000..f50e64b
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/.DS_Store b/genomix/genomix-hadoop/src/test/java/edu/uci/.DS_Store
new file mode 100644
index 0000000..9aea623
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/.DS_Store b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/.DS_Store
new file mode 100644
index 0000000..64f18c4
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/.DS_Store
Binary files differ
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java
index 44af11b..97b861c 100755
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/graphbuilding/GraphBuildingTest.java
@@ -32,6 +32,7 @@
import org.apache.hadoop.util.ReflectionUtils;
import org.junit.Test;
+import edu.uci.ics.genomix.type.GeneCode;
import edu.uci.ics.genomix.type.KmerBytesWritable;
import edu.uci.ics.genomix.type.KmerCountValue;
import edu.uci.ics.utils.TestUtils;
@@ -45,13 +46,14 @@
private static final String COMPARE_DIR = "compare";
private JobConf conf = new JobConf();
private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
- private static final String DATA_PATH = "data/webmap/Test.txt";
+ private static final String DATA_PATH = "data/webmap/BridgePath";
private static final String HDFS_PATH = "/webmap";
private static final String RESULT_PATH = "/result1";
private static final String EXPECTED_PATH = "expected/result1";
private static final String TEST_SOURCE_DIR = COMPARE_DIR + RESULT_PATH + "/comparesource.txt";
private static final int COUNT_REDUCER = 4;
- private static final int SIZE_KMER = 3;
+ private static final int SIZE_KMER = 5;
+ private static final String GRAPHVIZ = "Graphviz/GenomixSource.txt";
private MiniDFSCluster dfsCluster;
private MiniMRCluster mrCluster;
@@ -76,15 +78,35 @@
KmerCountValue value = (KmerCountValue) ReflectionUtils.newInstance(reader.getValueClass(), conf);
File filePathTo = new File(TEST_SOURCE_DIR);
BufferedWriter bw = new BufferedWriter(new FileWriter(filePathTo));
+ File GraphViz = new File(GRAPHVIZ);
+ BufferedWriter bw2 = new BufferedWriter(new FileWriter(GraphViz));
while (reader.next(key, value)) {
+ byte succeed = (byte) 0x0F;
+ byte adjBitMap = value.getAdjBitMap();
+ succeed = (byte) (succeed & adjBitMap);
+ byte shiftedCode = 0;
+ for(int i = 0 ; i < 4; i ++){
+ byte temp = 0x01;
+ temp = (byte)(temp << i);
+ temp = (byte) (succeed & temp);
+ if(temp != 0 ){
+ bw2.write(key.toString());
+ bw2.newLine();
+ byte succeedCode = GeneCode.getGeneCodeFromBitMap(temp);
+ shiftedCode = key.shiftKmerWithNextCode(succeedCode);
+ bw2.write(key.toString());
+ bw2.newLine();
+ key.shiftKmerWithPreCode(shiftedCode);
+ }
+ }
bw.write(key.toString() + "\t" + value.toString());
- bw.newLine();
+ bw.newLine();
}
bw.close();
dumpResult();
- TestUtils.compareWithResult(new File(TEST_SOURCE_DIR), new File(EXPECTED_PATH));
+// TestUtils.compareWithResult(new File(TEST_SOURCE_DIR), new File(EXPECTED_PATH));
cleanupHadoop();
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/pathmerging/MergePathTest.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/pathmerging/MergePathTest.java
index 95a9785..5e6e51e 100644
--- a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/pathmerging/MergePathTest.java
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/pathmerging/MergePathTest.java
@@ -18,6 +18,7 @@
import org.junit.Test;
import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
import edu.uci.ics.utils.TestUtils;
@SuppressWarnings("deprecation")
@@ -33,7 +34,7 @@
private static final String RESULT_PATH = "/result3";
private static final String EXPECTED_PATH = "expected/result3";
private static final String TEST_SOURCE_DIR = COMPARE_DIR + RESULT_PATH + "/comparesource.txt";
- private static final int COUNT_REDUCER = 4;
+ private static final int COUNT_REDUCER = 1;
private static final int SIZE_KMER = 3;
private MiniDFSCluster dfsCluster;
@@ -48,12 +49,13 @@
startHadoop();
MergePathDriver tldriver = new MergePathDriver();
- tldriver.run(HDFS_PATH, RESULT_PATH, HDFA_PATH_DATA, COUNT_REDUCER, SIZE_KMER, 3, HADOOP_CONF_PATH);
-
- SequenceFile.Reader reader = null;
- Path path = new Path(HDFA_PATH_DATA + "/complete2" + "/complete2-r-00000");
+ tldriver.run(HDFS_PATH, RESULT_PATH, HDFA_PATH_DATA, COUNT_REDUCER, SIZE_KMER, 1, HADOOP_CONF_PATH);
+
+/* SequenceFile.Reader reader = null;
+ Path path = new Path(RESULT_PATH + "/part-00000");
+// Path path = new Path(RESULT_PATH + "/uncomplete0" + "/uncomplete0-r-00000");
reader = new SequenceFile.Reader(dfs, path, conf);
- KmerBytesWritable key = (KmerBytesWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+ VKmerBytesWritable key = (VKmerBytesWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
MergePathValueWritable value = (MergePathValueWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
File filePathTo = new File(TEST_SOURCE_DIR);
BufferedWriter bw = new BufferedWriter(new FileWriter(filePathTo));
@@ -61,10 +63,10 @@
bw.write(key.toString() + "\t" + value.getAdjBitMap() + "\t" + value.getFlag());
bw.newLine();
}
- bw.close();
-
+ bw.close();*/
dumpResult();
- TestUtils.compareWithResult(new File(TEST_SOURCE_DIR), new File(EXPECTED_PATH));
+
+// TestUtils.compareWithResult(new File(TEST_SOURCE_DIR), new File(EXPECTED_PATH));
cleanupHadoop();
@@ -96,7 +98,8 @@
}
private void dumpResult() throws IOException {
- Path src = new Path(HDFA_PATH_DATA + "/" + "complete2");
+// Path src = new Path(HDFA_PATH_DATA + "/" + "complete2");
+ Path src = new Path(RESULT_PATH);
Path dest = new Path(ACTUAL_RESULT_DIR + "/");
dfs.copyToLocalFile(src, dest);
}
diff --git a/genomix/genomix-hadoop/src/test/java/edu/uci/ics/pathmergingh2/MergePathH2Test.java b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/pathmergingh2/MergePathH2Test.java
new file mode 100644
index 0000000..ff15299
--- /dev/null
+++ b/genomix/genomix-hadoop/src/test/java/edu/uci/ics/pathmergingh2/MergePathH2Test.java
@@ -0,0 +1,105 @@
+package edu.uci.ics.pathmergingh2;
+
+import java.io.BufferedWriter;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.junit.Test;
+import edu.uci.ics.genomix.type.KmerBytesWritable;
+import edu.uci.ics.genomix.type.VKmerBytesWritable;
+import edu.uci.ics.utils.TestUtils;
+
+@SuppressWarnings("deprecation")
+public class MergePathH2Test {
+ private static final String ACTUAL_RESULT_DIR = "actual4";
+ private static final String COMPARE_DIR = "compare";
+ private JobConf conf = new JobConf();
+ private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+ private static final String DATA_PATH = "actual2" + "/result2" + "/part-00000";
+ private static final String HDFS_PATH = "/webmap";
+ private static final String HDFA_PATH_DATA = "/webmapdata";
+
+ private static final String RESULT_PATH = "/result4";
+ private static final String EXPECTED_PATH = "expected/result4";
+ private static final String TEST_SOURCE_DIR = COMPARE_DIR + RESULT_PATH + "/comparesource.txt";
+ private static final int COUNT_REDUCER = 1;
+ private static final int SIZE_KMER = 3;
+
+ private MiniDFSCluster dfsCluster;
+ private MiniMRCluster mrCluster;
+ private FileSystem dfs;
+
+ @SuppressWarnings("resource")
+ @Test
+ public void test() throws Exception {
+ FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+ FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+ startHadoop();
+
+ MergePathH2Driver tldriver = new MergePathH2Driver();
+ tldriver.run(HDFS_PATH, RESULT_PATH, HDFA_PATH_DATA, COUNT_REDUCER, SIZE_KMER, 1, HADOOP_CONF_PATH);
+
+/* SequenceFile.Reader reader = null;
+// Path path = new Path(RESULT_PATH + "/part-00000");
+ Path path = new Path(RESULT_PATH + "/uncomplete0" + "/uncomplete0-r-00000");
+ reader = new SequenceFile.Reader(dfs, path, conf);
+ VKmerBytesWritable key = (VKmerBytesWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+ MergePathValueWritable value = (MergePathValueWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
+ File filePathTo = new File(TEST_SOURCE_DIR);
+ BufferedWriter bw = new BufferedWriter(new FileWriter(filePathTo));
+ while (reader.next(key, value)) {
+ bw.write(key.toString() + "\t" + value.getAdjBitMap() + "\t" + value.getFlag());
+ bw.newLine();
+ }
+ bw.close();*/
+// dumpResult();
+
+// TestUtils.compareWithResult(new File(TEST_SOURCE_DIR), new File(EXPECTED_PATH));
+
+ cleanupHadoop();
+
+ }
+ private void startHadoop() throws IOException {
+ FileSystem lfs = FileSystem.getLocal(new Configuration());
+ lfs.delete(new Path("build"), true);
+ System.setProperty("hadoop.log.dir", "logs");
+ dfsCluster = new MiniDFSCluster(conf, 2, true, null);
+ dfs = dfsCluster.getFileSystem();
+ mrCluster = new MiniMRCluster(4, dfs.getUri().toString(), 2);
+
+ Path src = new Path(DATA_PATH);
+ Path dest = new Path(HDFS_PATH + "/");
+ dfs.mkdirs(dest);
+ dfs.copyFromLocalFile(src, dest);
+ Path data = new Path(HDFA_PATH_DATA + "/");
+ dfs.mkdirs(data);
+
+ DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+ conf.writeXml(confOutput);
+ confOutput.flush();
+ confOutput.close();
+ }
+
+ private void cleanupHadoop() throws IOException {
+ mrCluster.shutdown();
+ dfsCluster.shutdown();
+ }
+
+ private void dumpResult() throws IOException {
+// Path src = new Path(HDFA_PATH_DATA + "/" + "complete2");
+ Path src = new Path(RESULT_PATH);
+ Path dest = new Path(ACTUAL_RESULT_DIR + "/");
+ dfs.copyToLocalFile(src, dest);
+ }
+}
\ No newline at end of file
diff --git a/genomix/genomix-hadoop/testactual/source.txt b/genomix/genomix-hadoop/testactual/source.txt
deleted file mode 100644
index aa7a107..0000000
--- a/genomix/genomix-hadoop/testactual/source.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-ATAGAAGATCGA A|T 1
-AATAGAAGATCG |A 1
-TAGAAGATCGAT A| 1