Complete the old code cleanup
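
Remove the superseded classes under edu.uci.ics.genomix.hyracks.data.primitive
(NodeReference, PositionListReference, PositionReference) and
edu.uci.ics.genomix.hyracks.job (GenomixJobConf, JobGen, JobGenBrujinGraph and
its subclasses JobGenCheckReader, JobGenCreateKmerInfo, JobGenGroupbyReadID,
JobGenMapKmerToRead, JobGenUnMerged), along with the old
JobRunStepByStepTest. Now that the old job package is gone, point
NodeSequenceWriterFactory at the newgraph GenomixJobConf and comment out the
old TestUtils import in the newgraph JobRun test.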
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/NodeReference.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/NodeReference.java
deleted file mode 100644
index 60c0682..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/NodeReference.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.data.primitive;
-
-import edu.uci.ics.genomix.velvet.oldtype.NodeWritable;
-
-public class NodeReference extends NodeWritable {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- public NodeReference(int kmerSize) {
- super(kmerSize);
- }
-
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionListReference.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionListReference.java
deleted file mode 100644
index 47a3047..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionListReference.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.data.primitive;
-
-import edu.uci.ics.genomix.oldtype.PositionListWritable;
-import edu.uci.ics.hyracks.data.std.api.IValueReference;
-
-public class PositionListReference extends PositionListWritable implements IValueReference {
-
- public PositionListReference(int countByDataLength, byte[] byteArray, int startOffset) {
- super(countByDataLength, byteArray, startOffset);
- }
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionReference.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionReference.java
deleted file mode 100644
index f066dc7..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionReference.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.data.primitive;
-
-import edu.uci.ics.genomix.oldtype.PositionWritable;
-import edu.uci.ics.hyracks.data.std.api.IValueReference;
-
-public class PositionReference extends PositionWritable implements IValueReference {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/GenomixJobConf.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/GenomixJobConf.java
deleted file mode 100644
index de56b83..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/GenomixJobConf.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.job;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapred.JobConf;
-
-@SuppressWarnings("deprecation")
-public class GenomixJobConf extends JobConf {
-
- public static final String JOB_NAME = "genomix";
-
- /** Kmers length */
- public static final String KMER_LENGTH = "genomix.kmerlen";
- /** Read length */
- public static final String READ_LENGTH = "genomix.readlen";
- /** Frame Size */
- public static final String FRAME_SIZE = "genomix.framesize";
- /** Frame Limit, hyracks need */
- public static final String FRAME_LIMIT = "genomix.framelimit";
- /** Table Size, hyracks need */
- public static final String TABLE_SIZE = "genomix.tablesize";
- /** Groupby types */
- public static final String GROUPBY_TYPE = "genomix.graph.groupby.type";
- /** Graph outputformat */
- public static final String OUTPUT_FORMAT = "genomix.graph.output";
- /** Get reversed Kmer Sequence */
- public static final String REVERSED_KMER = "genomix.kmer.reversed";
-
- /** Configurations used by hybrid groupby function in graph build phrase */
- public static final String GROUPBY_HYBRID_INPUTSIZE = "genomix.graph.groupby.hybrid.inputsize";
- public static final String GROUPBY_HYBRID_INPUTKEYS = "genomix.graph.groupby.hybrid.inputkeys";
- public static final String GROUPBY_HYBRID_RECORDSIZE_SINGLE = "genomix.graph.groupby.hybrid.recordsize.single";
- public static final String GROUPBY_HYBRID_RECORDSIZE_CROSS = "genomix.graph.groupby.hybrid.recordsize.cross";
- public static final String GROUPBY_HYBRID_HASHLEVEL = "genomix.graph.groupby.hybrid.hashlevel";
-
- public static final int DEFAULT_KMERLEN = 21;
- public static final int DEFAULT_READLEN = 124;
- public static final int DEFAULT_FRAME_SIZE = 128 * 1024;
- public static final int DEFAULT_FRAME_LIMIT = 4096;
- public static final int DEFAULT_TABLE_SIZE = 10485767;
- public static final long DEFAULT_GROUPBY_HYBRID_INPUTSIZE = 154000000L;
- public static final long DEFAULT_GROUPBY_HYBRID_INPUTKEYS = 38500000L;
- public static final int DEFAULT_GROUPBY_HYBRID_RECORDSIZE_SINGLE = 9;
- public static final int DEFAULT_GROUPBY_HYBRID_HASHLEVEL = 1;
- public static final int DEFAULT_GROUPBY_HYBRID_RECORDSIZE_CROSS = 13;
-
- public static final boolean DEFAULT_REVERSED = true;
-
- public static final String JOB_PLAN_GRAPHBUILD = "graphbuild";
- public static final String JOB_PLAN_GRAPHSTAT = "graphstat";
-
- public static final String GROUPBY_TYPE_HYBRID = "hybrid";
- public static final String GROUPBY_TYPE_EXTERNAL = "external";
- public static final String GROUPBY_TYPE_PRECLUSTER = "precluster";
- public static final String OUTPUT_FORMAT_BINARY = "binary";
- public static final String OUTPUT_FORMAT_TEXT = "text";
-
- public GenomixJobConf() throws IOException {
- super(new Configuration());
- }
-
- public GenomixJobConf(Configuration conf) throws IOException {
- super(conf);
- }
-
- /**
- * Set the kmer length
- *
- * @param the
- * desired frame kmerByteSize
- */
- final public void setKmerLength(int kmerlength) {
- setInt(KMER_LENGTH, kmerlength);
- }
-
- final public void setFrameSize(int frameSize) {
- setInt(FRAME_SIZE, frameSize);
- }
-
- final public void setFrameLimit(int frameLimit) {
- setInt(FRAME_LIMIT, frameLimit);
- }
-
- final public void setTableSize(int tableSize) {
- setInt(TABLE_SIZE, tableSize);
- }
-
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGen.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGen.java
deleted file mode 100644
index c8cb701..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGen.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.job;
-
-import java.io.Serializable;
-import java.util.UUID;
-
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.exceptions.HyracksException;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-import edu.uci.ics.hyracks.hdfs.dataflow.ConfFactory;
-
-public abstract class JobGen implements Serializable {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
- protected final ConfFactory confFactory;
- protected String jobId = new UUID(System.currentTimeMillis(), System.nanoTime()).toString();
-
- public JobGen(GenomixJobConf job) throws HyracksDataException {
- this.confFactory = new ConfFactory(job);
- }
-
- public abstract JobSpecification generateJob() throws HyracksException;
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenBrujinGraph.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenBrujinGraph.java
deleted file mode 100644
index 7571653..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenBrujinGraph.java
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.job;
-
-import java.io.IOException;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-
-import edu.uci.ics.genomix.hyracks.data.accessors.KmerHashPartitioncomputerFactory;
-import edu.uci.ics.genomix.hyracks.data.accessors.KmerNormarlizedComputerFactory;
-import edu.uci.ics.genomix.hyracks.data.accessors.ReadIDPartitionComputerFactory;
-import edu.uci.ics.genomix.hyracks.data.primitive.KmerPointable;
-import edu.uci.ics.genomix.hyracks.dataflow.ConnectorPolicyAssignmentPolicy;
-import edu.uci.ics.genomix.hyracks.dataflow.MapKmerPositionToReadOperator;
-import edu.uci.ics.genomix.hyracks.dataflow.MapReadToNodeOperator;
-import edu.uci.ics.genomix.hyracks.dataflow.ReadsKeyValueParserFactory;
-import edu.uci.ics.genomix.hyracks.dataflow.aggregators.AggregateKmerAggregateFactory;
-import edu.uci.ics.genomix.hyracks.dataflow.aggregators.AggregateReadIDAggregateFactory;
-import edu.uci.ics.genomix.hyracks.dataflow.aggregators.MergeKmerAggregateFactory;
-import edu.uci.ics.genomix.hyracks.dataflow.aggregators.MergeReadIDAggregateFactory;
-import edu.uci.ics.genomix.hyracks.dataflow.io.KMerSequenceWriterFactory;
-import edu.uci.ics.genomix.hyracks.dataflow.io.KMerTextWriterFactory;
-import edu.uci.ics.genomix.hyracks.dataflow.io.NodeSequenceWriterFactory;
-import edu.uci.ics.genomix.hyracks.dataflow.io.NodeTextWriterFactory;
-import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
-import edu.uci.ics.hyracks.api.constraints.PartitionConstraintHelper;
-import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;
-import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
-import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputerFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.exceptions.HyracksException;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
-import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory;
-import edu.uci.ics.hyracks.data.std.api.IPointableFactory;
-import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
-import edu.uci.ics.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.group.HashSpillableTableFactory;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
-import edu.uci.ics.hyracks.dataflow.std.group.external.ExternalGroupOperatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
-import edu.uci.ics.hyracks.hdfs.dataflow.ConfFactory;
-import edu.uci.ics.hyracks.hdfs.dataflow.HDFSReadOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.dataflow.HDFSWriteOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
-
-@SuppressWarnings("deprecation")
-public class JobGenBrujinGraph extends JobGen {
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- public enum GroupbyType {
- EXTERNAL,
- PRECLUSTER,
- HYBRIDHASH,
- }
-
- public enum OutputFormat {
- TEXT,
- BINARY,
- }
-
- protected ConfFactory hadoopJobConfFactory;
- protected static final Log LOG = LogFactory.getLog(JobGenBrujinGraph.class);
- protected String[] ncNodeNames;
- protected String[] readSchedule;
-
- protected int readLength;
- protected int kmerSize;
- protected int frameLimits;
- protected int frameSize;
- protected int tableSize;
- protected GroupbyType groupbyType;
- protected OutputFormat outputFormat;
- protected boolean bGenerateReversedKmer;
-
- protected void logDebug(String status) {
- LOG.debug(status + " nc nodes:" + ncNodeNames.length);
- }
-
- public JobGenBrujinGraph(GenomixJobConf job, Scheduler scheduler, final Map<String, NodeControllerInfo> ncMap,
- int numPartitionPerMachine) throws HyracksDataException {
- super(job);
- String[] nodes = new String[ncMap.size()];
- ncMap.keySet().toArray(nodes);
- ncNodeNames = new String[nodes.length * numPartitionPerMachine];
- for (int i = 0; i < numPartitionPerMachine; i++) {
- System.arraycopy(nodes, 0, ncNodeNames, i * nodes.length, nodes.length);
- }
- initJobConfiguration(scheduler);
- }
-
- private ExternalGroupOperatorDescriptor newExternalGroupby(JobSpecification jobSpec, int[] keyFields,
- IAggregatorDescriptorFactory aggeragater, IAggregatorDescriptorFactory merger,
- ITuplePartitionComputerFactory partition, INormalizedKeyComputerFactory normalizer,
- IPointableFactory pointable, RecordDescriptor outRed) {
- return new ExternalGroupOperatorDescriptor(jobSpec, keyFields, frameLimits,
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(pointable) }, normalizer,
- aggeragater, merger, outRed, new HashSpillableTableFactory(new FieldHashPartitionComputerFactory(
- keyFields,
- new IBinaryHashFunctionFactory[] { PointableBinaryHashFunctionFactory.of(pointable) }),
- tableSize), true);
- }
-
- private Object[] generateAggeragateDescriptorbyType(JobSpecification jobSpec, int[] keyFields,
- IAggregatorDescriptorFactory aggregator, IAggregatorDescriptorFactory merger,
- ITuplePartitionComputerFactory partition, INormalizedKeyComputerFactory normalizer,
- IPointableFactory pointable, RecordDescriptor combineRed, RecordDescriptor finalRec)
- throws HyracksDataException {
-
- Object[] obj = new Object[3];
-
- switch (groupbyType) {
- case EXTERNAL:
- obj[0] = newExternalGroupby(jobSpec, keyFields, aggregator, merger, partition, normalizer, pointable,
- combineRed);
- obj[1] = new MToNPartitioningConnectorDescriptor(jobSpec, partition);
- obj[2] = newExternalGroupby(jobSpec, keyFields, merger, merger, partition, normalizer, pointable,
- finalRec);
- break;
- case PRECLUSTER:
- default:
-
- obj[0] = new PreclusteredGroupOperatorDescriptor(jobSpec, keyFields,
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(pointable) }, aggregator,
- combineRed);
- obj[1] = new MToNPartitioningMergingConnectorDescriptor(jobSpec, partition, keyFields,
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(pointable) });
- obj[2] = new PreclusteredGroupOperatorDescriptor(jobSpec, keyFields,
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(pointable) }, merger,
- finalRec);
- jobSpec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
- break;
- }
- return obj;
- }
-
- public HDFSReadOperatorDescriptor createHDFSReader(JobSpecification jobSpec) throws HyracksDataException {
- try {
- InputSplit[] splits = hadoopJobConfFactory.getConf().getInputFormat()
- .getSplits(hadoopJobConfFactory.getConf(), ncNodeNames.length);
-
- return new HDFSReadOperatorDescriptor(jobSpec, ReadsKeyValueParserFactory.readKmerOutputRec,
- hadoopJobConfFactory.getConf(), splits, readSchedule, new ReadsKeyValueParserFactory(readLength,
- kmerSize, bGenerateReversedKmer));
- } catch (Exception e) {
- throw new HyracksDataException(e);
- }
- }
-
- public static void connectOperators(JobSpecification jobSpec, IOperatorDescriptor preOp, String[] preNodes,
- IOperatorDescriptor nextOp, String[] nextNodes, IConnectorDescriptor conn) {
- PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, preOp, preNodes);
- PartitionConstraintHelper.addAbsoluteLocationConstraint(jobSpec, nextOp, nextNodes);
- jobSpec.connect(conn, preOp, 0, nextOp, 0);
- }
-
- public AbstractOperatorDescriptor generateGroupbyKmerJob(JobSpecification jobSpec,
- AbstractOperatorDescriptor readOperator) throws HyracksDataException {
- int[] keyFields = new int[] { 0 }; // the id of grouped key
-
- ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(jobSpec, frameLimits, keyFields,
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(KmerPointable.FACTORY) },
- ReadsKeyValueParserFactory.readKmerOutputRec);
- connectOperators(jobSpec, readOperator, ncNodeNames, sorter, ncNodeNames, new OneToOneConnectorDescriptor(
- jobSpec));
-
- RecordDescriptor combineKmerOutputRec = new RecordDescriptor(new ISerializerDeserializer[] { null, null });
- jobSpec.setFrameSize(frameSize);
-
- Object[] objs = generateAggeragateDescriptorbyType(jobSpec, keyFields, new AggregateKmerAggregateFactory(),
- new MergeKmerAggregateFactory(), new KmerHashPartitioncomputerFactory(),
- new KmerNormarlizedComputerFactory(), KmerPointable.FACTORY, combineKmerOutputRec, combineKmerOutputRec);
- AbstractOperatorDescriptor kmerLocalAggregator = (AbstractOperatorDescriptor) objs[0];
- logDebug("LocalKmerGroupby Operator");
- connectOperators(jobSpec, sorter, ncNodeNames, kmerLocalAggregator, ncNodeNames,
- new OneToOneConnectorDescriptor(jobSpec));
-
- logDebug("CrossKmerGroupby Operator");
- IConnectorDescriptor kmerConnPartition = (IConnectorDescriptor) objs[1];
- AbstractOperatorDescriptor kmerCrossAggregator = (AbstractOperatorDescriptor) objs[2];
- connectOperators(jobSpec, kmerLocalAggregator, ncNodeNames, kmerCrossAggregator, ncNodeNames, kmerConnPartition);
- return kmerCrossAggregator;
- }
-
- public AbstractOperatorDescriptor generateMapperFromKmerToRead(JobSpecification jobSpec,
- AbstractOperatorDescriptor kmerCrossAggregator) {
- // Map (Kmer, {(ReadID,PosInRead),...}) into
- // (ReadID,PosInRead,{OtherPosition,...},Kmer)
-
- AbstractOperatorDescriptor mapKmerToRead = new MapKmerPositionToReadOperator(jobSpec,
- MapKmerPositionToReadOperator.readIDOutputRec, readLength, kmerSize);
- connectOperators(jobSpec, kmerCrossAggregator, ncNodeNames, mapKmerToRead, ncNodeNames,
- new OneToOneConnectorDescriptor(jobSpec));
- return mapKmerToRead;
- }
-
- public AbstractOperatorDescriptor generateGroupbyReadJob(JobSpecification jobSpec,
- AbstractOperatorDescriptor mapKmerToRead) throws HyracksDataException {
- int[] keyFields = new int[] { 0 }; // the id of grouped key
- // (ReadID, {(PosInRead,{OtherPositoin..},Kmer) ...}
- ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(jobSpec, frameLimits, keyFields,
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
- MapKmerPositionToReadOperator.readIDOutputRec);
- connectOperators(jobSpec, mapKmerToRead, ncNodeNames, sorter, ncNodeNames, new OneToOneConnectorDescriptor(
- jobSpec));
-
- RecordDescriptor readIDFinalRec = new RecordDescriptor(
- new ISerializerDeserializer[1 + 2 * MergeReadIDAggregateFactory.getPositionCount(readLength, kmerSize)]);
- Object[] objs = generateAggeragateDescriptorbyType(jobSpec, keyFields, new AggregateReadIDAggregateFactory(),
- new MergeReadIDAggregateFactory(readLength, kmerSize), new ReadIDPartitionComputerFactory(), null,
- IntegerPointable.FACTORY, AggregateReadIDAggregateFactory.readIDAggregateRec, readIDFinalRec);
- AbstractOperatorDescriptor readLocalAggregator = (AbstractOperatorDescriptor) objs[0];
- connectOperators(jobSpec, sorter, ncNodeNames, readLocalAggregator, ncNodeNames,
- new OneToOneConnectorDescriptor(jobSpec));
-
- logDebug("Group by ReadID merger");
- IConnectorDescriptor readconn = (IConnectorDescriptor) objs[1];
- AbstractOperatorDescriptor readCrossAggregator = (AbstractOperatorDescriptor) objs[2];
- connectOperators(jobSpec, readLocalAggregator, ncNodeNames, readCrossAggregator, ncNodeNames, readconn);
- return readCrossAggregator;
- }
-
- public AbstractOperatorDescriptor generateMapperFromReadToNode(JobSpecification jobSpec,
- AbstractOperatorDescriptor readCrossAggregator) {
- // Map (ReadID, [(Poslist,Kmer) ... ]) to (Node, IncomingList,
- // OutgoingList, Kmer)
-
- AbstractOperatorDescriptor mapEachReadToNode = new MapReadToNodeOperator(jobSpec,
- MapReadToNodeOperator.nodeOutputRec, kmerSize, true);
- connectOperators(jobSpec, readCrossAggregator, ncNodeNames, mapEachReadToNode, ncNodeNames,
- new OneToOneConnectorDescriptor(jobSpec));
- return mapEachReadToNode;
- }
-
- public AbstractOperatorDescriptor generateKmerWritorOperator(JobSpecification jobSpec,
- AbstractOperatorDescriptor kmerCrossAggregator) throws HyracksException {
- // Output Kmer
- ITupleWriterFactory kmerWriter = null;
- switch (outputFormat) {
- case TEXT:
- kmerWriter = new KMerTextWriterFactory(kmerSize);
- break;
- case BINARY:
- default:
- kmerWriter = new KMerSequenceWriterFactory(hadoopJobConfFactory.getConf());
- break;
- }
- logDebug("WriteOperator");
- HDFSWriteOperatorDescriptor writeKmerOperator = new HDFSWriteOperatorDescriptor(jobSpec,
- hadoopJobConfFactory.getConf(), kmerWriter);
- connectOperators(jobSpec, kmerCrossAggregator, ncNodeNames, writeKmerOperator, ncNodeNames,
- new OneToOneConnectorDescriptor(jobSpec));
- return writeKmerOperator;
- }
-
- public AbstractOperatorDescriptor generateNodeWriterOpertator(JobSpecification jobSpec,
- AbstractOperatorDescriptor mapEachReadToNode) throws HyracksException {
- ITupleWriterFactory nodeWriter = null;
- switch (outputFormat) {
- case TEXT:
- nodeWriter = new NodeTextWriterFactory(kmerSize);
- break;
- case BINARY:
- default:
- nodeWriter = new NodeSequenceWriterFactory(hadoopJobConfFactory.getConf());
- break;
- }
- logDebug("WriteOperator");
- // Output Node
- HDFSWriteOperatorDescriptor writeNodeOperator = new HDFSWriteOperatorDescriptor(jobSpec,
- hadoopJobConfFactory.getConf(), nodeWriter);
- connectOperators(jobSpec, mapEachReadToNode, ncNodeNames, writeNodeOperator, ncNodeNames,
- new OneToOneConnectorDescriptor(jobSpec));
- return writeNodeOperator;
- }
-
- @Override
- public JobSpecification generateJob() throws HyracksException {
-
- JobSpecification jobSpec = new JobSpecification();
- logDebug("ReadKmer Operator");
-
- HDFSReadOperatorDescriptor readOperator = createHDFSReader(jobSpec);
-
- logDebug("Group by Kmer");
- AbstractOperatorDescriptor lastOperator = generateGroupbyKmerJob(jobSpec, readOperator);
-
- // logDebug("Write kmer to result");
- // generateRootByWriteKmerGroupbyResult(jobSpec, lastOperator);
-
- logDebug("Map Kmer to Read Operator");
- lastOperator = generateMapperFromKmerToRead(jobSpec, lastOperator);
-
- logDebug("Group by Read Operator");
- lastOperator = generateGroupbyReadJob(jobSpec, lastOperator);
-
- logDebug("Generate final node");
- lastOperator = generateMapperFromReadToNode(jobSpec, lastOperator);
- logDebug("Write node to result");
- lastOperator = generateNodeWriterOpertator(jobSpec, lastOperator);
-
- jobSpec.addRoot(lastOperator);
- return jobSpec;
- }
-
- protected void initJobConfiguration(Scheduler scheduler) throws HyracksDataException {
- Configuration conf = confFactory.getConf();
- readLength = conf.getInt(GenomixJobConf.READ_LENGTH, GenomixJobConf.DEFAULT_READLEN);
- kmerSize = conf.getInt(GenomixJobConf.KMER_LENGTH, GenomixJobConf.DEFAULT_KMERLEN);
- if (kmerSize % 2 == 0) {
- kmerSize--;
- conf.setInt(GenomixJobConf.KMER_LENGTH, kmerSize);
- }
- frameLimits = conf.getInt(GenomixJobConf.FRAME_LIMIT, GenomixJobConf.DEFAULT_FRAME_LIMIT);
- tableSize = conf.getInt(GenomixJobConf.TABLE_SIZE, GenomixJobConf.DEFAULT_TABLE_SIZE);
- frameSize = conf.getInt(GenomixJobConf.FRAME_SIZE, GenomixJobConf.DEFAULT_FRAME_SIZE);
-
- bGenerateReversedKmer = conf.getBoolean(GenomixJobConf.REVERSED_KMER, GenomixJobConf.DEFAULT_REVERSED);
-
- String type = conf.get(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
- if (type.equalsIgnoreCase(GenomixJobConf.GROUPBY_TYPE_EXTERNAL)) {
- groupbyType = GroupbyType.EXTERNAL;
- } else if (type.equalsIgnoreCase(GenomixJobConf.GROUPBY_TYPE_PRECLUSTER)) {
- groupbyType = GroupbyType.PRECLUSTER;
- } else {
- groupbyType = GroupbyType.HYBRIDHASH;
- }
-
- String output = conf.get(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_BINARY);
- if (output.equalsIgnoreCase("text")) {
- outputFormat = OutputFormat.TEXT;
- } else {
- outputFormat = OutputFormat.BINARY;
- }
- try {
- hadoopJobConfFactory = new ConfFactory(new JobConf(conf));
- InputSplit[] splits = hadoopJobConfFactory.getConf().getInputFormat()
- .getSplits(hadoopJobConfFactory.getConf(), ncNodeNames.length);
- readSchedule = scheduler.getLocationConstraints(splits);
- } catch (IOException ex) {
- throw new HyracksDataException(ex);
- }
-
- LOG.info("Genomix Graph Build Configuration");
- LOG.info("Kmer:" + kmerSize);
- LOG.info("Groupby type:" + type);
- LOG.info("Output format:" + output);
- LOG.info("Frame limit" + frameLimits);
- LOG.info("Frame kmerByteSize" + frameSize);
- }
-
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCheckReader.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCheckReader.java
deleted file mode 100644
index b4b1e73..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCheckReader.java
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hyracks.job;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Map;
-
-import edu.uci.ics.genomix.hyracks.dataflow.ReadsKeyValueParserFactory;
-import edu.uci.ics.genomix.velvet.oldtype.PositionWritable;
-import edu.uci.ics.genomix.velvet.oldtype.KmerBytesWritable;
-import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.exceptions.HyracksException;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
-import edu.uci.ics.hyracks.hdfs.dataflow.HDFSReadOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.dataflow.HDFSWriteOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
-
-public class JobGenCheckReader extends JobGenBrujinGraph {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- public JobGenCheckReader(GenomixJobConf job, Scheduler scheduler, Map<String, NodeControllerInfo> ncMap,
- int numPartitionPerMachine) throws HyracksDataException {
- super(job, scheduler, ncMap, numPartitionPerMachine);
- }
-
- @Override
- public JobSpecification generateJob() throws HyracksException {
-
- JobSpecification jobSpec = new JobSpecification();
- logDebug("ReadKmer Operator");
- HDFSReadOperatorDescriptor readOperator = createHDFSReader(jobSpec);
-
- logDebug("Write kmer to result");
- generateRootByWriteKmerReader(jobSpec, readOperator);
-
- return jobSpec;
- }
-
- public AbstractSingleActivityOperatorDescriptor generateRootByWriteKmerReader(JobSpecification jobSpec,
- HDFSReadOperatorDescriptor readOperator) throws HyracksException {
- // Output Kmer
- HDFSWriteOperatorDescriptor writeKmerOperator = new HDFSWriteOperatorDescriptor(jobSpec,
- hadoopJobConfFactory.getConf(), new ITupleWriterFactory() {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- @Override
- public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException {
- return new ITupleWriter() {
-
- private KmerBytesWritable kmer = new KmerBytesWritable(kmerSize);
- private PositionWritable pos = new PositionWritable();
-
- @Override
- public void open(DataOutput output) throws HyracksDataException {
- }
-
- @Override
- public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
- try {
- if (kmer.getLength() > tuple
- .getFieldLength(ReadsKeyValueParserFactory.OutputKmerField)) {
- throw new IllegalArgumentException("Not enough kmer bytes");
- }
- kmer.setNewReference(
- tuple.getFieldData(ReadsKeyValueParserFactory.OutputKmerField),
- tuple.getFieldStart(ReadsKeyValueParserFactory.OutputKmerField));
- pos.setNewReference(tuple.getFieldData(ReadsKeyValueParserFactory.OutputPosition),
- tuple.getFieldStart(ReadsKeyValueParserFactory.OutputPosition));
-
- output.write(kmer.toString().getBytes());
- output.writeByte('\t');
- output.write(pos.toString().getBytes());
- output.writeByte('\n');
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- }
-
- @Override
- public void close(DataOutput output) throws HyracksDataException {
-
- }
-
- };
- }
-
- });
- connectOperators(jobSpec, readOperator, ncNodeNames, writeKmerOperator, ncNodeNames,
- new OneToOneConnectorDescriptor(jobSpec));
- jobSpec.addRoot(writeKmerOperator);
- return writeKmerOperator;
- }
-
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCreateKmerInfo.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCreateKmerInfo.java
deleted file mode 100644
index 5202ba2..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCreateKmerInfo.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hyracks.job;
-
-import java.util.Map;
-
-import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.exceptions.HyracksException;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.dataflow.HDFSReadOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
-
-public class JobGenCreateKmerInfo extends JobGenBrujinGraph {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- public JobGenCreateKmerInfo(GenomixJobConf job, Scheduler scheduler, Map<String, NodeControllerInfo> ncMap,
- int numPartitionPerMachine) throws HyracksDataException {
- super(job, scheduler, ncMap, numPartitionPerMachine);
- }
-
- @Override
- public JobSpecification generateJob() throws HyracksException {
-
- JobSpecification jobSpec = new JobSpecification();
- logDebug("ReadKmer Operator");
- HDFSReadOperatorDescriptor readOperator = createHDFSReader(jobSpec);
-
- logDebug("Group by Kmer");
- AbstractOperatorDescriptor lastOperator = generateGroupbyKmerJob(jobSpec, readOperator);
-
- logDebug("Write kmer to result");
- lastOperator = generateKmerWritorOperator(jobSpec, lastOperator);
- jobSpec.addRoot(lastOperator);
-
- return jobSpec;
- }
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenGroupbyReadID.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenGroupbyReadID.java
deleted file mode 100644
index 1e78b79..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenGroupbyReadID.java
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hyracks.job;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Map;
-
-import edu.uci.ics.genomix.data.Marshal;
-import edu.uci.ics.genomix.velvet.oldtype.PositionListWritable;
-import edu.uci.ics.genomix.velvet.oldtype.KmerBytesWritable;
-import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.exceptions.HyracksException;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
-import edu.uci.ics.hyracks.hdfs.dataflow.HDFSReadOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.dataflow.HDFSWriteOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
-
-public class JobGenGroupbyReadID extends JobGenBrujinGraph {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- public JobGenGroupbyReadID(GenomixJobConf job, Scheduler scheduler, Map<String, NodeControllerInfo> ncMap,
- int numPartitionPerMachine) throws HyracksDataException {
- super(job, scheduler, ncMap, numPartitionPerMachine);
- }
-
- @Override
- public JobSpecification generateJob() throws HyracksException {
-
- JobSpecification jobSpec = new JobSpecification();
- logDebug("ReadKmer Operator");
- HDFSReadOperatorDescriptor readOperator = createHDFSReader(jobSpec);
-
- logDebug("Group by Kmer");
- AbstractOperatorDescriptor lastOperator = generateGroupbyKmerJob(jobSpec, readOperator);
-
- //logDebug("Write kmer to result");
- //generateRootByWriteKmerGroupbyResult(jobSpec, lastOperator);
-
- logDebug("Map Kmer to Read Operator");
- lastOperator = generateMapperFromKmerToRead(jobSpec, lastOperator);
-
- logDebug("Group by Read Operator");
- lastOperator = generateGroupbyReadJob(jobSpec, lastOperator);
-
- logDebug("Write node to result");
- lastOperator = generateRootByWriteReadIDAggregationResult(jobSpec, lastOperator);
- jobSpec.addRoot(lastOperator);
- return jobSpec;
- }
-
- public AbstractOperatorDescriptor generateRootByWriteReadIDAggregationResult(JobSpecification jobSpec,
- AbstractOperatorDescriptor readCrossAggregator) throws HyracksException {
- HDFSWriteOperatorDescriptor writeKmerOperator = new HDFSWriteOperatorDescriptor(jobSpec,
- hadoopJobConfFactory.getConf(), new ITupleWriterFactory() {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- @Override
- public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException {
- return new ITupleWriter() {
-
- private KmerBytesWritable kmer = new KmerBytesWritable(kmerSize);
- private PositionListWritable plist = new PositionListWritable();
-
- @Override
- public void open(DataOutput output) throws HyracksDataException {
-
- }
-
- @Override
- public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
- int readId = Marshal.getInt(tuple.getFieldData(0), tuple.getFieldStart(0));
- try {
- output.write((Integer.toString(readId) + "\t").getBytes());
- for (int i = 1; i < tuple.getFieldCount(); i++) {
- int fieldOffset = tuple.getFieldStart(i);
- while (fieldOffset < tuple.getFieldStart(i) + tuple.getFieldLength(i)) {
- byte[] buffer = tuple.getFieldData(i);
- // read poslist
- int posCount = PositionListWritable.getCountByDataLength(Marshal.getInt(
- buffer, fieldOffset));
- fieldOffset += 4;
- plist.setNewReference(posCount, buffer, fieldOffset);
- fieldOffset += plist.getLength();
-
- int posInRead = (i + 1) / 2;
- if (i % 2 == 0) {
- posInRead = -posInRead;
- }
- String kmerString = "";
- if (posInRead > 0) {
- int kmerbytes = Marshal.getInt(buffer, fieldOffset);
- if (kmer.getLength() != kmerbytes) {
- throw new IllegalArgumentException("kmerlength is invalid");
- }
- fieldOffset += 4;
- kmer.setNewReference(buffer, fieldOffset);
- fieldOffset += kmer.getLength();
- kmerString = kmer.toString();
- }
-
- output.write(Integer.toString(posInRead).getBytes());
- output.writeByte('\t');
- output.write(plist.toString().getBytes());
- output.writeByte('\t');
- output.write(kmerString.getBytes());
- output.writeByte('\t');
- }
- }
- output.writeByte('\n');
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- }
-
- @Override
- public void close(DataOutput output) throws HyracksDataException {
-
- }
-
- };
- }
-
- });
- connectOperators(jobSpec, readCrossAggregator, ncNodeNames, writeKmerOperator, ncNodeNames,
- new OneToOneConnectorDescriptor(jobSpec));
-
- return writeKmerOperator;
- }
-
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenMapKmerToRead.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenMapKmerToRead.java
deleted file mode 100644
index 8e727959..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenMapKmerToRead.java
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hyracks.job;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.Map;
-
-import edu.uci.ics.genomix.data.Marshal;
-import edu.uci.ics.genomix.hyracks.dataflow.MapKmerPositionToReadOperator;
-import edu.uci.ics.genomix.velvet.oldtype.PositionListWritable;
-import edu.uci.ics.genomix.velvet.oldtype.KmerBytesWritable;
-import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.exceptions.HyracksException;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;
-import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
-import edu.uci.ics.hyracks.hdfs.dataflow.HDFSReadOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.dataflow.HDFSWriteOperatorDescriptor;
-import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
-
-public class JobGenMapKmerToRead extends JobGenBrujinGraph {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- public JobGenMapKmerToRead(GenomixJobConf job, Scheduler scheduler, Map<String, NodeControllerInfo> ncMap,
- int numPartitionPerMachine) throws HyracksDataException {
- super(job, scheduler, ncMap, numPartitionPerMachine);
- }
-
- public AbstractOperatorDescriptor generateRootByWriteMapperFromKmerToReadID(JobSpecification jobSpec,
- AbstractOperatorDescriptor mapper) throws HyracksException {
- // Output Kmer
- HDFSWriteOperatorDescriptor writeKmerOperator = new HDFSWriteOperatorDescriptor(jobSpec,
- hadoopJobConfFactory.getConf(), new ITupleWriterFactory() {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- @Override
- public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException {
- return new ITupleWriter() {
-
- private KmerBytesWritable kmer = new KmerBytesWritable(kmerSize);
- private PositionListWritable plist = new PositionListWritable();
-
- @Override
- public void open(DataOutput output) throws HyracksDataException {
-
- }
-
- @Override
- public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
- try {
- int readID = Marshal.getInt(
- tuple.getFieldData(MapKmerPositionToReadOperator.OutputReadIDField),
- tuple.getFieldStart(MapKmerPositionToReadOperator.OutputReadIDField));
- byte posInRead = tuple
- .getFieldData(MapKmerPositionToReadOperator.OutputPosInReadField)[tuple
- .getFieldStart(MapKmerPositionToReadOperator.OutputPosInReadField)];
- int posCount = PositionListWritable.getCountByDataLength(tuple
- .getFieldLength(MapKmerPositionToReadOperator.OutputOtherReadIDListField));
- plist.setNewReference(
- posCount,
- tuple.getFieldData(MapKmerPositionToReadOperator.OutputOtherReadIDListField),
- tuple.getFieldStart(MapKmerPositionToReadOperator.OutputOtherReadIDListField));
-
- String kmerString = "";
- if (posInRead > 0) {
- if (kmer.getLength() > tuple
- .getFieldLength(MapKmerPositionToReadOperator.OutputKmerField)) {
- throw new IllegalArgumentException("Not enough kmer bytes");
- }
- kmer.setNewReference(
- tuple.getFieldData(MapKmerPositionToReadOperator.OutputKmerField),
- tuple.getFieldStart(MapKmerPositionToReadOperator.OutputKmerField));
- kmerString = kmer.toString();
- }
-
- output.write(Integer.toString(readID).getBytes());
- output.writeByte('\t');
- output.write(Integer.toString(posInRead).getBytes());
- output.writeByte('\t');
- output.write(plist.toString().getBytes());
- output.writeByte('\t');
- output.write(kmerString.getBytes());
- output.writeByte('\n');
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
-
- }
-
- @Override
- public void close(DataOutput output) throws HyracksDataException {
-
- }
-
- };
- }
-
- });
- connectOperators(jobSpec, mapper, ncNodeNames, writeKmerOperator, ncNodeNames, new OneToOneConnectorDescriptor(
- jobSpec));
- jobSpec.addRoot(writeKmerOperator);
- return writeKmerOperator;
- }
-
- @Override
- public JobSpecification generateJob() throws HyracksException {
-
- JobSpecification jobSpec = new JobSpecification();
- logDebug("ReadKmer Operator");
- HDFSReadOperatorDescriptor readOperator = createHDFSReader(jobSpec);
-
- logDebug("Group by Kmer");
- AbstractOperatorDescriptor lastOperator = generateGroupbyKmerJob(jobSpec, readOperator);
-
- logDebug("Map Kmer to Read Operator");
- lastOperator = generateMapperFromKmerToRead(jobSpec, lastOperator);
-
- generateRootByWriteMapperFromKmerToReadID(jobSpec, lastOperator);
-
- return jobSpec;
- }
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenUnMerged.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenUnMerged.java
deleted file mode 100644
index 21b6385..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenUnMerged.java
+++ /dev/null
@@ -1,34 +0,0 @@
-package edu.uci.ics.genomix.hyracks.job;
-
-import java.util.Map;
-
-import edu.uci.ics.genomix.hyracks.dataflow.MapReadToNodeOperator;
-import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
-import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
-
-public class JobGenUnMerged extends JobGenBrujinGraph {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- public JobGenUnMerged(GenomixJobConf job, Scheduler scheduler, Map<String, NodeControllerInfo> ncMap,
- int numPartitionPerMachine) throws HyracksDataException {
- super(job, scheduler, ncMap, numPartitionPerMachine);
- }
-
- @Override
- public AbstractOperatorDescriptor generateMapperFromReadToNode(JobSpecification jobSpec,
- AbstractOperatorDescriptor readCrossAggregator) {
- AbstractOperatorDescriptor mapEachReadToNode = new MapReadToNodeOperator(jobSpec,
- MapReadToNodeOperator.nodeOutputRec, kmerSize, false);
- connectOperators(jobSpec, readCrossAggregator, ncNodeNames, mapEachReadToNode, ncNodeNames,
- new OneToOneConnectorDescriptor(jobSpec));
- return mapEachReadToNode;
- }
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/io/NodeSequenceWriterFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/io/NodeSequenceWriterFactory.java
index b77ee2c..1ca59d8 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/io/NodeSequenceWriterFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/newgraph/io/NodeSequenceWriterFactory.java
@@ -22,7 +22,7 @@
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.mapred.JobConf;
-import edu.uci.ics.genomix.hyracks.job.GenomixJobConf;
+import edu.uci.ics.genomix.hyracks.newgraph.job.GenomixJobConf;
import edu.uci.ics.genomix.hyracks.newgraph.dataflow.AssembleKeyIntoNodeOperator;
import edu.uci.ics.genomix.type.NodeWritable;
import edu.uci.ics.genomix.type.KmerBytesWritable;
diff --git a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/JobRun.java b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/JobRun.java
index 5b5ef25..87627e1 100644
--- a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/JobRun.java
+++ b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/newgraph/test/JobRun.java
@@ -26,7 +26,7 @@
import edu.uci.ics.genomix.hyracks.newgraph.job.GenomixJobConf;
import edu.uci.ics.genomix.hyracks.newgraph.driver.Driver;
import edu.uci.ics.genomix.hyracks.newgraph.driver.Driver.Plan;
-import edu.uci.ics.genomix.hyracks.test.TestUtils;
+//import edu.uci.ics.genomix.hyracks.test.TestUtils;
//import edu.uci.ics.genomix.oldtype.NodeWritable;
@SuppressWarnings("deprecation")
diff --git a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java
deleted file mode 100644
index fbbc89a..0000000
--- a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.genomix.hyracks.test;
-
-import java.io.BufferedWriter;
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileWriter;
-import java.io.IOException;
-
-import junit.framework.Assert;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobConf;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
-import edu.uci.ics.genomix.hyracks.driver.Driver;
-import edu.uci.ics.genomix.hyracks.driver.Driver.Plan;
-import edu.uci.ics.genomix.hyracks.job.GenomixJobConf;
-import edu.uci.ics.genomix.velvet.oldtype.NodeWritable;
-
-@SuppressWarnings("deprecation")
-public class JobRunStepByStepTest {
- private static final int KmerSize = 5;
- private static final int ReadLength = 7;
- private static final String ACTUAL_RESULT_DIR = "actual";
- private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
-
- private static final String DATA_INPUT_PATH = "src/test/resources/data/webmap/test.txt";
- private static final String HDFS_INPUT_PATH = "/webmap";
- private static final String HDFS_OUTPUT_PATH = "/webmap_result";
-
- private static final String EXPECTED_DIR = "src/test/resources/expected/";
- private static final String EXPECTED_READER_RESULT = EXPECTED_DIR + "result_after_initial_read";
- private static final String EXPECTED_OUPUT_KMER = EXPECTED_DIR + "result_after_kmerAggregate";
- private static final String EXPECTED_KMER_TO_READID = EXPECTED_DIR + "result_after_kmer2readId";
- private static final String EXPECTED_GROUPBYREADID = EXPECTED_DIR + "result_after_readIDAggreage";
- private static final String EXPECTED_OUPUT_NODE = EXPECTED_DIR + "result_after_generateNode";
- private static final String EXPECTED_UNMERGED = EXPECTED_DIR + "result_unmerged";
-
- private static final String DUMPED_RESULT = ACTUAL_RESULT_DIR + HDFS_OUTPUT_PATH + "/merged.txt";
- private static final String CONVERT_RESULT = DUMPED_RESULT + ".txt";
- private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
- private MiniDFSCluster dfsCluster;
-
- private JobConf conf = new JobConf();
- private int numberOfNC = 2;
- private int numPartitionPerMachine = 2;
-
- private Driver driver;
-
- @Test
- public void TestAll() throws Exception {
- TestReader();
-// TestGroupbyKmer();
-// TestMapKmerToRead();
-// TestGroupByReadID();
-// TestEndToEnd();
-// TestUnMergedNode();
- }
-
- public void TestUnMergedNode() throws Exception {
- conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_BINARY);
- cleanUpReEntry();
- conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
- driver.runJob(new GenomixJobConf(conf), Plan.BUILD_UNMERGED_GRAPH, true);
- Assert.assertEquals(true, checkResults(EXPECTED_UNMERGED, new int[] { 1, 2, 3, 4 }));
- }
-
- public void TestReader() throws Exception {
- cleanUpReEntry();
- conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_TEXT);
- driver.runJob(new GenomixJobConf(conf), Plan.CHECK_KMERREADER, true);
- Assert.assertEquals(true, checkResults(EXPECTED_READER_RESULT, null));
- }
-
- public void TestGroupbyKmer() throws Exception {
- cleanUpReEntry();
- conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_TEXT);
- conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
- driver.runJob(new GenomixJobConf(conf), Plan.OUTPUT_KMERHASHTABLE, true);
- Assert.assertEquals(true, checkResults(EXPECTED_OUPUT_KMER, new int[] { 1 }));
- }
-
- public void TestMapKmerToRead() throws Exception {
- cleanUpReEntry();
- conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_TEXT);
- driver.runJob(new GenomixJobConf(conf), Plan.OUTPUT_MAP_KMER_TO_READ, true);
- Assert.assertEquals(true, checkResults(EXPECTED_KMER_TO_READID, new int[] { 2 }));
- }
-
- public void TestGroupByReadID() throws Exception {
- cleanUpReEntry();
- conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_TEXT);
- conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
- driver.runJob(new GenomixJobConf(conf), Plan.OUTPUT_GROUPBY_READID, true);
- Assert.assertEquals(true, checkResults(EXPECTED_GROUPBYREADID, new int[] { 2, 5, 8, 11, 14, 17, 20, 23 }));
- }
-
- public void TestEndToEnd() throws Exception {
- //conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_TEXT);
- conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_BINARY);
- cleanUpReEntry();
- conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
- driver.runJob(new GenomixJobConf(conf), Plan.BUILD_DEBRUJIN_GRAPH, true);
- Assert.assertEquals(true, checkResults(EXPECTED_OUTPUT_NODE, new int[] { 1, 2, 3, 4 }));
- }
-
- @Before
- public void setUp() throws Exception {
- cleanupStores();
- edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.init();
- FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
- FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
- startHDFS();
-
- FileInputFormat.setInputPaths(conf, HDFS_INPUT_PATH);
- FileOutputFormat.setOutputPath(conf, new Path(HDFS_OUTPUT_PATH));
-
- conf.setInt(GenomixJobConf.KMER_LENGTH, KmerSize);
- conf.setInt(GenomixJobConf.READ_LENGTH, ReadLength);
- driver = new Driver(edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.CC_HOST,
- edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.TEST_HYRACKS_CC_CLIENT_PORT, numPartitionPerMachine);
- }
-
- private void cleanupStores() throws IOException {
- FileUtils.forceMkdir(new File("teststore"));
- FileUtils.forceMkdir(new File("build"));
- FileUtils.cleanDirectory(new File("teststore"));
- FileUtils.cleanDirectory(new File("build"));
- }
-
- private void startHDFS() throws IOException {
- conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
- conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
- conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
-
- FileSystem lfs = FileSystem.getLocal(new Configuration());
- lfs.delete(new Path("build"), true);
- System.setProperty("hadoop.log.dir", "logs");
- dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
- FileSystem dfs = FileSystem.get(conf);
- Path src = new Path(DATA_INPUT_PATH);
- Path dest = new Path(HDFS_INPUT_PATH);
- dfs.mkdirs(dest);
- // dfs.mkdirs(result);
- dfs.copyFromLocalFile(src, dest);
-
- DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
- conf.writeXml(confOutput);
- confOutput.flush();
- confOutput.close();
- }
-
- private void cleanUpReEntry() throws IOException {
- FileSystem lfs = FileSystem.getLocal(new Configuration());
- if (lfs.exists(new Path(DUMPED_RESULT))) {
- lfs.delete(new Path(DUMPED_RESULT), true);
- }
- FileSystem dfs = FileSystem.get(conf);
- if (dfs.exists(new Path(HDFS_OUTPUT_PATH))) {
- dfs.delete(new Path(HDFS_OUTPUT_PATH), true);
- }
- }
-
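- /**
- * Dumps the job output from HDFS (merging text part files directly, or
- * converting binary SequenceFile parts to text) and compares it against the
- * expected file.
- */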
- private boolean checkResults(String expectedPath, int[] poslistField) throws Exception {
- File dumped = null;
- String format = conf.get(GenomixJobConf.OUTPUT_FORMAT);
- if (GenomixJobConf.OUTPUT_FORMAT_TEXT.equalsIgnoreCase(format)) {
- FileUtil.copyMerge(FileSystem.get(conf), new Path(HDFS_OUTPUT_PATH),
- FileSystem.getLocal(new Configuration()), new Path(DUMPED_RESULT), false, conf, null);
- dumped = new File(DUMPED_RESULT);
- } else {
- FileSystem.getLocal(new Configuration()).mkdirs(new Path(ACTUAL_RESULT_DIR + HDFS_OUTPUT_PATH));
- File filePathTo = new File(CONVERT_RESULT);
- BufferedWriter bw = new BufferedWriter(new FileWriter(filePathTo));
- for (int i = 0; i < numPartitionPerMachine * numberOfNC; i++) {
- String partname = "/part-" + i;
- // FileUtil.copy(FileSystem.get(conf), new Path(HDFS_OUTPUT_PATH
- // + partname), FileSystem.getLocal(new Configuration()),
- // new Path(ACTUAL_RESULT_DIR + HDFS_OUTPUT_PATH + partname),
- // false, conf);
-
- Path path = new Path(HDFS_OUTPUT_PATH + partname);
- FileSystem dfs = FileSystem.get(conf);
- if (dfs.getFileStatus(path).getLen() == 0) {
- continue;
- }
- SequenceFile.Reader reader = new SequenceFile.Reader(dfs, path, conf);
-
- NodeWritable node = new NodeWritable(conf.getInt(GenomixJobConf.KMER_LENGTH, KmerSize));
- NullWritable value = NullWritable.get();
- while (reader.next(node, value)) {
- if (node == null) {
- break;
- }
- bw.write(node.toString());
- System.out.println(node.toString());
- bw.newLine();
- }
- reader.close();
- }
- bw.close();
- dumped = new File(CONVERT_RESULT);
- }
-
- if (poslistField != null) {
- TestUtils.compareWithUnSortedPosition(new File(expectedPath), dumped, poslistField);
- } else {
- TestUtils.compareWithSortedResult(new File(expectedPath), dumped);
- }
- return true;
- }
-
- @After
- public void tearDown() throws Exception {
- edu.uci.ics.hyracks.hdfs.utils.HyracksUtils.deinit();
- cleanupHDFS();
- }
-
- private void cleanupHDFS() throws Exception {
- dfsCluster.shutdown();
- }
-}
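
For reference, the binary branch of checkResults() above follows the standard
SequenceFile-to-text dump pattern. Below is a minimal standalone sketch of that
pattern (the class and method names are illustrative, and the record types are
obtained via reflection, where the deleted test read NodeWritable keys with
NullWritable values directly):

import java.io.BufferedWriter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class SequenceFileDumper {
    /** Appends every record of one part file to out, one line per record. */
    public static void dumpPart(FileSystem fs, Path part, Configuration conf, BufferedWriter out)
            throws Exception {
        if (fs.getFileStatus(part).getLen() == 0) {
            return; // empty partitions leave zero-length part files behind
        }
        // The (fs, path, conf) constructor matches the Hadoop 1.x API used above.
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, part, conf);
        try {
            Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
            while (reader.next(key, value)) {
                out.write(key.toString());
                out.newLine();
            }
        } finally {
            reader.close();
        }
    }
}
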
diff --git a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/TestUtils.java b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/TestUtils.java
deleted file mode 100644
index cb19d0c..0000000
--- a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/TestUtils.java
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.test;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.util.ArrayList;
-import java.util.Collections;
-
-public class TestUtils {
- /**
- * Compare the actual file against the sorted expected file.
- * The actual file need not be pre-sorted; its lines are sorted here before comparison.
- *
- * @param expectedFile
- * @param actualFile
- */
- public static void compareWithSortedResult(File expectedFile, File actualFile) throws Exception {
- BufferedReader readerActual = new BufferedReader(new FileReader(actualFile));
- BufferedReader readerExpected = new BufferedReader(new FileReader(expectedFile));
- ArrayList<String> actualLines = new ArrayList<String>();
- String lineExpected, lineActual;
- try {
- while ((lineActual = readerActual.readLine()) != null) {
- actualLines.add(lineActual);
- }
- Collections.sort(actualLines);
- int num = 1;
- for (String actualLine : actualLines) {
- lineExpected = readerExpected.readLine();
- if (lineExpected == null) {
- throw new Exception("Actual result changed at line " + num + ":\n< " + actualLine + "\n> ");
- }
- if (!equalStrings(lineExpected, actualLine)) {
- throw new Exception("Result for changed at line " + num + ":\n< " + lineExpected + "\n> "
- + actualLine);
- }
- ++num;
- }
- lineExpected = readerExpected.readLine();
- if (lineExpected != null) {
- throw new Exception("Actual result changed at line " + num + ":\n< \n> " + lineExpected);
- }
- } finally {
- readerActual.close();
- readerExpected.close();
- }
- }
-
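- /**
- * Like compareWithSortedResult, but the fields at the poslistField indices
- * are compared as order-insensitive position lists.
- */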
- public static void compareWithUnSortedPosition(File expectedFile, File actualFile, int[] poslistField)
- throws Exception {
- BufferedReader readerActual = new BufferedReader(new FileReader(actualFile));
- BufferedReader readerExpected = new BufferedReader(new FileReader(expectedFile));
- ArrayList<String> actualLines = new ArrayList<String>();
- String lineExpected, lineActual;
- try {
- while ((lineActual = readerActual.readLine()) != null) {
- actualLines.add(lineActual);
- }
- Collections.sort(actualLines);
- int num = 1;
- for (String actualLine : actualLines) {
- lineExpected = readerExpected.readLine();
- if (lineExpected == null) {
- throw new Exception("Actual result changed at line " + num + ":\n< " + actualLine + "\n> ");
- }
- if (!containStrings(lineExpected, actualLine, poslistField)) {
- throw new Exception("Result for changed at line " + num + ":\n< " + lineExpected + "\n> "
- + actualLine);
- }
- ++num;
- }
- lineExpected = readerExpected.readLine();
- if (lineExpected != null) {
- throw new Exception("Actual result changed at line " + num + ":\n< \n> " + lineExpected);
- }
- } finally {
- readerActual.close();
- readerExpected.close();
- }
- }
-
- public static void compareWithResult(File expectedFile, File actualFile) throws Exception {
- BufferedReader readerExpected = new BufferedReader(new FileReader(expectedFile));
- BufferedReader readerActual = new BufferedReader(new FileReader(actualFile));
- String lineExpected, lineActual;
- int num = 1;
- try {
- while ((lineExpected = readerExpected.readLine()) != null) {
- lineActual = readerActual.readLine();
- // Assert.assertEquals(lineExpected, lineActual);
- if (lineActual == null) {
- throw new Exception("Actual result changed at line " + num + ":\n< " + lineExpected + "\n> ");
- }
- if (!equalStrings(lineExpected, lineActual)) {
- throw new Exception("Result for changed at line " + num + ":\n< " + lineExpected + "\n> "
- + lineActual);
- }
- ++num;
- }
- lineActual = readerActual.readLine();
- if (lineActual != null) {
- throw new Exception("Actual result changed at line " + num + ":\n< \n> " + lineActual);
- }
- } finally {
- readerExpected.close();
- readerActual.close();
- }
- }
-
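- /**
- * Field-wise comparison of two lines; numeric "key=value" fields are parsed
- * and compared by value rather than textually.
- */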
- private static boolean equalStrings(String s1, String s2) {
- String[] rowsOne = s1.split("\n");
- String[] rowsTwo = s2.split("\n");
-
- if (rowsOne.length != rowsTwo.length)
- return false;
-
- for (int i = 0; i < rowsOne.length; i++) {
- String row1 = rowsOne[i];
- String row2 = rowsTwo[i];
-
- if (row1.equals(row2))
- continue;
-
- String[] fields1 = row1.split(",");
- String[] fields2 = row2.split(",");
-
- for (int j = 0; j < fields1.length; j++) {
- if (fields1[j].equals(fields2[j])) {
- continue;
- } else if (fields1[j].indexOf('.') < 0) {
- return false;
- } else {
- float float1 = (float) Double.parseDouble(fields1[j].split("=")[1]);
- float float2 = (float) Double.parseDouble(fields2[j].split("=")[1]);
- if (float1 != float2) {
- return false;
- }
- }
- }
- }
- return true;
- }
-
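- /**
- * Compares two tab-separated lines field by field; the fields at the
- * poslistField indices are compared as unordered sets of "(...)" groups.
- */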
- private static boolean containStrings(String lineExpected, String actualLine, int[] poslistField) {
- if (lineExpected.equals(actualLine)) {
- return true;
- }
- String[] fieldsExp = lineExpected.split("\\\t");
- String[] fieldsAct = actualLine.split("\\\t");
- if (fieldsAct.length != fieldsExp.length) {
- return false;
- }
- for (int i = 0; i < fieldsAct.length; i++) {
- boolean cont = false;
- for (int x : poslistField) {
- if (i == x) {
- cont = true;
- break;
- }
- }
- if (cont) {
- continue;
- }
- if (!fieldsAct[i].equals(fieldsExp[i])) {
- return false;
- }
- }
-
- ArrayList<String> posExp = new ArrayList<String>();
- ArrayList<String> posAct = new ArrayList<String>();
-
- for (int x : poslistField) {
- String valueExp = lineExpected.split("\\\t")[x];
- for (int i = 1; i < valueExp.length() - 1;) {
- if (valueExp.charAt(i) == '(') {
- String str = "";
- i++;
- while (i < valueExp.length() - 1 && valueExp.charAt(i) != ')') {
- str += valueExp.charAt(i);
- i++;
- }
- posExp.add(str);
- }
- i++;
- }
- String valueAct = actualLine.split("\\\t")[x];
- for (int i = 1; i < valueAct.length() - 1;) {
- if (valueAct.charAt(i) == '(') {
- String str = "";
- i++;
- while (i < valueAct.length() - 1 && valueAct.charAt(i) != ')') {
- str += valueAct.charAt(i);
- i++;
- }
- posAct.add(str);
- }
- i++;
- }
-
- if (posExp.size() != posAct.size()) {
- return false;
- }
- Collections.sort(posExp);
- Collections.sort(posAct);
- for (int i = 0; i < posExp.size(); i++) {
- if (!posExp.get(i).equals(posAct.get(i))) {
- return false;
- }
- }
- }
- return true;
- }
-}
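
The character walk that extracts "(...)" groups in containStrings() above can
be expressed more compactly with a regex. A minimal sketch of the same
order-insensitive comparison, assuming groups never nest (the class and method
names are illustrative):

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class PositionListCompare {
    // Captures the text between each non-nested pair of parentheses.
    private static final Pattern GROUP = Pattern.compile("\\(([^)]*)\\)");

    /** Extracts the contents of every parenthesized group in a field. */
    static List<String> groups(String field) {
        List<String> out = new ArrayList<String>();
        Matcher m = GROUP.matcher(field);
        while (m.find()) {
            out.add(m.group(1));
        }
        return out;
    }

    /** True when both fields contain the same groups, in any order. */
    static boolean sameGroups(String expectedField, String actualField) {
        List<String> exp = groups(expectedField);
        List<String> act = groups(actualField);
        Collections.sort(exp);
        Collections.sort(act);
        return exp.equals(act);
    }
}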