Merge branch 'genomix/fullstack_genomix' of https://code.google.com/p/hyracks into jianfeng/genomix
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/data/KmerUtil.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/data/KmerUtil.java
index 86b9117..866d6c5 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/data/KmerUtil.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/data/KmerUtil.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2012 by The Regents of the University of California
+ * Copyright 2009-2013 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/data/Marshal.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/data/Marshal.java
index ecbf1ec..e80d7da 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/data/Marshal.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/data/Marshal.java
@@ -1,3 +1,17 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package edu.uci.ics.genomix.data;
public class Marshal {
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java
index d9f5c48..416ab49 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/GeneCode.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2012 by The Regents of the University of California
+ * Copyright 2009-2013 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
index d3f47cf..50baeb4 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritable.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2012 by The Regents of the University of California
+ * Copyright 2009-2013 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritableFactory.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritableFactory.java
index 9d458d2..d2e3a94 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritableFactory.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/KmerBytesWritableFactory.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2012 by The Regents of the University of California
+ * Copyright 2009-2013 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
index 5d6c36a..d4dab00 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/NodeWritable.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.genomix.type;
import java.io.DataInput;
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
index cd9fc66..eca4a28 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionListWritable.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.genomix.type;
import java.io.DataInput;
@@ -103,7 +118,6 @@
@Override
public void remove() {
- // TODO Auto-generated method stub
}
};
return it;
diff --git a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
index b548934..8895f5c 100644
--- a/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
+++ b/genomix/genomix-data/src/main/java/edu/uci/ics/genomix/type/PositionWritable.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.genomix.type;
import java.io.DataInput;
diff --git a/genomix/genomix-hyracks/pom.xml b/genomix/genomix-hyracks/pom.xml
index 8377df6..69d5839 100644
--- a/genomix/genomix-hyracks/pom.xml
+++ b/genomix/genomix-hyracks/pom.xml
@@ -38,15 +38,15 @@
<mainClass>edu.uci.ics.genomix.hyracks.driver.Driver</mainClass>
<name>genomix</name>
</program>
- <program>
- <mainClass>edu.uci.ics.hyracks.control.cc.CCDriver</mainClass>
- <name>genomixcc</name>
- </program>
- <program>
- <mainClass>edu.uci.ics.hyracks.control.nc.NCDriver</mainClass>
- <name>genomixnc</name>
- </program>
- </programs>
+ <program>
+ <mainClass>edu.uci.ics.hyracks.control.cc.CCDriver</mainClass>
+ <name>genomixcc</name>
+ </program>
+ <program>
+ <mainClass>edu.uci.ics.hyracks.control.nc.NCDriver</mainClass>
+ <name>genomixnc</name>
+ </program>
+ </programs>
<repositoryLayout>flat</repositoryLayout>
<repositoryName>lib</repositoryName>
</configuration>
@@ -261,6 +261,6 @@
<type>jar</type>
<scope>compile</scope>
</dependency>
-
+
</dependencies>
</project>
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/ByteSerializerDeserializer.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/ByteSerializerDeserializer.java
index 3826f9b..257d5a3 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/ByteSerializerDeserializer.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/ByteSerializerDeserializer.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2012 by The Regents of the University of California
+ * Copyright 2009-2013 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
@@ -57,4 +57,4 @@
bytes[offset] = val;
}
-}
\ No newline at end of file
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerBinaryHashFunctionFamily.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerBinaryHashFunctionFamily.java
index a4cfd3b..130d5ab 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerBinaryHashFunctionFamily.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerBinaryHashFunctionFamily.java
@@ -1,44 +1,44 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.data.accessors;
-
-import edu.uci.ics.genomix.hyracks.data.primitive.KmerPointable;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFamily;
-
-public class KmerBinaryHashFunctionFamily implements IBinaryHashFunctionFamily {
- private static final long serialVersionUID = 1L;
-
- @Override
- public IBinaryHashFunction createBinaryHashFunction(final int seed) {
-
- return new IBinaryHashFunction() {
- private KmerPointable p = new KmerPointable();
-
- @Override
- public int hash(byte[] bytes, int offset, int length) {
- if (length + offset >= bytes.length)
- throw new IllegalStateException("out of bound");
- p.set(bytes, offset, length);
- int hash = p.hash() * (seed + 1);
- if (hash < 0) {
- hash = -(hash + 1);
- }
- return hash;
- }
- };
- }
-}
\ No newline at end of file
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.data.accessors;
+
+import edu.uci.ics.genomix.hyracks.data.primitive.KmerPointable;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFamily;
+
+public class KmerBinaryHashFunctionFamily implements IBinaryHashFunctionFamily {
+    private static final long serialVersionUID = 1L;
+
+    /** Creates a k-mer hash function returning {@code p.hash() * (seed + 1)} folded to a non-negative int. */
+    @Override
+    public IBinaryHashFunction createBinaryHashFunction(final int seed) {
+        return new IBinaryHashFunction() {
+            private final KmerPointable p = new KmerPointable();
+
+            @Override
+            public int hash(byte[] bytes, int offset, int length) {
+                // A range may end exactly at the end of the array; only a range running past it is invalid.
+                if (offset + length > bytes.length) {
+                    throw new IllegalStateException("out of bound");
+                }
+                p.set(bytes, offset, length);
+                int hash = p.hash() * (seed + 1);
+                // Fold negatives onto [0, Integer.MAX_VALUE]; -(hash + 1) cannot overflow for Integer.MIN_VALUE.
+                return hash < 0 ? -(hash + 1) : hash;
+            }
+        };
+    }
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerHashPartitioncomputerFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerHashPartitioncomputerFactory.java
index eb0d6bb..f8d4f84 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerHashPartitioncomputerFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerHashPartitioncomputerFactory.java
@@ -1,56 +1,56 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.data.accessors;
-
-import java.nio.ByteBuffer;
-
-import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
-import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputerFactory;
-
-public class KmerHashPartitioncomputerFactory implements ITuplePartitionComputerFactory {
-
- private static final long serialVersionUID = 1L;
-
- public static int hashBytes(byte[] bytes, int offset, int length) {
- int hash = 1;
- for (int i = offset; i < offset + length; i++)
- hash = (31 * hash) + (int) bytes[i];
- return hash;
- }
-
- @Override
- public ITuplePartitionComputer createPartitioner() {
- return new ITuplePartitionComputer() {
- @Override
- public int partition(IFrameTupleAccessor accessor, int tIndex, int nParts) {
- int startOffset = accessor.getTupleStartOffset(tIndex);
- int fieldOffset = accessor.getFieldStartOffset(tIndex, 0);
- int slotLength = accessor.getFieldSlotsLength();
- int fieldLength = accessor.getFieldLength(tIndex, 0);
-
- ByteBuffer buf = accessor.getBuffer();
-
- int hash = hashBytes(buf.array(), startOffset + fieldOffset + slotLength, fieldLength);
- if (hash < 0) {
- hash = -(hash + 1);
- }
-
- return hash % nParts;
- }
- };
- }
-}
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.data.accessors;
+
+import java.nio.ByteBuffer;
+
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputerFactory;
+
+public class KmerHashPartitioncomputerFactory implements ITuplePartitionComputerFactory {
+
+    private static final long serialVersionUID = 1L;
+
+    /** Polynomial hash (multiplier 31, initial value 1) over {@code length} signed bytes starting at {@code offset}. */
+    public static int hashBytes(byte[] bytes, int offset, int length) {
+        final int end = offset + length;
+        int acc = 1;
+        int pos = offset;
+        while (pos < end) {
+            acc = acc * 31 + bytes[pos++];
+        }
+        return acc;
+    }
+
+    /** Builds a partitioner that hashes the bytes of field 0 of a tuple and reduces the hash modulo {@code nParts}. */
+    @Override
+    public ITuplePartitionComputer createPartitioner() {
+        return new ITuplePartitionComputer() {
+            @Override
+            public int partition(IFrameTupleAccessor accessor, int tIndex, int nParts) {
+                // Hash input starts at: tuple start offset + field-0 start offset + field-slots length.
+                int fieldStart = accessor.getTupleStartOffset(tIndex) + accessor.getFieldStartOffset(tIndex, 0)
+                        + accessor.getFieldSlotsLength();
+                int fieldLength = accessor.getFieldLength(tIndex, 0);
+                ByteBuffer frame = accessor.getBuffer();
+                int h = hashBytes(frame.array(), fieldStart, fieldLength);
+                // Fold a negative hash to a non-negative value before taking the remainder.
+                return (h < 0 ? -(h + 1) : h) % nParts;
+            }
+        };
+    }
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerNormarlizedComputerFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerNormarlizedComputerFactory.java
index 44b0e10..7dd3b14 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerNormarlizedComputerFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/KmerNormarlizedComputerFactory.java
@@ -1,37 +1,37 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.data.accessors;
-
-import edu.uci.ics.genomix.hyracks.data.primitive.KmerPointable;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-
-public class KmerNormarlizedComputerFactory implements INormalizedKeyComputerFactory {
- private static final long serialVersionUID = 1L;
-
- @Override
- public INormalizedKeyComputer createNormalizedKeyComputer() {
- return new INormalizedKeyComputer() {
- /**
- * read one int from Kmer, make sure this int is consistent whith Kmer compartor
- */
- @Override
- public int normalize(byte[] bytes, int start, int length) {
- return KmerPointable.getIntReverse(bytes, start, length);
- }
- };
- }
-}
\ No newline at end of file
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.data.accessors;
+
+import edu.uci.ics.genomix.hyracks.data.primitive.KmerPointable;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+
+public class KmerNormarlizedComputerFactory implements INormalizedKeyComputerFactory {
+ private static final long serialVersionUID = 1L;
+ /** Creates a key computer whose normalize() delegates to KmerPointable.getIntReverse. */
+ @Override
+ public INormalizedKeyComputer createNormalizedKeyComputer() {
+ return new INormalizedKeyComputer() {
+ /**
+ * Reads one int from the k-mer bytes; this int must stay consistent with the k-mer comparator.
+ */
+ @Override
+ public int normalize(byte[] bytes, int start, int length) {
+ return KmerPointable.getIntReverse(bytes, start, length);
+ }
+ };
+ }
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/ReadIDNormarlizedComputeFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/ReadIDNormarlizedComputeFactory.java
deleted file mode 100644
index 1f3efbf..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/ReadIDNormarlizedComputeFactory.java
+++ /dev/null
@@ -1,26 +0,0 @@
-package edu.uci.ics.genomix.hyracks.data.accessors;
-
-import edu.uci.ics.genomix.data.Marshal;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-
-public class ReadIDNormarlizedComputeFactory implements INormalizedKeyComputerFactory{
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- @Override
- public INormalizedKeyComputer createNormalizedKeyComputer() {
- return new INormalizedKeyComputer(){
-
- @Override
- public int normalize(byte[] bytes, int start, int length) {
- return Marshal.getInt(bytes, start);
- }
-
- };
- }
-
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/ReadIDPartitionComputerFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/ReadIDPartitionComputerFactory.java
index 7229364..6773a73 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/ReadIDPartitionComputerFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/accessors/ReadIDPartitionComputerFactory.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.genomix.hyracks.data.accessors;
import java.nio.ByteBuffer;
@@ -16,7 +31,6 @@
@Override
public ITuplePartitionComputer createPartitioner() {
- // TODO Auto-generated method stub
return new ITuplePartitionComputer() {
@Override
public int partition(IFrameTupleAccessor accessor, int tIndex, int nParts) {
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/KmerPointable.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/KmerPointable.java
index 4ceda78..0457de9 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/KmerPointable.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/KmerPointable.java
@@ -1,145 +1,145 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.data.primitive;
-
-import edu.uci.ics.genomix.hyracks.data.accessors.KmerHashPartitioncomputerFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
-import edu.uci.ics.hyracks.data.std.api.AbstractPointable;
-import edu.uci.ics.hyracks.data.std.api.IComparable;
-import edu.uci.ics.hyracks.data.std.api.IHashable;
-import edu.uci.ics.hyracks.data.std.api.INumeric;
-import edu.uci.ics.hyracks.data.std.api.IPointable;
-import edu.uci.ics.hyracks.data.std.api.IPointableFactory;
-
-public final class KmerPointable extends AbstractPointable implements IHashable, IComparable, INumeric {
- public static final ITypeTraits TYPE_TRAITS = new ITypeTraits() {
- private static final long serialVersionUID = 1L;
-
- @Override
- public boolean isFixedLength() {
- return false;
- }
-
- @Override
- public int getFixedLength() {
- return -1;
- }
- };
-
- public static final IPointableFactory FACTORY = new IPointableFactory() {
- private static final long serialVersionUID = 1L;
-
- @Override
- public IPointable createPointable() {
- return new KmerPointable();
- }
-
- @Override
- public ITypeTraits getTypeTraits() {
- return TYPE_TRAITS;
- }
- };
-
- public static short getShortReverse(byte[] bytes, int offset, int length) {
- if (length < 2) {
- return (short) (bytes[offset] & 0xff);
- }
- return (short) (((bytes[offset + length - 1] & 0xff) << 8) + (bytes[offset + length - 2] & 0xff));
- }
-
- public static int getIntReverse(byte[] bytes, int offset, int length) {
- int shortValue = getShortReverse(bytes, offset, length) & 0xffff;
-
- if (length < 3) {
- return shortValue;
- }
- if (length == 3) {
- return (((bytes[offset + 2] & 0xff) << 16) + ((bytes[offset + 1] & 0xff) << 8) + ((bytes[offset] & 0xff)));
- }
- return ((bytes[offset + length - 1] & 0xff) << 24) + ((bytes[offset + length - 2] & 0xff) << 16)
- + ((bytes[offset + length - 3] & 0xff) << 8) + ((bytes[offset + length - 4] & 0xff) << 0);
- }
-
- public static long getLongReverse(byte[] bytes, int offset, int length) {
- if (length < 8) {
- return ((long) getIntReverse(bytes, offset, length)) & 0x0ffffffffL;
- }
- return (((long) (bytes[offset + length - 1] & 0xff)) << 56)
- + (((long) (bytes[offset + length - 2] & 0xff)) << 48)
- + (((long) (bytes[offset + length - 3] & 0xff)) << 40)
- + (((long) (bytes[offset + length - 4] & 0xff)) << 32)
- + (((long) (bytes[offset + length - 5] & 0xff)) << 24)
- + (((long) (bytes[offset + length - 6] & 0xff)) << 16)
- + (((long) (bytes[offset + length - 7] & 0xff)) << 8) + (((long) (bytes[offset + length - 8] & 0xff)));
- }
-
- @Override
- public int compareTo(IPointable pointer) {
- return compareTo(pointer.getByteArray(), pointer.getStartOffset(), pointer.getLength());
- }
-
- @Override
- public int compareTo(byte[] bytes, int offset, int length) {
-
- if (this.length != length) {
- return this.length - length;
- }
- for (int i = length - 1; i >= 0; i--) {
- int cmp = (this.bytes[this.start + i] & 0xff) - (bytes[offset + i] & 0xff);
- if (cmp != 0) {
- return cmp;
- }
- }
-
- return 0;
- }
-
- @Override
- public int hash() {
- int hash = KmerHashPartitioncomputerFactory.hashBytes(bytes, start, length);
- return hash;
- }
-
- @Override
- public byte byteValue() {
- return bytes[start + length - 1];
- }
-
- @Override
- public short shortValue() {
- return getShortReverse(bytes, start, length);
- }
-
- @Override
- public int intValue() {
- return getIntReverse(bytes, start, length);
- }
-
- @Override
- public long longValue() {
- return getLongReverse(bytes, start, length);
- }
-
- @Override
- public float floatValue() {
- return Float.intBitsToFloat(intValue());
- }
-
- @Override
- public double doubleValue() {
- return Double.longBitsToDouble(longValue());
- }
-}
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.data.primitive;
+
+import edu.uci.ics.genomix.hyracks.data.accessors.KmerHashPartitioncomputerFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.data.std.api.AbstractPointable;
+import edu.uci.ics.hyracks.data.std.api.IComparable;
+import edu.uci.ics.hyracks.data.std.api.IHashable;
+import edu.uci.ics.hyracks.data.std.api.INumeric;
+import edu.uci.ics.hyracks.data.std.api.IPointable;
+import edu.uci.ics.hyracks.data.std.api.IPointableFactory;
+
+public final class KmerPointable extends AbstractPointable implements IHashable, IComparable, INumeric {
+    /** Type traits for k-mers: not fixed length ({@code isFixedLength()} is false, {@code getFixedLength()} is -1). */
+    public static final ITypeTraits TYPE_TRAITS = new ITypeTraits() {
+        private static final long serialVersionUID = 1L;
+        @Override
+        public boolean isFixedLength() {
+            return false;
+        }
+
+        @Override
+        public int getFixedLength() {
+            return -1;
+        }
+    };
+
+    /** Factory that creates new {@code KmerPointable} instances and reports {@code TYPE_TRAITS}. */
+    public static final IPointableFactory FACTORY = new IPointableFactory() {
+        private static final long serialVersionUID = 1L;
+        @Override
+        public ITypeTraits getTypeTraits() {
+            return TYPE_TRAITS;
+        }
+
+        @Override
+        public IPointable createPointable() {
+            return new KmerPointable();
+        }
+    };
+
+    /** Reads the last two bytes of the range, final byte most significant; below 2 bytes yields {@code bytes[offset]}. */
+    public static short getShortReverse(byte[] bytes, int offset, int length) {
+        if (length >= 2) {
+            int last = offset + length - 1;
+            return (short) (((bytes[last] & 0xff) << 8) | (bytes[last - 1] & 0xff));
+        }
+        return (short) (bytes[offset] & 0xff);
+    }
+
+    /** Reads the last min(length, 4) bytes, final byte most significant; below 3 bytes yields the unsigned short. */
+    public static int getIntReverse(byte[] bytes, int offset, int length) {
+        if (length < 3) {
+            return getShortReverse(bytes, offset, length) & 0xffff;
+        }
+        int value = 0;
+        for (int back = 1, take = Math.min(length, 4); back <= take; back++) {
+            value = (value << 8) | (bytes[offset + length - back] & 0xff);
+        }
+        return value;
+    }
+
+    /** Reads the last eight bytes, final byte most significant; below 8 bytes yields the int value zero-extended. */
+    public static long getLongReverse(byte[] bytes, int offset, int length) {
+        if (length < 8) {
+            return getIntReverse(bytes, offset, length) & 0x0ffffffffL;
+        }
+        long value = 0L;
+        for (int back = 1; back <= 8; back++) {
+            value = (value << 8) | (bytes[offset + length - back] & 0xffL);
+        }
+        return value;
+    }
+
+    @Override
+    public int compareTo(IPointable pointer) {
+        return compareTo(pointer.getByteArray(), pointer.getStartOffset(), pointer.getLength());
+    }
+
+    /** Orders by length first, then by unsigned byte value scanning from the last byte toward the first. */
+    @Override
+    public int compareTo(byte[] bytes, int offset, int length) {
+        int lengthDiff = this.length - length;
+        if (lengthDiff != 0) {
+            return lengthDiff;
+        }
+        for (int i = length - 1; i >= 0; --i) {
+            int mine = this.bytes[this.start + i] & 0xff;
+            int theirs = bytes[offset + i] & 0xff;
+            if (mine != theirs) {
+                return mine - theirs;
+            }
+        }
+        return 0;
+    }
+
+    @Override
+    public int hash() {
+        return KmerHashPartitioncomputerFactory.hashBytes(bytes, start, length);
+    }
+
+    @Override
+    public byte byteValue() {
+        return bytes[start + length - 1]; // final byte of the range
+    }
+
+    @Override
+    public short shortValue() {
+        return getShortReverse(bytes, start, length);
+    }
+
+    @Override
+    public int intValue() {
+        return getIntReverse(bytes, start, length);
+    }
+
+    @Override
+    public long longValue() {
+        return getLongReverse(bytes, start, length);
+    }
+
+    @Override
+    public float floatValue() {
+        return Float.intBitsToFloat(intValue()); // reinterprets the int bits, no numeric conversion
+    }
+
+    @Override
+    public double doubleValue() {
+        return Double.longBitsToDouble(longValue()); // reinterprets the long bits, no numeric conversion
+    }
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/NodeReference.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/NodeReference.java
index f98c684..7c1431b 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/NodeReference.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/NodeReference.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.genomix.hyracks.data.primitive;
import edu.uci.ics.genomix.type.NodeWritable;
@@ -7,13 +22,10 @@
/**
*
*/
- private static final long serialVersionUID = 1L;
+ private static final long serialVersionUID = 1L;
- public NodeReference(int kmerSize) {
+ public NodeReference(int kmerSize) {
super(kmerSize);
- // TODO Auto-generated constructor stub
}
-
-
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionListReference.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionListReference.java
index 49dd263..8885c0f 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionListReference.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionListReference.java
@@ -1,17 +1,32 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.genomix.hyracks.data.primitive;
import edu.uci.ics.genomix.type.PositionListWritable;
import edu.uci.ics.hyracks.data.std.api.IValueReference;
-public class PositionListReference extends PositionListWritable implements IValueReference {
+public class PositionListReference extends PositionListWritable implements IValueReference {
- public PositionListReference(int countByDataLength, byte[] byteArray, int startOffset) {
- super(countByDataLength, byteArray, startOffset);
+ public PositionListReference(int countByDataLength, byte[] byteArray, int startOffset) {
+ super(countByDataLength, byteArray, startOffset);
}
/**
*
*/
- private static final long serialVersionUID = 1L;
+ private static final long serialVersionUID = 1L;
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionReference.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionReference.java
index 1e22529..610f01a 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionReference.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/PositionReference.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.genomix.hyracks.data.primitive;
import edu.uci.ics.genomix.type.PositionWritable;
@@ -5,9 +20,9 @@
public class PositionReference extends PositionWritable implements IValueReference {
- /**
+ /**
*
*/
- private static final long serialVersionUID = 1L;
+ private static final long serialVersionUID = 1L;
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/ReadIDPointable.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/ReadIDPointable.java
deleted file mode 100644
index b1e08c4..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/data/primitive/ReadIDPointable.java
+++ /dev/null
@@ -1,118 +0,0 @@
-package edu.uci.ics.genomix.hyracks.data.primitive;
-
-import edu.uci.ics.genomix.data.Marshal;
-import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
-import edu.uci.ics.hyracks.data.std.api.AbstractPointable;
-import edu.uci.ics.hyracks.data.std.api.IComparable;
-import edu.uci.ics.hyracks.data.std.api.IHashable;
-import edu.uci.ics.hyracks.data.std.api.INumeric;
-import edu.uci.ics.hyracks.data.std.api.IPointable;
-import edu.uci.ics.hyracks.data.std.api.IPointableFactory;
-import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
-
-public class ReadIDPointable extends AbstractPointable implements IHashable, IComparable, INumeric {
- public static final ITypeTraits TYPE_TRAITS = new ITypeTraits() {
- private static final long serialVersionUID = 1L;
-
- @Override
- public boolean isFixedLength() {
- return false;
- }
-
- @Override
- public int getFixedLength() {
- return -1;
- }
- };
-
- public static final IPointableFactory FACTORY = new IPointableFactory() {
- private static final long serialVersionUID = 1L;
-
- @Override
- public IPointable createPointable() {
- return new IntegerPointable();
- }
-
- @Override
- public ITypeTraits getTypeTraits() {
- return TYPE_TRAITS;
- }
- };
-
- public static int getInteger(byte[] bytes, int start) {
- return Marshal.getInt(bytes, start);
- }
-
- public static void setInteger(byte[] bytes, int start, int value) {
- Marshal.putInt(value, bytes, start);
- }
-
- public int getInteger() {
- return getInteger(bytes, start);
- }
-
- public void setInteger(int value) {
- setInteger(bytes, start, value);
- }
-
- public int preIncrement() {
- int v = getInteger();
- ++v;
- setInteger(v);
- return v;
- }
-
- public int postIncrement() {
- int v = getInteger();
- int ov = v++;
- setInteger(v);
- return ov;
- }
-
- @Override
- public int compareTo(IPointable pointer) {
- return compareTo(pointer.getByteArray(), pointer.getStartOffset(), pointer.getLength());
- }
-
- @Override
- public int compareTo(byte[] bytes, int start, int length) {
- int v = getInteger();
- int ov = getInteger(bytes, start);
- return v < ov ? -1 : (v > ov ? 1 : 0);
- }
-
- @Override
- public int hash() {
- return getInteger();
- }
-
- @Override
- public byte byteValue() {
- return (byte) getInteger();
- }
-
- @Override
- public short shortValue() {
- return (short) getInteger();
- }
-
- @Override
- public int intValue() {
- return getInteger();
- }
-
- @Override
- public long longValue() {
- return getInteger();
- }
-
- @Override
- public float floatValue() {
- return getInteger();
- }
-
- @Override
- public double doubleValue() {
- return getInteger();
- }
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/ConnectorPolicyAssignmentPolicy.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/ConnectorPolicyAssignmentPolicy.java
index fe44f51..61f16b2 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/ConnectorPolicyAssignmentPolicy.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/ConnectorPolicyAssignmentPolicy.java
@@ -1,42 +1,42 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.dataflow;
-
-import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;
-import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicy;
-import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicyAssignmentPolicy;
-import edu.uci.ics.hyracks.api.dataflow.connectors.PipeliningConnectorPolicy;
-import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedPipeliningConnectorPolicy;
-import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor;
-
-/**
- * used by precluster groupby
- */
-public class ConnectorPolicyAssignmentPolicy implements IConnectorPolicyAssignmentPolicy {
- private static final long serialVersionUID = 1L;
- private IConnectorPolicy senderSideMaterializePolicy = new SendSideMaterializedPipeliningConnectorPolicy();
- private IConnectorPolicy pipeliningPolicy = new PipeliningConnectorPolicy();
-
- @Override
- public IConnectorPolicy getConnectorPolicyAssignment(IConnectorDescriptor c, int nProducers, int nConsumers,
- int[] fanouts) {
- if (c instanceof MToNPartitioningMergingConnectorDescriptor) {
- return senderSideMaterializePolicy;
- } else {
- return pipeliningPolicy;
- }
- }
-}
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.dataflow;
+
+import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicy;
+import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicyAssignmentPolicy;
+import edu.uci.ics.hyracks.api.dataflow.connectors.PipeliningConnectorPolicy;
+import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedPipeliningConnectorPolicy;
+import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor;
+
+/**
+ * Connector policy chooser used by the precluster group-by.
+ * Merging M-to-N partitioning connectors get the send-side materialized pipelining policy;
+ * every other connector gets the plain pipelining policy.
+ */
+public class ConnectorPolicyAssignmentPolicy implements IConnectorPolicyAssignmentPolicy {
+    private static final long serialVersionUID = 1L;
+    private IConnectorPolicy senderSideMaterializePolicy = new SendSideMaterializedPipeliningConnectorPolicy();
+    private IConnectorPolicy pipeliningPolicy = new PipeliningConnectorPolicy();
+
+    /** Chooses by the concrete type of {@code c} only; the producer/consumer counts and fanouts are ignored. */
+    @Override
+    public IConnectorPolicy getConnectorPolicyAssignment(IConnectorDescriptor c, int nProducers, int nConsumers,
+            int[] fanouts) {
+        final boolean isMergingConnector = c instanceof MToNPartitioningMergingConnectorDescriptor;
+        return isMergingConnector ? senderSideMaterializePolicy : pipeliningPolicy;
+    }
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapKmerPositionToReadOperator.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapKmerPositionToReadOperator.java
index 8609519..e6e1f52 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapKmerPositionToReadOperator.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapKmerPositionToReadOperator.java
@@ -1,9 +1,27 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.genomix.hyracks.dataflow;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
import edu.uci.ics.genomix.hyracks.data.primitive.PositionReference;
import edu.uci.ics.genomix.type.PositionListWritable;
import edu.uci.ics.genomix.type.PositionWritable;
@@ -24,6 +42,9 @@
public class MapKmerPositionToReadOperator extends AbstractSingleActivityOperatorDescriptor {
+ private static final Log LOG = LogFactory.getLog(MapKmerPositionToReadOperator.class);
+ public static int WARNSIZE = 100 * 1000 * 5;
+
public MapKmerPositionToReadOperator(IOperatorDescriptorRegistry spec, RecordDescriptor recDesc, int readlength,
int kmerSize) {
super(spec, 1, 1);
@@ -64,6 +85,7 @@
private ArrayBackedValueStorage posListEntry;
private ArrayBackedValueStorage zeroPositionCollection;
private ArrayBackedValueStorage noneZeroPositionCollection;
+ private PositionListWritable plistEntry;
public MapKmerPositionToReadNodePushable(IHyracksTaskContext ctx, RecordDescriptor inputRecDesc,
RecordDescriptor outputRecDesc) {
@@ -74,6 +96,7 @@
this.posListEntry = new ArrayBackedValueStorage();
this.zeroPositionCollection = new ArrayBackedValueStorage();
this.noneZeroPositionCollection = new ArrayBackedValueStorage();
+ this.plistEntry = new PositionListWritable();
}
@Override
@@ -140,10 +163,13 @@
builder2.reset();
builder2.addField(pos.getByteArray(), pos.getStartOffset(), PositionReference.INTBYTES);
builder2.addField(pos.getByteArray(), pos.getStartOffset() + PositionReference.INTBYTES, 1);
-
+
if (posList2 == null) {
builder2.addFieldEndOffset();
} else {
+ if (posList2.getLength() > WARNSIZE){
+ LOG.warn("Hot overlap @" + pos.toString() + " :" + posList2.getLength());
+ }
writePosToFieldAndSkipSameReadID(pos, builder2.getDataOutput(), posList2);
builder2.addFieldEndOffset();
}
@@ -162,21 +188,23 @@
FrameUtils.flushFrame(writeBuffer, writer);
appender.reset(writeBuffer, true);
if (!appender.append(builder2.getFieldEndOffsets(), builder2.getByteArray(), 0, builder2.getSize())) {
- throw new IllegalStateException();
+ throw new IllegalStateException("length:" + builder2.getSize() );
}
}
} catch (HyracksDataException e) {
throw new IllegalStateException(
- "Failed to Add a field to the tuple by copying the data bytes from a byte array.");
+ "Failed to Add a field to the tuple by copying the data bytes from a byte array."
+ + e.getMessage());
}
}
private void writePosToFieldAndSkipSameReadID(PositionReference pos, DataOutput ds,
ArrayBackedValueStorage posList2) throws HyracksDataException {
- PositionListWritable plist = new PositionListWritable(PositionListWritable.getCountByDataLength(posList2
- .getLength()), posList2.getByteArray(), posList2.getStartOffset());
- for (PositionWritable p : plist) {
+ plistEntry.setNewReference(PositionListWritable.getCountByDataLength(posList2.getLength()),
+ posList2.getByteArray(), posList2.getStartOffset());
+ for (int i = 0; i < plistEntry.getCountOfPosition(); i++) {
+ PositionWritable p = plistEntry.getPosition(i);
if (!pos.isSameReadID(p)) {
try {
ds.write(p.getByteArray(), p.getStartOffset(), p.getLength());
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapReadToNodeOperator.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapReadToNodeOperator.java
index 7394b71..77907ee 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapReadToNodeOperator.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/MapReadToNodeOperator.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.genomix.hyracks.dataflow;
import java.io.IOException;
@@ -23,10 +38,12 @@
public class MapReadToNodeOperator extends AbstractSingleActivityOperatorDescriptor {
- public MapReadToNodeOperator(IOperatorDescriptorRegistry spec, RecordDescriptor outRecDesc, int kmerSize) {
+ public MapReadToNodeOperator(IOperatorDescriptorRegistry spec, RecordDescriptor outRecDesc, int kmerSize,
+ boolean bMergeNode) {
super(spec, 1, 1);
recordDescriptors[0] = outRecDesc;
this.kmerSize = kmerSize;
+ this.DoMergeNodeInRead = bMergeNode;
}
/**
@@ -46,6 +63,8 @@
public static final int OutputReverseReverseField = 5;
public static final int OutputKmerBytesField = 6;
+ public final boolean DoMergeNodeInRead;
+
public static final RecordDescriptor nodeOutputRec = new RecordDescriptor(new ISerializerDeserializer[7]);
/**
@@ -126,9 +145,9 @@
for (int i = InputInfoFieldStart + 2; i < accessor.getFieldCount(); i += 2) {
readNodesInfo(tIndex, readID, nextNodeEntry, nextNextNodeEntry, i);
- if (curNodeEntry.inDegree() > 1 || curNodeEntry.outDegree() > 0 || nextNodeEntry.inDegree() > 0
- || nextNodeEntry.outDegree() > 0 || nextNextNodeEntry.inDegree() > 0
- || nextNextNodeEntry.outDegree() > 0) {
+ if (!DoMergeNodeInRead || curNodeEntry.inDegree() > 1 || curNodeEntry.outDegree() > 0
+ || nextNodeEntry.inDegree() > 0 || nextNodeEntry.outDegree() > 0
+ || nextNextNodeEntry.inDegree() > 0 || nextNextNodeEntry.outDegree() > 0) {
connect(curNodeEntry, nextNodeEntry);
outputNode(curNodeEntry);
curNodeEntry.set(nextNodeEntry);
@@ -188,7 +207,8 @@
private void setReverseOutgoingList(NodeReference node, int offset) {
setCachList(offset);
- for (PositionWritable pos : cachePositionList) {
+ for (int i = 0; i < cachePositionList.getCountOfPosition(); i++) {
+ PositionWritable pos = cachePositionList.getPosition(i);
if (pos.getPosInRead() > 0) {
node.getRFList().append(pos);
} else {
@@ -199,7 +219,8 @@
private void setReverseIncomingList(NodeReference node, int offset) {
setCachList(offset);
- for (PositionWritable pos : cachePositionList) {
+ for (int i = 0; i < cachePositionList.getCountOfPosition(); i++) {
+ PositionWritable pos = cachePositionList.getPosition(i);
if (pos.getPosInRead() > 0) {
if (pos.getPosInRead() > 1) {
node.getFRList().append(pos.getReadID(), (byte) (pos.getPosInRead() - 1));
@@ -216,7 +237,8 @@
private void setForwardOutgoingList(NodeReference node, int offset) {
setCachList(offset);
- for (PositionWritable pos : cachePositionList) {
+ for (int i = 0; i < cachePositionList.getCountOfPosition(); i++) {
+ PositionWritable pos = cachePositionList.getPosition(i);
if (pos.getPosInRead() > 0) {
node.getFFList().append(pos);
} else {
@@ -227,7 +249,8 @@
private void setForwardIncomingList(NodeReference node, int offset) {
setCachList(offset);
- for (PositionWritable pos : cachePositionList) {
+ for (int i = 0; i < cachePositionList.getCountOfPosition(); i++) {
+ PositionWritable pos = cachePositionList.getPosition(i);
if (pos.getPosInRead() > 0) {
if (pos.getPosInRead() > 1) {
node.getRRList().append(pos.getReadID(), (byte) (pos.getPosInRead() - 1));
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/ReadsKeyValueParserFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/ReadsKeyValueParserFactory.java
index 3667d43..8c69201 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/ReadsKeyValueParserFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/ReadsKeyValueParserFactory.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2012 by The Regents of the University of California
+ * Copyright 2009-2013 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
@@ -69,7 +69,7 @@
private KmerBytesWritable kmer = new KmerBytesWritable(kmerSize);
private PositionReference pos = new PositionReference();
-
+
@Override
public void parse(LongWritable key, Text value, IFrameWriter writer) throws HyracksDataException {
String[] geneLine = value.toString().split("\\t"); // Read the Real Gene Line
@@ -158,4 +158,4 @@
};
}
-}
\ No newline at end of file
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/AggregateKmerAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/AggregateKmerAggregateFactory.java
index 6ee7946..52aeb7a 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/AggregateKmerAggregateFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/AggregateKmerAggregateFactory.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.genomix.hyracks.dataflow.aggregators;
import java.io.DataOutput;
@@ -41,7 +56,6 @@
@Override
public void close() {
- // TODO Auto-generated method stub
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/AggregateReadIDAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/AggregateReadIDAggregateFactory.java
index d0fc24b..92b85b3 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/AggregateReadIDAggregateFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/AggregateReadIDAggregateFactory.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.genomix.hyracks.dataflow.aggregators;
import java.io.DataOutput;
@@ -96,7 +111,6 @@
@Override
public void reset() {
- // TODO Auto-generated method stub
}
@@ -140,7 +154,6 @@
@Override
public void close() {
- // TODO Auto-generated method stub
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeKmerAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeKmerAggregateFactory.java
index eab197e..ce178c6 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeKmerAggregateFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeKmerAggregateFactory.java
@@ -1,117 +1,116 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.dataflow.aggregators;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-import edu.uci.ics.genomix.hyracks.data.primitive.PositionReference;
-import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
-
-public class MergeKmerAggregateFactory implements IAggregatorDescriptorFactory {
- private static final long serialVersionUID = 1L;
- private static final Log LOG = LogFactory.getLog(MergeKmerAggregateFactory.class);
- @Override
- public IAggregatorDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor,
- RecordDescriptor outRecordDescriptor, int[] keyFields, int[] keyFieldsInPartialResults)
- throws HyracksDataException {
- final int frameSize = ctx.getFrameSize();
- return new IAggregatorDescriptor() {
-
- private PositionReference position = new PositionReference();
-
- @Override
- public AggregateState createAggregateStates() {
- return new AggregateState(new ArrayBackedValueStorage());
- }
-
- @Override
- public void init(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
- AggregateState state) throws HyracksDataException {
- ArrayBackedValueStorage inputVal = (ArrayBackedValueStorage) state.state;
- inputVal.reset();
- int leadOffset = accessor.getTupleStartOffset(tIndex) + accessor.getFieldSlotsLength();
- for (int offset = accessor.getFieldStartOffset(tIndex, 1); offset < accessor.getFieldEndOffset(tIndex,
- 1); offset += PositionReference.LENGTH) {
- position.setNewReference(accessor.getBuffer().array(), leadOffset + offset);
- inputVal.append(position);
- }
- //make a fake feild to cheat caller
- tupleBuilder.addFieldEndOffset();
- }
-
- @Override
- public void reset() {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public void aggregate(IFrameTupleAccessor accessor, int tIndex, IFrameTupleAccessor stateAccessor,
- int stateTupleIndex, AggregateState state) throws HyracksDataException {
- ArrayBackedValueStorage inputVal = (ArrayBackedValueStorage) state.state;
- int leadOffset = accessor.getTupleStartOffset(tIndex) + accessor.getFieldSlotsLength();
- for (int offset = accessor.getFieldStartOffset(tIndex, 1); offset < accessor.getFieldEndOffset(tIndex,
- 1); offset += PositionReference.LENGTH) {
- position.setNewReference(accessor.getBuffer().array(), leadOffset + offset);
- inputVal.append(position);
- }
- }
-
- @Override
- public void outputPartialResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
- AggregateState state) throws HyracksDataException {
- throw new IllegalStateException("partial result method should not be called");
- }
-
- @Override
- public void outputFinalResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
- AggregateState state) throws HyracksDataException {
- DataOutput fieldOutput = tupleBuilder.getDataOutput();
- ArrayBackedValueStorage inputVal = (ArrayBackedValueStorage) state.state;
- try {
- if (inputVal.getLength() > frameSize/2){
- LOG.warn("MergeKmer: output data size is too big: " + inputVal.getLength());
- }
- fieldOutput.write(inputVal.getByteArray(), inputVal.getStartOffset(), inputVal.getLength());
- tupleBuilder.addFieldEndOffset();
-
- } catch (IOException e) {
- throw new HyracksDataException("I/O exception when writing aggregation to the output buffer.");
- }
- }
-
- @Override
- public void close() {
- // TODO Auto-generated method stub
-
- }
-
- };
-
- }
-}
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.genomix.hyracks.dataflow.aggregators;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import edu.uci.ics.genomix.hyracks.data.primitive.PositionReference;
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
+
+public class MergeKmerAggregateFactory implements IAggregatorDescriptorFactory {
+    private static final long serialVersionUID = 1L;
+    private static final Log LOG = LogFactory.getLog(MergeKmerAggregateFactory.class);
+
+    /**
+     * Creates an aggregator whose state is a single byte buffer: the positions stored in field 1 of
+     * each tuple handed to init/aggregate are appended to it, and the buffer is the final result.
+     */
+    @Override
+    public IAggregatorDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor,
+            RecordDescriptor outRecordDescriptor, int[] keyFields, int[] keyFieldsInPartialResults)
+            throws HyracksDataException {
+        final int frameSize = ctx.getFrameSize();
+        return new IAggregatorDescriptor() {
+            private PositionReference position = new PositionReference();
+
+            @Override
+            public AggregateState createAggregateStates() {
+                return new AggregateState(new ArrayBackedValueStorage());
+            }
+
+            /** Appends each PositionReference.LENGTH-sized slice of field 1 of tuple tIndex to target. */
+            private void appendPositions(IFrameTupleAccessor accessor, int tIndex, ArrayBackedValueStorage target) {
+                int leadOffset = accessor.getTupleStartOffset(tIndex) + accessor.getFieldSlotsLength();
+                int end = accessor.getFieldEndOffset(tIndex, 1);
+                for (int off = accessor.getFieldStartOffset(tIndex, 1); off < end; off += PositionReference.LENGTH) {
+                    position.setNewReference(accessor.getBuffer().array(), leadOffset + off);
+                    target.append(position);
+                }
+            }
+
+            @Override
+            public void init(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
+                    AggregateState state) throws HyracksDataException {
+                ArrayBackedValueStorage inputVal = (ArrayBackedValueStorage) state.state;
+                inputVal.reset();
+                appendPositions(accessor, tIndex, inputVal);
+                // add a fake (empty) field so the caller sees a field for the aggregate value
+                tupleBuilder.addFieldEndOffset();
+            }
+
+            @Override
+            public void reset() {
+                // no-op
+            }
+
+            @Override
+            public void aggregate(IFrameTupleAccessor accessor, int tIndex, IFrameTupleAccessor stateAccessor,
+                    int stateTupleIndex, AggregateState state) throws HyracksDataException {
+                appendPositions(accessor, tIndex, (ArrayBackedValueStorage) state.state);
+            }
+
+            @Override
+            public void outputPartialResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
+                    AggregateState state) throws HyracksDataException {
+                throw new IllegalStateException("partial result method should not be called");
+            }
+
+            @Override
+            public void outputFinalResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
+                    AggregateState state) throws HyracksDataException {
+                DataOutput fieldOutput = tupleBuilder.getDataOutput();
+                ArrayBackedValueStorage inputVal = (ArrayBackedValueStorage) state.state;
+                try {
+                    if (inputVal.getLength() > frameSize / 2) {
+                        LOG.warn("MergeKmer: output data size is too big: " + inputVal.getLength());
+                    }
+                    fieldOutput.write(inputVal.getByteArray(), inputVal.getStartOffset(), inputVal.getLength());
+                    tupleBuilder.addFieldEndOffset();
+                } catch (IOException e) {
+                    // keep the IOException as the cause so the underlying failure is not lost
+                    throw new HyracksDataException("I/O exception when writing aggregation to the output buffer.", e);
+                }
+            }
+
+            @Override
+            public void close() {
+                // no-op
+            }
+        };
+    }
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeReadIDAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeReadIDAggregateFactory.java
index 974dead..73e3093 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeReadIDAggregateFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/aggregators/MergeReadIDAggregateFactory.java
@@ -1,3 +1,18 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package edu.uci.ics.genomix.hyracks.dataflow.aggregators;
import java.io.DataOutput;
@@ -146,7 +161,6 @@
@Override
public void reset() {
- // TODO Auto-generated method stub
}
@@ -195,7 +209,7 @@
LOG.warn("MergeReadID on read:" + readID + " is of size: " + totalSize + ", current frameSize:"
+ frameSize + "\n Recommendate to enlarge the FrameSize");
}
- if (totalSize > frameSize){
+ if (totalSize > frameSize) {
for (StackTraceElement ste : Thread.currentThread().getStackTrace()) {
System.out.println(ste);
}
@@ -208,7 +222,6 @@
@Override
public void close() {
- // TODO Auto-generated method stub
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/KMerSequenceWriterFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/KMerSequenceWriterFactory.java
index 0a1ef96..bb8ea16 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/KMerSequenceWriterFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/KMerSequenceWriterFactory.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2012 by The Regents of the University of California
+ * Copyright 2009-2013 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
@@ -71,7 +71,7 @@
throw new IllegalArgumentException("Not enough kmer bytes");
}
reEnterKey.setNewReference(tuple.getFieldData(InputKmerField), tuple.getFieldStart(InputKmerField));
- int countOfPos = tuple.getFieldLength(InputPositionListField)/PositionWritable.LENGTH;
+ int countOfPos = tuple.getFieldLength(InputPositionListField) / PositionWritable.LENGTH;
if (tuple.getFieldLength(InputPositionListField) % PositionWritable.LENGTH != 0) {
throw new IllegalArgumentException("Invalid count of position byte");
}
@@ -96,7 +96,6 @@
@Override
public void close(DataOutput output) throws HyracksDataException {
- // TODO Auto-generated method stub
}
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/KMerTextWriterFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/KMerTextWriterFactory.java
index 7ea065d..683fb80 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/KMerTextWriterFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/KMerTextWriterFactory.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2012 by The Regents of the University of California
+ * Copyright 2009-2013 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
@@ -12,7 +12,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package edu.uci.ics.genomix.hyracks.dataflow.io;
import java.io.DataOutput;
@@ -33,15 +32,17 @@
*
*/
private static final long serialVersionUID = 1L;
-
+
private final int kmerSize;
+
public KMerTextWriterFactory(int k) {
- kmerSize =k;
+ kmerSize = k;
}
public class TupleWriter implements ITupleWriter {
private KmerBytesWritable kmer = new KmerBytesWritable(kmerSize);
private PositionListWritable plist = new PositionListWritable();
+
@Override
public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
try {
@@ -69,19 +70,16 @@
@Override
public void open(DataOutput output) throws HyracksDataException {
- // TODO Auto-generated method stub
}
@Override
public void close(DataOutput output) throws HyracksDataException {
- // TODO Auto-generated method stub
}
}
@Override
public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException {
- // TODO Auto-generated method stub
return new TupleWriter();
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/NodeSequenceWriterFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/NodeSequenceWriterFactory.java
index 00409ef..affab71 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/NodeSequenceWriterFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/NodeSequenceWriterFactory.java
@@ -1,3 +1,17 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package edu.uci.ics.genomix.hyracks.dataflow.io;
import java.io.DataOutput;
@@ -36,7 +50,7 @@
public static final int InputFRField = MapReadToNodeOperator.OutputForwardReverseField;
public static final int InputRFField = MapReadToNodeOperator.OutputReverseForwardField;
public static final int InputRRField = MapReadToNodeOperator.OutputReverseReverseField;
-
+
public static final int InputKmerBytesField = MapReadToNodeOperator.OutputKmerBytesField;
private ConfFactory confFactory;
@@ -60,8 +74,8 @@
@Override
public void open(DataOutput output) throws HyracksDataException {
try {
- writer = SequenceFile.createWriter(cf.getConf(), (FSDataOutputStream) output, NodeWritable.class, NullWritable.class,
- CompressionType.NONE, null);
+ writer = SequenceFile.createWriter(cf.getConf(), (FSDataOutputStream) output, NodeWritable.class,
+ NullWritable.class, CompressionType.NONE, null);
} catch (IOException e) {
throw new HyracksDataException(e);
}
@@ -79,7 +93,7 @@
tuple.getFieldData(InputRFField), tuple.getFieldStart(InputRFField));
node.getRRList().setNewReference(tuple.getFieldLength(InputRRField) / PositionWritable.LENGTH,
tuple.getFieldData(InputRRField), tuple.getFieldStart(InputRRField));
-
+
node.getKmer().setNewReference(
Marshal.getInt(tuple.getFieldData(NodeSequenceWriterFactory.InputCountOfKmerField),
tuple.getFieldStart(NodeSequenceWriterFactory.InputCountOfKmerField)),
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/NodeTextWriterFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/NodeTextWriterFactory.java
index cec702e..54adf24 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/NodeTextWriterFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/dataflow/io/NodeTextWriterFactory.java
@@ -1,3 +1,17 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package edu.uci.ics.genomix.hyracks.dataflow.io;
import java.io.DataOutput;
@@ -31,7 +45,6 @@
@Override
public void open(DataOutput output) throws HyracksDataException {
- // TODO Auto-generated method stub
}
@@ -71,7 +84,6 @@
@Override
public void close(DataOutput output) throws HyracksDataException {
- // TODO Auto-generated method stub
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/driver/Driver.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/driver/Driver.java
index 5014a8a..4c24919 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/driver/Driver.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/driver/Driver.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2012 by The Regents of the University of California
+ * Copyright 2009-2013 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
@@ -31,6 +31,7 @@
import edu.uci.ics.genomix.hyracks.job.JobGenCreateKmerInfo;
import edu.uci.ics.genomix.hyracks.job.JobGenGroupbyReadID;
import edu.uci.ics.genomix.hyracks.job.JobGenMapKmerToRead;
+import edu.uci.ics.genomix.hyracks.job.JobGenUnMerged;
import edu.uci.ics.hyracks.api.client.HyracksConnection;
import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
@@ -47,6 +48,7 @@
OUTPUT_MAP_KMER_TO_READ,
OUTPUT_GROUPBY_READID,
BUILD_DEBRUJIN_GRAPH,
+ BUILD_UNMERGED_GRAPH,
}
private static final String IS_PROFILING = "genomix.driver.profiling";
@@ -101,7 +103,7 @@
jobGen = new JobGenCreateKmerInfo(job, scheduler, ncMap, numPartitionPerMachine);
break;
case OUTPUT_MAP_KMER_TO_READ:
- jobGen = new JobGenMapKmerToRead(job,scheduler, ncMap, numPartitionPerMachine);
+ jobGen = new JobGenMapKmerToRead(job, scheduler, ncMap, numPartitionPerMachine);
break;
case OUTPUT_GROUPBY_READID:
jobGen = new JobGenGroupbyReadID(job, scheduler, ncMap, numPartitionPerMachine);
@@ -109,6 +111,8 @@
case CHECK_KMERREADER:
jobGen = new JobGenCheckReader(job, scheduler, ncMap, numPartitionPerMachine);
break;
+                case BUILD_UNMERGED_GRAPH:
+                    jobGen = new JobGenUnMerged(job, scheduler, ncMap, numPartitionPerMachine);
+                    // Terminate the arm like its siblings so a case appended later cannot be fallen into.
+                    break;
}
start = System.currentTimeMillis();
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/GenomixJobConf.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/GenomixJobConf.java
index 1f12bb5..d1f47c2 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/GenomixJobConf.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/GenomixJobConf.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2012 by The Regents of the University of California
+ * Copyright 2009-2013 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
@@ -20,6 +20,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
+@SuppressWarnings("deprecation")
public class GenomixJobConf extends JobConf {
public static final String JOB_NAME = "genomix";
@@ -50,7 +51,7 @@
public static final int DEFAULT_KMERLEN = 21;
public static final int DEFAULT_READLEN = 124;
- public static final int DEFAULT_FRAME_SIZE = 128*1024;
+ public static final int DEFAULT_FRAME_SIZE = 128 * 1024;
public static final int DEFAULT_FRAME_LIMIT = 4096;
public static final int DEFAULT_TABLE_SIZE = 10485767;
public static final long DEFAULT_GROUPBY_HYBRID_INPUTSIZE = 154000000L;
@@ -62,15 +63,14 @@
public static final boolean DEFAULT_REVERSED = true;
public static final String JOB_PLAN_GRAPHBUILD = "graphbuild";
- public static final String JOB_PLAN_GRAPHSTAT = "graphstat";
-
+ public static final String JOB_PLAN_GRAPHSTAT = "graphstat";
+
public static final String GROUPBY_TYPE_HYBRID = "hybrid";
public static final String GROUPBY_TYPE_EXTERNAL = "external";
public static final String GROUPBY_TYPE_PRECLUSTER = "precluster";
public static final String OUTPUT_FORMAT_BINARY = "binary";
public static final String OUTPUT_FORMAT_TEXT = "text";
-
public GenomixJobConf() throws IOException {
super(new Configuration());
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGen.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGen.java
index a9dfc9b..c8cb701 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGen.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGen.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2012 by The Regents of the University of California
+ * Copyright 2009-2013 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
@@ -18,8 +18,6 @@
import java.io.Serializable;
import java.util.UUID;
-import org.apache.hadoop.mapred.JobConf;
-
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.exceptions.HyracksException;
import edu.uci.ics.hyracks.api.job.JobSpecification;
@@ -39,4 +37,4 @@
}
public abstract JobSpecification generateJob() throws HyracksException;
-}
\ No newline at end of file
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenBrujinGraph.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenBrujinGraph.java
index 94d619a..09794d0 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenBrujinGraph.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenBrujinGraph.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2012 by The Regents of the University of California
+ * Copyright 2009-2013 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
@@ -26,10 +26,8 @@
import edu.uci.ics.genomix.hyracks.data.accessors.KmerHashPartitioncomputerFactory;
import edu.uci.ics.genomix.hyracks.data.accessors.KmerNormarlizedComputerFactory;
-import edu.uci.ics.genomix.hyracks.data.accessors.ReadIDNormarlizedComputeFactory;
import edu.uci.ics.genomix.hyracks.data.accessors.ReadIDPartitionComputerFactory;
import edu.uci.ics.genomix.hyracks.data.primitive.KmerPointable;
-import edu.uci.ics.genomix.hyracks.data.primitive.ReadIDPointable;
import edu.uci.ics.genomix.hyracks.dataflow.ConnectorPolicyAssignmentPolicy;
import edu.uci.ics.genomix.hyracks.dataflow.MapKmerPositionToReadOperator;
import edu.uci.ics.genomix.hyracks.dataflow.MapReadToNodeOperator;
@@ -58,6 +56,7 @@
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory;
import edu.uci.ics.hyracks.data.std.api.IPointableFactory;
+import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
import edu.uci.ics.hyracks.dataflow.common.data.partition.FieldHashPartitionComputerFactory;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor;
@@ -74,6 +73,7 @@
import edu.uci.ics.hyracks.hdfs.dataflow.HDFSWriteOperatorDescriptor;
import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
+@SuppressWarnings("deprecation")
public class JobGenBrujinGraph extends JobGen {
/**
*
@@ -231,7 +231,7 @@
int[] keyFields = new int[] { 0 }; // the id of grouped key
// (ReadID, {(PosInRead,{OtherPositoin..},Kmer) ...}
ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(jobSpec, frameLimits, keyFields,
- new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(ReadIDPointable.FACTORY) },
+ new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory.of(IntegerPointable.FACTORY) },
MapKmerPositionToReadOperator.readIDOutputRec);
connectOperators(jobSpec, mapKmerToRead, ncNodeNames, sorter, ncNodeNames, new OneToOneConnectorDescriptor(
jobSpec));
@@ -239,9 +239,8 @@
RecordDescriptor readIDFinalRec = new RecordDescriptor(
new ISerializerDeserializer[1 + 2 * MergeReadIDAggregateFactory.getPositionCount(readLength, kmerSize)]);
Object[] objs = generateAggeragateDescriptorbyType(jobSpec, keyFields, new AggregateReadIDAggregateFactory(),
- new MergeReadIDAggregateFactory(readLength, kmerSize), new ReadIDPartitionComputerFactory(),
- new ReadIDNormarlizedComputeFactory(), ReadIDPointable.FACTORY,
- AggregateReadIDAggregateFactory.readIDAggregateRec, readIDFinalRec);
+ new MergeReadIDAggregateFactory(readLength, kmerSize), new ReadIDPartitionComputerFactory(), null,
+ IntegerPointable.FACTORY, AggregateReadIDAggregateFactory.readIDAggregateRec, readIDFinalRec);
AbstractOperatorDescriptor readLocalAggregator = (AbstractOperatorDescriptor) objs[0];
connectOperators(jobSpec, sorter, ncNodeNames, readLocalAggregator, ncNodeNames,
new OneToOneConnectorDescriptor(jobSpec));
@@ -259,7 +258,7 @@
// OutgoingList, Kmer)
AbstractOperatorDescriptor mapEachReadToNode = new MapReadToNodeOperator(jobSpec,
- MapReadToNodeOperator.nodeOutputRec, kmerSize);
+ MapReadToNodeOperator.nodeOutputRec, kmerSize, true);
connectOperators(jobSpec, readCrossAggregator, ncNodeNames, mapEachReadToNode, ncNodeNames,
new OneToOneConnectorDescriptor(jobSpec));
return mapEachReadToNode;
@@ -367,7 +366,6 @@
}
try {
hadoopJobConfFactory = new ConfFactory(new JobConf(conf));
- @SuppressWarnings("deprecation")
InputSplit[] splits = hadoopJobConfFactory.getConf().getInputFormat()
.getSplits(hadoopJobConfFactory.getConf(), ncNodeNames.length);
readSchedule = scheduler.getLocationConstraints(splits);
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCheckReader.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCheckReader.java
index 28e4ff5..a9e6d97 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCheckReader.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCheckReader.java
@@ -1,3 +1,17 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package edu.uci.ics.genomix.hyracks.job;
import java.io.DataOutput;
@@ -31,7 +45,6 @@
public JobGenCheckReader(GenomixJobConf job, Scheduler scheduler, Map<String, NodeControllerInfo> ncMap,
int numPartitionPerMachine) throws HyracksDataException {
super(job, scheduler, ncMap, numPartitionPerMachine);
- // TODO Auto-generated constructor stub
}
@Override
@@ -50,56 +63,56 @@
public AbstractSingleActivityOperatorDescriptor generateRootByWriteKmerReader(JobSpecification jobSpec,
HDFSReadOperatorDescriptor readOperator) throws HyracksException {
// Output Kmer
- HDFSWriteOperatorDescriptor writeKmerOperator = new HDFSWriteOperatorDescriptor(jobSpec, hadoopJobConfFactory.getConf(), new ITupleWriterFactory(){
+ HDFSWriteOperatorDescriptor writeKmerOperator = new HDFSWriteOperatorDescriptor(jobSpec,
+ hadoopJobConfFactory.getConf(), new ITupleWriterFactory() {
- /**
+ /**
*
*/
- private static final long serialVersionUID = 1L;
-
-
- @Override
- public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException {
- return new ITupleWriter(){
-
- private KmerBytesWritable kmer = new KmerBytesWritable(kmerSize);
- private PositionWritable pos = new PositionWritable();
-
- @Override
- public void open(DataOutput output) throws HyracksDataException {
- // TODO Auto-generated method stub
-
- }
+ private static final long serialVersionUID = 1L;
@Override
- public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
- try {
- if (kmer.getLength() > tuple.getFieldLength(ReadsKeyValueParserFactory.OutputKmerField)) {
- throw new IllegalArgumentException("Not enough kmer bytes");
+ public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException {
+ return new ITupleWriter() {
+
+ private KmerBytesWritable kmer = new KmerBytesWritable(kmerSize);
+ private PositionWritable pos = new PositionWritable();
+
+ @Override
+ public void open(DataOutput output) throws HyracksDataException {
}
- kmer.setNewReference(tuple.getFieldData(ReadsKeyValueParserFactory.OutputKmerField), tuple.getFieldStart(ReadsKeyValueParserFactory.OutputKmerField));
- pos.setNewReference(tuple.getFieldData(ReadsKeyValueParserFactory.OutputPosition),
- tuple.getFieldStart(ReadsKeyValueParserFactory.OutputPosition));
- output.write(kmer.toString().getBytes());
- output.writeByte('\t');
- output.write(pos.toString().getBytes());
- output.writeByte('\n');
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
+ @Override
+ public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
+ try {
+ if (kmer.getLength() > tuple
+ .getFieldLength(ReadsKeyValueParserFactory.OutputKmerField)) {
+ throw new IllegalArgumentException("Not enough kmer bytes");
+ }
+ kmer.setNewReference(
+ tuple.getFieldData(ReadsKeyValueParserFactory.OutputKmerField),
+ tuple.getFieldStart(ReadsKeyValueParserFactory.OutputKmerField));
+ pos.setNewReference(tuple.getFieldData(ReadsKeyValueParserFactory.OutputPosition),
+ tuple.getFieldStart(ReadsKeyValueParserFactory.OutputPosition));
+
+ output.write(kmer.toString().getBytes());
+ output.writeByte('\t');
+ output.write(pos.toString().getBytes());
+ output.writeByte('\n');
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public void close(DataOutput output) throws HyracksDataException {
+
+ }
+
+ };
}
- @Override
- public void close(DataOutput output) throws HyracksDataException {
- // TODO Auto-generated method stub
-
- }
-
- };
- }
-
- });
+ });
connectOperators(jobSpec, readOperator, ncNodeNames, writeKmerOperator, ncNodeNames,
new OneToOneConnectorDescriptor(jobSpec));
jobSpec.addRoot(writeKmerOperator);
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCreateKmerInfo.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCreateKmerInfo.java
index 4ee36a0..5202ba2 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCreateKmerInfo.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenCreateKmerInfo.java
@@ -1,3 +1,17 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package edu.uci.ics.genomix.hyracks.job;
import java.util.Map;
@@ -20,7 +34,6 @@
public JobGenCreateKmerInfo(GenomixJobConf job, Scheduler scheduler, Map<String, NodeControllerInfo> ncMap,
int numPartitionPerMachine) throws HyracksDataException {
super(job, scheduler, ncMap, numPartitionPerMachine);
- // TODO Auto-generated constructor stub
}
@Override
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenGroupbyReadID.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenGroupbyReadID.java
index 201be03..5fce20e 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenGroupbyReadID.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenGroupbyReadID.java
@@ -1,3 +1,17 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package edu.uci.ics.genomix.hyracks.job;
import java.io.DataOutput;
@@ -31,7 +45,6 @@
public JobGenGroupbyReadID(GenomixJobConf job, Scheduler scheduler, Map<String, NodeControllerInfo> ncMap,
int numPartitionPerMachine) throws HyracksDataException {
super(job, scheduler, ncMap, numPartitionPerMachine);
- // TODO Auto-generated constructor stub
}
@Override
@@ -71,7 +84,6 @@
@Override
public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException {
- // TODO Auto-generated method stub
return new ITupleWriter() {
private KmerBytesWritable kmer = new KmerBytesWritable(kmerSize);
@@ -79,7 +91,6 @@
@Override
public void open(DataOutput output) throws HyracksDataException {
- // TODO Auto-generated method stub
}
@@ -131,7 +142,6 @@
@Override
public void close(DataOutput output) throws HyracksDataException {
- // TODO Auto-generated method stub
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenMapKmerToRead.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenMapKmerToRead.java
index 2872a2d..6c8feec 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenMapKmerToRead.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenMapKmerToRead.java
@@ -1,3 +1,17 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package edu.uci.ics.genomix.hyracks.job;
import java.io.DataOutput;
@@ -6,7 +20,6 @@
import edu.uci.ics.genomix.data.Marshal;
import edu.uci.ics.genomix.hyracks.dataflow.MapKmerPositionToReadOperator;
-import edu.uci.ics.genomix.hyracks.dataflow.ReadsKeyValueParserFactory;
import edu.uci.ics.genomix.type.KmerBytesWritable;
import edu.uci.ics.genomix.type.PositionListWritable;
import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
@@ -33,7 +46,6 @@
public JobGenMapKmerToRead(GenomixJobConf job, Scheduler scheduler, Map<String, NodeControllerInfo> ncMap,
int numPartitionPerMachine) throws HyracksDataException {
super(job, scheduler, ncMap, numPartitionPerMachine);
- // TODO Auto-generated constructor stub
}
public AbstractOperatorDescriptor generateRootByWriteMapperFromKmerToReadID(JobSpecification jobSpec,
@@ -56,7 +68,6 @@
@Override
public void open(DataOutput output) throws HyracksDataException {
- // TODO Auto-generated method stub
}
@@ -76,7 +87,6 @@
tuple.getFieldData(MapKmerPositionToReadOperator.OutputOtherReadIDListField),
tuple.getFieldStart(MapKmerPositionToReadOperator.OutputOtherReadIDListField));
-
String kmerString = "";
if (posInRead > 0) {
if (kmer.getLength() > tuple
@@ -105,7 +115,6 @@
@Override
public void close(DataOutput output) throws HyracksDataException {
- // TODO Auto-generated method stub
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenUnMerged.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenUnMerged.java
new file mode 100644
index 0000000..21b6385
--- /dev/null
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/job/JobGenUnMerged.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.genomix.hyracks.job;
+
+import java.util.Map;
+
+import edu.uci.ics.genomix.hyracks.dataflow.MapReadToNodeOperator;
+import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
+import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
+
+/**
+ * Job generator that reuses {@link JobGenBrujinGraph} and overrides only the read-to-node mapping stage, so
+ * that {@link MapReadToNodeOperator} is constructed with its last constructor argument set to {@code false}
+ * (the parent class passes {@code true}).
+ * <p>
+ * NOTE(review): presumably that flag decides whether nodes are merged, as the class name suggests - confirm
+ * against MapReadToNodeOperator.
+ */
+public class JobGenUnMerged extends JobGenBrujinGraph {
+
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Forwards all arguments unchanged to the {@link JobGenBrujinGraph} constructor.
+     *
+     * @throws HyracksDataException
+     *             if the parent constructor throws it
+     */
+    public JobGenUnMerged(GenomixJobConf job, Scheduler scheduler, Map<String, NodeControllerInfo> ncMap,
+            int numPartitionPerMachine) throws HyracksDataException {
+        super(job, scheduler, ncMap, numPartitionPerMachine);
+    }
+
+    /**
+     * Builds the read-to-node mapping operator and connects it one-to-one after {@code readCrossAggregator},
+     * using the same node names on both sides of the connector.
+     *
+     * @param jobSpec
+     *            job specification the new operator and connector are created for
+     * @param readCrossAggregator
+     *            upstream operator whose output feeds the mapper
+     * @return the newly created {@link MapReadToNodeOperator}
+     */
+    @Override
+    public AbstractOperatorDescriptor generateMapperFromReadToNode(JobSpecification jobSpec,
+            AbstractOperatorDescriptor readCrossAggregator) {
+        // The only difference from the parent implementation is the trailing 'false' argument.
+        AbstractOperatorDescriptor mapEachReadToNode = new MapReadToNodeOperator(jobSpec,
+                MapReadToNodeOperator.nodeOutputRec, kmerSize, false);
+        connectOperators(jobSpec, readCrossAggregator, ncNodeNames, mapEachReadToNode, ncNodeNames,
+                new OneToOneConnectorDescriptor(jobSpec));
+        return mapEachReadToNode;
+    }
+}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/util/ByteComparatorFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/util/ByteComparatorFactory.java
deleted file mode 100644
index b070b56..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/util/ByteComparatorFactory.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.util;
-
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
-
-public class ByteComparatorFactory implements IBinaryComparatorFactory, IBinaryHashFunctionFactory {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
- return b1[s1] - b2[s2];
- }
-
- };
- }
-
- @Override
- public IBinaryHashFunction createBinaryHashFunction() {
- return new IBinaryHashFunction() {
-
- @Override
- public int hash(byte[] bytes, int offset, int length) {
- return bytes[offset];
- }
-
- };
- }
-
-}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/util/StatCountAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/util/StatCountAggregateFactory.java
index f483a9c..26daa96 100644
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/util/StatCountAggregateFactory.java
+++ b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/util/StatCountAggregateFactory.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2012 by The Regents of the University of California
+ * Copyright 2009-2013 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
@@ -12,7 +12,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package edu.uci.ics.genomix.hyracks.util;
import java.io.DataOutput;
@@ -44,7 +43,6 @@
@Override
public AggregateState createAggregateStates() {
- // TODO Auto-generated method stub
return null;
}
@@ -63,7 +61,6 @@
@Override
public void reset() {
- // TODO Auto-generated method stub
}
@@ -112,7 +109,6 @@
@Override
public void close() {
- // TODO Auto-generated method stub
}
@@ -122,7 +118,6 @@
public IAggregatorDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor,
RecordDescriptor outRecordDescriptor, int[] keyFields, int[] keyFieldsInPartialResults)
throws HyracksDataException {
- // TODO Auto-generated method stub
return new CountAggregator(keyFields);
}
diff --git a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/util/StatSumAggregateFactory.java b/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/util/StatSumAggregateFactory.java
deleted file mode 100644
index fb37056..0000000
--- a/genomix/genomix-hyracks/src/main/java/edu/uci/ics/genomix/hyracks/util/StatSumAggregateFactory.java
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright 2009-2012 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.genomix.hyracks.util;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
-import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
-
-public class StatSumAggregateFactory implements IAggregatorDescriptorFactory {
-
- /**
- *
- */
- private static final long serialVersionUID = 1L;
-
- public class DistributeAggregatorDescriptor implements IAggregatorDescriptor {
-
- private final int[] keyFields;
-
- public DistributeAggregatorDescriptor(int[] keyFields) {
- this.keyFields = keyFields;
- }
-
- @Override
- public AggregateState createAggregateStates() {
- // TODO Auto-generated method stub
- return null;
- }
-
- protected int getCount(IFrameTupleAccessor accessor, int tIndex) {
- int tupleOffset = accessor.getTupleStartOffset(tIndex);
- int fieldStart = accessor.getFieldStartOffset(tIndex, 1);
- int countoffset = tupleOffset + accessor.getFieldSlotsLength() + fieldStart;
- byte[] data = accessor.getBuffer().array();
- return IntegerSerializerDeserializer.getInt(data, countoffset);
- }
-
- @Override
- public void init(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex, AggregateState state)
- throws HyracksDataException {
- int count = getCount(accessor, tIndex);
-
- DataOutput fieldOutput = tupleBuilder.getDataOutput();
- try {
- fieldOutput.writeInt(count);
- tupleBuilder.addFieldEndOffset();
- } catch (IOException e) {
- throw new HyracksDataException("I/O exception when initializing the aggregator.");
- }
- }
-
- @Override
- public void reset() {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public void aggregate(IFrameTupleAccessor accessor, int tIndex, IFrameTupleAccessor stateAccessor,
- int stateTupleIndex, AggregateState state) throws HyracksDataException {
- int count = getCount(accessor, tIndex);
-
- int statetupleOffset = stateAccessor.getTupleStartOffset(stateTupleIndex);
- int countfieldStart = stateAccessor.getFieldStartOffset(stateTupleIndex, 1);
- int countoffset = statetupleOffset + stateAccessor.getFieldSlotsLength() + countfieldStart;
-
- byte[] data = stateAccessor.getBuffer().array();
- count += IntegerSerializerDeserializer.getInt(data, countoffset);
- IntegerSerializerDeserializer.putInt(count, data, countoffset);
- }
-
- @Override
- public void outputPartialResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
- AggregateState state) throws HyracksDataException {
- int count = getCount(accessor, tIndex);
- DataOutput fieldOutput = tupleBuilder.getDataOutput();
- try {
- fieldOutput.writeInt(count);
- tupleBuilder.addFieldEndOffset();
- } catch (IOException e) {
- throw new HyracksDataException("I/O exception when writing aggregation to the output buffer.");
- }
-
- }
-
- @Override
- public void outputFinalResult(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
- AggregateState state) throws HyracksDataException {
- outputPartialResult(tupleBuilder, accessor, tIndex, state);
-
- }
-
- @Override
- public void close() {
- // TODO Auto-generated method stub
-
- }
-
- }
-
- @Override
- public IAggregatorDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor,
- RecordDescriptor outRecordDescriptor, int[] keyFields, int[] keyFieldsInPartialResults)
- throws HyracksDataException {
- // TODO Auto-generated method stub
- return new DistributeAggregatorDescriptor(keyFields);
- }
-
-}
diff --git a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java
index ec95aa6..3a8746c 100644
--- a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java
+++ b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/JobRunStepByStepTest.java
@@ -1,3 +1,17 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package edu.uci.ics.genomix.hyracks.test;
import java.io.BufferedWriter;
@@ -46,6 +60,7 @@
private static final String EXPECTED_KMER_TO_READID = EXPECTED_DIR + "result_after_kmer2readId";
private static final String EXPECTED_GROUPBYREADID = EXPECTED_DIR + "result_after_readIDAggreage";
private static final String EXPECTED_OUPUT_NODE = EXPECTED_DIR + "result_after_generateNode";
+ private static final String EXPECTED_UNMERGED = EXPECTED_DIR + "result_unmerged";
private static final String DUMPED_RESULT = ACTUAL_RESULT_DIR + HDFS_OUTPUT_PATH + "/merged.txt";
private static final String CONVERT_RESULT = DUMPED_RESULT + ".txt";
@@ -65,6 +80,15 @@
TestMapKmerToRead();
TestGroupByReadID();
TestEndToEnd();
+ TestUnMergedNode();
+ }
+ /** Runs Plan.BUILD_UNMERGED_GRAPH (binary output, pre-cluster group-by) and checks it against EXPECTED_UNMERGED. */
+ public void TestUnMergedNode() throws Exception {
+ conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_BINARY);
+ cleanUpReEntry();
+ conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
+ driver.runJob(new GenomixJobConf(conf), Plan.BUILD_UNMERGED_GRAPH, true);
+ Assert.assertEquals(true, checkResults(EXPECTED_UNMERGED, new int[] { 1, 2, 3, 4 })); // NOTE(review): int[] presumably lists position-list fields -- confirm in checkResults
}
public void TestReader() throws Exception {
@@ -94,7 +118,7 @@
conf.set(GenomixJobConf.OUTPUT_FORMAT, GenomixJobConf.OUTPUT_FORMAT_TEXT);
conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
driver.runJob(new GenomixJobConf(conf), Plan.OUTPUT_GROUPBY_READID, true);
- Assert.assertEquals(true, checkResults(EXPECTED_GROUPBYREADID, new int [] {2,5,8,11,14,17,20,23}));
+ Assert.assertEquals(true, checkResults(EXPECTED_GROUPBYREADID, new int[] { 2, 5, 8, 11, 14, 17, 20, 23 }));
}
public void TestEndToEnd() throws Exception {
@@ -103,7 +127,7 @@
cleanUpReEntry();
conf.set(GenomixJobConf.GROUPBY_TYPE, GenomixJobConf.GROUPBY_TYPE_PRECLUSTER);
driver.runJob(new GenomixJobConf(conf), Plan.BUILD_DEBRUJIN_GRAPH, true);
- Assert.assertEquals(true, checkResults(EXPECTED_OUPUT_NODE, new int[] {1,2,3,4}));
+ Assert.assertEquals(true, checkResults(EXPECTED_OUPUT_NODE, new int[] { 1, 2, 3, 4 }));
}
@Before
@@ -195,7 +219,7 @@
if (node == null) {
break;
}
- bw.write(node.toString() );
+ bw.write(node.toString());
System.out.println(node.toString());
bw.newLine();
}
diff --git a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/TestUtils.java b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/TestUtils.java
index d22bd0c..cb19d0c 100644
--- a/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/TestUtils.java
+++ b/genomix/genomix-hyracks/src/test/java/edu/uci/ics/genomix/hyracks/test/TestUtils.java
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2012 by The Regents of the University of California
+ * Copyright 2009-2013 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
@@ -162,11 +162,11 @@
}
return true;
}
-
+
private static boolean containStrings(String lineExpected, String actualLine, int[] poslistField) {
-// if (lineExpected.equals(actualLine)){
-// return true;
-// }
+ if (lineExpected.equals(actualLine)) {
+ return true;
+ }
String[] fieldsExp = lineExpected.split("\\\t");
String[] fieldsAct = actualLine.split("\\\t");
if (fieldsAct.length != fieldsExp.length) {
@@ -180,7 +180,7 @@
break;
}
}
- if (cont){
+ if (cont) {
continue;
}
if (!fieldsAct[i].equals(fieldsExp[i])) {
diff --git a/genomix/genomix-hyracks/src/test/resources/expected/result_unmerged b/genomix/genomix-hyracks/src/test/resources/expected/result_unmerged
new file mode 100644
index 0000000..f617779
--- /dev/null
+++ b/genomix/genomix-hyracks/src/test/resources/expected/result_unmerged
@@ -0,0 +1,24 @@
+((1,1) [(1,2)] [] [] [] AATAG)
+((1,2) [(1,3)] [] [] [(1,1)] ATAGA)
+((1,3) [(6,1),(1,4)] [] [] [(1,2)] TAGAA)
+((1,4) [(6,2)] [] [] [(1,3)] AGAAG)
+((2,1) [(2,2)] [] [] [] AATAG)
+((2,2) [(2,3)] [] [] [(2,1)] ATAGC)
+((2,3) [(2,4)] [] [] [(2,2)] TAGCT)
+((2,4) [] [] [] [(2,3)] AGCTT)
+((3,1) [(3,2)] [] [] [] AATAG)
+((3,2) [(3,3)] [] [] [(3,1)] ATAGA)
+((3,3) [(6,1),(3,4)] [] [] [(3,2)] TAGAA)
+((3,4) [(6,2)] [] [] [(3,3)] AGAAG)
+((4,1) [(4,2)] [] [] [] AATAG)
+((4,2) [(4,3)] [] [] [(4,1)] ATAGC)
+((4,3) [(4,4)] [] [] [(4,2)] TAGCT)
+((4,4) [] [] [] [(4,3)] AGCTT)
+((5,1) [(5,2)] [] [] [] AATAG)
+((5,2) [(5,3)] [] [] [(5,1)] ATAGA)
+((5,3) [(6,1),(5,4)] [] [] [(5,2)] TAGAA)
+((5,4) [(6,2)] [] [] [(5,3)] AGAAG)
+((6,1) [(6,2)] [] [] [(1,3),(3,3),(5,3)] AGAAG)
+((6,2) [(6,3)] [] [] [(3,4),(1,4),(5,4),(6,1)] GAAGA)
+((6,3) [(6,4)] [] [] [(6,2)] AAGAA)
+((6,4) [] [] [] [(6,3)] AGAAG)